<?xml version="1.0" encoding="utf-8"?>
<raweb xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" year="2017">
  <identification id="sequel" isproject="true">
    <shortname>SEQUEL</shortname>
    <projectName>Sequential Learning</projectName>
    <theme-de-recherche>Optimization, machine learning and statistical methods</theme-de-recherche>
    <domaine-de-recherche>Applied Mathematics, Computation and Simulation</domaine-de-recherche>
    <urlTeam>https://team.inria.fr/sequel/</urlTeam>
    <structure_exterieure type="Labs">
      <libelle>Centre de Recherche en Informatique, Signal et Automatique de Lille</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Université Charles de Gaulle (Lille 3)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Université des sciences et technologies de Lille (Lille 1)</libelle>
    </structure_exterieure>
    <header_dates_team>Creation of the Project-Team: 2007 July 01</header_dates_team>
    <LeTypeProjet>Project-Team</LeTypeProjet>
    <keywordsSdN>
      <term>A3. - Data and knowledge</term>
      <term>A3.1. - Data</term>
      <term>A3.1.1. - Modeling, representation</term>
      <term>A3.1.4. - Uncertain data</term>
      <term>A3.3. - Data and knowledge analysis</term>
      <term>A3.3.1. - On-line analytical processing</term>
      <term>A3.3.2. - Data mining</term>
      <term>A3.3.3. - Big data analysis</term>
      <term>A3.4. - Machine learning and statistics</term>
      <term>A3.4.1. - Supervised learning</term>
      <term>A3.4.2. - Unsupervised learning</term>
      <term>A3.4.3. - Reinforcement learning</term>
      <term>A3.4.4. - Optimization and learning</term>
      <term>A3.4.6. - Neural networks</term>
      <term>A3.4.8. - Deep learning</term>
      <term>A3.5.2. - Recommendation systems</term>
      <term>A5.1. - Human-Computer Interaction</term>
      <term>A9. - Artificial intelligence</term>
      <term>A9.2. - Machine learning</term>
      <term>A9.3. - Signal analysis</term>
      <term>A9.4. - Natural language processing</term>
      <term>A9.7. - AI algorithmics</term>
    </keywordsSdN>
    <keywordsSecteurs>
      <term>B5.8. - Learning and training</term>
      <term>B6.1. - Software industry</term>
      <term>B7.2.1. - Smart vehicles</term>
      <term>B9.1.1. - E-learning, MOOC</term>
      <term>B9.4. - Sciences</term>
      <term>B9.4.5. - Data science</term>
    </keywordsSecteurs>
    <UR name="Lille"/>
  </identification>
  <team id="uid1">
    <person key="sequel-2014-idm27568">
      <firstname>Philippe</firstname>
      <lastname>Preux</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Team leader, Univ Charles de Gaulle, Professor, on Inria secondment since Sep 1st, 2016</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="dyogene-2014-idp72312">
      <firstname>Émilie</firstname>
      <lastname>Kaufmann</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>CNRS, Researcher</moreinfo>
    </person>
    <person key="sequel-2014-idm26088">
      <firstname>Alessandro</firstname>
      <lastname>Lazaric</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher, on secondment at Facebook AI Research since June 2017</moreinfo>
    </person>
    <person key="tao-2015-idp83360">
      <firstname>Odalric</firstname>
      <lastname>Maillard</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="sequel-2014-idp68800">
      <firstname>Daniil</firstname>
      <lastname>Ryabko</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp70232">
      <firstname>Michal</firstname>
      <lastname>Valko</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp75664">
      <firstname>Romaric</firstname>
      <lastname>Gaudel</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ Charles de Gaulle, Associate Professor, until Apr 2017</moreinfo>
    </person>
    <person key="sequel-2014-idp76928">
      <firstname>Jérémie</firstname>
      <lastname>Mary</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ Charles de Gaulle, Associate Professor, until May 2017, on secondment at Criteo Research since June 2017</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp79648">
      <firstname>Bilal</firstname>
      <lastname>Piot</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille, Associate Professor, until Jan 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp144544">
      <firstname>Ralph</firstname>
      <lastname>Bourdoukan</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille</moreinfo>
    </person>
    <person key="sequel-2017-idp147040">
      <firstname>Édouard</firstname>
      <lastname>Oyallon</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, since Nov 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp149504">
      <firstname>Matteo</firstname>
      <lastname>Pirotta</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sequel-2016-idp174768">
      <firstname>James</firstname>
      <lastname>Ridgway</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, until Aug 2017</moreinfo>
    </person>
    <person key="sequel-2014-idp97464">
      <firstname>Marc</firstname>
      <lastname>Abeille</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille, until Dec 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp156912">
      <firstname>Sheikh Waqas</firstname>
      <lastname>Akhtar</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, since Oct 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp159344">
      <firstname>Mahsa</firstname>
      <lastname>Asadi</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, from Sep 2017 to Dec 2017</moreinfo>
    </person>
    <person key="sequel-2015-idp113016">
      <firstname>Merwan</firstname>
      <lastname>Barlier</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Orange Labs</moreinfo>
    </person>
    <person key="sequel-2014-idp99936">
      <firstname>Alexandre</firstname>
      <lastname>Bérard</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille</moreinfo>
    </person>
    <person key="sequel-2016-idp147888">
      <firstname>Lilian</firstname>
      <lastname>Besson</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>CentraleSupélec Rennes</moreinfo>
    </person>
    <person key="sequel-2014-idp101176">
      <firstname>Daniele</firstname>
      <lastname>Calandriello</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, until Dec 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp171616">
      <firstname>Nicolas</firstname>
      <lastname>Carrara</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Orange Labs</moreinfo>
    </person>
    <person key="sequel-2015-idp116704">
      <firstname>Ronan</firstname>
      <lastname>Fruit</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sequel-2014-idp102400">
      <firstname>Pratik</firstname>
      <lastname>Gajane</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Orange Labs, until Nov 2017</moreinfo>
    </person>
    <person key="sequel-2014-idp104864">
      <firstname>Jean Bastien</firstname>
      <lastname>Grill</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria/ENS Paris</moreinfo>
    </person>
    <person key="sequel-2017-idp181360">
      <firstname>Édouard</firstname>
      <lastname>Leurent</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Renault, since Oct 2017</moreinfo>
    </person>
    <person key="mescal-2014-idp111616">
      <firstname>Alexis</firstname>
      <lastname>Martin</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, from Jan to Mar 2017</moreinfo>
    </person>
    <person key="sequel-2014-idp109840">
      <firstname>Julien</firstname>
      <lastname>Pérolat</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille, until Dec 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp188720">
      <firstname>Pierre</firstname>
      <lastname>Perrault</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, since Sep 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp191152">
      <firstname>Mathieu</firstname>
      <lastname>Seurin</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille, since Sep 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp193632">
      <firstname>Julien</firstname>
      <lastname>Seznec</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>LeLivreScolaire.fr, since Mar 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp196080">
      <firstname>Xuedong</firstname>
      <lastname>Shang</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria/ENS Rennes, since Oct 2017</moreinfo>
    </person>
    <person key="sequel-2015-idp110544">
      <firstname>Florian</firstname>
      <lastname>Strub</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille</moreinfo>
    </person>
    <person key="sequel-2017-idp200992">
      <firstname>Kiewan</firstname>
      <lastname>Villatel</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Criteo, since Oct 2017</moreinfo>
    </person>
    <person key="sequel-2016-idp172336">
      <firstname>Romain</firstname>
      <lastname>Warlop</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>55</moreinfo>
    </person>
    <person key="i4s-2015-idp77480">
      <firstname>Guillaume</firstname>
      <lastname>Gautier</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, since Feb 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp208288">
      <firstname>Subhojyoti</firstname>
      <lastname>Mukherjee</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, from Sep 2017 until Nov 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp210768">
      <firstname>Iuliia</firstname>
      <lastname>Olkhovskaia</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, from Feb 2017 until Jul 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp213248">
      <firstname>Georgios</firstname>
      <lastname>Papoudakis</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille, from May 2017 until Sep 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp196080">
      <firstname>Xuedong</firstname>
      <lastname>Shang</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ des sciences et technologies de Lille, from Feb 2017 until Jun 2017</moreinfo>
    </person>
    <person key="sequel-2014-idp88536">
      <firstname>Amelie</firstname>
      <lastname>Supervielle</lastname>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sequel-2017-idp220768">
      <firstname>Reda</firstname>
      <lastname>Alami</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Orange Labs, since Oct 2017</moreinfo>
    </person>
    <person key="tao-2015-idp138584">
      <firstname>Aditya</firstname>
      <lastname>Gopalan</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>IIT Madras, Mar 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp225712">
      <firstname>Mohammad Sadegh</firstname>
      <lastname>Talebi Mazraeh Shahi</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ANR, from Jun 2017 until Sep 2017</moreinfo>
    </person>
    <person key="sequel-2017-idp228192">
      <firstname>Mohammadi</firstname>
      <lastname>Zaki</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Indian Institute of Science, Mar 2017</moreinfo>
    </person>
    <person key="sequel-2016-idp194624">
      <firstname>Rémi</firstname>
      <lastname>Bardenet</lastname>
      <categoryPro>CollaborateurExterieur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>CNRS</moreinfo>
    </person>
    <person key="sequel-2014-idp71472">
      <firstname>Pierre</firstname>
      <lastname>Chainais</lastname>
      <categoryPro>CollaborateurExterieur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Ecole centrale de Lille</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp76928">
      <firstname>Jérémie</firstname>
      <lastname>Mary</lastname>
      <categoryPro>CollaborateurExterieur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Criteo, since Jun 2017</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp111080">
      <firstname>Olivier</firstname>
      <lastname>Pietquin</lastname>
      <categoryPro>CollaborateurExterieur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Deepmind London, since May 2016</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2017-idp241696">
      <firstname>Harm</firstname>
      <lastname>de Vries</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université de Montréal, until Jun 2017</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Presentation</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span> means “Sequential Learning”. As such, <span class="smallcap" align="left">SequeL</span> focuses on the task of learning in artificial systems (either hardware, or software) that gather information along time. Such systems are named <i>(learning) agents</i> (or learning machines) in the following.
These data may be used to estimate some parameters of a model, which in turn, may be used for selecting actions in order to perform some long-term optimization task.</p>
      <p>For the purpose of model building, the agent needs to represent information collected so far in some compact form and use it to process newly available data.</p>
      <p>The acquired data may result from an observation process of an agent in interaction with its environment (the data thus represent a perception). This is the case when the agent makes decisions (in order to attain a certain objective) that impact the environment, and thus the observation process itself.</p>
      <p>Hence, in <span class="smallcap" align="left">SequeL</span>, the term <b>sequential</b> refers to two aspects:</p>
      <simplelist>
        <li id="uid4">
          <p noindent="true">The <b>sequential acquisition of data</b>, from which a model is learned (supervised and unsupervised learning),</p>
        </li>
        <li id="uid5">
          <p noindent="true">the <b>sequential decision making task</b>, based on the learned model (reinforcement learning).</p>
        </li>
      </simplelist>
      <p>Examples of sequential learning problems include:</p>
      <descriptionlist>
        <label>Supervised learning</label>
        <li id="uid6">
          <p noindent="true">tasks deal with the prediction of some response given a certain set of observations of input variables and responses. New sample points keep on being observed.</p>
        </li>
        <label>Unsupervised learning</label>
        <li id="uid7">
          <p noindent="true">tasks deal with clustering objects, the latter forming a stream of objects. The (unknown) number of clusters typically evolves over time, as new objects are observed.</p>
        </li>
        <label>Reinforcement learning</label>
        <li id="uid8">
          <p noindent="true">tasks deal with the control (a policy) of some system which has to be optimized (see <ref xlink:href="#sequel-2017-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). We do not assume the availability of a model of the system to be controlled.</p>
        </li>
      </descriptionlist>
      <p>In all these cases, we mostly assume that the process can be considered stationary for at least a certain amount of time, and slowly evolving.</p>
      <p>We wish to have any-time algorithms, that is, at any moment, a prediction may be required/an action may be selected making full use, and hopefully, the best use, of the experience already gathered by the learning agent.</p>
      <p>The perception of the environment by the learning agent (using its sensors) is generally neither the best one to make a prediction, nor to take a decision (we deal with Partially Observable Markov Decision Problems). So, the perception has to be mapped in some way to a better, and relevant, state (or input) space.</p>
      <p>Finally, an important issue of prediction regards its evaluation: how wrong may we be when we perform a prediction? For real systems to be controlled, this issue cannot simply be left unanswered.</p>
      <p spacebefore="6.0pt">To sum up, in <span class="smallcap" align="left">SequeL</span>, the main issues regard:</p>
      <simplelist>
        <li id="uid9">
          <p noindent="true">the learning of a model: we focus on models that map some
input space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>ℝ</mi><mi>P</mi></msup></math></formula> to <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ℝ</mi></math></formula>,</p>
        </li>
        <li id="uid10">
          <p noindent="true">the observation to state mapping,</p>
        </li>
        <li id="uid11">
          <p noindent="true">the choice of the action to perform (in the case of sequential
decision problem),</p>
        </li>
        <li id="uid12">
          <p noindent="true">the performance guarantees,</p>
        </li>
        <li id="uid13">
          <p noindent="true">the implementation of usable algorithms,</p>
        </li>
      </simplelist>
      <p>all that being understood in a <i>sequential</i> framework.</p>
    </subsection>
  </presentation>
  <fondements id="uid14">
    <bodyTitle>Research Program</bodyTitle>
    <subsection id="uid15" level="1">
      <bodyTitle>In Short</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span> is primarily grounded on two domains:</p>
      <simplelist>
        <li id="uid16">
          <p noindent="true">the problem of decision under uncertainty,</p>
        </li>
        <li id="uid17">
          <p noindent="true">statistical analysis and statistical learning, which provide the general concepts and tools to solve this problem.</p>
        </li>
      </simplelist>
      <p>To help the reader who is unfamiliar with these questions, we briefly present key ideas below.</p>
    </subsection>
    <subsection id="uid18" level="1">
      <bodyTitle>Decision-making Under Uncertainty</bodyTitle>
      <p>The phrase “Decision under uncertainty” refers to the problem of taking decisions when we do not have full knowledge of either the situation or the consequences of the decisions, as well as when the consequences of a decision are non-deterministic.</p>
      <p>We introduce two specific sub-domains, namely Markov decision processes, which model sequential decision problems, and bandit problems.</p>
      <subsection id="uid19" level="2">
        <bodyTitle>Reinforcement Learning</bodyTitle>
        <p>Sequential decision processes occupy the heart of the <span class="smallcap" align="left">SequeL</span> project; a detailed presentation of this problem may be found in Puterman's book <ref xlink:href="#sequel-2017-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        <p>A Markov Decision Process (MDP) is defined as the tuple <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo>(</mo><mi>𝒳</mi><mo>,</mo><mi>𝒜</mi><mo>,</mo><mi>P</mi><mo>,</mo><mi>r</mi><mo>)</mo></mrow></math></formula> where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula> is the state space, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula> is the action space, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>P</mi></math></formula> is the probabilistic transition kernel, and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>r</mi><mo>:</mo><mi>𝒳</mi><mo>×</mo><mi>𝒜</mi><mo>×</mo><mi>𝒳</mi><mo>→</mo><mi>I</mi><mspace width="-0.166667em"/><mspace width="-0.166667em"/><mi>R</mi></mrow></math></formula> is the reward function. For the sake of simplicity, we assume in this introduction that the state and action spaces are finite. 
If the current state (at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula>) is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> and the chosen action is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></math></formula>, then the Markov assumption means that the transition probability to a new state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>x</mi><mo>'</mo></msup><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> (at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></math></formula>) only depends on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow></math></formula>. We write <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>p</mi><mo>(</mo><msup><mi>x</mi><mo>'</mo></msup><mo>|</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow></math></formula> the corresponding transition probability. During a transition <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow><mo>→</mo><msup><mi>x</mi><mo>'</mo></msup></mrow></math></formula>, a reward <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>r</mi><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>,</mo><msup><mi>x</mi><mo>'</mo></msup><mo>)</mo></mrow></math></formula> is incurred.</p>
        <p>In the MDP (<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>𝒳</mi><mo>,</mo><mi>𝒜</mi><mo>,</mo><mi>P</mi><mo>,</mo><mi>r</mi><mo>)</mo></mrow></math></formula>, each initial state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mn>0</mn></msub></math></formula> and action sequence <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula> gives rise to a sequence of states <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><msub><mi>x</mi><mn>2</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula>, satisfying <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>ℙ</mi><mfenced separators="" open="(" close=")"><msub><mi>x</mi><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>=</mo><msup><mi>x</mi><mo>'</mo></msup><mrow><mo>|</mo></mrow><msub><mi>x</mi><mi>t</mi></msub><mo>=</mo><mi>x</mi><mo>,</mo><msub><mi>a</mi><mi>t</mi></msub><mo>=</mo><mi>a</mi></mfenced><mo>=</mo><mi>p</mi><mrow><mo>(</mo><msup><mi>x</mi><mo>'</mo></msup><mo>|</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow><mo>,</mo></mrow></math></formula> and rewards <footnote id="uid20" id-text="1">Note that for simplicity, we considered the case of a deterministic reward function, but in many applications, the reward <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>r</mi><mi>t</mi></msub></math></formula> itself is a random variable.</footnote> <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" 
overflow="scroll"><mrow><msub><mi>r</mi><mn>1</mn></msub><mo>,</mo><msub><mi>r</mi><mn>2</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula> defined by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>r</mi><mi>t</mi></msub><mo>=</mo><mi>r</mi><mrow><mo>(</mo><msub><mi>x</mi><mi>t</mi></msub><mo>,</mo><msub><mi>a</mi><mi>t</mi></msub><mo>,</mo><msub><mi>x</mi><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>)</mo></mrow></mrow></math></formula>.</p>
        <p>The history of the process up to time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> is defined to be <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>H</mi><mi>t</mi></msub><mo>=</mo><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mrow><mi>t</mi><mo>-</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>a</mi><mrow><mi>t</mi><mo>-</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow></mrow></math></formula>. A policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is a sequence of functions <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mn>0</mn></msub><mo>,</mo><msub><mi>π</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula>, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>π</mi><mi>t</mi></msub></math></formula> maps the space of possible histories at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> to the space of probability distributions over the space of actions <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula>. 
To follow a policy means that, in each time step, we assume that the process history up to time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub></mrow></math></formula> and the probability of selecting an action <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>a</mi></math></formula> is equal to <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mi>t</mi></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow><mrow><mo>(</mo><mi>a</mi><mo>)</mo></mrow></mrow></math></formula>. A policy is called stationary (or Markovian) if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>π</mi><mi>t</mi></msub></math></formula> depends only on the last visited state. 
In other words, a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>π</mi><mo>=</mo><mo>(</mo><msub><mi>π</mi><mn>0</mn></msub><mo>,</mo><msub><mi>π</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo><mo>)</mo></mrow></math></formula> is called stationary if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mi>t</mi></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow><mo>=</mo><msub><mi>π</mi><mn>0</mn></msub><mrow><mo>(</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow></mrow></math></formula> holds for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>t</mi><mo>≥</mo><mn>0</mn></mrow></math></formula>. A policy is called deterministic if the probability distribution prescribed by the policy for any history is concentrated on a single action. Otherwise it is called a stochastic policy.</p>
        <p>We move from an MD process to an MD problem by formulating the goal of the agent, that is, what the sought policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> has to optimize. It is very often formulated as maximizing (or minimizing), in expectation, some functional of the sequence of future rewards. For example, a usual functional is the infinite-time horizon sum of discounted rewards. For a given (stationary) policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>, we define the value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> of that policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> at a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> as the expected sum of discounted future rewards given that we start from the initial state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula> and follow the policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>:</p>
        <formula id-text="1" id="uid21" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>V</mi>
                <mi>π</mi>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <mi>𝔼</mi>
              <mfenced separators="" open="[" close="]">
                <munderover>
                  <mo>∑</mo>
                  <mrow>
                    <mi>t</mi>
                    <mo>=</mo>
                    <mn>0</mn>
                  </mrow>
                  <mi>∞</mi>
                </munderover>
                <msup>
                  <mi>γ</mi>
                  <mi>t</mi>
                </msup>
                <msub>
                  <mi>r</mi>
                  <mi>t</mi>
                </msub>
                <mo>|</mo>
                <msub>
                  <mi>x</mi>
                  <mn>0</mn>
                </msub>
                <mo>=</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>π</mi>
              </mfenced>
              <mo>,</mo>
            </mrow>
          </math>
        </formula>
        <p>where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝔼</mi></math></formula> is the expectation operator and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>γ</mi><mo>∈</mo><mo>(</mo><mn>0</mn><mo>,</mo><mn>1</mn><mo>)</mo></mrow></math></formula> is the discount factor. This value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mi>π</mi></msup></math></formula> gives an evaluation of the performance of a given policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>. Other functionals of the sequence of future rewards may be considered, such as the undiscounted reward (see the stochastic shortest path problems <ref xlink:href="#sequel-2017-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) and average reward settings. Note also that, here, we considered the problem of maximizing a reward functional, but a formulation in terms of minimizing some cost or risk functional would be equivalent.</p>
        <p>In order to maximize a given functional in a sequential framework, one usually applies Dynamic Programming (DP)  <ref xlink:href="#sequel-2017-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, which introduces the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula>, defined as the optimal expected sum of rewards when the agent starts from a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula>. We have <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow><mo>=</mo><msub><mo movablelimits="true" form="prefix">sup</mo><mi>π</mi></msub><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula>. Now, let us give two definitions about policies:</p>
        <simplelist>
          <li id="uid22">
            <p noindent="true">We say that a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is optimal, if it attains the optimal values <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> for any state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>, <i>i.e.</i>, if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow><mo>=</mo><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>. Under mild conditions, deterministic stationary optimal policies exist <ref xlink:href="#sequel-2017-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Such an optimal policy is written <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>π</mi><mo>*</mo></msup></math></formula>.</p>
          </li>
          <li id="uid23">
            <p noindent="true">We say that a (deterministic stationary) policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is greedy with respect to (w.r.t.) some function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula> (defined on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula>) if, for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>,</p>
            <formula type="display">
              <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
                <mrow>
                  <mi>π</mi>
                  <mrow>
                    <mo>(</mo>
                    <mi>x</mi>
                    <mo>)</mo>
                  </mrow>
                  <mo>∈</mo>
                  <mo form="prefix">arg</mo>
                  <munder>
                    <mo movablelimits="true" form="prefix">max</mo>
                    <mrow>
                      <mi>a</mi>
                      <mo>∈</mo>
                      <mi>𝒜</mi>
                    </mrow>
                  </munder>
                  <munder>
                    <mo>∑</mo>
                    <mrow>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>∈</mo>
                      <mi>𝒳</mi>
                    </mrow>
                  </munder>
                  <mi>p</mi>
                  <mrow>
                    <mo>(</mo>
                    <msup>
                      <mi>x</mi>
                      <mo>'</mo>
                    </msup>
                    <mo>|</mo>
                    <mi>x</mi>
                    <mo>,</mo>
                    <mi>a</mi>
                    <mo>)</mo>
                  </mrow>
                  <mfenced separators="" open="[" close="]">
                    <mi>r</mi>
                    <mrow>
                      <mo>(</mo>
                      <mi>x</mi>
                      <mo>,</mo>
                      <mi>a</mi>
                      <mo>,</mo>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mo>+</mo>
                    <mi>γ</mi>
                    <mi>V</mi>
                    <mrow>
                      <mo>(</mo>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                  </mfenced>
                  <mo>.</mo>
                </mrow>
              </math>
            </formula>
            <p> </p>
            <p noindent="true">where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo form="prefix">arg</mo><msub><mo movablelimits="true" form="prefix">max</mo><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></msub><mi>f</mi><mrow><mo>(</mo><mi>a</mi><mo>)</mo></mrow></mrow></math></formula> is the set of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></math></formula> that maximizes <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><mi>a</mi><mo>)</mo></mrow></math></formula>. For any function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula>, such a greedy policy always exists because <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula> is finite.</p>
          </li>
        </simplelist>
        <p>The goal of Reinforcement Learning (RL), as well as that of dynamic programming, is to design an optimal policy (or a good approximation of it).</p>
        <p spacebefore="6.0pt">The well-known Dynamic Programming equation (also called the Bellman equation) provides a relation between the optimal value function at a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula> and the optimal value function at the successor states <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>x</mi><mo>'</mo></msup></math></formula> when choosing an optimal action: for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>,</p>
        <formula id-text="2" id="uid24" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>V</mi>
                <mo>*</mo>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <munder>
                <mo movablelimits="true" form="prefix">max</mo>
                <mrow>
                  <mi>a</mi>
                  <mo>∈</mo>
                  <mi>𝒜</mi>
                </mrow>
              </munder>
              <munder>
                <mo>∑</mo>
                <mrow>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>∈</mo>
                  <mi>𝒳</mi>
                </mrow>
              </munder>
              <mi>p</mi>
              <mrow>
                <mo>(</mo>
                <msup>
                  <mi>x</mi>
                  <mo>'</mo>
                </msup>
                <mo>|</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>a</mi>
                <mo>)</mo>
              </mrow>
              <mfenced separators="" open="[" close="]">
                <mi>r</mi>
                <mrow>
                  <mo>(</mo>
                  <mi>x</mi>
                  <mo>,</mo>
                  <mi>a</mi>
                  <mo>,</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
                <mo>+</mo>
                <mi>γ</mi>
                <msup>
                  <mi>V</mi>
                  <mo>*</mo>
                </msup>
                <mrow>
                  <mo>(</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
              </mfenced>
              <mo>.</mo>
            </mrow>
          </math>
        </formula>
        <p>The benefit of introducing this concept of optimal value function relies on the property that, from the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula>, it is easy to derive an optimal behavior by choosing the actions according to a policy greedy w.r.t. <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula>. Indeed, we have the property that a policy greedy w.r.t. the optimal value function is an optimal policy:</p>
        <formula id-text="3" id="uid25" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>π</mi>
                <mo>*</mo>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>∈</mo>
              <mo form="prefix">arg</mo>
              <munder>
                <mo movablelimits="true" form="prefix">max</mo>
                <mrow>
                  <mi>a</mi>
                  <mo>∈</mo>
                  <mi>𝒜</mi>
                </mrow>
              </munder>
              <munder>
                <mo>∑</mo>
                <mrow>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>∈</mo>
                  <mi>𝒳</mi>
                </mrow>
              </munder>
              <mi>p</mi>
              <mrow>
                <mo>(</mo>
                <msup>
                  <mi>x</mi>
                  <mo>'</mo>
                </msup>
                <mo>|</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>a</mi>
                <mo>)</mo>
              </mrow>
              <mfenced separators="" open="[" close="]">
                <mi>r</mi>
                <mrow>
                  <mo>(</mo>
                  <mi>x</mi>
                  <mo>,</mo>
                  <mi>a</mi>
                  <mo>,</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
                <mo>+</mo>
                <mi>γ</mi>
                <msup>
                  <mi>V</mi>
                  <mo>*</mo>
                </msup>
                <mrow>
                  <mo>(</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
              </mfenced>
              <mo>.</mo>
            </mrow>
          </math>
        </formula>
        <p>In short, we would like to mention that most of the reinforcement learning methods developed so far are built on one (or both) of the two following approaches ( <ref xlink:href="#sequel-2017-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>):</p>
        <simplelist>
          <li id="uid26">
            <p noindent="true">Bellman's dynamic programming approach, based on the introduction of the value function. It consists in learning a “good” approximation of the optimal value function, and then using it to derive a greedy policy w.r.t. this approximation. The hope (well justified in several cases) is that the performance <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mi>π</mi></msup></math></formula> of the policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> greedy w.r.t. an approximation <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula> of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula> will be close to optimality. This approximation issue of the optimal value function is one of the major challenges inherent to the reinforcement learning problem. <b>Approximate dynamic programming</b> addresses the problem of estimating performance bounds (<i>e.g.</i> the loss in performance <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>|</mo><mo>|</mo></mrow><msup><mi>V</mi><mo>*</mo></msup><mo>-</mo><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>|</mo><mo>|</mo></mrow></mrow></math></formula> resulting from using a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> — greedy w.r.t. 
some approximation <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula> — instead of an optimal policy) in terms of the approximation error <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>|</mo><mo>|</mo></mrow><msup><mi>V</mi><mo>*</mo></msup><mo>-</mo><mi>V</mi><mrow><mo>|</mo><mo>|</mo></mrow></mrow></math></formula> of the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula> by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula>. Approximation theory and Statistical Learning theory provide us with bounds in terms of the number of sample data used to represent the
functions, and the capacity and approximation power of the considered function spaces.</p>
          </li>
          <li id="uid27">
            <p noindent="true">Pontryagin's maximum principle approach, based on sensitivity analysis of the performance measure w.r.t. some control parameters. This approach, also called <b>direct policy search</b> in the Reinforcement Learning community, aims at directly finding a good feedback control law in a parameterized policy space without trying to approximate the value function. The method consists in estimating the so-called <b>policy gradient</b>, <i>i.e.</i> the sensitivity of the performance measure (the value function) w.r.t. some parameters of the current policy. The idea is that an optimal control problem is replaced by a parametric optimization problem in the space of parameterized policies. As such, deriving a policy gradient estimate would lead to performing a stochastic gradient method in order to search for a locally optimal parametric policy.</p>
          </li>
        </simplelist>
        <p>Finally, many extensions of the Markov decision processes exist, among which the Partially Observable MDPs (POMDPs) is the case where the current state does not contain all the necessary information required to decide for sure of the best action.</p>
      </subsection>
      <subsection id="uid28" level="2">
        <bodyTitle>Multi-arm Bandit Theory</bodyTitle>
        <p>Bandit problems illustrate the fundamental difficulty of decision making in the face of uncertainty: A decision maker must choose between what seems to be the best choice (“exploit”), or to test (“explore”) some alternative, hoping to discover a choice that beats the current best choice.</p>
        <p>The classical example of a bandit problem is deciding what treatment to give each patient in a clinical trial when the effectiveness of the treatments is initially unknown and the patients arrive sequentially. These bandit problems became popular with the seminal paper <ref xlink:href="#sequel-2017-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, after which they have found applications in diverse fields, such as control, economics, statistics, or learning theory.</p>
        <p>Formally, a K-armed bandit problem (<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>K</mi><mo>≥</mo><mn>2</mn></mrow></math></formula>) is specified by K real-valued distributions. In each time step a decision maker can select one of the distributions to obtain a sample from it. The samples obtained are considered as rewards. The distributions are initially unknown to the decision maker, whose goal is to maximize the sum of the rewards received, or equivalently, to minimize the regret which is defined as the loss compared to the total payoff that can be achieved given full knowledge of the problem, <i>i.e.</i>, when the arm giving the highest expected reward is pulled all the time.</p>
        <p>The name “bandit” comes from imagining a gambler playing with K slot machines. The gambler can pull the arm of any of the machines, which produces a random payoff as a result: When arm k is pulled, the random payoff is drawn from the distribution associated to k. Since the payoff distributions are initially unknown, the gambler must use exploratory actions to learn the utility of the individual arms. However, exploration has to be carefully controlled since excessive exploration may lead to unnecessary losses. Hence, to play well, the gambler must carefully balance exploration and exploitation. Auer <i>et al.</i> <ref xlink:href="#sequel-2017-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> introduced the algorithm UCB (Upper Confidence Bounds) that follows what is now called the “optimism in the face of uncertainty principle”. Their algorithm works by computing upper confidence bounds for all the arms and then choosing the arm with the highest such bound. They proved that the expected regret of their algorithm increases at most at a logarithmic rate
with the number of trials, and that the algorithm achieves the smallest possible regret up to some sub-logarithmic factor (for the considered family of distributions).</p>
      </subsection>
    </subsection>
    <subsection id="uid29" level="1">
      <bodyTitle>Statistical analysis of time series</bodyTitle>
      <p>Many of the problems of machine learning can be seen as extensions of classical problems of mathematical statistics to their (extremely) non-parametric and model-free cases. Other machine learning problems are founded on such statistical problems. Statistical problems of sequential learning are mainly those that are concerned with the analysis of time series. These problems are as follows.</p>
      <subsection id="uid30" level="2">
        <bodyTitle>Prediction of Sequences of Structured and Unstructured Data</bodyTitle>
        <p>Given a series of observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub></mrow></math></formula> it is required to give forecasts concerning the distribution of the future observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>2</mn></mrow></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula>; in the simplest case, that of the next outcome <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub></math></formula>.
Then <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub></math></formula> is revealed and the process continues. Different goals can be formulated in this setting. One can either make some assumptions on the probability
measure that generates the sequence <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula>, such as that the outcomes are independent and identically distributed (i.i.d.),
or that the sequence is a Markov chain, that it is a stationary process, etc.
More generally, one can assume that the data is generated by a probability measure that belongs to a certain set <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒞</mi></math></formula>.
In these cases the goal is to have the discrepancy between the predicted and the “true” probabilities to go to zero, if possible, with guarantees
on the speed of convergence.</p>
        <p>Alternatively, rather than making some assumptions on the data, one can change the goal: the predicted probabilities should be asymptotically as good as those given by the best reference predictor from a certain pre-defined set.</p>
        <p>Another dimension of complexity in this problem concerns the nature of observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mi>i</mi></msub></math></formula>. In the simplest case,
they come from a finite space, but already basic applications often require real-valued observations. Moreover,
function or even graph-valued observations often arise in practice, in particular in applications concerning Web data.
In these settings estimating even simple characteristics of probability distributions of the future outcomes becomes
non-trivial, and new learning algorithms for solving these problems are in order.</p>
      </subsection>
      <subsection id="uid31" level="2">
        <bodyTitle>Hypothesis testing</bodyTitle>
        <p>Given a series of observations of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula> generated by some unknown probability measure <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula>, the problem is to test a certain given hypothesis <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> about <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula>, versus a given alternative hypothesis <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula>. There are many different examples of this problem. Perhaps the simplest one is testing a simple hypothesis “<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is Bernoulli i.i.d. measure with probability of 0 equals 1/2” versus “<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is Bernoulli i.i.d. with the parameter different from 1/2”. More
interesting cases include the problems of model verification: for example, testing that <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is a Markov chain, versus that it is a stationary ergodic process but not a Markov chain. In the case when we have not one but several series of observations, we may wish to test the hypothesis that they are independent, or that they are generated by the same distribution. Applications of these problems to a more general class of machine learning tasks include the problem of feature selection, the problem of testing that a certain behaviour (such as pulling a certain arm of a bandit, or using a certain policy) is better (in terms of achieving some goal, or collecting some rewards) than another behaviour, or than a class of other behaviours.</p>
        <p>The problem of hypothesis testing can also be studied in its general formulation: given two (abstract) hypotheses <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula> about the unknown measure that generates the data, find out whether it is possible to test <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> against <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula> (with confidence), and if yes then how one can do it.</p>
      </subsection>
      <subsection id="uid32" level="2">
        <bodyTitle>Change Point Analysis</bodyTitle>
        <p>A stochastic process is generating the data. At some point, the process distribution changes.
In the “offline” situation, the statistician observes the resulting sequence of outcomes and has
to estimate the point or the points at which the change(s) occurred. In the online setting, the goal is to
detect the change as quickly as possible.</p>
        <p>These are the classical problems in mathematical statistics, and probably among the last remaining statistical problems
not adequately addressed by machine learning methods. The reason for the latter is perhaps in that the problem is rather
challenging. Thus, most methods available so far are parametric methods concerning piece-wise constant distributions, and the
change in distribution is associated with the change in the mean. However, many applications, including DNA analysis,
the analysis of (user) behaviour data, etc., fail to comply with this kind of assumption. Thus, our goal here is to provide completely non-parametric
methods allowing for any kind of changes in the time-series distribution.</p>
      </subsection>
      <subsection id="uid33" level="2">
        <bodyTitle>Clustering Time Series, Online and Offline</bodyTitle>
        <p>The problem of clustering, while being a classical problem of mathematical statistics, belongs to the realm of unsupervised learning. For time series, this problem can be formulated as follows: given several samples <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>x</mi><mn>1</mn></msup><mo>=</mo><mrow><mo>(</mo><msubsup><mi>x</mi><mn>1</mn><mn>1</mn></msubsup><mo>,</mo><mo>⋯</mo><mo>,</mo><msubsup><mi>x</mi><msub><mi>n</mi><mn>1</mn></msub><mn>1</mn></msubsup><mo>)</mo></mrow><mo>,</mo><mo>⋯</mo><mo>,</mo><msup><mi>x</mi><mi>N</mi></msup><mo>=</mo><mrow><mo>(</mo><msubsup><mi>x</mi><mn>1</mn><mi>N</mi></msubsup><mo>,</mo><mo>⋯</mo><mo>,</mo><msubsup><mi>x</mi><msub><mi>n</mi><mi>N</mi></msub><mi>N</mi></msubsup><mo>)</mo></mrow></mrow></math></formula>, we wish to group similar objects together. While this is of course not a precise formulation, it can be made precise if we assume that the samples were generated by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula> different distributions.</p>
        <p>The online version of the problem allows for the number of observed time series to grow with time, in general, in an arbitrary manner.</p>
      </subsection>
      <subsection id="uid34" level="2">
        <bodyTitle>Online Semi-Supervised Learning</bodyTitle>
        <p>Semi-supervised learning (SSL) is a field of machine learning that studies
learning from both labeled and unlabeled examples. This learning
paradigm is extremely useful for solving real-world problems, where
data is often abundant but the resources to label them are limited.</p>
        <p>Furthermore, <i>online</i> SSL is suitable for adaptive machine
learning systems. In the classification case, learning is viewed as a
repeated game against a potentially adversarial nature. At each step
<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> of this game, we observe an example <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>𝐱</mi><mi>𝐭</mi></msub></math></formula>, and then predict
its label <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mover accent="true"><mi>y</mi><mo>^</mo></mover><mi>t</mi></msub></math></formula>.</p>
        <p>The challenge of the game is that we only exceptionally observe the true label
<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>y</mi><mi>t</mi></msub></math></formula>. In the extreme case, which we also study, only a handful of labeled
examples are provided in advance and set the initial bias of the system while
unlabeled examples are gathered online and update the bias continuously.
Thus, if we want to adapt to changes in the environment, we have to rely on
indirect forms of feedback, such as the structure of data.</p>
      </subsection>
      <subsection id="uid35" level="2">
        <bodyTitle>Online Kernel and Graph-Based Methods</bodyTitle>
        <p>Large-scale kernel ridge regression is limited by the need to store a
large kernel matrix. Similarly, large-scale graph-based learning is
limited by storing the graph Laplacian. Furthermore, if the data come
online, at some point no finite storage is sufficient and per step
operations become slow.</p>
        <p>Our challenge is to design sparsification methods that give guaranteed
approximate solutions with reduced storage requirements.</p>
      </subsection>
    </subsection>
  </fondements>
  <domaine id="uid36">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid37" level="1">
      <bodyTitle>Sequential decision making under uncertainty and prediction</bodyTitle>
      <p>The spectrum of applications of our research is very wide: it ranges from the core of our research, that is sequential decision making under uncertainty, to the application of components used to solve this decision making problem.</p>
      <p>To be more specific, we work on computational advertising and recommendation systems; these problems are considered as a sequential matching problem in which resources available in a limited amount have to be matched to meet some users' expectations. The sequential approach we advocate paves the way to better tackle the cold-start problem, and non-stationary environments. More generally, these approaches are applied to the optimization of budgeted resources under uncertainty, in a time-varying environment, including constraints on computational times (typically, a decision has to be made in less than 1 ms in a recommendation system). Another field of applications of our research is related to education which we consider as a sequential matching problem between a student, and educational contents.</p>
      <p>The algorithms to solve these tasks heavily rely on tools from machine learning, statistics, and optimization. Henceforth, we also apply our work to more classical supervised learning, and prediction tasks, as well as unsupervised learning tasks. The whole range of methods is used, from decision forests, to kernel methods, to deep learning. For instance, we have recently used deep learning on images. We also have a line of works related to software development studying how machine learning can improve the quality of software being developed. More generally, we apply our research to data science.
</p>
    </subsection>
  </domaine>
  <highlights id="uid38">
    <bodyTitle>Highlights of the Year</bodyTitle>
    <subsection id="uid39" level="1">
      <bodyTitle>Highlights of the Year</bodyTitle>
      <simplelist>
        <li id="uid40">
          <p noindent="true">under the supervision of O. Pietquin and J. Mary, F. Strub and collaborators (among which the University of Montreal) have introduced the <b>Guesswhat?!</b> game to study visually grounded dialogues interleaving vision and natural language. A dataset of 150k human-human dialogues has been collected and is freely available on the Internet. Supervised learning baselines and state-of-the-art reinforcement learning algorithms have been implemented and are available as open-source code. This work resulted in publications in prestigious conferences: as a spotlight at CVPR 2017, an oral at IJCAI 2017, and another spotlight at NIPS 2017 <ref xlink:href="#sequel-2017-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, <ref xlink:href="#sequel-2017-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, <ref xlink:href="#sequel-2017-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Spotlight presentations concern less than 3.5% of submissions to NIPS, and 5% of submissions to CVPR.</p>
          <p>See <ref xlink:href="https://www.guesswhat.ai" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>guesswhat.<allowbreak/>ai</ref></p>
        </li>
        <li id="uid41">
          <p noindent="true">under the supervision of M. Valko and A. Lazaric, D. Calandriello and collaborators have provided the first algorithm breaking the quadratic barrier for nonparametric learning. An open source implementation is available on the Internet. The work has been published in prestigious conferences: AI &amp; STATS, ICML and NIPS <ref xlink:href="#sequel-2017-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, <ref xlink:href="#sequel-2017-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, <ref xlink:href="#sequel-2017-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        </li>
      </simplelist>
    </subsection>
  </highlights>
  <logiciels id="uid42">
    <bodyTitle>New Software and Platforms</bodyTitle>
    <subsection id="uid43" level="1">
      <bodyTitle>BAC</bodyTitle>
      <p>
        <i>Bayesian Policy Gradient and Actor-Critic Algorithms</i>
      </p>
      <p noindent="true"><span class="smallcap" align="left">Keywords:</span> Machine learning - Incremental learning - Policy Learning</p>
      <p noindent="true"><span class="smallcap" align="left">Functional Description:</span> To address this issue, we proceed to supplement our Bayesian policy gradient framework with a new actor-critic learning model in which a Bayesian class of non-parametric critics, based on Gaussian process temporal difference learning, is used. Such critics model the action-value function as a Gaussian process, allowing Bayes’ rule to be used in computing the posterior distribution over action-value functions, conditioned on the observed data. Appropriate choices of the policy parameterization and of the prior covariance (kernel) between action-values allow us to obtain closed-form expressions for the posterior distribution of the gradient of the expected return with respect to the policy parameters. We perform detailed experimental comparisons of the proposed Bayesian policy gradient and actor-critic algorithms with classic Monte-Carlo based policy gradient methods, as well as with each other, on a number of reinforcement learning problems.</p>
      <simplelist>
        <li id="uid44">
          <p noindent="true">Contact: Michal Valko</p>
        </li>
        <li id="uid45">
          <p noindent="true">URL: <ref xlink:href="https://team.inria.fr/sequel/Software/BAC/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>team.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>sequel/<allowbreak/>Software/<allowbreak/>BAC/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid46" level="1">
      <bodyTitle>GuessWhat?!</bodyTitle>
      <p>
        <i>GuessWhat?! Visual object discovery through multi-modal dialogue</i>
      </p>
      <p noindent="true"><span class="smallcap" align="left">Keywords:</span> Deep learning - Dialogue System</p>
      <p noindent="true"><span class="smallcap" align="left">Functional Description:</span> This project trains an AI to play the GuessWhat?! game.
Thus, you can train an AI to ask questions, to answer questions about images. You can also perform basic visual reasoning.
This project is a testbed for future interactive dialogue systems.</p>
      <simplelist>
        <li id="uid47">
          <p noindent="true">Partner: Universite de Montreal</p>
        </li>
        <li id="uid48">
          <p noindent="true">Contact: Florian Strub</p>
        </li>
        <li id="uid49">
          <p noindent="true">Publications: <ref xlink:href="https://hal.inria.fr/hal-01549641" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">GuessWhat?! Visual object discovery through multi-modal dialogue</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01549642" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">End-to-end optimization of goal-driven and visually grounded dialogue systems Harm de Vries</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid50" level="1">
      <bodyTitle>Squeak</bodyTitle>
      <p>
        <i>Sequential sampling for kernel matrix approximation</i>
      </p>
      <p noindent="true"><span class="smallcap" align="left">Keyword:</span> Machine learning</p>
      <simplelist>
        <li id="uid51">
          <p noindent="true">Contact: Daniele Calandriello</p>
        </li>
        <li id="uid52">
          <p noindent="true">URL: <ref xlink:href="http://researchers.lille.inria.fr/~valko/hp/serve.php?what=publications/squeak.py" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>researchers.<allowbreak/>lille.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>~valko/<allowbreak/>hp/<allowbreak/>serve.<allowbreak/>php?what=publications/<allowbreak/>squeak.<allowbreak/>py</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid53" level="1">
      <bodyTitle>OOR</bodyTitle>
      <p>
        <i>Optimistic Optimization in R</i>
      </p>
      <p noindent="true"><span class="smallcap" align="left">Keywords:</span> Black-box optimization - Machine learning</p>
      <simplelist>
        <li id="uid54">
          <p noindent="true">Contact: Mickael Binois</p>
        </li>
        <li id="uid55">
          <p noindent="true">URL: <ref xlink:href="https://cran.r-project.org/web/packages/OOR/index.html" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>cran.<allowbreak/>r-project.<allowbreak/>org/<allowbreak/>web/<allowbreak/>packages/<allowbreak/>OOR/<allowbreak/>index.<allowbreak/>html</ref></p>
        </li>
      </simplelist>
    </subsection>
  </logiciels>
  <resultats id="uid56">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid57" level="1">
      <bodyTitle>Decision-making Under Uncertainty</bodyTitle>
      <subsection id="uid58" level="2">
        <bodyTitle>Reinforcement Learning</bodyTitle>
        <p><b>Thompson Sampling for Linear-Quadratic Control Problems</b>, <ref xlink:href="#sequel-2017-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We consider the exploration-exploitation tradeoff in linear quadratic (LQ) control problems, where the state dynamics is linear and the cost function is quadratic in states and controls. We analyze the regret of Thompson sampling (TS) (a.k.a. posterior-sampling for reinforcement learning) in the frequentist setting, i.e., when the parameters characterizing the LQ dynamics are fixed. Despite the empirical and theoretical success in a wide range of problems from multi-armed bandit to linear bandit, we show that when studying the frequentist regret TS in control problems, we need to trade-off the frequency of sampling optimistic parameters and the frequency of switches in the control policy. This results in an overall regret of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msup><mi>T</mi><mrow><mn>2</mn><mo>/</mo><mn>3</mn></mrow></msup><mo>)</mo></mrow></math></formula>, which is significantly worse than the regret <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msqrt><mi>T</mi></msqrt><mo>)</mo></mrow></math></formula> achieved by the optimism-in-face-of-uncertainty algorithm in LQ control problems.</p>
        <p><b>Exploration–Exploitation in MDPs with Options</b>, <ref xlink:href="#sequel-2017-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>While a large body of empirical results show that temporally-extended actions and options may significantly affect the learning performance of an agent, the theoretical understanding of how and when options can be beneficial in online reinforcement learning is relatively limited. In this paper, we derive an upper and lower bound on the regret of a variant of UCRL using options. While we first analyze the algorithm in the general case of semi-Markov decision processes (SMDPs), we show how these results can be translated to the specific case of MDPs with options and we illustrate simple scenarios in which the regret of learning with options can be provably much smaller than the regret suffered when learning with primitive actions.</p>
        <p><b>Regret Minimization in MDPs with Options without Prior Knowledge</b>, <ref xlink:href="#sequel-2017-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>The option framework integrates temporal abstraction into the reinforcement learning model through the introduction of macro-actions (i.e., options). Recent works leveraged the mapping of Markov decision processes (MDPs) with options to semi-MDPs (SMDPs) and introduced SMDP-versions of exploration-exploitation algorithms (e.g., RMAX-SMDP and UCRL-SMDP) to analyze the impact of options on the learning performance. Nonetheless, the PAC-SMDP sample complexity of RMAX-SMDP can hardly be translated into equivalent PAC-MDP theoretical guarantees, while the regret analysis of UCRL-SMDP requires prior knowledge of the distributions of the cumulative reward and duration of each option, which are hardly available in practice. In this paper, we remove this limitation by combining the SMDP view together with the inner Markov structure of options into a novel algorithm whose regret performance matches UCRL-SMDP's up to an additive regret term. We show scenarios where this term is negligible and the advantage of temporal abstraction is preserved. We also report preliminary empirical results supporting the theoretical findings.</p>
        <p><b>Is the Bellman Residual a Bad Proxy?</b>, <ref xlink:href="#sequel-2017-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>This paper aims at theoretically and empirically comparing two standard optimization criteria for Reinforcement Learning: i) maximization of the mean value and ii) minimization of the Bellman residual. For that purpose, we place ourselves in the framework of policy search algorithms, that are usually designed to maximize the mean value, and derive a method that minimizes the residual <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mrow><mo>∥</mo><msub><mi>T</mi><mo>*</mo></msub><msub><mi>v</mi><mi>π</mi></msub><mo>-</mo><msub><mi>v</mi><mi>π</mi></msub><mo>∥</mo></mrow><mrow><mn>1</mn><mo>,</mo><mi>ν</mi></mrow></msub></mrow></math></formula> over policies. A theoretical analysis shows how good this proxy is to policy optimization, and notably that it is better than its value-based counterpart. We also propose experiments on randomly generated generic Markov decision processes, specifically designed for studying the influence of the involved concentrability coefficient. They show that the Bellman residual is generally a bad proxy to policy optimization and that directly maximizing the mean value is much better, despite the current lack of deep theoretical analysis. This might seem obvious, as directly addressing the problem of interest is usually better, but given the prevalence of (projected) Bellman residual minimization in value-based reinforcement learning, we believe that this question is worth considering.</p>
        <p><b>Faut-il minimiser le résidu de Bellman ou maximiser la valeur moyenne ?</b>, <ref xlink:href="#sequel-2017-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p><b>Transfer Reinforcement Learning with Shared Dynamics</b>, <ref xlink:href="#sequel-2017-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>This article addresses a particular Transfer Reinforcement Learning (RL) problem: when dynamics do not change from one task to another, and only the reward function does. Our method relies on two ideas, the first one is that transition samples obtained from a task can be reused to learn on any other task: an immediate reward estimator is learnt in a supervised fashion and for each sample, the reward entry is changed by its reward estimate. The second idea consists in adopting the optimism in the face of uncertainty principle and to use upper bound reward estimates. Our method is tested on a navigation task, under four Transfer RL experimental settings: with a known reward function, with strong and weak expert knowledge on the reward function, and with a completely unknown reward function. It is also evaluated in a Multi-Task RL experiment and compared with the state-of-the-art algorithms. Results reveal that this method constitutes a major improvement for transfer/multi-task problems that share dynamics.</p>
      </subsection>
      <subsection id="uid59" level="2">
        <bodyTitle>Multi-arm Bandit Theory</bodyTitle>
        <p><b>Trading Off Rewards and Errors in Multi-armed Bandits</b>, <ref xlink:href="#sequel-2017-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>In multi-armed bandits, the most common objective is the maximization of the cumulative reward. Alternative settings include active exploration, where a learner tries to gain accurate estimates of the rewards of all arms. While these objectives are contrasting, in many scenarios it is desirable to trade off rewards and errors. For instance, in educational games the designer wants to gather generalizable knowledge about the behavior of the students and teaching strategies (small estimation errors) but, at the same time, the system needs to avoid giving a bad experience to the players, who may leave the system permanently (large reward). In this paper, we formalize this tradeoff and introduce the ForcingBalance algorithm whose performance is provably close to the best possible tradeoff strategy. Finally, we demonstrate on real-world educational data that ForcingBalance returns useful information about the arms without compromising the overall reward.</p>
        <p><b>Online Influence Maximization Under Independent Cascade Model with Semi-bandit Feedback</b>, <ref xlink:href="#sequel-2017-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We study the online influence maximization problem in social networks under the independent cascade model. Specifically, we aim to learn the set of “best influencers” in a social network online while repeatedly interacting with it. We address the challenges of (i) combinatorial action space, since the number of feasible influencer sets grows exponentially with the maximum number of influencers, and (ii) limited feedback, since only the influenced portion of the network is observed. Under a stochastic semi-bandit feedback, we propose and analyze IMLinUCB, a computationally efficient UCB-based algorithm. Our bounds on the cumulative regret are polynomial in all quantities of interest, achieve near-optimal dependence on the number of interactions and reflect the topology of the network and the activation probabilities of its edges, thereby giving insights on the problem complexity. To the best of our knowledge, these are the first such results. Our experiments show that in several representative graph topologies, the regret of IMLinUCB scales as suggested by our upper bounds. IMLinUCB permits linear generalization and thus is both statistically and computationally suitable for large-scale problems. Our experiments also show that IMLinUCB with linear generalization can lead to low regret in real-world online influence maximization.</p>
        <p><b>Boundary Crossing for General Exponential Families</b>, <ref xlink:href="#sequel-2017-bid22" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We consider parametric exponential families of dimension K on the real line. We study a variant of boundary crossing probabilities coming from the multi-armed bandit literature, in the case when the real-valued distributions form an exponential family of dimension K. Formally, our result is a concentration inequality that bounds the probability that B <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ψ</mi></math></formula> (<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>θ</mi></math></formula> n , <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>θ</mi></math></formula>) ≥ f (t/n)/n, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>θ</mi></math></formula> is the parameter of an unknown target distribution, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>θ</mi></math></formula> n is the empirical parameter estimate built from n observations, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ψ</mi></math></formula> is the log-partition function of the exponential family and B <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ψ</mi></math></formula> is the corresponding Bregman divergence. From the perspective of stochastic multi-armed bandits, we pay special attention to the case when the boundary function f is logarithmic, as it enables us to analyze the regret of the state-of-the-art KL-ucb and KL-ucb+ strategies, whose analysis was left open in such generality. Indeed, previous results only hold for the case when K = 1, while we provide results for arbitrary finite dimension K, thus considerably extending the existing results. 
Perhaps surprisingly, we highlight that the proof techniques to achieve these strong results already existed three decades ago in the work of T.L. Lai, and were apparently forgotten in the bandit community. We provide a modern rewriting of these beautiful techniques that we believe are useful beyond the application to stochastic multi-armed bandits.</p>
        <p><b>The Non-stationary Stochastic Multi-armed Bandit Problem</b>, Robin, Féraud, Maillard <ref xlink:href="#sequel-2017-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>
<footnote id="uid60" id-text="2">This work has been done while OA. Maillard was at Inria Saclay, in the TAO team.</footnote></p>
        <p><b>Linear Thompson Sampling Revisited</b>, <ref xlink:href="#sequel-2017-bid24" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We derive an alternative proof for the regret of Thompson sampling (TS) in the stochastic linear bandit setting. While we obtain a regret bound of order <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mover accent="true"><mi>O</mi><mo>˜</mo></mover><mrow><mo>(</mo><msup><mi>d</mi><mrow><mn>3</mn><mo>/</mo><mn>2</mn></mrow></msup><msqrt><mi>T</mi></msqrt><mo>)</mo></mrow></mrow></math></formula> as in previous results, the proof sheds new light on the functioning of the TS. We leverage on the structure of the problem to show how the regret is related to the sensitivity (i.e., the gradient) of the objective function and how selecting optimal arms associated to <i>optimistic</i> parameters does control it. Thus we show that TS can be seen as a generic randomized algorithm where the sampling distribution is designed to have a fixed probability of being optimistic, at the cost of an additional <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msqrt><mi>d</mi></msqrt></math></formula> regret factor compared to a UCB-like approach. Furthermore, we show that our proof can be readily applied to regularized linear optimization and generalized linear model problems.</p>
        <p><b>Active Learning for Accurate Estimation of Linear Models</b>, <ref xlink:href="#sequel-2017-bid25" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We explore the sequential decision-making problem where the goal is to estimate a number of linear models uniformly well, given a shared budget of random contexts independently sampled from a known distribution. For each incoming context, the decision-maker selects one of the linear models and receives an observation that is corrupted by the unknown noise level of that model. We present Trace-UCB, an adaptive allocation algorithm that learns the models' noise levels while balancing contexts accordingly across them, and prove bounds for its simple regret in both expectation and high-probability. We extend the algorithm and its bounds to the high-dimensional setting, where the number of linear models times the dimension of the contexts is more than the total budget of samples. Simulations with real data suggest that Trace-UCB is remarkably robust, outperforming a number of baselines even when its assumptions are violated.</p>
        <p><b>Learning the Distribution with Largest Mean: Two Bandit Frameworks</b>, <ref xlink:href="#sequel-2017-bid26" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Over the past few years, the multi-armed bandit model has become increasingly popular in the machine learning community, partly because of applications including online content optimization. This paper reviews two different sequential learning tasks that have been considered in the bandit literature; they can be formulated as (sequentially) learning which distribution has the highest mean among a set of distributions, with some constraints on the learning process. For both of them (regret minimization and best arm identification) we present recent, asymptotically optimal algorithms. We compare the behaviors of the sampling rule of each algorithm as well as the complexity terms associated to each problem.</p>
        <p><b>On Bayesian Index Policies for Sequential Resource Allocation</b>, <ref xlink:href="#sequel-2017-bid27" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>This paper is about index policies for minimizing (frequentist) regret in a stochastic multi-armed bandit model, inspired by a Bayesian view on the problem. Our main contribution is to prove that the Bayes-UCB algorithm, which relies on quantiles of posterior distributions, is asymptotically optimal when the reward distributions belong to a one-dimensional exponential family, for a large class of prior distributions. We also show that the Bayesian literature gives new insight on what kind of exploration rates could be used in frequentist, UCB-type algorithms. Indeed, approximations of the Bayesian optimal solution or the Finite Horizon Gittins indices provide a justification for the kl-UCB+ and kl-UCB-H+ algorithms, whose asymptotic optimality is also established.</p>
        <p><b>Multi-Player Bandits Models Revisited</b>, <ref xlink:href="#sequel-2017-bid28" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Multi-player Multi-Armed Bandits (MAB) have been extensively studied in the literature, motivated by applications to Cognitive Radio systems. Driven by such applications as well, we motivate the introduction of several levels of feedback for multi-player MAB algorithms. Most existing work assume that sensing information is available to the algorithm. Under this assumption, we improve the state-of-the-art lower bound for the regret of any decentralized algorithms and introduce two algorithms, RandTopM and MCTopM, that are shown to empirically outperform existing algorithms. Moreover, we provide strong theoretical guarantees for these algorithms, including a notion of asymptotic optimality in terms of the number of selections of bad arms. We then introduce a promising heuristic, called Selfish, that can operate without sensing information, which is crucial for emerging applications to Internet of Things networks. We investigate the empirical performance of this algorithm and provide some first theoretical elements for the understanding of its behavior.</p>
        <p><b>Multi-Armed Bandit Learning in IoT Networks: Learning helps even in non-stationary settings</b>, <ref xlink:href="#sequel-2017-bid29" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Setting up the future Internet of Things (IoT) networks will require supporting more and more communicating devices. We prove that intelligent devices in unlicensed bands can use Multi-Armed Bandit (MAB) learning algorithms to improve resource exploitation. We evaluate the performance of two classical MAB learning algorithms, UCB1 and Thompson Sampling, to handle the decentralized decision-making of Spectrum Access, applied to IoT networks; as well as learning performance with a growing number of intelligent end-devices. We show that using learning algorithms does help to fit more devices in such networks, even when all end-devices are intelligent and are dynamically changing channel. In the studied scenario, stochastic MAB learning provides a gain of up to 16% in terms of successful transmission probabilities, and has near optimal performance even in non-stationary and non-i.i.d. settings with a majority of intelligent devices.</p>
      </subsection>
      <subsection id="uid61" level="2">
        <bodyTitle>Nonparametric Statistics of Time Series</bodyTitle>
        <p><b>Efficient Tracking of a Growing Number of Experts</b>, <ref xlink:href="#sequel-2017-bid30" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We consider a variation on the problem of prediction with expert advice, where new forecasters that were unknown until then may appear at each round. As often in prediction with expert advice, designing an algorithm that achieves near-optimal regret guarantees is straightforward, using aggregation of experts. However, when the comparison class is sufficiently rich, for instance when the best expert and the set of experts itself changes over time, such strategies naively require maintaining a prohibitive number of weights (typically exponential with the time horizon). By contrast, designing strategies that both achieve a near-optimal regret and maintain a reasonable number of weights is highly non-trivial. We consider three increasingly challenging objectives (simple regret, shifting regret and sparse shifting regret) that extend existing notions defined for a fixed expert ensemble; in each case, we design strategies that achieve tight regret bounds, adaptive to the parameters of the comparison class, while being computationally inexpensive. Moreover, our algorithms are anytime, agnostic to the number of incoming experts and completely parameter-free. Such remarkable results are made possible thanks to two simple but highly effective recipes: first the “abstention trick” that comes from the specialist framework and enables handling the least challenging notions of regret, but is limited when addressing more sophisticated objectives. Second, the “muting trick” that we introduce to give more flexibility. We show how to combine these two tricks in order to handle the most challenging class of comparison strategies.</p>
      </subsection>
      <subsection id="uid62" level="2">
        <bodyTitle>Stochastic Games</bodyTitle>
        <p><b>Monte-Carlo Tree Search by Best Arm Identification</b>, <ref xlink:href="#sequel-2017-bid31" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Recent advances in bandit tools and techniques for sequential learning are steadily enabling new applications and are promising the resolution of a range of challenging related problems. We study the game tree search problem, where the goal is to quickly identify the optimal move in a given game tree by sequentially sampling its stochastic payoffs. We develop new algorithms for trees of arbitrary depth, that operate by summarizing all deeper levels of the tree into confidence intervals at depth one, and applying a best arm identification procedure at the root. We prove new sample complexity guarantees with a refined dependence on the problem instance. We show experimentally that our algorithms outperform existing elimination-based algorithms and match previous special-purpose methods for depth-two trees.</p>
        <p><b>Learning Nash Equilibrium for General-Sum Markov Games from Batch Data</b>, <ref xlink:href="#sequel-2017-bid32" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>This paper addresses the problem of learning a Nash equilibrium in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>γ</mi></math></formula>-discounted multiplayer general-sum Markov Games (MGs) in a batch setting. As the number of players increases in MG, the agents may either collaborate or team apart to increase their final rewards. One solution to address this problem is to look for a Nash equilibrium. Although several techniques were found for the subcase of two-player zero-sum MGs, those techniques fail to find a Nash equilibrium in general-sum Markov Games. In this paper, we introduce a new definition of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ε</mi></math></formula>-Nash equilibrium in MGs which grasps the strategy's quality for multiplayer games. We prove that minimizing the norm of two Bellman-like residuals implies to learn such an <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ε</mi></math></formula>-Nash equilibrium. Then, we show that minimizing an empirical estimate of the <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>L</mi><mi>p</mi></msub></math></formula> norm of these Bellman-like residuals allows learning for general-sum games within the batch setting. Finally, we introduce a neural network architecture that successfully learns a Nash equilibrium in generic multiplayer general-sum turn-based MGs.</p>
      </subsection>
      <subsection id="uid63" level="2">
        <bodyTitle>Automata Learning</bodyTitle>
        <p><b>Spectral Learning from a Single Trajectory under Finite-State Policies</b>, <ref xlink:href="#sequel-2017-bid33" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We present spectral methods of moments for learning sequential models from a single trajectory, in stark contrast with the classical literature that assumes the availability of multiple i.i.d. trajectories. Our approach leverages an efficient SVD-based learning algorithm for weighted automata and provides the first rigorous analysis for learning many important models using dependent data. We state and analyze the algorithm under three increasingly difficult scenarios: probabilistic automata, stochastic weighted automata, and reactive predictive state representations controlled by a finite-state policy. Our proofs include novel tools for studying mixing properties of stochastic weighted automata.</p>
      </subsection>
      <subsection id="uid64" level="2">
        <bodyTitle>Online Kernel and Graph-Based Methods</bodyTitle>
        <p><b>Distributed Adaptive Sampling for Kernel Matrix Approximation</b>, <ref xlink:href="#sequel-2017-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Most kernel-based methods, such as kernel regression, kernel PCA, ICA, or k-means clustering, do not scale to large datasets, because constructing and storing the kernel matrix <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>𝐊</mi><mi>n</mi></msub></math></formula> requires at least <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msup><mi>n</mi><mn>2</mn></msup><mo>)</mo></mrow></math></formula> time and space for <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>n</mi></math></formula> samples. Recent works (Alaoui 2014, Musco 2016) show that sampling points with replacement according to their ridge leverage scores (RLS) generates small dictionaries of relevant points with strong spectral approximation guarantees for <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>𝐊</mi><mi>n</mi></msub></math></formula>. The drawback of RLS-based methods is that computing exact RLS requires constructing and storing the whole kernel matrix. In this paper, we introduce SQUEAK, a new algorithm for kernel approximation based on RLS sampling that sequentially processes the dataset, storing a dictionary which creates accurate kernel matrix approximations with a number of points that only depends on the effective dimension <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>d</mi><mi> eff </mi></msub><mrow><mo>(</mo><mi>γ</mi><mo>)</mo></mrow></mrow></math></formula> of the dataset. Moreover since all the RLS estimations are efficiently performed using only the small dictionary, SQUEAK never constructs the whole matrix
<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>𝐊</mi><mi>n</mi></msub></math></formula>, runs in linear time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mover accent="true"><mi>O</mi><mo>˜</mo></mover><mrow><mo>(</mo><mi>n</mi><msub><mi>d</mi><mi> eff </mi></msub><msup><mrow><mo>(</mo><mi>γ</mi><mo>)</mo></mrow><mn>3</mn></msup><mo>)</mo></mrow></mrow></math></formula> w.r.t. <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>n</mi></math></formula>, and requires only a single pass over the dataset. We also propose a parallel and distributed version of SQUEAK achieving similar accuracy in as little as <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mover accent="true"><mi>O</mi><mo>˜</mo></mover><mrow><mo>(</mo><mo form="prefix">log</mo><mrow><mo>(</mo><mi>n</mi><mo>)</mo></mrow><msub><mi>d</mi><mi> eff </mi></msub><msup><mrow><mo>(</mo><mi>γ</mi><mo>)</mo></mrow><mn>3</mn></msup><mo>)</mo></mrow></mrow></math></formula> time.</p>
        <p><b>Second-Order Kernel Online Convex Optimization with Adaptive Sketching</b>, <ref xlink:href="#sequel-2017-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Kernel online convex optimization (KOCO) is a framework combining the expressiveness of non-parametric kernel models with the regret guarantees of online learning. First-order KOCO methods such as functional gradient descent require only <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mi>t</mi><mo>)</mo></mrow></math></formula> time and space per iteration, and, when the only information on the losses is their convexity, achieve a minimax optimal <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msqrt><mi>T</mi></msqrt><mo>)</mo></mrow></math></formula> regret. Nonetheless, many common losses in kernel problems, such as squared loss, logistic loss, and squared hinge loss possess stronger curvature that can be exploited. In this case, second-order KOCO methods achieve <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mo form="prefix">log</mo><mo>(</mo><mi>D</mi><mi>e</mi><mi>t</mi><mo>(</mo><mi>K</mi><mo>)</mo><mo>)</mo><mo>)</mo></mrow></math></formula> regret, which we show scales as <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msub><mi>d</mi><mrow><mi>e</mi><mi>f</mi><mi>f</mi></mrow></msub><mo form="prefix">log</mo><mi>T</mi><mo>)</mo></mrow></math></formula>, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>d</mi><mrow><mi>e</mi><mi>f</mi><mi>f</mi></mrow></msub></math></formula> is the effective dimension of the problem and is usually much smaller than <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msqrt><mi>T</mi></msqrt><mo>)</mo></mrow></math></formula>. 
The main drawback of second-order methods is their much higher <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msup><mi>t</mi><mn>2</mn></msup><mo>)</mo></mrow></math></formula> space and time complexity. In this paper, we introduce kernel online Newton step (KONS), a new second-order KOCO method that also achieves <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msub><mi>d</mi><mrow><mi>e</mi><mi>f</mi><mi>f</mi></mrow></msub><mo form="prefix">log</mo><mi>T</mi><mo>)</mo></mrow></math></formula> regret. To address the computational complexity of second-order methods, we introduce a new matrix sketching algorithm for the kernel matrix <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>K</mi></math></formula>, and show that for a chosen parameter <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>γ</mi><mo>≤</mo><mn>1</mn></mrow></math></formula> our Sketched-KONS reduces the space and time complexity by a factor of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>γ</mi><mn>2</mn></msup></math></formula> to <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msup><mi>t</mi><mn>2</mn></msup><msup><mi>γ</mi><mn>2</mn></msup><mo>)</mo></mrow></math></formula> space and time per iteration, while incurring only <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>1</mn><mo>/</mo><mi>γ</mi></mrow></math></formula> times more regret.</p>
        <p><b>Efficient Second-order Online Kernel Learning with Adaptive Embedding</b>, <ref xlink:href="#sequel-2017-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Online kernel learning (OKL) is a flexible framework to approach prediction problems, since the large approximation space provided by reproducing kernel Hilbert spaces can contain an accurate function for the problem. Nonetheless, optimizing over this space is computationally expensive. Not only do first-order methods accumulate <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msqrt><mi>T</mi></msqrt><mo>)</mo></mrow></math></formula> more loss than the optimal function, but the curse of kernelization results in an <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mi>t</mi><mo>)</mo></mrow></math></formula> per-step complexity. Second-order methods get closer to the optimum much faster, suffering only <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mo form="prefix">log</mo><mrow><mo>(</mo><mi>T</mi><mo>)</mo></mrow><mo>)</mo></mrow></math></formula> regret, but second-order updates are even more expensive, with a <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msup><mi>t</mi><mn>2</mn></msup><mo>)</mo></mrow></math></formula> per-step cost. Existing approximate OKL methods try to reduce this complexity either by limiting the Support Vectors (SV) introduced in the predictor, or by avoiding the kernelization process altogether using embedding. Nonetheless, as long as the size of the approximation space or the number of SV does not grow over time, an adversary can always exploit the approximation process. In this paper, we propose PROS-N-KONS, a method that combines Nyström sketching to project the input point in a small, accurate embedded space, and performs efficient second-order updates in this space. 
The embedded space is continuously updated to guarantee that the embedding remains accurate, and we show that the per-step cost only grows with the effective dimension of the problem and not with <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>T</mi></math></formula>. Moreover, the second-order update allows us to achieve the logarithmic regret. We empirically compare our algorithm on recent large-scale benchmarks and show it performs favorably.</p>
        <p><b>Zonotope Hit-and-run for Efficient Sampling from Projection DPPs</b>, <ref xlink:href="#sequel-2017-bid34" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Determinantal point processes (DPPs) are distributions over sets of items that model diversity using kernels. Their applications in machine learning include summary extraction and recommendation systems. Yet, the cost of sampling from a DPP is prohibitive in large-scale applications, which has triggered an effort towards efficient approximate samplers. We build a novel MCMC sampler that combines ideas from combinatorial geometry, linear programming, and Monte Carlo methods to sample from DPPs with a fixed sample cardinality, also called projection DPPs. Our sampler leverages the ability of the hit-and-run MCMC kernel to efficiently move across convex bodies. Previous theoretical results yield a fast mixing time of our chain when targeting a distribution that is close to a projection DPP, but not a DPP in general. Our empirical results demonstrate that this extends to sampling projection DPPs, i.e., our sampler is more sample-efficient than previous approaches which in turn translates to faster convergence when dealing with costly-to-evaluate functions, such as summary extraction in our experiments.</p>
      </subsection>
    </subsection>
    <subsection id="uid65" level="1">
      <bodyTitle>Statistical Learning and Bayesian Analysis</bodyTitle>
      <p><b>Universality of Bayesian mixture predictors</b>, <ref xlink:href="#sequel-2017-bid35" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
      <p>The problem is that of sequential probability forecasting for finite-valued time series. The data is generated by an unknown probability distribution over the space of all one-way infinite sequences. It is known that this measure belongs to a given set C, but the latter is completely arbitrary (uncountably infinite, without any structure given). The performance is measured with asymptotic average log loss. In this work it is shown that the minimax asymptotic performance is always attainable, and it is attained by a convex combination of countably many measures from the set C (a Bayesian mixture). This was previously only known for the case when the best achievable asymptotic error is 0. This also contrasts previous results that show that in the non-realizable case all Bayesian mixtures may be suboptimal, while there is a predictor that achieves the optimal performance.</p>
      <p><b>Hypotheses Testing on Infinite Random Graphs</b>, <ref xlink:href="#sequel-2017-bid36" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
      <p>Drawing on some recent results that provide the formalism necessary to define stationarity for infinite random graphs, this paper initiates the study of statistical and learning questions pertaining to these objects. Specifically, a criterion for the existence of a consistent test for complex hypotheses is presented, generalizing the corresponding results on time series. As an application, it is shown how one can test that a tree has the Markov property, or, more generally, estimate its memory.</p>
      <p><b>Independence Clustering (Without a Matrix)</b>, <ref xlink:href="#sequel-2017-bid37" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
      <p>The independence clustering problem is considered in the following formulation: given a set <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>S</mi></math></formula> of random variables, it is required to find the finest partitioning <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo>{</mo><msub><mi>U</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>U</mi><mi>k</mi></msub><mo>}</mo></mrow></math></formula> of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>S</mi></math></formula> into clusters such that the clusters <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>U</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>U</mi><mi>k</mi></msub></mrow></math></formula> are mutually independent. Since mutual independence is the target, pairwise similarity measurements are of no use, and thus traditional clustering algorithms are inapplicable. The distribution of the random variables in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>S</mi></math></formula> is, in general, unknown, but a sample is available. Thus, the problem is cast in terms of time series. Two forms of sampling are considered: i.i.d. and stationary time series, with the main emphasis being on the latter, more general, case. A consistent, computationally tractable algorithm for each of the settings is proposed, and a number of open directions for further research are outlined.</p>
    </subsection>
    <subsection id="uid66" level="1">
      <bodyTitle>Applications</bodyTitle>
      <subsection id="uid67" level="2">
        <bodyTitle>Dialogue Systems and Natural Language</bodyTitle>
        <p><b>End-to-end Optimization of Goal-driven and Visually Grounded Dialogue Systems</b>, <ref xlink:href="#sequel-2017-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>End-to-end design of dialogue systems has recently become a popular research topic thanks to powerful tools such as encoder-decoder architectures for sequence-to-sequence learning. Yet, most current approaches cast human-machine dialogue management as a supervised learning problem, aiming at predicting the next utterance of a participant given the full history of the dialogue. This vision is too simplistic to render the intrinsic planning problem inherent to dialogue as well as its grounded nature, making the context of a dialogue larger than the sole history. This is why only chitchat and question answering tasks have been addressed so far using end-to-end architectures. In this paper, we introduce a Deep Reinforcement Learning method to optimize visually grounded task-oriented dialogues, based on the policy gradient algorithm. This approach is tested on a dataset of 120k dialogues collected through Mechanical Turk and provides encouraging results at solving both the problem of generating natural dialogues and the task of discovering a specific object in a complex picture.</p>
        <p><b>Online Learning and Transfer for User Adaptation in Dialogue Systems</b>, <ref xlink:href="#sequel-2017-bid38" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We address the problem of user adaptation in Spoken Dialogue Systems. The goal is to quickly adapt online to a new user given a large amount of dialogues collected with other users. Previous works using Transfer for Reinforcement Learning tackled this problem when the number of source users remains limited. In this paper, we overcome this constraint by clustering the source users: each user cluster, represented by its centroid, is used as a potential source in the state-of-the-art Transfer Reinforcement Learning algorithm. Our benchmark compares several clustering approaches, including one based on a novel metric. All experiments are conducted on a negotiation dialogue task, and their results show significant improvements over baselines.</p>
        <p><b>GuessWhat?! Visual Object Discovery Through Multi-modal Dialogue</b>, <ref xlink:href="#sequel-2017-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We introduce GuessWhat?!, a two-player guessing game as a testbed for research on the interplay of computer vision and dialogue systems. The goal of the game is to locate an unknown object in a rich image scene by asking a sequence of questions. Higher-level image understanding, like spatial reasoning and language grounding, is required to solve the proposed task. Our key contribution is the collection of a large-scale dataset consisting of 150K human-played games with a total of 800K visual question-answer pairs on 66K images. We explain our design decisions in collecting the dataset and introduce the oracle and questioner tasks that are associated with the two players of the game. We prototyped deep learning models to establish initial base-lines of the introduced tasks.</p>
        <p><b>LIG-CRIStAL System for the WMT17 Automatic Post-Editing Task</b>, <ref xlink:href="#sequel-2017-bid39" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>This paper presents the LIG-CRIStAL submission to the shared Automatic Post-Editing task of WMT 2017. We propose two neural post-editing models: a mono-source model with a task-specific attention mechanism, which performs particularly well in a low-resource scenario; and a chained architecture which makes use of the source sentence to provide extra context. This latter architecture manages to slightly improve our results when more training data is available. We present and discuss our results on two datasets (en-de and de-en) that are made available for the task.</p>
      </subsection>
      <subsection id="uid68" level="2">
        <bodyTitle>Recommendation systems</bodyTitle>
        <p><b>A Multi-Armed Bandit Model Selection for Cold-Start User Recommendation</b>, <ref xlink:href="#sequel-2017-bid40" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>How can we effectively recommend items to a user about whom we have no information? This is the problem we focus on, known as the cold-start problem. In this paper, we focus on the cold user problem. In most existing works, the cold-start problem is handled through the use of many kinds of information available about the user. However, what happens if we do not have any information? Recommender systems usually keep a substantial amount of prediction models that are available for analysis. Moreover, recommendations to new users yield uncertain returns. Assuming a number of alternative prediction models is available to select items to recommend to a cold user, this paper introduces a multi-armed bandit based model selection, named PdMS. In comparison with two baselines, PdMS improves the performance as measured by the nDCG. These improvements are demonstrated on real, public datasets.</p>
      </subsection>
      <subsection id="uid69" level="2">
        <bodyTitle>Software development</bodyTitle>
        <p><b>A Large-scale Study of Call Graph-based Impact Prediction using Mutation Testing</b>, <ref xlink:href="#sequel-2017-bid41" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>In software engineering, impact analysis consists in predicting the software elements (e.g. modules, classes, methods) potentially impacted by a change in the source code. Impact analysis is required to optimize the testing effort. In this paper, we propose a framework to predict error propagation. Based on 10 open-source Java projects and 5 classical mutation operators, we create 17000 mutants and study how the error they introduce propagates. This framework enables us to analyze impact prediction based on four types of call graph. Our results show that the sophistication indeed increases completeness of impact prediction. However, and surprisingly to us, the most basic call graph gives the highest trade-off between precision and recall for impact prediction.</p>
        <p><b>Correctness Attraction: A Study of Stability of Software Behavior under Runtime Perturbation</b>, <ref xlink:href="#sequel-2017-bid42" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Can the execution of a software be perturbed without breaking the correctness of the output? In this paper, we devise a novel protocol to answer this rarely investigated question. In an experimental study, we observe that many perturbations do not break the correctness in ten subject programs. We call this phenomenon “correctness attraction”. The uniqueness of this protocol is that it considers a systematic exploration of the perturbation space as well as perfect oracles to determine the correctness of the output. To this extent, our findings on the stability of software under execution perturbations have a level of validity that has never been reported before in the scarce related work. A qualitative manual analysis enables us to set up the first taxonomy ever of the reasons behind correctness attraction.</p>
      </subsection>
      <subsection id="uid70" level="2">
        <bodyTitle>Graph theory</bodyTitle>
        <p><b>A generative model for sparse, evolving digraphs</b>, <ref xlink:href="#sequel-2017-bid43" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Generating graphs that are similar to real ones is an open problem, while the similarity notion is quite elusive and hard to formalize. In this paper, we focus on sparse digraphs and propose SDG, an algorithm that aims at generating graphs similar to real ones. Since real graphs are evolving and this evolution is important to study in order to understand the underlying dynamical system, we tackle the problem of generating series of graphs. We propose SEDGE, an algorithm meant to generate series of graphs similar to a real series. SEDGE is an extension of SDG. We consider graphs that are representations of software programs and show experimentally that our approach outperforms other existing approaches. Experiments show the performance of both algorithms.</p>
        <p><b>A Spectral Algorithm with Additive Clustering for the Recovery of Overlapping Communities in Networks</b>, <ref xlink:href="#sequel-2017-bid44" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>This paper presents a novel spectral algorithm with additive clustering designed to identify overlapping communities in networks. The algorithm is based on geometric properties of the spectrum of the expected adjacency matrix in a random graph model that we call stochastic blockmodel with overlap (SBMO). An adaptive version of the algorithm, that does not require the knowledge of the number of hidden communities, is proved to be consistent under the SBMO when the degrees in the graph are (slightly more than) logarithmic. The algorithm is shown to perform well on simulated data and on real-world graphs with known overlapping communities.</p>
      </subsection>
      <subsection id="uid71" level="2">
        <bodyTitle>Deep Learning</bodyTitle>
        <p><b>Modulating early visual processing by language</b>, <ref xlink:href="#sequel-2017-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>It is commonly assumed that language refers to high-level visual concepts while leaving low-level visual processing unaffected. This view dominates the current literature in computational models for language-vision tasks, where visual and linguistic inputs are mostly processed independently before being fused into a single representation. In this paper, we deviate from this classic pipeline and propose to modulate the entire visual processing by a linguistic input. Specifically, we introduce Conditional Batch Normalization (CBN) as an efficient mechanism to modulate convolutional feature maps by a linguistic embedding. We apply CBN to a pre-trained Residual Network (ResNet), leading to the MODulatEd ResNet (MODERN) architecture, and show that this significantly improves strong baselines on two visual question answering tasks. Our ablation study confirms that modulating from the early stages of the visual processing is beneficial.</p>
        <p><b>FiLM: Visual Reasoning with a General Conditioning Layer</b>, <ref xlink:href="#sequel-2017-bid45" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We introduce a general-purpose conditioning method for neural networks called FiLM: Feature-wise Linear Modulation. FiLM layers influence neural network computation via a simple, feature-wise affine transformation based on conditioning information. We show that FiLM layers are highly effective for visual reasoning - answering image-related questions which require a multi-step, high-level process - a task which has proven difficult for standard deep learning methods that do not explicitly model reasoning. Specifically, we show on visual reasoning tasks that FiLM layers 1) halve state-of-the-art error for the CLEVR benchmark, 2) modulate features in a coherent manner, 3) are robust to ablations and architectural modifications, and 4) generalize well to challenging, new data from few examples or even zero-shot.</p>
        <p><b>Learning Visual Reasoning Without Strong Priors</b>, <ref xlink:href="#sequel-2017-bid46" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>Achieving artificial visual reasoning - the ability to answer image-related questions which require a multi-step, high-level process - is an important step towards artificial general intelligence. This multi-modal task requires learning a question-dependent, structured reasoning process over images from language. Standard deep learning approaches tend to exploit biases in the data rather than learn this underlying structure, while leading methods learn to visually reason successfully but are hand-crafted for reasoning. We show that a general-purpose, Conditional Batch Normalization approach achieves state-of-the-art results on the CLEVR Visual Reasoning benchmark with a 2.4% error rate. We outperform the next best end-to-end method (4.5%) and even methods that use extra supervision (3.1%). We probe our model to shed light on how it reasons, showing it has learned a question-dependent, multi-step process. Previous work has operated under the assumption that visual reasoning calls for a specialized architecture, but we show that a general architecture with proper conditioning can learn to visually reason effectively. Index Terms: Deep Learning, Language and Vision Note: A full paper extending this study is available at http://arxiv.org/abs/1709.07871, with additional references, experiments, and analysis.</p>
        <p><b>HoME: a Household Multimodal Environment</b>, <ref xlink:href="#sequel-2017-bid47" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        <p>We introduce HoME: a Household Multimodal Environment for artificial agents to learn from vision, audio, semantics, physics, and interaction with objects and other agents, all within a realistic context. HoME integrates over 45,000 diverse 3D house layouts based on the SUNCG dataset, a scale which may facilitate learning, generalization, and transfer. HoME is an open-source, OpenAI Gym-compatible platform extensible to tasks in reinforcement learning, language grounding, sound-based navigation, robotics, multi-agent learning, and more. We hope HoME better enables artificial agents to learn as humans do: in an interactive, multimodal, and richly contextualized setting.</p>
      </subsection>
    </subsection>
  </resultats>
  <contrats id="uid72">
    <bodyTitle>Bilateral Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid73" level="1">
      <bodyTitle>Bilateral Contracts with Industry</bodyTitle>
      <subsection id="uid74" level="2">
        <bodyTitle>Lelivrescolaire.fr</bodyTitle>
        <simplelist>
          <li id="uid75">
            <p noindent="true">contract with <ref xlink:href="http://Lelivrescolaire.fr" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>Lelivrescolaire.<allowbreak/>fr</ref>; PI: Michal Valko</p>
            <p>Title: Sequential Machine Learning for Adaptive Educational Systems</p>
            <p>Duration: Mar. 2018 – Feb. 2021</p>
            <p>Abstract: Adaptive educational contents are technologies which
adapt to the difficulties encountered by students. With the rise
of digital content in schools, the mass of data coming from
education enables but also calls for machine learning methods. Since
2010, Lelivrescolaire.fr has been developing some learning
materials for teachers and students through a collaborative creation
process. For instance, during the school year 2015/2016, students
completed more than 8 000 000 exercises on its homework
platform Afterclasse.fr. Our approach would be based on sequential
machine learning: the algorithm learns to recommend some exercises
which adapt to students gradually as they answer.</p>
            <participants>
              <person key="sequel-2017-idp193632">
                <firstname>Julien</firstname>
                <lastname>Seznec</lastname>
              </person>
              <person key="sequel-2014-idm26088">
                <firstname>Alessandro</firstname>
                <lastname>Lazaric</lastname>
              </person>
              <person key="sequel-2014-idp70232">
                <firstname>Michal</firstname>
                <lastname>Valko</lastname>
              </person>
            </participants>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid76" level="2">
        <bodyTitle>OtherLang</bodyTitle>
        <simplelist>
          <li id="uid77">
            <p noindent="true">contract with “OtherLang”; PI: Romaric Gaudel</p>
            <p>Title: Tool to support foreign language practice</p>
            <p>Duration: 2 months</p>
            <p>Abstract: OtherLang develops an application to learn a foreign
language by reading documents and interacting with other
people. During the time-line of the contract, SequeL brought its
knowledge about Recommender Systems, which may be used either to
recommend documents to users or to recommend users to users.</p>
            <participants>
              <person key="sequel-2014-idp75664">
                <firstname>Romaric</firstname>
                <lastname>Gaudel</lastname>
              </person>
              <person key="sequel-2014-idm27568">
                <firstname>Philippe</firstname>
                <lastname>Preux</lastname>
              </person>
            </participants>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid78" level="2">
        <bodyTitle>Sidexa</bodyTitle>
        <simplelist>
          <li id="uid79">
            <p noindent="true">contract with “Sidexa”; PI: Jérémie Mary and then Philippe Preux</p>
            <p>Title: Vision applied to the segmentation and recognition of car
body parts</p>
            <p>Duration: 3 months</p>
            <p>Abstract: We investigate deep learning to perform car body
segmentation. The result being very good, a second contract will
follow up this one in 2018.</p>
            <participants>
              <person key="sequel-2014-idp76928">
                <firstname>Jérémie</firstname>
                <lastname>Mary</lastname>
              </person>
              <person key="sequel-2014-idm27568">
                <firstname>Philippe</firstname>
                <lastname>Preux</lastname>
              </person>
            </participants>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid80" level="2">
        <bodyTitle>Renault</bodyTitle>
        <simplelist>
          <li id="uid81">
            <p noindent="true">contract with “Renault”; PI: Philippe Preux</p>
            <p>Title: State of the art in reinforcement learning regarding
autonomous car control and path planning.</p>
            <p>Duration: 3 months (Jan–Mar 2017)</p>
            <p>Abstract: This work has consisted in surveying the literature
related to autonomous car control, and reinforcement learning.</p>
            <participants>
              <person key="mescal-2014-idp111616">
                <firstname>Alexis</firstname>
                <lastname>Martin</lastname>
              </person>
              <person key="tao-2015-idp83360">
                <firstname>Odalric</firstname>
                <lastname>Maillard</lastname>
              </person>
              <person key="sequel-2014-idm27568">
                <firstname>Philippe</firstname>
                <lastname>Preux</lastname>
              </person>
            </participants>
          </li>
          <li id="uid82">
            <p noindent="true">contract with Renault; PI: Philippe Preux</p>
            <p>Title: Control of an autonomous vehicle</p>
            <p>Duration: 3 years (12/2017–11/2020)</p>
            <p>Abstract: This contract comes along the CIFRE grant on the same
topic. This work is done in collaboration with the NON-A
team-project.</p>
            <participants>
              <person key="sequel-2017-idp181360">
                <firstname>Édouard</firstname>
                <lastname>Leurent</lastname>
              </person>
              <person key="tao-2015-idp83360">
                <firstname>Odalric</firstname>
                <lastname>Maillard</lastname>
              </person>
              <person key="sequel-2014-idm27568">
                <firstname>Philippe</firstname>
                <lastname>Preux</lastname>
              </person>
            </participants>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid83" level="2">
        <bodyTitle>Critéo</bodyTitle>
        <simplelist>
          <li id="uid84">
            <p noindent="true">contract with “Criteo”; PI: Philippe Preux</p>
            <p>Title: Computational advertising</p>
            <p>Duration: 3 years (12/2017–11/2020)</p>
            <p>Abstract: This contract comes along the CIFRE grant on the same
topic. The goal is to investigate reinforcement learning and deep
learning on the problem of ad selection on the Internet.</p>
            <participants>
              <person key="sequel-2014-idm27568">
                <firstname>Philippe</firstname>
                <lastname>Preux</lastname>
              </person>
              <person key="sequel-2017-idp200992">
                <firstname>Kiewan</firstname>
                <lastname>Villatel</lastname>
              </person>
            </participants>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid85" level="2">
        <bodyTitle>Orange Labs</bodyTitle>
        <simplelist>
          <li id="uid86">
            <p noindent="true">contract with “Orange Labs”; PI: Philippe Preux</p>
            <p>Title: Sequential Learning and Decision Making under Partial Monitoring</p>
            <p>Duration: Oct. 2014 – Sep. 2017</p>
            <p>Abstract: This contract comes along the CIFRE grant on the same
topic. In applications such as recommendation systems, or
computational advertising, the return collected from the user is
partial: (s)he clicks on one item, or no item at all. We study
this setting in which only a “partial” information is gathered,
in particular how to learn to behave optimally in such a setting.</p>
            <participants>
              <person key="sequel-2014-idp102400">
                <firstname>Pratik</firstname>
                <lastname>Gajane</lastname>
              </person>
              <person key="sequel-2014-idm27568">
                <firstname>Philippe</firstname>
                <lastname>Preux</lastname>
              </person>
            </participants>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid87" level="2">
        <bodyTitle>Orange Labs</bodyTitle>
        <simplelist>
          <li id="uid88">
            <p noindent="true">contract with “Orange Labs”; PI: Olivier Pietquin</p>
            <p>Title: Inter User Transfer in dialogue systems</p>
            <p>Duration: 3 years</p>
            <p>Abstract: This contract comes along the CIFRE grant on the same
topic. The research aims at developing new algorithms to learn fast adaptation strategies for dialogue systems when a new user starts using them while we collected data from previous interactions with other users. Especially, it addresses the cold-start problem encountered when a new user faces the system, before samples can be collected to optimize the interaction strategy.</p>
            <participants>
              <person key="sequel-2015-idp113016">
                <firstname>Merwan</firstname>
                <lastname>Barlier</lastname>
              </person>
              <person key="sequel-2017-idp171616">
                <firstname>Nicolas</firstname>
                <lastname>Carrara</lastname>
              </person>
              <person key="sequel-2014-idp111080">
                <firstname>Olivier</firstname>
                <lastname>Pietquin</lastname>
              </person>
            </participants>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid89" level="2">
        <bodyTitle>55</bodyTitle>
        <simplelist>
          <li id="uid90">
            <p noindent="true">contract with “55”; PI: Jérémie Mary</p>
            <p>Title: Novel Learning and Exploration-Exploitation Methods for
Effective Recommender Systems</p>
            <p>Duration: Oct. 2015 – Sep. 2018</p>
            <p>Abstract: This contract comes along the CIFRE grant on the same
topic. In this Ph.D. thesis we intend to deal with this problem by
developing novel and more sophisticated recommendation strategies
in which the collection of data and the improvement of the
performance are considered as a unique process, where the
trade-off between the quality of the data and the performance of
the recommendation strategy is optimized over time. This work also
considers tensor methods (one layer of the tensor can be the time)
with the goal of scaling them to the RS level.</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
  </contrats>
  <partenariat id="uid91">
    <bodyTitle>Partnerships and Cooperations</bodyTitle>
    <subsection id="uid92" level="1">
      <bodyTitle>National Initiatives</bodyTitle>
      <subsection id="uid93" level="2">
        <bodyTitle>ANR BoB</bodyTitle>
        <participants>
          <person key="sequel-2016-idp194624">
            <firstname>Rémi</firstname>
            <lastname>Bardenet</lastname>
          </person>
          <person key="sequel-2014-idp70232">
            <firstname>Michal</firstname>
            <lastname>Valko</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid94">
            <p noindent="true"><i>Title</i>: Bayesian statistics for expensive models and tall data</p>
          </li>
          <li id="uid95">
            <p noindent="true"><i>Type</i>: National Research Agency</p>
          </li>
          <li id="uid96">
            <p noindent="true"><i>Coordinator</i>: CNRS (Rémi Bardenet)</p>
          </li>
          <li id="uid97">
            <p noindent="true"><i>Duration</i>: 2016-2020</p>
          </li>
          <li id="uid98">
            <p noindent="true"><i>Abstract</i>:</p>
            <p>Bayesian methods are a popular class of statistical algorithms for
updating scientific beliefs. They turn data into decisions and
models, taking into account uncertainty about models and their
parameters. This makes Bayesian methods popular among applied
scientists such as biologists, physicists, or engineers. However,
at the heart of Bayesian analysis lie 1) repeated sweeps over the
full dataset considered, and 2) repeated evaluations of the model
that describes the observed physical process. The current trends to
large-scale data collection and complex models thus raise two main
issues. Experiments, observations, and numerical simulations in
many areas of science nowadays generate terabytes of data, as does
the LHC in particle physics for instance. Simultaneously, knowledge
creation is becoming more and more data-driven, which requires new
paradigms addressing how data are captured, processed, discovered,
exchanged, distributed, and analyzed. For statistical algorithms to
scale up, reaching a given performance must require as few
iterations and as little access to data as possible. It is not only
experimental measurements that are growing at a rapid pace. Cell
biologists tend to have scarce data but large-scale models of tens
of nonlinear differential equations to describe complex dynamics.
In such settings, evaluating the model once requires numerically
solving a large system of differential equations, which may take
minutes for some tens of differential equations on today’s
hardware. Iterative statistical processing that requires a million
sequential runs of the model is thus out of the question. In this
project, we tackle the fundamental cost-accuracy trade-off for
Bayesian methods, in order to produce generic inference algorithms
that scale favourably with the number of measurements in an
experiment and the number of runs of a statistical model. We propose
a collection of objectives with different risk-reward trade-offs to
tackle these two goals. In particular, for experiments with large
numbers of measurements, we further develop existing
subsampling-based Monte Carlo methods, while developing a novel
decision theory framework that includes data constraints. For
expensive models, we build an ambitious programme around Monte Carlo
methods that leverage determinantal processes, a rich class of
probabilistic tools that lead to accurate inference with limited
model evaluations. In short, using innovative techniques such as
subsampling-based Monte Carlo and determinantal point processes, we
propose in this project to push the boundaries of the applicability
of Bayesian inference.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid99" level="2">
        <bodyTitle>ANR Badass</bodyTitle>
        <participants>
          <person key="tao-2015-idp83360">
            <firstname>Odalric</firstname>
            <lastname>Maillard</lastname>
          </person>
          <person key="dyogene-2014-idp72312">
            <firstname>Émilie</firstname>
            <lastname>Kaufmann</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid100">
            <p noindent="true"><i>Title</i>: BAnDits for non-Stationarity and Structure</p>
          </li>
          <li id="uid101">
            <p noindent="true"><i>Type</i>: National Research Agency</p>
          </li>
          <li id="uid102">
            <p noindent="true"><i>Coordinator</i>: Inria Lille (O. Maillard)</p>
          </li>
          <li id="uid103">
            <p noindent="true"><i>Duration</i>: 2016-2020</p>
          </li>
          <li id="uid104">
            <p noindent="true"><i>Abstract</i>: Motivated by the fact that a number of
modern applications of sequential decision making require
developing strategies that are especially robust to change in the
stationarity of the signal, and in order to anticipate and impact
the next generation of applications of the field, the BADASS
project intends to push theory and application of MAB to the next
level by incorporating non-stationary observations while retaining
near optimality against the best not necessarily constant decision
strategy. Since a non-stationary process typically decomposes into
chunks associated with some possibly hidden variables (states),
each corresponding to a stationary process, handling
non-stationarity crucially requires exploiting the (possibly
hidden) structure of the decision problem. For the same reason, a
MAB for which arms can be arbitrary non-stationary processes is
powerful enough to capture MDPs and even partially observable MDPs
as special cases, and it is thus important to jointly address the
issue of non-stationarity together with that of structure. In
order to advance these two nested challenges from a solid
theoretical standpoint, we intend to focus on the following
objectives: <i>(i)</i> To broaden the range of optimal
strategies for stationary MABs: current strategies are only known
to be provably optimal in a limited range of scenarios for which
the class of distribution (structure) is perfectly known; also,
recent heuristics possibly adaptive to the class need to be
further analyzed. <i>(ii)</i> To strengthen the literature on
pure sequential prediction (focusing on a single arm) for
non-stationary signals via the construction of adaptive confidence
sets and a novel measure of complexity: traditional approaches
consider a worst-case scenario and are thus overly conservative
and non-adaptive to simpler signals. <i>(iii)</i> To embed the
low-rank matrix completion and spectral methods in the context of
reinforcement learning, and further study models of structured
environments: promising heuristics in the context of
e.g. contextual MABs or Predictive State Representations require
stronger theoretical guarantees.</p>
            <p>This project will result in the development of a novel generation
of strategies to handle non-stationarity and structure that will
be evaluated in a number of test beds and validated by a rigorous
theoretical analysis. Beyond the significant advancement of the
state of the art in MAB and RL theory and the mathematical value
of the program, this JCJC BADASS is expected to strategically
impact societal and industrial applications, ranging from
personalized health-care and e-learning to computational
sustainability or rain-adaptive river-bank management to cite a
few.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid105" level="2">
        <bodyTitle>ANR ExTra-Learn</bodyTitle>
        <participants>
          <person key="sequel-2014-idm26088">
            <firstname>Alessandro</firstname>
            <lastname>Lazaric</lastname>
          </person>
          <person key="sequel-2014-idp76928">
            <firstname>Jérémie</firstname>
            <lastname>Mary</lastname>
          </person>
          <person key="sequel-2014-idp70232">
            <firstname>Michal</firstname>
            <lastname>Valko</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid106">
            <p noindent="true"><i>Title</i>: Extraction and Transfer of Knowledge in Reinforcement Learning</p>
          </li>
          <li id="uid107">
            <p noindent="true"><i>Type</i>: National Research Agency (ANR-9011)</p>
          </li>
          <li id="uid108">
            <p noindent="true"><i>Coordinator</i>: Inria Lille (A. Lazaric)</p>
          </li>
          <li id="uid109">
            <p noindent="true"><i>Duration</i>: 2014-2018</p>
          </li>
          <li id="uid110">
            <p noindent="true"><i>Abstract</i>: ExTra-Learn is directly motivated by the
evidence that one of the key features that allows humans to
accomplish complicated tasks is their ability of building
knowledge from past experience and transfer it while learning new
tasks. We believe that integrating transfer of learning in machine
learning algorithms will dramatically improve their learning
performance and enable them to solve complex tasks. We identify in
the reinforcement learning (RL) framework the most suitable
candidate for this integration. RL formalizes the problem of
learning an optimal control policy from the experience directly
collected from an unknown environment. Nonetheless, practical
limitations of current algorithms encouraged research to focus on
how to integrate prior knowledge into the learning
process. Although this improves the performance of RL algorithms,
it dramatically reduces their autonomy. In this project we pursue
a paradigm shift from designing RL algorithms incorporating prior
knowledge, to methods able to incrementally discover, construct,
and transfer “prior” knowledge in a fully automatic way. More in
detail, three main elements of RL algorithms would significantly
benefit from transfer of knowledge. <i>(i)</i> For every new
task, RL algorithms need exploring the environment for a long
time, and this corresponds to slow learning processes for large
environments. Transfer learning would enable RL algorithms to
dramatically reduce the exploration of each new task by exploiting
its resemblance with tasks solved in the past. <i>(ii)</i> RL
algorithms evaluate the quality of a policy by computing its
state-value function. Whenever the number of states is too large,
approximation is needed. Since approximation may cause
instability, designing suitable approximation schemes is
particularly critical. While this is currently done by a domain
expert, we propose to perform this step automatically by
constructing features that incrementally adapt to the tasks
encountered over time. This would significantly reduce human
supervision and increase the accuracy and stability of RL
algorithms across different tasks. <i>(iii)</i> In order to
deal with complex environments, hierarchical RL solutions have
been proposed, where state representations and policies are
organized over a hierarchy of subtasks. This requires a careful
definition of the hierarchy, which, if not properly constructed,
may lead to very poor learning performance. The ambitious goal of
transfer learning is to automatically construct a hierarchy of
skills, which can be effectively reused over a wide range of
similar tasks.</p>
          </li>
          <li id="uid111">
            <p noindent="true"><i>Activity Report</i>: Research in ExTra-Learn continued in
investigating how knowledge can be transferred into reinforcement
learning algorithms to improve their performance. Pierre-Victor
Chaumier did a 4-month internship in SequeL studying how to
transfer neural networks across different games in the
Atari platform. Unfortunately, the preliminary results we obtained
were not very positive. We investigated different transfer models,
from basic transfer of a fully trained network, to co-train over
multiple games and retrain with initialization from a previous
network. In most of the cases, the improvement from transfer was
rather limited and in some cases even negative transfer effects
appeared. This seems to be intrinsic in the neural network
architecture which tends to overfit on one single task and it
poorly generalizes over alternative tasks. Another activity was
related to the study of macro-actions in RL. We proved for the
first time under which conditions macro-actions can actually
improve the learning speed of an RL exploration-exploitation
algorithm. This is the first step towards the automatic
identification and construction of useful macro-actions across
multiple tasks.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid112" level="2">
        <bodyTitle>ANR KEHATH</bodyTitle>
        <participants>
          <person key="sequel-2014-idp111080">
            <firstname>Olivier</firstname>
            <lastname>Pietquin</lastname>
          </person>
          <person key="sequel-2014-idp99936">
            <firstname>Alexandre</firstname>
            <lastname>Bérard</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid113">
            <p noindent="true"><i>Acronym</i>: KEHATH</p>
          </li>
          <li id="uid114">
            <p noindent="true"><i>Title</i>: Advanced Quality Methods for Post-Edition of Machine Translation</p>
          </li>
          <li id="uid115">
            <p noindent="true"><i>Type</i>: ANR</p>
          </li>
          <li id="uid116">
            <p noindent="true"><i>Coordinator</i>: Lingua &amp; Machina</p>
          </li>
          <li id="uid117">
            <p noindent="true"><i>Duration</i>: 2014-2017</p>
          </li>
          <li id="uid118">
            <p noindent="true"><i>Other partners</i>: Univ. Lille 1, Laboratoire d'Informatique de Grenoble (LIG)</p>
          </li>
          <li id="uid119">
            <p noindent="true"><i>Abstract</i>: The translation community has seen a major
change over the last five years. Thanks to progress in the
training of statistical machine translation engines on corpora of
existing translations, machine translation has become good enough
so that it has become advantageous for translators to post-edit
machine outputs rather than translate from scratch. However,
current enhancements of machine translation (MT) systems from human
post-edition (PE) are rather basic: the post-edited output is
added to the training corpus and the translation model and
language model are re-trained, with no clear view of how much has
been improved and how much is left to be improved. Moreover, the
final PE result is the only feedback used: available technologies
do not take advantages of logged sequences of post-edition
actions, which inform on the cognitive processes of the
post-editor. The KEHATH project intends to address these issues
in two ways. Firstly, we will optimise advanced machine learning
techniques in the MT+PE loop. Our goal is to boost the impact of
PE, that is, reach the same performance with less PE or better
performance with the same amount of PE. In other words, we want to
improve machine translation learning curves. For this purpose,
active learning and reinforcement learning techniques will be
proposed and evaluated. Along with this, we will have to face
challenges such as MT systems heterogeneity (statistical and/or
rule-based), and ML scalability so as to improve domain-specific
MT. Secondly, since quality prediction (QP) on MT outputs is
crucial for translation project managers, we will implement and
evaluate in real-world conditions several confidence estimation
and error detection techniques previously developed at a
laboratory scale. A shared concern will be to work on continuous
domain-specific data flows to improve both MT and the performance
of indicators for quality prediction. The overall goal of the
KEHATH project is straightforward: gain additional machine
translation performance as fast as possible in each and every new
industrial translation project, so that post-edition time and cost
is drastically reduced. Basic research is the best way to reach
this goal, for an industrial impact that is powerful and
immediate.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid120" level="2">
        <bodyTitle>PEPS Project BIO</bodyTitle>
        <participants>
          <person key="dyogene-2014-idp72312">
            <firstname>Émilie</firstname>
            <lastname>Kaufmann</lastname>
          </person>
          <person key="sequel-2016-idp147888">
            <firstname>Lilian</firstname>
            <lastname>Besson</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid121">
            <p noindent="true"><i>Title</i>: Bandits pour l'Internet des Objets</p>
          </li>
          <li id="uid122">
            <p noindent="true"><i>Type</i>: CNRS PEPS project</p>
          </li>
          <li id="uid123">
            <p noindent="true"><i>Coordinator</i>: CNRS (E. Kaufmann)</p>
          </li>
          <li id="uid124">
            <p noindent="true"><i>Duration</i>: April–December 2017</p>
          </li>
          <li id="uid125">
            <p noindent="true"><i>Abstract</i>: (in French)
Dans le but d’améliorer la qualité et de minimiser les coûts énergétiques des communications entre les objets communicants et leurs stations de base, nous cherchons dans ce projet à adapter les avancées récentes du domaine de la radio intelligente à la spécificité des communications de type Internet des Objets. Vu l’engorgement du spectre fréquentiel, il est nécessaire pour ces objets d’apprendre à détecter de manière adaptative quand et sur quelle fréquence communiquer. Nous proposons pour cette tâche l’utilisation d’algorithmes dits de bandit à plusieurs bras, déjà connus dans le contexte de la radio intelligente, mais pas toujours adaptés à la spécificité des communications pour l’Internet des Objets. Nous introduirons de nouveaux algorithmes de bandit multi-joueurs, traduisant la coordination nécessaire entre les multiples objets en plus de l’apprentissage de la qualité des canaux fréquentiels. Ensuite nous envisagerons une nouvelle modélisation, de type bandit adversarial, pour décrire les communications dans des standards comme LoRa où les objets reçoivent des messages de confirmation des stations de base, conduisant à des algorithmes minimisant la latence de ces communications.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid126" level="2">
        <bodyTitle>National Partners</bodyTitle>
        <simplelist>
          <li id="uid127">
            <p noindent="true">ENS Paris-Saclay</p>
            <simplelist>
              <li id="uid128">
                <p noindent="true">M. Valko collaborated with V. Perchet on structured bandit problem. They co-supervise a PhD student (P. Perrault) together.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid129">
            <p noindent="true">Institut de Mathématiques de Toulouse</p>
            <simplelist>
              <li id="uid130">
                <p noindent="true">E. Kaufmann collaborated with Aurélien Garivier on sequential testing and structured bandit problems.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid131">
            <p noindent="true">CentraleSupélec Rennes</p>
            <simplelist>
              <li id="uid132">
                <p noindent="true">E. Kaufmann co-advises Lilian Besson, who works at CentraleSupélec with Christophe Moy. Christophe, Lilian and Émilie worked together on a PEPS project about bandits for Internet Of Things. One paper was published to the CROWNCOM conference, and another has been submitted to the ALT conference.</p>
              </li>
            </simplelist>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid133" level="1">
      <bodyTitle>European Initiatives</bodyTitle>
      <subsection id="uid134" level="2">
        <bodyTitle>FP7 &amp; H2020 Projects</bodyTitle>
        <subsection id="uid135" level="3">
          <bodyTitle>H2020 BabyRobot</bodyTitle>
          <sanspuceslist>
            <li id="uid136">
              <p noindent="true">Program: H2020</p>
            </li>
            <li id="uid137">
              <p noindent="true">Project acronym: BabyRobot</p>
            </li>
            <li id="uid138">
              <p noindent="true">Project title: Child-Robot Communication and Collaboration</p>
            </li>
            <li id="uid139">
              <p noindent="true">Duration: 01/2016 - 12/2018</p>
            </li>
            <li id="uid140">
              <p noindent="true">Coordinator: Alexandros Potamianos (Athena Research and Innovation Center in Information Communication and Knowledge Technologies, Greece)</p>
            </li>
            <li id="uid141">
              <p noindent="true">Other partners: Institute of Communication and Computer Systems
(Greece), The University of Hertfordshire Higher Education
Corporation (UK), Universitaet Bielefeld (Germany), Kungliga
Tekniska Hoegskolan (Sweden), Blue Ocean Robotics ApS (Denmark),
Univ. Lille (France), Furhat Robotics AB (Sweden)</p>
            </li>
            <li id="uid142">
              <p noindent="true">Abstract: The crowning achievement of human communication is our
unique ability to share intentionality, create and execute on joint
plans. Using this paradigm we model human-robot communication as a
three step process: sharing attention, establishing common ground
and forming shared goals. Prerequisites for successful communication
are being able to decode the cognitive state of people around us
(mindreading) and building trust. Our main goal is to create robots
that analyze and track human behavior over time in the context of
their surroundings (situational) using audio-visual monitoring in
order to establish common ground and mind-reading capabilities. On
BabyRobot we focus on the typically developing and autistic spectrum
children user population. Children have unique communication skills,
are quick and adaptive learners, eager to embrace new robotic
technologies. This is especially relevant for special education where
the development of social skills is delayed or never fully develops
without intervention or therapy. Thus our second goal is to define,
implement and evaluate child-robot interaction application scenarios
for developing specific socio-affective, communication and
collaboration skills in typically developing and autistic spectrum
children. We will support, not supplant, the therapist or educator,
working hand-in-hand to create a low risk environment for learning
and cognitive development. Breakthroughs in core robotic
technologies are needed to support this research mainly in the areas
of motion planning and control in constrained spaces, gestural
kinematics, sensorimotor learning and adaptation. Our third goal is
to push beyond the state-of-the-art in core robotic technologies to
support natural human-robot interaction and collaboration for
edutainment and healthcare applications. Creating robots that can
establish communication protocols and form collaboration plans on
the fly will have impact beyond the application scenarios
investigated here.</p>
            </li>
          </sanspuceslist>
        </subsection>
        <subsection id="uid143" level="3">
          <bodyTitle>CHIST-ERA DELTA</bodyTitle>
          <participants>
            <person key="sequel-2014-idp70232">
              <firstname>Michal</firstname>
              <lastname>Valko</lastname>
            </person>
            <person key="dyogene-2014-idp72312">
              <firstname>Émilie</firstname>
              <lastname>Kaufmann</lastname>
            </person>
          </participants>
          <sanspuceslist>
            <li id="uid144">
              <p noindent="true">Program: CHIST-ERA</p>
            </li>
            <li id="uid145">
              <p noindent="true">Project acronym: DELTA</p>
            </li>
            <li id="uid146">
              <p noindent="true">Project title: Dynamically Evolving Long-Term Autonomy</p>
            </li>
            <li id="uid147">
              <p noindent="true">Duration: October 2017 - December 2021</p>
            </li>
            <li id="uid148">
              <p noindent="true">Coordinator: Anders Jonsson (PI)</p>
            </li>
            <li id="uid149">
              <p noindent="true">Inria coPI: Michal Valko</p>
            </li>
            <li id="uid150">
              <p noindent="true">Other partners: UPF Spain, MUL Austria, ULG Belgium</p>
            </li>
            <li id="uid151">
              <p noindent="true">Abstract: Many complex autonomous systems (e.g., electrical
distribution networks) repeatedly select actions with the aim of
achieving a given objective. Reinforcement learning (RL) offers a
powerful framework for acquiring adaptive behaviour in this setting,
associating a scalar reward with each action and learning from
experience which action to select to maximise long-term
reward. Although RL has produced impressive results recently (e.g.,
achieving human-level play in Atari games and beating the human
world champion in the board game Go), most existing solutions only
work under strong assumptions: the environment model is stationary,
the objective is fixed, and trials end once the objective is met.
The aim of this project is to advance the state of the art of
fundamental research in lifelong RL by developing several novel RL
algorithms that relax the above assumptions. The new algorithms
should be robust to environmental changes, both in terms of the
observations that the system can make and the actions that the
system can perform. Moreover, the algorithms should be able to
operate over long periods of time while achieving different
objectives. The proposed algorithms will address three key problems
related to lifelong RL: planning, exploration, and task
decomposition. Planning is the problem of computing an action
selection strategy given a (possibly partial) model of the task at
hand. Exploration is the problem of selecting actions with the aim
of mapping out the environment rather than achieving a particular
objective. Task decomposition is the problem of defining different
objectives and assigning a separate action selection strategy to
each. The algorithms will be evaluated in two realistic scenarios:
active network management for electrical distribution networks, and
microgrid management. A test protocol will be developed to evaluate
each individual algorithm, as well as their combinations.</p>
            </li>
          </sanspuceslist>
        </subsection>
        <subsection id="uid152" level="3">
          <bodyTitle>CHIST-ERA IGLU</bodyTitle>
          <sanspuceslist>
            <li id="uid153">
              <p noindent="true">Program: CHIST-ERA</p>
            </li>
            <li id="uid154">
              <p noindent="true">Project acronym: IGLU</p>
            </li>
            <li id="uid155">
              <p noindent="true">Project title: Interactively Grounded Language Understanding</p>
            </li>
            <li id="uid156">
              <p noindent="true">Duration: 11/2015 - 10/2018</p>
            </li>
            <li id="uid157">
              <p noindent="true">Coordinator: Jean Rouat (Université de Sherbrooke, Canada)</p>
            </li>
            <li id="uid158">
              <p noindent="true">Other partners: UMONS (Belgium), Inria (France), Univ-Lille (France), KTH (Sweden), Universidad de Zaragoza (Spain)</p>
            </li>
            <li id="uid159">
              <p noindent="true">Abstract: Language is an ability that develops in young children
through joint interaction with their caretakers and their physical
environment. At this level, human language understanding could be
referred as interpreting and expressing semantic concepts
(e.g. objects, actions and relations) through what can be perceived
(or inferred) from current context in the environment. Previous work
in the field of artificial intelligence has failed to address the
acquisition of such perceptually-grounded knowledge in virtual
agents (avatars), mainly because of the lack of physical embodiment
(ability to interact physically) and dialogue, communication skills
(ability to interact verbally). We believe that robotic agents are
more appropriate for this task, and that interaction is such an
important aspect of human language learning and understanding that
pragmatic knowledge (identifying or conveying intention) must be
present to complement semantic knowledge. Through a developmental
approach where knowledge grows in complexity while driven by
multimodal experience and language interaction with a human, we
propose an agent that will incorporate models of dialogues, human
emotions and intentions as part of its decision-making process. This
will lead to anticipation and reaction not only based on its internal
state (own goal and intention, perception of the environment), but
also on the perceived state and intention of the human
interactant. This will be possible through the development of
advanced machine learning methods (combining developmental, deep and
reinforcement learning) to handle large-scale multimodal inputs,
besides leveraging state-of-the-art technological components
involved in a language-based dialog system available within the
consortium. Evaluations of learned skills and knowledge will be
performed using an integrated architecture in a culinary use-case,
and novel databases enabling research in grounded human language
understanding will be released.</p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid160" level="1">
      <bodyTitle>International Initiatives</bodyTitle>
      <subsection id="uid161" level="2">
        <bodyTitle>With CWI</bodyTitle>
        <sanspuceslist>
          <li id="uid162">
            <p noindent="true">Title: Non-parametric sequential prediction project</p>
            <sanspuceslist>
              <li id="uid163">
                <p noindent="true">Centrum Wiskunde &amp; Informatica (CWI), Amsterdam (NL) - Peter Grünwald</p>
              </li>
            </sanspuceslist>
          </li>
          <li id="uid164">
            <p noindent="true">Duration: 2016 - 2018</p>
          </li>
          <li id="uid165">
            <p noindent="true">Start year: 2016</p>
          </li>
          <li id="uid166">
            <p noindent="true">Abstract: The aim is to develop the theory of learning for sequential decision making under uncertainty problems.</p>
            <p>In 2017, this collaboration involved D. Ryabko, É. Kaufmann, J. Ridgway, M. Valko, O. Maillard. A post-doc funded by Inria has been recruited in Fall 2016.</p>
          </li>
          <li id="uid167">
            <p noindent="true">
              <ref xlink:href="https://project.inria.fr/inriacwi/projects/non-parametric-sequential-prediction-project/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>project.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>inriacwi/<allowbreak/>projects/<allowbreak/>non-parametric-sequential-prediction-project/</ref>
            </p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid168" level="2">
        <bodyTitle>
          <ref xlink:href="https://project.inria.fr/eduband/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">EduBand</ref>
        </bodyTitle>
        <sanspuceslist>
          <li id="uid169">
            <p noindent="true">Title: Educational Bandits</p>
          </li>
          <li id="uid170">
            <p noindent="true">International Partner (Institution - Laboratory - Researcher):</p>
            <sanspuceslist>
              <li id="uid171">
                <p noindent="true">Carnegie Mellon University (United States)
- Department of Computer Science, Theory of computation lab - Emma Brunskill</p>
              </li>
            </sanspuceslist>
          </li>
          <li id="uid172">
            <p noindent="true">Start year: 2015</p>
          </li>
          <li id="uid173">
            <p noindent="true">See also: <ref xlink:href="https://project.inria.fr/eduband/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>project.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>eduband/</ref></p>
          </li>
          <li id="uid174">
            <p noindent="true">Education can transform an individual's capacity and the
opportunities available to him. The proposed collaboration will
build on and develop novel machine learning approaches towards
enhancing (human) learning. Massive open online classes (MOOCs) are
enabling many more people to access education, but mostly operate
using status quo teaching methods. Even more important than access
is the opportunity for online software to radically improve the
efficiency, engagement and effectiveness of education. Existing
intelligent tutoring systems (ITSs) have had some promising
successes, but mostly rely on learning sciences research to
construct hand-built strategies for automated teaching. Online
systems make it possible to actively collect substantial amount of
data about how people learn, and offer a huge opportunity to
substantially accelerate progress in improving education. An
essential aspect of teaching is providing the right learning
experience for the student, but it is often unknown a priori
exactly how this should be achieved. This challenge can often be
cast as an instance of decision-making under uncertainty. In
particular, prior work by Brunskill and colleagues demonstrated
that reinforcement learning (RL) and multi-arm bandit (MAB) can be
very effective approaches to solve the problem of automated
teaching. The proposed collaboration is thus intended to explore
the potential interactions of the fields of online education and RL
and MAB. On the one hand, we will define novel RL and MAB settings
and problems in online education. On the other hand, we will
investigate how solutions developed in RL and MAB could be
integrated in ITS and MOOCs and improve their effectiveness.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid175" level="2">
        <bodyTitle>Allocate</bodyTitle>
        <participants>
          <person key="sequel-2017-idp188720">
            <firstname>Pierre</firstname>
            <lastname>Perrault</lastname>
          </person>
          <person key="sequel-2017-idp193632">
            <firstname>Julien</firstname>
            <lastname>Seznec</lastname>
          </person>
          <person key="sequel-2014-idp70232">
            <firstname>Michal</firstname>
            <lastname>Valko</lastname>
          </person>
          <person key="dyogene-2014-idp72312">
            <firstname>Émilie</firstname>
            <lastname>Kaufmann</lastname>
          </person>
          <person key="tao-2015-idp83360">
            <firstname>Odalric</firstname>
            <lastname>Maillard</lastname>
          </person>
        </participants>
        <sanspuceslist>
          <li id="uid176">
            <p noindent="true">Title: Adaptive allocation of resources for recommender systems</p>
          </li>
          <li id="uid177">
            <p noindent="true">Inria contact: Michal Valko</p>
          </li>
          <li id="uid178">
            <p noindent="true">International Partner (Institution - Laboratory - Researcher):</p>
            <sanspuceslist>
              <li id="uid179">
                <p noindent="true">Universität Potsdam, Germany
A. Carpentier</p>
              </li>
            </sanspuceslist>
          </li>
          <li id="uid180">
            <p noindent="true">Start year: 2017</p>
          </li>
          <li id="uid181">
            <p noindent="true">We plan to improve a practical scenario of <i>resource
allocation in market surveys</i>, such as product appraisals and music
recommendation. In practice, the market is typically divided into
segments: geographic regions, age groups, etc. These groups are
then queried for preference with some fixed rule of a number of
queries per group. This testing is <i>costly and
non-adaptive</i>. The reason is some groups are easier to estimate
than others, but this is impossible to know a priori. Our challenge
is to <b>adaptively allocate the optimal number of samples</b> to
each group and improve the efficiency of market studies, by
providing <i>sample-efficient</i> solutions.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid182" level="2">
        <bodyTitle>Informal International Partners</bodyTitle>
        <p>
          <b>Adobe Research</b>
        </p>
        <sanspuceslist>
          <li id="uid183">
            <p noindent="true">Branislav Kveton <i>Collaborator</i></p>
          </li>
          <li id="uid184">
            <p noindent="true">Zheng Wen <i>Collaborator</i></p>
          </li>
          <li id="uid185">
            <p noindent="true">Sharan Vaswani <i>Collaborator</i></p>
          </li>
          <li id="uid186">
            <p noindent="true">M. Valko collaborated with Adobe Research on online influence maximization in social networks. This led to a publication in NIPS 2017.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Massachusetts Institute of Technology</b>
        </p>
        <sanspuceslist>
          <li id="uid187">
            <p noindent="true">Victor-Emmanuel Brunel <i>Collaborator</i></p>
          </li>
          <li id="uid188">
            <p noindent="true">M. Valko collaborated with V.-E. Brunel on the estimation of low rank determinantal point processes useful for diverse recommender systems.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Universität Potsdam</b>
        </p>
        <sanspuceslist>
          <li id="uid189">
            <p noindent="true">Alexandra Carpentier <i>Collaborator</i></p>
          </li>
          <li id="uid190">
            <p noindent="true">M. Valko collaborated with A. Carpentier on adaptive estimation of the block-diagonal matrices with application to market segmentations.
This collaboration was formalized in September 2017 by creating a North-European associate team.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>University of California, Berkeley</b>
        </p>
        <sanspuceslist>
          <li id="uid191">
            <p noindent="true">Victor Gabillon <i>Collaborator</i></p>
          </li>
          <li id="uid192">
            <p noindent="true">M. Valko collaborated with V. Gabillon on the sample complexities in unknown type of environments.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>University of Southern California</b>
        </p>
        <sanspuceslist>
          <li id="uid193">
            <p noindent="true">Haipeng Luo <i>Collaborator</i></p>
          </li>
          <li id="uid194">
            <p noindent="true">M. Valko collaborated with H. Luo on online submodular minimization.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Adobe Research</b>
        </p>
        <sanspuceslist>
          <li id="uid195">
            <p noindent="true">Mohammad Ghavamzadeh <i>Collaborator</i></p>
          </li>
          <li id="uid196">
            <p noindent="true">A. Lazaric collaborated with Adobe Research on active learning for accurate estimation of linear models. This led to a publication in ICML 2017.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Stanford University</b>
        </p>
        <sanspuceslist>
          <li id="uid197">
            <p noindent="true">Carlos Riquelme <i>Collaborator</i></p>
          </li>
          <li id="uid198">
            <p noindent="true">A. Lazaric collaborated with Carlos Riquelme on active learning for accurate estimation of linear models. This led to a publication in ICML 2017.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Stanford University</b>
        </p>
        <sanspuceslist>
          <li id="uid199">
            <p noindent="true">Emma Brunskill <i>Collaborator</i></p>
          </li>
          <li id="uid200">
            <p noindent="true">A. Lazaric collaborated with Emma Brunskill on exploration-exploitation with options in reinforcement learning. This led to a publication in NIPS 2017.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>University of California, Irvine</b>
        </p>
        <sanspuceslist>
          <li id="uid201">
            <p noindent="true">Anima Anandkumar <i>Collaborator</i></p>
          </li>
          <li id="uid202">
            <p noindent="true">Kamyar Azzizade <i>Collaborator</i></p>
          </li>
          <li id="uid203">
            <p noindent="true">A. Lazaric collaborated with A. Anandkumar and K. Azzizade on exploration-exploitation in reinforcement learning with state clustering. This led to a submission to AI&amp;Stats 2018.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>University of Leoben</b>
        </p>
        <sanspuceslist>
          <li id="uid204">
            <p noindent="true">Ronald Ortner <i>Collaborator</i></p>
          </li>
          <li id="uid205">
            <p noindent="true">A. Lazaric collaborated with R. Ortner on exploration-exploitation in reinforcement learning with regularized optimization. This will lead to a submission to ICML 2018.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Politecnico di Milano</b>
        </p>
        <sanspuceslist>
          <li id="uid206">
            <p noindent="true">Marcello Restelli <i>Collaborator</i></p>
          </li>
          <li id="uid207">
            <p noindent="true">Matteo Pirotta collaborated with M. Restelli on several topics in reinforcement learning. This led to publications at ICML 2017 and NIPS 2017.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Lancaster University</b>
        </p>
        <sanspuceslist>
          <li id="uid208">
            <p noindent="true">B. Balle <i>Collaborator</i></p>
          </li>
          <li id="uid209">
            <p noindent="true">O. Maillard collaborated on spectral learning of Hankel matrices. This led to a publication at ICML.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Mila, Université de Montréal</b>
        </p>
        <sanspuceslist>
          <li id="uid210">
            <p noindent="true">A. Courville <i>Collaborator</i></p>
          </li>
          <li id="uid211">
            <p noindent="true">F. Strub and O. Pietquin collaborate on deep reinforcement learning for language acquisition. This led to several papers at IJCAI, CVPR, and NIPS, as well as the guesswhat?! dataset and protocol, and the HOME dataset.</p>
          </li>
        </sanspuceslist>
        <p>
          <b>Uberlandia University, Brasil</b>
        </p>
        <sanspuceslist>
          <li id="uid212">
            <p noindent="true">C. Felicio <i>Collaborator</i></p>
          </li>
          <li id="uid213">
            <p noindent="true">Ph. Preux supervises this PhD on recommendation systems. This led to the defense of C. Felicio and a paper at UMAP.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid214" level="2">
        <bodyTitle>International Initiatives</bodyTitle>
        <sanspuceslist>
          <li id="uid215">
            <p noindent="true">
              <b>SequeL</b>
            </p>
          </li>
          <li id="uid216">
            <p noindent="true">Title: The multi-armed bandit problem</p>
          </li>
          <li id="uid217">
            <p noindent="true">International Partner (Institution - Laboratory - Researcher):</p>
            <sanspuceslist>
              <li id="uid218">
                <p noindent="true">University of Leoben (Austria)
Peter Auer</p>
              </li>
            </sanspuceslist>
          </li>
          <li id="uid219">
            <p noindent="true">Duration: 2014 - 2018</p>
          </li>
          <li id="uid220">
            <p noindent="true">Start year: 2014</p>
          </li>
          <li id="uid221">
            <p noindent="true">In a nutshell, the collaboration is focusing on nonparametric
algorithms for active learning problems, mainly involving
theoretical analysis of reinforcement learning and bandits problems
beyond the traditional settings of finite-state MDPs (for RL) or
i.i.d. rewards (for bandits). Peter Auer from University of Leoben
is a worldwide leader in the field, having introduced the UCB
approach around 2000, along with its finite-time analysis. Today,
SequeL is likely to be the largest research group working in this
field in the world, enjoying worldwide recognition. SequeL and
P. Auer's group have been collaborating for a couple of years now;
they have co-authored papers, visited each other (sabbatical stay,
post-doc), coorganized workshops; the STREP Complacs partially
funds this very active collaboration.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid222" level="2">
        <bodyTitle>International Initiatives</bodyTitle>
        <sanspuceslist>
          <li id="uid223">
            <p noindent="true">
              <b>Contextual multi-armed bandits with hidden structure</b>
            </p>
          </li>
          <li id="uid224">
            <p noindent="true">Title: Contextual multi-armed bandits with hidden structure</p>
          </li>
          <li id="uid225">
            <p noindent="true">International Partner (Institution - Laboratory - Researcher):</p>
            <sanspuceslist>
              <li id="uid226">
                <p noindent="true">IISc Bangalore (India) – Aditya Gopalan</p>
              </li>
            </sanspuceslist>
          </li>
          <li id="uid227">
            <p noindent="true">Duration: 2015 - 2017</p>
          </li>
          <li id="uid228">
            <p noindent="true">Recent advances in Multi-Armed Bandit (MAB) theory have yielded
key insights into, and driven the design of applications in,
sequential decision making in stochastic dynamical systems. Notable
among these are recommender systems, which have benefited greatly
from the study of contextual MABs incorporating user-specific
information (the context) into the decision problem from a rigorous
theoretical standpoint. In the proposed initiative, the key
features of (a) sequential interaction between a learner and the
users, and (b) a relatively small number of interactions per user
with the system, motivate the goal of efficiently exploiting the
underlying collective structure of users. The state-of-the-art
lacks a well-grounded strategy with provably near-optimal guarantees
for general, low-rank user structure. Combining expertise in the
foundations of MAB theory together with recent advances in spectral
methods and low-rank matrix completion, we target the first
provably near-optimal sequential low-rank MAB.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid229" level="1">
      <bodyTitle>International Research Visitors</bodyTitle>
      <subsection id="uid230" level="2">
        <bodyTitle>Visits of International Scientists</bodyTitle>
        <subsection id="uid231" level="3">
          <bodyTitle>Internships</bodyTitle>
          <simplelist>
            <li id="uid232">
              <p noindent="true">Harm de Vries, PhD student, University of Montreal, Canada, Jan-Jun 2017</p>
            </li>
            <li id="uid233">
              <p noindent="true">Mohammad Sadegh Talebi Mazraeh Shahi, PhD student, KTH Royal Institute of Technology, Sweden, Jun-Sep 2017</p>
            </li>
            <li id="uid234">
              <p noindent="true">Xuedong Shang, master student, ENS Rennes, Feb–Jun 2017</p>
            </li>
            <li id="uid235">
              <p noindent="true">Iuliia Olkhovskaia, master student, Moscow Institute of Physics and Technology, Russia, Feb–Jul 2017</p>
            </li>
            <li id="uid236">
              <p noindent="true">Georgios Papoudakis, master student, Aristotle University of Thessaloniki, Greece, May–Sep 2017</p>
            </li>
            <li id="uid237">
              <p noindent="true">Subhojyoti Mukherjee, master student, Indian Institute of Technology, Sep-Nov 2017</p>
            </li>
            <li id="uid238">
              <p noindent="true">Mahsa Asadi, Shiraz University, Iran, Sep-Dec 2017</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
    </subsection>
  </partenariat>
  <diffusion id="uid239">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid240" level="1">
      <bodyTitle>Promoting Scientific Activities</bodyTitle>
      <subsection id="uid241" level="2">
        <bodyTitle>Scientific Events Organisation</bodyTitle>
        <simplelist>
          <li id="uid242">
            <p noindent="true"><i>Visually grounded interaction and language</i>, workshop at NIPS 2017, organized by Florian Strub, Harm de Vries, Abhishek Das, Satwik Kottur, Stefan Lee, Mateusz Malinowski, Olivier Pietquin, Devi Parikh, Dhruv Batra, Aaron C Courville, Jérémie Mary. URL: <ref xlink:href="https://nips.cc/Conferences/2017/Schedule?showEvent=8766" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>nips.<allowbreak/>cc/<allowbreak/>Conferences/<allowbreak/>2017/<allowbreak/>Schedule?showEvent=8766</ref></p>
          </li>
          <li id="uid243">
            <p noindent="true">O. Maillard: Workshop of the working group <i>Sequential Structured Statistical Learning</i>, May 17 2017 at Institut des Hautes Etudes Scientifiques (Bures-sur-Yvette). URL: <ref xlink:href="https://sites.google.com/site/groupedetravailsssl" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>sites.<allowbreak/>google.<allowbreak/>com/<allowbreak/>site/<allowbreak/>groupedetravailsssl</ref></p>
          </li>
        </simplelist>
        <subsection id="uid244" level="3">
          <bodyTitle>Member of the Conference Program Committees</bodyTitle>
          <p>Members of <span class="smallcap" align="left">SequeL</span> have been involved in the following program committees in 2017:</p>
          <simplelist>
            <li id="uid245">
              <p noindent="true">Senior PC for International Joint Conference on Artificial Intelligence (IJCAI 2017)</p>
            </li>
            <li id="uid246">
              <p noindent="true">Senior PC for ACM KDD 2017</p>
            </li>
            <li id="uid247">
              <p noindent="true">International Conference on Artificial Intelligence and Statistics (AI &amp; STATS 2017)</p>
            </li>
            <li id="uid248">
              <p noindent="true">PC member for the international Conference On Learning Theory (COLT 2017)</p>
            </li>
            <li id="uid249">
              <p noindent="true">European Conference on Machine Learning (ECML 2017)</p>
            </li>
            <li id="uid250">
              <p noindent="true">1st Workshop on Transfer in Reinforcement Learning (TiRL) 2017</p>
            </li>
            <li id="uid251">
              <p noindent="true">The Third International Conference on Machine Learning, Optimization and Big Data (MOD 2017)</p>
            </li>
            <li id="uid252">
              <p noindent="true">French conferences:</p>
              <simplelist>
                <li id="uid253">
                  <p noindent="true">Extraction et Gestion des Connaissances (EGC),</p>
                </li>
                <li id="uid254">
                  <p noindent="true">Journées Francophones de Planification, Décision, Apprentissage (JFPDA)</p>
                </li>
                <li id="uid255">
                  <p noindent="true">Journées de la Société Francophone de Classification (SFC)</p>
                </li>
                <li id="uid256">
                  <p noindent="true">Conférence francophone sur l'Apprentissage Automatique (CAp)</p>
                </li>
              </simplelist>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid257" level="3">
          <bodyTitle>Reviewer</bodyTitle>
          <p>Édouard Oyallon receives a “best NIPS reviewer award”.</p>
          <p>Members of <span class="smallcap" align="left">SequeL</span> have reviewed papers for the following conferences:</p>
          <simplelist>
            <li id="uid258">
              <p noindent="true">AI&amp;Stats, COLT, ECML, ICML, IJCAI, NIPS, ALT.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid259" level="2">
        <bodyTitle>Journal</bodyTitle>
        <subsection id="uid260" level="3">
          <bodyTitle>Reviewer - Reviewing Activities</bodyTitle>
          <simplelist>
            <li id="uid261">
              <p noindent="true">Automatica</p>
            </li>
            <li id="uid262">
              <p noindent="true">IEEE Transactions on Pattern Analysis and Machine Intelligence - Journal Reviewer</p>
            </li>
            <li id="uid263">
                <p noindent="true">IEEE Transactions on Software Engineering</p>
            </li>
            <li id="uid264">
              <p noindent="true">International Federation of Automatic Control</p>
            </li>
            <li id="uid265">
              <p noindent="true">Bernoulli Journal</p>
            </li>
            <li id="uid266">
              <p noindent="true">Journal of Machine Learning Research</p>
            </li>
            <li id="uid267">
                <p noindent="true">IEEE Transactions on Signal Processing</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid268" level="2">
        <bodyTitle>Invited Talks</bodyTitle>
        <simplelist>
          <li id="uid269">
            <p noindent="true">R. Gaudel,
<i>Recommendation as a Sequential Process</i>, Presented on February 1st, 2017, at Séminaire CMLA, Paris, France
(<i>CMLA 2017</i>)</p>
          </li>
          <li id="uid270">
            <p noindent="true">R. Gaudel,
<i>Recommendation as a Sequential Process</i>, Presented on January 10th, 2017, at Séminaire ENSAI, Rennes (Bruz), France
(<i>ENSAI 2017</i>)</p>
          </li>
          <li id="uid271">
            <p noindent="true">A. Lazaric,
<i>Spectral Methods for Reinforcement Learning</i>, Presented on April 10, 2017, at Amazon, Berlin, Germany</p>
          </li>
          <li id="uid272">
            <p noindent="true">M. Valko,
<i>SequeL, graphs in ML, and online recommender systems</i>, Presented on November 9th, 2017 at Plateau Inria Euratechnologies in Lille, France
(<i>Euratechnologies 2017</i>)</p>
          </li>
          <li id="uid273">
            <p noindent="true">M. Valko,
<i>Sequential sampling for kernel matrix approximation and online learning</i>
Presented on September 19th, 2017, DeepMind, London, UK
(<i>DeepMind 2017</i>)</p>
          </li>
          <li id="uid274">
            <p noindent="true">M. Valko,
<i>Active learning on networks and online influence maximization</i>, Presented on September 18th, 2017, Decision Theory and Network Science: Methods and Applications, Lancaster, UK
(<i>STOR-i 2017</i>)</p>
          </li>
          <li id="uid275">
            <p noindent="true">M. Valko,
<i>Side observation in graph bandits</i>, Presented on July 11th, 2017, ICML 2017 workshop on Picky Learners, Sydney, Australia (<i>ICML 2017</i>)</p>
          </li>
          <li id="uid276">
            <p noindent="true">M. Valko,
<i>Distributed sequential sampling for kernel matrix approximation</i>, Presented on June 28th, 2017, L'Institut de Mathématiques de Toulouse, France
(<i>IMT 2017</i>)</p>
          </li>
          <li id="uid277">
            <p noindent="true">M. Valko,
<i>Online sequential solutions for recommender systems</i>, Presented on June 14th, 2017 at Journées Scientifiques Inria 2017 in Nice, France
(<i>JS 2017</i>)</p>
          </li>
          <li id="uid278">
            <p noindent="true">M. Valko,
<i>Where is Justin Bieber?</i>, Presented on March 30th, 2017 at Dating day in Lille, France
(<i>Dating 2017</i>)</p>
          </li>
          <li id="uid279">
            <p noindent="true">M. Valko,
<i>Distributed sequential sampling for kernel matrix approximation</i>, Presented on March 22nd, 2017, for Universität Potsdam at Amazon
(<i>Berlin 2017</i>)</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid280" level="2">
        <bodyTitle>Scientific Expertise</bodyTitle>
        <simplelist>
          <li id="uid281">
            <p noindent="true">É. Kaufmann was a member of the committee of Experts for Hiring junior faculty in the maths department of Université de Lille 1</p>
          </li>
          <li id="uid282">
            <p noindent="true">J. Mary was a member of the industrial transfer commission of Inria Lille</p>
          </li>
          <li id="uid283">
            <p noindent="true">Alessandro Lazaric was reviewer for NSFC-ISF Research Grant</p>
          </li>
          <li id="uid284">
            <p noindent="true">Philippe Preux is a member of the evaluation committee and participates in the hiring, promotion, and evaluation juries of Inria:</p>
            <simplelist>
              <li id="uid285">
                <p noindent="true">Inria CR1 hiring committee</p>
              </li>
              <li id="uid286">
                <p noindent="true">Inria Lille CR2 hiring committee</p>
              </li>
              <li id="uid287">
                <p noindent="true">Inria committee for researcher promotion</p>
              </li>
              <li id="uid288">
                <p noindent="true">Inria committee for PEDR</p>
              </li>
            </simplelist>
          </li>
          <li id="uid289">
            <p noindent="true">Philippe Preux was a member of the hiring committees for 1 professor and 2 associate professors at the Université de Lille 3</p>
          </li>
          <li id="uid290">
            <p noindent="true">Philippe Preux was a member of the committee for PhD grant of the “Pôle Métropolitain de la Côte d'Opale”</p>
          </li>
          <li id="uid291">
            <p noindent="true">Philippe Preux reviewed a proposal for ANRT (and declined invitation from ANR)</p>
          </li>
          <li id="uid292">
            <p noindent="true">M. Valko is an elected member of the evaluation committee and participates in the hiring, promotion, and evaluation juries of Inria, notably</p>
            <simplelist>
              <li id="uid293">
                <p noindent="true">Hiring committee for junior researchers at Inria Saclay (2017)</p>
              </li>
              <li id="uid294">
                <p noindent="true">Inria work group for deontological ethics (2017)</p>
              </li>
              <li id="uid295">
                <p noindent="true">Selection committee for Inria award for scientific excellence of junior and confirmed researchers (2017)</p>
              </li>
            </simplelist>
          </li>
          <li id="uid296">
            <p noindent="true">M. Valko was a member of the national Inria acceptance committee for hiring junior researchers</p>
          </li>
          <li id="uid297">
            <p noindent="true">M. Valko was a member of the committee of Experts for Hiring junior faculty at CMLA, ENS Paris-Saclay</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid298" level="2">
        <bodyTitle>Research Administration</bodyTitle>
        <simplelist>
          <li id="uid299">
            <p noindent="true"><i>M. Gaudel</i> was a member of the Board of CRIStAL.</p>
          </li>
          <li id="uid300">
            <p noindent="true">Philippe Preux is:</p>
            <simplelist>
              <li id="uid301">
                <p noindent="true">“délégué scientifique adjoint” of the Inria center in Lille</p>
              </li>
              <li id="uid302">
                <p noindent="true">member of the Inria evaluation committee (CE)</p>
              </li>
              <li id="uid303">
                <p noindent="true">member of the Inria internal scientific committee (COSI)</p>
              </li>
              <li id="uid304">
                <p noindent="true">member of the scientific committee of CRIStAL</p>
              </li>
              <li id="uid305">
                <p noindent="true">the head of the “Data Intelligence” thematic group at CRIStAL</p>
              </li>
            </simplelist>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid306" level="1">
      <bodyTitle>Teaching - Supervision - Juries</bodyTitle>
      <subsection id="uid307" level="2">
        <bodyTitle>Teaching</bodyTitle>
        <sanspuceslist>
          <li id="uid308">
            <p noindent="true">Master: É. Kaufmann, 2017/2018 Fall: Machine Learning, 18h eq TD, M2 Maths/Finances, Université de Lille 1</p>
          </li>
          <li id="uid309">
            <p noindent="true">Master: É. Kaufmann, 2016/2017 Spring: Data Mining, 36h eq TD, M1 Maths/Finances, Université de Lille 1</p>
          </li>
          <li id="uid310">
            <p noindent="true">Master: A. Lazaric, 2017/2018 Fall: Reinforcement Learning, 36h eqTD, M2, ENS Cachan</p>
          </li>
          <li id="uid311">
            <p noindent="true">Master: M. Valko, 2017/2018 Fall: Graphs in Machine Learning, 36h eqTD, M2, ENS Cachan</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid312" level="2">
        <bodyTitle>Supervision</bodyTitle>
        <sanspuceslist>
          <li id="uid313">
            <p noindent="true">PhD in progress: Marc Abeille, Exploration-exploitation in reinforcement learning, started Sept. 2014, advisor: Remi Munos, Alessandro Lazaric</p>
          </li>
          <li id="uid314">
            <p noindent="true">PhD in progress: Merwan Barlier, Human-in-the-loop reinforcement learning for dialogue systems, started Oct. 2014, advisor: Olivier Pietquin</p>
          </li>
          <li id="uid315">
            <p noindent="true">PhD in progress: Alexandre Bérard, Deep learning for post-editing and automatic translation, started Oct. 2014, advisor: Olivier Pietquin</p>
          </li>
          <li id="uid316">
            <p noindent="true">PhD in progress: Lilian Besson, Bandit approach to improve Internet Of Things Communications, started Oct. 2016, advisor: Émilie Kaufmann, Christophe Moy (CentraleSupélec Rennes)</p>
          </li>
          <li id="uid317">
            <p noindent="true">PhD in progress: Daniele Calandriello, Efficient Sequential Learning in Structured and Constrained Environment, Inria, started Oct. 2014, advisor: Michal Valko, Alessandro Lazaric</p>
          </li>
          <li id="uid318">
            <p noindent="true">PhD in progress: Ronan Fruit, Exploration-exploitation in hierarchical reinforcement learning, Inria, started Dec. 2015, advisor: Daniil Ryabko, Alessandro Lazaric</p>
          </li>
          <li id="uid319">
            <p noindent="true">PhD defended: Pratik Gajane, Multi-armed bandits with unconventional
feedback, started Oct. 2014, defended Nov. 14th 2017, advisor: Philippe Preux</p>
          </li>
          <li id="uid320">
            <p noindent="true">PhD in progress: Guillaume Gautier, DPPs in ML, started Oct. 2016, advisor: Michal Valko; Rémi Bardenet</p>
          </li>
          <li id="uid321">
            <p noindent="true">PhD in progress: Jean-Bastien Grill, Création et analyse d'algorithmes efficaces pour la prise de décision dans un environnement inconnu et incertain, Inria/ENS Paris/Lille 1, started Oct. 2014, advisor: Rémi Munos, Michal Valko</p>
          </li>
          <li id="uid322">
            <p noindent="true">PhD in progress: Édouard Leurent, Autonomous vehicle control: application of machine learning to contextualized path planning, started Oct. 2017, advisor: Odalric Maillard, Philippe Preux, Denis Efimov (NON-A), Wilfrid Perruquetti (NON-A)</p>
          </li>
          <li id="uid323">
            <p noindent="true">PhD in progress: Sheikh Waqas Akhtar, Bandits for non-stationarity and structure, started Oct. 2017, advisor: Odalric Maillard, Daniil Ryabko.</p>
          </li>
          <li id="uid324">
            <p noindent="true">PhD in progress: Julien Perolat, Reinforcement learning: the multi-player case, started Oct.  2014, advisor: Olivier Pietquin</p>
          </li>
          <li id="uid325">
            <p noindent="true">PhD in progress: Pierre Perrault, Online Learning on Streaming Graphs, started Sep. 2017, advisor: Michal Valko; Vianney Perchet</p>
          </li>
          <li id="uid326">
            <p noindent="true">PhD in progress: Mathieu Seurin, Multi-scale rewards in reinforcement learning, started Oct. 2017, advisor: Olivier Pietquin, Philippe Preux</p>
          </li>
          <li id="uid327">
            <p noindent="true">PhD in progress: Julien Seznec, Sequential Learning for Educational Systems, started Mar. 2017, advisor: Michal Valko; Alessandro Lazaric, Jonathan Banon</p>
          </li>
          <li id="uid328">
            <p noindent="true">PhD in progress: Xuedong Shang, Adaptive methods for optimization in stochastic environments, started Oct. 2017, advisor: Émilie Kaufmann, Michal Valko</p>
          </li>
          <li id="uid329">
            <p noindent="true">PhD in progress: Florian Strub, Reinforcement Learning for visually grounded interaction, started Jan. 2016, advisors: Olivier Pietquin and Jeremie Mary</p>
          </li>
          <li id="uid330">
            <p noindent="true">PhD in progress: Kiewan Villatel, Deep Learning for Conversion
Rate Prediction in Online Advertising, started Oct. 2017,
advisor: Philippe Preux</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid331" level="2">
        <bodyTitle>Juries</bodyTitle>
        <p>PhD and HDR juries:</p>
        <simplelist>
          <li id="uid332">
            <p noindent="true">É.  Kaufmann, <i>Navikumar Modi</i>, CentraleSupélec Rennes, May 2017</p>
          </li>
          <li id="uid333">
            <p noindent="true">A. Lazaric:</p>
            <simplelist>
              <li id="uid334">
                <p noindent="true"><i>Stefano Paladino</i>, Politecnico di Milano, Dec 2017</p>
              </li>
              <li id="uid335">
                <p noindent="true"><i>Michael Castronovo</i>, Université de Liège, March 2017</p>
              </li>
              <li id="uid336">
                <p noindent="true"><i>Raffaello Camoriano</i>, Università di Genova, April 2017</p>
              </li>
              <li id="uid337">
                <p noindent="true"><i>Claire Vernade</i>, Télécom ParisTech, October 2017</p>
              </li>
            </simplelist>
          </li>
          <li id="uid338">
            <p noindent="true">Ph. Preux:</p>
            <simplelist>
              <li id="uid339">
                <p noindent="true">Cricia Zilda Felicio Paixao, University of Uberlândia, Brazil</p>
              </li>
              <li id="uid340">
                <p noindent="true">Thibault Gisselbrecht, LIP 6, UPMC, Paris</p>
              </li>
              <li id="uid341">
                <p noindent="true">Pratik Gajane, CRIStAL, Lille</p>
              </li>
            </simplelist>
          </li>
          <li id="uid342">
            <p noindent="true">M. Valko: <i>Clément Bouttier</i>, Université Toulouse 3 Paul Sabatier, June 2017</p>
          </li>
        </simplelist>
        <p>PhD mid-term evaluation:</p>
        <simplelist>
          <li id="uid343">
            <p noindent="true">M. Valko: <i>Thibault Liétard</i>, Université Lille, September 2017</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid344" level="1">
      <bodyTitle>Popularization</bodyTitle>
      <simplelist>
        <li id="uid345">
          <p noindent="true">CNRS publishes an article about zonotope sampling presented at ICML (see <ref xlink:href="http://www.cnrs.fr/ins2i/spip.php?article2633" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>cnrs.<allowbreak/>fr/<allowbreak/>ins2i/<allowbreak/>spip.<allowbreak/>php?article2633</ref>).</p>
        </li>
        <li id="uid346">
          <p noindent="true">Julien Seznec publishes an article in <i>Les Echos</i> that discusses ML for education (November 2017).</p>
        </li>
        <li id="uid347">
          <p noindent="true">Émilie Kaufmann gave a popularization talk about bandit algorithms aimed at high school/prepa students at the MathPark seminar, organized at IHP in Paris (April 2017).</p>
        </li>
        <li id="uid348">
          <p noindent="true"><i>Avec GuessWhat?! quand l’humain joue, l’ordinateur s’initie au langage</i>, <ref xlink:href="https://www.inria.fr/centre/lille/actualites/avec-guesswhat-!-quand-l-humain-joue-l-ordinateur-s-initie-au-langage" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>centre/<allowbreak/>lille/<allowbreak/>actualites/<allowbreak/>avec-guesswhat-!-quand-l-humain-joue-l-ordinateur-s-initie-au-langage</ref></p>
        </li>
        <li id="uid349">
          <p noindent="true">Florian Strub and Mathieu Seurin demonstrated guesswhat?! during the celebrations of Inria 50th anniversary (November 2017).</p>
        </li>
        <li id="uid350">
          <p noindent="true">Philippe Preux:</p>
          <simplelist>
            <li id="uid351">
              <p noindent="true">interviewed for an article on <i>L’intelligence artificielle, est-ce vraiment de l’intelligence ?</i> in <i>BioTech.info</i>, Jan. 2017.</p>
            </li>
            <li id="uid352">
              <p noindent="true">participates to a debate about Artificial Intelligence, as part of the franceIA tour (Euratechnologies, Lille).</p>
            </li>
            <li id="uid353">
              <p noindent="true">interview by AFP in relation to alphaGo.</p>
            </li>
            <li id="uid354">
              <p noindent="true">interviewed for an article on AI and games, published in <i>Le figaro</i>.</p>
            </li>
            <li id="uid355">
              <p noindent="true">an interview that led to a publication in ATOS Connexion, the ATOS internal journal.</p>
            </li>
            <li id="uid356">
              <p noindent="true">a video has been made with him being interviewed on Artificial Intelligence by NordEka (to be available on youtube).</p>
            </li>
            <li id="uid357">
              <p noindent="true">has been selected to be portrayed at the “Soirée partenaires de l'université de Lille”, Nov.</p>
            </li>
            <li id="uid358">
              <p noindent="true">was a member of the organization committee of the celebrations of the 50th Inria anniversary in Lille.</p>
            </li>
            <li id="uid359">
              <p noindent="true">co-organizes a meet-up on big data and machine learning at Inria.</p>
            </li>
          </simplelist>
        </li>
        <li id="uid360">
          <p noindent="true">M. Valko,
<i>Comment maximiser la détection des influenceurs sur les réseaux sociaux ?</i>, popularization talk, Presented on May 30th, 2017 at 13 France
(<i>Inria 13:45 2017</i>)</p>
        </li>
      </simplelist>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    
    <biblStruct id="sequel-2017-bid71" type="article" rend="refer" n="refercite:cappe:hal-00738209">
      <identifiant type="hal" value="hal-00738209"/>
      <analytic>
        <title level="a">Kullback-Leibler Upper Confidence Bounds for Optimal Sequential Allocation</title>
        <author>
          <persName>
            <foreName>Olivier</foreName>
            <surname>Cappé</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Aurélien</foreName>
            <surname>Garivier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="tao-2015-idp83360">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Gilles</foreName>
            <surname>Stoltz</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Annals of Statistics</title>
        <imprint>
          <biblScope type="volume">41</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">1516-1541</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-00738209" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-00738209</ref>
        </imprint>
      </monogr>
      <note type="bnote">Accepted, to appear in Annals of Statistics</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid64" type="inproceedings" rend="refer" n="refercite:carpentier:hal-01304020">
      <identifiant type="hal" value="hal-01304020"/>
      <analytic>
        <title level="a">Revealing graph bandits for maximizing local influence</title>
        <author>
          <persName>
            <foreName>Alexandra</foreName>
            <surname>Carpentier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">International Conference on Artificial Intelligence and Statistics</title>
        <loc>Seville, Spain</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01304020" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01304020</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid62" type="inproceedings" rend="refer" n="refercite:devries:hal-01648683">
      <identifiant type="hal" value="hal-01648683"/>
      <analytic>
        <title level="a">Modulating early visual processing by language</title>
        <author>
          <persName>
            <foreName>Harm</foreName>
            <surname>De Vries</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2015-idp110544">
            <foreName>Florian</foreName>
            <surname>Strub</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sequel-2014-idp76928">
            <foreName>Jérémie</foreName>
            <surname>Mary</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Hugo</foreName>
            <surname>Larochelle</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Aaron</foreName>
            <surname>Courville</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Conference on Neural Information Processing Systems</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01648683" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01648683</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid68" type="article" rend="refer" n="refercite:gatti:hal-01237670">
      <identifiant type="hal" value="hal-01237670"/>
      <analytic>
        <title level="a">Truthful Learning Mechanisms for Multi–Slot Sponsored Search Auctions with Externalities</title>
        <author>
          <persName>
            <foreName>Nicola</foreName>
            <surname>Gatti</surname>
            <initial>N.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Marco</foreName>
            <surname>Rocco</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Francesco</foreName>
            <surname>Trovò</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Artificial Intelligence</title>
        <imprint>
          <biblScope type="volume">227</biblScope>
          <dateStruct>
            <month>October</month>
            <year>2015</year>
          </dateStruct>
          <biblScope type="pages">93-139</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01237670" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01237670</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid65" type="article" rend="refer" n="refercite:ghavamzadeh:hal-00776608">
      <identifiant type="hal" value="hal-00776608"/>
      <analytic>
        <title level="a">Bayesian Policy Gradient and Actor-Critic Algorithms</title>
        <author>
          <persName key="sequel-2014-idp65928">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Yaakov</foreName>
            <surname>Engel</surname>
            <initial>Y.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Machine Learning Research</title>
        <imprint>
          <biblScope type="volume">17</biblScope>
          <biblScope type="number">66</biblScope>
          <dateStruct>
            <month>January</month>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">1-53</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-00776608" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00776608</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid67" type="article" rend="refer" n="refercite:kadri:hal-01221329">
      <identifiant type="hal" value="hal-01221329"/>
      <analytic>
        <title level="a">Operator-valued Kernels for Learning from Functional Response Data</title>
        <author>
          <persName>
            <foreName>Hachem</foreName>
            <surname>Kadri</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2014-idp74216">
            <foreName>Emmanuel</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Stéphane</foreName>
            <surname>Canu</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Alain</foreName>
            <surname>Rakotomamonjy</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp120064">
            <foreName>Julien</foreName>
            <surname>Audiffren</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Machine Learning Research (JMLR)</title>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01221329" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01221329</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid63" type="article" rend="refer" n="refercite:kaufmann:hal-01024894">
      <identifiant type="hal" value="hal-01024894"/>
      <analytic>
        <title level="a">On the Complexity of Best Arm Identification in Multi-Armed Bandit Models</title>
        <author>
          <persName key="dyogene-2014-idp72312">
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Olivier</foreName>
            <surname>Cappé</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Aurélien</foreName>
            <surname>Garivier</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Machine Learning Research</title>
        <imprint>
          <biblScope type="volume">17</biblScope>
          <dateStruct>
            <month>January</month>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">1-42</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01024894" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01024894</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid66" type="article" rend="refer" n="refercite:lazaric:hal-01401513">
      <identifiant type="hal" value="hal-01401513"/>
      <analytic>
        <title level="a">Analysis of Classification-based Policy Iteration Algorithms</title>
        <author>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp65928">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Machine Learning Research</title>
        <imprint>
          <biblScope type="volume">17</biblScope>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">1 - 30</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01401513" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01401513</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid70" type="article" rend="refer" n="refercite:MAL-038">
      <identifiant type="doi" value="10.1561/2200000038"/>
      <analytic>
        <title level="a">From Bandits to Monte-Carlo Tree Search: The Optimistic Principle Applied to Optimization and Planning</title>
        <author>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Foundations and Trends® in Machine Learning</title>
        <imprint>
          <biblScope type="volume">7</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">1-129</biblScope>
          <ref xlink:href="http://dx.doi.org/10.1561/2200000038" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>dx.<allowbreak/>doi.<allowbreak/>org/<allowbreak/>10.<allowbreak/>1561/<allowbreak/>2200000038</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid69" type="article" rend="refer" n="refercite:ortner:hal-01074077">
      <identifiant type="doi" value="10.1016/j.tcs.2014.09.026"/>
      <identifiant type="hal" value="hal-01074077"/>
      <analytic>
        <title level="a">Regret bounds for restless Markov bandits</title>
        <author>
          <persName>
            <foreName>Ronald</foreName>
            <surname>Ortner</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Peter</foreName>
            <surname>Auer</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Theoretical Computer Science (TCS)</title>
        <imprint>
          <biblScope type="volume">558</biblScope>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">62-76</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01074077" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01074077</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid58" type="phdthesis" rend="year" n="cite:abeille">
      <monogr>
        <title level="m">Exploration-Exploitation with Thompson Sampling in Linear Systems</title>
        <author>
          <persName key="sequel-2014-idp97464">
            <foreName>Marc</foreName>
            <surname>Abeille</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">Université de Lille</orgName>
          </publisher>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc">Ph. D. Thesis</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid61" type="phdthesis" rend="year" n="cite:calandriello">
      <monogr>
        <title level="m">Efficient Sequential Learning in Structured and Constrained Environments</title>
        <author>
          <persName key="sequel-2014-idp101176">
            <foreName>Daniele</foreName>
            <surname>Calandriello</surname>
            <initial>D.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">Université de Lille</orgName>
          </publisher>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc">Ph. D. Thesis</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid59" type="phdthesis" rend="year" n="cite:gajane">
      <monogr>
        <title level="m">Multi-armed bandits with unconventional feedback</title>
        <author>
          <persName key="sequel-2014-idp102400">
            <foreName>Pratik</foreName>
            <surname>Gajane</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">Université de Lille</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2017</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc">Ph. D. Thesis</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid60" type="phdthesis" rend="year" n="cite:perolat">
      <monogr>
        <title level="m">Reinforcement learning: the multiplayer case</title>
        <author>
          <persName key="sequel-2014-idp109840">
            <foreName>Julien</foreName>
            <surname>Pérolat</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">Université de Lille</orgName>
          </publisher>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc">Ph. D. Thesis</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid42" type="article" rend="year" n="cite:danglot:hal-01378523">
      <identifiant type="doi" value="10.1007/s10664-017-9571-8"/>
      <identifiant type="hal" value="hal-01378523"/>
      <analytic>
        <title level="a">Correctness Attraction: A Study of Stability of Software Behavior Under Runtime Perturbation</title>
        <author>
          <persName key="dreampal-2014-idp76992">
            <foreName>Benjamin</foreName>
            <surname>Danglot</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
          <persName key="diverse-2014-idm8000">
            <foreName>Benoit</foreName>
            <surname>Baudry</surname>
            <initial>B.</initial>
          </persName>
          <persName key="spirals-2014-idp82576">
            <foreName>Martin</foreName>
            <surname>Monperrus</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00527">
        <idno type="issn">1382-3256</idno>
        <title level="j">Empirical Software Engineering</title>
        <imprint>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01378523" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01378523</ref>
        </imprint>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1611.09187" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1611.<allowbreak/>09187</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid50" type="article" rend="year" n="cite:dimitrakakis:hal-01500302">
      <identifiant type="hal" value="hal-01500302"/>
      <analytic>
        <title level="a">Differential Privacy for Bayesian Inference through Posterior Sampling</title>
        <author>
          <persName key="sequel-2015-idp103832">
            <foreName>Christos</foreName>
            <surname>Dimitrakakis</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Blaine</foreName>
            <surname>Nelson</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Zuhe</foreName>
            <surname>Zhang</surname>
            <initial>Z.</initial>
          </persName>
          <persName>
            <foreName>Aikaterini</foreName>
            <surname>Mitrokotsa</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Benjamin I P</foreName>
            <surname>Rubinstein</surname>
            <initial>B. I. P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01187">
        <idno type="issn">1532-4435</idno>
        <title level="j">Journal of Machine Learning Research</title>
        <imprint>
          <biblScope type="volume">18</biblScope>
          <biblScope type="number">11</biblScope>
          <dateStruct>
            <month>April</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-39</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01500302" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01500302</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid44" subtype="nonparu-d" type="article" rend="year" n="cite:kaufmann:hal-01163147">
      <identifiant type="hal" value="hal-01163147"/>
      <analytic>
        <title level="a">A Spectral Algorithm with Additive Clustering for the Recovery of Overlapping Communities in Networks</title>
        <author>
          <persName key="dyogene-2014-idp72312">
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Thomas</foreName>
            <surname>Bonald</surname>
            <initial>T.</initial>
          </persName>
          <persName key="dyogene-2014-idp61656">
            <foreName>Marc</foreName>
            <surname>Lelarge</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01855">
        <idno type="issn">0304-3975</idno>
        <title level="j">Journal of Theoretical Computer Science (TCS)</title>
        <imprint>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01163147" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01163147</ref>
        </imprint>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1506.04158" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1506.<allowbreak/>04158</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid26" subtype="nonparu-d" type="article" rend="year" n="cite:kaufmann:hal-01449822">
      <identifiant type="hal" value="hal-01449822"/>
      <analytic>
        <title level="a">Learning the distribution with largest mean: two bandit frameworks</title>
        <author>
          <persName key="dyogene-2014-idp72312">
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Aurélien</foreName>
            <surname>Garivier</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid02803">
        <idno type="issn">2267-3059</idno>
        <title level="j">ESAIM: Proceedings and Surveys</title>
        <imprint>
          <biblScope type="volume">2017</biblScope>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1 - 10</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01449822" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01449822</ref>
        </imprint>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1702.00001" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1702.<allowbreak/>00001</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid27" subtype="nonparu-d" type="article" rend="year" n="cite:kaufmann:hal-01251606">
      <identifiant type="hal" value="hal-01251606"/>
      <analytic>
        <title level="a">On Bayesian index policies for sequential resource allocation</title>
        <author>
          <persName key="dyogene-2014-idp72312">
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00146">
        <idno type="issn">0090-5364</idno>
        <title level="j">Annals of Statistics</title>
        <imprint>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01251606" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01251606</ref>
        </imprint>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1601.01190" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1601.<allowbreak/>01190</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid41" type="article" rend="year" n="cite:musco:hal-01346046">
      <identifiant type="doi" value="10.1007/s11219-016-9332-8"/>
      <identifiant type="hal" value="hal-01346046"/>
      <analytic>
        <title level="a">A Large-scale Study of Call Graph-based Impact Prediction using Mutation Testing</title>
        <author>
          <persName key="sequel-2014-idp117600">
            <foreName>Vincenzo</foreName>
            <surname>Musco</surname>
            <initial>V.</initial>
          </persName>
          <persName key="spirals-2014-idp82576">
            <foreName>Martin</foreName>
            <surname>Monperrus</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01783">
        <idno type="issn">0963-9314</idno>
        <title level="j">Software Quality Journal</title>
        <imprint>
          <biblScope type="volume">25</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <month>September</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">921–950</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01346046" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01346046</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid24" type="inproceedings" rend="year" n="cite:abeille:hal-01493561">
      <identifiant type="hal" value="hal-01493561"/>
      <analytic>
        <title level="a">Linear Thompson Sampling Revisited</title>
        <author>
          <persName key="sequel-2014-idp97464">
            <foreName>Marc</foreName>
            <surname>Abeille</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AISTATS 2017 - 20th International Conference on Artificial Intelligence and Statistics</title>
        <loc>Fort Lauderdale, United States</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01493561" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01493561</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>20</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid14" type="inproceedings" rend="year" n="cite:abeille:hal-01493564">
      <identifiant type="hal" value="hal-01493564"/>
      <analytic>
        <title level="a">Thompson Sampling for Linear-Quadratic Control Problems</title>
        <author>
          <persName key="sequel-2014-idp97464">
            <foreName>Marc</foreName>
            <surname>Abeille</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AISTATS 2017 - 20th International Conference on Artificial Intelligence and Statistics</title>
        <loc>Fort Lauderdale, United States</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01493564" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01493564</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>20</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid33" type="inproceedings" rend="year" n="cite:balle:hal-01590940">
      <identifiant type="hal" value="hal-01590940"/>
      <analytic>
        <title level="a">Spectral Learning from a Single Trajectory under Finite-State Policies</title>
        <author>
          <persName>
            <foreName>Borja</foreName>
            <surname>Balle</surname>
            <initial>B.</initial>
          </persName>
          <persName key="tao-2015-idp83360">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International conference on Machine Learning</title>
        <loc>Sydney, Australia</loc>
        <title level="s">Proceedings of the International conference on Machine Learning</title>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01590940" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01590940</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>27</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid29" type="inproceedings" rend="year" n="cite:bonnefoi:hal-01575419">
      <identifiant type="hal" value="hal-01575419"/>
      <analytic>
        <title level="a">Multi-Armed Bandit Learning in IoT Networks: Learning helps even in non-stationary settings</title>
        <author>
          <persName>
            <foreName>Rémi</foreName>
            <surname>Bonnefoi</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2016-idp147888">
            <foreName>Lilian</foreName>
            <surname>Besson</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Christophe</foreName>
            <surname>Moy</surname>
            <initial>C.</initial>
          </persName>
          <persName key="dyogene-2014-idp72312">
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Jacques</foreName>
            <surname>Palicot</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">CROWNCOM 2017 - 12th EAI International Conference on Cognitive Radio Oriented Wireless Networks</title>
        <loc>Lisbon, Portugal</loc>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01575419" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01575419</ref>
        </imprint>
        <meeting id="cid623557">
          <title>IEEE International Conference on Cognitive Radio Oriented Wireless Networks and Communications</title>
          <num>2017</num>
          <abbr type="sigle">Crowncom</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid47" type="inproceedings" rend="year" n="cite:brodeur:hal-01653037">
      <identifiant type="hal" value="hal-01653037"/>
      <analytic>
        <title level="a">HoME: a Household Multimodal Environment</title>
        <author>
          <persName>
            <foreName>Simon</foreName>
            <surname>Brodeur</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Ethan</foreName>
            <surname>Perez</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Ankesh</foreName>
            <surname>Anand</surname>
            <initial>A.</initial>
          </persName>
          <persName key="flowers-2015-idp123880">
            <foreName>Florian</foreName>
            <surname>Golemo</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Luca</foreName>
            <surname>Celotti</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sequel-2015-idp110544">
            <foreName>Florian</foreName>
            <surname>Strub</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Jean</foreName>
            <surname>Rouat</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Hugo</foreName>
            <surname>Larochelle</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Aaron</foreName>
            <surname>Courville</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2017's Visually-Grounded Interaction and Language Workshop</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01653037" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01653037</ref>
        </imprint>
        <meeting id="cid625901">
          <title>NIPS Visually-Grounded Interaction and Language Workshop</title>
          <num>2017</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1711.11017" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1711.<allowbreak/>11017</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid39" type="inproceedings" rend="year" n="cite:berard:hal-01580881">
      <identifiant type="hal" value="hal-01580881"/>
      <analytic>
        <title level="a">LIG-CRIStAL System for the WMT17 Automatic Post-Editing Task</title>
        <author>
          <persName key="sequel-2014-idp99936">
            <foreName>Alexandre</foreName>
            <surname>Bérard</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Laurent</foreName>
            <surname>Besacier</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Second conference on machine translation (WMT17) during EMNLP 2017</title>
        <loc>Copenhagen, Denmark</loc>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01580881" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01580881</ref>
        </imprint>
        <meeting id="cid625903">
          <title>EMNLP Conference on machine translation</title>
          <num>2</num>
          <abbr type="sigle">EMNLP WMT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid11" type="inproceedings" rend="year" n="cite:calandriello:hal-01482760">
      <identifiant type="hal" value="hal-01482760"/>
      <analytic>
        <title level="a">Distributed adaptive sampling for kernel matrix approximation</title>
        <author>
          <persName key="sequel-2014-idp101176">
            <foreName>Daniele</foreName>
            <surname>Calandriello</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International Conference on Artificial Intelligence and Statistics</title>
        <loc>Fort Lauderdale, United States</loc>
        <imprint>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01482760" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01482760</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>20</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid13" type="inproceedings" rend="year" n="cite:calandriello:hal-01643961">
      <identifiant type="hal" value="hal-01643961"/>
      <analytic>
        <title level="a">Efficient second-order online kernel learning with adaptive embedding</title>
        <author>
          <persName key="sequel-2014-idp101176">
            <foreName>Daniele</foreName>
            <surname>Calandriello</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2017 : The Thirty-first Annual Conference on Neural Information Processing Systems</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-17</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01643961" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01643961</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid12" type="inproceedings" rend="year" n="cite:calandriello:hal-01537799">
      <identifiant type="hal" value="hal-01537799"/>
      <analytic>
        <title level="a">Second-Order Kernel Online Convex Optimization with Adaptive Sketching</title>
        <author>
          <persName key="sequel-2014-idp101176">
            <foreName>Daniele</foreName>
            <surname>Calandriello</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International Conference on Machine Learning</title>
        <loc>Sydney, Australia</loc>
        <imprint>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01537799" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01537799</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>27</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid38" type="inproceedings" rend="year" n="cite:carrara:hal-01557775">
      <identifiant type="hal" value="hal-01557775"/>
      <analytic>
        <title level="a">Online learning and transfer for user adaptation in dialogue systems</title>
        <author>
          <persName key="sequel-2017-idp171616">
            <foreName>Nicolas</foreName>
            <surname>Carrara</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Romain</foreName>
            <surname>Laroche</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">SIGDIAL/SEMDIAL joint special session on negotiation dialog 2017</title>
        <loc>Saarbrücken, Germany</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01557775" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01557775</ref>
        </imprint>
        <meeting id="cid625902">
          <title>SIGDIAL/SEMDIAL joint special session on negotiation dialog</title>
          <num>2017</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid9" type="inproceedings" rend="year" n="cite:devries:hal-01549641">
      <identifiant type="hal" value="hal-01549641"/>
      <analytic>
        <title level="a">GuessWhat?! Visual object discovery through multi-modal dialogue</title>
        <author>
          <persName>
            <foreName>Harm</foreName>
            <surname>De Vries</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2015-idp110544">
            <foreName>Florian</foreName>
            <surname>Strub</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Sarath</foreName>
            <surname>Chandar</surname>
            <initial>S.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Hugo</foreName>
            <surname>Larochelle</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Aaron</foreName>
            <surname>Courville</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Conference on Computer Vision and Pattern Recognition</title>
        <loc>Honolulu, United States</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01549641" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01549641</ref>
        </imprint>
        <meeting id="cid82398">
          <title>IEEE International Conference on Computer Vision and Pattern Recognition</title>
          <num>2017</num>
          <abbr type="sigle">CVPR</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1611.08481" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1611.<allowbreak/>08481</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid10" type="inproceedings" rend="year" n="cite:devries:hal-01648683">
      <identifiant type="hal" value="hal-01648683"/>
      <analytic>
        <title level="a">Modulating early visual processing by language</title>
        <author>
          <persName>
            <foreName>Harm</foreName>
            <surname>De Vries</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2015-idp110544">
            <foreName>Florian</foreName>
            <surname>Strub</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sequel-2014-idp76928">
            <foreName>Jérémie</foreName>
            <surname>Mary</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Hugo</foreName>
            <surname>Larochelle</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Aaron</foreName>
            <surname>Courville</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2017 - Conference on Neural Information Processing Systems</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-14</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01648683" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01648683</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1707.00683" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1707.<allowbreak/>00683</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid20" type="inproceedings" rend="year" n="cite:erraqabi:hal-01482765">
      <identifiant type="hal" value="hal-01482765"/>
      <analytic>
        <title level="a">Trading off rewards and errors in multi-armed bandits</title>
        <author>
          <persName>
            <foreName>Akram</foreName>
            <surname>Erraqabi</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Emma</foreName>
            <surname>Brunskill</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Yun-En</foreName>
            <surname>Liu</surname>
            <initial>Y.-E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International Conference on Artificial Intelligence and Statistics</title>
        <loc>Fort Lauderdale, United States</loc>
        <imprint>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01482765" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01482765</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>20</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid40" type="inproceedings" rend="year" n="cite:felicio:hal-01517967">
      <identifiant type="hal" value="hal-01517967"/>
      <analytic>
        <title level="a">A Multi-Armed Bandit Model Selection for Cold-Start User Recommendation</title>
        <author>
          <persName>
            <foreName>Crícia Z</foreName>
            <surname>Felício</surname>
            <initial>C. Z.</initial>
          </persName>
          <persName>
            <foreName>Klérisson V R</foreName>
            <surname>Paixão</surname>
            <initial>K. V. R.</initial>
          </persName>
          <persName>
            <foreName>Celia A Z</foreName>
            <surname>Barcelos</surname>
            <initial>C. A. Z.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">25th ACM Conference on User Modelling, Adaptation and Personalization (UMAP)</title>
        <loc>Bratislava, Slovakia</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01517967" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01517967</ref>
        </imprint>
        <meeting id="cid625898">
          <title>ACM Conference on User Modelling, Adaptation and Personalization</title>
          <num>2017</num>
          <abbr type="sigle">UMAP</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid15" type="inproceedings" rend="year" n="cite:fruit:hal-01493567">
      <identifiant type="hal" value="hal-01493567"/>
      <analytic>
        <title level="a">Exploration–Exploitation in MDPs with Options</title>
        <author>
          <persName key="sequel-2015-idp116704">
            <foreName>Ronan</foreName>
            <surname>Fruit</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AISTATS 2017 - 20th International Conference on Artificial Intelligence and Statistics</title>
        <loc>Fort Lauderdale, United States</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01493567" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01493567</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>20</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid16" type="inproceedings" rend="year" n="cite:fruit:hal-01649082">
      <identifiant type="hal" value="hal-01649082"/>
      <analytic>
        <title level="a">Regret Minimization in MDPs with Options without Prior Knowledge</title>
        <author>
          <persName key="sequel-2015-idp116704">
            <foreName>Ronan</foreName>
            <surname>Fruit</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2017-idp149504">
            <foreName>Matteo</foreName>
            <surname>Pirotta</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Emma</foreName>
            <surname>Brunskill</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2017 - Neural Information Processing Systems</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-36</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01649082" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01649082</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid34" type="inproceedings" rend="year" n="cite:gautier:hal-01526577">
      <identifiant type="hal" value="hal-01526577"/>
      <analytic>
        <title level="a">Zonotope hit-and-run for efficient sampling from projection DPPs</title>
        <author>
          <persName key="i4s-2015-idp77480">
            <foreName>Guillaume</foreName>
            <surname>Gautier</surname>
            <initial>G.</initial>
          </persName>
          <persName key="sequel-2016-idp194624">
            <foreName>Rémi</foreName>
            <surname>Bardenet</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International Conference on Machine Learning</title>
        <loc>Sydney, Australia</loc>
        <imprint>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01526577" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01526577</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>27</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid18" type="inproceedings" rend="year" n="cite:geist:hal-01576347">
      <identifiant type="hal" value="hal-01576347"/>
      <analytic>
        <title level="a">Faut-il minimiser le résidu de Bellman ou maximiser la valeur moyenne ?</title>
        <author>
          <persName>
            <foreName>Matthieu</foreName>
            <surname>Geist</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp79648">
            <foreName>Bilal</foreName>
            <surname>Piot</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="no" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Journées Francophones sur la Planification, la Décision et l'Apprentissage pour la conduite de systèmes (JFPDA 2017)</title>
        <loc>Caen, France</loc>
        <title level="s">Actes des Journées Francophones sur la Planification, la Décision et l'Apprentissage pour la conduite de systèmes (JFPDA 2017)</title>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01576347" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01576347</ref>
        </imprint>
        <meeting id="cid344313">
          <title>Journées Francophones Planification, Décision, Apprentissage</title>
          <num>2017</num>
          <abbr type="sigle">JFPDA</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid17" type="inproceedings" rend="year" n="cite:geist:hal-01629739">
      <identifiant type="hal" value="hal-01629739"/>
      <analytic>
        <title level="a">Is the Bellman residual a bad proxy?</title>
        <author>
          <persName>
            <foreName>Matthieu</foreName>
            <surname>Geist</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp79648">
            <foreName>Bilal</foreName>
            <surname>Piot</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2017 - Advances in Neural Information Processing Systems</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-13</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01629739" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01629739</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid31" type="inproceedings" rend="year" n="cite:kaufmann:hal-01535907">
      <identifiant type="hal" value="hal-01535907"/>
      <analytic>
        <title level="a">Monte-Carlo Tree Search by Best Arm Identification</title>
        <author>
          <persName key="dyogene-2014-idp72312">
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Wouter M.</foreName>
            <surname>Koolen</surname>
            <initial>W. M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2017 - 31st Annual Conference on Neural Information Processing Systems</title>
        <loc>Long Beach, United States</loc>
        <title level="s">Advances in Neural Information Processing Systems</title>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-23</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01535907" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01535907</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1706.02986" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1706.<allowbreak/>02986</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid19" type="inproceedings" rend="year" n="cite:laroche:hal-01548649">
      <identifiant type="hal" value="hal-01548649"/>
      <analytic>
        <title level="a">Transfer Reinforcement Learning with Shared Dynamics</title>
        <author>
          <persName>
            <foreName>Romain</foreName>
            <surname>Laroche</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2015-idp113016">
            <foreName>Merwan</foreName>
            <surname>Barlier</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AAAI-17 - Thirty-First AAAI Conference on Artificial Intelligence</title>
        <loc>San Francisco, United States</loc>
        <imprint>
          <dateStruct>
            <month>February</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">7</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01548649" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01548649</ref>
        </imprint>
        <meeting id="cid355099">
          <title>National Conference on Artificial Intelligence</title>
          <num>2017</num>
          <abbr type="sigle">AAAI</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid22" type="inproceedings" rend="year" n="cite:maillard:hal-01615427">
      <identifiant type="hal" value="hal-01615427"/>
      <analytic>
        <title level="a">Boundary Crossing for General Exponential Families</title>
        <author>
          <persName key="tao-2015-idp83360">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Algorithmic Learning Theory</title>
        <loc>Kyoto, Japan</loc>
        <title level="s">Proceedings of Algorithmic Learning Theory</title>
        <imprint>
          <biblScope type="volume">76</biblScope>
          <dateStruct>
            <month>October</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1 - 34</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01615427" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01615427</ref>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>28</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid51" type="inproceedings" rend="year" n="cite:metelli:hal-01653328">
      <identifiant type="hal" value="hal-01653328"/>
      <analytic>
        <title level="a">Compatible Reward Inverse Reinforcement Learning</title>
        <author>
          <persName>
            <foreName>Alberto Maria</foreName>
            <surname>Metelli</surname>
            <initial>A. M.</initial>
          </persName>
          <persName key="sequel-2017-idp149504">
            <foreName>Matteo</foreName>
            <surname>Pirotta</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Marcello</foreName>
            <surname>Restelli</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">The Thirty-first Annual Conference on Neural Information Processing Systems - NIPS 2017</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01653328" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01653328</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid30" type="inproceedings" rend="year" n="cite:mourtada:hal-01615424">
      <identifiant type="hal" value="hal-01615424"/>
      <analytic>
        <title level="a">Efficient tracking of a growing number of experts</title>
        <author>
          <persName>
            <foreName>Jaouad</foreName>
            <surname>Mourtada</surname>
            <initial>J.</initial>
          </persName>
          <persName key="tao-2015-idp83360">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Algorithmic Learning Theory</title>
        <loc>Kyoto, Japan</loc>
        <title level="s">Proceedings of Algorithmic Learning Theory</title>
        <imprint>
          <biblScope type="volume">76</biblScope>
          <dateStruct>
            <month>October</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1 - 23</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01615424" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01615424</ref>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>28</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid48" type="inproceedings" rend="year" n="cite:papini:hal-01653330">
      <identifiant type="hal" value="hal-01653330"/>
      <analytic>
        <title level="a">Adaptive Batch Size for Safe Policy Gradients</title>
        <author>
          <persName>
            <foreName>Matteo</foreName>
            <surname>Papini</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2017-idp149504">
            <foreName>Matteo</foreName>
            <surname>Pirotta</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Marcello</foreName>
            <surname>Restelli</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">The Thirty-first Annual Conference on Neural Information Processing Systems (NIPS)</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01653330" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01653330</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid43" type="inproceedings" rend="year" n="cite:papoudakis:hal-01617851">
      <identifiant type="doi" value="10.1007/978-3-319-72150-7_43"/>
      <identifiant type="hal" value="hal-01617851"/>
      <analytic>
        <title level="a">A generative model for sparse, evolving digraphs</title>
        <author>
          <persName key="sequel-2017-idp213248">
            <foreName>Georgios</foreName>
            <surname>Papoudakis</surname>
            <initial>G.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
          <persName key="spirals-2014-idp82576">
            <foreName>Martin</foreName>
            <surname>Monperrus</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">6th International Conference on Complex Networks and their Applications</title>
        <loc>Lyon, France</loc>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01617851" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01617851</ref>
        </imprint>
        <meeting id="cid320802">
          <title>International Workshop and Conference on Complex Networks and their Applications</title>
          <num>6</num>
          <abbr type="sigle">NetSci</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1710.06298" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1710.<allowbreak/>06298</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid46" type="inproceedings" rend="year" n="cite:perez:hal-01648684">
      <identifiant type="hal" value="hal-01648684"/>
      <analytic>
        <title level="a">Learning Visual Reasoning Without Strong Priors</title>
        <author>
          <persName>
            <foreName>Ethan</foreName>
            <surname>Perez</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Harm</foreName>
            <surname>De Vries</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2015-idp110544">
            <foreName>Florian</foreName>
            <surname>Strub</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Vincent</foreName>
            <surname>Dumoulin</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Aaron</foreName>
            <surname>Courville</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ICML 2017's Machine Learning in Speech and Language Processing Workshop</title>
        <loc>Sydney, Australia</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01648684" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01648684</ref>
        </imprint>
        <meeting id="cid625900">
          <title>ICML Machine Learning in Speech and Language Processing Workshop</title>
          <num>2017</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1709.07871" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1709.<allowbreak/>07871</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid45" type="inproceedings" rend="year" n="cite:perez:hal-01648685">
      <identifiant type="hal" value="hal-01648685"/>
      <analytic>
        <title level="a">FiLM: Visual Reasoning with a General Conditioning Layer</title>
        <author>
          <persName>
            <foreName>Ethan</foreName>
            <surname>Perez</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2015-idp110544">
            <foreName>Florian</foreName>
            <surname>Strub</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Harm</foreName>
            <surname>De Vries</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Vincent</foreName>
            <surname>Dumoulin</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Aaron</foreName>
            <surname>Courville</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AAAI Conference on Artificial Intelligence</title>
        <loc>New Orleans, United States</loc>
        <imprint>
          <dateStruct>
            <month>February</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01648685" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01648685</ref>
        </imprint>
        <meeting id="cid355099">
          <title>National Conference on Artificial Intelligence</title>
          <num>32</num>
          <abbr type="sigle">AAAI</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1707.03017" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1707.<allowbreak/>03017</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid32" type="inproceedings" rend="year" n="cite:perolat:hal-01648489">
      <identifiant type="hal" value="hal-01648489"/>
      <analytic>
        <title level="a">Learning Nash Equilibrium for General-Sum Markov Games from Batch Data</title>
        <author>
          <persName key="sequel-2014-idp109840">
            <foreName>Julien</foreName>
            <surname>Pérolat</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2015-idp110544">
            <foreName>Florian</foreName>
            <surname>Strub</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sequel-2014-idp79648">
            <foreName>Bilal</foreName>
            <surname>Piot</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AISTATS 2017 - The 20th International Conference on Artificial Intelligence and Statistics</title>
        <loc>Fort Lauderdale, United States</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-14</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01648489" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01648489</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>20</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid25" type="inproceedings" rend="year" n="cite:riquelme:hal-01538762">
      <identifiant type="hal" value="hal-01538762"/>
      <analytic>
        <title level="a">Active Learning for Accurate Estimation of Linear Models</title>
        <author>
          <persName>
            <foreName>Carlos</foreName>
            <surname>Riquelme</surname>
            <initial>C.</initial>
          </persName>
          <persName key="sequel-2014-idp65928">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ICML 2017 - 34th International Conference on Machine Learning</title>
        <loc>Sydney, Australia</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">36</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01538762" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01538762</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>34</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid36" type="inproceedings" rend="year" n="cite:ryabko:hal-01627330">
      <identifiant type="hal" value="hal-01627330"/>
      <analytic>
        <title level="a">Hypotheses testing on infinite random graphs</title>
        <author>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ALT 2017 - 28th International Conference on Algorithmic Learning Theory</title>
        <loc>Kyoto, Japan</loc>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-12</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01627330" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01627330</ref>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>28</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1708.03131" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1708.<allowbreak/>03131</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid37" type="inproceedings" rend="year" n="cite:ryabko:hal-01627333">
      <identifiant type="hal" value="hal-01627333"/>
      <analytic>
        <title level="a">Independence clustering (without a matrix)</title>
        <author>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2017 - Thirty-first Annual Conference on Neural Information Processing Systems</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-14</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01627333" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01627333</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1703.06700" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1703.<allowbreak/>06700</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid35" type="inproceedings" rend="year" n="cite:ryabko:hal-01627332">
      <identifiant type="hal" value="hal-01627332"/>
      <analytic>
        <title level="a">Universality of Bayesian mixture predictors</title>
        <author>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ALT 2017 - 28th International Conference on Algorithmic Learning Theory</title>
        <loc>Kyoto, Japan</loc>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-13</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01627332" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01627332</ref>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>28</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1610.08249" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1610.<allowbreak/>08249</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid8" type="inproceedings" rend="year" n="cite:strub:hal-01549642">
      <identifiant type="hal" value="hal-01549642"/>
      <analytic>
        <title level="a">End-to-end optimization of goal-driven and visually grounded dialogue systems</title>
        <author>
          <persName key="sequel-2015-idp110544">
            <foreName>Florian</foreName>
            <surname>Strub</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Harm</foreName>
            <surname>De Vries</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2014-idp76928">
            <foreName>Jeremie</foreName>
            <surname>Mary</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2014-idp79648">
            <foreName>Bilal</foreName>
            <surname>Piot</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Aaron</foreName>
            <surname>Courville</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International Joint Conference on Artificial Intelligence</title>
        <loc>Melbourne, Australia</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01549642" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01549642</ref>
        </imprint>
        <meeting id="cid307932">
          <title>International Joint Conference on Artificial Intelligence</title>
          <num>26</num>
          <abbr type="sigle">IJCAI</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1703.05423" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1703.<allowbreak/>05423</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid52" type="inproceedings" rend="year" n="cite:tosatto:hal-01653332">
      <identifiant type="hal" value="hal-01653332"/>
      <analytic>
        <title level="a">Boosted Fitted Q-Iteration</title>
        <author>
          <persName>
            <foreName>Samuele</foreName>
            <surname>Tosatto</surname>
            <initial>S.</initial>
          </persName>
          <persName key="sequel-2017-idp149504">
            <foreName>Matteo</foreName>
            <surname>Pirotta</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Carlo</foreName>
            <surname>D'Eramo</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Marcello</foreName>
            <surname>Restelli</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">34th International Conference on Machine Learning (ICML)</title>
        <loc>Sydney, Australia</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01653332" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01653332</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>34</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid49" type="inproceedings" rend="year" n="cite:tziortziotis:hal-01593212">
      <identifiant type="hal" value="hal-01593212"/>
      <analytic>
        <title level="a">Bayesian Inference for Least Squares Temporal Difference Regularization</title>
        <author>
          <persName>
            <foreName>Nikolaos</foreName>
            <surname>Tziortziotis</surname>
            <initial>N.</initial>
          </persName>
          <persName key="sequel-2015-idp103832">
            <foreName>Christos</foreName>
            <surname>Dimitrakakis</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ECML 2017 - European Conference on Machine Learning</title>
        <loc>Skopje, Macedonia</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">2017-09-22</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01593212" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01593212</ref>
        </imprint>
        <meeting id="cid67091">
          <title>European Conference on Machine Learning</title>
          <num>2017</num>
          <abbr type="sigle">ECML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid21" type="inproceedings" rend="year" n="cite:wen:hal-01643976">
      <identifiant type="hal" value="hal-01643976"/>
      <analytic>
        <title level="a">Online influence maximization under independent cascade model with semi-bandit feedback</title>
        <author>
          <persName>
            <foreName>Zheng</foreName>
            <surname>Wen</surname>
            <initial>Z.</initial>
          </persName>
          <persName>
            <foreName>Branislav</foreName>
            <surname>Kveton</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Sharan</foreName>
            <surname>Vaswani</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2017 - Neural Information Processing Systems</title>
        <loc>Long Beach, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">1-24</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01643976" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01643976</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>31</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid56" type="inproceedings" rend="year" n="cite:zanonboito:hal-01592091">
      <identifiant type="hal" value="hal-01592091"/>
      <analytic>
        <title level="a">Unwritten Languages Demand Attention Too! Word Discovery with Encoder-Decoder Models</title>
        <author>
          <persName>
            <foreName>Marcely</foreName>
            <surname>Zanon Boito</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp99936">
            <foreName>Alexandre</foreName>
            <surname>Bérard</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Aline</foreName>
            <surname>Villavicencio</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Laurent</foreName>
            <surname>Besacier</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">IEEE Automatic Speech Recognition and Understanding (ASRU)</title>
        <loc>Okinawa, Japan</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01592091" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01592091</ref>
        </imprint>
        <meeting id="cid97192">
          <title>IEEE workshop on Automatic Speech Recognition and Understanding</title>
          <num>2017</num>
          <abbr type="sigle">ASRU</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid28" type="unpublished" rend="year" n="cite:besson:hal-01629733">
      <identifiant type="hal" value="hal-01629733"/>
      <monogr>
        <title level="m">Multi-Player Bandits Models Revisited</title>
        <author>
          <persName key="sequel-2016-idp147888">
            <foreName>Lilian</foreName>
            <surname>Besson</surname>
            <initial>L.</initial>
          </persName>
          <persName key="dyogene-2014-idp72312">
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01629733" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01629733</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1711.02317" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1711.<allowbreak/>02317</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid53" type="unpublished" rend="year" n="cite:dimitrakakis:hal-01408294">
      <identifiant type="hal" value="hal-01408294"/>
      <monogr>
        <title level="m">Multi-view Sequential Games: The Helper-Agent Problem</title>
        <author>
          <persName key="sequel-2015-idp103832">
            <foreName>Christos</foreName>
            <surname>Dimitrakakis</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Firas</foreName>
            <surname>Jarboui</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>David</foreName>
            <surname>Parkes</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Lior</foreName>
            <surname>Seeman</surname>
            <initial>L.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>February</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01408294" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01408294</ref>
        </imprint>
      </monogr>
      <note type="bnote">working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid55" type="unpublished" rend="year" n="cite:dimitrakakis:hal-01531849">
      <identifiant type="hal" value="hal-01531849"/>
      <monogr>
        <title level="m">Subjective Fairness: Fairness is in the eye of the beholder</title>
        <author>
          <persName key="sequel-2015-idp103832">
            <foreName>Christos</foreName>
            <surname>Dimitrakakis</surname>
            <initial>C.</initial>
          </persName>
          <persName key="multispeech-2016-idp166368">
            <foreName>Yang</foreName>
            <surname>Liu</surname>
            <initial>Y.</initial>
          </persName>
          <persName>
            <foreName>David</foreName>
            <surname>Parkes</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Goran</foreName>
            <surname>Radanovic</surname>
            <initial>G.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01531849" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01531849</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1706.00119" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1706.<allowbreak/>00119</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid54" type="unpublished" rend="year" n="cite:luedtke:hal-01338733">
      <identifiant type="hal" value="hal-01338733"/>
      <monogr>
        <title level="m">Asymptotically Optimal Algorithms for Budgeted Multiple Play Bandits</title>
        <author>
          <persName>
            <foreName>Alexander R.</foreName>
            <surname>Luedtke</surname>
            <initial>A. R.</initial>
          </persName>
          <persName key="dyogene-2014-idp72312">
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Antoine</foreName>
            <surname>Chambaz</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01338733" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01338733</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1606.09388" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1606.<allowbreak/>09388</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid57" type="unpublished" rend="year" n="cite:maillard:cel-01632228">
      <identifiant type="hal" value="cel-01632228"/>
      <monogr>
        <title level="m">Basic Concentration Properties of Real-Valued Distributions</title>
        <author>
          <persName key="tao-2015-idp83360">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2017</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/cel-01632228" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>cel-01632228</ref>
        </imprint>
      </monogr>
      <note type="bnote">Lecture</note>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid23" type="article" rend="foot" n="footcite:allesiardo:hal-01575000">
      <identifiant type="doi" value="10.1007/s41060-017-0050-5"/>
      <identifiant type="hal" value="hal-01575000"/>
      <analytic>
        <title level="a">The Non-stationary Stochastic Multi-armed Bandit Problem</title>
        <author>
          <persName>
            <foreName>Robin</foreName>
            <surname>Allesiardo</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Raphaël</foreName>
            <surname>Féraud</surname>
            <initial>R.</initial>
          </persName>
          <persName key="tao-2015-idp83360">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">International Journal of Data Science and Analytics</title>
        <imprint>
          <biblScope type="volume">3</biblScope>
          <biblScope type="number">4</biblScope>
          <dateStruct>
            <year>2017</year>
          </dateStruct>
          <biblScope type="pages">267–283</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01575000" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01575000</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid7" type="article" rend="foot" n="footcite:Aueretal2002">
      <analytic>
        <title level="a">Finite-time analysis of the multi-armed bandit problem</title>
        <author>
          <persName>
            <foreName>Peter</foreName>
            <surname>Auer</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>N.</foreName>
            <surname>Cesa-Bianchi</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>P.</foreName>
            <surname>Fischer</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Machine Learning</title>
        <imprint>
          <biblScope type="volume">47</biblScope>
          <biblScope type="number">2/3</biblScope>
          <dateStruct>
            <year>2002</year>
          </dateStruct>
          <biblScope type="pages">235–256</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid3" type="book" rend="foot" n="footcite:bellman">
      <monogr>
        <title level="m">Dynamic Programming</title>
        <author>
          <persName>
            <foreName>R.</foreName>
            <surname>Bellman</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Princeton University Press</orgName>
          </publisher>
          <dateStruct>
            <year>1957</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid4" type="book" rend="foot" n="footcite:bertshreve78">
      <monogr>
        <title level="m">Stochastic Optimal Control (The Discrete Time Case)</title>
        <author>
          <persName>
            <foreName>D.P.</foreName>
            <surname>Bertsekas</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>S.E.</foreName>
            <surname>Shreve</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Academic Press, New York</orgName>
          </publisher>
          <dateStruct>
            <year>1978</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid2" type="book" rend="foot" n="footcite:Bertsekas96">
      <monogr>
        <title level="m">Neuro-Dynamic Programming</title>
        <author>
          <persName>
            <foreName>D.P.</foreName>
            <surname>Bertsekas</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Tsitsiklis</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Athena Scientific</orgName>
          </publisher>
          <dateStruct>
            <year>1996</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid1" type="book" rend="foot" n="footcite:puterman94">
      <monogr>
        <title level="m">Markov Decision Processes: Discrete Stochastic Dynamic Programming</title>
        <author>
          <persName>
            <foreName>M.L.</foreName>
            <surname>Puterman</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>John Wiley and Sons</orgName>
          </publisher>
          <dateStruct>
            <year>1994</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid6" type="article" rend="foot" n="footcite:Robbins1952">
      <analytic>
        <title level="a">Some aspects of the sequential design of experiments</title>
        <author>
          <persName>
            <foreName>H.</foreName>
            <surname>Robbins</surname>
            <initial>H.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Bull. Amer. Math. Soc.</title>
        <imprint>
          <biblScope type="volume">58</biblScope>
          <dateStruct>
            <year>1952</year>
          </dateStruct>
          <biblScope type="pages">527–535</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid0" type="book" rend="foot" n="footcite:sb">
      <monogr>
        <title level="m">Reinforcement learning: an introduction</title>
        <author>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>A.G.</foreName>
            <surname>Barto</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>MIT Press</orgName>
          </publisher>
          <dateStruct>
            <year>1998</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2017-bid5" type="inbook" rend="foot" n="footcite:werbosHandbookADP">
      <analytic>
        <author>
          <persName>
            <foreName>P.</foreName>
            <surname>Werbos</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">ADP: Goals, Opportunities and Principles</title>
        <imprint>
          <publisher>
            <orgName>IEEE Press</orgName>
          </publisher>
          <dateStruct>
            <year>2004</year>
          </dateStruct>
          <biblScope type="pages">3–44</biblScope>
        </imprint>
      </monogr>
      <note type="bnote">Handbook of learning and approximate dynamic programming</note>
    </biblStruct>
  </biblio>
</raweb>
