<?xml version="1.0" encoding="utf-8"?>
<raweb xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" year="2014">
  <identification id="sequel" isproject="true">
    <shortname>SEQUEL</shortname>
    <projectName>Sequential Learning</projectName>
    <theme-de-recherche>Optimization, machine learning and statistical methods</theme-de-recherche>
    <domaine-de-recherche>Applied Mathematics, Computation and Simulation</domaine-de-recherche>
    <urlTeam>http://sequel.lille.inria.fr/</urlTeam>
    <datecreation>2007 July 01</datecreation>
    <structure_exterieure type="Labs">
      <libelle>Laboratoire d'informatique fondamentale de Lille (LIFL)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Labs">
      <libelle>Laboratoire d'Automatique, de Génie Informatique et Signal (LAGIS)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Université Charles de Gaulle (Lille 3)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Ecole Centrale de Lille</libelle>
    </structure_exterieure>
    <UR name="Lille"/>
    <keywords>
      <term>Machine Learning</term>
      <term>Statistical Learning</term>
      <term>Sequential Learning</term>
      <term>Inference</term>
      <term>Analysis Of Algorithms</term>
    </keywords>
    <moreinfo/>
  </identification>
  <team id="uid1">
    <person key="sequel-2014-idm27568">
      <firstname>Philippe</firstname>
      <lastname>Preux</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Team leader, Univ. Lille III, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idm26088">
      <firstname>Alessandro</firstname>
      <lastname>Lazaric</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="sequel-2014-idp65928">
      <firstname>Mohammad</firstname>
      <lastname>Ghavamzadeh</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp67360">
      <firstname>Rémi</firstname>
      <lastname>Munos</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Senior
Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp68800">
      <firstname>Daniil</firstname>
      <lastname>Ryabko</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp70232">
      <firstname>Michal</firstname>
      <lastname>Valko</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="sequel-2014-idp71472">
      <firstname>Pierre</firstname>
      <lastname>Chainais</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Ecole Centrale de Lille, Associate Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp72936">
      <firstname>Rémi</firstname>
      <lastname>Coulom</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille III, Associate Professor, until Sep 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp74216">
      <firstname>Emmanuel</firstname>
      <lastname>Duflos</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Ecole Centrale de Lille, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp75664">
      <firstname>Romaric</firstname>
      <lastname>Gaudel</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille III, Associate Professor</moreinfo>
    </person>
    <person key="sequel-2014-idp76928">
      <firstname>Jérémie</firstname>
      <lastname>Mary</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille III, Associate Professor</moreinfo>
    </person>
    <person key="sequel-2014-idp78200">
      <firstname>Philippe</firstname>
      <lastname>Vanheeghe</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Ecole Centrale de Lille, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp79648">
      <firstname>Bilal</firstname>
      <lastname>Piot</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille III, from Oct 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp80904">
      <firstname>Prashanth</firstname>
      <lastname>Lakshmanrao Anantha Padmanabha</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, until Oct 2014,
granted by EU FP7 Complacs</moreinfo>
    </person>
    <person key="sequel-2014-idp82200">
      <firstname>Gergely</firstname>
      <lastname>Neu</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by ERCIM and Hermès</moreinfo>
    </person>
    <person key="sequel-2014-idp83480">
      <firstname>Balázs</firstname>
      <lastname>Szörényi</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by EU FP7 Complacs</moreinfo>
    </person>
    <person key="sequel-2014-idp84760">
      <firstname>Timothé</firstname>
      <lastname>Collet</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Metz, from Mar 2014 until Nov 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp86016">
      <firstname>Layla</firstname>
      <lastname>El Asri</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>CIFRE Orange</moreinfo>
    </person>
    <person key="sequel-2014-idp87240">
      <firstname>Sergio</firstname>
      <lastname>Valcarcel Macua</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Technical University of Madrid, from
Feb 2014 until May 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp88536">
      <firstname>Amélie</firstname>
      <lastname>Supervielle</lastname>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sequel-2014-idp89776">
      <firstname>Nicolas</firstname>
      <lastname>Carion</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ENS-Lyon, L3, from Jun 2014
until Jul 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp91048">
      <firstname>Jessica</firstname>
      <lastname>Chemali</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Master, Carnegie Mellon
University, from May 2014 until Aug 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp92352">
      <firstname>Valentin</firstname>
      <lastname>Owczarek</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille I, L3, from
Apr 2014 until Jun 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp93632">
      <firstname>Julien</firstname>
      <lastname>Rouse</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille III, L3, from Mar 2014 until
Jun 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp94912">
      <firstname>Mathias</firstname>
      <lastname>Sable Meyer</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ENS-Cachan, L3, from Jun 2014 until
Jul 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp96192">
      <firstname>Othmane</firstname>
      <lastname>Safsafi</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ENS-Ulm, L3, from
Jun 2014 until Aug 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp97464">
      <firstname>Marc</firstname>
      <lastname>Abeille</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille I, from Sep 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp98704">
      <firstname>Boris</firstname>
      <lastname>Baldassari</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Squoring, until Sep 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp99936">
      <firstname>Alexandre</firstname>
      <lastname>Berard</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille I, from Oct 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp101176">
      <firstname>Daniele</firstname>
      <lastname>Calandriello</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sequel-2014-idp102400">
      <firstname>Pratik</firstname>
      <lastname>Gajane</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Orange Labs, from Oct 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp103632">
      <firstname>Hadrien</firstname>
      <lastname>Glaude</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Thales, from Feb 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp104864">
      <firstname>Jean Bastien</firstname>
      <lastname>Grill</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ENS Cachan, from Oct 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp106104">
      <firstname>Frédéric</firstname>
      <lastname>Guillou</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Région Nord-Pas-de-Calais and TBS</moreinfo>
    </person>
    <person key="sequel-2014-idp107400">
      <firstname>Adrien</firstname>
      <lastname>Hoarau</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>DGA</moreinfo>
    </person>
    <person key="sequel-2014-idp108624">
      <firstname>Tomas</firstname>
      <lastname>Kocak</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sequel-2014-idp109840">
      <firstname>Julien</firstname>
      <lastname>Perolat</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille I, from Oct 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp111080">
      <firstname>Olivier</firstname>
      <lastname>Pietquin</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>IUF and Univ. Lille I</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2014-idp112520">
      <firstname>Amir</firstname>
      <lastname>Sani</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Région Nord-Pas-de-Calais and Inria</moreinfo>
    </person>
    <person key="sequel-2014-idp113800">
      <firstname>Marta</firstname>
      <lastname>Soare</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Région Nord-Pas-de-Calais and EU FP7 Complacs</moreinfo>
    </person>
    <person key="sequel-2014-idp115104">
      <firstname>Olivier</firstname>
      <lastname>Nicol</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille I and III, until Dec 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp116352">
      <firstname>Victor</firstname>
      <lastname>Gabillon</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>MENRT, Univ. Lille I, until June 2014</moreinfo>
    </person>
    <person key="sequel-2014-idp117600">
      <firstname>Vincenzo</firstname>
      <lastname>Musco</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Lille I and III</moreinfo>
    </person>
    <person key="sequel-2014-idp118832">
      <firstname>Jennifer</firstname>
      <lastname>Healey</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Intel</moreinfo>
    </person>
    <person key="sequel-2014-idp120064">
      <firstname>Julien</firstname>
      <lastname>Audiffren</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Univ. Marseille</moreinfo>
    </person>
    <person key="sequel-2014-idp121328">
      <firstname>Hong-Phuong</firstname>
      <lastname>Dang</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Ecole Centrale Lille</moreinfo>
    </person>
    <person key="sequel-2014-idp122560">
      <firstname>Clément</firstname>
      <lastname>Elvira</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ANR BNPSI no ANR-13-BS-03-0006-01</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Presentation</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span> means “Sequential Learning”. As such, <span class="smallcap" align="left">SequeL</span> focuses on the task of learning in artificial systems (either hardware, or software) that gather information along time. Such systems are named <i>(learning) agents</i> (or learning machines) in the following.
These data may be used to estimate some parameters of a model, which in turn, may be used for selecting actions in order to perform some long-term optimization task.</p>
      <p>For the purpose of model building, the agent needs to represent information collected so far in some compact form and use it to process newly available data.</p>
      <p>The acquired data may result from an observation process of an agent in interaction with its environment (the data thus represent a perception). This is the case when the agent makes decisions (in order to attain a certain objective) that impact the environment, and thus the observation process itself.</p>
      <p>Hence, in <span class="smallcap" align="left">SequeL</span>, the term <b>sequential</b> refers to two aspects:</p>
      <simplelist>
        <li id="uid4">
          <p noindent="true">The <b>sequential acquisition of data</b>, from which a model is learned (supervised and non supervised learning),</p>
        </li>
        <li id="uid5">
          <p noindent="true">the <b>sequential decision making task</b>, based on the learned model (reinforcement learning).</p>
        </li>
      </simplelist>
      <p>Examples of sequential learning problems include:</p>
      <descriptionlist>
        <label>Supervised learning</label>
        <li id="uid6">
          <p noindent="true">tasks deal with the prediction of some response given a certain set of observations of input variables and responses. New sample points keep on being observed.</p>
        </li>
        <label>Unsupervised learning</label>
        <li id="uid7">
          <p noindent="true">tasks deal with clustering objects, these latter making a flow of objects. The (unknown) number of clusters typically evolves during time, as new objects are observed.</p>
        </li>
        <label>Reinforcement learning</label>
        <li id="uid8">
          <p noindent="true">tasks deal with the control (a policy) of some system which has to be optimized (see <ref xlink:href="#sequel-2014-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). We do not assume the availability of a model of the system to be controlled.</p>
        </li>
      </descriptionlist>
      <p>In all these cases, we mostly assume that the process can be considered stationary for at least a certain amount of time, and slowly evolving.</p>
      <p>We wish to have any-time algorithms, that is, at any moment, a prediction may be required/an action may be selected making full use, and hopefully, the best use, of the experience already gathered by the learning agent.</p>
      <p>The perception of the environment by the learning agent (using its sensors) is generally neither the best one to make a prediction, nor to take a decision (we deal with Partially Observable Markov Decision Problem). So, the perception has to be mapped in some way to a better, and relevant, state (or input) space.</p>
      <p>Finally, an important issue of prediction regards its evaluation: how wrong may we be when we perform a prediction? For real systems to be controlled, this issue can not be simply left unanswered.</p>
      <p spacebefore="6.0pt">To sum-up, in <span class="smallcap" align="left">SequeL</span>, the main issues regard:</p>
      <simplelist>
        <li id="uid9">
          <p noindent="true">the learning of a model: we focus on models that map some
input space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>ℝ</mi><mi>P</mi></msup></math></formula> to <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ℝ</mi></math></formula>,</p>
        </li>
        <li id="uid10">
          <p noindent="true">the observation to state mapping,</p>
        </li>
        <li id="uid11">
          <p noindent="true">the choice of the action to perform (in the case of sequential
decision problem),</p>
        </li>
        <li id="uid12">
          <p noindent="true">the performance guarantees,</p>
        </li>
        <li id="uid13">
          <p noindent="true">the implementation of usable algorithms,</p>
        </li>
      </simplelist>
      <p>all that being understood in a <i>sequential</i> framework.</p>
    </subsection>
  </presentation>
  <fondements id="uid14">
    <bodyTitle>Research Program</bodyTitle>
    <subsection id="uid15" level="1">
      <bodyTitle>In Short</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span> is primarily grounded on two domains:</p>
      <simplelist>
        <li id="uid16">
          <p noindent="true">the problem of decision under uncertainty,</p>
        </li>
        <li id="uid17">
          <p noindent="true">statistical analysis and statistical learning, which provide the general concepts and tools to solve this problem.</p>
        </li>
      </simplelist>
      <p>To help the reader who is unfamiliar with these questions, we briefly present key ideas below.</p>
    </subsection>
    <subsection id="uid18" level="1">
      <bodyTitle>Decision-making Under Uncertainty</bodyTitle>
      <p>The phrase “Decision under uncertainty” refers to the problem of taking decisions when we do not have a full knowledge neither of the situation, nor of the consequences of the decisions, as well as when the consequences of decision are non deterministic.</p>
      <p>We introduce two specific sub-domains, namely the Markov decision processes which models sequential decision problems, and bandit problems.</p>
      <subsection id="uid19" level="2">
        <bodyTitle>Reinforcement Learning</bodyTitle>
        <p>Sequential decision processes occupy the heart of the <span class="smallcap" align="left">SequeL</span> project; a detailed presentation of this problem may be found in Puterman's book <ref xlink:href="#sequel-2014-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        <p>A Markov Decision Process (MDP) is defined as the tuple <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo>(</mo><mi>𝒳</mi><mo>,</mo><mi>𝒜</mi><mo>,</mo><mi>P</mi><mo>,</mo><mi>r</mi><mo>)</mo></mrow></math></formula> where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula> is the state space, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula> is the action space, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>P</mi></math></formula> is the probabilistic transition kernel, and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>r</mi><mo>:</mo><mi>𝒳</mi><mo>×</mo><mi>𝒜</mi><mo>×</mo><mi>𝒳</mi><mo>→</mo><mi>I</mi><mspace width="-0.166667em"/><mspace width="-0.166667em"/><mi>R</mi></mrow></math></formula> is the reward function. For the sake of simplicity, we assume in this introduction that the state and action spaces are finite. If the current state (at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula>) is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> and the chosen action is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></math></formula>, then the Markov assumption means that the transition probability to a new state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>x</mi><mo>'</mo></msup><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> (at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></math></formula>) only depends on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow></math></formula>. We write <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>p</mi><mo>(</mo><msup><mi>x</mi><mo>'</mo></msup><mo>|</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow></math></formula> the corresponding transition probability. During a transition <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow><mo>→</mo><msup><mi>x</mi><mo>'</mo></msup></mrow></math></formula>, a reward <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>r</mi><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>,</mo><msup><mi>x</mi><mo>'</mo></msup><mo>)</mo></mrow></math></formula> is incurred.</p>
        <p>In the MDP (<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>𝒳</mi><mo>,</mo><mi>𝒜</mi><mo>,</mo><mi>P</mi><mo>,</mo><mi>r</mi><mo>)</mo></mrow></math></formula>, each initial state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mn>0</mn></msub></math></formula> and action sequence <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula> gives rise to a sequence of states <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><msub><mi>x</mi><mn>2</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula>, satisfying <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>ℙ</mi><mfenced separators="" open="(" close=")"><msub><mi>x</mi><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>=</mo><msup><mi>x</mi><mo>'</mo></msup><mrow><mo>|</mo></mrow><msub><mi>x</mi><mi>t</mi></msub><mo>=</mo><mi>x</mi><mo>,</mo><msub><mi>a</mi><mi>t</mi></msub><mo>=</mo><mi>a</mi></mfenced><mo>=</mo><mi>p</mi><mrow><mo>(</mo><msup><mi>x</mi><mo>'</mo></msup><mo>|</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow><mo>,</mo></mrow></math></formula> and rewards <footnote id="uid20" id-text="1">Note that for simplicity, we considered the case of a deterministic reward function, but in many applications, the reward <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>r</mi><mi>t</mi></msub></math></formula> itself is a random variable.</footnote> <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>r</mi><mn>1</mn></msub><mo>,</mo><msub><mi>r</mi><mn>2</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula> defined by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>r</mi><mi>t</mi></msub><mo>=</mo><mi>r</mi><mrow><mo>(</mo><msub><mi>x</mi><mi>t</mi></msub><mo>,</mo><msub><mi>a</mi><mi>t</mi></msub><mo>,</mo><msub><mi>x</mi><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>)</mo></mrow></mrow></math></formula>.</p>
        <p>The history of the process up to time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> is defined to be <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>H</mi><mi>t</mi></msub><mo>=</mo><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mrow><mi>t</mi><mo>-</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>a</mi><mrow><mi>t</mi><mo>-</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow></mrow></math></formula>. A policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is a sequence of functions <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mn>0</mn></msub><mo>,</mo><msub><mi>π</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula>, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>π</mi><mi>t</mi></msub></math></formula> maps the space of possible histories at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> to the space of probability distributions over the space of actions <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula>. To follow a policy means that, in each time step, we assume that the process history up to time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub></mrow></math></formula> and the probability of selecting an action <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>a</mi></math></formula> is equal to <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mi>t</mi></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow><mrow><mo>(</mo><mi>a</mi><mo>)</mo></mrow></mrow></math></formula>. A policy is called stationary (or Markovian) if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>π</mi><mi>t</mi></msub></math></formula> depends only on the last visited state. In other words, a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>π</mi><mo>=</mo><mo>(</mo><msub><mi>π</mi><mn>0</mn></msub><mo>,</mo><msub><mi>π</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo><mo>)</mo></mrow></math></formula> is called stationary if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mi>t</mi></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow><mo>=</mo><msub><mi>π</mi><mn>0</mn></msub><mrow><mo>(</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow></mrow></math></formula> holds for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>t</mi><mo>≥</mo><mn>0</mn></mrow></math></formula>. A policy is called deterministic if the probability distribution prescribed by the policy for any history is concentrated on a single action. Otherwise it is called a stochastic policy.</p>
        <p>We move from an MD process to an MD problem by formulating the goal of the agent, that is what the sought policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> has to optimize? It is very often formulated as maximizing (or minimizing), in expectation, some functional of the sequence of future rewards. For example, an usual functional is the infinite-time horizon sum of discounted rewards. For a given (stationary) policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>, we define the value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> of that policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> at a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> as the expected sum of discounted future rewards given that we state from the initial state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula> and follow the policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>:</p>
        <formula id-text="1" id="uid21" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>V</mi>
                <mi>π</mi>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <mi>𝔼</mi>
              <mfenced separators="" open="[" close="]">
                <munderover>
                  <mo>∑</mo>
                  <mrow>
                    <mi>t</mi>
                    <mo>=</mo>
                    <mn>0</mn>
                  </mrow>
                  <mi>∞</mi>
                </munderover>
                <msup>
                  <mi>γ</mi>
                  <mi>t</mi>
                </msup>
                <msub>
                  <mi>r</mi>
                  <mi>t</mi>
                </msub>
                <mo>|</mo>
                <msub>
                  <mi>x</mi>
                  <mn>0</mn>
                </msub>
                <mo>=</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>π</mi>
              </mfenced>
              <mo>,</mo>
            </mrow>
          </math>
        </formula>
        <p>where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝔼</mi></math></formula> is the expectation operator and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>γ</mi><mo>∈</mo><mo>(</mo><mn>0</mn><mo>,</mo><mn>1</mn><mo>)</mo></mrow></math></formula> is the discount factor. This value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mi>π</mi></msup></math></formula> gives an evaluation of the performance of a given policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>. Other functionals of the sequence of future rewards may be considered, such as the undiscounted reward (see the stochastic shortest path problems <ref xlink:href="#sequel-2014-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) and average reward settings. Note also that, here, we considered the problem of maximizing a reward functional, but a formulation in terms of minimizing some cost or risk functional would be equivalent.</p>
        <p>In order to maximize a given functional in a sequential framework, one usually applies Dynamic Programming (DP)  <ref xlink:href="#sequel-2014-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, which introduces the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula>, defined as the optimal expected sum of rewards when the agent starts from a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula>. We have <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow><mo>=</mo><msub><mo movablelimits="true" form="prefix">sup</mo><mi>π</mi></msub><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula>. Now, let us give two definitions about policies:</p>
        <simplelist>
          <li id="uid22">
            <p noindent="true">We say that a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is optimal, if it attains the optimal values <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> for any state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>, <i>i.e.</i>, if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow><mo>=</mo><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>. Under mild conditions, deterministic stationary optimal policies exist <ref xlink:href="#sequel-2014-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Such an optimal policy is written <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>π</mi><mo>*</mo></msup></math></formula>.</p>
          </li>
          <li id="uid23">
            <p noindent="true">We say that a (deterministic stationary) policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is greedy with respect to (w.r.t.) some function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula> (defined on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula>) if, for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>,</p>
            <formula type="display">
              <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
                <mrow>
                  <mi>π</mi>
                  <mrow>
                    <mo>(</mo>
                    <mi>x</mi>
                    <mo>)</mo>
                  </mrow>
                  <mo>∈</mo>
                  <mo form="prefix">arg</mo>
                  <munder>
                    <mo movablelimits="true" form="prefix">max</mo>
                    <mrow>
                      <mi>a</mi>
                      <mo>∈</mo>
                      <mi>𝒜</mi>
                    </mrow>
                  </munder>
                  <munder>
                    <mo>∑</mo>
                    <mrow>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>∈</mo>
                      <mi>𝒳</mi>
                    </mrow>
                  </munder>
                  <mi>p</mi>
                  <mrow>
                    <mo>(</mo>
                    <msup>
                      <mi>x</mi>
                      <mo>'</mo>
                    </msup>
                    <mo>|</mo>
                    <mi>x</mi>
                    <mo>,</mo>
                    <mi>a</mi>
                    <mo>)</mo>
                  </mrow>
                  <mfenced separators="" open="[" close="]">
                    <mi>r</mi>
                    <mrow>
                      <mo>(</mo>
                      <mi>x</mi>
                      <mo>,</mo>
                      <mi>a</mi>
                      <mo>,</mo>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mo>+</mo>
                    <mi>γ</mi>
                    <mi>V</mi>
                    <mrow>
                      <mo>(</mo>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                  </mfenced>
                  <mo>.</mo>
                </mrow>
              </math>
            </formula>
            <p> </p>
            <p noindent="true">where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo form="prefix">arg</mo><msub><mo movablelimits="true" form="prefix">max</mo><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></msub><mi>f</mi><mrow><mo>(</mo><mi>a</mi><mo>)</mo></mrow></mrow></math></formula> is the set of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></math></formula> that maximizes <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><mi>a</mi><mo>)</mo></mrow></math></formula>. For any function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula>, such a greedy policy always exists because <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula> is finite.</p>
          </li>
        </simplelist>
        <p>The goal of Reinforcement Learning (RL), as well as that of dynamic programming, is to design an optimal policy (or a good approximation of it).</p>
        <p spacebefore="6.0pt">The well-known Dynamic Programming equation (also called the Bellman equation) provides a relation between the optimal value function at a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula> and the optimal value function at the successors states <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>x</mi><mo>'</mo></msup></math></formula> when choosing an optimal action: for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>,</p>
        <formula id-text="2" id="uid24" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>V</mi>
                <mo>*</mo>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <munder>
                <mo movablelimits="true" form="prefix">max</mo>
                <mrow>
                  <mi>a</mi>
                  <mo>∈</mo>
                  <mi>𝒜</mi>
                </mrow>
              </munder>
              <munder>
                <mo>∑</mo>
                <mrow>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>∈</mo>
                  <mi>𝒳</mi>
                </mrow>
              </munder>
              <mi>p</mi>
              <mrow>
                <mo>(</mo>
                <msup>
                  <mi>x</mi>
                  <mo>'</mo>
                </msup>
                <mo>|</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>a</mi>
                <mo>)</mo>
              </mrow>
              <mfenced separators="" open="[" close="]">
                <mi>r</mi>
                <mrow>
                  <mo>(</mo>
                  <mi>x</mi>
                  <mo>,</mo>
                  <mi>a</mi>
                  <mo>,</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
                <mo>+</mo>
                <mi>γ</mi>
                <msup>
                  <mi>V</mi>
                  <mo>*</mo>
                </msup>
                <mrow>
                  <mo>(</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
              </mfenced>
              <mo>.</mo>
            </mrow>
          </math>
        </formula>
        <p>The benefit of introducing this concept of optimal value function relies on the property that, from the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula>, it is easy to derive an optimal behavior by choosing the actions according to a policy greedy w.r.t. <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula>. Indeed, we have the property that a policy greedy w.r.t. the optimal value function is an optimal policy:</p>
        <formula id-text="3" id="uid25" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>π</mi>
                <mo>*</mo>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>∈</mo>
              <mo form="prefix">arg</mo>
              <munder>
                <mo movablelimits="true" form="prefix">max</mo>
                <mrow>
                  <mi>a</mi>
                  <mo>∈</mo>
                  <mi>𝒜</mi>
                </mrow>
              </munder>
              <munder>
                <mo>∑</mo>
                <mrow>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>∈</mo>
                  <mi>𝒳</mi>
                </mrow>
              </munder>
              <mi>p</mi>
              <mrow>
                <mo>(</mo>
                <msup>
                  <mi>x</mi>
                  <mo>'</mo>
                </msup>
                <mo>|</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>a</mi>
                <mo>)</mo>
              </mrow>
              <mfenced separators="" open="[" close="]">
                <mi>r</mi>
                <mrow>
                  <mo>(</mo>
                  <mi>x</mi>
                  <mo>,</mo>
                  <mi>a</mi>
                  <mo>,</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
                <mo>+</mo>
                <mi>γ</mi>
                <msup>
                  <mi>V</mi>
                  <mo>*</mo>
                </msup>
                <mrow>
                  <mo>(</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
              </mfenced>
              <mo>.</mo>
            </mrow>
          </math>
        </formula>
        <p>In short, we would like to mention that most of the reinforcement learning methods developed so far are built on one (or both) of the two following approaches ( <ref xlink:href="#sequel-2014-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>):</p>
        <simplelist>
          <li id="uid26">
            <p noindent="true">Bellman's dynamic programming approach, based on the introduction of the value function. It consists in learning a “good” approximation of the optimal value function, and then using it to derive a greedy policy w.r.t. this approximation. The hope (well justified in several cases) is that the performance <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mi>π</mi></msup></math></formula> of the policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> greedy w.r.t. an approximation <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula> of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula> will be close to optimality. This approximation issue of the optimal value function is one of the major challenges inherent to the reinforcement learning problem. <b>Approximate dynamic programming</b> addresses the problem of estimating performance bounds (<i>e.g.</i> the loss in performance <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>|</mo><mo>|</mo></mrow><msup><mi>V</mi><mo>*</mo></msup><mo>-</mo><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>|</mo><mo>|</mo></mrow></mrow></math></formula> resulting from using a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>-greedy w.r.t. some approximation <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula>- instead of an optimal policy) in terms of the approximation error <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>|</mo><mo>|</mo></mrow><msup><mi>V</mi><mo>*</mo></msup><mo>-</mo><mi>V</mi><mrow><mo>|</mo><mo>|</mo></mrow></mrow></math></formula> of the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula> by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula>. Approximation theory and Statistical Learning theory provide us with bounds in terms of the number of sample data used to represent the
functions, and the capacity and approximation power of the considered function spaces.</p>
          </li>
          <li id="uid27">
            <p noindent="true">Pontryagin's maximum principle approach, based on sensitivity analysis of the performance measure w.r.t. some control parameters. This approach, also called <b>direct policy search</b> in the Reinforcement Learning community aims at directly finding a good feedback control law in a parameterized policy space without trying to approximate the value function. The method consists in estimating the so-called <b>policy gradient</b>, <i>i.e.</i> the sensitivity of the performance measure (the value function) w.r.t. some parameters of the current policy. The idea being that an optimal control problem is replaced by a parametric optimization problem in the space of parameterized policies. As such, deriving a policy gradient estimate would lead to performing a stochastic gradient method in order to search for a local optimal parametric policy.</p>
          </li>
        </simplelist>
        <p>Finally, many extensions of the Markov decision processes exist, among which the Partially Observable MDPs (POMDPs) is the case where the current state does not contain all the necessary information required to decide for sure of the best action.</p>
      </subsection>
      <subsection id="uid28" level="2">
        <bodyTitle>Multi-arm Bandit Theory</bodyTitle>
        <p>Bandit problems illustrate the fundamental difficulty of decision making in the face of uncertainty: A decision maker must choose between what seems to be the best choice (“exploit”), or to test (“explore”) some alternative, hoping to discover a choice that beats the current best choice.</p>
        <p>The classical example of a bandit problem is deciding what treatment to give each patient in a clinical trial when the effectiveness of the treatments are initially unknown and the patients arrive sequentially. These bandit problems became popular with the seminal paper <ref xlink:href="#sequel-2014-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, after which they have found applications in diverse fields, such as control, economics, statistics, or learning theory.</p>
        <p>Formally, a K-armed bandit problem (<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>K</mi><mo>≥</mo><mn>2</mn></mrow></math></formula>) is specified by K real-valued distributions. In each time step a decision maker can select one of the distributions to obtain a sample from it. The samples obtained are considered as rewards. The distributions are initially unknown to the decision maker, whose goal is to maximize the sum of the rewards received, or equivalently, to minimize the regret which is defined as the loss compared to the total payoff that can be achieved given full knowledge of the problem, <i>i.e.</i>, when the arm giving the highest expected reward is pulled all the time.</p>
        <p>The name “bandit” comes from imagining a gambler playing with K slot machines. The gambler can pull the arm of any of the machines, which produces a random payoff as a result: When arm k is pulled, the random payoff is drawn from the distribution associated to k. Since the payoff distributions are initially unknown, the gambler must use exploratory actions to learn the utility of the individual arms. However, exploration has to be carefully controlled since excessive exploration may lead to unnecessary losses. Hence, to play well, the gambler must carefully balance exploration and exploitation. Auer <i>et al.</i> <ref xlink:href="#sequel-2014-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> introduced the algorithm UCB (Upper Confidence Bounds) that follows what is now called the “optimism in the face of uncertainty principle”. Their algorithm works by computing upper confidence bounds for all the arms and then choosing the arm with the highest such bound. They proved that the expected regret of their algorithm increases at most at a logarithmic rate
with the number of trials, and that the algorithm achieves the smallest possible regret up to some sub-logarithmic factor (for the considered family of distributions).</p>
      </subsection>
    </subsection>
    <subsection id="uid29" level="1">
      <bodyTitle>Statistical analysis of time series</bodyTitle>
      <p>Many of the problems of machine learning can be seen as extensions of classical problems of mathematical statistics to their (extremely) non-parametric and model-free cases. Other machine learning problems are founded on such statistical problems. Statistical problems of sequential learning are mainly those that are concerned with the analysis of time series. These problems are as follows.</p>
      <subsection id="uid30" level="2">
        <bodyTitle>Prediction of Sequences of Structured and Unstructured Data</bodyTitle>
        <p>Given a series of observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub></mrow></math></formula> it is required to give forecasts concerning the distribution of the future observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>2</mn></mrow></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula>; in the simplest case, that of the next outcome <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub></math></formula>.
Then <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub></math></formula> is revealed and the process continues. Different goals can be formulated in this setting. One can either make some assumptions on the probability
measure that generates the sequence <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula>, such as that the outcomes are independent and identically distributed (i.i.d.),
or that the sequence is a Markov chain, that it is a stationary process, etc.
More generally, one can assume that the data is generated by a probability measure that belongs to a certain set <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒞</mi></math></formula>.
In these cases the goal is to have the discrepancy between the predicted and the “true” probabilities to go to zero, if possible, with guarantees
on the speed of convergence.</p>
        <p>Alternatively, rather than making some assumptions on the data, one can change the goal: the predicted probabilities should be asymptotically as good as those given by the best reference predictor from a certain pre-defined set.</p>
        <p>Another dimension of complexity in this problem concerns the nature of observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mi>i</mi></msub></math></formula>. In the simplest case,
they come from a finite space, but already basic applications often require real-valued observations. Moreover,
function or even graph-valued observations often arise in practice, in particular in applications concerning Web data.
In these settings estimating even simple characteristics of probability distributions of the future outcomes becomes
non-trivial, and new learning algorithms for solving these problems are in order.</p>
      </subsection>
      <subsection id="uid31" level="2">
        <bodyTitle>Hypothesis testing</bodyTitle>
        <p>Given a series of observations of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula> generated by some unknown probability measure <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula>, the problem is to test a certain given hypothesis <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> about <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula>, versus a given alternative hypothesis <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula>. There are many different examples of this problem. Perhaps the simplest one is testing a simple hypothesis “<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is Bernoulli i.i.d. measure with probability of 0 equals 1/2” versus “<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is Bernoulli i.i.d. with the parameter different from 1/2”. More
interesting cases include the problems of model verification: for example, testing that <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is a Markov chain, versus that it is a stationary ergodic process but not a Markov chain. In the case when we have not one but several series of observations, we may wish to test the hypothesis that they are independent, or that they are generated by the same distribution. Applications of these problems to a more general class of machine learning tasks include the problem of feature selection, the problem of testing that a certain behaviour (such as pulling a certain arm of a bandit, or using a certain policy) is better (in terms of achieving some goal, or collecting some rewards) than another behaviour, or than a class of other behaviours.</p>
        <p>The problem of hypothesis testing can also be studied in its general formulations: given two (abstract) hypothesis <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula> about the unknown measure that generates the data, find out whether it is possible to test <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> against <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula> (with confidence), and if yes then how can one do it.</p>
      </subsection>
      <subsection id="uid32" level="2">
        <bodyTitle>Change Point Analysis</bodyTitle>
        <p>A stochastic process is generating the data. At some point, the process distribution changes.
In the “offline” situation, the statistician observes the resulting sequence of outcomes and has
to estimate the point or the points at which the change(s) occurred. In online setting, the goal is to
detect the change as quickly as possible.</p>
        <p>These are the classical problems in mathematical statistics, and probably among the last remaining statistical problems
not adequately addressed by machine learning methods. The reason for the latter is perhaps in that the problem is rather
challenging. Thus, most methods available so far are parametric methods concerning piece-wise constant distributions, and the
change in distribution is associated with the change in the mean. However, many applications, including DNA analysis,
the analysis of (user) behaviour data, etc., fail to comply with this kind of assumptions. Thus, our goal here is to provide completely non-parametric
methods allowing for any kind of changes in the time-series distribution.</p>
      </subsection>
      <subsection id="uid33" level="2">
        <bodyTitle>Clustering Time Series, Online and Offline</bodyTitle>
        <p>The problem of clustering, while being a classical problem of mathematical statistics, belongs to the realm of unsupervised learning. For time series, this problem can be formulated as follows: given several samples <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>x</mi><mn>1</mn></msup><mo>=</mo><mrow><mo>(</mo><msubsup><mi>x</mi><mn>1</mn><mn>1</mn></msubsup><mo>,</mo><mo>⋯</mo><mo>,</mo><msubsup><mi>x</mi><msub><mi>n</mi><mn>1</mn></msub><mn>1</mn></msubsup><mo>)</mo></mrow><mo>,</mo><mo>⋯</mo><mo>,</mo><msup><mi>x</mi><mi>N</mi></msup><mo>=</mo><mrow><mo>(</mo><msubsup><mi>x</mi><mi>N</mi><mn>1</mn></msubsup><mo>,</mo><mo>⋯</mo><mo>,</mo><msubsup><mi>x</mi><msub><mi>n</mi><mi>N</mi></msub><mi>N</mi></msubsup><mo>)</mo></mrow></mrow></math></formula>, we wish to group similar objects together. While this is of course not a precise formulation, it can be made precise if we assume that the samples were generated by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula> different distributions.</p>
        <p>The online version of the problem allows for the number of observed time series to grow with time, in general, in an arbitrary manner.</p>
      </subsection>
      <subsection id="uid34" level="2">
        <bodyTitle>Online Semi-Supervised Learning</bodyTitle>
        <p>Semi-supervised learning (SSL) is a field of machine learning that studies
learning from both labeled and unlabeled examples. This learning
paradigm is extremely useful for solving real-world problems, where
data is often abundant but the resources to label them are limited.</p>
        <p>Furthermore, <i>online</i> SSL is suitable for adaptive machine learning
systems.
In the classification case, learning is viewed as a repeated game against a
potentially adversarial nature. At each step <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> of this game, we observe an
example <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>𝐱</mi><mi>𝐭</mi></msub></math></formula>, and then predict its label <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mover accent="true"><mi>y</mi><mo>^</mo></mover><mi>t</mi></msub></math></formula>.</p>
        <p>The challenge of the game is that we only exceptionally observe the true label
<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>y</mi><mi>t</mi></msub></math></formula>. In the extreme case, which we also study, only a handful of labeled
examples are provided in advance and set the initial bias of the system while
unlabeled examples are gathered online and update the bias continuously.
Thus, if we want to adapt to changes in the environment, we have to rely on
indirect forms of feedback, such as the structure of data.</p>
      </subsection>
    </subsection>
    <subsection id="uid35" level="1">
      <bodyTitle>Statistical Learning and Bayesian Analysis</bodyTitle>
      <p>Before detailing some issues in these fields, let us remind the definition of a few terms.</p>
      <glosslist>
        <label>Machine learning</label>
        <li>
          <p>refers to a system capable of the autonomous acquisition and integration of knowledge. This capacity to learn from experience, analytical observation, and other means, results in a system that can continuously self-improve and thereby offer increased efficiency and effectiveness.</p>
        </li>
        <label>Statistical learning</label>
        <li>
          <p>is an approach to machine intelligence that is based on statistical modeling of data. With a statistical model in hand, one applies probability theory and decision theory to get an algorithm. This is opposed to using training data merely to select among different algorithms or using heuristics/“common sense” to design an algorithm.</p>
        </li>
        <label>Bayesian Analysis</label>
        <li>
          <p>applies to data that could be seen as observations in the more general meaning of the term. These data may not only come from classical sensors but also from any <i>device</i> recording information. From an operational point of view, like for statistical learning, uncertainty about the data is modeled by a probability measure thus defining the so-called likelihood functions. This last one depends upon parameters defining the state of the world we focus on for decision purposes. Within the Bayesian framework the uncertainty about these parameters is also modeled by probability measures, the priors that are subjective probabilities. Using probability theory and decision theory, one then defines new algorithms to estimate the parameters of interest and/or associated decisions. According to the International Society for Bayesian Analysis (source: <ref xlink:href="http://bayesian.org" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>bayesian.<allowbreak/>org</ref>), and from a more general point of view, this overall process could be summarize as follows: one assesses the
current state of knowledge regarding the issue of interest, gather new data to address remaining questions, and then update and refine their understanding to incorporate both new and old data. Bayesian inference provides a logical, quantitative framework for this process based on probability theory.</p>
        </li>
        <label>Kernel method.</label>
        <li>
          <p>Generally speaking, a kernel function is a function that maps a couple of points to a real value. Typically, this value is a measure of dissimilarity between the two points. Assuming a few properties on it, the kernel function implicitly defines a dot product in some function space. This very nice formal property as well as a bunch of others have ensured a strong appeal for these methods in the last 10 years in the field of function approximation. Many classical algorithms have been “kernelized”, that is, restated in a much more general way than their original formulation. Kernels also implicitly induce the representation of data in a certain “suitable” space where the problem to solve (classification, regression, ...) is expected to be simpler (non-linearity turns to linearity).</p>
        </li>
      </glosslist>
      <p>The fundamental tools used in <span class="smallcap" align="left">SequeL</span> come from the field of statistical learning <ref xlink:href="#sequel-2014-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. We briefly present the most important for us to date, namely, kernel-based non parametric function approximation, and non parametric Bayesian models.</p>
      <subsection id="uid36" level="2">
        <bodyTitle>Non-parametric methods for Function Approximation</bodyTitle>
        <p>In statistics in general, and applied mathematics, the approximation of a multi-dimensional real function given some samples is a well-known problem (known as either regression, or interpolation, or function approximation, ...). Regressing a function from data is a key ingredient of our research, or to the least, a basic component of most of our algorithms. In the context of sequential learning, we have to regress a function while data samples are being obtained one at a time, while keeping the constraint to be able to predict points at any step along the acquisition process. In sequential decision problems, we typically have to learn a value function, or a policy.</p>
        <p>Many methods have been proposed for this purpose. We are looking for suitable ones to cope with the problems we wish to solve. In reinforcement learning, the value function may have areas where the gradient is large; these are areas where the approximation is difficult, while these are also the areas where the accuracy of the approximation should be maximal to obtain a good policy (and where, otherwise, a bad choice of action may imply catastrophic consequences).</p>
        <p>We particularly favor non parametric methods since they make quite a few assumptions about the function to learn. In particular, we have strong interests in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>l</mi><mn>1</mn></msub></math></formula>-regularization, and the (kernelized-)LARS algorithm. <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>l</mi><mn>1</mn></msub></math></formula>-regularization yields sparse solutions, and the LARS approach produces the whole regularization path very efficiently, which helps solving the regularization parameter tuning problem.</p>
      </subsection>
      <subsection id="uid37" level="2">
        <bodyTitle>Nonparametric Bayesian Estimation</bodyTitle>
        <p>Numerous problems may be solved efficiently by a Bayesian approach. The use of Monte-Carlo methods allows us to handle non–linear, as well as non–Gaussian, problems. In their standard form, they require the formulation of probability densities in a parametric form. For instance, it is a common usage to use Gaussian likelihood, because it is handy. However, in some applications such as Bayesian filtering, or blind deconvolution, the choice of a parametric form of the density of the noise is often arbitrary. If this choice is wrong, it may also have dramatic consequences on the estimation quality. To overcome this shortcoming, one possible approach is to consider that this density must also be estimated from data. A general Bayesian approach then consists in defining a probabilistic space associated with the possible outcomes of the <i>object</i> to be estimated. Applied to density estimation, it means that we need to define a probability measure on the probability density of the noise: such a measure is
called a <i>random measure</i>. The classical Bayesian inference procedures can then been used. This approach being by nature non parametric, the associated frame is called <i>Non Parametric Bayesian</i>.</p>
        <p>In particular, mixtures of Dirichlet processes <ref xlink:href="#sequel-2014-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> provide a very powerful formalism. Dirichlet Processes are a possible random measure and Mixtures of Dirichlet Processes are an extension of well-known finite mixture models. Given a mixture density <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><mi>x</mi><mo>|</mo><mi>θ</mi><mo>)</mo></mrow></math></formula>, and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>G</mi><mrow><mo>(</mo><mi>d</mi><mi>θ</mi><mo>)</mo></mrow><mo>=</mo><msubsup><mo>∑</mo><mrow><mi>k</mi><mo>=</mo><mn>1</mn></mrow><mi>∞</mi></msubsup><msub><mi>ω</mi><mi>k</mi></msub><msub><mi>δ</mi><msub><mi>U</mi><mi>k</mi></msub></msub><mrow><mo>(</mo><mi>d</mi><mi>θ</mi><mo>)</mo></mrow></mrow></math></formula>, a Dirichlet process, we define a mixture of Dirichlet processes as:</p>
        <formula id-text="4" id="uid38" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <mi>F</mi>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <msub>
                <mo>∫</mo>
                <mi>Θ</mi>
              </msub>
              <mi>f</mi>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>|</mo>
                <mi>θ</mi>
                <mo>)</mo>
              </mrow>
              <mi>G</mi>
              <mrow>
                <mo>(</mo>
                <mi>d</mi>
                <mi>θ</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <munderover>
                <mo>∑</mo>
                <mrow>
                  <mi>k</mi>
                  <mo>=</mo>
                  <mn>1</mn>
                </mrow>
                <mi>∞</mi>
              </munderover>
              <msub>
                <mi>ω</mi>
                <mi>k</mi>
              </msub>
              <mi>f</mi>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>|</mo>
                <msub>
                  <mi>U</mi>
                  <mi>k</mi>
                </msub>
                <mo>)</mo>
              </mrow>
            </mrow>
          </math>
        </formula>
        <p noindent="true">where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>F</mi><mo>(</mo><mi>x</mi><mo>)</mo></mrow></math></formula> is the density to be estimated. The class of densities that may be written as a mixture of Dirichlet processes is very wide, so that they really fit a very large number of applications.</p>
        <p>Given a set of observations, the estimation of the parameters of a mixture of Dirichlet processes is performed by way of a Monte Carlo Markov Chain (MCMC) algorithm. Dirichlet Process Mixture are also widely used in clustering problems. Once the parameters of a mixture are estimated, they can be interpreted as the parameters of a specific cluster defining a class as well. Dirichlet processes are well known within the machine learning community and their potential in statistical signal processing still need to be developed.</p>
      </subsection>
      <subsection id="uid39" level="2">
        <bodyTitle>Random Finite Sets for multisensor multitarget tracking</bodyTitle>
        <p>In the general multi-sensor multi-target Bayesian framework, an unknown (and possibly varying) number of targets whose states <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><msub><mi>x</mi><mi>n</mi></msub></mrow></math></formula> are observed by several sensors which produce a collection of measurements <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>z</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>z</mi><mi>m</mi></msub></mrow></math></formula> at every time step <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula>. Well-known models to this problem are track-based models, such as the joint probability data association (JPDA), or joint multi-target probabilities, such as the joint multi-target probability density. Common difficulties in multi-target tracking arise from the fact that the system state and the collection of measures from sensors are unordered and their size evolve randomly through time. Vector-based algorithms must therefore account for state coordinates exchanges and missing data within an unknown time interval. Although this approach is very popular and has resulted in many algorithms in the past, it may not be the optimal way to tackle the problem, since the sate and the data are in fact <i>sets</i> and not vectors.</p>
        <p>The random finite set theory provides a powerful framework to deal with these issues. Mahler's work on finite sets statistics (FISST) provides a mathematical framework to build multi-object densities and derive the Bayesian rules for state prediction and state estimation. Randomness on object number and their states are encapsulated into random finite sets (RFS), namely multi-target(state) sets <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>X</mi><mo>=</mo><mo>{</mo><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>}</mo></mrow></math></formula> and multi-sensor (measurement) set <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>Z</mi><mi>k</mi><mo>=</mo><mo>{</mo><msub><mi>z</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>z</mi><mi>m</mi></msub><mo>}</mo></mrow></math></formula>. The objective is then to propagate the multitarget probability density <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>f</mi><mrow><mi>k</mi><mo>|</mo><mi>k</mi></mrow></msub><mrow><mo>(</mo><mi>X</mi><mo>|</mo><mi>Z</mi><mrow><mo>(</mo><mi>k</mi><mo>)</mo></mrow><mo>)</mo></mrow></mrow></math></formula> by using the Bayesian set equations at every time step <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula>:</p>
        <formula id-text="10" id="uid40" textype="multline" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mtable displaystyle="true">
              <mtr>
                <mtd columnalign="left">
                  <mrow>
                    <msub>
                      <mi>f</mi>
                      <mrow>
                        <mi>k</mi>
                        <mo>+</mo>
                        <mn>1</mn>
                        <mo>|</mo>
                        <mi>k</mi>
                      </mrow>
                    </msub>
                    <mrow>
                      <mo>(</mo>
                      <mi>X</mi>
                      <mo>|</mo>
                      <msup>
                        <mi>Z</mi>
                        <mrow>
                          <mo>(</mo>
                          <mi>k</mi>
                          <mo>)</mo>
                        </mrow>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mo>=</mo>
                    <mo>∫</mo>
                    <msub>
                      <mi>f</mi>
                      <mrow>
                        <mi>k</mi>
                        <mo>+</mo>
                        <mn>1</mn>
                        <mo>|</mo>
                        <mi>k</mi>
                      </mrow>
                    </msub>
                    <mrow>
                      <mo>(</mo>
                      <mi>X</mi>
                      <mo>|</mo>
                      <mi>W</mi>
                      <mo>)</mo>
                    </mrow>
                    <msub>
                      <mi>f</mi>
                      <mrow>
                        <mi>k</mi>
                        <mo>|</mo>
                        <mi>k</mi>
                      </mrow>
                    </msub>
                    <mrow>
                      <mo>(</mo>
                      <mi>W</mi>
                      <mo>|</mo>
                      <msup>
                        <mi>Z</mi>
                        <mrow>
                          <mo>(</mo>
                          <mi>k</mi>
                          <mo>)</mo>
                        </mrow>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mi>δ</mi>
                    <mi>W</mi>
                  </mrow>
                </mtd>
              </mtr>
              <mtr>
                <mtd columnalign="right">
                  <mrow>
                    <msub>
                      <mi>f</mi>
                      <mrow>
                        <mi>k</mi>
                        <mo>+</mo>
                        <mn>1</mn>
                        <mo>|</mo>
                        <mi>k</mi>
                        <mo>+</mo>
                        <mn>1</mn>
                      </mrow>
                    </msub>
                    <mrow>
                      <mo>(</mo>
                      <mi>X</mi>
                      <mo>|</mo>
                      <msup>
                        <mi>Z</mi>
                        <mrow>
                          <mo>(</mo>
                          <mi>k</mi>
                          <mo>+</mo>
                          <mn>1</mn>
                          <mo>)</mo>
                        </mrow>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mo>=</mo>
                    <mfrac>
                      <mrow>
                        <msub>
                          <mi>f</mi>
                          <mrow>
                            <mi>k</mi>
                            <mo>+</mo>
                            <mn>1</mn>
                          </mrow>
                        </msub>
                        <mrow>
                          <mo>(</mo>
                          <msub>
                            <mi>Z</mi>
                            <mrow>
                              <mi>k</mi>
                              <mo>+</mo>
                              <mn>1</mn>
                            </mrow>
                          </msub>
                          <mo>|</mo>
                          <mi>X</mi>
                          <mo>)</mo>
                        </mrow>
                        <msub>
                          <mi>f</mi>
                          <mrow>
                            <mi>k</mi>
                            <mo>+</mo>
                            <mn>1</mn>
                            <mo>|</mo>
                            <mi>k</mi>
                          </mrow>
                        </msub>
                        <mrow>
                          <mo>(</mo>
                          <mi>X</mi>
                          <mo>|</mo>
                          <msup>
                            <mi>Z</mi>
                            <mrow>
                              <mo>(</mo>
                              <mi>k</mi>
                              <mo>)</mo>
                            </mrow>
                          </msup>
                          <mo>)</mo>
                        </mrow>
                      </mrow>
                      <mrow>
                        <mo>∫</mo>
                        <msub>
                          <mi>f</mi>
                          <mrow>
                            <mi>k</mi>
                            <mo>+</mo>
                            <mn>1</mn>
                          </mrow>
                        </msub>
                        <mrow>
                          <mo>(</mo>
                          <msub>
                            <mi>Z</mi>
                            <mrow>
                              <mi>k</mi>
                              <mo>+</mo>
                              <mn>1</mn>
                            </mrow>
                          </msub>
                          <mo>|</mo>
                          <mi>W</mi>
                          <mo>)</mo>
                        </mrow>
                        <msub>
                          <mi>f</mi>
                          <mrow>
                            <mi>k</mi>
                            <mo>+</mo>
                            <mn>1</mn>
                            <mo>|</mo>
                            <mi>k</mi>
                          </mrow>
                        </msub>
                        <mrow>
                          <mo>(</mo>
                          <mi>W</mi>
                          <mo>|</mo>
                          <msup>
                            <mi>Z</mi>
                            <mrow>
                              <mo>(</mo>
                              <mi>k</mi>
                              <mo>)</mo>
                            </mrow>
                          </msup>
                          <mo>)</mo>
                        </mrow>
                        <mi>δ</mi>
                        <mi>W</mi>
                      </mrow>
                    </mfrac>
                  </mrow>
                </mtd>
              </mtr>
            </mtable>
          </math>
        </formula>
        <p noindent="true">where:</p>
        <simplelist>
          <li id="uid41">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>X</mi><mo>=</mo><mo>{</mo><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>}</mo></mrow></math></formula> is a multi-target state, <i>i.e.</i> a finite set of elements <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mi>i</mi></msub></math></formula> defined on the single-target space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula>; <footnote id="uid42" id-text="2">The state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mi>i</mi></msub></math></formula> of a target is usually composed of its position, its velocity, etc.</footnote></p>
          </li>
          <li id="uid43">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>Z</mi><mrow><mi>k</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>=</mo><mrow><mo>{</mo><msub><mi>z</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>z</mi><mi>m</mi></msub><mo>}</mo></mrow></mrow></math></formula> is the current multi-sensor observation, <i>i.e.</i> a collection of measures <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>z</mi><mi>i</mi></msub></math></formula> produced at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>k</mi><mo>+</mo><mn>1</mn></mrow></math></formula> by all the sensors;</p>
          </li>
          <li id="uid44">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>Z</mi><mrow><mo>(</mo><mi>k</mi><mo>)</mo></mrow></msup><mo>=</mo><msub><mo>⋃</mo><mrow><mi>t</mi><mo>⩽</mo><mi>k</mi></mrow></msub><msub><mi>Z</mi><mi>t</mi></msub></mrow></math></formula> is the collection of observations up to time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula>;</p>
          </li>
          <li id="uid45">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>f</mi><mrow><mi>k</mi><mo>|</mo><mi>k</mi></mrow></msub><mrow><mo>(</mo><mi>W</mi><mo>|</mo><msup><mi>Z</mi><mrow><mo>(</mo><mi>k</mi><mo>)</mo></mrow></msup><mo>)</mo></mrow></mrow></math></formula> is the current multi-target posterior density in state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>W</mi></math></formula>;</p>
          </li>
          <li id="uid46">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>f</mi><mrow><mi>k</mi><mo>+</mo><mn>1</mn><mo>|</mo><mi>k</mi></mrow></msub><mrow><mo>(</mo><mi>X</mi><mo>|</mo><mi>W</mi><mo>)</mo></mrow></mrow></math></formula> is the current multi-target Markov transition density, from state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>W</mi></math></formula> to state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>X</mi></math></formula>;</p>
          </li>
          <li id="uid47">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>f</mi><mrow><mi>k</mi><mo>+</mo><mn>1</mn></mrow></msub><mrow><mo>(</mo><mi>Z</mi><mo>|</mo><mi>X</mi><mo>)</mo></mrow></mrow></math></formula> is the current multi-sensor/multi-target likelihood function.</p>
          </li>
        </simplelist>
        <p>Although equations (<ref xlink:href="#uid40" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) may seem similar to the classical single-sensor/single-target Bayesian equations, they are generally intractable because of the presence of the <i>set integrals</i>. For, a RFS <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>Ξ</mi></math></formula> is characterized by the family of its Janossy densities <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>j</mi><mrow><mi>Ξ</mi><mo>,</mo><mn>1</mn></mrow></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>1</mn></msub><mo>)</mo></mrow></mrow></math></formula>, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>j</mi><mrow><mi>Ξ</mi><mo>,</mo><mn>2</mn></mrow></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><msub><mi>x</mi><mn>2</mn></msub><mo>)</mo></mrow><mo>.</mo><mo>.</mo><mo>.</mo></mrow></math></formula> and not just by one density as it is the case with vectors. Mahler then introduced the PHD, defined on single-target state space. The PHD is the quantity whose integral on any region <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>S</mi></math></formula> is the expected number of targets inside <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>S</mi></math></formula>. Mahler proved that the PHD is the first-moment density of the multi-target probability density. Although defined on single-state space X, the PHD encapsulates information on both target number and states.</p>
      </subsection>
    </subsection>
  </fondements>
  <domaine id="uid48">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid49" level="1">
      <bodyTitle>In Short</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span> aims at solving problems of prediction, as well as problems of optimal and adaptive control. As such, the application domains are very numerous.</p>
      <p>The application domains have been organized as follows:</p>
      <simplelist>
        <li id="uid50">
          <p noindent="true">adaptive control,</p>
        </li>
        <li id="uid51">
          <p noindent="true">signal processing and functional prediction,</p>
        </li>
        <li id="uid52">
          <p noindent="true">web mining,</p>
        </li>
        <li id="uid53">
          <p noindent="true">computer games.</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid54" level="1">
      <bodyTitle>Adaptive Control</bodyTitle>
      <p>Adaptive control is an important application of the research being done in <span class="smallcap" align="left">SequeL</span>. Reinforcement learning (RL) precisely aims at controling the behavior of systems and may be used in situations with more or less information available. Of course, the more information, the better, in which case methods of (approximate) dynamic programming may be used <ref xlink:href="#sequel-2014-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. But, reinforcement learning may also handle situations where the dynamics of the system is unknown, situations where the system is partially observable, and non stationary situations. Indeed, in these cases, the behavior is learned by interacting with the environment and thus naturally adapts to the changes of the environment. Furthermore, the adaptive system may also take advantage of expert knowledge when available.</p>
      <p>Clearly, the spectrum of potential applications is very wide: as far as an agent (a human, a robot, a virtual agent) has to take a decision, in particular in cases where he lacks some information to take the decision, this enters the scope of our activities. To exemplify the potential applications, let us cite:</p>
      <simplelist>
        <li id="uid55">
          <p noindent="true">game software: in the 1990's, RL has been the basis of a very successful Backgammon program, TD-Gammon <ref xlink:href="#sequel-2014-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> that learned to play at an expert level by basically playing a very large amount of games against itself. Today, various games are studied with RL techniques.</p>
        </li>
        <li id="uid56">
          <p noindent="true">many optimization problems that are closely related to operation research, but taking into account the uncertainty, and the stochasticity of the environment: see the job-shop scheduling, or the cellular phone frequency allocation problems, resource allocation in general <ref xlink:href="#sequel-2014-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        </li>
        <li id="uid57">
          <p noindent="true">we can also foresee that some progress may be made by using RL to design adaptive conversational agents, or system-level as well as application-level operating systems that adapt to their users habits.</p>
          <p>More generally, these ideas fall into what adaptive control may bring to human beings, in making their life simpler, by being embedded in an environment that is made to help them, an idea phrased as “ambient intelligence”.</p>
        </li>
        <li id="uid58">
          <p noindent="true">The sensor management problem consists in determining the best way to task several sensors when each sensor has many modes and search patterns. In the detection/tracking applications, the tasks assigned to a sensor management system are for instance:</p>
          <simplelist>
            <li id="uid59">
              <p noindent="true">detect targets,</p>
            </li>
            <li id="uid60">
              <p noindent="true">track the targets in the case of a moving target and/or a smart target (a smart target can change its behavior when it detects that it is under analysis),</p>
            </li>
            <li id="uid61">
              <p noindent="true">combine all the detections in order to track each moving target,</p>
            </li>
            <li id="uid62">
              <p noindent="true">dynamically allocate the sensors in order to achieve the previous three tasks in an optimal way. The allocation of sensors, and their modes, thus defines the action space of the underlying Markov decision problem.</p>
            </li>
          </simplelist>
          <p>In the more general situation, some sensors may be localized at the same place while others are dispatched over a given volume. Tasking a sensor may include, at each moment, such choices as where to point and/or what mode to use. Tasking a group of sensors includes the tasking of each individual sensor but also the choice of collaborating sensors subgroups. Of course, the sensor management problem is related to an objective. In general, sensors must balance complex trade-offs between achieving mission goals such as detecting new targets, tracking existing targets, and identifying existing targets. The word “target” is used here in its most general meaning, and the potential applications are not restricted to military applications. Whatever the underlying application, the sensor management problem consists in choosing at each time an action within the set of available actions.</p>
        </li>
        <li id="uid63">
          <p noindent="true">sequential decision processes are also very well-known in economy. They may be used as a decision aid tool, to help in the design of social helps, or the implementation of plants (see <ref xlink:href="#sequel-2014-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, <ref xlink:href="#sequel-2014-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> for such applications).</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid64" level="1">
      <bodyTitle>Signal Processing</bodyTitle>
      <p>Applications of sequential learning in the field of signal processing are also very numerous. A signal is naturally sequential as it flows. It usually comes from the recording of the output of sensors but the recording of any sequence of numbers may be considered as a signal like the stock-exchange rates evolution with respect to time and/or place, the number of consumers at a mall entrance or the number of connections to a web site. Signal processing has several objectives: predict , estimate, remove noise, characterize or classify. The signal is often considered as sequential: we want to predict, estimate or classify a value (or a feature) at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> knowing the past values of the parameter of interest or past values of data related to this parameter. This is typically the case in estimation processes arising in dynamical systems.</p>
      <p>Signals may be processed in several ways. One of the best–known way is the time-frequency analysis in which the frequencies of each signal are analyzed with respect to time. This concept has been generalized to the time-scale analysis obtained by a wavelet transform. Both analysis are based on the projection of the original signal onto a well-chosen function basis. Signal processing is also closely related to the probability field as the uncertainty inherent to many signals leads to consider them as stochastic processes: the Bayesian framework is actually one of the main frameworks within which signals are processed for many purposes. It is worth noting that Bayesian analysis can be used jointly with a time-frequency or a wavelet analysis. However, alternatives like belief functions came up these last years. Belief functions were introduced by Demspter few decades ago and have been successfully used in the few past years in fields where probability had, during many years, no alternatives like in
classification. Belief functions can be viewed as a generalization of probabilities which can capture both imprecision and uncertainty. Belief functions are also closely related to data fusion.</p>
    </subsection>
    <subsection id="uid65" level="1">
      <bodyTitle>Web Mining</bodyTitle>
      <p>We work on the news/ad recommendation. These online learning algorithms reached a critical importance over the last few years due to these major applications. After designing a new algorithm, it is critical to be able to evaluate it without having to plug it into the real application in order to protect user experiences or/and the company's revenue. To do this, people used to build simulators of user behaviors and try to achieve good performances against it. However designing such a simulator is probably much more difficult than designing the algorithm itself! An other common way to evaluate is to not consider the exploration/exploitation dilemma (also known as “Cold Start” for recommender systems). Lately data-driven methods have been developed. We are working on building automatic replay methodology with some theoretical guarantees. This work also exhibits strong link with the choice of the number of contexts to use with recommender systems wrt your audience.</p>
      <p>An other point is that web sites must forecast Web page views in order to plan computer resource allocation and estimate upcoming revenue and advertising growth. In this work, we focus on extracting trends and seasonal patterns from page view series.
We investigate Holt-Winters/ARIMA like procedures and some regularized models for making short-term prediction (3-6 weeks) wrt to logged data of several big media websites.
We work on some news event related webpages and we feel that kind of time series deserves a particular attention. Self-similarity is found to exist at multiple time scales of network traffic, and can be exploited for prediction. In particular, it is found that Web page views exhibit strong impulsive changes occasionally. The impulses cause large prediction errors long after their occurrences and can sometimes be predicted (<i>e.g.</i>, elections, sport events, editorial changes,holidays) in order to improve accuracies. It also seems that some promising model could arise from using global trends shift in the population.</p>
    </subsection>
    <subsection id="uid66" level="1">
      <bodyTitle>Games</bodyTitle>
      <p>The problem of artificial intelligence in games consists in choosing actions of
players in order to produce artificial opponents. Most games can be formalized
as Markov decision problems, so they can be approached with reinforcement
learning.</p>
      <p>In particular, <span class="smallcap" align="left">SequeL</span> was a pioneer of Monte Carlo Tree Search, a technique
that obtained spectacular successes in the game of Go. Other application
domains include the game of poker and the Japanese card game of hanafuda.</p>
    </subsection>
  </domaine>
  <logiciels id="uid67">
    <bodyTitle>New Software and Platforms</bodyTitle>
    <subsection id="uid68" level="1">
      <bodyTitle>Computer Games</bodyTitle>
      <participants>
        <person key="sequel-2014-idp72936">
          <firstname>Rémi</firstname>
          <lastname>Coulom</lastname>
        </person>
      </participants>
      <simplelist>
        <li id="uid69">
          <p noindent="true"><i/><i><b>Crazy Stone</b></i><i/> is a top-level Go-playing program that has been developed by Rémi Coulom since 2005. Crazy Stone won several major international Go tournaments in the past. In 2013, a new version was released in Japan. This new version won the 6th edition of the UEC Cup (the most important international computer-Go tournament). It also won the first edition of the Denseisen, by winning a 4-stone handicap game against 9-dan professional player Yoshio Ishida. It is distributed as a commercial product by <i>Unbalance Corporation</i> (Japan). 6-month work in 2013. URL: <ref xlink:href="http://remi.coulom.free.fr/CrazyStone/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>remi.<allowbreak/>coulom.<allowbreak/>free.<allowbreak/>fr/<allowbreak/>CrazyStone/</ref></p>
        </li>
        <li id="uid70">
          <p noindent="true"><i/><i><b>Kifu Snap</b></i><i/> is an Android image-recognition app. It can automatically recognize a Go board from a picture, and analyze it with Crazy Stone. It was released on Google Play in November, 2013. 6-month work in 2013. URL: <ref xlink:href="http://remi.coulom.free.fr/kifu-snap/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>remi.<allowbreak/>coulom.<allowbreak/>free.<allowbreak/>fr/<allowbreak/>kifu-snap/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid71" level="1">
      <bodyTitle>Function optimization</bodyTitle>
      <participants>
        <person key="sequel-2014-idm27568">
          <firstname>Philippe</firstname>
          <lastname>Preux</lastname>
        </person>
      </participants>
      <subsection id="uid72" level="2">
        <bodyTitle>yaStoSOO</bodyTitle>
        <p>We have worked on the efficient implementation of the StoSOO algorithm in order to have a software that can be used for real to optimize real functions, and to be able to experiment with the algorithm, and assess its practical usefulness. This led to yaStoSOO, an implementation in C available on the web at <ref xlink:href="http://www.grappa.univ-lille3.fr/~ppreux/software/StoSOO/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>grappa.<allowbreak/>univ-lille3.<allowbreak/>fr/<allowbreak/>~ppreux/<allowbreak/>software/<allowbreak/>StoSOO/</ref>. The code is distributed under the GPL licence.</p>
        <p>Thanks to this implementation, we were able to compete in the CEC'2014 competition on Real-Parameter Single Objective optimization at which we ranked honorably (10th out of 17 competitor algorithms). More experimental work is under-way.
</p>
      </subsection>
    </subsection>
  </logiciels>
  <resultats id="uid73">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid74" level="1">
      <bodyTitle>Highlights of the Year</bodyTitle>
      <simplelist>
        <li id="uid75">
          <p noindent="true">New startup by Rémi Coulom on AI in games (go, chess, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mo>⋯</mo></math></formula>).</p>
        </li>
        <li id="uid76">
          <p noindent="true">Successful Collaboration with Deezer and the victory at the ACM RecSys Recommendation Systems Challenge</p>
        </li>
        <li id="uid77">
          <p noindent="true">We were selected and working on preparation of ICML 2015 in Lille.
ICML is the most
important conference in the field of machine learning.
This is the first time after more than 30 years of existence,
that this conference will be held in France.</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid78" level="1">
      <bodyTitle>Decision-making Under Uncertainty</bodyTitle>
      <subsection id="uid79" level="2">
        <bodyTitle>Reinforcement Learning</bodyTitle>
        <p>
          <i>
            <b>Selecting Near-Optimal Approximate State Representations in Reinforcement Learning <ref xlink:href="#sequel-2014-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider a reinforcement learning setting where the learner does not have explicit access to the states of the underlying Markov decision process (MDP). Instead, she has access to several models that map histories of past interactions to states. Here we improve over known regret bounds in this setting, and more importantly generalize to the case where the models given to the learner do not contain a true model resulting in an MDP representation but only approximations of it. We also give improved error bounds for state aggregation.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Online Stochastic Optimization under Correlated Bandit Feedback <ref xlink:href="#sequel-2014-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>In this paper we consider the problem of online stochastic optimization of a locally smooth function under bandit feedback. We introduce the high-confidence tree (HCT) algorithm, a novel anytime X -armed bandit algorithm, and derive regret bounds matching the performance of state-of-the-art algorithms in terms of the dependency on number of steps and the near-optimality di-mension. The main advantage of HCT is that it handles the challenging case of correlated bandit feedback (reward), whereas existing meth-ods require rewards to be conditionally indepen-dent. HCT also improves on the state-of-the-art in terms of the memory requirement, as well as requiring a weaker smoothness assumption on the mean-reward function in comparison with the existing anytime algorithms. Finally, we discuss how HCT can be applied to the problem of policy search in reinforcement learning and we report preliminary empirical results.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Sparse Multi-task Reinforcement Learning <ref xlink:href="#sequel-2014-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>In multi-task reinforcement learning (MTRL), the objective is to simultaneously learn multiple tasks and exploit their similarity to improve the performance w.r.t. single-task learning. In this paper we investigate the case when all the tasks can be accurately represented in a linear approximation space using the same small subset of the original (large) set of features. This is equivalent to assuming that the weight vectors of the task value functions are <i>jointly sparse</i>, i.e., the set of their non-zero components is small and it is shared across tasks. Building on existing results in multi-task regression, we develop two multi-task extensions of the fitted <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>Q</mi></math></formula>-iteration algorithm. While the first algorithm assumes that the tasks are jointly sparse in the given representation, the second one learns a transformation of the features in the attempt of finding a more sparse representation. For both algorithms we provide a sample complexity analysis and numerical simulations.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
      <subsection id="uid80" level="2">
        <bodyTitle>Multi-arm Bandit Theory</bodyTitle>
        <p>
          <i>
            <b>Spectral Bandits for Smooth Graph Functions with Applications in Recommender Systems <ref xlink:href="#sequel-2014-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Smooth functions on graphs have wide applications in manifold and semi-supervised learning. In this paper, we study a bandit problem where the payoffs of arms are smooth on a graph. This framework is suitable for solving online learning problems that involve graphs, such as content-based recommendation. In this problem, each recommended item is a node and its expected rating is similar to its neighbors. The goal is to recommend items that have high expected ratings. We aim for the algorithms where the cumulative regret would not scale poorly with the number of nodes. In particular, we introduce the notion of an effective dimension, which is small in real-world graphs, and propose two algorithms for solving our problem that scale linearly in this dimension. Our experiments on real-world content recommendation problem show that a good estimator of user preferences for thousands of items can be learned from just tens nodes evaluations.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Online combinatorial optimization with stochastic decision sets and adversarial losses <ref xlink:href="#sequel-2014-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Most work on sequential learning assumes a fixed set of actions that are available all the time. However, in practice, actions can consist of picking subsets of readings from sensors that may break from time to time, road segments that can be blocked or goods that are out of stock. In this paper we study learning algorithms that are able to deal with stochastic availability of such unreliable composite actions. We propose and analyze algorithms based on the Follow-The-Perturbed-Leader prediction method for several learning settings differing in the feedback provided to the learner. Our algorithms rely on a novel loss estimation technique that we call Counting Asleep Times. We deliver regret bounds for our algorithms for the previously studied full information and (semi-)bandit settings, as well as a natural middle point between the two that we call the restricted information setting. A special consequence of our results is a significant improvement of the best known performance guarantees achieved by an efficient algorithm for the sleeping bandit problem with stochastic availability. Finally, we evaluate our algorithms empirically and show their improvement over the known approaches.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Extreme bandits <ref xlink:href="#sequel-2014-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>In many areas of medicine, security, and life sciences, we want to allocate limited resources to different sources in order to detect extreme values. In this paper, we study an efficient way to allocate these resources sequentially under limited feedback. While sequential design of experiments is well studied in bandit theory, the most commonly optimized property is the regret with respect to the maximum mean reward. However, in other problems such as network intrusion detection, we are interested in detecting the most extreme value output by the sources. Therefore, in our work we study extreme regret which measures the efficiency of an algorithm compared to the oracle policy selecting the source with the heaviest tail. We propose the ExtremeHunter algorithm, provide its analysis, and evaluate it empirically on synthetic and real-world experiments.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Efficient learning by implicit exploration in bandit problems with side observations <ref xlink:href="#sequel-2014-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider online learning problems under a a partial observability model capturing situations where the information conveyed to the learner is between full information and bandit feedback. In the simplest variant, we assume that in addition to its own loss, the learner also gets to observe losses of some other actions. The revealed losses depend on the learner's action and a directed observation system chosen by the environment. For this setting, we propose the first algorithm that enjoys near-optimal regret guarantees without having to know the observation system before selecting its actions. Along similar lines, we also define a new partial information setting that models online combinatorial optimization problems where the feedback received by the learner is between semi-bandit and full feedback. As the predictions of our first algorithm cannot be always computed efficiently in this setting, we propose another algorithm with similar properties and with the benefit of always being computationally efficient, at the price of a slightly more complicated tuning mechanism. Both algorithms rely on a novel exploration strategy called implicit exploration, which is shown to be more efficient both computationally and information-theoretically than previously studied exploration strategies for the problem.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Best-Arm Identification in Linear Bandits <ref xlink:href="#sequel-2014-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We study the best-arm identification problem in linear bandit, where the rewards of the arms depend linearly on an unknown parameter <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>θ</mi><mo>*</mo></msup></math></formula> and the objective is to return the arm with the largest reward. We characterize the complexity of the problem and introduce sample allocation strategies that pull arms to identify the best arm with a fixed confidence, while minimizing the sample budget. In particular, we show the importance of exploiting the global linear structure to improve the estimate of the reward of near-optimal arms. We analyze the proposed strategies and compare their empirical performance. Finally, we point out the connection to the <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>G</mi></math></formula>-optimality criterion used in optimal experimental design.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Exploiting easy data in online optimization <ref xlink:href="#sequel-2014-bid22" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the problem of online optimization, where a learner chooses a decision from a given decision set and suffers some loss associated with the decision and the state of the environment. The learner's objective is to minimize its cumulative regret against the best fixed decision in hindsight. Over the past few decades numerous variants have been considered, with many algorithms designed to achieve sub-linear regret in the worst case. However, this level of robustness comes at a cost. Proposed algorithms are often over-conservative, failing to adapt to the actual complexity of the loss sequence which is often far from the worst case. In this paper we introduce a general algorithm that, provided with a "safe" learning algorithm and an opportunistic "benchmark", can effectively combine good worst-case guarantees with much improved performance on "easy" data. We derive general theoretical bounds on the regret of the proposed algorithm and discuss its implementation in a wide range of applications, notably in the problem of learning with shifting experts (a recent COLT open problem). Finally, we provide numerical simulations in the setting of prediction with expert advice with comparisons to the state of the art.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Spectral Bandits for Smooth Graph Functions <ref xlink:href="#sequel-2014-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Smooth functions on graphs have wide applications in manifold and semi-supervised learning. In this paper, we study a bandit problem where the payoffs of arms are smooth on a graph. This framework is suitable for solving online learning problems that involve graphs, such as content-based recommendation. In this problem, each item we can recommend is a node and its expected rating is similar to its neighbors. The goal is to recommend items that have high expected ratings. We aim for the algorithms where the cumulative regret with respect to the optimal policy would not scale poorly with the number of nodes. In particular, we introduce the notion of an effective dimension, which is small in real-world graphs, and propose two algorithms for solving our problem that scale linearly and sublinearly in this dimension. Our experiments on real-world content recommendation problem show that a good estimator of user preferences for thousands of items can be learned from just tens of nodes evaluations.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Regret bounds for restless Markov bandits <ref xlink:href="#sequel-2014-bid24" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the restless Markov bandit problem, in which the state of each arm evolves according to a Markov process independently of the learner's actions. We suggest an algorithm, that first represents the setting as an MDP which exhibits some special structural properties. In order to grasp this information we introduce the notion of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula>-structured MDPs, which are a generalization of concepts like (approximate) state aggregation and MDP homomorphisms. We propose a general algorithm for learning <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula>-structured MDPs and show regret bounds that demonstrate that additional structural information enhances learning. Applied to the restless bandit setting, this algorithm achieves after any <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>T</mi></math></formula> steps regret of order <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mover accent="true"><mi>O</mi><mo>˜</mo></mover><mrow><mo>(</mo><msup><mi>T</mi><mrow><mn>1</mn><mo>/</mo><mn>2</mn></mrow></msup><mo>)</mo></mrow></mrow></math></formula> with respect to the best policy that knows the distributions of all arms. We make no assumptions on the Markov chains underlying each arm except that they are irreducible. In addition, we show that index-based policies are necessarily suboptimal for the considered problem.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Spectral Thompson Sampling <ref xlink:href="#sequel-2014-bid25" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Thompson Sampling (TS) has surged a lot of interest due to its good empirical performance, in particular in the computational advertising. Though successful, the tools for its performance analysis appeared only recently. In this paper, we describe and analyze SpectralTS algorithm for a bandit problem, where the payoffs of the choices are smooth given an underlying graph. In this setting, each choice is a node of a graph and the expected payoffs of the neighboring nodes are assumed to be similar. Although the setting has application both in recommender systems and advertising, the traditional algorithms would scale poorly with the number of choices. For that purpose we consider an effective dimension d, which is small in real-world graphs. We deliver the analysis showing that the regret of SpectralTS scales as <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>d</mi><msup><mrow><mo>(</mo><mi>T</mi><mi> ln </mi><mi>N</mi><mo>)</mo></mrow><mrow><mn>1</mn><mo>/</mo><mn>2</mn></mrow></msup></mrow></math></formula> with high probability, where T is the time horizon and N is the number of choices. Since a d sqrt(T ln N) regret is comparable to the known results, SpectralTS offers a computationally more efficient alternative. We also show that our algorithm is competitive on both synthetic and real-world data.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
      <subsection id="uid81" level="2">
        <bodyTitle>Recommendation systems</bodyTitle>
        <p>
          <i>
            <b>User Engagement as Evaluation: a Ranking or a Regression Problem? <ref xlink:href="#sequel-2014-bid26" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>In this paper, we describe the winning approach used on the RecSys Challenge 2014 which focuses on employing user en-gagement as evaluation of recommendations. On one hand, we regard the challenge as a ranking problem and apply the LambdaMART algorithm, which is a listwise model special-ized in a Learning To Rank approach. On the other hand, after noticing some specific characteristics of this challenge, we also consider it as a regression problem and use pointwise regression models such as Random Forests. We compare how these different methods can be modified or combined to improve the accuracy and robustness of our model and we draw the advantages or disadvantages of each approach.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Improving offline evaluation of contextual bandit algorithms via bootstrapping techniques <ref xlink:href="#sequel-2014-bid27" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>In many recommendation applications such as news recommendation, the items that can be recommended come and go at a very fast pace. This is a challenge for recommender systems (RS) to face this setting. Online learning algorithms seem to be the most straight forward solution. The contextual bandit framework was introduced for that very purpose. In general the evaluation of a RS is a critical issue. Live evaluation is often avoided due to the potential loss of revenue, hence the need for offline evaluation methods. Two options are available. Model based meth- ods are biased by nature and are thus difficult to trust when used alone. Data driven methods are therefore what we consider here. Evaluat- ing online learning algorithms with past data is not simple but some methods exist in the litera- ture. Nonetheless their accuracy is not satisfac- tory mainly due to their mechanism of data re- jection that only allow the exploitation of a small fraction of the data. We precisely address this issue in this paper. After highlighting the limita- tions of the previous methods, we present a new method, based on bootstrapping techniques. This new method comes with two important improve- ments: it is much more accurate and it provides a measure of quality of its estimation. The latter is a highly desirable property in order to minimize the risks entailed by putting online a RS for the first time. We provide both theoretical and ex- perimental proofs of its superiority compared to state-of-the-art methods, as well as an analysis of the convergence of the measure of quality.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Bandits Warm-up Cold Recommender Systems <ref xlink:href="#sequel-2014-bid28" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We address the cold start problem in recommendation systems assuming no contextual information is available neither about users, nor items. We consider the case in which we only have access to a set of ratings of items by users. Most of the existing works consider a batch setting, and use cross-validation to tune parameters. The classical method consists in minimizing the root mean square error over a training subset of the ratings which provides a factorization of the matrix of ratings, interpreted as a latent representation of items and users. Our contribution in this paper is 5-fold. First, we explicit the issues raised by this kind of batch setting for users or items with very few ratings. Then, we propose an online setting closer to the actual use of recommender systems; this setting is inspired by the bandit framework. The proposed methodology can be used to turn any recommender system dataset (such as Netflix, MovieLens,...) into a sequential dataset. Then, we explicit a strong and insightful link between contextual bandit algorithms and matrix factorization; this leads us to a new algorithm that tackles the exploration/exploitation dilemma associated to the cold start problem in a strikingly new perspective. Finally, experimental evidence confirm that our algorithm is effective in dealing with the cold start problem on publicly available datasets. Overall, the goal of this paper is to bridge the gap between recommender systems based on matrix factorizations and those based on contextual bandits.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
      <subsection id="uid82" level="2">
        <bodyTitle>Nonparametric statistics of time series</bodyTitle>
        <p>
          <i>
            <b>Uniform hypothesis testing for finite-valued stationary processes <ref xlink:href="#sequel-2014-bid29" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Given a discrete-valued sample <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>X</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>X</mi><mi>n</mi></msub></mrow></math></formula> we wish to decide whether it was generated by a distribution belonging to a family <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula>, or it was generated by a distribution belonging to a family <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula>. In this work we assume that all distributions are stationary ergodic, and do not make any further assumptions (e.g. no independence or mixing rate assumptions). We would like to have a test whose probability of error (both Type I and Type II) is uniformly bounded. More precisely, we require that for each <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula> there exist a sample size <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>n</mi></math></formula> such that probability of error is upper-bounded by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula> for samples longer than <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>n</mi></math></formula>. We find some necessary and some sufficient conditions on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula> under which a consistent test (with this notion of consistency) exists. These conditions are topological, with respect to the topology of distributional distance.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Asymptotically consistent estimation of the number of change points in highly dependent time series <ref xlink:href="#sequel-2014-bid30" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>The problem of change point estimation is considered in a general framework where the data are generated by arbitrary unknown stationary ergodic process distributions. This means that the data may have long-range dependencies of an arbitrary form. In this context the consistent estimation of the number of change points is provably impossible. A formulation is proposed which overcomes this obstacle: it is possible to find the correct number of change points at the expense of introducing the additional constraint that the correct number of process distributions that generate the data is provided. This additional parameter has a natural interpretation in many real-world applications. It turns out that in this formulation change point estimation can be reduced to time series clustering. Based on this reduction, an algorithm is proposed that finds the number of change points and locates the changes. This algorithm is shown to be asymptotically consistent. The theoretical results are complemented with empirical evaluations.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
    </subsection>
    <subsection id="uid83" level="1">
      <bodyTitle>Statistical Learning and Bayesian Analysis</bodyTitle>
      <subsection id="uid84" level="2">
        <bodyTitle>Prediction of Sequences of Structured and Unstructured Data</bodyTitle>
        <p>
          <i>
            <b>Statistical performance analysis of a fast super-resolution technique using noisy translations <ref xlink:href="#sequel-2014-bid31" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>It is well known that the registration process is a key step for super-resolution reconstruction. In this work, we propose to use a piezoelectric system that is easily adaptable on all microscopes and telescopes for controlling accurately their motion (down to nanometers) and therefore acquiring multiple images of the same scene at different controlled positions. Then a fast super-resolution algorithm can be used for efficient super-resolution reconstruction. In this case, the optimal use of r2 images for a resolution enhancement factor r is generally not enough to obtain satisfying results due to the random inaccuracy of the positioning system. Thus we propose to take several images around each reference position. We study the error produced by the super-resolution algorithm due to spatial uncertainty as a function of the number of images per position. We obtain a lower bound on the number of images that is necessary to ensure a given error upper bound with probability higher than some desired confidence level.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Quantitative control of the error bounds of a fast super-resolution technique for microscopy and astronomy <ref xlink:href="#sequel-2014-bid32" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>While the registration step is often problematic for super-resolution, many microscopes and telescopes are now equipped with a piezoelectric mechanical system which permits to ac-curately control their motion (down to nanometers). There-fore one can use such devices to acquire multiple images of the same scene at various controlled positions. Then a fast super-resolution algorithm [1] can be used for efficient super-resolution. However the minimal use of r 2 images for a resolution enhancement factor r is generally not sufficient to obtain good results. We propose to take several images at po-sitions randomly distributed close to each reference position. We study the number of images necessary to control the error resulting from the super-resolution algorithm by [1] due to the uncertainty on positions. The main result is a lower bound on the number of images to respect a given error upper bound with probability higher than a desired confidence level.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
      <subsection id="uid85" level="2">
        <bodyTitle>Statistical analysis of superresolution</bodyTitle>
        <p>
          <i>
            <b>A diffusion strategy for distributed dictionary learning <ref xlink:href="#sequel-2014-bid33" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the problem of a set of nodes which is required to collectively learn a common dictionary from noisy measurements. This distributed dictionary learning approach may be useful in several contexts including sensor networks. Dif-fusion cooperation schemes have been proposed to estimate a consensus solution to distributed linear regression. This work proposes a diffusion-based adaptive dictionary learning strategy. Each node receives measurements which may be shared or not with its neighbors. All nodes cooperate with their neighbors by sharing their local dictionary to estimate a common representa-tion. In a diffusion approach, the resulting algorithm corresponds to a distributed alternate optimization. Beyond dictionary learn-ing, this strategy could be adapted to many matrix factorization problems in various settings. We illustrate its efficiency on some numerical experiments, including the difficult problem of blind hyperspectral images unmixing.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
    </subsection>
    <subsection id="uid86" level="1">
      <bodyTitle>Miscellaneous</bodyTitle>
      <subsection id="uid87" level="2">
        <bodyTitle>Miscellaneous</bodyTitle>
        <p>
          <i>
            <b>Online Matrix Completion Through Nuclear Norm Regularisation <ref xlink:href="#sequel-2014-bid34" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>It is the main goal of this paper to propose a novel method to perform matrix completion on-line. Motivated by a wide variety of applications, ranging from the design of recommender systems to sensor network localization through seismic data reconstruction, we consider the matrix completion problem when entries of the matrix of interest are observed gradually. Precisely, we place ourselves in the situation where the predictive rule should be refined incrementally, rather than recomputed from scratch each time the sample of observed entries increases. The extension of existing matrix completion methods to the sequential prediction context is indeed a major issue in the Big Data era, and yet little addressed in the literature. The algorithm promoted in this article builds upon the Soft Impute approach introduced in Mazumder et al. (2010). The major novelty essentially arises from the use of a randomised technique for both computing and updating the Singular Value Decomposition (SVD) involved in the algorithm. Though of disarming simplicity, the method proposed turns out to be very efficient, while requiring reduced computations. Several numerical experiments based on real datasets illustrating its performance are displayed, together with preliminary results giving it a theoretical basis.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Synthèse en espace et temps du rayonnement acoustique d'une paroi sous excitation turbulente par synthèse spectrale 2D+T et formulation vibro-acoustique directe <ref xlink:href="#sequel-2014-bid35" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Une méthode directe pour simuler les vibrations et le rayonnement acoustique d'une paroi soumise à un écoulement subsonique est proposée. Tout d'abord, en adoptant l'hypothèse d'un écoulement homogène et stationnaire, on montre qu'une méthode de synthèse spectrale en espace et temps (2D+t) est suffisante pour obtenir explicitement une réalisation d'un champ de pression pariétale excitatrice p(x,y,t) dont les propriétés inter-spectrales sont prescrites par un modèle empirique de Chase. Cette pression turbulente p(x,y,t) est obtenue explicitement et permet de résoudre le problème vibro-acoustique de la paroi dans une formulation directe. La méthode proposée fournit ainsi une solution complète du problème dans le domaine spatio-temporel : pression excitatrice, déplacement en flexion et pression acoustique rayonnée par la paroi. Une caractéristique de la méthode proposée est un cout de calcul qui s'avère similaire aux formulations inter-spectrales majoritairement utilisées dans la littérature. En particulier, la synthèse permet de prendre en compte l'intégralité des échelles spatio-temporelles du problème : échelles turbulentes, vibratoires et acoustiques. A titre d'exemple, la pression aux oreilles d'un auditeur suite à l'excitation turbulente de la paroi est synthétisée.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Bandits attack function optimization <ref xlink:href="#sequel-2014-bid36" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider function optimization as a sequential decision making problem under the budget constraint. Such constraint limits the number of objective function evaluations allowed during the optimization. We consider an algorithm inspired by a continuous version of a multi-armed bandit problem which attacks this optimization problem by solving the tradeoff between exploration (initial quasi-uniform search of the domain) and exploitation (local optimization around the potentially global maxima). We introduce the so-called Simultaneous Optimistic Optimization (SOO), a deterministic algorithm that works by domain partitioning. The benefit of such an approach are the guarantees on the returned solution and the numerical eficiency of the algorithm. We present this machine learning rooted approach to optimization, and provide the empirical assessment of SOO on the CEC'2014 competition on single objective real-parameter numerical optimization testsuite.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Optimistic planning in Markov decision processes using a generative model <ref xlink:href="#sequel-2014-bid37" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the problem of online planning in a Markov decision process with discounted rewards for any given initial state. We consider the PAC sample com-plexity problem of computing, with probability 1−δ, an -optimal action using the smallest possible number of calls to the generative model (which provides reward and next-state samples). We design an algorithm, called StOP (for Stochastic-Optimistic Planning), based on the "optimism in the face of uncertainty" princi-ple. StOP can be used in the general setting, requires only a generative model, and enjoys a complexity bound that only depends on the local structure of the MDP.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Near-Optimal Rates for Limited-Delay Universal Lossy Source Coding <ref xlink:href="#sequel-2014-bid38" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the problem of limited-delay lossy coding of individual sequences. Here, the goal is to design (fixed-rate) compression schemes to minimize the normalized expected distortion redundancy relative to a reference class of coding schemes, measured as the difference between the average distortion of the algorithm and that of the best coding scheme in the reference class. In compressing a sequence of length <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>T</mi></math></formula>, the best schemes available in the literature achieve an <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msup><mi>T</mi><mrow><mo>-</mo><mn>1</mn><mo>/</mo><mn>3</mn></mrow></msup></mrow></math></formula>) normalized distortion redundancy relative to finite reference classes of limited delay and limited memory, and the same redundancy is achievable, up to logarithmic factors, when the reference class is the set of scalar quantizers. It has also been shown that the distortion redundancy is at least of order <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>T</mi><mrow><mo>-</mo><mn>1</mn><mo>/</mo><mn>2</mn></mrow></msup></math></formula> in the latter case, and the lower bound can easily be extended to sufficiently powerful (possibly finite) reference coding schemes. In this paper, we narrow the gap between the upper and lower bounds, and give a compression scheme whose normalized distortion redundancy is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mi>l</mi><mi>n</mi><mrow><mo>(</mo><mi>T</mi><mo>)</mo></mrow><mo>/</mo><msup><mi>T</mi><mrow><mn>1</mn><mo>/</mo><mn>2</mn></mrow></msup><mo>)</mo></mrow></math></formula> relative to any finite class of reference schemes, only a logarithmic factor larger than the lower bound. The method is based on the recently introduced shrinking dartboard prediction algorithm, a variant of exponentially weighted average prediction. The algorithm is also extended to the problem of joint source-channel coding over a (known) stochastic noisy channel and to the case when side information is also available to the decoder (the Wyner–Ziv setting). The same improvements are obtained for these settings as in the case of a noiseless channel. Our method is also applied to the problem of zero-delay scalar quantization, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mi>l</mi><mi>n</mi><mrow><mo>(</mo><mi>T</mi><mo>)</mo></mrow><mo>/</mo><msup><mi>T</mi><mrow><mn>1</mn><mo>/</mo><mn>2</mn></mrow></msup><mo>)</mo></mrow></math></formula> normalized distortion redundancy is achieved relative to the (infinite) class of scalar quantizers of a given rate, almost achieving the known lower bound of order <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>1</mn><mo>/</mo><msup><mi>T</mi><mrow><mo>-</mo><mn>1</mn><mo>/</mo><mn>2</mn></mrow></msup></mrow></math></formula>. The computationally efficient algorithms known for scalar quantization and the Wyner–Ziv setting carry over to our (improved) coding schemes presented in this paper.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Online Markov Decision Processes Under Bandit Feedback <ref xlink:href="#sequel-2014-bid39" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Software systems are composed of many interacting elements. A natural way to abstract over software systems is to model them as graphs. In this paper we consider software dependency graphs of object-oriented software and we study one topological property: the degree distribution. Based on the analysis of ten software systems written in Java, we show that there exists completely different systems that have the same degree distribution. Then, we propose a generative model of software dependency graphs which synthesizes graphs whose degree distribution is close to the empirical ones observed in real software systems. This model gives us novel insights on the potential fundamental rules of software evolution.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>A Generative Model of Software Dependency Graphs to Better Understand Software Evolution <ref xlink:href="#sequel-2014-bid40" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Software systems are composed of many interacting elements. A natural way to abstract over software systems is to model them as graphs. In this paper we consider software dependency graphs of object-oriented software and we study one topological property: the degree distribution. Based on the analysis of ten software systems written in Java, we show that there exists completely different systems that have the same degree distribution. Then, we propose a generative model of software dependency graphs which synthesizes graphs whose degree distribution is close to the empirical ones observed in real software systems. This model gives us novel insights on the potential fundamental rules of software evolution.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Preference-Based Rank Elicitation using Statistical Models: The Case of Mallows <ref xlink:href="#sequel-2014-bid41" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We address the problem of rank elicitation as-suming that the underlying data generating pro-cess is characterized by a probability distribu-tion on the set of all rankings (total orders) of a given set of items. Instead of asking for complete rankings, however, our learner is only allowed to query pairwise preferences. Using information of that kind, the goal of the learner is to reliably predict properties of the distribution, such as the most probable top-item, the most probable rank-ing, or the distribution itself. More specifically, learning is done in an online manner, and the goal is to minimize sample complexity while guaran-teeing a certain level of confidence.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Preference-based reinforcement learning: evolutionary direct policy search using a preference-based racing algorithm <ref xlink:href="#sequel-2014-bid42" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We introduce a novel approach to preference-based reinforcement learn-ing, namely a preference-based variant of a direct policy search method based on evolutionary optimization. The core of our approach is a preference-based racing algorithm that selects the best among a given set of candidate policies with high probability. To this end, the algorithm operates on a suitable ordinal preference structure and only uses pairwise comparisons between sample rollouts of the policies. Embedding the racing algorithm in a rank-based evolutionary search procedure, we show that approxima-tions of the so-called Smith set of optimal policies can be produced with certain theoretical guarantees. Apart from a formal performance and complexity analysis, we present first experimental studies showing that our approach performs well in practice.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Biclique Coverings, Rectifier Networks and the Cost of ε-Removal <ref xlink:href="#sequel-2014-bid43" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We relate two complexity notions of bipartite graphs: the minimal weight biclique covering number Cov(G) and the minimal rec-tifier network size Rect(G) of a bipartite graph G. We show that there exist graphs with Cov(G) ≥ Rect(G) 3/2−ǫ . As a corollary, we estab-lish that there exist nondeterministic finite automata (NFAs) with ε-transitions, having n transitions total such that the smallest equivalent ε-free NFA has Ω(n 3/2−ǫ) transitions. We also formulate a version of previous bounds for the weighted set cover problem and discuss its con-nections to giving upper bounds for the possible blow-up.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Efficient Eigen-updating for Spectral Graph Clustering <ref xlink:href="#sequel-2014-bid44" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Partitioning a graph into groups of vertices such that those within each group are more densely connected than vertices assigned to different groups, known as graph clustering, is often used to gain insight into the organisation of large scale networks and for visualisation purposes. Whereas a large number of dedicated techniques have been recently proposed for static graphs, the design of on-line graph clustering methods tailored for evolving networks is a challenging problem, and much less documented in the literature. Motivated by the broad variety of applications concerned, ranging from the study of biological networks to the analysis of networks of scientific references through the exploration of communications networks such as the World Wide Web, it is the main purpose of this paper to introduce a novel, computationally efficient, approach to graph clustering in the evolutionary context. Namely, the method promoted in this article can be viewed as an incremental eigenvalue solution for the spectral clustering method described by Ng. et al. (2001). The incremental eigenvalue solution is a general technique for finding the approximate eigenvectors of a symmetric matrix given a change. As well as outlining the approach in detail, we present a theoretical bound on the quality of the approximate eigenvectors using perturbation theory. We then derive a novel spectral clustering algorithm called Incremental Approximate Spectral Clustering (IASC). The IASC algorithm is simple to implement and its efficacy is demonstrated on both synthetic and real datasets modelling the evolution of a HIV epidemic, a citation network and the purchase history graph of an e-commerce website.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>From Bandits to Monte-Carlo Tree Search: The Optimistic Principle Applied to Optimization and Planning <ref xlink:href="#sequel-2014-bid45" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>This work covers several aspects of the optimism in the face of uncertainty principle applied to large scale optimization problems under finite numerical budget. The initial motivation for the research reported here originated from the empirical success of the so-called Monte-Carlo Tree Search method popularized in computer-go and further extended to many other games as well as optimization and planning problems. Our objective is to contribute to the development of theoretical foundations of the field by characterizing the complexity of the underlying optimization problems and designing efficient algorithms with performance guarantees. The main idea presented here is that it is possible to decompose a complex decision making problem (such as an optimization problem in a large search space) into a sequence of elementary decisions, where each decision of the sequence is solved using a (stochastic) multi-armed bandit (simple mathematical model for decision making in stochastic environments). This so-called hierarchical bandit approach (where the reward observed by a bandit in the hierarchy is itself the return of another bandit at a deeper level) possesses the nice feature of starting the exploration by a quasi-uniform sampling of the space and then focusing progressively on the most promising area, at different scales, according to the evaluations observed so far, and eventually performing a local search around the global optima of the function. The performance of the method is assessed in terms of the optimality of the returned solution as a function of the number of function evaluations. Our main contribution to the field of function optimization is a class of hierarchical optimistic algorithms designed for general search spaces (such as metric spaces, trees, graphs, Euclidean spaces, ...) with different algorithmic instantiations depending on whether the evaluations are noisy or noiseless and whether some measure of the ”smoothness” of the function is known or unknown. The performance of the algorithms depend on the local behavior of the function around its global optima expressed in terms of the quantity of near-optimal states measured with some metric. If this local smoothness of the function is known then one can design very efficient optimization algorithms (with convergence rate independent of the space dimension), and when it is not known, we can build adaptive techniques that can, in some cases, perform almost as well as when it is known.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
    </subsection>
  </resultats>
  <contrats id="uid88">
    <bodyTitle>Bilateral Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid89" level="1">
      <bodyTitle>Bilateral Contracts with Industry</bodyTitle>
      <simplelist>
        <li id="uid90">
          <p noindent="true"><ref xlink:href="http://www.deezer.com" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Deezer</ref>, 2013-2014</p>
          <participants>
            <person key="sequel-2014-idp76928">
              <firstname>Jérémie</firstname>
              <lastname>Mary</lastname>
            </person>
            <person key="sequel-2014-idm27568">
              <firstname>Philippe</firstname>
              <lastname>Preux</lastname>
            </person>
            <person key="sequel-2014-idp75664">
              <firstname>Romaric</firstname>
              <lastname>Gaudel</lastname>
            </person>
          </participants>
          <p>A research project has started on June 2013 in collaboration with the
Deezer company. The goal is to build a system which automatically
recommends music to users. That goal is an extension of the bandit
setting to the Collaborative Filtering problem.</p>
        </li>
        <li id="uid91">
          <p noindent="true"><ref xlink:href="http://www.nuukik.com" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Nuukik</ref>, 2013-2014</p>
          <participants>
            <person key="sequel-2014-idp76928">
              <firstname>Jérémie</firstname>
              <lastname>Mary</lastname>
            </person>
          </participants>
          <p>Nuukik is a start-up from Hub Innovation in Lille. It proposes a recommender systems for e-commerce based on matrix factorization. We worked with them specifically on the cold start problem (<i>i.e</i> when you have absolutely no data on a product or a customer). This led to promising result and allowed us to close the gap between bandits and matrix factorization. This work led to a patent submission in december 2013.</p>
        </li>
        <li id="uid92">
          <p noindent="true"><ref xlink:href="http://www.squoring.com/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Squoring Technologies</ref>, 2011-2014</p>
          <participants>
            <person key="sequel-2014-idp98704">
              <firstname>Boris</firstname>
              <lastname>Baldassari</lastname>
            </person>
            <person key="sequel-2014-idm27568">
              <firstname>Philippe</firstname>
              <lastname>Preux</lastname>
            </person>
          </participants>
          <p>Boris Baldassari has been hired by Squoring Technologies (Toulouse) as a PhD student in May 2011. He works on the use of machine learning to improve the quality of the software development process. During his first year as a PhD student, Boris investigated the existing norms and measures of quality of software development process. He also dedicated some time to gather some relevant datasets, which are made of either the sequence of source code releases over a multi-years period, or all the versions stored on an svn repository (svn or alike). Information from mailing-lists (bugs, support, ...) may also be part of these datasets. Tools in machine learning capable of dealing with this sort of data have also been investigated. Goals that may be reached in this endeavor have also been precised.</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid93" level="1">
      <bodyTitle>Bilateral Grants with Industry</bodyTitle>
      <simplelist>
        <li id="uid94">
          <p noindent="true"><ref xlink:href="http://www.intel.com/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">INTEL Corp.</ref>, 2013 - 2014</p>
          <participants>
            <person key="sequel-2014-idm27568">
              <firstname>Philippe</firstname>
              <lastname>Preux</lastname>
            </person>
            <person key="sequel-2014-idp70232">
              <firstname>Michal</firstname>
              <lastname>Valko</lastname>
            </person>
            <person key="sequel-2014-idp67360">
              <firstname>Rémi</firstname>
              <lastname>Munos</lastname>
            </person>
            <person key="sequel-2014-idp107400">
              <firstname>Adrien</firstname>
              <lastname>Hoarau</lastname>
            </person>
          </participants>
          <p>This is a research project on Algorithmic Determination of IoT Edge Analytics
Requirements. We are attempting to solve the problem of how to automatically
predict the system requirements for edge node analytics in the Internet of
Things (IoT).
We envision that a flexible extensible system of edge analytics can be
created for IoT management; however, edge nodes can be very different in
terms of the systems requirements around: processing capability, wireless
communication, security/cryptography, guaranteed responsiveness,
guaranteed quality of service and on-board memory requirements. One
of the challenges of managing a heterogeneous Internet of Things is
determining the systems requirements at each edge node in the network.</p>
          <p>We suggest exploiting opportunity of being able to automatically
customize large scale IoT systems that could comprise heterogeneous
edge nodes and allow a flexible and scalable component and firmware
SoC systems to be matched to the individual need of enterprise/
government level IoT customers. We propose using large scale
sequential decision learning algorithms, particularly contextual bandit
modeling to automatically determine the systems requirements for edge
analytics. These algorithms have an adaptive property that allows for
the addition of new nodes and the re-evaluation of existing nodes under
dynamic and potentially adversarial conditions.</p>
        </li>
      </simplelist>
    </subsection>
  </contrats>
  <partenariat id="uid95">
    <bodyTitle>Partnerships and Cooperations</bodyTitle>
    <subsection id="uid96" level="1">
      <bodyTitle>Regional Initiatives</bodyTitle>
      <p>Pierre Chainais and Hong-Phuong Dang are part of the ARCIR project <i>REPAR</i>, PARcimonious REpresentations, which is funded by the Region Nord-Pas de Calais for 2 years. This project is focused on sparsity based methods for signal and image processing. It has permitted to hire 1 postdoc for 1 year (2014-2015) who works on the use of sparse representation for video-tracking. The targetted application is in biological microscopy to track cellular vesiculas (collab. Laurent Héliot, Aymeric Leray, Univ. Lille 1).</p>
    </subsection>
    <subsection id="uid97" level="1">
      <bodyTitle>National Initiatives</bodyTitle>
      <subsection id="uid98" level="2">
        <bodyTitle>ANR BNPSI</bodyTitle>
        <participants>
          <person key="sequel-2014-idp71472">
            <firstname>Pierre</firstname>
            <lastname>Chainais</lastname>
          </person>
          <person key="sequel-2014-idp121328">
            <firstname>Hong-Phuong</firstname>
            <lastname>Dang</lastname>
          </person>
          <person key="sequel-2014-idp122560">
            <firstname>Clément</firstname>
            <lastname>Elvira</lastname>
          </person>
          <person key="sequel-2014-idp74216">
            <firstname>Emmanuel</firstname>
            <lastname>Duflos</lastname>
          </person>
          <person key="sequel-2014-idp78200">
            <firstname>Philippe</firstname>
            <lastname>Vanheeghe</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid99">
            <p noindent="true"><i>Title</i>: Bayesian Non Parametric approaches for
Signal and Image Processing</p>
          </li>
          <li id="uid100">
            <p noindent="true"><i>Type</i>: National Research Agency no ANR-13-BS-03-0006-01</p>
          </li>
          <li id="uid101">
            <p noindent="true"><i>Coordinator</i>: Ecole Centrale Lille, LAGIS (P. Chainais)</p>
          </li>
          <li id="uid102">
            <p noindent="true"><i>Duration</i>: 2014-2018</p>
          </li>
          <li id="uid103">
            <p noindent="true"><i>Other Partners</i>: Inria Bordeaux, team ALEA,
Université de Bordeaux, IMS,
Institut de Recherche en Indormatique de Toulouse (IRIT),
CEA-LIST Saclay.</p>
          </li>
          <li id="uid104">
            <p noindent="true"><i>Abstract</i>: Statistical methods have become more and more popular in signal and image processing over the past decades. These methods have been able to tackle various applications such as speech recognition, object tracking, image segmentation or restoration, classification, clustering, etc.
We propose here to investigate the use of Bayesian nonparametric methods in statistical signal and image processing. Similarly to Bayesian parametric methods, this set of methods is concerned with the elicitation of prior and computation of posterior distributions, but now on infinite-dimensional parameter spaces. Although these methods have become very popular in statistics and machine learning over the last 15 years, their potential is largely underexploited in signal and image processing. The aim of the overall project, which gathers researchers in applied probabilities, statistics, machine learning and signal and image processing, is to develop a new framework for the statistical signal and image processing communities. Based on results from statistics and machine learning we aim at defining new models, methods and algorithms for statistical signal and image processing. Applications to hyperspectral image analysis, image segmentation, GPS localization, image restoration or space-time tomographic reconstruction will allow various concrete illustrations of the theoretical advances and validation on real data coming from realistic contexts.</p>
          </li>
          <li id="uid105">
            <p noindent="true"><i>Activity Report</i>: This ANR Project was accepted in 2013. It has started in february 2014 on a new area of research for signal and image processing and is supervised by Pierre Chainais. Three meetings have taken place in Lille (in February), Toulouse (in June) and Bordeaux (in November). One special session on Bayesian non parametric approaches has been submitted and accepted to the international conference EUSIPCO 2015. We have also been selected by the Franch National Signal &amp; Image Processing Society (GRETSI) to organize the Peyresq 2016 Signal processing summer school. Two PhD students have been recruited in october 2014 thanks to this project: Clément Elvira works in Lille is co-supervised by P. Chainais and N. Dobigeon (Toulouse), Jessica Sodjo works in Bordeaux and is co-supervised by A. Giremus (IMS), N. Dobigeon (Toulouse) and F. Caron (Oxford). Moreover, Hong-Phuong Dang (PhD, 2nd year) has obtained new results on BNP for dictionary learning. The Indian Buffet Process permits to propose a method to learn a dictionary of which size automatically adapts to data. Several publications are in preparation. François Caron who is co-leading this project with Pierre Chainais has moved to Oxford University as an Assistant Professor so that we will benefit from strong connections with the Statistics Departmnt in Oxford University.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid106" level="2">
        <bodyTitle>ANR ExTra-Learn</bodyTitle>
        <participants>
          <person key="sequel-2014-idm26088">
            <firstname>Alessandro</firstname>
            <lastname>Lazaric</lastname>
          </person>
          <person key="sequel-2014-idp76928">
            <firstname>Jérémie</firstname>
            <lastname>Mary</lastname>
          </person>
          <person key="sequel-2014-idp67360">
            <firstname>Rémi</firstname>
            <lastname>Munos</lastname>
          </person>
          <person key="sequel-2014-idp70232">
            <firstname>Michal</firstname>
            <lastname>Valko</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid107">
            <p noindent="true"><i>Title</i>: Extraction and Transfer of Knowledge in Reinforcement Learning</p>
          </li>
          <li id="uid108">
            <p noindent="true"><i>Type</i>: National Research Agency (ANR-9011)</p>
          </li>
          <li id="uid109">
            <p noindent="true"><i>Coordinator</i>: Inria Lille (A. Lazaric)</p>
          </li>
          <li id="uid110">
            <p noindent="true"><i>Duration</i>: 2014-2018</p>
          </li>
          <li id="uid111">
            <p noindent="true"><i>Abstract</i>:
ExTra-Learn is directly motivated by the evidence that one of the key features that allows humans to accomplish complicated tasks is their ability of building knowledge from past experience and transfer it while learning new tasks. We believe that integrating transfer of learning in machine learning algorithms will dramatically improve their learning performance and enable them to solve complex tasks. We identify in the reinforcement learning (RL) framework the most suitable candidate for this integration. RL formalizes the problem of learning an optimal control policy from the experience directly collected from an unknown environment. Nonetheless, practical limitations of current algorithms encouraged research to focus on how to integrate prior knowledge into the learning process. Although this improves the performance of RL algorithms, it dramatically reduces their autonomy. In this project we pursue a paradigm shift from designing RL algorithms incorporating prior knowledge, to methods able to incrementally discover, construct, and transfer “prior” knowledge in a fully automatic way. More in detail, three main elements of RL algorithms would significantly benefit from transfer of knowledge. <i>(i)</i> For every new task, RL algorithms need exploring the environment for a long time, and this corresponds to slow learning processes for large environments. Transfer learning would enable RL algorithms to dramatically reduce the exploration of each new task by exploiting its resemblance with tasks solved in the past.
<i>(ii)</i> RL algorithms evaluate the quality of a policy by computing its state-value function. Whenever the number of states is too large, approximation is needed. Since approximation may cause instability, designing suitable approximation schemes is particularly critical. While this is currently done by a domain expert, we propose to perform this step automatically by constructing features that incrementally adapt to the tasks encountered over time. This would significantly reduce human supervision and increase the accuracy and stability of RL algorithms across different tasks.
<i>(iii)</i> In order to deal with complex environments, hierarchical RL solutions have been proposed, where state representations and policies are organized over a hierarchy of subtasks. This requires a careful definition of the hierarchy, which, if not properly constructed, may lead to very poor learning performance. The ambitious goal of transfer learning is to automatically construct a hierarchy of skills, which can be effectively reused over a wide range of similar tasks.</p>
          </li>
          <li id="uid112">
            <p noindent="true"><i>Activity Report</i>: ExTra-Learn started officially in October and one paper has been published at NIPS'14 and in the workshop on “Transfer and Multi-task Learning” at NIPS'14.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid113" level="2">
        <bodyTitle>National Partners</bodyTitle>
        <simplelist>
          <li id="uid114">
            <p noindent="true">Laboratoire Paul Painlevé Université des Sciences et Technologies de Lille, France</p>
            <simplelist>
              <li id="uid115">
                <p noindent="true">Mylène Maïda <i>Collaborator</i></p>
                <p>Ph. Preux has collaborated with M. Maïda and co-advised a student of the École Centrale de Lille. The motivation of this collaboration is the study of random matrices and the potential use of this theory in machine learning.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid116">
            <p noindent="true">CMLA - ENS Cachan.</p>
            <simplelist>
              <li id="uid117">
                <p noindent="true">Julien Audiffren <i>Collaborator</i></p>
                <p noindent="true">M. Valko, A. Lazaric, and M. Ghavamzadeh work with Julien
on Semi-Supervised Apprenticeship Learning. We work on a maximum
entropy algorithm that outperforms the approach without unlabeled data.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid118">
            <p noindent="true">Laboratoire Lagrange, Université de Nice, France.</p>
            <simplelist>
              <li id="uid119">
                <p noindent="true">Cédric Richard <i>Collaborator</i></p>
                <p noindent="true">We have had collaboration on the topic of <i>dictionary learning over a sensor network</i>.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid120">
            <p noindent="true">Laboratoire de Mécanique de Lille, Université de Lille 1, France.</p>
            <simplelist>
              <li id="uid121">
                <p noindent="true">Jean-Philippe Laval <i>Collaborator</i></p>
                <p noindent="true">We co-supervise a starting PhD student (Linh Van Nguyen) on the topic of <i>high resolution field reconstruction from low resolution measurements in turbulent flows</i>.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid122">
            <p noindent="true">Institut Carnot de Bourgogne, CNRS UMR 6303, Université de Bourgogne, Dijon, France.</p>
            <simplelist>
              <li id="uid123">
                <p noindent="true">Aymeric Leray <i>Collaborator</i></p>
                <p noindent="true">P. Chainais and A. Leray have written an article on the topic of <i>quantitative guarantees of a super resolution method via concentration inequalities</i>. A paper has been published in ICASSP 2014 proceedings and a journal article is submitted to IEEE Transactions on Image Processing.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid124">
            <p noindent="true">LAGIS (CRIStAL), Ecole Centrale Lille - Université de Lille 1, France.</p>
            <simplelist>
              <li id="uid125">
                <p noindent="true">Patrick Bas <i>Collaborator</i></p>
                <p noindent="true">P. Chainais and P. Bas have a collaboration on the topic of <i>adaptive quantization to optimize classification from histrograms of features with an application to the steganalysis of textured images</i>.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid126">
            <p noindent="true">University of Oxford (Great-Britain)</p>
            <simplelist>
              <li id="uid127">
                <p noindent="true">Dr. François Caron <i>Collaborators</i></p>
              </li>
              <li id="uid128">
                <p noindent="true">P. Chainais is co-leading the ANR BNPSI in collaboration with François Caron. Note that Rémi Bardenet will arrive in Lille as a CNRS researcher in feb. 2015 after a post-doc at Oxford University.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid129">
            <p noindent="true">LTCI, Institut Télécom-ParisTech, France.</p>
            <simplelist>
              <li id="uid130">
                <p noindent="true">Charanpal Dhanjal<i>Collaborator</i></p>
                <p noindent="true">We have a collaboration on the topic of <i>Matrix Factorization update</i> with application to sequential recommendation and sequential clustering. This collaboration has leaded to two publications this year: one in Neurocomputing journal <ref xlink:href="#sequel-2014-bid44" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, one at SDM'14 conference <ref xlink:href="#sequel-2014-bid34" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
              </li>
            </simplelist>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid131" level="1">
      <bodyTitle>European Initiatives</bodyTitle>
      <subsection id="uid132" level="2">
        <bodyTitle>FP7 &amp; H2020 Projects</bodyTitle>
        <subsection id="uid133" level="3">
          <bodyTitle>CompLACS</bodyTitle>
          <sanspuceslist>
            <li id="uid134">
              <p noindent="true">Type: FP7</p>
            </li>
            <li id="uid135">
              <p noindent="true">Defi: Cognitive Systems, Interaction, Robotics</p>
            </li>
            <li id="uid136">
              <p noindent="true">Instrument: Specific Targeted Research Project</p>
            </li>
            <li id="uid137">
              <p noindent="true">Objectif: Cognitive Systems and Robotics</p>
            </li>
            <li id="uid138">
              <p noindent="true">Duration: March 2011 - February 2015</p>
            </li>
            <li id="uid139">
              <p noindent="true">Coordinator: John Shaw-Taylor</p>
            </li>
            <li id="uid140">
              <p noindent="true">Partner: University College London,
University of Bristol,
Royal Holloway, University of London,
Radboud Universiteit Nijmegen,
Technische Universitat Berlin,
Montanuniversitat Leoben,
Institut National de Recherche en Informatique et en Automatique,
Technische Universität Darmstadt</p>
            </li>
            <li id="uid141">
              <p noindent="true">Inria contact: Rémi MUNOS</p>
            </li>
            <li id="uid142">
              <p noindent="true">Abstract: One of the aspirations of machine learning is to develop intelligent systems that can address a wide variety of control problems of many different types. However, although the community has developed successful technologies for many individual problems, these technologies have not previously been integrated into a unified framework. As a result, the technology used to specify, solve and analyse one control problem typically cannot be reused on a different problem. The community has fragmented into a diverse set of specialists with particular solutions to particular problems. The purpose of this project is to develop a unified toolkit for intelligent control in many different problem areas. This toolkit will incorporate many of the most successful approaches to a variety of important control problems within a single framework, including bandit problems, Markov Decision Processes (MDPs), Partially Observable MDPs (POMDPs), continuous stochastic control, and multi-agent systems. In addition, the toolkit will provide methods for the automatic construction of representations and capabilities, which can then be applied to any of these problem types. Finally, the toolkit will provide a generic interface to specifying problems and analysing performance, by mapping intuitive, human-understandable goals into machine-understandable objectives, and by mapping algorithm performance and regret back into human-understandable terms.</p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid143" level="1">
      <bodyTitle>International Initiatives</bodyTitle>
      <subsection id="uid144" level="2">
        <bodyTitle>Inria International Partners</bodyTitle>
        <simplelist>
          <li id="uid145">
            <p noindent="true">Inria International partnership with Leoben, Austria; starting October 2014; duration: 4 years.</p>
            <simplelist>
              <li id="uid146">
                <p noindent="true">Ronald Ortner and Peter Auer: Montanuniversität Leoben (Austria).</p>
              </li>
              <li id="uid147">
                <p noindent="true">Reinforcement learning (RL) deals with the problem of
interacting with an unknown stochastic environment that
occasionally provides rewards, with the goal of maximizing the
cumulative reward. The problem is well-understood when the
unknown environment is a finite-state Markov process. This
collaboration is centered around reducing the general RL
problem to this case.</p>
                <p>In particular, the following problems are considered:
representation learning, learning in continuous-state
environments, bandit problems with dependent arms, and pure
exploration in bandit problems. On each of these problems we
have successfully collaborated in the past, and plan to
sustain this collaboration possibly extending its scopes.</p>
              </li>
            </simplelist>
          </li>
        </simplelist>
        <subsection id="uid148" level="3">
          <bodyTitle>Informal International Partners</bodyTitle>
          <simplelist>
            <li id="uid149">
              <p noindent="true">Technion - Israel Institute of Technology, Haifa, Israel.</p>
              <simplelist>
                <li id="uid150">
                  <p noindent="true">Odalric-Ambrym Maillard <i>Collaborator</i></p>
                  <p noindent="true">Daniil Ryabko has worked with Odalric Maillard on
representation learning for reinforcement learning problems. It led to a paper in AISTATS <ref xlink:href="#sequel-2014-bid46" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid151">
              <p noindent="true">School of Computer Science, Carnegie Mellon University, USA.</p>
              <simplelist>
                <li id="uid152">
                  <p noindent="true">Prof. Emma Brunskill <i>Collaborator</i></p>
                </li>
                <li id="uid153">
                  <p noindent="true">Mohammad Gheshlaghi Azar, (now at Northwestern University in Chicago) <i>Collaborator</i></p>
                  <p noindent="true">A. Lazaric continued his collaboration on transfer in multi-arm bandit and reinforcement learning which led to one publication at ICML'14. We have submitted an associate team project with E. Brunskill on the topic of multi-arm bandit applied to education.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid154">
              <p noindent="true">Technicolor Research, Palo Alto.</p>
              <simplelist>
                <li id="uid155">
                  <p noindent="true">Branislav Kveton <i>Collaborator</i></p>
                  <p noindent="true">Michal Valko and Rémi Munos worked with Branislav on Spectral Bandits
aimed at recommendation for the entertainment content recommendation.
Michal continued the ongoing research on online semi-supervised
learning and this year delivered the algorithm for a challenging single picture
per person setting.
Victor Gabillon has spent 6 month at Technicolor as an intern
to work on the sequential learning with submodularity, which
resulted in 1 accepted paper at NIPS, 1 in ICML, and 1 in AAAI.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid156">
              <p noindent="true">University of Cambridge (UK)</p>
              <simplelist>
                <li id="uid157">
                  <p noindent="true">Alexandra Carpentier <i>Collaborator</i></p>
                </li>
                <li id="uid158">
                  <p noindent="true">Michal Valko collaborates with A. Carpentier on extreme event detection
(such as network intrusion) with limited allocation capabilities.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid159">
              <p noindent="true">Politecnico di Milano (Italy)</p>
              <simplelist>
                <li id="uid160">
                  <p noindent="true">Prof. Marcello Restelli and Prof. Nicola Gatti <i>Collaborators</i></p>
                </li>
                <li id="uid161">
                  <p noindent="true">A. Lazaric continued his collaboration on transfer in reinforcement learning which leads to a publication in NIPS'14. Furthermore, we have submitted a journal version of an application of multi-arm bandit in sponsored search auctions which is currently under review.</p>
                </li>
              </simplelist>
            </li>
          </simplelist>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid162" level="1">
      <bodyTitle>International Research Visitors</bodyTitle>
      <subsection id="uid163" level="2">
        <bodyTitle>Visits of International Scientists</bodyTitle>
        <subsection id="uid164" level="3">
          <bodyTitle>Internships</bodyTitle>
          <simplelist>
            <li id="uid165">
              <p noindent="true">Daniele Calandriello, student at Politecnico di Milano, Italy</p>
              <p>Period: April 2013 to May 2014.</p>
              <p>He was working with A. Lazaric on multi-task reinforcement learning.</p>
            </li>
            <li id="uid166">
              <p noindent="true">Jessica Chemali, Master, Carnegie Mellon University, May-August 2014</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid167" level="2">
        <bodyTitle>Visits to International Teams</bodyTitle>
        <subsection id="uid168" level="3">
          <bodyTitle>Sabbatical programme</bodyTitle>
          <sanspuceslist>
            <li id="uid169">
              <p noindent="true">Ryabko Daniil</p>
              <sanspuceslist>
                <li id="uid170">
                  <p noindent="true">Date: Jan 2014 - Jan 2015</p>
                </li>
                <li id="uid171">
                  <p noindent="true">Institution: <ref xlink:href="http://www.cmm.uchile.cl" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Centro de Modelamiento Matematico</ref> (Chile)</p>
                </li>
              </sanspuceslist>
            </li>
          </sanspuceslist>
        </subsection>
        <subsection id="uid172" level="3">
          <bodyTitle>Research stays abroad</bodyTitle>
          <sanspuceslist>
            <li id="uid173">
              <p noindent="true">Munos Rémi</p>
              <sanspuceslist>
                <li id="uid174">
                  <p noindent="true">Date: Jul 2013 - June 2014</p>
                </li>
                <li id="uid175">
                  <p noindent="true">Institution: Microsoft Research New England (USA)</p>
                </li>
              </sanspuceslist>
            </li>
          </sanspuceslist>
          <sanspuceslist>
            <li id="uid176">
              <p noindent="true">Munos Rémi</p>
              <sanspuceslist>
                <li id="uid177">
                  <p noindent="true">Date: October 2014 - now</p>
                </li>
                <li id="uid178">
                  <p noindent="true">Institution: Google Deepmind (UK)</p>
                </li>
              </sanspuceslist>
            </li>
          </sanspuceslist>
          <sanspuceslist>
            <li id="uid179">
              <p noindent="true">Ghavamzadeh Mohammad</p>
              <sanspuceslist>
                <li id="uid180">
                  <p noindent="true">Date: September 2013 - now</p>
                </li>
                <li id="uid181">
                  <p noindent="true">Institution: Adobe Research (USA)</p>
                </li>
              </sanspuceslist>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
    </subsection>
  </partenariat>
  <diffusion id="uid182">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid183" level="1">
      <bodyTitle>Promoting Scientific Activities</bodyTitle>
      <subsection id="uid184" level="2">
        <bodyTitle>Scientific events organisation</bodyTitle>
        <subsection id="uid185" level="3">
          <bodyTitle>general chair, scientific chair</bodyTitle>
          <simplelist>
            <li id="uid186">
              <p noindent="true">P. Chainais has co-organized with Z. Harchaoui the GDR ISIS 1 day workshop on "Learning adapted representations for signal and image processing" in Paris on Feb. 4th, 2014, see <ref xlink:href="http://www.gdr-isis.fr/index.php?page=reunion&amp;idreunion=234" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>gdr-isis.<allowbreak/>fr/<allowbreak/>index.<allowbreak/>php?page=reunion&amp;idreunion=234</ref>.</p>
            </li>
            <li id="uid187">
              <p noindent="true">P. Chainais has led the application of Lille to the organization of the french national Signal Processing conference (GRETSI 2017) : Marrakech won, but we had a good feedback in view of 2019.</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid188" level="3">
          <bodyTitle>member of the conference program committee</bodyTitle>
          <simplelist>
            <li id="uid189">
              <p noindent="true">AAAI Conference on Artificial Intelligence (AAAI 2014)</p>
            </li>
            <li id="uid190">
              <p noindent="true">IEEE Approximate Dynamic Programming and Reinforcement Learning (ADPRL 2014)</p>
            </li>
            <li id="uid191">
              <p noindent="true">French Conference on Planning, Decision-making, and Learning
in Control Systems (JFPDA 2014)</p>
            </li>
            <li id="uid192">
              <p noindent="true">Conférence Apprentissage Automatique (CAP)</p>
            </li>
            <li id="uid193">
              <p noindent="true">Extraction et Gestion des Connaissances (EGC)</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid194" level="3">
          <bodyTitle>reviewer</bodyTitle>
          <simplelist>
            <li id="uid195">
              <p noindent="true">International Conference on Pattern Recognition Applications
and Methods (ICPRAM 2014)</p>
            </li>
            <li id="uid196">
              <p noindent="true">Algorithmic Learning Theory (ALT 2014)</p>
            </li>
            <li id="uid197">
              <p noindent="true">AAAI Conference on Artificial Intelligence (AAAI 2014)</p>
            </li>
            <li id="uid198">
              <p noindent="true">Conference on Learning Theory (COLT 2014)</p>
            </li>
            <li id="uid199">
              <p noindent="true">European Workshop on Reinforcement Learning (EWRL 2014)</p>
            </li>
            <li id="uid200">
              <p noindent="true">Annual Conference on Neural Information Processing Systems (NIPS 2014)</p>
            </li>
            <li id="uid201">
              <p noindent="true">International Conference on Artificial Intelligence and
Statistics (AISTATS 2014)</p>
            </li>
            <li id="uid202">
              <p noindent="true">European Conference on Machine Learning (ECML 2014)</p>
            </li>
            <li id="uid203">
              <p noindent="true">International Conference on Machine Learning (ICML 2014)</p>
            </li>
            <li id="uid204">
              <p noindent="true">International Conference on Uncertainty in Artificial
Intelligence (UAI 2014)</p>
            </li>
            <li id="uid205">
              <p noindent="true">IEEE Congress on Evolutionary Computation (CEC)</p>
            </li>
            <li id="uid206">
              <p noindent="true">French Conference on Planning, Decision-making, and Learning
in Control Systems (JFPDA 2014)</p>
            </li>
            <li id="uid207">
              <p noindent="true">IEEE FUSION 2014</p>
            </li>
            <li id="uid208">
              <p noindent="true">IEEE Approximate Dynamic Programming and Reinforcement Learning (ADPRL 2014)</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid209" level="2">
        <bodyTitle>Journal</bodyTitle>
        <subsection id="uid210" level="3">
          <bodyTitle>reviewer</bodyTitle>
          <simplelist>
            <li id="uid211">
              <p noindent="true">IEEE Transactions on Image Processing</p>
            </li>
            <li id="uid212">
              <p noindent="true">Journal of Statistical Physics</p>
            </li>
            <li id="uid213">
              <p noindent="true">Digital Signal Processing</p>
            </li>
            <li id="uid214">
              <p noindent="true">IEEE Transactions on Information Theory</p>
            </li>
            <li id="uid215">
              <p noindent="true">IEEE Statistical Signal Processing SSP'2013</p>
            </li>
            <li id="uid216">
              <p noindent="true">European Signal Processing Conference EUSIPCO 2013</p>
            </li>
            <li id="uid217">
              <p noindent="true">10th International Conference on Sampling Theory and Applications (SampTA 2013)</p>
            </li>
            <li id="uid218">
              <p noindent="true">IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013 &amp; 2014)</p>
            </li>
            <li id="uid219">
              <p noindent="true">Annual Conference on Neural Information Processing Systems (NIPS 2013)</p>
            </li>
            <li id="uid220">
              <p noindent="true">International Conference on Machine Learning (ICML 2013)</p>
            </li>
            <li id="uid221">
              <p noindent="true">European Conference on Machine Learning (ECML 2013)</p>
            </li>
            <li id="uid222">
              <p noindent="true">Uncertainty in Artificial Intelligence (UAI 2013)</p>
            </li>
            <li id="uid223">
              <p noindent="true">Machine Learning Journal (MLJ)</p>
            </li>
            <li id="uid224">
              <p noindent="true">Journal of Machine Learning Research (JMLR)</p>
            </li>
            <li id="uid225">
              <p noindent="true">Journal of Artificial Intelligence Research (JAIR)</p>
            </li>
            <li id="uid226">
              <p noindent="true">IEEE Transactions on Automatic Control (TAC)</p>
            </li>
            <li id="uid227">
              <p noindent="true">IEEE Transactions of Signal Processing</p>
            </li>
            <li id="uid228">
              <p noindent="true">Journal of Autonomous Agents and Multi-Agent Systems (JAAMAS)</p>
            </li>
            <li id="uid229">
              <p noindent="true">Mathematics of Operations Research (MOR)</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid230" level="2">
        <bodyTitle>Invited Talks</bodyTitle>
        <simplelist>
          <li id="uid231">
            <p noindent="true">Alessandro Lazaric gave an invited talk on “Approximate Dynamic Programming meets Statistical Learning Theory” at CNRS Journée Des D`'ecollements in Orsay (November 2014)</p>
          </li>
          <li id="uid232">
            <p noindent="true">Alessandro Lazaric gave a talk on “Transfer in Reinforcement Learning” at the “30 minutes of sciences” seminars at Inria Lille (December 2014)</p>
          </li>
          <li id="uid233">
            <p noindent="true">Michal Valko gave a talk “Bandits on Graphs” at CMLA group at ENS Cachan (December 2014)</p>
          </li>
          <li id="uid234">
            <p noindent="true">Michal Valko gave a talk “Optimistic Optimization” at CMLA group and at MIST conference, Slovakia (January 2014)</p>
          </li>
          <li id="uid235">
            <p noindent="true">Ph. Preux gave a talk “décision adaptative face au Big Data”, colloque AAFD, Institut Galilée (April 2014).</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid236" level="2">
        <bodyTitle>Evaluation activities, expertise</bodyTitle>
        <simplelist>
          <li id="uid237">
            <p noindent="true"><i>P. Chainais</i> is a grant proposal reviewer for the ANR.</p>
          </li>
          <li id="uid238">
            <p noindent="true"><i>M. Ghavamzadeh</i> is in the Editorial Board Member of Machine Learning Journal (MLJ, 2011-present).</p>
          </li>
          <li id="uid239">
            <p noindent="true"><i>M. Ghavamzadeh</i> is in the Steering Committee Member of the European Workshop on Reinforcement Learning (EWRL, 2011-present).</p>
          </li>
          <li id="uid240">
            <p noindent="true"><i>P. Preux</i> and <i>J. Mary</i> are experts for <i>Crédit Impôt Recherche</i> (CIR).</p>
          </li>
          <li id="uid241">
            <p noindent="true"><i>P. Preux</i> is expert for ANR, ANRT, AERES, FNRS. He was member on the visitng committee of the Laboratoire d'Informatique de Grenoble (LIG)</p>
          </li>
          <li id="uid242">
            <p noindent="true"><i>E. Duflos</i> is a project proposal reviewer for ANR.</p>
          </li>
          <li id="uid243">
            <p noindent="true"><i>A. Lazaric</i> is a project proposal reviewer for ANR.</p>
          </li>
          <li id="uid244">
            <p noindent="true"><i>A. Lazaric</i> is a the main organizer of the European Workshop in Reinforcement Learning in 2015.</p>
          </li>
          <li id="uid245">
            <p noindent="true"><i>A. Lazaric, J. Mary, R. Munos, O. Pietquin, and M. Valko</i> are members of the Belgium Commission Evaluation F.R.S-FNRS, 2014.</p>
          </li>
          <li id="uid246">
            <p noindent="true"><i>M. Valko</i> is an elected member of the evaluation committee and participates in the hiring, promotion, and evaluation juries of Inria.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid247" level="2">
        <bodyTitle>Other Scientific Activities</bodyTitle>
        <simplelist>
          <li id="uid248">
            <p noindent="true"><i>D. Ryabko</i> is a member of COST-GTRI committee at Inria.</p>
          </li>
          <li id="uid249">
            <p noindent="true"><i>D. Ryabko</i> is a general advisor at Inria Lille.</p>
          </li>
          <li id="uid250">
            <p noindent="true"><i>E. Duflos</i> is Director of Research of Ecole Centrale de Lille since September 2011.</p>
          </li>
          <li id="uid251">
            <p noindent="true"><i>E. Duflos</i> is the Head of the Signal and Image Team of LAGIS (UMR CNRS 8219).</p>
          </li>
          <li id="uid252">
            <p noindent="true"><i>R. Gaudel</i> is board member of LIFL.</p>
          </li>
          <li id="uid253">
            <p noindent="true"><i>A. Lazaric</i> is a member of the committee for research evaluation (CER) at Inria Lille.</p>
          </li>
          <li id="uid254">
            <p noindent="true"><i>R. Gaudel</i> manages the proml mailing list. This mailing list gathers French-speaking researchers from Machine Learning community.</p>
          </li>
          <li id="uid255">
            <p noindent="true"><i>P. Chainais</i> is a member of the administration council of GRETSI, the French association of researchers in signal and image processing.</p>
          </li>
          <li id="uid256">
            <p noindent="true"><i>P. Chainais</i> is co-responsible for the action "Machine Learning" of the GDR ISIS which gathers french researchers in signal and image processing at the national level.</p>
          </li>
          <li id="uid257">
            <p noindent="true"><i>Ph. Preux</i> is Head of the LIFL/CRIStAL lab at the Université de Lille 3; he is head of the data intelligence (DatInG) thematic group of CRIStAL; he is on the scientific committee of CRIStAL.
He is local organization chair for ICML 2015.</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid258" level="1">
      <bodyTitle>Teaching - Supervision - Juries</bodyTitle>
      <subsection id="uid259" level="2">
        <bodyTitle>Awards</bodyTitle>
        <simplelist>
          <li id="uid260">
            <p noindent="true">D. Calandriello won the best master thesis award from the Italian Association for Artificial Intelligence for his thesis "Sparse Multi-Task Reinforcement Learning". The association awards the prize to the best master thesis focused on AI in Italy in 2014. The thesis was written under the co-supervision of A. Lazaric during a year spent in SequeL</p>
          </li>
          <li id="uid261">
            <p noindent="true">F. Guillou won the ACM RecSys challenge (on recommendation systems)</p>
          </li>
          <li id="uid262">
            <p noindent="true">P. Chainais won an IBM Faculty Award for the creation of the option DAD (Data Analysis and Decision making) at Ecole Centrale Lille (10000$ have been given to EC Lille). The partnership with IBM about Big Data is getting stronger and new perspectives are coming.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid263" level="2">
        <bodyTitle>Teaching</bodyTitle>
        <sanspuceslist>
          <li id="uid264">
            <p noindent="true">Licence: R. Gaudel, programmation R pour statistiques et sociologie quantitative, 28h eqTD, L1, université Lille 3, France</p>
          </li>
          <li id="uid265">
            <p noindent="true">Licence: R. Gaudel, projet informatique de traitement des données en SHS, 20h eqTD, L2, université Lille 3, France</p>
          </li>
          <li id="uid266">
            <p noindent="true">Licence: R. Gaudel, préparation au C2i niveau 1, 24h eqTD, L1-3, université Lille 3, France</p>
          </li>
          <li id="uid267">
            <p noindent="true">Master: R. Gaudel, fouille du web, 32h eqTD, M2, université Lille 3, France</p>
          </li>
          <li id="uid268">
            <p noindent="true">Master: R. Gaudel, fouille de donées, 30h eqTD, M2, université Lille 3, France</p>
          </li>
          <li id="uid269">
            <p noindent="true">Master: A. Lazaric, Reinforcement Learning, 25h eqTD, M2, ENS Cachan, France</p>
          </li>
          <li id="uid270">
            <p noindent="true">Master: A. Lazaric, Reinforcement Learning, 25h eqTD, M2, Ecole Centrale Lille, France</p>
          </li>
          <li id="uid271">
            <p noindent="true">Master: Ph. Preux, “Modeling, Computer Science, Mathematics”, 72h eqTD, M1 pshychology/cognitive science, université Lille 3, France</p>
          </li>
          <li id="uid272">
            <p noindent="true">Master: Ph. Preux, “Formal neural netwokrs”, 30h eqTD, M1 cognitive science, université Lille 3, France</p>
          </li>
          <li id="uid273">
            <p noindent="true">Licence: Ph. Preux, “Supervised Learning”, 30h eqTD, L3 MIASHS, université Lille 3, France</p>
          </li>
          <li id="uid274">
            <p noindent="true">EC Lille (3rd y.): P. Chainais, “Machine learning”, 34h eqTD, Ecole Centrale Lille, France</p>
          </li>
          <li id="uid275">
            <p noindent="true">EC Lille (3rd y.): P. Chainais, “Matlab”, 16h eqTD, Ecole Centrale Lille, France</p>
          </li>
          <li id="uid276">
            <p noindent="true">EC Lille (3rd y.): P. Chainais, “Image processing”, 16h eqTD, Ecole Centrale Lille, France</p>
          </li>
          <li id="uid277">
            <p noindent="true">EC Lille (3rd y.): P. Chainais, “Representation and data compression”, 8h eqTD, Ecole Centrale Lille, France</p>
          </li>
          <li id="uid278">
            <p noindent="true">EC Lille (1st y.): P. Chainais, “Signal processing”, 22h eqTD, Ecole Centrale Lille, France</p>
          </li>
          <li id="uid279">
            <p noindent="true">EC Lille (2nd y.): P. Chainais, “Wavelets and applications”, 24h eqTD, Ecole Centrale Lille, France</p>
          </li>
          <li id="uid280">
            <p noindent="true">EC Lille: J. Mary, Machine Learning with R , 20h eqTD</p>
          </li>
          <li id="uid281">
            <p noindent="true">Master: J. Mary, M2 ID - Univ Lille, Programmation web avancée et design pattern, 64h eqTD</p>
          </li>
          <li id="uid282">
            <p noindent="true">Master: J. Mary, M1 ID - Univ Lille, Programmation web , 32h eqTD</p>
          </li>
          <li id="uid283">
            <p noindent="true">Master: J. Mary, M1 ID - Univ Lille, Algorithmique avancée , 32h eqTD</p>
          </li>
          <li id="uid284">
            <p noindent="true">Master: J. Mary, M1 IIES - Univ Lille, Analyse de données avec R, 32h eqTD</p>
          </li>
          <li id="uid285">
            <p noindent="true">Master: J. Mary, C2i - Univ Lille, 24h eqTD</p>
          </li>
          <li id="uid286">
            <p noindent="true">
              <b>E-learning</b>
            </p>
            <sanspuceslist>
              <li id="uid287">
                <p noindent="true">SPOC : R. Gaudel, Marc Tommasi and Alain Preux, culture numérique S2, 8 semaines, Moodle, université Lille 3, licence (L1), formation initiale, tous les étudiants (&gt; 7 000).</p>
              </li>
            </sanspuceslist>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid288" level="2">
        <bodyTitle>Supervision</bodyTitle>
        <sanspuceslist>
          <li id="uid289">
            <p noindent="true">HDR defended: <i>Mohammad Ghavamzadeh</i> defended his “Habilitation à diriger les recherches” on June 12th.</p>
          </li>
          <li id="uid290">
            <p noindent="true">PhD defended: <i>Boris Baldassari</i> defended his PhD thesis
<i>Apprentissage automatique et développement logiciel</i>,
on July 1st, advisor: Ph. Preux.</p>
          </li>
          <li id="uid291">
            <p noindent="true">PhD defended: <i>Gabriel Dulac-Arnold</i> defended his PhD thesis <i>A
General Sequential Model for Constrained Classification</i>, on Feb. 7th,
advisor: Ph. Preux, L. Denoyer (Paris 6), P. Gallinari (Paris 6).</p>
          </li>
          <li id="uid292">
            <p noindent="true">PhD defended: <i>Victor Gabillon</i> defended his PhD thesis “Active Learning
in Classification-based Policy Iteration”, on June 12th,
advisor: M. Ghavamzadeh.</p>
          </li>
          <li id="uid293">
            <p noindent="true">PhD defended: <i>Olivier Nicol</i> defended his PhD thesis “Data-driven
evaluation of Contextual Bandit algorithms and applications to
Dynamic Recommendation”, on Dec. 18th, advisor: Ph. Preux, J. Mary.</p>
          </li>
          <li id="uid294">
            <p noindent="true">PhD defended: <i>Emilie Kaufmann</i> defended her PhD thesis, “Bayesian
Bandits”, advisor: R. Munos, O. Cappé, A. Garivier.</p>
          </li>
          <li id="uid295">
            <p noindent="true">PhD in progress: <i>Frédéric Guillou</i>, “Sequential
Recommender System”, since Oct. 2013, advisor: Ph. Preux,
J. Mary, R. Gaudel.</p>
          </li>
          <li id="uid296">
            <p noindent="true">PhD in progress: <i>Vicenzo Musco</i>, “Topology and
evolution of software graphs”, since Oct. 2013, advisor:
P. Preux, M. Monperrus</p>
          </li>
          <li id="uid297">
            <p noindent="true">PhD in progress: <i>Adrien Hoarau</i>, “Multi-arm Bandit
Theory”, since Oct. 2012, advisor: R. Munos.</p>
          </li>
          <li id="uid298">
            <p noindent="true">PhD in progress: <i>Tomáš Kocák</i>,
“Sequential Learning with Similarities”, since Oct. 2013,
advisor: R. Munos, M. Valko</p>
          </li>
          <li id="uid299">
            <p noindent="true">PhD in progress: <i>Amir Sani</i>, “Learning under
uncertainty”, Oct. 2011, since advisor: R. Munos, A. Lazaric.</p>
          </li>
          <li id="uid300">
            <p noindent="true">PhD in progress: <i>Marta Soare</i>, “Pure Exploration in
Multi-arm Bandit”, since Oct. 2012, advisor: R. Munos,
A. Lazaric.</p>
          </li>
          <li id="uid301">
            <p noindent="true">PhD in progress: <i>Hong Phuong Dang</i>,
<i>Bayesian non parametric methods for dictionary learning and inverse problems</i>,
since Oct. 2013, advisor: P. Chainais.</p>
          </li>
          <li id="uid302">
            <p noindent="true">PhD in progress: <i>Linh Van Nguyen</i>,
<i>High resolution reconstruction from low resolution measurements of velocity fields in turbulent flows</i>,
since Oct. 2013, advisor: P. Chainais &amp; J.P. Laval (Laboratoire de Mécanique de Lille).</p>
          </li>
          <li id="uid303">
            <p noindent="true">PhD in progress: <i>Clément Elvira</i>, “Bayesian non parametric approaches for blind hyperspectral images unmixing.", since Oct. 2014, advisor: P. Chainais &amp; N. Dobigeon (IRIT, Toulouse).</p>
          </li>
          <li id="uid304">
            <p noindent="true">PhD started: <i>Daniele Calandriello</i>,
<i>Efficient Sequential Learning in Structured and Constrained Environments</i>,
since Oct. 2014, advisor: M. Valko &amp; A. Lazaric &amp; P. Preux.</p>
          </li>
          <li id="uid305">
            <p noindent="true">PhD started: <i>Jean-Bastien Grill</i>,
<i>Développement et analyse de mé thodes numé riques efficaces pour de l'optimisation lorsque la régularité de la fonction sous-jacente n'est pas connue à priori.</i>, since Oct. 2014, advisor: M. Valko &amp; R. Munos</p>
          </li>
          <li id="uid306">
            <p noindent="true">PhD started: <i>Pratik Gajane</i>, “Sequential Learning and Decision Making under Partial Monitoring”, since Oct. 2014, advisor: Philippe Preux, Tanguy Urvoy (Orange Labs)</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid307" level="2">
        <bodyTitle>Juries</bodyTitle>
        <sanspuceslist>
          <li id="uid308">
            <p noindent="true"><i>A. Lazaric</i> was part of the jury of the PhD of Mahdi Milani Fard at McGill Univiersity (supervised by J. Pineau).</p>
          </li>
          <li id="uid309">
            <p noindent="true"><i>Ph. Preux</i> was part of the PhD defense jury of W. Wang (Université Paris-Sud, M. Martinez (Université de Lille), G. Dulac-Arnold (Université Paris 6), V. Gabillon, Boris Baldassari, and O. Nicol (all 3 from Université de Lille).</p>
          </li>
          <li id="uid310">
            <p noindent="true"><i>Ph. Preux</i> was part of the HdR defense jury of M. Ghavamzadeh.</p>
          </li>
          <li id="uid311">
            <p noindent="true"><i>P. Chainais</i> was part of the PhD defense jury of Raja Suleiman (supervied by David Mary) at University of Nice, dec. 2014.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid312" level="1">
      <bodyTitle>Popularization</bodyTitle>
      <simplelist>
        <li id="uid313">
          <p noindent="true">M. Valko gave an Interview on "Face Recognition" at Sciences et Avenir (July 2014)</p>
        </li>
        <li id="uid314">
          <p noindent="true">M. Valko gave an Interview on "Biometric applications will soon be part of our daily life" at ARTE Future (November 2014)</p>
        </li>
        <li id="uid315">
          <p noindent="true">Article on research of M. Valko's collaboration with INTEL - Ford and Intel Mobii project using Face Recognition, at engadget.com (June 2014)
<ref xlink:href="http://www.engadget.com/2014/06/25/ford-and-intel-project-mobii-connected-car-cameras/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>engadget.<allowbreak/>com/<allowbreak/>2014/<allowbreak/>06/<allowbreak/>25/<allowbreak/>ford-and-intel-project-mobii-connected-car-cameras/</ref></p>
        </li>
        <li id="uid316">
          <p noindent="true">Article on research of M. Valko's collaboration with INTEL - Ford prototype using Face Recognition at intel.com (June 2014)
<ref xlink:href="http://www.intel.com/content/www/us/en/automotive/ford-mobii-prototype-video.html" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>intel.<allowbreak/>com/<allowbreak/>content/<allowbreak/>www/<allowbreak/>us/<allowbreak/>en/<allowbreak/>automotive/<allowbreak/>ford-mobii-prototype-video.<allowbreak/>html</ref></p>
        </li>
        <li id="uid317">
          <p noindent="true">as part of the Inria mediation program, Ph. Preux met high schools pupils to explain what research is.</p>
        </li>
      </simplelist>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    
    <biblStruct id="sequel-2014-bid42" type="article" rend="year" n="cite:busafekete:hal-01079370">
      <identifiant type="doi" value="10.1007/s10994-014-5458-8"/>
      <identifiant type="hal" value="hal-01079370"/>
      <analytic>
        <title level="a">Preference-based reinforcement learning: evolutionary direct policy search using a preference-based racing algorithm</title>
        <author>
          <persName>
            <foreName>Róbert</foreName>
            <surname>Busa-Fekete</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Weiwei</foreName>
            <surname>Cheng</surname>
            <initial>W.</initial>
          </persName>
          <persName>
            <foreName>Eyke</foreName>
            <surname>Hüllermeier</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2014-idp83480">
            <foreName>Balázs</foreName>
            <surname>Szörényi</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Paul</foreName>
            <surname>Weng</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01366">
        <idno type="issn">0885-6125</idno>
        <title level="j">Machine Learning</title>
        <imprint>
          <biblScope type="volume">97</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">327-351</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01079370" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01079370</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid44" type="article" rend="year" n="cite:dhanjal:hal-00770889">
      <identifiant type="doi" value="10.1016/j.neucom.2013.11.015"/>
      <identifiant type="hal" value="hal-00770889"/>
      <analytic>
        <title level="a">Efficient Eigen-updating for Spectral Graph Clustering</title>
        <author>
          <persName>
            <foreName>Charanpal</foreName>
            <surname>Dhanjal</surname>
            <initial>C.</initial>
          </persName>
          <persName key="sequel-2014-idp75664">
            <foreName>Romaric</foreName>
            <surname>Gaudel</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Stéphan</foreName>
            <surname>Clémençon</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01491">
        <idno type="issn">0925-2312</idno>
        <title level="j">Neurocomputing</title>
        <imprint>
          <biblScope type="volume">131</biblScope>
          <dateStruct>
            <month>May</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">440-452</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-00770889" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-00770889</ref>
        </imprint>
      </monogr>
      <note type="bnote">Correction of several typos</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid38" type="article" rend="year" n="cite:gyorgy:hal-01079327">
      <identifiant type="doi" value="10.1109/TIT.2014.2307062"/>
      <identifiant type="hal" value="hal-01079327"/>
      <analytic>
        <title level="a">Near-Optimal Rates for Limited-Delay Universal Lossy Source Coding</title>
        <author>
          <persName>
            <foreName>András</foreName>
            <surname>György</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp82200">
            <foreName>Gergely</foreName>
            <surname>Neu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00734">
        <idno type="issn">0018-9448</idno>
        <title level="j">IEEE Transactions on Information Theory</title>
        <imprint>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">2823-2834</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01079327" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01079327</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid39" type="article" rend="year" n="cite:neu:hal-01079422">
      <identifiant type="doi" value="10.1109/TAC.2013.2292137"/>
      <identifiant type="hal" value="hal-01079422"/>
      <analytic>
        <title level="a">Online Markov Decision Processes Under Bandit Feedback</title>
        <author>
          <persName key="sequel-2014-idp82200">
            <foreName>Gergely</foreName>
            <surname>Neu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>András</foreName>
            <surname>György</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Csaba</foreName>
            <surname>Szepesvári</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>András</foreName>
            <surname>Antos</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00707">
        <idno type="issn">0018-9286</idno>
        <title level="j">IEEE Transactions on Automatic Control</title>
        <imprint>
          <biblScope type="volume">59</biblScope>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">676 - 691</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01079422" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01079422</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid24" type="article" rend="year" n="cite:ortner:hal-01074077">
      <identifiant type="doi" value="10.1016/j.tcs.2014.09.026"/>
      <identifiant type="hal" value="hal-01074077"/>
      <analytic>
        <title level="a">Regret bounds for restless Markov bandits</title>
        <author>
          <persName>
            <foreName>Ronald</foreName>
            <surname>Ortner</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Peter</foreName>
            <surname>Auer</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01855">
        <idno type="issn">0304-3975</idno>
        <title level="j">Journal of Theoretical Computer Science (TCS)</title>
        <imprint>
          <biblScope type="volume">558</biblScope>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">62-76</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01074077" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01074077</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid29" type="article" rend="year" n="cite:ryabko:inria-00610009">
      <identifiant type="doi" value="10.1080/02331888.2012.719511"/>
      <identifiant type="hal" value="inria-00610009"/>
      <analytic>
        <title level="a">Uniform hypothesis testing for finite-valued stationary processes</title>
        <author>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid02359">
        <idno type="issn">0233-1888</idno>
        <title level="j">Statistics</title>
        <imprint>
          <biblScope type="volume">48</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">121-128</biblScope>
          <ref xlink:href="https://hal.inria.fr/inria-00610009" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>inria-00610009</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid47" subtype="nonparu-n" type="article" rend="year" n="cite:scherrer:hal-01091341">
      <identifiant type="hal" value="hal-01091341"/>
      <analytic>
        <title level="a">Approximate Modified Policy Iteration and its Application to the Game of Tetris</title>
        <author>
          <persName key="maia-2014-idp84944">
            <foreName>Bruno</foreName>
            <surname>Scherrer</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2014-idp65928">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp116352">
            <foreName>Victor</foreName>
            <surname>Gabillon</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Boris</foreName>
            <surname>Lesner</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Matthieu</foreName>
            <surname>Geist</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01187">
        <idno type="issn">1532-4435</idno>
        <title level="j">Journal of Machine Learning Research</title>
        <imprint>
          <dateStruct>
            <year>2015</year>
          </dateStruct>
          <biblScope type="pages">47</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01091341" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01091341</ref>
        </imprint>
      </monogr>
      <note type="bnote">forthcoming</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid41" type="inproceedings" rend="year" n="cite:busafekete:hal-01079369">
      <identifiant type="hal" value="hal-01079369"/>
      <analytic>
        <title level="a">Preference-Based Rank Elicitation using Statistical Models: The Case of Mallows</title>
        <author>
          <persName>
            <foreName>Róbert</foreName>
            <surname>Busa-Fekete</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Eyke</foreName>
            <surname>Hüllermeier</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2014-idp83480">
            <foreName>Balázs</foreName>
            <surname>Szörényi</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Proceedings of The 31st International Conference on Machine Learning</title>
        <loc>Beijing, China</loc>
        <title level="s">JMLR Workshop and Conference Proceedings</title>
        <imprint>
          <biblScope type="volume">32</biblScope>
          <dateStruct>
            <month>June</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01079369" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01079369</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>31</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid16" type="inproceedings" rend="year" n="cite:calandriello:hal-01073513">
      <identifiant type="hal" value="hal-01073513"/>
      <analytic>
        <title level="a">Sparse Multi-task Reinforcement Learning</title>
        <author>
          <persName key="sequel-2014-idp101176">
            <foreName>Daniele</foreName>
            <surname>Calandriello</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Marcello</foreName>
            <surname>Restelli</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS - Advances in Neural Information Processing Systems 26</title>
        <loc>Montreal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01073513" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01073513</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>28</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid19" type="inproceedings" rend="year" n="cite:carpentier:hal-01079354">
      <identifiant type="hal" value="hal-01079354"/>
      <analytic>
        <title level="a">Extreme bandits</title>
        <author>
          <persName>
            <foreName>Alexandra</foreName>
            <surname>Carpentier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing Systems 27</title>
        <loc>Montréal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01079354" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01079354</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>28</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid32" type="inproceedings" rend="year" n="cite:chainais:hal-01081402">
      <identifiant type="doi" value="10.1109/ICASSP.2014.6854121"/>
      <identifiant type="hal" value="hal-01081402"/>
      <analytic>
        <title level="a">Quantitative control of the error bounds of a fast super-resolution technique for microscopy and astronomy</title>
        <author>
          <persName key="sequel-2014-idp71472">
            <foreName>Pierre</foreName>
            <surname>Chainais</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Pierre</foreName>
            <surname>Pfennig</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Aymeric</foreName>
            <surname>Leray</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</title>
        <loc>Florence, Italy</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">2853 - 2857</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01081402" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01081402</ref>
        </imprint>
        <meeting id="cid80145">
          <title>IEEE International Conference on Acoustics, Speech and Signal Processing</title>
          <num>2010</num>
          <abbr type="sigle">ICASSP</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid33" type="inproceedings" rend="year" n="cite:chainais:hal-01104781">
      <identifiant type="hal" value="hal-01104781"/>
      <analytic>
        <title level="a">A diffusion strategy for distributed dictionary learning</title>
        <author>
          <persName key="sequel-2014-idp71472">
            <foreName>Pierre</foreName>
            <surname>Chainais</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Cédric</foreName>
            <surname>Richard</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">2nd "international Traveling Workshop on Interactions between Sparse models and Technology" (iTWIST'14)</title>
        <loc>Namur, Belgium</loc>
        <title level="s">Proceedings of the second "international Traveling Workshop on Interactions between Sparse models and Technology" (iTWIST'14)</title>
        <imprint>
          <publisher>
            <orgName type="organisation">Laurent Jacques</orgName>
          </publisher>
          <dateStruct>
            <month>August</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01104781" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01104781</ref>
        </imprint>
        <meeting id="cid624689">
          <title>International Traveling Workshop on Interactions between Sparse models and Technology</title>
          <num>2</num>
          <abbr type="sigle">iTWIST</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid48" type="inproceedings" rend="year" n="cite:dauce:hal-01104024">
      <identifiant type="hal" value="hal-01104024"/>
      <analytic>
        <title level="a">Evidence build-up facilitates on-line adaptivity in dynamic environments: example of the BCI P300-speller</title>
        <author>
          <persName>
            <foreName>Emmanuel</foreName>
            <surname>Daucé</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Eoin</foreName>
            <surname>Thomas</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">22nd European Symposium on Artificial Neural Networks</title>
        <loc>Bruges, Belgium</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01104024" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01104024</ref>
        </imprint>
        <meeting id="cid70917">
          <title>European Symposium on Artificial Neural Networks</title>
          <num>22</num>
          <abbr type="sigle">ESANN</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid34" type="inproceedings" rend="year" n="cite:dhanjal:hal-00926605">
      <identifiant type="doi" value="10.1137/1.9781611973440.72"/>
      <identifiant type="hal" value="hal-00926605"/>
      <analytic>
        <title level="a">Online Matrix Completion Through Nuclear Norm Regularisation</title>
        <author>
          <persName>
            <foreName>Charanpal</foreName>
            <surname>Dhanjal</surname>
            <initial>C.</initial>
          </persName>
          <persName key="sequel-2014-idp75664">
            <foreName>Romaric</foreName>
            <surname>Gaudel</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Stéphan</foreName>
            <surname>Clémençon</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">SDM - SIAM International Conference on Data Mining</title>
        <loc>Philadelphia, United States</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00926605" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00926605</ref>
        </imprint>
        <meeting id="cid361599">
          <title>SIAM International Conference on Data Mining</title>
          <num>2014</num>
          <abbr type="sigle">SDM</abbr>
        </meeting>
      </monogr>
      <note type="bnote">Corrected a typo in the affiliation</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid15" type="inproceedings" rend="year" n="cite:gheshlaghiazar:hal-01080138">
      <identifiant type="hal" value="hal-01080138"/>
      <analytic>
        <title level="a">Online Stochastic Optimization under Correlated Bandit Feedback</title>
        <author>
          <persName>
            <foreName>Mohammad</foreName>
            <surname>Gheshlaghi Azar</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Emma</foreName>
            <surname>Brunskill</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">31st International Conference on Machine Learning</title>
        <loc>Beijing, China</loc>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01080138" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01080138</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>31</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid43" type="inproceedings" rend="year" n="cite:ivan:hal-01079368">
      <identifiant type="doi" value="10.1007/978-3-319-09704-6_16"/>
      <identifiant type="hal" value="hal-01079368"/>
      <analytic>
        <title level="a">Biclique Coverings, Rectifier Networks and the Cost of ε-Removal</title>
        <author>
          <persName>
            <foreName>Szabolcs</foreName>
            <surname>Iván</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Ádám D.</foreName>
            <surname>Lelkes</surname>
            <initial>Á. D.</initial>
          </persName>
          <persName>
            <foreName>Judit</foreName>
            <surname>Nagy-György</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2014-idp83480">
            <foreName>Balázs</foreName>
            <surname>Szörényi</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>György</foreName>
            <surname>Turán</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">16th International Workshop on Descriptional Complexity of Formal Systems, Proceedings</title>
        <loc>Turku, Finland</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">174 - 185</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01079368" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01079368</ref>
        </imprint>
        <meeting id="cid393463">
          <title>International Workshop on Descriptional Complexity of Formal Systems</title>
          <num>16</num>
          <abbr type="sigle">DCFS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid30" type="inproceedings" rend="year" n="cite:khaleghi:hal-01026583">
      <identifiant type="hal" value="hal-01026583"/>
      <analytic>
        <title level="a">Asymptotically consistent estimation of the number of change points in highly dependent time series</title>
        <author>
          <persName>
            <foreName>Azadeh</foreName>
            <surname>Khaleghi</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International Conference on Machine Learning (ICML)</title>
        <loc>Beijing, China</loc>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">539-547</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01026583" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01026583</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>28</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid20" type="inproceedings" rend="year" n="cite:kocak:hal-01079351">
      <identifiant type="hal" value="hal-01079351"/>
      <analytic>
        <title level="a">Efficient learning by implicit exploration in bandit problems with side observations</title>
        <author>
          <persName key="sequel-2014-idp108624">
            <foreName>Tomáš</foreName>
            <surname>Kocák</surname>
            <initial>T.</initial>
          </persName>
          <persName key="sequel-2014-idp82200">
            <foreName>Gergely</foreName>
            <surname>Neu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing Systems 27</title>
        <loc>Montréal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01079351" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01079351</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>28</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid25" type="inproceedings" rend="year" n="cite:kocak:hal-00981575">
      <identifiant type="hal" value="hal-00981575"/>
      <analytic>
        <title level="a">Spectral Thompson Sampling</title>
        <author>
          <persName key="sequel-2014-idp108624">
            <foreName>Tomáš</foreName>
            <surname>Kocák</surname>
            <initial>T.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Shipra</foreName>
            <surname>Agrawal</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence</title>
        <loc>Québec City, Canada</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00981575" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00981575</ref>
        </imprint>
        <meeting id="cid355099">
          <title>National Conference on Artificial Intelligence</title>
          <num>28</num>
          <abbr type="sigle">AAAI</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid17" type="inproceedings" rend="year" n="cite:kocak:hal-01045036">
      <identifiant type="hal" value="hal-01045036"/>
      <analytic>
        <title level="a">Spectral Bandits for Smooth Graph Functions with Applications in Recommender Systems</title>
        <author>
          <persName key="sequel-2014-idp108624">
            <foreName>Tomáš</foreName>
            <surname>Kocák</surname>
            <initial>T.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Branislav</foreName>
            <surname>Kveton</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Shipra</foreName>
            <surname>Agrawal</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AAAI Workshop on Sequential Decision-Making with Big Data</title>
        <loc>Québec City, Canada</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01045036" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01045036</ref>
        </imprint>
        <meeting id="cid624485">
          <title>AAAI Workshop on Sequential Decision-Making with Big Data</title>
          <num>2014</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid18" type="inproceedings" rend="year" n="cite:neu:hal-01079355">
      <identifiant type="hal" value="hal-01079355"/>
      <analytic>
        <title level="a">Online combinatorial optimization with stochastic decision sets and adversarial losses</title>
        <author>
          <persName key="sequel-2014-idp82200">
            <foreName>Gergely</foreName>
            <surname>Neu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing Systems 27</title>
        <loc>Montréal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01079355" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01079355</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>28</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid27" type="inproceedings" rend="year" n="cite:nicol:hal-00990840">
      <identifiant type="hal" value="hal-00990840"/>
      <analytic>
        <title level="a">Improving offline evaluation of contextual bandit algorithms via bootstrapping techniques</title>
        <author>
          <persName key="sequel-2014-idp115104">
            <foreName>Olivier</foreName>
            <surname>Nicol</surname>
            <initial>O.</initial>
          </persName>
          <persName key="sequel-2014-idp76928">
            <foreName>Jérémie</foreName>
            <surname>Mary</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <editor role="editor">
          <persName>
            <foreName>Eric</foreName>
            <surname>Xing</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Tony</foreName>
            <surname>Jebara</surname>
            <initial>T.</initial>
          </persName>
        </editor>
        <title level="m">International Conference on Machine Learning</title>
        <loc>Beijing, China</loc>
        <title level="s">Journal of Machine Learning Research, Workshop and Conference Proceedings; Proceedings of The 31st International Conference on Machine Learning</title>
        <imprint>
          <biblScope type="volume">32</biblScope>
          <dateStruct>
            <month>June</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00990840" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00990840</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>27</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid14" type="inproceedings" rend="year" n="cite:ortner:hal-01057562">
      <identifiant type="hal" value="hal-01057562"/>
      <analytic>
        <title level="a">Selecting Near-Optimal Approximate State Representations in Reinforcement Learning</title>
        <author>
          <persName>
            <foreName>Ronald</foreName>
            <surname>Ortner</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International Conference on Algorithmic Learning Theory (ALT)</title>
        <loc>Bled, Slovenia</loc>
        <title level="s">LNCS</title>
        <imprint>
          <biblScope type="volume">8776</biblScope>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <month>October</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">140-154</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01057562" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01057562</ref>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>25</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid35" type="inproceedings" rend="year" n="cite:pachebat:hal-01058151">
      <identifiant type="hal" value="hal-01058151"/>
      <analytic>
        <title level="a">Synthèse en espace et temps du rayonnement acoustique d'une paroi sous excitation turbulente par synthèse spectrale 2D+T et formulation vibro-acoustique directe</title>
        <author>
          <persName>
            <foreName>Marc</foreName>
            <surname>Pachebat</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Nicolas</foreName>
            <surname>Totaro</surname>
            <initial>N.</initial>
          </persName>
          <persName key="sequel-2014-idp71472">
            <foreName>Pierre</foreName>
            <surname>Chainais</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Olivier</foreName>
            <surname>Collery</surname>
            <initial>O.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="no" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Congrès Français d'acoustique 2014</title>
        <loc>Poitiers, France</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">1915-1921</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01058151" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01058151</ref>
        </imprint>
        <meeting id="cid394719">
          <title>Congrès Français d'Acoustique</title>
          <num>2014</num>
          <abbr type="sigle">CFA</abbr>
        </meeting>
      </monogr>
      <note type="bnote">6 Pages, 20 Refs, papier N183</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid52" type="inproceedings" rend="year" n="cite:pietquin:hal-01104423">
      <identifiant type="hal" value="hal-01104423"/>
      <analytic>
        <title level="a">Subspace Identification for Predictive State Representation by Nuclear Norm Minimization</title>
        <author>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
          <persName key="sequel-2014-idp103632">
            <foreName>Hadrien</foreName>
            <surname>Glaude</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Cyrille</foreName>
            <surname>Enderli</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL 2014)</title>
        <loc>Orlando, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01104423" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01104423</ref>
        </imprint>
        <meeting id="cid143767">
          <title>IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning</title>
          <num>2014</num>
          <abbr type="sigle">ADPRL</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid49" type="inproceedings" rend="year" n="cite:piot:hal-01104419">
      <identifiant type="hal" value="hal-01104419"/>
      <analytic>
        <title level="a">Difference of Convex Functions Programming for Reinforcement Learning</title>
        <author>
          <persName key="sequel-2014-idp79648">
            <foreName>Bilal</foreName>
            <surname>Piot</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Matthieu</foreName>
            <surname>Geist</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing Systems (NIPS 2014)</title>
        <loc>Montreal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01104419" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01104419</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>28</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid51" type="inproceedings" rend="year" n="cite:piot:hal-01104789">
      <identifiant type="hal" value="hal-01104789"/>
      <analytic>
        <title level="a">Méthode de minimisation du résidu de Bellman boostée qui tient compte des démonstrations expertes.</title>
        <author>
          <persName key="sequel-2014-idp79648">
            <foreName>Bilal</foreName>
            <surname>Piot</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Matthieu</foreName>
            <surname>Geist</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="no" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">JFPDA - 9èmes Journées Francophones de Planification, Décision et Apprentissage</title>
        <loc>Liège, Belgium</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal-supelec.archives-ouvertes.fr/hal-01104789" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal-supelec.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01104789</ref>
        </imprint>
        <meeting id="cid344313">
          <title>Journées Francophones Planification, Décision, Apprentissage</title>
          <num>2014</num>
          <abbr type="sigle">JFPDA</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid50" type="inproceedings" rend="year" n="cite:piot:hal-01104739">
      <identifiant type="hal" value="hal-01104739"/>
      <analytic>
        <title level="a">Predicting when to laugh with structured classification</title>
        <author>
          <persName key="sequel-2014-idp79648">
            <foreName>Bilal</foreName>
            <surname>Piot</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2014-idp111080">
            <foreName>Olivier</foreName>
            <surname>Pietquin</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Matthieu</foreName>
            <surname>Geist</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">InterSpeech 2014</title>
        <loc>Singapore</loc>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">1786-1790</biblScope>
          <ref xlink:href="https://hal-supelec.archives-ouvertes.fr/hal-01104739" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal-supelec.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01104739</ref>
        </imprint>
        <meeting id="cid29182">
          <title>Annual Conference of the International Speech Communication Association</title>
          <num>15</num>
          <abbr type="sigle">INTERSPEECH</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid36" type="inproceedings" rend="year" n="cite:preux:hal-00978637">
      <identifiant type="hal" value="hal-00978637"/>
      <analytic>
        <title level="a">Bandits attack function optimization</title>
        <author>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">IEEE Congress on Evolutionary Computation</title>
        <loc>Beijing, China</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00978637" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00978637</ref>
        </imprint>
        <meeting id="cid79049">
          <title>IEEE Congress on Evolutionary Computation</title>
          <num>10</num>
          <abbr type="sigle">CEC</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid22" type="inproceedings" rend="year" n="cite:sani:hal-01079428">
      <identifiant type="hal" value="hal-01079428"/>
      <analytic>
        <title level="a">Exploiting easy data in online optimization</title>
        <author>
          <persName key="sequel-2014-idp112520">
            <foreName>Amir</foreName>
            <surname>Sani</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp82200">
            <foreName>Gergely</foreName>
            <surname>Neu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing 27</title>
        <loc>Montreal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01079428" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01079428</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>28</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid21" type="inproceedings" rend="year" n="cite:soare:hal-01075701">
      <identifiant type="hal" value="hal-01075701"/>
      <analytic>
        <title level="a">Best-Arm Identification in Linear Bandits</title>
        <author>
          <persName key="sequel-2014-idp113800">
            <foreName>Marta</foreName>
            <surname>Soare</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idm26088">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS - Advances in Neural Information Processing Systems 27</title>
        <loc>Montreal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01075701" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01075701</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>28</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid37" type="inproceedings" rend="year" n="cite:szorenyi:hal-01079366">
      <identifiant type="hal" value="hal-01079366"/>
      <analytic>
        <title level="a">Optimistic planning in Markov decision processes using a generative model</title>
        <author>
          <persName key="sequel-2014-idp83480">
            <foreName>Balázs</foreName>
            <surname>Szörényi</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Gunnar</foreName>
            <surname>Kedenburg</surname>
            <initial>G.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing Systems 27</title>
        <loc>Montréal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01079366" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01079366</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>28</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid53" type="inproceedings" rend="year" n="cite:thomas:hal-01103441">
      <identifiant type="hal" value="hal-01103441"/>
      <analytic>
        <title level="a">CoAdapt P300 speller: optimized flashing sequences and online learning</title>
        <author>
          <persName>
            <foreName>Eoin</foreName>
            <surname>Thomas</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Emmanuel</foreName>
            <surname>Daucé</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Dieter</foreName>
            <surname>Devlaminck</surname>
            <initial>D.</initial>
          </persName>
          <persName key="athena-2014-idp67976">
            <foreName>Loïc</foreName>
            <surname>Mahé</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Alexandra</foreName>
            <surname>Carpentier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName key="in-situ-2014-idp83280">
            <foreName>Margaux</foreName>
            <surname>Perrin</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Emmanuel</foreName>
            <surname>Maby</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Jérémie</foreName>
            <surname>Mattout</surname>
            <initial>J.</initial>
          </persName>
          <persName key="athena-2014-idm27864">
            <foreName>Théodore</foreName>
            <surname>Papadopoulo</surname>
            <initial>T.</initial>
          </persName>
          <persName key="athena-2014-idm29272">
            <foreName>Maureen</foreName>
            <surname>Clerc</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">6th International Brain Computer Interface Conference</title>
        <loc>Graz, Austria</loc>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01103441" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01103441</ref>
        </imprint>
        <meeting id="cid105860">
          <title>International Brain-Computer Interface Workshop</title>
          <num>6</num>
          <abbr type="sigle">BCI</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid23" type="inproceedings" rend="year" n="cite:valko:hal-00986818">
      <identifiant type="hal" value="hal-00986818"/>
      <analytic>
        <title level="a">Spectral Bandits for Smooth Graph Functions</title>
        <author>
          <persName key="sequel-2014-idp70232">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Branislav</foreName>
            <surname>Kveton</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2014-idp108624">
            <foreName>Tomáš</foreName>
            <surname>Kocák</surname>
            <initial>T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">31th International Conference on Machine Learning</title>
        <loc>Beijing, China</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00986818" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00986818</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>31</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid28" type="techreport" rend="year" n="cite:mary:hal-01022628">
      <identifiant type="hal" value="hal-01022628"/>
      <monogr>
        <title level="m">Bandits Warm-up Cold Recommender Systems</title>
        <author>
          <persName key="sequel-2014-idp76928">
            <foreName>Jérémie</foreName>
            <surname>Mary</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2014-idp75664">
            <foreName>Romaric</foreName>
            <surname>Gaudel</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-8563</biblScope>
          <publisher>
            <orgName type="institution">Inria Lille</orgName>
          </publisher>
          <dateStruct>
            <month>July</month>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">18</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01022628" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01022628</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid45" type="techreport" rend="year" n="cite:munos:hal-00747575">
      <identifiant type="hal" value="hal-00747575"/>
      <monogr>
        <title level="m">From Bandits to Monte-Carlo Tree Search: The Optimistic Principle Applied to Optimization and Planning</title>
        <author>
          <persName key="sequel-2014-idp67360">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-00747575" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-00747575</ref>
        </imprint>
      </monogr>
      <note type="bnote">130 pages</note>
      <note type="typdoc">Technical report</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid40" type="techreport" rend="year" n="cite:musco:hal-01078716">
      <identifiant type="hal" value="hal-01078716"/>
      <monogr>
        <title level="m">A Generative Model of Software Dependency Graphs to Better Understand Software Evolution</title>
        <author>
          <persName key="sequel-2014-idp117600">
            <foreName>Vincenzo</foreName>
            <surname>Musco</surname>
            <initial>V.</initial>
          </persName>
          <persName key="spirals-2014-idp82576">
            <foreName>Martin</foreName>
            <surname>Monperrus</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="institution">Inria</orgName>
          </publisher>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01078716" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01078716</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Technical Report</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid31" subtype="nonparu-n" type="unpublished" rend="year" n="cite:chainais:hal-01104759">
      <identifiant type="hal" value="hal-01104759"/>
      <monogr>
        <title level="m">Statistical performance analysis of a fast super-resolution technique using noisy translations</title>
        <author>
          <persName key="sequel-2014-idp71472">
            <foreName>Pierre</foreName>
            <surname>Chainais</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Aymeric</foreName>
            <surname>Leray</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01104759" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01104759</ref>
        </imprint>
      </monogr>
      <note type="bnote">15 pages, forthcoming</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid26" type="misc" rend="year" n="cite:guillou:hal-01077986">
      <identifiant type="doi" value="10.1145/2668067.2668073"/>
      <identifiant type="hal" value="hal-01077986"/>
      <monogr x-scientific-popularization="no">
        <title level="m">User Engagement as Evaluation: a Ranking or a Regression Problem?</title>
        <author>
          <persName key="sequel-2014-idp106104">
            <foreName>Frédéric</foreName>
            <surname>Guillou</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sequel-2014-idp75664">
            <foreName>Romaric</foreName>
            <surname>Gaudel</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2014-idp76928">
            <foreName>Jérémie</foreName>
            <surname>Mary</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2014-idm27568">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01077986" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01077986</ref>
        </imprint>
      </monogr>
      <note type="bnote">1. Introduction 2. Recsys Challenge 2014: Data and Protocol 2.1 Data Characteristics and Statistics 2.2 About User Engagement as Evaluation 2.3 Input Features for the Model 3. Method 3.1 LambdaMART Model 3.2 Random Forests 3.3 Description of the Approach 4. Experiments 4.1 Experimental results 4.2 Relevant Features 5. Discussions 6. Conclusions 7. Acknowledgments 8. References</note>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid7" type="article" rend="foot" n="footcite:Aueretal2002">
      <analytic>
        <title level="a">Finite-time analysis of the multi-armed bandit problem</title>
        <author>
          <persName>
            <foreName>Peter</foreName>
            <surname>Auer</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>N.</foreName>
            <surname>Cesa-Bianchi</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>P.</foreName>
            <surname>Fischer</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Machine Learning</title>
        <imprint>
          <biblScope type="volume">47</biblScope>
          <biblScope type="number">2/3</biblScope>
          <dateStruct>
            <year>2002</year>
          </dateStruct>
          <biblScope type="pages">235–256</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid3" type="book" rend="foot" n="footcite:bellman">
      <monogr>
        <title level="m">Dynamic Programming</title>
        <author>
          <persName>
            <foreName>R.</foreName>
            <surname>Bellman</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Princeton University Press</orgName>
          </publisher>
          <dateStruct>
            <year>1957</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid4" type="book" rend="foot" n="footcite:bertshreve78">
      <monogr>
        <title level="m">Stochastic Optimal Control (The Discrete Time Case)</title>
        <author>
          <persName>
            <foreName>D.P.</foreName>
            <surname>Bertsekas</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>S.E.</foreName>
            <surname>Shreve</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Academic Press, New York</orgName>
          </publisher>
          <dateStruct>
            <year>1978</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid2" type="book" rend="foot" n="footcite:Bertsekas96">
      <monogr>
        <title level="m">Neuro-Dynamic Programming</title>
        <author>
          <persName>
            <foreName>D.P.</foreName>
            <surname>Bertsekas</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Tsitsiklis</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Athena Scientific</orgName>
          </publisher>
          <dateStruct>
            <year>1996</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid9" type="article" rend="foot" n="footcite:ferguson1973bas">
      <analytic>
        <title level="a">A Bayesian Analysis of Some Nonparametric Problems</title>
        <author>
          <persName>
            <foreName>T.S.</foreName>
            <surname>Ferguson</surname>
            <initial>T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">The Annals of Statistics</title>
        <imprint>
          <biblScope type="volume">1</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <year>1973</year>
          </dateStruct>
          <biblScope type="pages">209–230</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid8" type="book" rend="foot" n="footcite:EltsStatLearning">
      <monogr>
        <title level="m">The elements of statistical learning — Data Mining, Inference, and Prediction</title>
        <author>
          <persName>
            <foreName>T.</foreName>
            <surname>Hastie</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>R.</foreName>
            <surname>Tibshirani</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Friedman</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year>2001</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid46" type="inproceedings" rend="foot" n="footcite:nguyen:hal-00823230">
      <identifiant type="hal" value="hal-00823230"/>
      <analytic>
        <title level="a">Competing with an Infinite Set of Models in Reinforcement Learning</title>
        <author>
          <persName>
            <foreName>Phuong</foreName>
            <surname>Nguyen</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName key="sequel-2014-idp68800">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Ronald</foreName>
            <surname>Ortner</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">AISTATS</title>
        <loc>Arizona, United States</loc>
        <title level="s">JMLR W&amp;CP</title>
        <imprint>
          <biblScope type="volume">31</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">463-471</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-00823230" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00823230</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid10" type="book" rend="foot" n="footcite:ADPpowell">
      <monogr>
        <title level="m">Approximate Dynamic Programming</title>
        <author>
          <persName>
            <foreName>W.</foreName>
            <surname>Powell</surname>
            <initial>W.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Wiley</orgName>
          </publisher>
          <dateStruct>
            <year>2007</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid1" type="book" rend="foot" n="footcite:puterman94">
      <monogr>
        <title level="m">Markov Decision Processes: Discrete Stochastic Dynamic Programming</title>
        <author>
          <persName>
            <foreName>M.L.</foreName>
            <surname>Puterman</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>John Wiley and Sons</orgName>
          </publisher>
          <dateStruct>
            <year>1994</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid6" type="article" rend="foot" n="footcite:Robbins1952">
      <analytic>
        <title level="a">Some aspects of the sequential design of experiments</title>
        <author>
          <persName>
            <foreName>H.</foreName>
            <surname>Robbins</surname>
            <initial>H.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Bull. Amer. Math. Soc.</title>
        <imprint>
          <biblScope type="volume">55</biblScope>
          <dateStruct>
            <year>1952</year>
          </dateStruct>
          <biblScope type="pages">527–535</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid13" type="article" rend="foot" n="footcite:rustSSMedicare">
      <analytic>
        <title level="a">How Social Security and Medicare Affect Retirement Behavior in a World of Incomplete Market</title>
        <author>
          <persName>
            <foreName>J.</foreName>
            <surname>Rust</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Econometrica</title>
        <imprint>
          <biblScope type="volume">65</biblScope>
          <biblScope type="number">4</biblScope>
          <dateStruct>
            <month>July</month>
            <year>1997</year>
          </dateStruct>
          <biblScope type="pages">781–831</biblScope>
          <ref xlink:href="http://econpapers.repec.org/paper/wpawuwppe/9406005.htm" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>econpapers.<allowbreak/>repec.<allowbreak/>org/<allowbreak/>paper/<allowbreak/>wpawuwppe/<allowbreak/>9406005.<allowbreak/>htm</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid12" type="article" rend="foot" n="footcite:rustNuclearPlants">
      <analytic>
        <title level="a">On the Optimal Lifetime of Nuclear Power Plants</title>
        <author>
          <persName>
            <foreName>J.</foreName>
            <surname>Rust</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Business &amp; Economic Statistics</title>
        <imprint>
          <biblScope type="volume">15</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <year>1997</year>
          </dateStruct>
          <biblScope type="pages">195–208</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid0" type="book" rend="foot" n="footcite:sb">
      <monogr>
        <title level="m">Reinforcement learning: an introduction</title>
        <author>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>A.G.</foreName>
            <surname>Barto</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>MIT Press</orgName>
          </publisher>
          <dateStruct>
            <year>1998</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid11" type="article" rend="foot" n="footcite:tdgammon">
      <analytic>
        <title level="a">Temporal Difference Learning and TD-Gammon</title>
        <author>
          <persName>
            <foreName>G.</foreName>
            <surname>Tesauro</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Communications of the ACM</title>
        <imprint>
          <biblScope type="volume">38</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <month>March</month>
            <year>1995</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2014-bid5" type="inbook" rend="foot" n="footcite:werbosHandbookADP">
      <analytic>
        <author>
          <persName>
            <foreName>P.</foreName>
            <surname>Werbos</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">ADP: Goals, Opportunities and Principles</title>
        <imprint>
          <publisher>
            <orgName>IEEE Press</orgName>
          </publisher>
          <dateStruct>
            <year>2004</year>
          </dateStruct>
          <biblScope type="pages">3–44</biblScope>
        </imprint>
      </monogr>
      <note type="bnote">Handbook of learning and approximate dynamic programming</note>
    </biblStruct>
  </biblio>
</raweb>
