<?xml version="1.0" encoding="utf-8"?>
<raweb xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" year="2013">
  <identification id="sequel" isproject="true">
    <shortname>SequeL</shortname>
    <projectName>Sequential Learning</projectName>
    <theme-de-recherche>Optimization, machine learning and statistical methods</theme-de-recherche>
    <domaine-de-recherche>Applied Mathematics, Computation and Simulation</domaine-de-recherche>
    <urlTeam>http://sequel.lille.inria.fr/</urlTeam>
    <datecreation>2007 July 01</datecreation>
    <structure_exterieure type="Labs">
      <libelle>Laboratoire d'informatique fondamentale de Lille (LIFL)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Labs">
      <libelle>Laboratoire d'Automatique, de Génie Informatique et Signal (LAGIS)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Université Charles de Gaulle (Lille 3)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Ecole Centrale de Lille</libelle>
    </structure_exterieure>
    <UR name="Lille"/>
    <keywords>
      <term>Machine Learning</term>
      <term>Statistical Learning</term>
      <term>Sequential Learning</term>
      <term>Sequential Decision Making</term>
      <term>Inference</term>
    </keywords>
    <moreinfo/>
  </identification>
  <team id="uid1">
    <person key="sequel-2006-id18078">
      <firstname>Philippe</firstname>
      <lastname>Preux</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Team leader, Université Lille 3, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2008-id18222">
      <firstname>Mohammad</firstname>
      <lastname>Ghavamzadeh</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher, on leave from Inria since October 2013, working in Adobe Research, San Jose, CA</moreinfo>
    </person>
    <person key="sequel-2008-id18453">
      <firstname>Alessandro</firstname>
      <lastname>Lazaric</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="sequel-2006-id18109">
      <firstname>Rémi</firstname>
      <lastname>Munos</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Senior Researcher, full secondment with MSR (Boston) since July 2013 (until June 2014)</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2007-id18237">
      <firstname>Daniil</firstname>
      <lastname>Ryabko</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2012-idp140352027421952">
      <firstname>Michal</firstname>
      <lastname>Valko</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274917456">
      <firstname>Pierre</firstname>
      <lastname>Chainais</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Ecole Centrale Lille, Associate Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2006-id18239">
      <firstname>Rémi</firstname>
      <lastname>Coulom</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université Lille 3, Associate Professor</moreinfo>
    </person>
    <person key="sequel-2006-id18189">
      <firstname>Emmanuel</firstname>
      <lastname>Duflos</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Ecole Centrale Lille, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="tao-2006-id18354">
      <firstname>Romaric</firstname>
      <lastname>Gaudel</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université Lille 3, Associate Professor</moreinfo>
    </person>
    <person key="sequel-2006-id18261">
      <firstname>Jérémie</firstname>
      <lastname>Mary</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université Lille 3, Associate Professor</moreinfo>
    </person>
    <person key="sequel-2006-id18214">
      <firstname>Philippe</firstname>
      <lastname>Vanheeghe</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Ecole Centrale Lille, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2013-idp140217274932720">
      <firstname>Olivier</firstname>
      <lastname>Pietquin</lastname>
      <categoryPro>CollaborateurExterieur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université Lille 1, Professor, since Oct 2013</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2013-idp140217274935504">
      <firstname>Romain</firstname>
      <lastname>Laby</lastname>
      <categoryPro>Technique</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by Technologies Broadcasting System, since Mar 2013 until Nov 2013</moreinfo>
    </person>
    <person key="athena-2011-idp140624350986624">
      <firstname>Eoin</firstname>
      <lastname>Thomas</lastname>
      <categoryPro>Technique</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, until Oct 2013</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274940208">
      <firstname>Boris</firstname>
      <lastname>Baldassari</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Squoring Technologies</moreinfo>
    </person>
    <person key="sequel-2009-id59885">
      <firstname>Victor</firstname>
      <lastname>Gabillon</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université Lille 1</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274944816">
      <firstname>Frédéric</firstname>
      <lastname>Guillou</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by Inria and Région Nord Pas de Calais, since Oct 2013</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274947264">
      <firstname>Adrien</firstname>
      <lastname>Hoarau</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by STREP/Complacs</moreinfo>
    </person>
    <person key="sequel-2010-id59972">
      <firstname>Azadeh</firstname>
      <lastname>Khaleghi</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, until Oct 2013</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274951872">
      <firstname>Tomáš</firstname>
      <lastname>Kocák</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by Inria, since Oct 2013</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274954176">
      <firstname>Vincenzo</firstname>
      <lastname>Musco</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université Lille 1, granted by Université Lille 1 and Université Lille 3, since Oct 2013, also member of Adam team-project</moreinfo>
    </person>
    <person key="sequel-2012-idp140352027480400">
      <firstname>Sami</firstname>
      <lastname>Naamane</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Orange Labs, until May 2013</moreinfo>
    </person>
    <person key="sequel-2010-id60072">
      <firstname>Olivier</firstname>
      <lastname>Nicol</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université Lille 1</moreinfo>
    </person>
    <person key="sequel-2012-idp140352027488592">
      <firstname>Amir</firstname>
      <lastname>Sani</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by Inria and Région Nord Pas de Calais</moreinfo>
    </person>
    <person key="sequel-2012-idp140298916067424">
      <firstname>Marta</firstname>
      <lastname>Soare</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by Inria and Région Nord Pas de Calais</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274966176">
      <firstname>Raphael</firstname>
      <lastname>Fonteneau</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>FNRS, until Aug 2013</moreinfo>
    </person>
    <person key="sequel-2012-idp140298916026800">
      <firstname>Nathaniel</firstname>
      <lastname>Korda</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by STREP/Complacs, until Sep 2013</moreinfo>
    </person>
    <person key="sequel-2012-idp140298916034864">
      <firstname>Prashanth</firstname>
      <lastname>Lakshmanrao Anantha Padmanabha</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by STREP/Complacs</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274973152">
      <firstname>Gergely</firstname>
      <lastname>Neu</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by ERCIM and Inria, since Sep 2013</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274975520">
      <firstname>Thanh Hai</firstname>
      <lastname>Nguyen</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, from Jan 2013 to Oct 2013</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274977824">
      <firstname>Balázs</firstname>
      <lastname>Szörényi</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria, granted by STREP/Complacs</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274980128">
      <firstname>Gabriel</firstname>
      <lastname>Dulac Arnold</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Université Pierre &amp; Marie Curie, PhD student at LIP'6, from Mar 2013 to May 2013</moreinfo>
    </person>
    <person key="sequel-2013-idp140217274982640">
      <firstname>Gunnar</firstname>
      <lastname>Kedenburg</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Berlin Institute of Technology, PhD student at idalab, from Jun 2013 to Nov 2013</moreinfo>
    </person>
    <person key="sequel-2012-idp140298916021360">
      <firstname>Amélie</firstname>
      <lastname>Supervielle</lastname>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Presentation</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span> means “Sequential Learning”. As such, <span class="smallcap" align="left">SequeL</span> focuses on the task of learning in artificial systems (either hardware, or software) that gather information along time. Such systems are named <i>(learning) agents</i> (or learning machines) in the following.
The data they gather may be used to estimate some parameters of a model, which, in turn, may be used for selecting actions in order to perform some long-term optimization task.</p>
      <p>For the purpose of model building, the agent needs to represent information collected so far in some compact form and use it to process newly available data.</p>
      <p>The acquired data may result from an observation process of an agent in interaction with its environment (the data thus represent a perception). This is the case when the agent makes decisions (in order to attain a certain objective) that impact the environment, and thus the observation process itself.</p>
      <p>Hence, in <span class="smallcap" align="left">SequeL</span>, the term <b>sequential</b> refers to two aspects:</p>
      <simplelist>
        <li id="uid4">
          <p noindent="true">the <b>sequential acquisition of data</b>, from which a model is learned (supervised and unsupervised learning),</p>
        </li>
        <li id="uid5">
          <p noindent="true">the <b>sequential decision making task</b>, based on the learned model (reinforcement learning).</p>
        </li>
      </simplelist>
      <p>Examples of sequential learning problems include:</p>
      <descriptionlist>
        <label>Supervised learning</label>
        <li id="uid6">
          <p noindent="true">tasks deal with the prediction of some response given a certain set of observations of input variables and responses. New sample points keep on being observed.</p>
        </li>
        <label>Unsupervised learning</label>
        <li id="uid7">
          <p noindent="true">tasks deal with clustering objects that arrive as a continuous flow. The (unknown) number of clusters typically evolves over time, as new objects are observed.</p>
        </li>
        <label>Reinforcement learning</label>
        <li id="uid8">
          <p noindent="true">tasks deal with the control (a policy) of some system which has to be optimized (see <ref xlink:href="#sequel-2013-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). We do not assume the availability of a model of the system to be controlled.</p>
        </li>
      </descriptionlist>
      <p>In all these cases, we mostly assume that the process can be considered stationary for at least a certain amount of time, and slowly evolving.</p>
      <p>We wish to have any-time algorithms, that is, at any moment, a prediction may be required/an action may be selected making full use, and hopefully, the best use, of the experience already gathered by the learning agent.</p>
      <p>The perception of the environment by the learning agent (using its sensors) is generally neither the best one to make a prediction, nor to take a decision (we deal with Partially Observable Markov Decision Problems). So, the perception has to be mapped in some way to a better, and relevant, state (or input) space.</p>
      <p>Finally, an important issue of prediction regards its evaluation: how wrong may we be when we perform a prediction? For real systems to be controlled, this issue cannot be simply left unanswered.</p>
      <p spacebefore="6.0pt">To sum up, in <span class="smallcap" align="left">SequeL</span>, the main issues regard:</p>
      <simplelist>
        <li id="uid9">
          <p noindent="true">the learning of a model: we focus on models that map some
input space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>ℝ</mi><mi>P</mi></msup></math></formula> to <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ℝ</mi></math></formula>,</p>
        </li>
        <li id="uid10">
          <p noindent="true">the observation to state mapping,</p>
        </li>
        <li id="uid11">
          <p noindent="true">the choice of the action to perform (in the case of sequential
decision problem),</p>
        </li>
        <li id="uid12">
          <p noindent="true">the performance guarantees,</p>
        </li>
        <li id="uid13">
          <p noindent="true">the implementation of usable algorithms,</p>
        </li>
      </simplelist>
      <p>all that being understood in a <i>sequential</i> framework.</p>
    </subsection>
    <subsection id="uid14" level="1">
      <bodyTitle>Highlights of the Year</bodyTitle>
      <simplelist>
        <li id="uid15">
          <p noindent="true">In 2013, Crazy Stone won the 6th edition of the UEC Cup and
the first edition of the Denseisen. Crazy Stone is a Go-playing
program developed by Rémi Coulom since 2005, based on the Monte
Carlo Tree Search method. The UEC Cup is the most important
international computer-Go competition, organized yearly by the
University of Electro-Communications in Tokyo, Japan. The
Denseisen is a match between the winner of the UEC Cup and a top
Japanese professional Go player. This year Crazy Stone won a game
with 4 stones of handicap against 9-dan professional player Yoshio
Ishida.</p>
        </li>
        <li id="uid16">
          <p noindent="true">The International Machine Learning Society selected
<span class="smallcap" align="left">SequeL</span> to organize the 32<sup>nd</sup> International
Conference on Machine Learning in 2015 at Lille. ICML is the most
important conference in the field of machine learning.</p>
        </li>
      </simplelist>
    </subsection>
  </presentation>
  <fondements id="uid17">
    <bodyTitle>Research Program</bodyTitle>
    <subsection id="uid18" level="1">
      <bodyTitle>In Short</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span> is primarily grounded on two domains:</p>
      <simplelist>
        <li id="uid19">
          <p noindent="true">the problem of decision under uncertainty,</p>
        </li>
        <li id="uid20">
          <p noindent="true">statistical analysis and statistical learning, which provide the general concepts and tools to solve this problem.</p>
        </li>
      </simplelist>
      <p>To help the reader who is unfamiliar with these questions, we briefly present key ideas below.</p>
    </subsection>
    <subsection id="uid21" level="1">
      <bodyTitle>Decision-making Under Uncertainty</bodyTitle>
      <p>The phrase “Decision under uncertainty” refers to the problem of taking decisions when we have full knowledge neither of the situation nor of the consequences of the decisions, as well as when the consequences of decisions are non-deterministic.</p>
      <p>We introduce two specific sub-domains, namely Markov decision processes, which model sequential decision problems, and bandit problems.</p>
      <subsection id="uid22" level="2">
        <bodyTitle>Reinforcement Learning</bodyTitle>
        <p>Sequential decision processes occupy the heart of the <span class="smallcap" align="left">SequeL</span> project; a detailed presentation of this problem may be found in Puterman's book <ref xlink:href="#sequel-2013-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        <p>A Markov Decision Process (MDP) is defined as the tuple <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo>(</mo><mi>𝒳</mi><mo>,</mo><mi>𝒜</mi><mo>,</mo><mi>P</mi><mo>,</mo><mi>r</mi><mo>)</mo></mrow></math></formula> where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula> is the state space, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula> is the action space, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>P</mi></math></formula> is the probabilistic transition kernel, and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>r</mi><mo>:</mo><mi>𝒳</mi><mo>×</mo><mi>𝒜</mi><mo>×</mo><mi>𝒳</mi><mo>→</mo><mi>ℝ</mi></mrow></math></formula> is the reward function. For the sake of simplicity, we assume in this introduction that the state and action spaces are finite. 
If the current state (at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula>) is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> and the chosen action is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></math></formula>, then the Markov assumption means that the transition probability to a new state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>x</mi><mo>'</mo></msup><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> (at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></math></formula>) only depends on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow></math></formula>. We write <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>p</mi><mo>(</mo><msup><mi>x</mi><mo>'</mo></msup><mo>|</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow></math></formula> the corresponding transition probability. During a transition <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow><mo>→</mo><msup><mi>x</mi><mo>'</mo></msup></mrow></math></formula>, a reward <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>r</mi><mo>(</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>,</mo><msup><mi>x</mi><mo>'</mo></msup><mo>)</mo></mrow></math></formula> is incurred.</p>
        <p>In the MDP <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo>(</mo><mi>𝒳</mi><mo>,</mo><mi>𝒜</mi><mo>,</mo><mi>P</mi><mo>,</mo><mi>r</mi><mo>)</mo></mrow></math></formula>, each initial state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mn>0</mn></msub></math></formula> and action sequence <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula> gives rise to a sequence of states <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><msub><mi>x</mi><mn>2</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula>, satisfying <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>ℙ</mi><mfenced separators="" open="(" close=")"><msub><mi>x</mi><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>=</mo><msup><mi>x</mi><mo>'</mo></msup><mrow><mo>|</mo></mrow><msub><mi>x</mi><mi>t</mi></msub><mo>=</mo><mi>x</mi><mo>,</mo><msub><mi>a</mi><mi>t</mi></msub><mo>=</mo><mi>a</mi></mfenced><mo>=</mo><mi>p</mi><mrow><mo>(</mo><msup><mi>x</mi><mo>'</mo></msup><mo>|</mo><mi>x</mi><mo>,</mo><mi>a</mi><mo>)</mo></mrow><mo>,</mo></mrow></math></formula> and rewards <footnote id="uid23" id-text="1">Note that for simplicity, we considered the case of a deterministic reward function, but in many applications, the reward <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>r</mi><mi>t</mi></msub></math></formula> itself is a random variable.</footnote> <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" 
overflow="scroll"><mrow><msub><mi>r</mi><mn>0</mn></msub><mo>,</mo><msub><mi>r</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula> defined by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>r</mi><mi>t</mi></msub><mo>=</mo><mi>r</mi><mrow><mo>(</mo><msub><mi>x</mi><mi>t</mi></msub><mo>,</mo><msub><mi>a</mi><mi>t</mi></msub><mo>,</mo><msub><mi>x</mi><mrow><mi>t</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>)</mo></mrow></mrow></math></formula>.</p>
        <p>The history of the process up to time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> is defined to be <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>H</mi><mi>t</mi></msub><mo>=</mo><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mrow><mi>t</mi><mo>-</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>a</mi><mrow><mi>t</mi><mo>-</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow></mrow></math></formula>. A policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is a sequence of functions <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mn>0</mn></msub><mo>,</mo><msub><mi>π</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo></mrow></math></formula>, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>π</mi><mi>t</mi></msub></math></formula> maps the space of possible histories at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> to the space of probability distributions over the space of actions <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula>. 
To follow a policy means that, in each time step, we assume that the process history up to time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub></mrow></math></formula> and the probability of selecting an action <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>a</mi></math></formula> is equal to <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mi>t</mi></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow><mrow><mo>(</mo><mi>a</mi><mo>)</mo></mrow></mrow></math></formula>. A policy is called stationary (or Markovian) if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>π</mi><mi>t</mi></msub></math></formula> depends only on the last visited state. 
In other words, a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>π</mi><mo>=</mo><mo>(</mo><msub><mi>π</mi><mn>0</mn></msub><mo>,</mo><msub><mi>π</mi><mn>1</mn></msub><mo>,</mo><mo>...</mo><mo>)</mo></mrow></math></formula> is called stationary if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>π</mi><mi>t</mi></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>0</mn></msub><mo>,</mo><msub><mi>a</mi><mn>0</mn></msub><mo>,</mo><mo>...</mo><mo>,</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow><mo>=</mo><msub><mi>π</mi><mn>0</mn></msub><mrow><mo>(</mo><msub><mi>x</mi><mi>t</mi></msub><mo>)</mo></mrow></mrow></math></formula> holds for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>t</mi><mo>≥</mo><mn>0</mn></mrow></math></formula>. A policy is called deterministic if the probability distribution prescribed by the policy for any history is concentrated on a single action. Otherwise it is called a stochastic policy.</p>
        <p>We move from an MD process to an MD problem by formulating the goal of the agent, that is, what the sought policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> has to optimize. It is very often formulated as maximizing (or minimizing), in expectation, some functional of the sequence of future rewards. For example, a usual functional is the infinite-time horizon sum of discounted rewards. For a given (stationary) policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>, we define the value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> of that policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> at a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula> as the expected sum of discounted future rewards given that we start from the initial state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula> and follow the policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>:</p>
        <formula id-text="1" id="uid24" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>V</mi>
                <mi>π</mi>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <mi>𝔼</mi>
              <mfenced separators="" open="[" close="]">
                <munderover>
                  <mo>∑</mo>
                  <mrow>
                    <mi>t</mi>
                    <mo>=</mo>
                    <mn>0</mn>
                  </mrow>
                  <mi>∞</mi>
                </munderover>
                <msup>
                  <mi>γ</mi>
                  <mi>t</mi>
                </msup>
                <msub>
                  <mi>r</mi>
                  <mi>t</mi>
                </msub>
                <mo>|</mo>
                <msub>
                  <mi>x</mi>
                  <mn>0</mn>
                </msub>
                <mo>=</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>π</mi>
              </mfenced>
              <mo>,</mo>
            </mrow>
          </math>
        </formula>
        <p>where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝔼</mi></math></formula> is the expectation operator and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>γ</mi><mo>∈</mo><mo>(</mo><mn>0</mn><mo>,</mo><mn>1</mn><mo>)</mo></mrow></math></formula> is the discount factor. This value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mi>π</mi></msup></math></formula> gives an evaluation of the performance of a given policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>. Other functionals of the sequence of future rewards may be considered, such as the undiscounted reward (see the stochastic shortest path problems <ref xlink:href="#sequel-2013-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) and average reward settings. Note also that, here, we considered the problem of maximizing a reward functional, but a formulation in terms of minimizing some cost or risk functional would be equivalent.</p>
        <p>In order to maximize a given functional in a sequential framework, one usually applies Dynamic Programming (DP)  <ref xlink:href="#sequel-2013-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, which introduces the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula>, defined as the optimal expected sum of rewards when the agent starts from a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula>. We have <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow><mo>=</mo><msub><mo movablelimits="true" form="prefix">sup</mo><mi>π</mi></msub><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula>. Now, let us give two definitions about policies:</p>
        <simplelist>
          <li id="uid25">
            <p noindent="true">We say that a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is optimal, if it attains the optimal values <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> for any state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>, <i>i.e.</i>, if <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow><mo>=</mo><msup><mi>V</mi><mo>*</mo></msup><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow></mrow></math></formula> for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>. Under mild conditions, deterministic stationary optimal policies exist <ref xlink:href="#sequel-2013-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Such an optimal policy is written <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>π</mi><mo>*</mo></msup></math></formula>.</p>
          </li>
          <li id="uid26">
            <p noindent="true">We say that a (deterministic stationary) policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> is greedy with respect to (w.r.t.) some function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula> (defined on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula>) if, for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>,</p>
            <formula type="display">
              <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
                <mrow>
                  <mi>π</mi>
                  <mrow>
                    <mo>(</mo>
                    <mi>x</mi>
                    <mo>)</mo>
                  </mrow>
                  <mo>∈</mo>
                  <mo form="prefix">arg</mo>
                  <munder>
                    <mo movablelimits="true" form="prefix">max</mo>
                    <mrow>
                      <mi>a</mi>
                      <mo>∈</mo>
                      <mi>𝒜</mi>
                    </mrow>
                  </munder>
                  <munder>
                    <mo>∑</mo>
                    <mrow>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>∈</mo>
                      <mi>𝒳</mi>
                    </mrow>
                  </munder>
                  <mi>p</mi>
                  <mrow>
                    <mo>(</mo>
                    <msup>
                      <mi>x</mi>
                      <mo>'</mo>
                    </msup>
                    <mo>|</mo>
                    <mi>x</mi>
                    <mo>,</mo>
                    <mi>a</mi>
                    <mo>)</mo>
                  </mrow>
                  <mfenced separators="" open="[" close="]">
                    <mi>r</mi>
                    <mrow>
                      <mo>(</mo>
                      <mi>x</mi>
                      <mo>,</mo>
                      <mi>a</mi>
                      <mo>,</mo>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mo>+</mo>
                    <mi>γ</mi>
                    <mi>V</mi>
                    <mrow>
                      <mo>(</mo>
                      <msup>
                        <mi>x</mi>
                        <mo>'</mo>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                  </mfenced>
                  <mo>.</mo>
                </mrow>
              </math>
            </formula>
            <p> </p>
            <p noindent="true">where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo form="prefix">arg</mo><msub><mo movablelimits="true" form="prefix">max</mo><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></msub><mi>f</mi><mrow><mo>(</mo><mi>a</mi><mo>)</mo></mrow></mrow></math></formula> is the set of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>a</mi><mo>∈</mo><mi>𝒜</mi></mrow></math></formula> that maximize <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><mi>a</mi><mo>)</mo></mrow></math></formula>. For any function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula>, such a greedy policy always exists because <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒜</mi></math></formula> is finite.</p>
          </li>
        </simplelist>
        <p>The goal of Reinforcement Learning (RL), as well as that of dynamic programming, is to design an optimal policy (or a good approximation of it).</p>
        <p spacebefore="6.0pt">The well-known Dynamic Programming equation (also called the Bellman equation) provides a relation between the optimal value function at a state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>x</mi></math></formula> and the optimal value function at the successor states <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>x</mi><mo>'</mo></msup></math></formula> when choosing an optimal action: for all <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>x</mi><mo>∈</mo><mi>𝒳</mi></mrow></math></formula>,</p>
        <formula id-text="2" id="uid27" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>V</mi>
                <mo>*</mo>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <munder>
                <mo movablelimits="true" form="prefix">max</mo>
                <mrow>
                  <mi>a</mi>
                  <mo>∈</mo>
                  <mi>𝒜</mi>
                </mrow>
              </munder>
              <munder>
                <mo>∑</mo>
                <mrow>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>∈</mo>
                  <mi>𝒳</mi>
                </mrow>
              </munder>
              <mi>p</mi>
              <mrow>
                <mo>(</mo>
                <msup>
                  <mi>x</mi>
                  <mo>'</mo>
                </msup>
                <mo>|</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>a</mi>
                <mo>)</mo>
              </mrow>
              <mfenced separators="" open="[" close="]">
                <mi>r</mi>
                <mrow>
                  <mo>(</mo>
                  <mi>x</mi>
                  <mo>,</mo>
                  <mi>a</mi>
                  <mo>,</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
                <mo>+</mo>
                <mi>γ</mi>
                <msup>
                  <mi>V</mi>
                  <mo>*</mo>
                </msup>
                <mrow>
                  <mo>(</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
              </mfenced>
              <mo>.</mo>
            </mrow>
          </math>
        </formula>
        <p>The benefit of introducing this concept of optimal value function relies on the property that, from the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula>, it is easy to derive an optimal behavior by choosing the actions according to a policy greedy w.r.t. <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula>. Indeed, we have the property that a policy greedy w.r.t. the optimal value function is an optimal policy:</p>
        <formula id-text="3" id="uid28" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <msup>
                <mi>π</mi>
                <mo>*</mo>
              </msup>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>∈</mo>
              <mo form="prefix">arg</mo>
              <munder>
                <mo movablelimits="true" form="prefix">max</mo>
                <mrow>
                  <mi>a</mi>
                  <mo>∈</mo>
                  <mi>𝒜</mi>
                </mrow>
              </munder>
              <munder>
                <mo>∑</mo>
                <mrow>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>∈</mo>
                  <mi>𝒳</mi>
                </mrow>
              </munder>
              <mi>p</mi>
              <mrow>
                <mo>(</mo>
                <msup>
                  <mi>x</mi>
                  <mo>'</mo>
                </msup>
                <mo>|</mo>
                <mi>x</mi>
                <mo>,</mo>
                <mi>a</mi>
                <mo>)</mo>
              </mrow>
              <mfenced separators="" open="[" close="]">
                <mi>r</mi>
                <mrow>
                  <mo>(</mo>
                  <mi>x</mi>
                  <mo>,</mo>
                  <mi>a</mi>
                  <mo>,</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
                <mo>+</mo>
                <mi>γ</mi>
                <msup>
                  <mi>V</mi>
                  <mo>*</mo>
                </msup>
                <mrow>
                  <mo>(</mo>
                  <msup>
                    <mi>x</mi>
                    <mo>'</mo>
                  </msup>
                  <mo>)</mo>
                </mrow>
              </mfenced>
              <mo>.</mo>
            </mrow>
          </math>
        </formula>
        <p>In short, we would like to mention that most of the reinforcement learning methods developed so far are built on one (or both) of the two following approaches (see <ref xlink:href="#sequel-2013-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>):</p>
        <simplelist>
          <li id="uid29">
            <p noindent="true">Bellman's dynamic programming approach, based on the introduction of the value function. It consists in learning a “good” approximation of the optimal value function, and then using it to derive a greedy policy w.r.t. this approximation. The hope (well justified in several cases) is that the performance <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mi>π</mi></msup></math></formula> of the policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> greedy w.r.t. an approximation <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula> of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula> will be close to optimality. This approximation issue of the optimal value function is one of the major challenges inherent to the reinforcement learning problem. <b>Approximate dynamic programming</b> addresses the problem of estimating performance bounds (<i>e.g.</i> the loss in performance <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>|</mo><mo>|</mo></mrow><msup><mi>V</mi><mo>*</mo></msup><mo>-</mo><msup><mi>V</mi><mi>π</mi></msup><mrow><mo>|</mo><mo>|</mo></mrow></mrow></math></formula> resulting from using a policy <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula> — greedy w.r.t. 
some approximation <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula> — instead of an optimal policy) in terms of the approximation error <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mrow><mo>|</mo><mo>|</mo></mrow><msup><mi>V</mi><mo>*</mo></msup><mo>-</mo><mi>V</mi><mrow><mo>|</mo><mo>|</mo></mrow></mrow></math></formula> of the optimal value function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>V</mi><mo>*</mo></msup></math></formula> by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>V</mi></math></formula>. Approximation theory and Statistical Learning theory provide us with bounds in terms of the number of sample data used to represent the
functions, and the capacity and approximation power of the considered function spaces.</p>
          </li>
          <li id="uid30">
            <p noindent="true">Pontryagin's maximum principle approach, based on sensitivity analysis of the performance measure w.r.t. some control parameters. This approach, also called <b>direct policy search</b> in the Reinforcement Learning community, aims at directly finding a good feedback control law in a parameterized policy space without trying to approximate the value function. The method consists in estimating the so-called <b>policy gradient</b>, <i>i.e.</i> the sensitivity of the performance measure (the value function) w.r.t. some parameters of the current policy. The idea is that an optimal control problem is replaced by a parametric optimization problem in the space of parameterized policies. As such, deriving a policy gradient estimate would lead to performing a stochastic gradient method in order to search for a locally optimal parametric policy.</p>
          </li>
        </simplelist>
        <p>Finally, many extensions of Markov decision processes exist, among which Partially Observable MDPs (POMDPs) address the case where the current state does not contain all the information required to determine the best action with certainty.</p>
      </subsection>
      <subsection id="uid31" level="2">
        <bodyTitle>Multi-arm Bandit Theory</bodyTitle>
        <p>Bandit problems illustrate the fundamental difficulty of decision making in the face of uncertainty: A decision maker must choose between what seems to be the best choice (“exploit”), or to test (“explore”) some alternative, hoping to discover a choice that beats the current best choice.</p>
        <p>The classical example of a bandit problem is deciding what treatment to give each patient in a clinical trial when the effectiveness of the treatments is initially unknown and the patients arrive sequentially. These bandit problems became popular with the seminal paper <ref xlink:href="#sequel-2013-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, after which they have found applications in diverse fields, such as control, economics, statistics, or learning theory.</p>
        <p>Formally, a K-armed bandit problem (<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>K</mi><mo>≥</mo><mn>2</mn></mrow></math></formula>) is specified by K real-valued distributions. In each time step a decision maker can select one of the distributions to obtain a sample from it. The samples obtained are considered as rewards. The distributions are initially unknown to the decision maker, whose goal is to maximize the sum of the rewards received, or equivalently, to minimize the regret which is defined as the loss compared to the total payoff that can be achieved given full knowledge of the problem, <i>i.e.</i>, when the arm giving the highest expected reward is pulled all the time.</p>
        <p>The name “bandit” comes from imagining a gambler playing with K slot machines. The gambler can pull the arm of any of the machines, which produces a random payoff as a result: When arm k is pulled, the random payoff is drawn from the distribution associated with k. Since the payoff distributions are initially unknown, the gambler must use exploratory actions to learn the utility of the individual arms. However, exploration has to be carefully controlled since excessive exploration may lead to unnecessary losses. Hence, to play well, the gambler must carefully balance exploration and exploitation. Auer <i>et al.</i> <ref xlink:href="#sequel-2013-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> introduced the algorithm UCB (Upper Confidence Bounds) that follows what is now called the “optimism in the face of uncertainty” principle. Their algorithm works by computing upper confidence bounds for all the arms and then choosing the arm with the highest such bound. They proved that the expected regret of their algorithm increases at most at a logarithmic rate
with the number of trials, and that the algorithm achieves the smallest possible regret up to some sub-logarithmic factor (for the considered family of distributions).</p>
      </subsection>
    </subsection>
    <subsection id="uid32" level="1">
      <bodyTitle>Statistical analysis of time series</bodyTitle>
      <p>Many of the problems of machine learning can be seen as extensions of classical problems of mathematical statistics to their (extremely) non-parametric and model-free cases. Other machine learning problems are founded on such statistical problems. Statistical problems of sequential learning are mainly those that are concerned with the analysis of time series. These problems are as follows.</p>
      <subsection id="uid33" level="2">
        <bodyTitle>Prediction of Sequences of Structured and Unstructured Data</bodyTitle>
        <p>Given a series of observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub></mrow></math></formula> it is required to give forecasts concerning the distribution of the future observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>,</mo><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>2</mn></mrow></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula>; in the simplest case, that of the next outcome <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub></math></formula>.
Then <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mrow><mi>n</mi><mo>+</mo><mn>1</mn></mrow></msub></math></formula> is revealed and the process continues. Different goals can be formulated in this setting. One can either make some assumptions on the probability
measure that generates the sequence <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula>, such as that the outcomes are independent and identically distributed (i.i.d.),
or that the sequence is a Markov chain, that it is a stationary process, etc.
More generally, one can assume that the data is generated by a probability measure that belongs to a certain set <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒞</mi></math></formula>.
In these cases the goal is to have the discrepancy between the predicted and the “true” probabilities go to zero, if possible, with guarantees
on the speed of convergence.</p>
        <p>Alternatively, rather than making some assumptions on the data, one can change the goal: the predicted probabilities should be asymptotically as good as those given by the best reference predictor from a certain pre-defined set.</p>
        <p>Another dimension of complexity in this problem concerns the nature of observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mi>i</mi></msub></math></formula>. In the simplest case,
they come from a finite space, but already basic applications often require real-valued observations. Moreover,
function or even graph-valued observations often arise in practice, in particular in applications concerning Web data.
In these settings estimating even simple characteristics of probability distributions of the future outcomes becomes
non-trivial, and new learning algorithms for solving these problems are in order.</p>
      </subsection>
      <subsection id="uid34" level="2">
        <bodyTitle>Hypothesis testing</bodyTitle>
        <p>Given a series of observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula> generated by some unknown probability measure <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula>, the problem is to test a certain given hypothesis <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> about <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula>, versus a given alternative hypothesis <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula>. There are many different examples of this problem. Perhaps the simplest one is testing a simple hypothesis “<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is a Bernoulli i.i.d. measure with the probability of 0 equal to 1/2” versus “<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is Bernoulli i.i.d. with a parameter different from 1/2”. More
interesting cases include the problems of model verification: for example, testing that <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>μ</mi></math></formula> is a Markov chain, versus that it is a stationary ergodic process but not a Markov chain. In the case when we have not one but several series of observations, we may wish to test the hypothesis that they are independent, or that they are generated by the same distribution. Applications of these problems to a more general class of machine learning tasks include the problem of feature selection, the problem of testing that a certain behaviour (such as pulling a certain arm of a bandit, or using a certain policy) is better (in terms of achieving some goal, or collecting some rewards) than another behaviour, or than a class of other behaviours.</p>
        <p>The problem of hypothesis testing can also be studied in its general formulations: given two (abstract) hypotheses <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula> about the unknown measure that generates the data, find out whether it is possible to test <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>0</mn></msub></math></formula> against <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>H</mi><mn>1</mn></msub></math></formula> (with confidence), and if yes then how one can do it.</p>
      </subsection>
      <subsection id="uid35" level="2">
        <bodyTitle>Change Point Analysis</bodyTitle>
        <p>A stochastic process is generating the data. At some point, the process distribution changes.
In the “offline” situation, the statistician observes the resulting sequence of outcomes and has
to estimate the point or the points at which the change(s) occurred. In the online setting, the goal is to
detect the change as quickly as possible.</p>
        <p>These are the classical problems in mathematical statistics, and probably among the last remaining statistical problems
not adequately addressed by machine learning methods. The reason for the latter is perhaps in that the problem is rather
challenging. Thus, most methods available so far are parametric methods concerning piece-wise constant distributions, and the
change in distribution is associated with the change in the mean. However, many applications, including DNA analysis,
the analysis of (user) behaviour data, etc., fail to comply with this kind of assumption. Thus, our goal here is to provide completely non-parametric
methods allowing for any kind of changes in the time-series distribution.</p>
      </subsection>
      <subsection id="uid36" level="2">
        <bodyTitle>Clustering Time Series, Online and Offline</bodyTitle>
        <p>The problem of clustering, while being a classical problem of mathematical statistics, belongs to the realm of unsupervised learning. For time series, this problem can be formulated as follows: given several samples <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>x</mi><mn>1</mn></msup><mo>=</mo><mrow><mo>(</mo><msubsup><mi>x</mi><mn>1</mn><mn>1</mn></msubsup><mo>,</mo><mo>⋯</mo><mo>,</mo><msubsup><mi>x</mi><msub><mi>n</mi><mn>1</mn></msub><mn>1</mn></msubsup><mo>)</mo></mrow><mo>,</mo><mo>⋯</mo><mo>,</mo><msup><mi>x</mi><mi>N</mi></msup><mo>=</mo><mrow><mo>(</mo><msubsup><mi>x</mi><mn>1</mn><mi>N</mi></msubsup><mo>,</mo><mo>⋯</mo><mo>,</mo><msubsup><mi>x</mi><msub><mi>n</mi><mi>N</mi></msub><mi>N</mi></msubsup><mo>)</mo></mrow></mrow></math></formula>, we wish to group similar objects together. While this is of course not a precise formulation, it can be made precise if we assume that the samples were generated by <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula> different distributions.</p>
        <p>The online version of the problem allows for the number of observed time series to grow with time, in general, in an arbitrary manner.</p>
      </subsection>
      <subsection id="uid37" level="2">
        <bodyTitle>Online Semi-Supervised Learning</bodyTitle>
        <p>Semi-supervised learning (SSL) is a field of machine learning that studies
learning from both labeled and unlabeled examples. This learning
paradigm is extremely useful for solving real-world problems, where
data is often abundant but the resources to label them are limited.</p>
        <p>Furthermore, <i>online</i> SSL is suitable for adaptive machine learning
systems.
In the classification case, learning is viewed as a repeated game against a
potentially adversarial nature. At each step <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> of this game, we observe an
example <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>𝐱</mi><mi>𝐭</mi></msub></math></formula>, and then predict its label <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mover accent="true"><mi>y</mi><mo>^</mo></mover><mi>t</mi></msub></math></formula>.</p>
        <p>The challenge of the game is that we only exceptionally observe the true label
<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>y</mi><mi>t</mi></msub></math></formula>. In the extreme case, which we also study, only a handful of labeled
examples are provided in advance and set the initial bias of the system while
unlabeled examples are gathered online and update the bias continuously.
Thus, if we want to adapt to changes in the environment, we have to rely on
indirect forms of feedback, such as the structure of data.</p>
      </subsection>
    </subsection>
    <subsection id="uid38" level="1">
      <bodyTitle>Statistical Learning and Bayesian Analysis</bodyTitle>
      <p>Before detailing some issues in these fields, let us recall the definitions of a few terms.</p>
      <glosslist>
        <label>Machine learning</label>
        <li>
          <p>refers to a system capable of the autonomous acquisition and integration of knowledge. This capacity to learn from experience, analytical observation, and other means, results in a system that can continuously self-improve and thereby offer increased efficiency and effectiveness.</p>
        </li>
        <label>Statistical learning</label>
        <li>
          <p>is an approach to machine intelligence that is based on statistical modeling of data. With a statistical model in hand, one applies probability theory and decision theory to get an algorithm. This is opposed to using training data merely to select among different algorithms or using heuristics/“common sense” to design an algorithm.</p>
        </li>
        <label>Bayesian Analysis</label>
        <li>
          <p>applies to data that could be seen as observations in the more general meaning of the term. These data may not only come from classical sensors but also from any <i>device</i> recording information. From an operational point of view, like for statistical learning, uncertainty about the data is modeled by a probability measure thus defining the so-called likelihood functions. The latter depend upon parameters defining the state of the world we focus on for decision purposes. Within the Bayesian framework the uncertainty about these parameters is also modeled by probability measures, the priors, which are subjective probabilities. Using probability theory and decision theory, one then defines new algorithms to estimate the parameters of interest and/or associated decisions. According to the International Society for Bayesian Analysis (source: <ref xlink:href="http://bayesian.org" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>bayesian.<allowbreak/>org</ref>), and from a more general point of view, this overall process could be summarized as follows: one assesses the
current state of knowledge regarding the issue of interest, gathers new data to address remaining questions, and then updates and refines one's understanding to incorporate both new and old data. Bayesian inference provides a logical, quantitative framework for this process based on probability theory.</p>
        </li>
        <label>Kernel method.</label>
        <li>
          <p>Generally speaking, a kernel function is a function that maps a pair of points to a real value. Typically, this value is a measure of similarity between the two points. Assuming a few properties on it, the kernel function implicitly defines a dot product in some function space. This very nice formal property as well as a bunch of others have ensured a strong appeal for these methods in the last 10 years in the field of function approximation. Many classical algorithms have been “kernelized”, that is, restated in a much more general way than their original formulation. Kernels also implicitly induce the representation of data in a certain “suitable” space where the problem to solve (classification, regression, ...) is expected to be simpler (non-linearity turns to linearity).</p>
        </li>
      </glosslist>
      <p>The fundamental tools used in <span class="smallcap" align="left">SequeL</span> come from the field of statistical learning <ref xlink:href="#sequel-2013-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. We briefly present the most important ones for us to date, namely, kernel-based non parametric function approximation, and non parametric Bayesian models.</p>
      <subsection id="uid39" level="2">
        <bodyTitle>Non-parametric methods for Function Approximation</bodyTitle>
        <p>In statistics in general, and applied mathematics, the approximation of a multi-dimensional real function given some samples is a well-known problem (known as either regression, or interpolation, or function approximation, ...). Regressing a function from data is a key ingredient of our research, or at the very least, a basic component of most of our algorithms. In the context of sequential learning, we have to regress a function while data samples are being obtained one at a time, while keeping the constraint to be able to predict points at any step along the acquisition process. In sequential decision problems, we typically have to learn a value function, or a policy.</p>
        <p>Many methods have been proposed for this purpose. We are looking for suitable ones to cope with the problems we wish to solve. In reinforcement learning, the value function may have areas where the gradient is large; these are areas where the approximation is difficult, while these are also the areas where the accuracy of the approximation should be maximal to obtain a good policy (and where, otherwise, a bad choice of action may imply catastrophic consequences).</p>
        <p>We particularly favor non parametric methods since they make few assumptions about the function to learn. In particular, we have strong interests in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>l</mi><mn>1</mn></msub></math></formula>-regularization, and the (kernelized-)LARS algorithm. <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>l</mi><mn>1</mn></msub></math></formula>-regularization yields sparse solutions, and the LARS approach produces the whole regularization path very efficiently, which helps solve the regularization parameter tuning problem.</p>
      </subsection>
      <subsection id="uid40" level="2">
        <bodyTitle>Nonparametric Bayesian Estimation</bodyTitle>
        <p>Numerous problems may be solved efficiently by a Bayesian approach. The use of Monte-Carlo methods allows us to handle non–linear, as well as non–Gaussian, problems. In their standard form, they require the formulation of probability densities in a parametric form. For instance, it is a common usage to use Gaussian likelihood, because it is handy. However, in some applications such as Bayesian filtering, or blind deconvolution, the choice of a parametric form of the density of the noise is often arbitrary. If this choice is wrong, it may also have dramatic consequences on the estimation quality. To overcome this shortcoming, one possible approach is to consider that this density must also be estimated from data. A general Bayesian approach then consists in defining a probabilistic space associated with the possible outcomes of the <i>object</i> to be estimated. Applied to density estimation, it means that we need to define a probability measure on the probability density of the noise: such a measure is
called a <i>random measure</i>. The classical Bayesian inference procedures can then be used. This approach being by nature non parametric, the associated frame is called <i>Non Parametric Bayesian</i>.</p>
        <p>In particular, mixtures of Dirichlet processes <ref xlink:href="#sequel-2013-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> provide a very powerful formalism. Dirichlet Processes are a possible random measure and Mixtures of Dirichlet Processes are an extension of well-known finite mixture models. Given a mixture density <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><mi>x</mi><mo>|</mo><mi>θ</mi><mo>)</mo></mrow></math></formula>, and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>G</mi><mrow><mo>(</mo><mi>d</mi><mi>θ</mi><mo>)</mo></mrow><mo>=</mo><msubsup><mo>∑</mo><mrow><mi>k</mi><mo>=</mo><mn>1</mn></mrow><mi>∞</mi></msubsup><msub><mi>ω</mi><mi>k</mi></msub><msub><mi>δ</mi><msub><mi>U</mi><mi>k</mi></msub></msub><mrow><mo>(</mo><mi>d</mi><mi>θ</mi><mo>)</mo></mrow></mrow></math></formula>, a Dirichlet process, we define a mixture of Dirichlet processes as:</p>
        <formula id-text="4" id="uid41" textype="equation" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mrow>
              <mi>F</mi>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <msub>
                <mo>∫</mo>
                <mi>Θ</mi>
              </msub>
              <mi>f</mi>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>|</mo>
                <mi>θ</mi>
                <mo>)</mo>
              </mrow>
              <mi>G</mi>
              <mrow>
                <mo>(</mo>
                <mi>d</mi>
                <mi>θ</mi>
                <mo>)</mo>
              </mrow>
              <mo>=</mo>
              <munderover>
                <mo>∑</mo>
                <mrow>
                  <mi>k</mi>
                  <mo>=</mo>
                  <mn>1</mn>
                </mrow>
                <mi>∞</mi>
              </munderover>
              <msub>
                <mi>ω</mi>
                <mi>k</mi>
              </msub>
              <mi>f</mi>
              <mrow>
                <mo>(</mo>
                <mi>x</mi>
                <mo>|</mo>
                <msub>
                  <mi>U</mi>
                  <mi>k</mi>
                </msub>
                <mo>)</mo>
              </mrow>
            </mrow>
          </math>
        </formula>
        <p noindent="true">where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>F</mi><mo>(</mo><mi>x</mi><mo>)</mo></mrow></math></formula> is the density to be estimated. The class of densities that may be written as a mixture of Dirichlet processes is very wide, so that they really fit a very large number of applications.</p>
        <p>Given a set of observations, the estimation of the parameters of a mixture of Dirichlet processes is performed by way of a Markov Chain Monte Carlo (MCMC) algorithm. Dirichlet Process Mixtures are also widely used in clustering problems. Once the parameters of a mixture are estimated, they can be interpreted as the parameters of a specific cluster defining a class as well. Dirichlet processes are well known within the machine learning community and their potential in statistical signal processing still needs to be developed.</p>
      </subsection>
      <subsection id="uid42" level="2">
        <bodyTitle>Random Finite Sets for multisensor multitarget tracking</bodyTitle>
        <p>In the general multi-sensor multi-target Bayesian framework, an unknown (and possibly varying) number of targets whose states <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub></mrow></math></formula> are observed by several sensors which produce a collection of measurements <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>z</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>z</mi><mi>m</mi></msub></mrow></math></formula> at every time step <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula>. Well-known models for this problem are track-based models, such as the joint probabilistic data association (JPDA), or joint multi-target probabilities, such as the joint multi-target probability density. Common difficulties in multi-target tracking arise from the fact that the system state and the collection of measures from sensors are unordered and their sizes evolve randomly through time. Vector-based algorithms must therefore account for state coordinates exchanges and missing data within an unknown time interval. Although this approach is very popular and has resulted in many algorithms in the past, it may not be the optimal way to tackle the problem, since the state and the data are in fact <i>sets</i> and not vectors.</p>
        <p>The random finite set theory provides a powerful framework to deal with these issues. Mahler's work on finite set statistics (FISST) provides a mathematical framework to build multi-object densities and derive the Bayesian rules for state prediction and state estimation. Randomness in the number of objects and their states is encapsulated into random finite sets (RFS), namely multi-target (state) sets <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>X</mi><mo>=</mo><mo>{</mo><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>}</mo></mrow></math></formula> and multi-sensor (measurement) sets <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>Z</mi><mi>k</mi></msub><mo>=</mo><mo>{</mo><msub><mi>z</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>z</mi><mi>m</mi></msub><mo>}</mo></mrow></math></formula>. The objective is then to propagate the multitarget probability density <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>f</mi><mrow><mi>k</mi><mo>|</mo><mi>k</mi></mrow></msub><mrow><mo>(</mo><mi>X</mi><mo>|</mo><msup><mi>Z</mi><mrow><mo>(</mo><mi>k</mi><mo>)</mo></mrow></msup><mo>)</mo></mrow></mrow></math></formula> by using the Bayesian set equations at every time step <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula>:</p>
        <formula id-text="10" id="uid43" textype="multline" type="display">
          <math xmlns="http://www.w3.org/1998/Math/MathML" mode="display" overflow="scroll">
            <mtable displaystyle="true">
              <mtr>
                <mtd columnalign="left">
                  <mrow>
                    <msub>
                      <mi>f</mi>
                      <mrow>
                        <mi>k</mi>
                        <mo>+</mo>
                        <mn>1</mn>
                        <mo>|</mo>
                        <mi>k</mi>
                      </mrow>
                    </msub>
                    <mrow>
                      <mo>(</mo>
                      <mi>X</mi>
                      <mo>|</mo>
                      <msup>
                        <mi>Z</mi>
                        <mrow>
                          <mo>(</mo>
                          <mi>k</mi>
                          <mo>)</mo>
                        </mrow>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mo>=</mo>
                    <mo>∫</mo>
                    <msub>
                      <mi>f</mi>
                      <mrow>
                        <mi>k</mi>
                        <mo>+</mo>
                        <mn>1</mn>
                        <mo>|</mo>
                        <mi>k</mi>
                      </mrow>
                    </msub>
                    <mrow>
                      <mo>(</mo>
                      <mi>X</mi>
                      <mo>|</mo>
                      <mi>W</mi>
                      <mo>)</mo>
                    </mrow>
                    <msub>
                      <mi>f</mi>
                      <mrow>
                        <mi>k</mi>
                        <mo>|</mo>
                        <mi>k</mi>
                      </mrow>
                    </msub>
                    <mrow>
                      <mo>(</mo>
                      <mi>W</mi>
                      <mo>|</mo>
                      <msup>
                        <mi>Z</mi>
                        <mrow>
                          <mo>(</mo>
                          <mi>k</mi>
                          <mo>)</mo>
                        </mrow>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mi>δ</mi>
                    <mi>W</mi>
                  </mrow>
                </mtd>
              </mtr>
              <mtr>
                <mtd columnalign="right">
                  <mrow>
                    <msub>
                      <mi>f</mi>
                      <mrow>
                        <mi>k</mi>
                        <mo>+</mo>
                        <mn>1</mn>
                        <mo>|</mo>
                        <mi>k</mi>
                        <mo>+</mo>
                        <mn>1</mn>
                      </mrow>
                    </msub>
                    <mrow>
                      <mo>(</mo>
                      <mi>X</mi>
                      <mo>|</mo>
                      <msup>
                        <mi>Z</mi>
                        <mrow>
                          <mo>(</mo>
                          <mi>k</mi>
                          <mo>+</mo>
                          <mn>1</mn>
                          <mo>)</mo>
                        </mrow>
                      </msup>
                      <mo>)</mo>
                    </mrow>
                    <mo>=</mo>
                    <mfrac>
                      <mrow>
                        <msub>
                          <mi>f</mi>
                          <mrow>
                            <mi>k</mi>
                            <mo>+</mo>
                            <mn>1</mn>
                          </mrow>
                        </msub>
                        <mrow>
                          <mo>(</mo>
                          <msub>
                            <mi>Z</mi>
                            <mrow>
                              <mi>k</mi>
                              <mo>+</mo>
                              <mn>1</mn>
                            </mrow>
                          </msub>
                          <mo>|</mo>
                          <mi>X</mi>
                          <mo>)</mo>
                        </mrow>
                        <msub>
                          <mi>f</mi>
                          <mrow>
                            <mi>k</mi>
                            <mo>+</mo>
                            <mn>1</mn>
                            <mo>|</mo>
                            <mi>k</mi>
                          </mrow>
                        </msub>
                        <mrow>
                          <mo>(</mo>
                          <mi>X</mi>
                          <mo>|</mo>
                          <msup>
                            <mi>Z</mi>
                            <mrow>
                              <mo>(</mo>
                              <mi>k</mi>
                              <mo>)</mo>
                            </mrow>
                          </msup>
                          <mo>)</mo>
                        </mrow>
                      </mrow>
                      <mrow>
                        <mo>∫</mo>
                        <msub>
                          <mi>f</mi>
                          <mrow>
                            <mi>k</mi>
                            <mo>+</mo>
                            <mn>1</mn>
                          </mrow>
                        </msub>
                        <mrow>
                          <mo>(</mo>
                          <msub>
                            <mi>Z</mi>
                            <mrow>
                              <mi>k</mi>
                              <mo>+</mo>
                              <mn>1</mn>
                            </mrow>
                          </msub>
                          <mo>|</mo>
                          <mi>W</mi>
                          <mo>)</mo>
                        </mrow>
                        <msub>
                          <mi>f</mi>
                          <mrow>
                            <mi>k</mi>
                            <mo>+</mo>
                            <mn>1</mn>
                            <mo>|</mo>
                            <mi>k</mi>
                          </mrow>
                        </msub>
                        <mrow>
                          <mo>(</mo>
                          <mi>W</mi>
                          <mo>|</mo>
                          <msup>
                            <mi>Z</mi>
                            <mrow>
                              <mo>(</mo>
                              <mi>k</mi>
                              <mo>)</mo>
                            </mrow>
                          </msup>
                          <mo>)</mo>
                        </mrow>
                        <mi>δ</mi>
                        <mi>W</mi>
                      </mrow>
                    </mfrac>
                  </mrow>
                </mtd>
              </mtr>
            </mtable>
          </math>
        </formula>
        <p noindent="true">where:</p>
        <simplelist>
          <li id="uid44">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>X</mi><mo>=</mo><mo>{</mo><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>x</mi><mi>n</mi></msub><mo>}</mo></mrow></math></formula> is a multi-target state, <i>i.e.</i> a finite set of elements <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mi>i</mi></msub></math></formula> defined on the single-target space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula>; <footnote id="uid45" id-text="2">The state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>x</mi><mi>i</mi></msub></math></formula> of a target is usually composed of its position, its velocity, etc.</footnote></p>
          </li>
          <li id="uid46">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>Z</mi><mrow><mi>k</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>=</mo><mrow><mo>{</mo><msub><mi>z</mi><mn>1</mn></msub><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><msub><mi>z</mi><mi>m</mi></msub><mo>}</mo></mrow></mrow></math></formula> is the current multi-sensor observation, <i>i.e.</i> a collection of measures <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>z</mi><mi>i</mi></msub></math></formula> produced at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>k</mi><mo>+</mo><mn>1</mn></mrow></math></formula> by all the sensors;</p>
          </li>
          <li id="uid47">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msup><mi>Z</mi><mrow><mo>(</mo><mi>k</mi><mo>)</mo></mrow></msup><mo>=</mo><msub><mo>⋃</mo><mrow><mi>t</mi><mo>⩽</mo><mi>k</mi></mrow></msub><msub><mi>Z</mi><mi>t</mi></msub></mrow></math></formula> is the collection of observations up to time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula>;</p>
          </li>
          <li id="uid48">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>f</mi><mrow><mi>k</mi><mo>|</mo><mi>k</mi></mrow></msub><mrow><mo>(</mo><mi>W</mi><mo>|</mo><msup><mi>Z</mi><mrow><mo>(</mo><mi>k</mi><mo>)</mo></mrow></msup><mo>)</mo></mrow></mrow></math></formula> is the current multi-target posterior density in state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>W</mi></math></formula>;</p>
          </li>
          <li id="uid49">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>f</mi><mrow><mi>k</mi><mo>+</mo><mn>1</mn><mo>|</mo><mi>k</mi></mrow></msub><mrow><mo>(</mo><mi>X</mi><mo>|</mo><mi>W</mi><mo>)</mo></mrow></mrow></math></formula> is the current multi-target Markov transition density, from state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>W</mi></math></formula> to state <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>X</mi></math></formula>;</p>
          </li>
          <li id="uid50">
            <p noindent="true"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>f</mi><mrow><mi>k</mi><mo>+</mo><mn>1</mn></mrow></msub><mrow><mo>(</mo><mi>Z</mi><mo>|</mo><mi>X</mi><mo>)</mo></mrow></mrow></math></formula> is the current multi-sensor/multi-target likelihood function.</p>
          </li>
        </simplelist>
        <p>Although equations (<ref xlink:href="#uid43" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) may seem similar to the classical single-sensor/single-target Bayesian equations, they are generally intractable because of the presence of the <i>set integrals</i>. Indeed, an RFS <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>Ξ</mi></math></formula> is characterized by the family of its Janossy densities <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>j</mi><mrow><mi>Ξ</mi><mo>,</mo><mn>1</mn></mrow></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>1</mn></msub><mo>)</mo></mrow></mrow></math></formula>, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>j</mi><mrow><mi>Ξ</mi><mo>,</mo><mn>2</mn></mrow></msub><mrow><mo>(</mo><msub><mi>x</mi><mn>1</mn></msub><mo>,</mo><msub><mi>x</mi><mn>2</mn></msub><mo>)</mo></mrow><mo>.</mo><mo>.</mo><mo>.</mo></mrow></math></formula> and not just by one density as is the case with vectors. Mahler then introduced the PHD, defined on the single-target state space. The PHD is the quantity whose integral on any region <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>S</mi></math></formula> is the expected number of targets inside <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>S</mi></math></formula>. Mahler proved that the PHD is the first-moment density of the multi-target probability density. Although defined on the single-target state space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula>, the PHD encapsulates information on both target number and states.</p>
      </subsection>
    </subsection>
  </fondements>
  <domaine id="uid51">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid52" level="1">
      <bodyTitle>In Short</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span> aims at solving problems of prediction, as well as problems of optimal and adaptive control. As such, the application domains are very numerous.</p>
      <p>The application domains have been organized as follows:</p>
      <simplelist>
        <li id="uid53">
          <p noindent="true">adaptive control,</p>
        </li>
        <li id="uid54">
          <p noindent="true">signal processing and functional prediction,</p>
        </li>
        <li id="uid55">
          <p noindent="true">medical applications,</p>
        </li>
        <li id="uid56">
          <p noindent="true">web mining,</p>
        </li>
        <li id="uid57">
          <p noindent="true">computer games.</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid58" level="1">
      <bodyTitle>Adaptive Control</bodyTitle>
      <p>Adaptive control is an important application of the research being done in <span class="smallcap" align="left">SequeL</span>. Reinforcement learning (RL) precisely aims at controlling the behavior of systems and may be used in situations with more or less information available. Of course, the more information, the better, in which case methods of (approximate) dynamic programming may be used <ref xlink:href="#sequel-2013-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. But reinforcement learning may also handle situations where the dynamics of the system is unknown, situations where the system is partially observable, and non-stationary situations. Indeed, in these cases, the behavior is learned by interacting with the environment and thus naturally adapts to the changes of the environment. Furthermore, the adaptive system may also take advantage of expert knowledge when available.</p>
      <p>Clearly, the spectrum of potential applications is very wide: as far as an agent (a human, a robot, a virtual agent) has to take a decision, in particular in cases where he lacks some information to take the decision, this enters the scope of our activities. To exemplify the potential applications, let us cite:</p>
      <simplelist>
        <li id="uid59">
          <p noindent="true">game software: in the 1990s, RL was the basis of a very successful Backgammon program, TD-Gammon <ref xlink:href="#sequel-2013-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, that learned to play at an expert level by basically playing a very large number of games against itself. Today, various games are studied with RL techniques.</p>
        </li>
        <li id="uid60">
          <p noindent="true">many optimization problems that are closely related to operations research, but taking into account the uncertainty and the stochasticity of the environment: see the job-shop scheduling or the cellular phone frequency allocation problems, and resource allocation in general <ref xlink:href="#sequel-2013-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        </li>
        <li id="uid61">
          <p noindent="true">we can also foresee that some progress may be made by using RL to design adaptive conversational agents, or system-level as well as application-level operating systems that adapt to their users' habits.</p>
          <p>More generally, these ideas fall into what adaptive control may bring to human beings, in making their life simpler, by being embedded in an environment that is made to help them, an idea phrased as “ambient intelligence”.</p>
        </li>
        <li id="uid62">
          <p noindent="true">The sensor management problem consists in determining the best way to task several sensors when each sensor has many modes and search patterns. In the detection/tracking applications, the tasks assigned to a sensor management system are for instance:</p>
          <simplelist>
            <li id="uid63">
              <p noindent="true">detect targets,</p>
            </li>
            <li id="uid64">
              <p noindent="true">track the targets in the case of a moving target and/or a smart target (a smart target can change its behavior when it detects that it is under analysis),</p>
            </li>
            <li id="uid65">
              <p noindent="true">combine all the detections in order to track each moving target,</p>
            </li>
            <li id="uid66">
              <p noindent="true">dynamically allocate the sensors in order to achieve the previous three tasks in an optimal way. The allocation of sensors, and their modes, thus defines the action space of the underlying Markov decision problem.</p>
            </li>
          </simplelist>
          <p>In the more general situation, some sensors may be localized at the same place while others are dispatched over a given volume. Tasking a sensor may include, at each moment, such choices as where to point and/or what mode to use. Tasking a group of sensors includes the tasking of each individual sensor but also the choice of collaborating sensors subgroups. Of course, the sensor management problem is related to an objective. In general, sensors must balance complex trade-offs between achieving mission goals such as detecting new targets, tracking existing targets, and identifying existing targets. The word “target” is used here in its most general meaning, and the potential applications are not restricted to military applications. Whatever the underlying application, the sensor management problem consists in choosing at each time an action within the set of available actions.</p>
        </li>
        <li id="uid67">
          <p noindent="true">sequential decision processes are also very well-known in economics. They may be used as a decision aid tool, to help in the design of social aid programs, or the implementation of plants (see <ref xlink:href="#sequel-2013-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, <ref xlink:href="#sequel-2013-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> for such applications).</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid68" level="1">
      <bodyTitle>Signal Processing</bodyTitle>
      <p>Applications of sequential learning in the field of signal processing are also very numerous. A signal is naturally sequential as it flows. It usually comes from the recording of the output of sensors but the recording of any sequence of numbers may be considered as a signal, like the evolution of stock-exchange rates with respect to time and/or place, the number of consumers at a mall entrance or the number of connections to a web site. Signal processing has several objectives: predict, estimate, remove noise, characterize or classify. The signal is often considered as sequential: we want to predict, estimate or classify a value (or a feature) at time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> knowing the past values of the parameter of interest or past values of data related to this parameter. This is typically the case in estimation processes arising in dynamical systems.</p>
      <p>Signals may be processed in several ways. One of the best-known ways is time-frequency analysis, in which the frequencies of each signal are analyzed with respect to time. This concept has been generalized to the time-scale analysis obtained by a wavelet transform. Both analyses are based on the projection of the original signal onto a well-chosen function basis. Signal processing is also closely related to the probability field, as the uncertainty inherent to many signals leads one to consider them as stochastic processes: the Bayesian framework is actually one of the main frameworks within which signals are processed for many purposes. It is worth noting that Bayesian analysis can be used jointly with a time-frequency or a wavelet analysis. However, alternatives like belief functions have emerged in recent years. Belief functions were introduced by Dempster a few decades ago and have been successfully used in the past few years in fields where probability had, for many years, no alternative, such as classification. Belief functions can be viewed as a generalization of probabilities which can capture both imprecision and uncertainty. Belief functions are also closely related to data fusion.</p>
    </subsection>
    <subsection id="uid69" level="1">
      <bodyTitle>Medical Applications</bodyTitle>
      <p>One of the initial motivations of the
multi-arm bandit theory stems from
clinical trials when one researches the effects of different treatments while
maximizing the improvement of the patients' health states.</p>
      <p>Medical health-care and in particular
patient-management
is today one of the most
important applications of sequential decision making.
This is because the treatment of the more complex health problems
is typically sequential: A physician repeatedly observes the current state of
the patient and makes the decision in order to improve the health condition
as measured for example by <i>QALYs</i>
(quality-adjusted life years).</p>
      <p>Moreover, machine learning methods may be used for at least two purposes in
neuroscience:</p>
      <orderedlist>
        <li id="uid70">
          <p noindent="true">as in any other (experimental) scientific domain, the machine learning methods relying heavily on statistics, they may be used to analyse experimental data,</p>
        </li>
        <li id="uid71">
          <p noindent="true">dealing with induction learning, that is the ability to generalize from facts which is an ability that is considered to be one of the basic components of “intelligence”, machine learning may be considered as a model of learning in living beings. In particular, the temporal difference methods for reinforcement learning have strong ties with various concepts of psychology (Thorndike's law of effect, and the Rescorla-Wagner law to name the two most well-known).</p>
        </li>
      </orderedlist>
    </subsection>
    <subsection id="uid72" level="1">
      <bodyTitle>Web Mining</bodyTitle>
      <p>We work on news/ad recommendation. Online learning algorithms for this task have reached critical importance over the last few years due to these major applications. After designing a new algorithm, it is critical to be able to evaluate it without having to plug it into the real application, in order to protect user experience and/or the company's revenue. To do this, people used to build simulators of user behavior and try to achieve good performance against them. However, designing such a simulator is probably much more difficult than designing the algorithm itself! Another common way to evaluate is to not consider the exploration/exploitation dilemma (also known as “Cold Start” for recommender systems). Lately, data-driven methods have been developed. We are working on building an automatic replay methodology with some theoretical guarantees. This work also exhibits a strong link with the choice of the number of contexts to use with recommender systems with respect to the audience.</p>
      <p>Another point is that web sites must forecast Web page views in order to plan computer resource allocation and estimate upcoming revenue and advertising growth. In this work, we focus on extracting trends and seasonal patterns from page view series.
We investigate Holt-Winters/ARIMA-like procedures and some regularized models for making short-term predictions (3-6 weeks) with respect to logged data of several big media websites.
We work on some news-event-related webpages and we feel that this kind of time series deserves particular attention. Self-similarity is found to exist at multiple time scales of network traffic, and can be exploited for prediction. In particular, it is found that Web page views occasionally exhibit strong impulsive changes. The impulses cause large prediction errors long after their occurrences and can sometimes be predicted (<i>e.g.</i>, elections, sport events, editorial changes, holidays) in order to improve accuracy. It also seems that some promising models could arise from using global trend shifts in the population.</p>
    </subsection>
    <subsection id="uid73" level="1">
      <bodyTitle>Games</bodyTitle>
      <p>The problem of artificial intelligence in games consists in choosing actions of
players in order to produce artificial opponents. Most games can be formalized
as Markov decision problems, so they can be approached with reinforcement
learning.</p>
      <p>In particular, <span class="smallcap" align="left">SequeL</span> was a pioneer of Monte Carlo Tree Search, a technique
that obtained spectacular successes in the game of Go. Other application
domains include the game of poker and the Japanese card game of hanafuda.</p>
    </subsection>
  </domaine>
  <logiciels id="uid74">
    <bodyTitle>Software and Platforms</bodyTitle>
    <subsection id="uid75" level="1">
      <bodyTitle>Computer Games</bodyTitle>
      <participants>
        <person key="sequel-2006-id18239">
          <firstname>Rémi</firstname>
          <lastname>Coulom</lastname>
        </person>
      </participants>
      <simplelist>
        <li id="uid76">
          <p noindent="true"><i/><i><b>Crazy Stone</b></i><i/> is a top-level Go-playing program that has been developed by Rémi Coulom since 2005. Crazy Stone won several major international Go tournaments in the past. In 2013, a new version was released in Japan. This new version won the 6th edition of the UEC Cup (the most important international computer-Go tournament). It also won the first edition of the Denseisen, by winning a 4-stone handicap game against 9-dan professional player Yoshio Ishida. It is distributed as a commercial product by <i>Unbalance Corporation</i> (Japan). 6-month work in 2013. URL: <ref xlink:href="http://remi.coulom.free.fr/CrazyStone/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>remi.<allowbreak/>coulom.<allowbreak/>free.<allowbreak/>fr/<allowbreak/>CrazyStone/</ref></p>
        </li>
        <li id="uid77">
          <p noindent="true"><i/><i><b>Kifu Snap</b></i><i/> is an Android image-recognition app. It can automatically recognize a Go board from a picture, and analyze it with Crazy Stone. It was released on Google Play in November, 2013. 6-month work in 2013. URL: <ref xlink:href="http://remi.coulom.free.fr/kifu-snap/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>remi.<allowbreak/>coulom.<allowbreak/>free.<allowbreak/>fr/<allowbreak/>kifu-snap/</ref></p>
        </li>
      </simplelist>
    </subsection>
  </logiciels>
  <resultats id="uid78">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid79" level="1">
      <bodyTitle>Decision-making Under Uncertainty</bodyTitle>
      <subsection id="uid80" level="2">
        <bodyTitle>Reinforcement Learning</bodyTitle>
        <p>
          <i>
            <b>Minimax PAC bounds on the sample complexity of reinforcement learning with a generative model <ref xlink:href="#sequel-2013-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the problem of learning the optimal action-value function in discounted-reward Markov decision processes (MDPs). We prove new PAC bounds on the sample-complexity of two well-known model-based reinforcement learning (RL) algorithms in the presence of a generative model of the MDP: value iteration and policy iteration. The first result indicates that for an MDP with <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>N</mi></math></formula> state-action pairs and the discount factor <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>γ</mi><mo>∈</mo><mo>[</mo><mn>0</mn><mo>,</mo><mn>1</mn><mo>)</mo></mrow></math></formula>, only <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mi>N</mi><mi>log</mi><mo>(</mo><mi>N</mi><mo>/</mo><mi>δ</mi><mo>)</mo><mo>/</mo><mo>[</mo><msup><mrow><mo>(</mo><mn>1</mn><mo>-</mo><mi>γ</mi><mo>)</mo></mrow><mn>3</mn></msup><msup><mi>ϵ</mi><mn>2</mn></msup><mo>]</mo><mo>)</mo></mrow></math></formula> state-transition samples are required to find an <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula>-optimal estimation of the action-value function with the probability (w.p.) <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>1</mn><mo>-</mo><mi>δ</mi></mrow></math></formula>.
Further, we prove that, for small values of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula>, an order of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mi>N</mi><mi>log</mi><mo>(</mo><mi>N</mi><mo>/</mo><mi>δ</mi><mo>)</mo><mo>/</mo><mo>[</mo><msup><mrow><mo>(</mo><mn>1</mn><mo>-</mo><mi>γ</mi><mo>)</mo></mrow><mn>3</mn></msup><msup><mi>ϵ</mi><mn>2</mn></msup><mo>]</mo><mo>)</mo></mrow></math></formula> samples is required to find an <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula>-optimal policy w.p. <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>1</mn><mo>-</mo><mi>δ</mi></mrow></math></formula>. We also prove a matching lower bound of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>Ω</mi><mo>(</mo><mi>N</mi><mi>log</mi><mo>(</mo><mi>N</mi><mo>/</mo><mi>δ</mi><mo>)</mo><mo>/</mo><mo>[</mo><msup><mrow><mo>(</mo><mn>1</mn><mo>-</mo><mi>γ</mi><mo>)</mo></mrow><mn>3</mn></msup><msup><mi>ϵ</mi><mn>2</mn></msup><mo>]</mo><mo>)</mo></mrow></math></formula> on the sample complexity of estimating the optimal action-value function. To the best of our knowledge, this is the first minimax result on the sample complexity of RL: The upper bound matches the lower bound in terms of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>N</mi></math></formula>, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula>, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>δ</mi></math></formula> and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>1</mn><mo>/</mo><mo>(</mo><mn>1</mn><mo>-</mo><mi>γ</mi><mo>)</mo></mrow></math></formula> up to a constant factor.
Also, both our lower bound and upper bound improve on the state-of-the-art in terms of their dependence on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>1</mn><mo>/</mo><mo>(</mo><mn>1</mn><mo>-</mo><mi>γ</mi><mo>)</mo></mrow></math></formula>.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Regret Bounds for Reinforcement Learning with Policy Advice <ref xlink:href="#sequel-2013-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>In some reinforcement learning problems an agent may be provided with a set of input policies, perhaps learned from prior experience or provided by advisors. We present a reinforcement learning with policy advice (RLPA) algorithm which leverages this input set and learns to use the best policy in the set for the reinforcement learning task at hand. We prove that RLPA has a sub-linear regret of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mover accent="true"><mi>O</mi><mo>˜</mo></mover><mrow><mo>(</mo><msqrt><mi>T</mi></msqrt><mo>)</mo></mrow></mrow></math></formula> relative to the best input policy, and that both this regret and its computational complexity are independent of the size of the state and action space. Our empirical simulations support our theoretical analysis. This suggests RLPA may offer significant advantages in large domains where some prior good policies are provided.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Optimistic planning for belief-augmented Markov decision processes <ref xlink:href="#sequel-2013-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>This paper presents the Bayesian Optimistic Planning (BOP) algorithm, a novel model-based Bayesian reinforcement learning approach. BOP extends the planning approach of the Optimistic Planning for Markov Decision Processes (OP-MDP) algorithm [10], [9] to contexts where the transition model of the MDP is initially unknown and progressively learned through interactions within the environment. The knowledge about the unknown MDP is represented with a probability distribution over all possible transition models using Dirichlet distributions, and the BOP algorithm plans in the belief-augmented state space constructed by concatenating the original state vector with the current posterior distribution over transition models. We show that BOP becomes Bayesian optimal when the budget parameter increases to infinity. Preliminary empirical validations show promising performance.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Aggregating optimistic planning trees for solving Markov decision processes <ref xlink:href="#sequel-2013-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>This paper addresses the problem of online planning in Markov decision processes using a generative model and under a budget constraint. We propose a new algorithm, ASOP, which is based on the construction of a forest of single successor state planning trees, where each tree corresponds to a random realization of the stochastic environment. The trees are explored using a "safe" optimistic planning strategy which combines the optimistic principle (in order to explore the most promising part of the search space first) and a safety principle (which guarantees a certain amount of uniform exploration). In the decision-making step of the algorithm, the individual trees are aggregated and an immediate action is recommended. We provide a finite-sample analysis and discuss the trade-off between the principles of optimism and safety. We report numerical results on a benchmark problem showing that ASOP performs as well as state-of-the-art optimistic planning algorithms.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Optimal Regret Bounds for Selecting the State Representation in Reinforcement Learning <ref xlink:href="#sequel-2013-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider an agent interacting with an environment in a single stream of actions, observations, and rewards, with no reset. This process is not assumed to be a Markov Decision Process (MDP). Rather, the agent has several representations (mapping histories of past interactions to a discrete state space) of the environment with unknown dynamics, only some of which result in an MDP. The goal is to minimize the average regret criterion against an agent who knows an MDP representation giving the highest optimal reward, and acts optimally in it. Recent regret bounds for this setting are of order <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msup><mi>T</mi><mrow><mn>2</mn><mo>/</mo><mn>3</mn></mrow></msup><mo>)</mo></mrow></math></formula> with an additive term constant yet exponential in some characteristics of the optimal MDP. We propose an algorithm whose regret after <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>T</mi></math></formula> time steps is <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msqrt><mi>T</mi></msqrt><mo>)</mo></mrow></math></formula>, with all constants reasonably small. This is optimal in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>T</mi></math></formula> since <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msqrt><mi>T</mi></msqrt><mo>)</mo></mrow></math></formula> is the optimal regret in the setting of learning in a (single discrete) MDP.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Competing with an Infinite Set of Models in Reinforcement Learning <ref xlink:href="#sequel-2013-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider a reinforcement learning setting where the learner also has to deal with the problem of finding a suitable state-representation function from a given set of models. This has to be done while interacting with the environment in an online fashion (no resets), and the goal is to have small regret with respect to any Markov model in the set. For this setting, recently the BLB algorithm has been proposed, which achieves regret of order <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>T</mi><mrow><mn>2</mn><mo>/</mo><mn>3</mn></mrow></msup></math></formula>, provided that the given set of models is finite. Our first contribution is to extend this result to a countably infinite set of models. Moreover, the BLB regret bound suffers from an additive term that can be exponential in the diameter of the MDP involved, since the diameter has to be guessed. The algorithm we propose avoids guessing the diameter, thus improving the regret bound.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>A review of optimistic planning in Markov decision processes <ref xlink:href="#sequel-2013-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We review a class of online planning algorithms for deterministic and stochastic optimal control problems, modeled as Markov decision processes. At each discrete time step, these algorithms maximize the predicted value of planning policies from the current state, and apply the first action of the best policy found. An overall receding-horizon algorithm results, which can also be seen as a type of model-predictive control. The space of planning policies is explored optimistically, focusing on areas with largest upper bounds on the value - or upper confidence bounds, in the stochastic case. The resulting optimistic planning framework integrates several types of optimism previously used in planning, optimization, and reinforcement learning, in order to obtain several intuitive algorithms with good performance guarantees. We describe in detail three recent such algorithms, outline the theoretical guarantees on their performance, and illustrate their behavior in a numerical example.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
      <subsection id="uid81" level="2">
        <bodyTitle>Multi-arm Bandit Theory</bodyTitle>
        <p>
          <i>
            <b>Automatic motor task selection via a bandit algorithm for a brain-controlled button <ref xlink:href="#sequel-2013-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Objective. Brain-computer interfaces (BCIs) based on sensorimotor rhythms use a variety of motor tasks, such as imagining moving the right or left hand, the feet or the tongue. Finding the tasks that yield best performance, specifically to each user, is a time-consuming preliminary phase to a BCI experiment. This study presents a new adaptive procedure to automatically select (online) the most promising motor task for an asynchronous brain-controlled button. Approach. We develop for this purpose an adaptive algorithm UCB-classif based on the stochastic bandit theory and design an EEG experiment to test our method. We compare (offline) the adaptive algorithm to a naïve selection strategy which uses uniformly distributed samples from each task. We also run the adaptive algorithm online to fully validate the approach. Main results. By not wasting time on inefficient tasks, and focusing on the most promising ones, this algorithm results in a faster task selection and a more efficient use of the BCI training session. More precisely, the offline analysis reveals that the use of this algorithm can reduce the time needed to select the most appropriate task by almost half without loss in precision, or alternatively, allow us to investigate twice the number of tasks within a similar time span. Online tests confirm that the method leads to an optimal task selection. Significance. This study is the first one to optimize the task selection phase by an adaptive procedure. By increasing the number of tasks that can be tested in a given time span, the proposed method could contribute to reducing 'BCI illiteracy'.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Kullback-Leibler Upper Confidence Bounds for Optimal Sequential Allocation <ref xlink:href="#sequel-2013-bid22" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider optimal sequential allocation in the context of the so-called stochastic multi-armed bandit model. We describe a generic index policy, in the sense of Gittins (1979), based on upper confidence bounds of the arm payoffs computed using the Kullback-Leibler divergence. We consider two classes of distributions for which instances of this general idea are analyzed: The kl-UCB algorithm is designed for one-parameter exponential families and the empirical KL-UCB algorithm for bounded and finitely supported distributions. Our main contribution is a unified finite-time analysis of the regret of these algorithms that asymptotically matches the lower bounds of Lai and Robbins (1985) and Burnetas and Katehakis (1996), respectively. We also investigate the behavior of these algorithms when used with general bounded rewards, showing in particular that they provide significant improvements over the state-of-the-art.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Sequential Transfer in Multi-armed Bandit with Finite Set of Models <ref xlink:href="#sequel-2013-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Learning from prior tasks and transferring that experience to improve future performance is critical for building lifelong learning agents. Although results in supervised and reinforcement learning show that transfer may significantly improve the learning performance, most of the literature on transfer is focused on batch learning tasks. In this paper we study the problem of <i>sequential transfer in online learning</i>, notably in the multi-armed bandit framework, where the objective is to minimize the total regret over a sequence of tasks by transferring knowledge from prior tasks. Under the assumption that the tasks are drawn from a stationary distribution over a finite set of models, we define a novel bandit algorithm based on a method-of-moments approach for the estimation of the possible tasks and derive regret bounds for it. Finally, we report preliminary empirical results confirming the theoretical findings.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Optimizing P300-speller sequences by RIP-ping groups apart <ref xlink:href="#sequel-2013-bid24" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>So far P300-speller design has put very little emphasis on the design of optimized flash patterns, a surprising fact given the importance of the sequence of flashes on the selection outcome. Previous work in this domain has consisted in studying consecutive flashes, to prevent the same letter or its neighbors from flashing consecutively. To this effect, the flashing letters form more random groups than the original row-column sequences for the P300 paradigm, but the groups remain fixed across repetitions. This has several important consequences, among which a lack of discrepancy between the scores of the different letters. The new approach proposed in this paper accumulates evidence for individual elements, and optimizes the sequences by relaxing the constraint that letters should belong to fixed groups across repetitions. The method is inspired by the theory of Restricted Isometry Property matrices in Compressed Sensing, and it can be applied to any display grid size, and for any target flash frequency. This leads to P300 sequences which are shown here to perform significantly better than the state of the art, in simulations and online tests.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Stochastic Simultaneous Optimistic Optimization <ref xlink:href="#sequel-2013-bid25" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We study the problem of global maximization of a function f given a finite number of evaluations perturbed by noise. We consider a very weak assumption on the function, namely that it is locally smooth (in some precise sense) with respect to some semi-metric, around one of its global maxima. Compared to previous works on bandits in general spaces (Kleinberg et al., 2008; Bubeck et al., 2011a) our algorithm does not require the knowledge of this semi-metric. Our algorithm, StoSOO, follows an optimistic strategy to iteratively construct upper confidence bounds over the hierarchical partitions of the function domain to decide which point to sample next. A finite-time analysis of StoSOO shows that it performs almost as well as the best specifically-tuned algorithms even though the local smoothness of the function is not known.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Toward optimal stratification for stratified monte-carlo integration <ref xlink:href="#sequel-2013-bid26" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the problem of adaptive stratified sampling for Monte Carlo integration of a noisy function, given a finite budget n of noisy evaluations to the function. We tackle in this paper the problem of adapting to the function at the same time the number of samples into each stratum and the partition itself. More precisely, it is interesting to refine the partition of the domain in areas where the noise to the function, or where the variations of the function, are very heterogeneous. On the other hand, having a (too) refined stratification is not optimal. Indeed, the more refined the stratification, the more difficult it is to adjust the allocation of the samples to the stratification, i.e. sample more points where the noise or variations of the function are larger. We provide in this paper an algorithm that selects online, among a large class of partitions, the partition that provides the optimal trade-off, and allocates the samples almost optimally on this partition.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Thompson sampling for one-dimensional exponential family bandits <ref xlink:href="#sequel-2013-bid27" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Thompson Sampling has been demonstrated in many complex bandit models; however, the theoretical guarantees available for the parametric multi-armed bandit are still limited to the Bernoulli case. Here we extend them by proving asymptotic optimality of the algorithm using the Jeffreys prior for 1-dimensional exponential family bandits. Our proof builds on previous work, but also makes extensive use of closed forms for Kullback-Leibler divergence and Fisher information (and thus Jeffreys prior) available in an exponential family. This allows us to give a finite time exponential concentration inequality for posterior distributions on exponential families that may be of interest in its own right. Moreover our analysis covers some distributions for which no optimistic algorithm has yet been proposed, including heavy-tailed exponential families.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Finite-Time Analysis of Kernelised Contextual Bandits <ref xlink:href="#sequel-2013-bid28" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We tackle the problem of online reward maximisation over a large finite set of actions described by their contexts. We focus on the case when the number of actions is too big to sample all of them even once. However we assume that we have access to the similarities between actions' contexts and that the expected reward is an arbitrary linear function of the contexts' images in the related reproducing kernel Hilbert space (RKHS). We propose KernelUCB, a kernelised UCB algorithm, and give a cumulative regret bound through a frequentist analysis. For contextual bandits, the related algorithm GP-UCB turns out to be a special case of our algorithm, and our finite-time analysis improves the regret bound of GP-UCB for the agnostic case, both in terms of the kernel-dependent quantity and the RKHS norm of the reward function. Moreover, for the linear kernel, our regret bound matches the lower bound for contextual linear bandits.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>From Bandits to Monte-Carlo Tree Search: The Optimistic Principle Applied to Optimization and Planning <ref xlink:href="#sequel-2013-bid29" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>This work covers several aspects of the optimism in the face of uncertainty principle applied to large scale optimization problems under finite numerical budget. The initial motivation for the research reported here originated from the empirical success of the so-called Monte-Carlo Tree Search method popularized in computer-go and further extended to many other games as well as optimization and planning problems. Our objective is to contribute to the development of theoretical foundations of the field by characterizing the complexity of the underlying optimization problems and designing efficient algorithms with performance guarantees. The main idea presented here is that it is possible to decompose a complex decision making problem (such as an optimization problem in a large search space) into a sequence of elementary decisions, where each decision of the sequence is solved using a (stochastic) multi-armed bandit (simple mathematical model for decision making in stochastic environments). This so-called hierarchical bandit approach (where the reward observed by a bandit in the hierarchy is itself the return of another bandit at a deeper level) possesses the nice feature of starting the exploration by a quasi-uniform sampling of the space and then focusing progressively on the most promising area, at different scales, according to the evaluations observed so far, and eventually performing a local search around the global optima of the function. The performance of the method is assessed in terms of the optimality of the returned solution as a function of the number of function evaluations. Our main contribution to the field of function optimization is a class of hierarchical optimistic algorithms designed for general search spaces (such as metric spaces, trees, graphs, Euclidean spaces, ...) 
with different algorithmic instantiations depending on whether the evaluations are noisy or noiseless and whether some measure of the "smoothness" of the function is known or unknown. The performance of the algorithms depends on the local behavior of the function around its global optima expressed in terms of the quantity of near-optimal states measured with some metric. If this local smoothness of the function is known then one can design very efficient optimization algorithms (with convergence rate independent of the space dimension), and when it is not known, we can build adaptive techniques that can, in some cases, perform almost as well as when it is known.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
    </subsection>
    <subsection id="uid82" level="1">
      <bodyTitle>Statistical analysis of time series</bodyTitle>
      <subsection id="uid83" level="2">
        <bodyTitle>Change Point Analysis</bodyTitle>
        <p>
          <i>
            <b>Nonparametric multiple change point estimation in highly dependent time series <ref xlink:href="#sequel-2013-bid30" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Given a heterogeneous time-series sample, it is required to find the points in time (called change points) where the probability distribution generating the data has changed. The data is assumed to have been generated by arbitrary, unknown, stationary ergodic distributions. No modeling, independence or mixing assumptions are made. A novel, computationally efficient, nonparametric method is proposed, and is shown to be asymptotically consistent in this general framework; the theoretical results are complemented with experimental evaluations.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
      <subsection id="uid84" level="2">
        <bodyTitle>Clustering Time Series, Online and Offline</bodyTitle>
        <p>
          <i>
            <b>A Binary-Classification-Based Metric between Time-Series Distributions and Its Use in Statistical and Learning Problems <ref xlink:href="#sequel-2013-bid31" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>A metric between time-series distributions is proposed that can be evaluated using binary classification methods, which were originally developed to work on i.i.d. data. It is shown how this metric can be used for solving statistical problems that are seemingly unrelated to classification and concern highly dependent time series. Specifically, the problems of time-series clustering, homogeneity testing and the three-sample problem are addressed. Universal consistency of the resulting algorithms is proven under most general assumptions. The theoretical results are illustrated with experiments on synthetic and real-world data.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
      <subsection id="uid85" level="2">
        <bodyTitle>Semi-Supervised and Unsupervised Learning</bodyTitle>
        <p>
          <i>
            <b>Learning from a Single Labeled Face and a Stream of Unlabeled Data <ref xlink:href="#sequel-2013-bid32" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Face recognition from a single image per person is a challenging problem because the training sample is extremely small. We consider a variation of this problem. In our problem, we recognize only one person, and there are no labeled data for any other person. This setting naturally arises in authentication on personal computers and mobile devices, and poses additional challenges because it lacks negative examples. We formalize our problem as one-class classification, and propose and analyze an algorithm that learns a non-parametric model of the face from a single labeled image and a stream of unlabeled data. In many domains, for instance when a person interacts with a computer with a camera, unlabeled data are abundant and easy to utilize. This is the first paper that investigates how these data can help in learning better models in the single-image-per-person setting. Our method is evaluated on a dataset of 43 people and we show that these people can be recognized 90% of the time at nearly zero false positives. This recall is 25+% higher than the recall of our best performing baseline. Finally, we conduct a comprehensive sensitivity analysis of our algorithm and provide a guideline for setting its parameters in practice.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Unsupervised model-free representation learning <ref xlink:href="#sequel-2013-bid33" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Numerous control and learning problems face the situation where sequences of high-dimensional highly dependent data are available, but no or little feedback is provided to the learner. In such situations it may be useful to find a concise representation of the input signal, that would preserve as much as possible of the relevant information. In this work we are interested in the problems where the relevant information is in the time-series dependence. Thus, the problem can be formalized as follows. Given a series of observations <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>X</mi><mn>0</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>X</mi><mi>n</mi></msub></mrow></math></formula> coming from a large (high-dimensional) space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula>, find a representation function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>f</mi></math></formula> mapping <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula> to a finite space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒴</mi></math></formula> such that the series <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><msub><mi>X</mi><mn>0</mn></msub><mo>)</mo><mo>,</mo><mo>⋯</mo><mo>,</mo><mi>f</mi><mo>(</mo><msub><mi>X</mi><mi>n</mi></msub><mo>)</mo></mrow></math></formula> preserves as much information as possible about the original time-series dependence in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>X</mi><mn>0</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>X</mi><mi>n</mi></msub></mrow></math></formula>.
For stationary time series, the function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>f</mi></math></formula> can be selected as the one maximizing the time-series information <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>I</mi><mi>∞</mi></msub><mo>(</mo><mi>f</mi><mo>)</mo><mo>=</mo><msub><mi>h</mi><mn>0</mn></msub><mo>(</mo><mi>f</mi><mo>(</mo><mi>X</mi><mo>)</mo><mo>)</mo><mo>-</mo><msub><mi>h</mi><mi>∞</mi></msub><mo>(</mo><mi>f</mi><mo>(</mo><mi>X</mi><mo>)</mo><mo>)</mo></mrow></math></formula> where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>h</mi><mn>0</mn></msub><mo>(</mo><mi>f</mi><mo>(</mo><mi>X</mi><mo>)</mo><mo>)</mo></mrow></math></formula> is the Shannon entropy of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><msub><mi>X</mi><mn>0</mn></msub><mo>)</mo></mrow></math></formula> and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>h</mi><mi>∞</mi></msub><mo>(</mo><mi>f</mi><mo>(</mo><mi>X</mi><mo>)</mo><mo>)</mo></mrow></math></formula> is the entropy rate of the time series <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><msub><mi>X</mi><mn>0</mn></msub><mo>)</mo><mo>,</mo><mo>⋯</mo><mo>,</mo><mi>f</mi><mo>(</mo><msub><mi>X</mi><mi>n</mi></msub><mo>)</mo><mo>,</mo><mo>⋯</mo></mrow></math></formula>. In this paper we study the functional <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>I</mi><mi>∞</mi></msub><mo>(</mo><mi>f</mi><mo>)</mo></mrow></math></formula> from the learning-theoretic point of view.
Specifically, we provide some uniform approximation results, and study the behaviour of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>I</mi><mi>∞</mi></msub><mo>(</mo><mi>f</mi><mo>)</mo></mrow></math></formula> in the problem of optimal control.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Time-series information and learning <ref xlink:href="#sequel-2013-bid34" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Given a time series <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>X</mi><mn>1</mn></msub><mo>,</mo><mo>⋯</mo><mo>,</mo><msub><mi>X</mi><mi>n</mi></msub><mo>,</mo><mo>⋯</mo></mrow></math></formula> taking values in a large (high-dimensional) space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula>, we would like to find a function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>f</mi></math></formula> from <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒳</mi></math></formula> to a small (low-dimensional or finite) space <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>𝒴</mi></math></formula> such that the time series <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>f</mi><mo>(</mo><msub><mi>X</mi><mn>1</mn></msub><mo>)</mo><mo>,</mo><mo>⋯</mo><mo>,</mo><mi>f</mi><mo>(</mo><msub><mi>X</mi><mi>n</mi></msub><mo>)</mo><mo>,</mo><mo>⋯</mo></mrow></math></formula> retains all the information about the time-series dependence in the original sequence, or as much as possible thereof. This goal is formalized in this work, and it is shown that the target function <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>f</mi></math></formula> can be found as the one that maximizes a certain quantity that can be expressed in terms of entropies of the series <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mrow><mo>(</mo><mi>f</mi><mo>(</mo><msub><mi>X</mi><mi>i</mi></msub><mo>)</mo><mo>)</mo></mrow><mrow><mi>i</mi><mo>∈</mo><mi>𝒩</mi></mrow></msub></mrow></math></formula>.
This quantity can be estimated empirically, and does not involve estimating the distribution on the original time series <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mrow><mo>(</mo><msub><mi>X</mi><mi>i</mi></msub><mo>)</mo></mrow><mrow><mi>i</mi><mo>∈</mo><mi>𝒩</mi></mrow></msub></mrow></math></formula>.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
    </subsection>
    <subsection id="uid86" level="1">
      <bodyTitle>Statistical Learning and Bayesian Analysis</bodyTitle>
      <subsection id="uid87" level="2">
        <bodyTitle>Dictionary learning</bodyTitle>
        <p>
          <i>
            <b>Learning a common dictionary over a sensor network <ref xlink:href="#sequel-2013-bid35" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the problem of distributed dictionary learning, where a set of nodes is required to collectively learn a common dictionary from noisy measurements. This approach may be useful in several contexts including sensor networks. Diffusion cooperation schemes have been proposed to solve the distributed linear regression problem. In this work we focus on a diffusion-based adaptive dictionary learning strategy: each node records independent observations and cooperates with its neighbors by sharing its local dictionary. The resulting algorithm corresponds to a distributed alternate optimization. Beyond dictionary learning, this strategy could be adapted to many matrix factorization problems in various settings. We illustrate its efficiency on some numerical experiments.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Distributed dictionary learning over a sensor network <ref xlink:href="#sequel-2013-bid36" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We consider the problem of distributed dictionary learning, where a set of nodes is required to collectively learn a common dictionary from noisy measurements. This approach may be useful in several contexts including sensor networks. Diffusion cooperation schemes have been proposed to solve the distributed linear regression problem. In this work we focus on a diffusion-based adaptive dictionary learning strategy: each node records observations and cooperates with its neighbors by sharing its local dictionary. The resulting algorithm corresponds to a distributed block coordinate descent (alternate optimization). Beyond dictionary learning, this strategy could be adapted to many matrix factorization problems and generalized to various settings. This article presents our approach and illustrates its efficiency on some numerical examples.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
    </subsection>
    <subsection id="uid88" level="1">
      <bodyTitle>Applications</bodyTitle>
      <subsection id="uid89" level="2">
        <bodyTitle>Medical Applications</bodyTitle>
        <p>
          <i>
            <b>Outlier detection for patient monitoring and alerting <ref xlink:href="#sequel-2013-bid37" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We develop and evaluate a data-driven approach for detecting unusual (anomalous) patient-management decisions using past patient cases stored in electronic health records (EHRs). Our hypothesis is that a patient-management decision that is unusual with respect to past patient care may be due to an error and that it is worthwhile to generate an alert if such a decision is encountered. We evaluate this hypothesis using data obtained from EHRs of 4486 post-cardiac surgical patients and a subset of 222 alerts generated from the data. We base the evaluation on the opinions of a panel of experts. The results of the study support our hypothesis that the outlier-based alerting can lead to promising true alert rates. We observed true alert rates that ranged from 25% to 66% for a variety of patient-management actions, with 66% corresponding to the strongest outliers.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
    </subsection>
    <subsection id="uid90" level="1">
      <bodyTitle>Miscellaneous</bodyTitle>
      <subsection id="uid91" level="2">
        <bodyTitle>Miscellaneous</bodyTitle>
        <p>
          <i>
            <b>A confidence-set approach to signal denoising <ref xlink:href="#sequel-2013-bid38" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>The problem of filtering of finite-alphabet stationary ergodic time series is considered. A method for constructing a confidence set for the (unknown) signal is proposed, such that the resulting set has the following properties. First, it includes the unknown signal with probability γ, where γ is a parameter supplied to the filter. Second, the size of the confidence sets grows exponentially with a rate that is asymptotically equal to the conditional entropy of the signal given the data. Moreover, it is shown that this rate is optimal. We also show that the described construction of the confidence set can be applied to the case where the signal is corrupted by an erasure channel with unknown statistics.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Quantification adaptative pour la stéganalyse d'images texturées <ref xlink:href="#sequel-2013-bid39" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Nous cherchons à améliorer les performances d'un schéma de stéganalyse (i.e. la détection de messages cachés) pour des images texturées. Le schéma de stéganographie étudié consiste à modifier certains pixels de l'image par une perturbation +/-1, et le schéma de stéganalyse utilise les caractéristiques construites à partir de la probabilité conditionnelle empirique de différences de 4 pixels voisins. Dans sa version originale, la stéganalyse n'est pas très efficace sur des images texturées et ce travail vise à explorer plusieurs techniques de quantification en utilisant d'abord un pas de quantification plus important puis une quantification adaptative scalaire ou vectorielle. Les cellules de la quantification adaptative sont générées en utilisant un K-means ou un K-means "équilibré" de manière à ce que chaque cellule quantifie approximativement le même nombre d'échantillons. Nous obtenons un gain maximal de classification de 3% pour un pas de quantification uniforme de 3. En utilisant l'algorithme K-means équilibré sur [-18,18], le gain par rapport à la version de base est de 4.7%.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Cost-sensitive Multiclass Classification Risk Bounds <ref xlink:href="#sequel-2013-bid40" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>A commonly used approach to multiclass classification is to replace the 0-1 loss with a convex surrogate so as to make empirical risk minimization computationally tractable. Previous work has uncovered sufficient and necessary conditions for the consistency of the resulting procedures. In this paper, we strengthen these results by showing how the 0-1 excess loss of a predictor can be upper bounded as a function of the excess loss of the predictor measured using the convex surrogate. The bound is developed for the case of cost-sensitive multiclass classification and a convex surrogate loss that goes back to the work of Lee, Lin and Wahba. The bounds are as easy to calculate as in binary classification. Furthermore, we also show that our analysis extends to the analysis of the recently introduced "Simplex Coding" scheme.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Approximate Dynamic Programming Finally Performs Well in the Game of Tetris <ref xlink:href="#sequel-2013-bid41" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Tetris is a video game that has been widely used as a benchmark for various optimization techniques including approximate dynamic programming (ADP) algorithms. A look at the literature of this game shows that while ADP algorithms that have been (almost) entirely based on approximating the value function (value function based) have performed poorly in Tetris, the methods that search directly in the space of policies by learning the policy parameters using an optimization black box, such as the cross entropy (CE) method, have achieved the best reported results. This makes us conjecture that Tetris is a game in which good policies are easier to represent, and thus, learn than their corresponding value functions. So, in order to obtain a good performance with ADP, we should use ADP algorithms that search in a policy space, instead of the more traditional ones that search in a value function space. In this paper, we put our conjecture to test by applying such an ADP algorithm, called classification-based modified policy iteration (CBMPI), to the game of Tetris. Our experimental results show that for the first time an ADP algorithm, namely CBMPI, obtains the best results reported in the literature for Tetris in both small <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>10</mn><mo>×</mo><mn>10</mn></mrow></math></formula> and large <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>10</mn><mo>×</mo><mn>20</mn></mrow></math></formula> boards. Although the CBMPI's results are similar to those of the CE method in the large board, CBMPI uses considerably fewer (almost 1/6) samples (calls to the generative model) than CE.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>A Generalized Kernel Approach to Structured Output Learning <ref xlink:href="#sequel-2013-bid42" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>We study the problem of structured output learning from a regression perspective. We first provide a general formulation of the kernel dependency estimation (KDE) problem using operator-valued kernels. We show that some of the existing formulations of this problem are special cases of our framework. We then propose a covariance-based operator-valued kernel that allows us to take into account the structure of the kernel feature space. This kernel operates on the output space and encodes the interactions between the outputs without any reference to the input space. To address this issue, we introduce a variant of our KDE method based on the conditional covariance operator that in addition to the correlation between the outputs takes into account the effects of the input variables. Finally, we evaluate the performance of our KDE approach using both covariance and conditional covariance kernels on two structured output problems, and compare it to the state-of-the-art kernel-based structured output regression methods.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Gossip-based distributed stochastic bandit algorithms <ref xlink:href="#sequel-2013-bid43" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>The multi-armed bandit problem has attracted remarkable attention in the machine learning community and many efficient algorithms have been proposed to handle the so-called exploitation-exploration dilemma in various bandit setups. At the same time, significantly less effort has been devoted to adapting bandit algorithms to particular architectures, such as sensor networks, multi-core machines, or peer-to-peer (P2P) environments, which could potentially speed up their convergence. Our goal is to adapt stochastic bandit algorithms to P2P networks. In our setup, the same set of arms is available in each peer. In every iteration each peer can pull one arm independently of the other peers, and then some limited communication is possible with a few random other peers. As our main result, we show that our adaptation achieves a linear speedup in terms of the number of peers participating in the network. More precisely, we show that the probability of playing a suboptimal arm at a peer in iteration t=<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>Ω</mi></math></formula>(logN) is proportional to 1/(Nt) where N denotes the number of peers. The theoretical results are supported by simulation experiments showing that our algorithm scales gracefully with the size of network.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Sur quelques problèmes non-supervisés impliquant des séries temporelles hautement dépendantes <ref xlink:href="#sequel-2013-bid44" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Cette thèse est consacrée à l'analyse théorique de problèmes non supervisés impliquant des séries temporelles hautement dépendantes. Plus particulièrement, nous abordons les deux problèmes fondamentaux que sont le problème d'estimation des points de rupture et le partitionnement de séries temporelles. Ces problèmes sont abordés dans un cadre extrêmement général où les données sont générées par des processus stochastiques ergodiques stationnaires. Il s'agit de l'une des hypothèses les plus faibles en statistiques, comprenant non seulement, les hypothèses de modèles et les hypothèses paramétriques habituelles dans la littérature scientifique, mais aussi des hypothèses classiques d'indépendance, de contraintes sur l'espace mémoire ou encore des hypothèses de mélange. En particulier, aucune restriction n'est faite sur la forme ou la nature des dépendances, de telle sorte que les échantillons peuvent être arbitrairement dépendants. Pour chaque problème abordé, nous proposons de nouvelles méthodes non paramétriques et nous prouvons de plus qu'elles sont, dans ce cadre, asymptotiquement consistantes. Pour l'estimation de points de rupture, la consistance asymptotique se rapporte à la capacité de l'algorithme à produire des estimations des points de rupture qui sont asymptotiquement arbitrairement proches des vrais points de rupture. D'autre part, un algorithme de partitionnement est asymptotiquement consistant si le partitionnement qu'il produit, restreint à chaque lot de séquences, coïncide, à partir d'un certain temps et de manière consistante, avec le partitionnement cible. Nous montrons que les algorithmes proposés sont implémentables efficacement, et nous accompagnons nos résultats théoriques par des évaluations expérimentales. L'analyse statistique dans le cadre stationnaire ergodique est extrêmement difficile. De manière générale, il est prouvé que les vitesses de convergence sont impossibles à obtenir. 
Dès lors, pour deux échantillons générés indépendamment par des processus ergodiques stationnaires, il est prouvé qu'il est impossible de distinguer le cas où les échantillons sont générés par le même processus de celui où ils sont générés par des processus différents. Ceci implique que des problèmes tels le partitionnement de séries temporelles sans la connaissance du nombre de partitions ou du nombre de points de rupture ne peuvent admettre de solutions consistantes. En conséquence, une tâche difficile est de découvrir les formulations du problème qui en permettent une résolution dans ce cadre général. La principale contribution de cette thèse est de démontrer (par construction) que malgré ces résultats d'impossibilités théoriques, des formulations naturelles des problèmes considérés existent et admettent des solutions consistantes dans ce cadre général. Ceci inclut la démonstration du fait que le nombre de points de rupture corrects peut être trouvé, sans recourir à des hypothèses plus fortes sur les processus stochastiques. Il en résulte que, dans cette formulation, le problème des points de rupture peut être réduit à du partitionnement de séries temporelles. Les résultats présentés dans ce travail formulent les fondations théoriques pour l'analyse des données séquentielles dans un espace d'applications bien plus large.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Actor-Critic Algorithms for Risk-Sensitive MDPs <ref xlink:href="#sequel-2013-bid45" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>In many sequential decision-making problems we may want to manage risk by minimizing some measure of variability in rewards in addition to maximizing a standard criterion. Variance-related risk measures are among the most common risk-sensitive criteria in finance and operations research. However, optimizing many such criteria is known to be a hard problem. In this paper, we consider both discounted and average reward Markov decision processes. For each formulation, we first define a measure of variability for a policy, which in turn gives us a set of risk-sensitive criteria to optimize. For each of these criteria, we derive a formula for computing its gradient. We then devise actor-critic algorithms for estimating the gradient and updating the policy parameters in the ascent direction. We establish the convergence of our algorithms to locally risk-sensitive optimal policies. Finally, we demonstrate the usefulness of our algorithms in a traffic signal control application.</p>
        <p spacebefore="7.22743pt"/>
        <p>
          <i>
            <b>Bayesian Policy Gradient and Actor-Critic Algorithms <ref xlink:href="#sequel-2013-bid46" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></b>
          </i>
          <i/>
        </p>
        <p>Policy gradient methods are reinforcement learning algorithms that adapt a parameterized policy by following a performance gradient estimate. Many conventional policy gradient methods use Monte-Carlo techniques to estimate this gradient. The policy is improved by adjusting the parameters in the direction of the gradient estimate. Since Monte-Carlo methods tend to have high variance, a large number of samples is required to attain accurate estimates, resulting in slow convergence. In this paper, we first propose a Bayesian framework for policy gradient, based on modeling the policy gradient as a Gaussian process. This reduces the number of samples needed to obtain accurate gradient estimates. Moreover, estimates of the natural gradient as well as a measure of the uncertainty in the gradient estimates, namely, the gradient covariance, are provided at little extra cost. Since the proposed Bayesian framework considers system trajectories as its basic observable unit, it does not require the dynamic within each trajectory to be of any special form, and thus, can be easily extended to partially observable problems. On the downside, it cannot take advantage of the Markov property when the system is Markovian. To address this issue, we then extend our Bayesian policy gradient framework to actor-critic algorithms and present a new actor-critic learning model in which a Bayesian class of non-parametric critics, based on Gaussian process temporal difference learning, is used. Such critics model the action-value function as a Gaussian process, allowing Bayes' rule to be used in computing the posterior distribution over action-value functions, conditioned on the observed data. Appropriate choices of the policy parameterization and of the prior covariance (kernel) between action-values allow us to obtain closed-form expressions for the posterior distribution of the gradient of the expected return with respect to the policy parameters. 
We perform detailed experimental comparisons of the proposed Bayesian policy gradient and actor-critic algorithms with classic Monte-Carlo based policy gradient methods, as well as with each other, on a number of reinforcement learning problems.</p>
        <p spacebefore="7.22743pt"/>
      </subsection>
    </subsection>
  </resultats>
  <contrats id="uid92">
    <bodyTitle>Bilateral Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid93" level="1">
      <bodyTitle>Bilateral Contracts with Industry</bodyTitle>
      <simplelist>
        <li id="uid94">
          <p noindent="true"><ref xlink:href="http://www.deezer.com" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Deezer</ref>, 2013-2014</p>
          <participants>
            <person key="sequel-2006-id18261">
              <firstname>Jérémie</firstname>
              <lastname>Mary</lastname>
            </person>
            <person key="sequel-2006-id18078">
              <firstname>Philippe</firstname>
              <lastname>Preux</lastname>
            </person>
            <person key="tao-2006-id18354">
              <firstname>Romaric</firstname>
              <lastname>Gaudel</lastname>
            </person>
          </participants>
          <p>A research project started in June 2013 in collaboration with the
Deezer company. The goal is to build a system which automatically
recommends music to users. That goal is an extension of the bandit
setting to the Collaborative Filtering problem.</p>
        </li>
        <li id="uid95">
          <p noindent="true"><ref xlink:href="http://www.nuukik.com" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Nuukik</ref>, 2013-2014</p>
          <participants>
            <person key="sequel-2006-id18261">
              <firstname>Jérémie</firstname>
              <lastname>Mary</lastname>
            </person>
          </participants>
          <p>Nuukik is a start-up from Hub Innovation in Lille. It proposes a recommender system for e-commerce based on matrix factorization. We worked with them specifically on the cold start problem (<i>i.e.</i> when you have absolutely no data on a product or a customer). This led to promising results and allowed us to close the gap between bandits and matrix factorization. This work led to a patent submission in December 2013.</p>
        </li>
        <li id="uid96">
          <p noindent="true"><ref xlink:href="http://www.tbsfrance.com/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">TBS</ref>, 2012-2013</p>
          <participants>
            <person key="sequel-2006-id18261">
              <firstname>Jérémie</firstname>
              <lastname>Mary</lastname>
            </person>
            <person key="sequel-2006-id18078">
              <firstname>Philippe</firstname>
              <lastname>Preux</lastname>
            </person>
          </participants>
          <p>A research project has started in September 2012 in collaboration with
the TBS company. The goal is to understand and predict the audience of
news related websites. These websites tend to present an ergodic
frequentation with respect to a context. The main goal is to separate
the effect of the context (big events, elections, ...) and the
impact of the policies of the news websites. This work is based on
data originating from major French media websites and also involves
research of tendencies on the web (as Google Trends and Google Flu
do). Used algorithms mix methods from time series prediction (ARIMA
and MARSS models) and machine learning methods (L1 penalization, SVM).</p>
        </li>
        <li id="uid97">
          <p noindent="true"><ref xlink:href="http://www.squoring.com/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Squoring Technologies</ref>, 2011-2014</p>
          <participants>
            <person key="sequel-2013-idp140217274940208">
              <firstname>Boris</firstname>
              <lastname>Baldassari</lastname>
            </person>
            <person key="sequel-2006-id18078">
              <firstname>Philippe</firstname>
              <lastname>Preux</lastname>
            </person>
          </participants>
          <p>Boris Baldassari has been hired by Squoring Technologies (Toulouse) as a PhD student in May 2011. He works on the use of machine learning to improve the quality of the software development process. During his first year as a PhD student, Boris investigated the existing norms and measures of quality of software development process. He also dedicated some time to gather some relevant datasets, which are made of either the sequence of source code releases over a multi-year period, or all the versions stored on a version control repository (svn or alike). Information from mailing-lists (bugs, support, ...) may also be part of these datasets. Tools in machine learning capable of dealing with this sort of data have also been investigated. Goals that may be reached in this endeavor have also been made more precise.</p>
        </li>
        <li id="uid98">
          <p noindent="true"><ref xlink:href="http://www.intel.com/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">INTEL Corp.</ref>, 2013 - 2014</p>
          <participants>
            <person key="sequel-2006-id18078">
              <firstname>Philippe</firstname>
              <lastname>Preux</lastname>
            </person>
            <person key="sequel-2012-idp140352027421952">
              <firstname>Michal</firstname>
              <lastname>Valko</lastname>
            </person>
            <person key="sequel-2006-id18109">
              <firstname>Rémi</firstname>
              <lastname>Munos</lastname>
            </person>
            <person key="sequel-2013-idp140217274947264">
              <firstname>Adrien</firstname>
              <lastname>Hoarau</lastname>
            </person>
          </participants>
          <p>This is a research project on Algorithmic Determination of IoT Edge Analytics
Requirements. We are attempting to solve the problem of how to automatically
predict the system requirements for edge node analytics in the Internet of
Things (IoT).
We envision that a flexible extensible system of edge analytics can be
created for IoT management; however, edge nodes can be very different in
terms of the systems requirements around: processing capability, wireless
communication, security/cryptography, guaranteed responsiveness,
guaranteed quality of service and on-board memory requirements. One
of the challenges of managing a heterogeneous Internet of Things is
determining the systems requirements at each edge node in the network.</p>
          <p>We suggest exploiting the opportunity of being able to automatically
customize large scale IoT systems that could comprise heterogeneous
edge nodes and allow a flexible and scalable component and firmware
SoC systems to be matched to the individual need of enterprise/
government level IoT customers. We propose using large scale
sequential decision learning algorithms, particularly contextual bandit
modeling to automatically determine the systems requirements for edge
analytics. These algorithms have an adaptive property that allows for
the addition of new nodes and the re-evaluation of existing nodes under
dynamic and potentially adversarial conditions.</p>
        </li>
      </simplelist>
    </subsection>
  </contrats>
  <partenariat id="uid99">
    <bodyTitle>Partnerships and Cooperations</bodyTitle>
    <subsection id="uid100" level="1">
      <bodyTitle>National Initiatives</bodyTitle>
      <subsection id="uid101" level="2">
        <bodyTitle>ANR-Lampada</bodyTitle>
        <participants>
          <person key="sequel-2008-id18222">
            <firstname>Mohammad</firstname>
            <lastname>Ghavamzadeh</lastname>
          </person>
          <person key="sequel-2006-id18261">
            <firstname>Jérémie</firstname>
            <lastname>Mary</lastname>
          </person>
          <person key="sequel-2010-id60072">
            <firstname>Olivier</firstname>
            <lastname>Nicol</lastname>
          </person>
          <person key="sequel-2006-id18078">
            <firstname>Philippe</firstname>
            <lastname>Preux</lastname>
          </person>
          <person key="sequel-2007-id18237">
            <firstname>Daniil</firstname>
            <lastname>Ryabko</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid102">
            <p noindent="true"><i>Title</i>: Learning Algorithms, Models and sPArse
representations for structured DAta</p>
          </li>
          <li id="uid103">
            <p noindent="true"><i>Type</i>: National Research Agency (ANR-09-EMER-007)</p>
          </li>
          <li id="uid104">
            <p noindent="true"><i>Coordinator</i>: Inria Lille – Nord Europe (Mostrare)</p>
          </li>
          <li id="uid105">
            <p noindent="true"><i>Other partners</i>: Laboratoire d'Informatique
Fondamentale de Marseille; Laboratoire Hubert Curien à Saint
Etienne; Laboratoire d'Informatique de Paris 6.</p>
          </li>
          <li id="uid106">
            <p noindent="true"><i>Web site</i>: <ref xlink:href="http://lampada.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>lampada.<allowbreak/>gforge.<allowbreak/>inria.<allowbreak/>fr/</ref></p>
          </li>
          <li id="uid107">
            <p noindent="true"><i>Duration</i>: ends mid-2014</p>
          </li>
          <li id="uid108">
            <p noindent="true"><i>Abstract</i>: Lampada is a fundamental research project
on machine learning and structured data. Lampada focuses on
scaling learning algorithms to handle large sets of complex
data. The main challenges are 1) high dimension learning problems,
2) large sets of data and 3) dynamics of data. We consider
evolving data. The representation of these data involves both
structure and content information and are typically large
sequences, trees and graphs. The main application domains are
web2, social networks and biological data.</p>
            <p>The project proposes to study formal representations of such data
together with incremental or sequential machine learning methods
and similarity learning methods.</p>
            <p>The representation research topic includes condensed data
representation, sampling, prototype selection and representation
of streams of data. Machine learning methods include edit distance
learning, reinforcement learning and incremental methods, density
estimation of structured data and learning on streams.</p>
          </li>
          <li id="uid109">
            <p noindent="true"><i>Activity Report</i>:</p>
            <p>Philippe Preux has collaborated with Ludovic Denoyer and Gabriel
Dulac-Arnold from LIP'6 to investigate further the idea of
datum-wise representation, introduced in 2011.</p>
            <p>Mohammad Ghavamzadeh and Philippe Preux have collaborated with
Hachem Kadri on an operator-based approach for structured output
<ref xlink:href="#sequel-2013-bid42" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
            <p>Daniil Ryabko has developed a theory for unsupervised learning of
time-series dependence, where the time series are either coming
from a stationary environment or are a result of interaction with
a Markovian environment with a continuous state space. Daniil
Ryabko and Jérémie Mary have developed methods for using binary
classification methods for solving various unsupervised learning
problems about time series.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid110" level="2">
        <bodyTitle>ANR CO-ADAPT</bodyTitle>
        <participants>
          <person key="sequel-2006-id18109">
            <firstname>Rémi</firstname>
            <lastname>Munos</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid111">
            <p noindent="true"><i>Title</i>: Brain computer co-adaptation for better interfaces</p>
          </li>
          <li id="uid112">
            <p noindent="true"><i>Type</i>: National Research Agency (ANR-09-EMER-002)</p>
          </li>
          <li id="uid113">
            <p noindent="true"><i>Coordinator</i>: Maureen Clerc</p>
          </li>
          <li id="uid114">
            <p noindent="true"><i>Other Partners</i>: Inria Odyssee project (Maureen
Clerc), the INSERM U821 team (Olivier Bertrand), the Laboratory of
Neurobiology of Cognition (CNRS) (Boris Burle) and the laboratory
of Analysis, topology and probabilities (CNRS and University of
Provence) (Bruno Torresani).</p>
          </li>
          <li id="uid115">
            <p noindent="true"><i>Web site</i>:
<ref xlink:href="https://twiki-sop.inria.fr/twiki/bin/view/Projets/Athena/CoAdapt/WebHome" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>twiki-sop.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>twiki/<allowbreak/>bin/<allowbreak/>view/<allowbreak/>Projets/<allowbreak/>Athena/<allowbreak/>CoAdapt/<allowbreak/>WebHome</ref></p>
          </li>
          <li id="uid116">
            <p noindent="true"><i>Duration</i>: 2009-2014</p>
          </li>
          <li id="uid117">
            <p noindent="true"><i>Abstract</i>: The aim of Co-Adapt is to propose new
directions for BCI design, by modeling explicitly the
co-adaptation taking place between the user and the system. The
goal of CoAdapt is to study the co-adaptation between a user and a
BCI system in the course of training and operation. The quality of
the interface will be judged according to several criteria
(reliability, learning curve, error correction, bit rate). BCI
will be considered under a joint perspective: the user's and the
system's. From the user's brain activity, features must be
extracted, and translated into commands to drive the BCI
system. From the point of view of the system, it is important to
devise adaptive learning strategies, because the brain activity is
not stable in time. How to adapt the features in the course of BCI
operation is a difficult and important topic of research. We will
investigate Reinforcement Learning (RL) techniques to address the
above questions.</p>
          </li>
          <li id="uid118">
            <p noindent="true"><i>Activity Report</i>:
The performances of a BCI can vary greatly across users but also depend on the tasks used, making the problem of appropriate task selection an important issue. We develop an adaptive algorithm, UCB-classif, based on the stochastic bandit theory. This shortens the training stage, thereby allowing the exploration of a greater variety of tasks. By not wasting time on inefficient tasks, and focusing on the most promising ones, this algorithm results in a faster task selection and a more efficient use of the BCI training session. See <ref xlink:href="#sequel-2013-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> and <ref xlink:href="https://twiki-sop.inria.fr/twiki/bin/view/Projets/Athena/CoAdapt/WebHome" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>twiki-sop.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>twiki/<allowbreak/>bin/<allowbreak/>view/<allowbreak/>Projets/<allowbreak/>Athena/<allowbreak/>CoAdapt/<allowbreak/>WebHome</ref></p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid119" level="2">
        <bodyTitle>ANR AMATIS</bodyTitle>
        <participants>
          <person key="sequel-2013-idp140217274917456">
            <firstname>Pierre</firstname>
            <lastname>Chainais</lastname>
          </person>
        </participants>
        <simplelist>
          <li id="uid120">
            <p noindent="true"><i>Title</i>: Multifractal Analysis and Applications to
Signal and Image Processing</p>
          </li>
          <li id="uid121">
            <p noindent="true"><i>Type</i>: National Research Agency</p>
          </li>
          <li id="uid122">
            <p noindent="true"><i>Coordinator</i>: Univ. Paris-Est-Créteil (S. Jaffard)</p>
          </li>
          <li id="uid123">
            <p noindent="true"><i>Duration</i>: 2011-2015</p>
          </li>
          <li id="uid124">
            <p noindent="true"><i>Other Partners</i>: Univ. Paris-Est Créteil,
Univ. Sciences et Technologies de Lille and Inria (Lille), ENST
(Telecom ParisTech), Univ. Blaise Pascal (Clermont-Ferrand), and
Univ. Bretagne Sud (Vannes), Statistical Signal Processing group
at the Physics Department at the Ecole Normale Supérieure de
Lyon, one researcher from the Math. Department of Institut
National des Sciences Appliquées de Lyon and two researchers from
the Laboratoire d'Analyse, Topologie et Probabilités (LATP) of
Aix-Marseille University.</p>
          </li>
          <li id="uid125">
            <p noindent="true"><i>Abstract</i>: Multifractal analysis refers to two
concepts of different natures: On the theoretical side, it
corresponds to pointwise singularity characterization and
fractional dimension determination; on the applied side, it is
associated with scale invariance characterization, involving a
family of parameters, the scaling function, used in
classification or model selection. Following the seminal ideas of
Parisi and Frisch in the mid-80s, these two components are usually
related by a Legendre transform, stemming from a heuristic
argument relying on large deviation and statistical thermodynamics
principles: The multifractal formalism. This led to new
theoretical approaches for the study of singularities of
functions and measures, as well as efficient tools for
classification and model selection, which made it possible to settle
longstanding issues (<i>e.g.</i>, concerning the modeling of fully
developed turbulence). Though this formalism has been shown to
hold for large classes of functions of widely different origins,
the generality of its level of validity remains an open
issue. Despite its popularity in applications, the interactions
between theoretical developments and applications are
unsatisfactory. Its use in image processing for instance is still
in its infancy. This is partly due to discrepancy between the
theoretical contributions mostly grounded in functional analysis
and geometric measure theory, and applications naturally implying
a stochastic or statistical framework. The AMATIS project aims at
addressing these issues, by proposing a consistent and documented
framework combining different theoretical approaches and bridging
the gap towards applications. To that end, it will both address a
number of challenging theoretical issues and devote significant
efforts to elaborating a Web platform with software and
documentation. It will combine the efforts of mathematicians with
those of physicists and experts in signal and image
processing. Dissemination among and interactions between
scientific fields are also intended via the organization of summer
schools and workshops.</p>
          </li>
          <li id="uid126">
            <p noindent="true"><i>Activity Report</i>: a collaboration with P. Bas (CR
CNRS, LAGIS) deals with the steganalysis of textured
images. While steganography aims at hiding a message within some
support, <i>e.g.</i> a numerical image, steganalysis aims at detecting
the presence or not of any hidden message in the
support. Steganalysis involves two main tasks: first identify
relevant features which may be sensitive to the presence of a
hidden message, then use supervised classification to build a
detector. While the steganalysis of usual images has been well
studied, the case of textured images, for which multifractal
models may be relevant, is much more difficult. Indeed, textured
images have a rich and disordered content which favors hiding
information in an imperceptible manner. A student internship of 8
months at Master level in 2012 has led us to consider a very fundamental question.
Steganalysis usually proceeds by classification based on histograms of
features (bag of words). We consider the problem of the optimization
of the bins of such histograms with respect to the performance of the classifier.
We have shown that a balanced version of K-means which fills each cell equally
yields an efficient quantization in this respect <ref xlink:href="#sequel-2013-bid39" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid127" level="2">
        <bodyTitle>National Partners</bodyTitle>
        <simplelist>
          <li id="uid128">
            <p noindent="true">Laboratoire de Mathématiques d'Orsay, France.</p>
            <simplelist>
              <li id="uid129">
                <p noindent="true">Mylène Maïda <i>Collaborator</i></p>
                <p>Ph. Preux has collaborated with M. Maïda and co-advised a student of the École Centrale de Lille. The motivation of this collaboration is the study of random matrices and the potential use of this theory in machine learning.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid130">
            <p noindent="true">LIF - CMI - Université de Provence.</p>
            <simplelist>
              <li id="uid131">
                <p noindent="true">Julien Audiffren <i>Collaborator</i></p>
                <p noindent="true">M. Valko, A. Lazaric, and M. Ghavamzadeh work with Julien
on Semi-Supervised Apprenticeship Learning. We have recently developed a maximum
entropy algorithm that outperforms the approach without unlabeled data.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid132">
            <p noindent="true">Laboratoire Lagrange, Université de Nice, France.</p>
            <simplelist>
              <li id="uid133">
                <p noindent="true">Cédric Richard <i>Collaborator</i></p>
                <p noindent="true">We have collaborated on the topic of <i>dictionary learning over a sensor network</i>. We have published 2 conference papers <ref xlink:href="#sequel-2013-bid36" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> and <ref xlink:href="#sequel-2013-bid35" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid134">
            <p noindent="true">Laboratoire de Mécanique de Lille, Université de Lille 1, France.</p>
            <simplelist>
              <li id="uid135">
                <p noindent="true">Jean-Philippe Laval <i>Collaborator</i></p>
                <p noindent="true">We co-supervise a starting PhD student (Linh Van Nguyen) on the topic of <i>high resolution field reconstruction from low resolution measurements in turbulent flows</i>.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid136">
            <p noindent="true">Biophotonics team at the Interdisciplinary Research Institute (IRI), Villeneuve d'Ascq, France.</p>
            <simplelist>
              <li id="uid137">
                <p noindent="true">Aymeric Leray <i>Collaborator</i></p>
                <p noindent="true">We have co-supervised an intern student (Pierre Pfennig, 2 months) on the topic of <i>quantitative guarantees of a super resolution method via concentration inequalities</i>. A paper has been submitted to ICASSP 2014.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid138">
            <p noindent="true">LAGIS, Ecole Centrale Lille - Université de Lille 1, France.</p>
            <simplelist>
              <li id="uid139">
                <p noindent="true">Patrick Bas <i>Collaborator</i></p>
                <p noindent="true">We have a collaboration on the topic of <i>adaptive quantization to optimize classification from histograms of features with an application to the steganalysis of textured images</i>.</p>
              </li>
            </simplelist>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid140" level="1">
      <bodyTitle>European Initiatives</bodyTitle>
      <subsection id="uid141" level="2">
        <bodyTitle>FP7 Projects</bodyTitle>
        <subsection id="uid142" level="3">
          <bodyTitle>CompLACS</bodyTitle>
          <sanspuceslist>
            <li id="uid143">
              <p noindent="true">Type: COOPERATION</p>
            </li>
            <li id="uid144">
              <p noindent="true">Defi: Composing Learning for Artificial Cognitive Systems</p>
            </li>
            <li id="uid145">
              <p noindent="true">Instrument: Specific Targeted Research Project</p>
            </li>
            <li id="uid146">
              <p noindent="true">Objectif: Cognitive Systems and Robotics</p>
            </li>
            <li id="uid147">
              <p noindent="true">Duration: March 2011 - February 2015</p>
            </li>
            <li id="uid148">
              <p noindent="true">Coordinator: University College London</p>
            </li>
            <li id="uid149">
              <p noindent="true">Partner:</p>
              <simplelist>
                <li id="uid150">
                  <p noindent="true">Centre for Computational Statistics and Machine Learning, University College London (United Kingdom)</p>
                </li>
                <li id="uid151">
                  <p noindent="true">Department of Computer Science, University of Bristol (United Kingdom)</p>
                </li>
                <li id="uid152">
                  <p noindent="true">Department of Computer Science, Royal Holloway, University of London (United Kingdom)</p>
                </li>
                <li id="uid153">
                  <p noindent="true">SNN Machine Learning, Radboud Universiteit Nijmegen (The Netherlands)</p>
                </li>
                <li id="uid154">
                  <p noindent="true">Institut für Softwaretechnik und Theoretische Informatik, TU Berlin (Germany)</p>
                </li>
                <li id="uid155">
                  <p noindent="true">University of Leoben (Austria)</p>
                </li>
                <li id="uid156">
                  <p noindent="true">Computer Science Department, Technische Universitaet Darmstadt (Germany)</p>
                </li>
              </simplelist>
            </li>
            <li id="uid157">
              <p noindent="true">Inria contact: Rémi MUNOS</p>
            </li>
            <li id="uid158">
              <p noindent="true">Website: <ref xlink:href="http://www.complacs.org/pmwiki.php/CompLACS/Organization" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">COMPLACS</ref></p>
            </li>
            <li id="uid159">
              <p noindent="true">Abstract: One of the aspirations of machine learning is to develop intelligent systems that can address a wide variety of control problems of many different types. However, although the community has developed successful technologies for many individual problems, these technologies have not previously been integrated into a unified framework. As a result, the technology used to specify, solve and analyse one control problem typically cannot be reused on a different problem. The community has fragmented into a diverse set of specialists with particular solutions to particular problems. The purpose of this project is to develop a unified toolkit for intelligent control in many different problem areas. This toolkit will incorporate many of the most successful approaches to a variety of important control problems within a single framework, including bandit problems, Markov Decision Processes (MDPs), Partially Observable MDPs (POMDPs), continuous stochastic control, and multi-agent systems. In addition, the
toolkit will provide methods for the automatic construction of representations and capabilities, which can then be applied to any of these problem types. Finally, the toolkit will provide a generic interface to specifying problems and analysing performance, by mapping intuitive, human-understandable goals into machine-understandable objectives, and by mapping algorithm performance and regret back into human-understandable terms.</p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
      <subsection id="uid160" level="2">
        <bodyTitle>Collaborations with Major European Organizations</bodyTitle>
        <sanspuceslist>
          <li id="uid161">
            <p noindent="true">Alexandra Carpentier: University of Cambridge (UK).</p>
          </li>
          <li id="uid162">
            <p noindent="true">Michal Valko collaborates with Alexandra on extreme event detection
(such as network intrusion) with limited allocation capabilities.</p>
          </li>
        </sanspuceslist>
        <sanspuceslist>
          <li id="uid163">
            <p noindent="true">Prof. Marcello Restelli and Prof. Nicola Gatti: Politecnico di Milano (Italy).</p>
          </li>
          <li id="uid164">
            <p noindent="true">A. Lazaric continued his collaboration on transfer in reinforcement learning which is leading to an extended version of last year's work on transfer of samples in MDPs. Furthermore, we are going to submit an extended version of an application of multi-armed bandits in a strategic environment such as sponsored search auctions.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid165" level="1">
      <bodyTitle>International Initiatives</bodyTitle>
      <subsection id="uid166" level="2">
        <bodyTitle>Inria Associate Teams</bodyTitle>
        <simplelist>
          <li id="uid167">
            <p noindent="true"><i>Inria principal investigator</i>: Mohammad Ghavamzadeh and Rémi Munos</p>
            <simplelist>
              <li id="uid168">
                <p noindent="true"><i>Institution</i>: McGill University (Canada)</p>
              </li>
              <li id="uid169">
                <p noindent="true"><i>Laboratory</i>: Reasoning and Learning Lab</p>
              </li>
              <li id="uid170">
                <p noindent="true"><i>Principal investigator</i>:</p>
                <simplelist>
                  <li id="uid171">
                    <p noindent="true">Prof. Joelle Pineau <i>Collaborator</i></p>
                  </li>
                  <li id="uid172">
                    <p noindent="true">Prof. Doina Precup <i>Collaborator</i></p>
                  </li>
                  <li id="uid173">
                    <p noindent="true">Amir massoud Farahmand <i>Collaborator</i></p>
                  </li>
                </simplelist>
              </li>
            </simplelist>
          </li>
          <li id="uid174">
            <p noindent="true"><i>Duration</i>: January 2013 - January 2015</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid175" level="2">
        <bodyTitle>Inria International Partners</bodyTitle>
        <subsection id="uid176" level="3">
          <bodyTitle>Declared Inria International Partners</bodyTitle>
          <sanspuceslist>
            <li id="uid177">
              <p noindent="true">Ronald Ortner and Peter Auer: Montanuniversität Leoben (Austria).</p>
            </li>
            <li id="uid178">
              <p noindent="true">Reinforcement learning (RL) deals with the problem of
interacting with an unknown stochastic environment that
occasionally provides rewards, with the goal of maximizing the
cumulative reward. The problem is well-understood when the
unknown environment is a finite-state Markov process. This
collaboration is centered around reducing the general RL
problem to this case.</p>
              <p>In particular, the following problems are considered:
representation learning, learning in continuous-state
environments, bandit problems with dependent arms, and pure
exploration in bandit problems. On each of these problems we
have successfully collaborated in the past, and plan to
sustain this collaboration, possibly extending its scope.</p>
            </li>
          </sanspuceslist>
        </subsection>
        <subsection id="uid179" level="3">
          <bodyTitle>Informal International Partners</bodyTitle>
          <simplelist>
            <li id="uid180">
              <p noindent="true">eHarmony Research, California.</p>
              <simplelist>
                <li id="uid181">
                  <p noindent="true">Václav Petříček <i>Collaborator</i></p>
                  <p noindent="true">Michal Valko has started to collaborate with eHarmony on
sequential decision making for online dating and offline evaluation.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid182">
              <p noindent="true">University of Alberta, Edmonton, Alberta, Canada.</p>
              <simplelist>
                <li id="uid183">
                  <p noindent="true">Csaba Szepesvári and Bernardo Avila Pires <i>Collaborator</i></p>
                  <p noindent="true">We have been collaborating on the topic of <i>risk bounds in cost-sensitive multiclass classification</i> this
year. We have an accepted paper <ref xlink:href="#sequel-2013-bid40" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> at
ICML.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid184">
              <p noindent="true">Technion - Israel Institute of Technology, Haifa, Israel.</p>
              <simplelist>
                <li id="uid185">
                  <p noindent="true">Odalric-Ambrym Maillard <i>Collaborator</i></p>
                  <p noindent="true">Daniil Ryabko has worked with Odalric Maillard on
representation learning for reinforcement learning problems. It led to a paper in AISTATS <ref xlink:href="#sequel-2013-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid186">
              <p noindent="true">School of Computer Science, Carnegie Mellon University, USA.</p>
              <simplelist>
                <li id="uid187">
                  <p noindent="true">Prof. Emma Brunskill <i>Collaborator</i></p>
                </li>
                <li id="uid188">
                  <p noindent="true">Mohammad Gheshlaghi Azar, PhD <i>Collaborator</i></p>
                  <p noindent="true">A. Lazaric started a profitable collaboration on transfer in multi-armed bandits and reinforcement learning which led to two publications at ECML and NIPS. We are currently working on extensions of the previous algorithms and development of novel regret minimisation algorithms in non-iid settings.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid189">
              <p noindent="true">Technicolor Research, Palo Alto.</p>
              <simplelist>
                <li id="uid190">
                  <p noindent="true">Branislav Kveton <i>Collaborator</i></p>
                  <p noindent="true">Michal Valko and Rémi Munos worked with Branislav on Spectral Bandits
aimed at entertainment content recommendation.
Michal continued the ongoing research on online semi-supervised
learning and this year delivered the algorithm for a challenging single picture
per person setting <ref xlink:href="#sequel-2013-bid32" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.
Victor Gabillon spent 6 months at Technicolor as an intern
to work on sequential learning with submodularity, which
resulted in 1 accepted paper at NIPS and two submissions to ICML.</p>
                </li>
              </simplelist>
            </li>
          </simplelist>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid191" level="1">
      <bodyTitle>International Research Visitors</bodyTitle>
      <subsection id="uid192" level="2">
        <bodyTitle>Visits of International Scientists</bodyTitle>
        <subsection id="uid193" level="3">
          <bodyTitle>Internships</bodyTitle>
          <simplelist>
            <li id="uid194">
              <p noindent="true">Daniele Calandriello, student at Politecnico di Milano, Italy</p>
              <p>Period: since April 2013.</p>
              <p>He is working with A. Lazaric on multi-task reinforcement learning.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid195" level="2">
        <bodyTitle>Visits to International Teams</bodyTitle>
        <simplelist>
          <li id="uid196">
            <p noindent="true">Rémi Munos, since July 2013, Microsoft Research New England, USA</p>
          </li>
          <li id="uid197">
            <p noindent="true">Mohammad Ghavamzadeh, since November 2013, Adobe Research, San Jose, CA</p>
          </li>
          <li id="uid198">
            <p noindent="true">Victor Gabillon visited Technicolor research lab, Palo Alto, from March to September 2013.</p>
          </li>
          <li id="uid199">
            <p noindent="true">Azadeh Khaleghi visited Walt Disney Animation Studios, Burbank, from March to September 2013.</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
  </partenariat>
  <diffusion id="uid200">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid201" level="1">
      <bodyTitle>Scientific Animation</bodyTitle>
      <subsection id="uid202" level="2">
        <bodyTitle>Awards</bodyTitle>
        <simplelist>
          <li id="uid203">
            <p noindent="true"><i/><i><b>Crazy Stone</b></i><i/> won the 6th edition of the UEC Cup (the most important international computer-Go tournament). It also won the first edition of the Denseisen, by winning a 4-stone handicap game against 9-dan professional player Yoshio Ishida.</p>
          </li>
          <li id="uid204">
            <p noindent="true"><i/><i><b>Alexandra Carpentier</b></i><i/> obtained an AFIA ex-aequo accessit for her PhD (French machine learning/artificial intelligence second prize).</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid205" level="2">
        <bodyTitle>Tutorials</bodyTitle>
        <simplelist>
          <li id="uid206">
            <p noindent="true">Tutorial by Rémi Munos at AAAI 2013: From Bandits to Monte Carlo Tree Search: The optimistic principle applied to Optimization and Planning.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid207" level="2">
        <bodyTitle>Conferences, Workshops and Schools</bodyTitle>
        <simplelist>
          <li id="uid208">
            <p noindent="true"><i>Philippe Preux</i> and Marc Tommasi were the main
organizers of the Conférence sur l'Apprentissage Automatique
(CAP'13).</p>
          </li>
          <li id="uid209">
            <p noindent="true"><i>Rémi Munos</i> was the main organizer of the 8<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mrow/><mrow><mi>t</mi><mi>h</mi></mrow></msup></math></formula>
Journées Francophones sur la Planification, la Décision et
l'Apprentissage (JFPDA'13) along with <i>Marta Soare</i>,
<i>Raphael Fonteneau</i>, <i>Michal Valko</i> and
<i>Alessandro Lazaric</i>.</p>
          </li>
          <li id="uid210">
            <p noindent="true"><i>Rémi Munos</i> was co-chair of the Algorithmic Learning Theory conference (ALT), in Singapore, 2013.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid211" level="2">
        <bodyTitle>Invited Talks</bodyTitle>
        <simplelist>
          <li id="uid212">
            <p noindent="true">Daniil Ryabko gave a talk entitled “Time-series information and
unsupervised representation learning” at the SMILE seminar in Paris.</p>
          </li>
          <li id="uid213">
            <p noindent="true">Michal Valko gave a talk “Sequential Face Recognition with
Minimal Feedback” which was the opening talk of the series named 30 minutes of
Science, a new format at Inria Lille to support intra-center collaboration.</p>
          </li>
          <li id="uid214">
            <p noindent="true">Rémi Munos gave a course (6 hours) at the Summer School Netadis in Hillerod, Denmark in September 2013.</p>
          </li>
          <li id="uid215">
            <p noindent="true">Rémi Munos was invited to give a talk at CMU in November 2013.</p>
          </li>
          <li id="uid216">
            <p noindent="true">Alessandro Lazaric was invited to give a talk at CMU in March 2013.</p>
          </li>
          <li id="uid217">
            <p noindent="true">Pierre Chainais gave a talk "Learning a common dictionary over a sensor network" at the GDR Phénix - ISIS workshop about "Analysis and inference for networks" in Paris in November 2013.</p>
          </li>
          <li id="uid218">
            <p noindent="true">Pierre Chainais gave a tutorial talk on "Multifractal analysis of images and applications" at the "Groupe Image" of the company TOTAL in Paris La Défense on September 11th, 2013.</p>
          </li>
          <li id="uid219">
            <p noindent="true">Jérémie Mary gave an invited talk "Recommendation system from a bandit perspective" at GDR "Estimation et traitement statistique en grande dimension" on May 16th, 2013 - Télécom Paristech.</p>
          </li>
          <li id="uid220">
            <p noindent="true">Jérémie Mary gave an invited talk "Bandit point of view on recommenders" at the Large-scale Online Learning and Decision Making Workshop,
Cumberland Lodge, Windsor, UK in September 2013.</p>
          </li>
          <li id="uid221">
            <p noindent="true">Jérémie Mary gave an invited talk on recommender systems at "Journées rencontres AFIA/IHM" in May 2013.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid222" level="2">
        <bodyTitle>Review Activities</bodyTitle>
        <simplelist>
          <li id="uid223">
            <p noindent="true">
              <i/>
              <i>
                <b>Participation in the program committees of international conferences</b>
              </i>
              <i/>
            </p>
            <simplelist>
              <li id="uid224">
                <p noindent="true">International Conference on Pattern Recognition Applications
and Methods (ICPRAM 2013)</p>
              </li>
              <li id="uid225">
                <p noindent="true">Algorithmic Learning Theory (ALT 2013)</p>
              </li>
              <li id="uid226">
                <p noindent="true">AAAI Conference on Artificial Intelligence (AAAI 2013)</p>
              </li>
              <li id="uid227">
                <p noindent="true">European Workshop on Reinforcement Learning (EWRL 2013)</p>
              </li>
              <li id="uid228">
                <p noindent="true">Annual Conference on Neural Information Processing Systems (NIPS 2013)</p>
              </li>
              <li id="uid229">
                <p noindent="true">International Conference on Artificial Intelligence and
Statistics (AISTATS 2013)</p>
              </li>
              <li id="uid230">
                <p noindent="true">European Conference on Machine Learning (ECML 2013)</p>
              </li>
              <li id="uid231">
                <p noindent="true">International Conference on Machine Learning (ICML 2013 and 2014)</p>
              </li>
              <li id="uid232">
                <p noindent="true">International Conference on Uncertainty in Artificial
Intelligence (UAI 2013)</p>
              </li>
              <li id="uid233">
                <p noindent="true">French Conference on Planning, Decision-making, and Learning
in Control Systems (JFPDA 2013)</p>
              </li>
              <li id="uid234">
                <p noindent="true">IEEE FUSION 2013</p>
              </li>
              <li id="uid235">
                <p noindent="true">IEEE Approximate Dynamic Programming and Reinforcement Learning (ADPRL 2013)</p>
              </li>
              <li id="uid236">
                <p noindent="true">ICML workshop “Prediction with Sequential Models”</p>
              </li>
            </simplelist>
          </li>
          <li id="uid237">
            <p noindent="true"><i/><i><b>International journal and conference reviewing activities</b></i><i/> (in addition to the conferences for which we serve on the PC)</p>
            <simplelist>
              <li id="uid238">
                <p noindent="true">IEEE Transactions on Image Processing</p>
              </li>
              <li id="uid239">
                <p noindent="true">Journal of Statistical Physics</p>
              </li>
              <li id="uid240">
                <p noindent="true">Digital Signal Processing</p>
              </li>
              <li id="uid241">
                <p noindent="true">IEEE Transactions on Information Theory</p>
              </li>
              <li id="uid242">
                <p noindent="true">IEEE Statistical Signal Processing SSP'2013</p>
              </li>
              <li id="uid243">
                <p noindent="true">European Signal Processing Conference EUSIPCO 2013</p>
              </li>
              <li id="uid244">
                <p noindent="true">10th International Conference on Sampling Theory and Applications (SampTA 2013)</p>
              </li>
              <li id="uid245">
                <p noindent="true">IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013 &amp; 2014)</p>
              </li>
              <li id="uid246">
                <p noindent="true">Annual Conference on Neural Information Processing Systems (NIPS 2013)</p>
              </li>
              <li id="uid247">
                <p noindent="true">International Conference on Machine Learning (ICML 2013)</p>
              </li>
              <li id="uid248">
                <p noindent="true">European Conference on Machine Learning (ECML 2013)</p>
              </li>
              <li id="uid249">
                <p noindent="true">Uncertainty in Artificial Intelligence (UAI 2013)</p>
              </li>
              <li id="uid250">
                <p noindent="true">Machine Learning Journal (MLJ)</p>
              </li>
              <li id="uid251">
                <p noindent="true">Journal of Machine Learning Research (JMLR)</p>
              </li>
              <li id="uid252">
                <p noindent="true">Journal of Artificial Intelligence Research (JAIR)</p>
              </li>
              <li id="uid253">
                <p noindent="true">IEEE Transactions on Automatic Control (TAC)</p>
              </li>
              <li id="uid254">
                <p noindent="true">IEEE Transactions on Signal Processing</p>
              </li>
              <li id="uid255">
                <p noindent="true">Journal of Autonomous Agents and Multi-Agent Systems (JAAMAS)</p>
              </li>
              <li id="uid256">
                <p noindent="true">Mathematics of Operations Research (MOR)</p>
              </li>
            </simplelist>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid257" level="2">
        <bodyTitle>Evaluation activities, expertise</bodyTitle>
        <simplelist>
          <li id="uid258">
            <p noindent="true"><i>M. Ghavamzadeh</i> is an Editorial Board Member of the Machine Learning Journal (MLJ, 2011-present).</p>
          </li>
          <li id="uid259">
            <p noindent="true"><i>M. Ghavamzadeh</i> is a Steering Committee Member of the European Workshop on Reinforcement Learning (EWRL, 2011-present).</p>
          </li>
          <li id="uid260">
            <p noindent="true"><i>P. Preux</i>, <i>R. Gaudel</i> and <i>J. Mary</i> are experts for <i>Crédit Impôt Recherche</i> (CIR).</p>
          </li>
          <li id="uid261">
            <p noindent="true"><i>E. Duflos</i> is a project proposal reviewer for ANR.</p>
          </li>
          <li id="uid262">
            <p noindent="true"><i>R. Munos</i> is a Member of the Belgian Evaluation Commission F.R.S.-FNRS, 2013.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid263" level="2">
        <bodyTitle>Other Scientific Activities</bodyTitle>
        <simplelist>
          <li id="uid264">
            <p noindent="true"><i>R. Munos</i> was Vice Président du Comité des Projets at Inria Lille-Nord Europe, until July 2013.</p>
          </li>
          <li id="uid265">
            <p noindent="true"><i>D. Ryabko</i> is a member of COST-GTRI committee at Inria.</p>
          </li>
          <li id="uid266">
            <p noindent="true"><i>D. Ryabko</i> is a general advisor at Inria Lille.</p>
          </li>
          <li id="uid267">
            <p noindent="true"><i>E. Duflos</i> is Director of Research of Ecole Centrale de Lille since September 2011.</p>
          </li>
          <li id="uid268">
            <p noindent="true"><i>E. Duflos</i> is the Head of the Signal and Image Team of LAGIS (UMR CNRS 8219).</p>
          </li>
          <li id="uid269">
            <p noindent="true"><i>R. Gaudel</i> is board member of LIFL.</p>
          </li>
          <li id="uid270">
            <p noindent="true"><i>R. Gaudel</i> manages the proml mailing list. This mailing list gathers French-speaking researchers from the Machine Learning community.</p>
          </li>
          <li id="uid271">
            <p noindent="true"><i>P. Chainais</i> is a member of the administration council of GRETSI, the French association of researchers in signal and image processing.</p>
          </li>
          <li id="uid272">
            <p noindent="true"><i>P. Chainais</i> is co-responsible for the action "Machine Learning" of the GDR ISIS which gathers French researchers in signal and image processing at the national level.</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid273" level="1">
      <bodyTitle>Teaching - Supervision - Juries</bodyTitle>
      <subsection id="uid274" level="2">
        <bodyTitle>Teaching</bodyTitle>
        <sanspuceslist>
          <li id="uid275">
            <p noindent="true">Ecole Centrale de Lille: <i>P. Chainais</i>, “Machine Learning”, 36 hours, 3rd year.</p>
          </li>
          <li id="uid276">
            <p noindent="true">Ecole Centrale de Lille: <i>P. Chainais</i>, “Wavelets and Applications”, 24 hours, 2nd year.</p>
          </li>
          <li id="uid277">
            <p noindent="true">Ecole Centrale de Lille: <i>P. Chainais</i>, “Introduction to Matlab”, 16 hours, 3rd year.</p>
          </li>
          <li id="uid278">
            <p noindent="true">Ecole Centrale de Lille: <i>P. Chainais</i>, “Signal processing”, 22 hours, 1st year.</p>
          </li>
          <li id="uid279">
            <p noindent="true">Ecole Centrale de Lille: <i>P. Chainais</i>, “Data Compression”, 16 hours, 2nd year.</p>
          </li>
          <li id="uid280">
            <p noindent="true">Ecole Centrale de Lille: <i>Ph. Preux</i>, “<small>Data </small>Data <big>Data Data</big>”, 2 hours, 3rd year.</p>
          </li>
          <li id="uid281">
            <p noindent="true"><i>P. Chainais</i> is Responsible for a new 3rd year program called Decision making &amp; Data analysis.</p>
          </li>
          <li id="uid282">
            <p noindent="true">Master: <i>O. Pietquin</i>, “Decision under uncertainty”, 46 hours, M2, Master in Computer Science, Université de Lille 1.</p>
          </li>
          <li id="uid283">
            <p noindent="true">Master: <i>A. Lazaric</i>, “Introduction to Reinforcement Learning”, 30h eq. TD, M2, Master “Mathématiques, Vision, Apprentissage”, ENS Cachan.</p>
          </li>
          <li id="uid284">
            <p noindent="true">Master: <i>R. Gaudel</i>, “Data Mining”, 30h eq. TD, M2, Université Lille 3.</p>
          </li>
          <li id="uid285">
            <p noindent="true">Master: <i>R. Gaudel</i>, “Web Mining”, 32h eq. TD, M2, Université Lille 3.</p>
          </li>
          <li id="uid286">
            <p noindent="true">Master: <i>R. Gaudel</i>, “Algorithmic”, 19h eq. TD, M2, Université Lille 3.</p>
          </li>
          <li id="uid287">
            <p noindent="true">Master: <i>Ph. Preux</i>, “Mathematics, Computer Science, and Modeling”, M1 of psychology, Université Lille 3.</p>
          </li>
          <li id="uid288">
            <p noindent="true">Master: <i>Ph. Preux</i>, “Algorithms, and programming in Python”, M1 MIASHS, Université Lille 3.</p>
          </li>
          <li id="uid289">
            <p noindent="true">Licence: <i>Ph. Preux</i>, “Algorithms, and programming in Python”, L3 MIASHS, Université Lille 3.</p>
          </li>
          <li id="uid290">
            <p noindent="true">Licence: <i>R. Gaudel</i>, “Programming”, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>2</mn><mo>×</mo><mn>16</mn></mrow></math></formula>h eq. TD, L1, Université Lille 3.</p>
          </li>
          <li id="uid291">
            <p noindent="true">Licence: <i>R. Gaudel</i>, “Logic”, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>31</mn><mo>.</mo><mn>5</mn></mrow></math></formula>h eq. TD, L3, Université Lille 3.</p>
          </li>
          <li id="uid292">
            <p noindent="true">Licence: <i>R. Gaudel</i>, “Information and Communication Technologies”, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>2</mn><mo>×</mo><mn>16</mn></mrow></math></formula>h eq. TD, L1, Université Lille 3.</p>
          </li>
          <li id="uid293">
            <p noindent="true">Licence: <i>R. Gaudel</i>, “Artificial Intelligence”, <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>31</mn><mo>.</mo><mn>5</mn></mrow></math></formula>h eq. TD, L2, Université Lille 3.</p>
          </li>
          <li id="uid294">
            <p noindent="true">Licence: <i>R. Gaudel</i>, “C2i”, 25h eq. TD, L1-3, Université Lille 3.</p>
          </li>
          <li id="uid295">
            <p noindent="true">Licence: <i>R. Mary</i>, “C2i”, 25h eq. TD, L1-3, Université Lille 3.</p>
          </li>
          <li id="uid296">
            <p noindent="true">Master: <i>J. Mary</i>, “Programmation et analyse de données en R”, 24h eq TD, M1, Université de Lille 3, France.</p>
          </li>
          <li id="uid297">
            <p noindent="true">Master: <i>J. Mary</i>, “Programmation web avancée”, 24h eq TD, M2, Université de Lille 3, France.</p>
          </li>
          <li id="uid298">
            <p noindent="true">Master: <i>J. Mary</i>, “Programmation objet et Design Pattern”, 48h eq TD, M2, Université de Lille 3, France.</p>
          </li>
          <li id="uid299">
            <p noindent="true">Master: <i>J. Mary</i>, “Algorithmique”, 12h eq TD, M1, Université de Lille 3, France.</p>
          </li>
          <li id="uid300">
            <p noindent="true">Master (3rd year of Engineer School): <i>J. Mary</i>, “Machine Learning avec R”, 16 hours, M2, Option “Data Analysis and Decision”, Ecole Centrale de Lille, France.</p>
          </li>
          <li id="uid301">
            <p noindent="true">Master (3rd year of Engineer School): <i>E. Duflos</i>, “Advanced Estimation”, 20 hours, M2, Option “Data Analysis and Decision”, Ecole Centrale de Lille, France.</p>
          </li>
          <li id="uid302">
            <p noindent="true">Master (3rd year of Engineer School): <i>E. Duflos</i>, “Multi-Object Filtering”, 16 hours, M2, Option “Data Analysis and Decision”, Ecole Centrale de Lille, France.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid303" level="2">
        <bodyTitle>Supervision</bodyTitle>
        <sanspuceslist>
          <li id="uid304">
            <p noindent="true">PhD: <i>Azadeh Khaleghi</i>, “Sur Quelques Problèmes non
supervisés impliquant des séries temporelles hautement
dépendantes”, Nov. 2013, Université de Lille 1, advisor:
D. Ryabko.</p>
          </li>
          <li id="uid305">
            <p noindent="true">PhD in progress: <i>Boris Baldassari</i>,
<i>Apprentissage automatique et développement logiciel</i>,
since May 2011, advisor: Ph. Preux.</p>
          </li>
          <li id="uid306">
            <p noindent="true">PhD in progress: <i>Gabriel Dulac-Arnold</i>, <i>A
General Sequential Model for Constrained Classification</i>, since
Oct. 2011, advisor: Ph. Preux, L. Denoyer, P. Gallinari.</p>
          </li>
          <li id="uid307">
            <p noindent="true">PhD in progress: <i>Victor Gabillon</i>, “Active Learning
in Classification-based Policy Iteration”, since Sep. 2009,
advisor: Ph. Preux, M. Ghavamzadeh.</p>
          </li>
          <li id="uid308">
            <p noindent="true">PhD in progress: <i>Frédéric Guillou</i>, “Sequential
Recommender System”, since Oct. 2013, advisor: Ph. Preux,
J. Mary, R. Gaudel.</p>
          </li>
          <li id="uid309">
            <p noindent="true">PhD in progress: <i>Vincenzo Musco</i>, “Topology and
evolution of software graphs”, since Oct. 2013, advisor:
Ph. Preux, M. Monperrus.</p>
          </li>
          <li id="uid310">
            <p noindent="true">PhD in progress: <i>Olivier Nicol</i>, “Data-driven
evaluation of Contextual Bandit algorithms and applications to
Dynamic Recommendation”, since Nov. 2010, advisor: Ph. Preux, J. Mary.</p>
          </li>
          <li id="uid311">
            <p noindent="true">PhD in progress: <i>Adrien Hoarau</i>, “Multi-arm Bandit
Theory”, since Oct. 2012, advisor: R. Munos.</p>
          </li>
          <li id="uid312">
            <p noindent="true">PhD in progress: <i>Tomáš Kocák</i>,
“Sequential Learning with Similarities”, since Oct. 2013,
advisor: R. Munos, M. Valko.</p>
          </li>
          <li id="uid313">
            <p noindent="true">PhD in progress: <i>Emilie Kaufmann</i>, “Bayesian
Bandits”, since Oct. 2011, advisor: R. Munos, O. Cappé,
A. Garivier.</p>
          </li>
          <li id="uid314">
            <p noindent="true">PhD in progress: <i>Amir Sani</i>, “Learning under
uncertainty”, since Oct. 2011, advisor: R. Munos, A. Lazaric.</p>
          </li>
          <li id="uid315">
            <p noindent="true">PhD in progress: <i>Marta Soare</i>, “Pure Exploration in
Multi-arm Bandit”, since Oct. 2012, advisor: R. Munos,
A. Lazaric.</p>
          </li>
          <li id="uid316">
            <p noindent="true">PhD in progress: <i>Hong Phuong Dang</i>,
<i>Bayesian non parametric methods for dictionary learning and inverse problems</i>,
since Oct. 2013, advisor: P. Chainais.</p>
          </li>
          <li id="uid317">
            <p noindent="true">PhD in progress: <i>Linh Van Nguyen</i>,
<i>High resolution reconstruction from low resolution measurements of velocity fields in turbulent flows</i>,
since Oct. 2013, advisor: P. Chainais &amp; J.-P. Laval (Laboratoire de Mécanique de Lille).</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid318" level="2">
        <bodyTitle>Juries</bodyTitle>
        <simplelist>
          <li id="uid319">
            <p noindent="true">Member of the recruitment committee for an assistant professor position at Université de Lille 3: R. Gaudel, Ph. Preux</p>
          </li>
          <li id="uid320">
            <p noindent="true">Member of the recruitment committee for an assistant professor position at Université de Lille 1: P. Chainais</p>
          </li>
          <li id="uid321">
            <p noindent="true">Member of the recruitment committee for a professor position at Université de Paris 6: Ph. Preux</p>
          </li>
          <li id="uid322">
            <p noindent="true">Member of the jury DR2 Inria 2013: R. Munos</p>
          </li>
          <li id="uid323">
            <p noindent="true">Member of the jury CR2 Rocquencourt Inria 2013: R. Munos</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid324" level="1">
      <bodyTitle>Popularization</bodyTitle>
      <simplelist>
        <li id="uid325">
          <p noindent="true">“Small or big (data), make it sequentially!”, J. Mary, Ph. Preux, invited talk at Euratechnologies, March 2013.</p>
        </li>
        <li id="uid326">
          <p noindent="true">Inria publishes an article about Face Recognition, Michal Valko,
<ref xlink:href="http://www.inria.fr/centre/lille/actualites/intel-collabore-avec-inria" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>centre/<allowbreak/>lille/<allowbreak/>actualites/<allowbreak/>intel-collabore-avec-inria</ref>, March 2013</p>
        </li>
        <li id="uid327">
          <p noindent="true">Jérémie Mary highlighted on TV and on Inria website: you are how you browse: <ref xlink:href="http://www.inria.fr/en/centre/lille/news/you-are-how-you-browse" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>en/<allowbreak/>centre/<allowbreak/>lille/<allowbreak/>news/<allowbreak/>you-are-how-you-browse</ref>, Dec. 2013</p>
        </li>
      </simplelist>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    
    <biblStruct id="sequel-2013-bid44" type="phdthesis" rend="year" n="cite:khaleghi:tel-00920184">
      <identifiant type="hal" value="tel-00920184"/>
      <monogr>
        <title level="m">Sur quelques problèmes non-supervisés impliquant des séries temporelles hautement dépendantes</title>
        <author>
          <persName key="sequel-2010-id59972">
            <foreName>Azadeh</foreName>
            <surname>Khaleghi</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">Institut national de recherche en informatique et en automatique (Inria)</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/tel-00920184" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>tel-00920184</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Ph. D. Thesis</note>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid14" type="article" rend="year" n="cite:azar:hal-00831875">
      <identifiant type="hal" value="hal-00831875"/>
      <analytic>
        <title level="a">Minimax PAC bounds on the sample complexity of reinforcement learning with a generative model</title>
        <author>
          <persName>
            <foreName>Mohammad Gheshlaghi</foreName>
            <surname>Azar</surname>
            <initial>M. G.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Hilbert</foreName>
            <surname>Kappen</surname>
            <initial>H.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" id="rid01456">
        <idno type="issn">0885-6125</idno>
        <title level="j">Machine Learning</title>
        <imprint>
          <biblScope type="volume">91</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">325-349</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00831875" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00831875</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid20" type="incollection" rend="year" n="cite:busoniu:hal-00756742">
      <identifiant type="hal" value="hal-00756742"/>
      <analytic>
        <title level="a">A review of optimistic planning in Markov decision processes</title>
        <author>
          <persName>
            <foreName>Lucian</foreName>
            <surname>Busoniu</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Robert</foreName>
            <surname>Babuska</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <editor role="editor">
          <persName>
            <foreName>Frank</foreName>
            <surname>Lewis</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Derong</foreName>
            <surname>Liu</surname>
            <initial>D.</initial>
          </persName>
        </editor>
        <title level="m">Reinforcement Learning and Adaptive Dynamic Programming for Feedback Control</title>
        <title level="s">IEEE Press Series on Computational Intelligence</title>
        <imprint>
          <biblScope type="chapter">22</biblScope>
          <publisher>
            <orgName>Wiley-IEEE Press</orgName>
          </publisher>
          <dateStruct>
            <month>January</month>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">494-516</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00756742" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00756742</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid22" subtype="nonparu-n" type="article" rend="year" n="cite:cappe:hal-00738209">
      <identifiant type="hal" value="hal-00738209"/>
      <analytic>
        <title level="a">Kullback-Leibler Upper Confidence Bounds for Optimal Sequential Allocation</title>
        <author>
          <persName>
            <foreName>Olivier</foreName>
            <surname>Cappé</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Aurélien</foreName>
            <surname>Garivier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2008-id18811">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Gilles</foreName>
            <surname>Stoltz</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" id="rid00162">
        <idno type="issn">0090-5364</idno>
        <title level="j">Annals of Statistics</title>
        <imprint>
          <biblScope type="volume">41</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">1516-1541</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00738209" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00738209</ref>
        </imprint>
      </monogr>
      <note type="bnote">Accepted</note>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid21" type="article" rend="year" n="cite:fruitet:hal-00798561">
      <identifiant type="doi" value="10.1088/1741-2560/10/1/016012"/>
      <identifiant type="hal" value="hal-00798561"/>
      <analytic>
        <title level="a">Automatic motor task selection via a bandit algorithm for a brain-controlled button</title>
        <author>
          <persName key="odyssee-2008-id19173">
            <foreName>Joan</foreName>
            <surname>Fruitet</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2009-id59810">
            <foreName>Alexandra</foreName>
            <surname>Carpentier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName key="odyssee-2005-id18165">
            <foreName>Maureen</foreName>
            <surname>Clerc</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" id="rid01305">
        <idno type="issn">1741-2560</idno>
        <title level="j">Journal of Neural Engineering</title>
        <imprint>
          <biblScope type="volume">10</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <month>January</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00798561" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00798561</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid37" type="article" rend="year" n="cite:hauskrecht:hal-00742097">
      <identifiant type="doi" value="10.1016/j.jbi.2012.08.004"/>
      <identifiant type="hal" value="hal-00742097"/>
      <analytic>
        <title level="a">Outlier detection for patient monitoring and alerting</title>
        <author>
          <persName>
            <foreName>Milos</foreName>
            <surname>Hauskrecht</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Iyad</foreName>
            <surname>Batal</surname>
            <initial>I.</initial>
          </persName>
          <persName key="sequel-2012-idp140352027421952">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Shyam</foreName>
            <surname>Visweswaran</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Gregory F</foreName>
            <surname>Cooper</surname>
            <initial>G. F.</initial>
          </persName>
          <persName>
            <foreName>Gilles</foreName>
            <surname>Clermont</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" id="rid01156">
        <idno type="issn">1532-0464</idno>
        <title level="j">Journal of Biomedical Informatics</title>
        <imprint>
          <biblScope type="volume">46</biblScope>
          <dateStruct>
            <month>February</month>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">47-55</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00742097" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00742097</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid31" type="article" rend="year" n="cite:ryabko:hal-00913240">
      <identifiant type="hal" value="hal-00913240"/>
      <analytic>
        <title level="a">A Binary-Classification-Based Metric between Time-Series Distributions and Its Use in Statistical and Learning Problems</title>
        <author>
          <persName key="sequel-2007-id18237">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sequel-2006-id18261">
            <foreName>Jérémie</foreName>
            <surname>Mary</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" id="rid01276">
        <idno type="issn">1532-4435</idno>
        <title level="j">Journal of Machine Learning Research</title>
        <imprint>
          <biblScope type="volume">14</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">2837-2856</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00913240" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00913240</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid38" type="article" rend="year" n="cite:ryabko:hal-00913253">
      <identifiant type="hal" value="hal-00913253"/>
      <analytic>
        <title level="a">A confidence-set approach to signal denoising</title>
        <author>
          <persName>
            <foreName>Boris</foreName>
            <surname>Ryabko</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2007-id18237">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" id="rid01886">
        <idno type="issn">1572-3127</idno>
        <title level="j">Statistical Methodology</title>
        <imprint>
          <biblScope type="volume">15</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">115-120</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00913253" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00913253</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid40" type="inproceedings" rend="year" n="cite:avilapires:hal-00840485">
      <identifiant type="hal" value="hal-00840485"/>
      <analytic>
        <title level="a">Cost-sensitive Multiclass Classification Risk Bounds</title>
        <author>
          <persName>
            <foreName>Bernardo</foreName>
            <surname>Avila Pires</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2008-id18222">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Csaba</foreName>
            <surname>Szepesvari</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">International Conference on Machine Learning</title>
        <loc>Atlanta, United States</loc>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00840485" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00840485</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>27</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid39" type="inproceedings" rend="year" n="cite:bas:hal-00868550">
      <identifiant type="hal" value="hal-00868550"/>
      <analytic>
        <title level="a">Quantification adaptative pour la stéganalyse d'images texturées</title>
        <author>
          <persName>
            <foreName>Patrick</foreName>
            <surname>Bas</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Pierre</foreName>
            <surname>Chainais</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Emmanuel</foreName>
            <surname>Zidel - Cauffet</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="no" x-proceedings="yes">
        <title level="m">GRETSI 2013</title>
        <loc>Brest, France</loc>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00868550" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00868550</ref>
        </imprint>
        <meeting id="cid42924">
          <title>Colloque sur le Traitement du Signal et des Images</title>
          <num>24</num>
          <abbr type="sigle">GRETSI</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid26" type="inproceedings" rend="year" n="cite:carpentier:hal-00923685">
      <identifiant type="hal" value="hal-00923685"/>
      <analytic>
        <title level="a">Toward optimal stratification for stratified monte-carlo integration</title>
        <author>
          <persName key="sequel-2009-id59810">
            <foreName>Alexandra</foreName>
            <surname>Carpentier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">International Conference on Machine Learning</title>
        <loc>United States</loc>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00923685" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00923685</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>27</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid36" type="inproceedings" rend="year" n="cite:chainais:hal-00923741">
      <identifiant type="hal" value="hal-00923741"/>
      <analytic>
        <title level="a">Distributed dictionary learning over a sensor network</title>
        <author>
          <persName>
            <foreName>Pierre</foreName>
            <surname>Chainais</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Cédric</foreName>
            <surname>Richard</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="no" x-proceedings="yes">
        <title level="m">CaP 2013</title>
        <loc>Villeneuve d'Ascq, France</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">1-4</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00923741" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00923741</ref>
        </imprint>
        <meeting id="cid50509">
          <title>Conférence Francophone sur l'Apprentissage Automatique</title>
          <num>2013</num>
          <abbr type="sigle">CAP</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid35" type="inproceedings" rend="year" n="cite:chainais:hal-00923742">
      <identifiant type="hal" value="hal-00923742"/>
      <analytic>
        <title level="a">Learning a common dictionary over a sensor network</title>
        <author>
          <persName>
            <foreName>Pierre</foreName>
            <surname>Chainais</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Cédric</foreName>
            <surname>Richard</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">CAMSAP 2013</title>
        <loc>Saint-Martin, France</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">1-4</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00923742" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00923742</ref>
        </imprint>
        <meeting id="cid323245">
          <title>International Workshop on Computational Advances in Multi-Sensor Adaptive Processing</title>
          <num>5</num>
          <abbr type="sigle">CAMSAP</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid16" type="inproceedings" rend="year" n="cite:fonteneau:hal-00840202">
      <identifiant type="hal" value="hal-00840202"/>
      <analytic>
        <title level="a">Optimistic planning for belief-augmented Markov decision processes</title>
        <author>
          <persName key="sequel-2012-idp140298916032176">
            <foreName>Raphael</foreName>
            <surname>Fonteneau</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Lucian</foreName>
            <surname>Busoniu</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">IEEE International Symposium on Adaptive Dynamic Programming and Reinforcement Learning, ADPRL 2013</title>
        <loc>Singapore</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00840202" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00840202</ref>
        </imprint>
        <meeting id="cid88347">
          <title>IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning</title>
          <num>2013</num>
          <abbr type="sigle">ADPRL</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid41" type="inproceedings" rend="year" n="cite:gabillon:hal-00921250">
      <identifiant type="hal" value="hal-00921250"/>
      <analytic>
        <title level="a">Approximate Dynamic Programming Finally Performs Well in the Game of Tetris</title>
        <author>
          <persName key="sequel-2009-id59885">
            <foreName>Victor</foreName>
            <surname>Gabillon</surname>
            <initial>V.</initial>
          </persName>
          <persName key="sequel-2008-id18222">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName key="maia-2005-id18130">
            <foreName>Bruno</foreName>
            <surname>Scherrer</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">Neural Information Processing Systems (NIPS) 2013</title>
        <loc>South Lake Tahoe, United States</loc>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00921250" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00921250</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>27</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid15" type="inproceedings" rend="year" n="cite:gheshlaghiazar:hal-00924021">
      <identifiant type="hal" value="hal-00924021"/>
      <analytic>
        <title level="a">Regret Bounds for Reinforcement Learning with Policy Advice</title>
        <author>
          <persName>
            <foreName>Mohammad</foreName>
            <surname>Gheshlaghi Azar</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2008-id18453">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Emma</foreName>
            <surname>Brunskill</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">ECML/PKDD - European conference on machine learning and principles and practice of knowledge discovery in databases - 2013</title>
        <loc>Prague, Czech Republic</loc>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00924021" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00924021</ref>
        </imprint>
        <meeting id="cid67163">
          <title>European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases</title>
          <num>16</num>
          <abbr type="sigle">ECML PKDD</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid23" type="inproceedings" rend="year" n="cite:gheshlaghiazar:hal-00924025">
      <identifiant type="hal" value="hal-00924025"/>
      <analytic>
        <title level="a">Sequential Transfer in Multi-armed Bandit with Finite Set of Models</title>
        <author>
          <persName>
            <foreName>Mohammad</foreName>
            <surname>Gheshlaghi Azar</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2008-id18453">
            <foreName>Alessandro</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Emma</foreName>
            <surname>Brunskill</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">NIPS - Advances in Neural Information Processing Systems 26 - 2013</title>
        <loc>Lake Tahoe, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00924025" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00924025</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>27</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid42" type="inproceedings" rend="year" n="cite:kadri:hal-00695631">
      <identifiant type="hal" value="hal-00695631"/>
      <analytic>
        <title level="a">A Generalized Kernel Approach to Structured Output Learning</title>
        <author>
          <persName key="sequel-2008-id18481">
            <foreName>Hachem</foreName>
            <surname>Kadri</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2008-id18222">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2006-id18078">
            <foreName>Philippe</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">International Conference on Machine Learning (ICML)</title>
        <loc>Atlanta, United States</loc>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00695631" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00695631</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>30</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid17" type="inproceedings" rend="year" n="cite:kedenburg:hal-00923681">
      <identifiant type="hal" value="hal-00923681"/>
      <analytic>
        <title level="a">Aggregating optimistic planning trees for solving Markov decision processes</title>
        <author>
          <persName>
            <foreName>Gunnar</foreName>
            <surname>Kedenburg</surname>
            <initial>G.</initial>
          </persName>
          <persName key="sequel-2012-idp140298916032176">
            <foreName>Raphael</foreName>
            <surname>Fonteneau</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">Advances in Neural Information Processing Systems</title>
        <loc>Lake Tahoe, United States</loc>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">2382-2390</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00923681" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00923681</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>27</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid30" type="inproceedings" rend="year" n="cite:khaleghi:hal-00913250">
      <identifiant type="hal" value="hal-00913250"/>
      <analytic>
        <title level="a">Nonparametric multiple change point estimation in highly dependent time series</title>
        <author>
          <persName key="sequel-2010-id59972">
            <foreName>Azadeh</foreName>
            <surname>Khaleghi</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2007-id18237">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. 24th International Conf. on Algorithmic Learning Theory (ALT'13)</title>
        <loc>Singapore</loc>
        <imprint>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">382-396</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00913250" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00913250</ref>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>24</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid27" type="inproceedings" rend="year" n="cite:korda:hal-00923683">
      <identifiant type="hal" value="hal-00923683"/>
      <analytic>
        <title level="a">Thompson sampling for one-dimensional exponential family bandits</title>
        <author>
          <persName key="sequel-2012-idp140298916026800">
            <foreName>Nathaniel</foreName>
            <surname>Korda</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Emilie</foreName>
            <surname>Kaufmann</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">Advances in Neural Information Processing Systems</title>
        <loc>Lake Tahoe, United States</loc>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00923683" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00923683</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>27</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid32" type="inproceedings" rend="year" n="cite:kveton:hal-00749197">
      <identifiant type="hal" value="hal-00749197"/>
      <analytic>
        <title level="a">Learning from a Single Labeled Face and a Stream of Unlabeled Data</title>
        <author>
          <persName>
            <foreName>Branislav</foreName>
            <surname>Kveton</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2012-idp140352027421952">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">10th IEEE International Conference on Automatic Face and Gesture Recognition</title>
        <loc>Shanghai, China</loc>
        <imprint>
          <dateStruct>
            <month>January</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00749197" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00749197</ref>
        </imprint>
        <meeting id="cid112532">
          <title>International Conference on Automatic Face and Gesture Recognition</title>
          <num>10</num>
          <abbr type="sigle">FG</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid18" type="inproceedings" rend="year" n="cite:maillard:hal-00778586">
      <identifiant type="hal" value="hal-00778586"/>
      <analytic>
        <title level="a">Optimal Regret Bounds for Selecting the State Representation in Reinforcement Learning</title>
        <author>
          <persName key="sequel-2008-id18811">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName key="concha-2007-id18275">
            <foreName>Phuong</foreName>
            <surname>Nguyen</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Ronald</foreName>
            <surname>Ortner</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2007-id18237">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">ICML - 30th International Conference on Machine Learning</title>
        <loc>Atlanta, United States</loc>
        <imprint>
          <biblScope type="volume">28(1)</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">543-551</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00778586" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00778586</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>30</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid19" type="inproceedings" rend="year" n="cite:nguyen:hal-00823230">
      <identifiant type="hal" value="hal-00823230"/>
      <analytic>
        <title level="a">Competing with an Infinite Set of Models in Reinforcement Learning</title>
        <author>
          <persName key="concha-2007-id18275">
            <foreName>Phuong</foreName>
            <surname>Nguyen</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sequel-2008-id18811">
            <foreName>Odalric-Ambrym</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName key="sequel-2007-id18237">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Ronald</foreName>
            <surname>Ortner</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">AISTATS</title>
        <loc>Arizona, United States</loc>
        <title level="s">JMLR W&amp;CP</title>
        <imprint>
          <biblScope type="volume">31</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">463-471</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00823230" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00823230</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>16</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid34" type="inproceedings" rend="year" n="cite:ryabko:hal-00823233">
      <identifiant type="hal" value="hal-00823233"/>
      <analytic>
        <title level="a">Time-series information and learning</title>
        <author>
          <persName key="sequel-2007-id18237">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">ISIT - International Symposium on Information Theory</title>
        <loc>Istanbul, Turkey</loc>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">1392-1395</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00823233" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00823233</ref>
        </imprint>
        <meeting id="cid89373">
          <title>IEEE International Symposium on Information Theory</title>
          <num>2013</num>
          <abbr type="sigle">ISIT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid33" type="inproceedings" rend="year" n="cite:ryabko:hal-00913244">
      <identifiant type="hal" value="hal-00913244"/>
      <analytic>
        <title level="a">Unsupervised model-free representation learning</title>
        <author>
          <persName key="sequel-2007-id18237">
            <foreName>Daniil</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. 24th International Conf. on Algorithmic Learning Theory (ALT'13)</title>
        <loc>Singapore</loc>
        <imprint>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">354-366</biblScope>
          <ref xlink:href="http://hal.inria.fr/hal-00913244" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00913244</ref>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>24</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid43" type="inproceedings" rend="year" n="cite:szorenyi:in2p3-00907406">
      <identifiant type="hal" value="in2p3-00907406"/>
      <analytic>
        <title level="a">Gossip-based distributed stochastic bandit algorithms</title>
        <author>
          <persName>
            <foreName>B.</foreName>
            <surname>Szorenyi</surname>
            <initial>B.</initial>
          </persName>
          <persName key="tao-2011-idp140318445754768">
            <foreName>R.</foreName>
            <surname>Busa-Fekete</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>I.</foreName>
            <surname>Hegedüs</surname>
            <initial>I.</initial>
          </persName>
          <persName>
            <foreName>R.</foreName>
            <surname>Ormandi</surname>
            <initial>R.</initial>
          </persName>
          <persName key="asap-2006-id18710">
            <foreName>M.</foreName>
            <surname>Jelasity</surname>
            <initial>M.</initial>
          </persName>
          <persName key="tao-2009-id60518">
            <foreName>B.</foreName>
            <surname>Kégl</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <editor role="editor">
          <persName>
            <foreName>Sanjoy</foreName>
            <surname>Dasgupta</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>David</foreName>
            <surname>McAllester</surname>
            <initial>D.</initial>
          </persName>
        </editor>
        <title level="m">30th International Conference on Machine Learning (ICML 2013)</title>
        <loc>Atlanta, United States</loc>
        <imprint>
          <biblScope type="volume">28</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">19-27</biblScope>
          <ref xlink:href="http://hal.inria.fr/in2p3-00907406" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>in2p3-00907406</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>30</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid24" type="inproceedings" rend="year" n="cite:thomas:hal-00907781">
      <identifiant type="hal" value="hal-00907781"/>
      <analytic>
        <title level="a">Optimizing P300-speller sequences by RIP-ping groups apart</title>
        <author>
          <persName key="athena-2011-idp140624350986624">
            <foreName>Eoin M.</foreName>
            <surname>Thomas</surname>
            <initial>E. M.</initial>
          </persName>
          <persName key="odyssee-2005-id18165">
            <foreName>Maureen</foreName>
            <surname>Clerc</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2009-id59810">
            <foreName>Alexandra</foreName>
            <surname>Carpentier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2008-id18278">
            <foreName>Emmanuel</foreName>
            <surname>Daucé</surname>
            <initial>E.</initial>
          </persName>
          <persName key="athena-2011-idp140624350983856">
            <foreName>Dieter</foreName>
            <surname>Devlaminck</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">IEEE/EMBS 6th international conference on neural engineering (2013)</title>
        <loc>San Diego, United States</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">IEEE/EMBS</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00907781" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00907781</ref>
        </imprint>
        <meeting id="cid550682">
          <title>International IEEE/EMBS Conference on Neural Engineering</title>
          <num>6</num>
          <abbr type="sigle">NER</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid25" type="inproceedings" rend="year" n="cite:valko:hal-00789606">
      <identifiant type="hal" value="hal-00789606"/>
      <analytic>
        <title level="a">Stochastic Simultaneous Optimistic Optimization</title>
        <author>
          <persName key="sequel-2012-idp140352027421952">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2009-id59810">
            <foreName>Alexandra</foreName>
            <surname>Carpentier</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">30th International Conference on Machine Learning</title>
        <loc>Atlanta, United States</loc>
        <imprint>
          <dateStruct>
            <month>February</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00789606" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00789606</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>30</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid28" type="inproceedings" rend="year" n="cite:valko:hal-00826946">
      <identifiant type="hal" value="hal-00826946"/>
      <analytic>
        <title level="a">Finite-Time Analysis of Kernelised Contextual Bandits</title>
        <author>
          <persName key="sequel-2012-idp140352027421952">
            <foreName>Michal</foreName>
            <surname>Valko</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2012-idp140298916026800">
            <foreName>Nathaniel</foreName>
            <surname>Korda</surname>
            <initial>N.</initial>
          </persName>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Ilias</foreName>
            <surname>Flaounas</surname>
            <initial>I.</initial>
          </persName>
          <persName>
            <foreName>Nello</foreName>
            <surname>Cristianini</surname>
            <initial>N.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">The 29th Conference on Uncertainty in Artificial Intelligence</title>
        <loc>Bellevue, United States</loc>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00826946" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00826946</ref>
        </imprint>
        <meeting id="cid49628">
          <title>Conference on Uncertainty in Artificial Intelligence</title>
          <num>29</num>
          <abbr type="sigle">UAI</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid46" type="techreport" rend="year" n="cite:ghavamzadeh:hal-00776608">
      <identifiant type="hal" value="hal-00776608"/>
      <monogr>
        <title level="m">Bayesian Policy Gradient and Actor-Critic Algorithms</title>
        <author>
          <persName key="sequel-2008-id18222">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Yaakov</foreName>
            <surname>Engel</surname>
            <initial>Y.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>January</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00776608" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00776608</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Technical Report</note>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid45" type="techreport" rend="year" n="cite:la:hal-00794721">
      <identifiant type="hal" value="hal-00794721"/>
      <monogr>
        <title level="m">Actor-Critic Algorithms for Risk-Sensitive MDPs</title>
        <author>
          <persName>
            <foreName>Prashanth</foreName>
            <surname>L.A.</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sequel-2008-id18222">
            <foreName>Mohammad</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>February</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00794721" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00794721</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Technical Report</note>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid29" type="techreport" rend="year" n="cite:munos:hal-00747575">
      <identifiant type="hal" value="hal-00747575"/>
      <monogr>
        <title level="m">From Bandits to Monte-Carlo Tree Search: The Optimistic Principle Applied to Optimization and Planning</title>
        <author>
          <persName key="sequel-2006-id18109">
            <foreName>Rémi</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/hal-00747575" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00747575</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Report</note>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid7" type="article" rend="foot" n="footcite:Aueretal2002">
      <analytic>
        <title level="a">Finite-time analysis of the multi-armed bandit problem</title>
        <author>
          <persName>
            <foreName>Peter</foreName>
            <surname>Auer</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>N.</foreName>
            <surname>Cesa-Bianchi</surname>
            <initial>N.</initial>
          </persName>
          <persName key="mc2-2005-id18251">
            <foreName>P.</foreName>
            <surname>Fischer</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Machine Learning</title>
        <imprint>
          <biblScope type="volume">47</biblScope>
          <biblScope type="number">2/3</biblScope>
          <dateStruct>
            <year>2002</year>
          </dateStruct>
          <biblScope type="pages">235–256</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid3" type="book" rend="foot" n="footcite:bellman">
      <monogr>
        <title level="m">Dynamic Programming</title>
        <author>
          <persName>
            <foreName>R.</foreName>
            <surname>Bellman</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Princeton University Press</orgName>
          </publisher>
          <dateStruct>
            <year>1957</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid4" type="book" rend="foot" n="footcite:bertshreve78">
      <monogr>
        <title level="m">Stochastic Optimal Control (The Discrete Time Case)</title>
        <author>
          <persName>
            <foreName>D.P.</foreName>
            <surname>Bertsekas</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>S.E.</foreName>
            <surname>Shreve</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Academic Press, New York</orgName>
          </publisher>
          <dateStruct>
            <year>1978</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid2" type="book" rend="foot" n="footcite:Bertsekas96">
      <monogr>
        <title level="m">Neuro-Dynamic Programming</title>
        <author>
          <persName>
            <foreName>D.P.</foreName>
            <surname>Bertsekas</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Tsitsiklis</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Athena Scientific</orgName>
          </publisher>
          <dateStruct>
            <year>1996</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid9" type="article" rend="foot" n="footcite:ferguson1973bas">
      <analytic>
        <title level="a">A Bayesian Analysis of Some Nonparametric Problems</title>
        <author>
          <persName>
            <foreName>T.S.</foreName>
            <surname>Ferguson</surname>
            <initial>T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">The Annals of Statistics</title>
        <imprint>
          <biblScope type="volume">1</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <year>1973</year>
          </dateStruct>
          <biblScope type="pages">209–230</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid8" type="book" rend="foot" n="footcite:EltsStatLearning">
      <monogr>
        <title level="m">The elements of statistical learning — Data Mining, Inference, and Prediction</title>
        <author>
          <persName>
            <foreName>T.</foreName>
            <surname>Hastie</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>R.</foreName>
            <surname>Tibshirani</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Friedman</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year>2001</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid10" type="book" rend="foot" n="footcite:ADPpowell">
      <monogr>
        <title level="m">Approximate Dynamic Programming</title>
        <author>
          <persName>
            <foreName>W.</foreName>
            <surname>Powell</surname>
            <initial>W.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Wiley</orgName>
          </publisher>
          <dateStruct>
            <year>2007</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid1" type="book" rend="foot" n="footcite:puterman94">
      <monogr>
        <title level="m">Markov Decision Processes: Discrete Stochastic Dynamic Programming</title>
        <author>
          <persName>
            <foreName>M.L.</foreName>
            <surname>Puterman</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>John Wiley and Sons</orgName>
          </publisher>
          <dateStruct>
            <year>1994</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid6" type="article" rend="foot" n="footcite:Robbins1952">
      <analytic>
        <title level="a">Some aspects of the sequential design of experiments</title>
        <author>
          <persName>
            <foreName>H.</foreName>
            <surname>Robbins</surname>
            <initial>H.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Bull. Amer. Math. Soc.</title>
        <imprint>
          <biblScope type="volume">58</biblScope>
          <dateStruct>
            <year>1952</year>
          </dateStruct>
          <biblScope type="pages">527–535</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid13" type="article" rend="foot" n="footcite:rustSSMedicare">
      <analytic>
        <title level="a">How Social Security and Medicare Affect Retirement Behavior in a World of Incomplete Markets</title>
        <author>
          <persName>
            <foreName>J.</foreName>
            <surname>Rust</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>C.</foreName>
            <surname>Phelan</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Econometrica</title>
        <imprint>
          <biblScope type="volume">65</biblScope>
          <biblScope type="number">4</biblScope>
          <dateStruct>
            <month>July</month>
            <year>1997</year>
          </dateStruct>
          <biblScope type="pages">781–831</biblScope>
          <ref xlink:href="http://gemini.econ.umd.edu/jrust/research/rustphelan.pdf" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>gemini.<allowbreak/>econ.<allowbreak/>umd.<allowbreak/>edu/<allowbreak/>jrust/<allowbreak/>research/<allowbreak/>rustphelan.<allowbreak/>pdf</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid12" type="article" rend="foot" n="footcite:rustNuclearPlants">
      <analytic>
        <title level="a">On the Optimal Lifetime of Nuclear Power Plants</title>
        <author>
          <persName>
            <foreName>J.</foreName>
            <surname>Rust</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Business &amp; Economic Statistics</title>
        <imprint>
          <biblScope type="volume">15</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <year>1997</year>
          </dateStruct>
          <biblScope type="pages">195–208</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid0" type="book" rend="foot" n="footcite:sb">
      <monogr>
        <title level="m">Reinforcement learning: an introduction</title>
        <author>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>A.G.</foreName>
            <surname>Barto</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>MIT Press</orgName>
          </publisher>
          <dateStruct>
            <year>1998</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid11" type="article" rend="foot" n="footcite:tdgammon">
      <analytic>
        <title level="a">Temporal Difference Learning and TD-Gammon</title>
        <author>
          <persName>
            <foreName>G.</foreName>
            <surname>Tesauro</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Communications of the ACM</title>
        <imprint>
          <biblScope type="volume">38</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <month>March</month>
            <year>1995</year>
          </dateStruct>
          <biblScope type="pages">58–68</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sequel-2013-bid5" type="inbook" rend="foot" n="footcite:werbosHandbookADP">
      <analytic>
        <author>
          <persName>
            <foreName>P.</foreName>
            <surname>Werbos</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">ADP: Goals, Opportunities and Principles</title>
        <imprint>
          <publisher>
            <orgName>IEEE Press</orgName>
          </publisher>
          <dateStruct>
            <year>2004</year>
          </dateStruct>
          <biblScope type="pages">3–44</biblScope>
        </imprint>
      </monogr>
      <note type="bnote">Handbook of learning and approximate dynamic programming</note>
    </biblStruct>
  </biblio>
</raweb>
