<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE raweb PUBLIC "-//INRIA//DTD " "raweb2.dtd">
<raweb xml:lang="en" year="2009">
  <identification id="sequel" isproject="true">
    <shortname>SequeL</shortname>
    <projectName>Sequential Learning</projectName>
    <domaine-de-recherche>Applied Mathematics, Computation and Simulation</domaine-de-recherche>
    <theme-de-recherche>Optimization, Learning and Statistical Methods</theme-de-recherche>
    <UR name="Lille"/>
    <moreinfo>
      <p><span class="smallcap" align="left">SequeL</span>is a joint project with the 
      <span class="smallcap" align="left">LIFL</span>(UMR 8022 of CNRS, and University of Lille 1, and University of Lille 3) and the 
      <span class="smallcap" align="left">LAGIS</span>(UMR 8021 of the École Centrale of Lille and the University of Lille 1).</p>
    </moreinfo>
  </identification>
  <team id="uid1">
    <person key="sequel-2006-idm391148966784">
      <firstname>Philippe</firstname>
      <lastname>Preux</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Team leader, Professor, Université de Lille, secondment at the INRIA until Aug. 31
      <sup>st</sup>, 2009</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2006-idm391148963008">
      <firstname>Rémi</firstname>
      <lastname>Munos</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Co-head, Research Director (DR), INRIA</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2007-idm332252538560">
      <firstname>Sandrine</firstname>
      <lastname>Catillon</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Secretary (SAR) INRIA, shared by 3 projects</moreinfo>
    </person>
    <person key="sequel-2008-idm80283914416">
      <firstname>Mohammad</firstname>
      <lastname>Ghavamzadeh</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Researcher (CR) INRIA</moreinfo>
    </person>
    <person key="sequel-2007-idm332252530992">
      <firstname>Daniil</firstname>
      <lastname>Ryabko</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Researcher (CR) INRIA</moreinfo>
    </person>
    <person key="sequel-2008-idm80283907440">
      <firstname>Emmanuel</firstname>
      <lastname>Daucé</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Assistant Professor, École Centrale de Marseille, partial secondment in 
      <span class="smallcap" align="left">SequeL</span>until Aug. 31
      <sup>st</sup>, 2009</moreinfo>
    </person>
    <person key="sequel-2006-idm391148952976">
      <firstname>Emmanuel</firstname>
      <lastname>Duflos</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Professor, École Centrale de Lille</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2006-idm391148949872">
      <firstname>Philippe</firstname>
      <lastname>Vanheeghe</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Professor, École Centrale de Lille</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sequel-2006-idm391148946784">
      <firstname>Rémi</firstname>
      <lastname>Coulom</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Assistant professor, Université de Lille 3</moreinfo>
    </person>
    <person key="sequel-2006-idm391148944048">
      <firstname>Jérémie</firstname>
      <lastname>Mary</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Assistant professor, Université de Lille 3</moreinfo>
    </person>
    <person key="sequel-2007-idm332252510144">
      <firstname>Sertan</firstname>
      <lastname>Girgin</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Région Nord-Pas de Calais, begins on Sep. 1
      <sup>st</sup>, 2009</moreinfo>
    </person>
    <person key="sequel-2008-idm80283886240">
      <firstname>Alessandro</firstname>
      <lastname>Lazaric</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>INRIA until Aug. 31
      <sup>st</sup>, then ANR</moreinfo>
    </person>
    <person key="sequel-2008-idm80283882784">
      <firstname>Hachem</firstname>
      <lastname>Kadri</lastname>
      <affiliation>CNRS</affiliation>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>CNRS until Aug. 31
      <sup>st</sup>, then Région Nord-Pas de Calais</moreinfo>
    </person>
    <person key="sequel-2007-idm332252484192">
      <firstname>Sébastien</firstname>
      <lastname>Bubeck</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ENS Grant, since Oct., 2007</moreinfo>
    </person>
    <person key="sequel-2009-idm70241666752">
      <firstname>Alexandra</firstname>
      <lastname>Carpentier</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ANR-Région Nord-Pas de Calais Grant, since Oct., 2009</moreinfo>
    </person>
    <person key="sequel-2006-idm391148940688">
      <firstname>Pierre-Arnaud</firstname>
      <lastname>Coquelin</lastname>
      <affiliation>AutreEtablissementPublic</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>École Polytechnique, since Oct., 2005, currently mostly CO of the start-up Vekia, which he created in 2007</moreinfo>
    </person>
    <person key="sequel-2008-idm80283851664">
      <firstname>Emmanuel</firstname>
      <lastname>Delande</lastname>
      <affiliation>AutreEtablissementPublic</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>DGA, since Nov., 2008</moreinfo>
    </person>
    <person key="sequel-2009-idm70241657440">
      <firstname>Victor</firstname>
      <lastname>Gabillon</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>MENESR Grant, since Oct., 2009</moreinfo>
    </person>
    <person key="sequel-2007-idm332252490272">
      <firstname>Jean-François</firstname>
      <lastname>Hren</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>MENESR Grant, since Oct., 2007</moreinfo>
    </person>
    <person key="sequel-2006-idm391148938064">
      <firstname>Robin</firstname>
      <lastname>Jaulmes</lastname>
      <affiliation>AutreEtablissementPublic</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>DGA Grant, since Oct., 2006</moreinfo>
    </person>
    <person key="sequel-2006-idm391148935408">
      <firstname>Manuel</firstname>
      <lastname>Loth</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>INRIA-Région Nord-Pas de Calais Grant, since Oct., 2006</moreinfo>
    </person>
    <person key="sequel-2008-idm80283857760">
      <firstname>Odalric-Ambrym</firstname>
      <lastname>Maillard</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>ENS Grant, since Oct., 2008</moreinfo>
    </person>
    <person key="smis-2006-idm390881485360">
      <firstname>Christophe</firstname>
      <lastname>Salperwyck</lastname>
      <affiliation>EtablissementPrive</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>CIFRE with France Telecom Grant, since Dec., 2009</moreinfo>
    </person>
    <person key="sequel-2008-idm80283854720">
      <firstname>Nicolas</firstname>
      <lastname>Viandier</lastname>
      <affiliation>AutreEtablissementPublic</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>INRETS, since Oct., 2007</moreinfo>
    </person>
    <person key="sequel-2008-idm80283845136">
      <firstname>Tony</firstname>
      <lastname>Ducrocq</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>Technique</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Assistant Engineer, until Sep. 30
      <sup>th</sup>, 2009</moreinfo>
    </person>
    <person key="sequel-2009-idm70240660896">
      <firstname>Boris</firstname>
      <lastname>Iolis</lastname>
      <affiliation>UnivEtrangere</affiliation>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Master 1 internship, Université Libre de Bruxelles, Oct. to Dec. 2009</moreinfo>
    </person>
    <person key="sequel-2009-idm70241657440">
      <firstname>Victor</firstname>
      <lastname>Gabillon</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Master 2 internship, Telecom Sud-Paris and ENS-Cachan, Apr. to Sep. 2009</moreinfo>
    </person>
    <person key="sequel-2009-idm70240654624">
      <firstname>Victor</firstname>
      <lastname>Marsault</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Lille</research-centre>
      <moreinfo>Licence 3 internship, ENS-Cachan, Jun. to Jul. 2009</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Introduction</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span>means “Sequential Learning”. As such, 
      <span class="smallcap" align="left">SequeL</span>focuses on the task of learning in artificial systems (either hardware, or software) that gather information along time. Such systems are named 
      <i>(learning) agents</i>in the following
      <footnote id="uid4" id-text="1" place="foot" anchored="yes">we might also have called them “learning machines”, since that's what these agents are here.</footnote>. These data may be used to
      estimate some parameters of a model, which in turn, may be used for selecting actions in order to perform some long-term optimization task.</p>
      <p>For the purpose of model building, the agent needs to gather information collected so far in some compact representation and combine it to newly available data.</p>
      <p>The acquired data may result from an observation process of an agent in interaction with its environment (the data thus represent a perception). This is the case when the agent makes
      decisions (in order to fulfill a certain goal) that impact the environment thus the observation process itself.</p>
      <p>Hence, in 
      <span class="smallcap" align="left">SequeL</span>, the term 
      <b>sequential</b>refers to two aspects:</p>
      <simplelist>
        <li id="uid5">
          <p noindent="true">The 
          <b>sequential acquisition of data</b>, from which a model is learned (supervised and non supervised learning),</p>
        </li>
        <li id="uid6">
          <p noindent="true">the 
          <b>sequential decision making task</b>, based on the learned model (reinforcement learning).</p>
        </li>
      </simplelist>
      <p>We exemplify these various problems:</p>
      <descriptionlist>
        <label>Supervised learning</label>
        <li id="uid7">
          <p noindent="true">tasks deal with the prediction of some response given a certain set of observations of input variables and responses. New sample points keep on being observed.</p>
        </li>
        <label>Unsupervised learning</label>
        <li id="uid8">
          <p noindent="true">tasks deal with clustering objects, the latter forming a stream of objects. The (unknown) number of clusters typically evolves over time, as new objects are
          observed.</p>
        </li>
        <label>Reinforcement learning</label>
        <li id="uid9">
          <p noindent="true">tasks deal with the control (a policy) of some system which has to be optimized (see 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). We do not assume the availability of a model of the system to
          be controlled.</p>
        </li>
      </descriptionlist>
      <p>In all these cases, we assume that the process can be considered stationary for at least a certain amount of time, and slowly evolving.</p>
      <p>We wish to have any-time algorithms, that is, at any moment, a prediction may be required/an action may be selected making full use, and hopefully, the best use, of the experience already
      gathered by the learning agent.</p>
      <p>The perception of the environment by the learning agent (using its sensors) is generally neither the best one to make a prediction, nor to take a decision (we deal with Partially Observable
      Markov Decision Problem). So, the perception has to be mapped in some way to a better, and relevant, state (or input) space.</p>
      <p>Finally, an important issue of prediction regards its evaluation: how wrong may we be when we perform a prediction? For real systems to be controlled, this issue can not be simply left
      unanswered.</p>
      <p spacebefore="6.0pt">To sum-up, in 
      <span class="smallcap" align="left">SequeL</span>, the main issues regard:</p>
      <simplelist>
        <li id="uid10">
          <p noindent="true">the learning of a model: we focus on models than map some input space 
          <span class="math"><img align="bottom" width="18" height="12" src="math_image_1.png" xylemeAttach="1" border="0" alt="Im1 $\#8477 ^P$"/></span>to 
          <span class="math"><img align="bottom" width="10" height="10" src="math_image_2.png" xylemeAttach="2" border="0" alt="Im2 $\#8477 $"/></span>,</p>
        </li>
        <li id="uid11">
          <p noindent="true">the observation to state mapping,</p>
        </li>
        <li id="uid12">
          <p noindent="true">the choice of the action to perform (in the case of sequential decision problem),</p>
        </li>
        <li id="uid13">
          <p noindent="true">the bounding of the performance,</p>
        </li>
        <li id="uid14">
          <p noindent="true">the implementation of usable algorithms,</p>
        </li>
      </simplelist>
      <p>all that being understood in a 
      <i>sequential</i>framework.</p>
    </subsection>
    <subsection id="uid15" level="1">
      <bodyTitle>Highlight of the year</bodyTitle>
      <p>In 2009, we would like to highlight the fact that we have obtained several contracts with private companies, either directly or via a “Pôle de compétitivité”, as well as academic contracts
      (ANR, Europe). This is really a strong increase of the contracted part of the activities of 
      <span class="smallcap" align="left">SequeL</span>, and this increase corresponds to a desire to investigate applications lying at the edge of our research activities, and also to help promote
      the machine learning technology in solving real problems. Other contracts should be negotiated in 2010 with private companies in particular.</p>
    </subsection>
  </presentation>
  <fondements id="uid16">
    <bodyTitle>Scientific Foundations</bodyTitle>
    <subsection id="uid17" level="1">
      <bodyTitle>Introduction</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span>is primarily grounded on two domains:</p>
      <simplelist>
        <li id="uid18">
          <p noindent="true">the problem of decision under uncertainty,</p>
        </li>
        <li id="uid19">
          <p noindent="true">statistical learning which provides the general concepts and tools to solve this problem.</p>
        </li>
      </simplelist>
      <p>To help the reader who is unfamiliar with these questions, we briefly present key ideas below.</p>
    </subsection>
    <subsection id="uid20" level="1">
      <bodyTitle>Decision under uncertainty</bodyTitle>
      <p>The phrase “Decision under uncertainty” refers to the problem of taking decisions when we have full knowledge of neither the situation nor the consequences of the decisions, as
      well as when the consequences of a decision are non-deterministic.</p>
      <p>We introduce two specific sub-domains, namely the Markov decision processes which models sequential decision problems, and bandit problems.</p>
      <subsection id="uid21" level="2">
        <bodyTitle>Markov decision processes</bodyTitle>
        <p>Sequential decision processes occupy the heart of the 
        <span class="smallcap" align="left">SequeL</span>project; a detailed presentation of this problem may be found in Puterman's book 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        <p>A Markov Decision Process (MDP) is defined as the tuple 
        <span class="math"><img align="middle" width="64" height="13" src="math_image_3.png" xylemeAttach="3" border="0" alt="Im3 ${(\#119987 ,\#119964 ,P,r)}$"/></span>where 
        <span class="math"><img align="bottom" width="10" height="10" src="math_image_4.png" xylemeAttach="4" border="0" alt="Im4 $\#119987 $"/></span>is the state space, 
        <span class="math"><img align="bottom" width="10" height="11" src="math_image_5.png" xylemeAttach="5" border="0" alt="Im5 $\#119964 $"/></span>is the action space, 
        <span class="math"><hi rend="it">P</hi></span>is the probabilistic transition kernel, and 
        <span class="math"><img align="middle" width="118" height="11" src="math_image_6.png" xylemeAttach="6" border="0" alt="Im6 ${r:\#119987 ×\#119964 ×\#119987 \#8594 I~~R}$"/></span>is the reward function. For the sake of simplicity, we assume in this introduction that the state and action spaces are finite. If the current state (at time 
        <span class="math"><hi rend="it">t</hi></span>) is 
        <span class="math"><img align="middle" width="35" height="11" src="math_image_7.png" xylemeAttach="7" border="0" alt="Im7 ${x\#8712 \#119987 }$"/></span>and the chosen action is 
        <span class="math"><img align="middle" width="35" height="11" src="math_image_8.png" xylemeAttach="8" border="0" alt="Im8 ${a\#8712 \#119964 }$"/></span>, then the Markov assumption means that the transition probability to a new state 
        <span class="math"><img align="middle" width="39" height="11" src="math_image_9.png" xylemeAttach="9" border="0" alt="Im9 ${x^'\#8712 \#119987 }$"/></span>(at time 
        <span class="math"><hi rend="it">t</hi>+ 1</span>) only depends on 
        <span class="math">(
        <hi rend="it">x</hi>, 
        <hi rend="it">a</hi>)</span>. We write 
        <span class="math"><hi rend="it">p</hi>(
        <hi rend="it">x</hi><sup>'</sup>|
        <hi rend="it">x</hi>, 
        <hi rend="it">a</hi>)</span>the corresponding transition probability. During a transition 
        <span class="math">(
        <hi rend="it">x</hi>, 
        <hi rend="it">a</hi>)
        <img width="17" height="12" align="bottom" border="0" src="../../images/img_other_rightarrow.png" alt="$ \rightarrow$"/>
        <hi rend="it">x</hi>
        <sup>'</sup></span>, a reward 
        <span class="math"><hi rend="it">r</hi>(
        <hi rend="it">x</hi>, 
        <hi rend="it">a</hi>, 
        <hi rend="it">x</hi><sup>'</sup>)</span>is incurred.</p>
        <p>In the MDP (
        <span class="math"><img align="middle" width="59" height="13" src="math_image_10.png" xylemeAttach="10" border="0" alt="Im10 ${\#119987 ,\#119964 ,P,r)}$"/></span>, each initial state 
        <span class="math"><hi rend="it">x</hi><sub>0</sub></span>and action sequence 
        <span class="math"><hi rend="it">a</hi><sub>0</sub>, 
        <hi rend="it">a</hi><sub>1</sub>, ...</span>gives rise to a sequence of states 
        <span class="math"><hi rend="it">x</hi><sub>1</sub>, 
        <hi rend="it">x</hi><sub>2</sub>, ...</span>, satisfying 
        <span class="math"><img align="middle" width="244" height="13" src="math_image_11.png" xylemeAttach="11" border="0" alt="Im11 ${\#8473 \mfenced o=( c=) x_{t+1}=x^'{|}x_t=x,a_t=a=p{(x^'|x,a)},}$"/></span>and rewards
        <footnote id="uid22" id-text="2" place="foot" anchored="yes">Note that for simplicity, we considered the case of a deterministic reward function, but in many applications, the reward 
        <span class="math"><hi rend="it">r</hi><sub><hi rend="it">t</hi></sub></span>itself is a random variable.</footnote>
        <span class="math"><hi rend="it">r</hi><sub>1</sub>, 
        <hi rend="it">r</hi><sub>2</sub>, ...</span>defined by 
        <span class="math"><hi rend="it">r</hi><sub><hi rend="it">t</hi></sub>= 
        <hi rend="it">r</hi>(
        <hi rend="it">x</hi><sub><hi rend="it">t</hi></sub>, 
        <hi rend="it">a</hi><sub><hi rend="it">t</hi></sub>, 
        <hi rend="it">x</hi><sub><hi rend="it">t</hi>+ 1</sub>)</span>.</p>
        <p>The history of the process up to time 
        <span class="math"><hi rend="it">t</hi></span>is defined to be 
        <span class="math"><hi rend="it">H</hi><sub><hi rend="it">t</hi></sub>= (
        <hi rend="it">x</hi><sub>0</sub>, 
        <hi rend="it">a</hi><sub>0</sub>, ..., 
        <hi rend="it">x</hi><sub><hi rend="it">t</hi>-1</sub>, 
        <hi rend="it">a</hi><sub><hi rend="it">t</hi>-1</sub>, 
        <hi rend="it">x</hi><sub><hi rend="it">t</hi></sub>)</span>. A policy 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>is a sequence of functions 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub>0</sub>, 
        <img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub>1</sub>, ...</span>, where 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub><hi rend="it">t</hi></sub></span>maps the space of possible histories at time 
        <span class="math"><hi rend="it">t</hi></span>to the space of probability distributions over the space of actions 
        <span class="math"><img align="bottom" width="10" height="11" src="math_image_5.png" xylemeAttach="5" border="0" alt="Im5 $\#119964 $"/></span>. To follow a policy means that, in each time step, we assume that the process history up to time 
        <span class="math"><hi rend="it">t</hi></span>is 
        <span class="math"><hi rend="it">x</hi><sub>0</sub>, 
        <hi rend="it">a</hi><sub>0</sub>, ..., 
        <hi rend="it">x</hi><sub><hi rend="it">t</hi></sub></span>and the probability of selecting an action 
        <span class="math"><hi rend="it">a</hi></span>is equal to 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub><hi rend="it">t</hi></sub>(
        <hi rend="it">x</hi><sub>0</sub>, 
        <hi rend="it">a</hi><sub>0</sub>, ..., 
        <hi rend="it">x</hi><sub><hi rend="it">t</hi></sub>)(
        <hi rend="it">a</hi>)</span>. A policy is called stationary (or Markovian) if 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub><hi rend="it">t</hi></sub></span>depends only on the last visited state. In other words, a policy 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/>= (
        <img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub>0</sub>, 
        <img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub>1</sub>, ...)</span>is called stationary if 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub><hi rend="it">t</hi></sub>(
        <hi rend="it">x</hi><sub>0</sub>, 
        <hi rend="it">a</hi><sub>0</sub>, ..., 
        <hi rend="it">x</hi><sub><hi rend="it">t</hi></sub>) = 
        <img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sub>0</sub>(
        <hi rend="it">x</hi><sub><hi rend="it">t</hi></sub>)</span>holds for all 
        <span class="math"><hi rend="it">t</hi><img width="14" height="24" align="middle" border="0" src="../../images/img_other_ge.png" alt="$ \ge$"/>0</span>. A policy is called deterministic if the probability distribution prescribed
        by the policy for any history is concentrated on a single action. Otherwise it is called a stochastic policy.</p>
        <p>We move from an MD process to an MD problem by formulating the goal of the agent, that is, what the sought policy 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>has to optimize. It is very often formulated as maximizing (or minimizing), in expectation, some functional of the sequence of future rewards. For example, a common functional is the
        infinite-time horizon sum of discounted rewards. For a given (stationary) policy 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>, we define the value function 
        <span class="math"><img align="middle" width="36" height="13" src="math_image_12.png" xylemeAttach="12" border="0" alt="Im12 ${V^\#960 {(x)}}$"/></span>of that policy 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>at a state 
        <span class="math"><img align="middle" width="35" height="11" src="math_image_7.png" xylemeAttach="7" border="0" alt="Im7 ${x\#8712 \#119987 }$"/></span>as the expected sum of discounted future rewards given that we start from the initial state 
        <span class="math"><hi rend="it">x</hi></span>and follow the policy 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>:</p>
        <p>
          <formula type="display" id="uid23">
            <img align="middle" width="192" height="34" src="math_image_13.png" xylemeAttach="13" border="0" alt="Im13 ${V^\#960 {(x)}=\#120124 \mfenced o=[ c=] \munderover \#8721 {t=0}\#8734 \#947 ^tr_t|x_0=x,\#960 ,}$"/>
          </formula>
        </p>
        <p>where 
        <span class="math"><img align="bottom" width="9" height="10" src="math_image_14.png" xylemeAttach="14" border="0" alt="Im14 $\#120124 $"/></span>is the expectation operator and 
        <span class="math"><img width="11" height="24" align="middle" border="0" src="../../images/img_gamma.png" alt="$ \gamma$"/><img width="13" height="24" align="middle" border="0" src="../../images/img_other_in.png" alt="$ \in$"/>(0, 1)</span>is the discount factor. This value function 
        <span class="math"><img align="bottom" width="18" height="10" src="math_image_15.png" xylemeAttach="15" border="0" alt="Im15 $V^\#960 $"/></span>gives an evaluation of the performance of a given policy 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>. Other functionals of the sequence of future rewards may be considered, such as the undiscounted reward (see the stochastic shortest path problems 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) and average reward settings. Note also that, here, we considered
        the problem of maximizing a reward functional, but a formulation in terms of minimizing some cost or risk functional would be equivalent.</p>
        <p>In order to maximize a given functional in a sequential framework, one usually applies Dynamic Programming (DP)  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, which introduces the optimal value function 
        <span class="math"><hi rend="it">V</hi><sup>*</sup>(
        <hi rend="it">x</hi>)</span>, defined as the optimal expected sum of rewards when the agent starts from a state 
        <span class="math"><hi rend="it">x</hi></span>. We have 
        <span class="math"><img align="middle" width="122" height="14" src="math_image_16.png" xylemeAttach="16" border="0" alt="Im16 ${V^*{(x)}=sup_\#960 V^\#960 {(x)}}$"/></span>. Now, let us give two definitions about policies:</p>
        <simplelist>
          <li id="uid24">
            <p noindent="true">We say that a policy 
            <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>is optimal, if it attains the optimal values 
            <span class="math"><hi rend="it">V</hi><sup>*</sup>(
            <hi rend="it">x</hi>)</span>for any state 
            <span class="math"><img align="middle" width="35" height="11" src="math_image_7.png" xylemeAttach="7" border="0" alt="Im7 ${x\#8712 \#119987 }$"/></span>, 
            <i>i.e.</i>, if 
            <span class="math"><img align="middle" width="91" height="13" src="math_image_17.png" xylemeAttach="17" border="0" alt="Im17 ${V^\#960 {(x)}=V^*{(x)}}$"/></span>for all 
            <span class="math"><img align="middle" width="35" height="11" src="math_image_7.png" xylemeAttach="7" border="0" alt="Im7 ${x\#8712 \#119987 }$"/></span>. Under mild conditions, deterministic stationary optimal policies exist 
            <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Such an optimal policy is written 
            <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/><sup>*</sup></span>.</p>
          </li>
          <li id="uid25">
            <p noindent="true">We say that a (deterministic stationary) policy 
            <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>is greedy with respect to (w.r.t.) some function 
            <span class="math"><hi rend="it">V</hi></span>(defined on 
            <span class="math"><img align="bottom" width="10" height="10" src="math_image_4.png" xylemeAttach="4" border="0" alt="Im4 $\#119987 $"/></span>) if, for all 
            <span class="math"><img align="middle" width="35" height="11" src="math_image_7.png" xylemeAttach="7" border="0" alt="Im7 ${x\#8712 \#119987 }$"/></span>,</p>
            <p>
              <formula type="display">
                <img align="middle" width="305" height="25" src="math_image_18.png" xylemeAttach="18" border="0" alt="Im18 ${\#960 {(x)}\#8712 arg\munder max{a\#8712 \#119964 }\munder \#8721 {x^'\#8712 \#119987 }p{(x^'|x,a)}\mfenced o=[ c=] r{(x,a,x^')}+\#947 V{(x^')}.}$"/>
              </formula>
            </p>
            <p noindent="true">where 
            <span class="math"><img align="middle" width="99" height="13" src="math_image_19.png" xylemeAttach="19" border="0" alt="Im19 ${argmax_{a\#8712 \#119964 }f{(a)}}$"/></span>is the set of 
            <span class="math"><img align="middle" width="35" height="11" src="math_image_8.png" xylemeAttach="8" border="0" alt="Im8 ${a\#8712 \#119964 }$"/></span>that maximizes 
            <span class="math"><hi rend="it">f</hi>(
            <hi rend="it">a</hi>)</span>. For any function 
            <span class="math"><hi rend="it">V</hi></span>, such a greedy policy always exists because 
            <span class="math"><img align="bottom" width="10" height="11" src="math_image_5.png" xylemeAttach="5" border="0" alt="Im5 $\#119964 $"/></span>is finite.</p>
          </li>
        </simplelist>
        <p>The goal of Reinforcement Learning (RL), as well as that of dynamic programming, is to design an optimal policy (or a good approximation of it).</p>
        <p spacebefore="6.0pt">The well-known Dynamic Programming equation (also called the Bellman equation) provides a relation between the optimal value function at a state 
        <span class="math"><hi rend="it">x</hi></span>and the optimal value function at the successors states 
        <span class="math"><hi rend="it">x</hi><sup>'</sup></span>when choosing an optimal action: for all 
        <span class="math"><img align="middle" width="35" height="11" src="math_image_7.png" xylemeAttach="7" border="0" alt="Im7 ${x\#8712 \#119987 }$"/></span>,</p>
        <p>
          <formula type="display" id="uid26">
            <img align="middle" width="300" height="25" src="math_image_20.png" xylemeAttach="20" border="0" alt="Im20 ${V^*{(x)}=\munder max{a\#8712 \#119964 }\munder \#8721 {x^'\#8712 \#119987 }p{(x^'|x,a)}\mfenced o=[ c=] r{(x,a,x^')}+\#947 V^*{(x^')}.}$"/>
          </formula>
        </p>
        <p>The benefit of introducing this concept of optimal value function relies on the property that, from the optimal value function 
        <span class="math"><hi rend="it">V</hi><sup>*</sup></span>, it is easy to derive an optimal behavior by choosing the actions according to a policy greedy w.r.t. 
        <span class="math"><hi rend="it">V</hi><sup>*</sup></span>. Indeed, we have the property that a policy greedy w.r.t. the optimal value function is an optimal policy:</p>
        <p>
          <formula type="display" id="uid27">
            <img align="middle" width="318" height="25" src="math_image_21.png" xylemeAttach="21" border="0" alt="Im21 ${\#960 ^*{(x)}\#8712 arg\munder max{a\#8712 \#119964 }\munder \#8721 {x^'\#8712 \#119987 }p{(x^'|x,a)}\mfenced o=[ c=] r{(x,a,x^')}+\#947 V^*{(x^')}.}$"/>
          </formula>
        </p>
        <p>In short, we would like to mention that most of the reinforcement learning methods developed so far are built on one (or both) of the two following approaches ( 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>):</p>
        <simplelist>
          <li id="uid28">
            <p noindent="true">Bellman's dynamic programming approach, based on the introduction of the value function. It consists in learning a “good” approximation of the optimal value function,
            and then using it to derive a greedy policy w.r.t. this approximation. The hope (well justified in several cases) is that the performance 
            <span class="math"><img align="bottom" width="18" height="10" src="math_image_15.png" xylemeAttach="15" border="0" alt="Im15 $V^\#960 $"/></span>of the policy 
            <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>greedy w.r.t. an approximation 
            <span class="math"><hi rend="it">V</hi></span>of 
            <span class="math"><hi rend="it">V</hi><sup>*</sup></span>will be close to optimality. This approximation issue of the optimal value function is one of the major challenges inherent to the reinforcement learning problem. 
            <b>Approximate dynamic programming</b>addresses the problem of estimating performance bounds (
            <i>e.g.</i>the loss in performance 
            <span class="math"><img align="middle" width="65" height="13" src="math_image_22.png" xylemeAttach="22" border="0" alt="Im22 ${{||}V^*-V^\#960 {||}}$"/></span>resulting from using a policy 
            <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_pi.png" alt="$ \pi$"/></span>-greedy w.r.t. some approximation 
            <span class="math"><hi rend="it">V</hi></span>- instead of an optimal policy) in terms of the approximation error 
            <span class="math">||
            <hi rend="it">V</hi>
            <sup>*</sup>-
            <hi rend="it">V</hi>||</span>of the optimal value function 
            <span class="math"><hi rend="it">V</hi><sup>*</sup></span>by 
            <span class="math"><hi rend="it">V</hi></span>. Approximation theory and Statistical Learning theory provide us with bounds in terms of the number of sample data used to represent the functions, and the capacity and
            approximation power of the considered function spaces.</p>
          </li>
          <li id="uid29">
            <p noindent="true">Pontryagin's maximum principle approach, based on sensitivity analysis of the performance measure w.r.t. some control parameters. This approach, also called 
            <b>direct policy search</b>in the Reinforcement Learning community, aims at directly finding a good feedback control law in a parameterized policy space without trying to approximate the
            value function. The method consists in estimating the so-called 
            <b>policy gradient</b>, 
            <i>i.e.</i>the sensitivity of the performance measure (the value function) w.r.t. some parameters of the current policy. The idea being that an optimal control problem is replaced by a
            parametric optimization problem in the space of parameterized policies. As such, deriving a policy gradient estimate would lead to performing a stochastic gradient method in order to
            search for a local optimal parametric policy.</p>
          </li>
        </simplelist>
        <p>Finally, many extensions of Markov decision processes exist, among which Partially Observable MDPs (POMDPs), where the current state does not contain all the necessary
        information required to decide for sure on the best action.</p>
      </subsection>
      <subsection id="uid30" level="2">
        <bodyTitle>Bandits</bodyTitle>
        <p>Bandit problems illustrate the fundamental difficulty of decision making in the face of uncertainty: a decision maker must either choose what seems to be the best choice (“exploit”), or
        test (“explore”) some alternative, hoping to discover a choice that beats the current best choice.</p>
        <p>The classical example of a bandit problem is deciding what treatment to give each patient in a clinical trial when the effectiveness of the treatments is initially unknown and the
        patients arrive sequentially. These bandit problems became popular with the seminal paper 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, after which they have found applications in diverse fields, such
        as control, economics, statistics, or learning theory.</p>
        <p>Formally, a K-armed bandit problem (
        <span class="math"><hi rend="it">K</hi><img width="14" height="24" align="middle" border="0" src="../../images/img_other_ge.png" alt="$ \ge$"/>2</span>) is specified by K real-valued distributions. In each time step a decision
        maker can select one of the distributions to obtain a sample from it. The samples obtained are considered as rewards. The distributions are initially unknown to the decision maker, whose goal
        is to maximize the sum of the rewards received, or equivalently, to minimize the regret which is defined as the loss compared to the total payoff that can be achieved given full knowledge of
        the problem, 
        <i>i.e.</i>, when the arm giving the highest expected reward is pulled all the time.</p>
        <p>The name “bandit” comes from imagining a gambler playing with K slot machines. The gambler can pull the arm of any of the machines, which produces a random payoff as a result: When arm k
        is pulled, the random payoff is drawn from the distribution associated to k. Since the payoff distributions are initially unknown, the gambler must use exploratory actions to learn the
        utility of the individual arms. However, exploration has to be carefully controlled since excessive exploration may lead to unnecessary losses. Hence, to play well, the gambler must carefully
        balance exploration and exploitation.</p>
        <p>Recently, Auer 
        <i>et al.</i>
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>introduced the algorithm UCB (Upper Confidence Bounds) that follows
        what is now called the “optimism in the face of uncertainty principle”. Their algorithm works by computing upper confidence bounds for all the arms and then choosing the arm with the highest
        such bound. They proved that the expected regret of their algorithm increases at most at a logarithmic rate with the number of trials, and that the algorithm achieves the smallest possible
        regret up to some sub-logarithmic factor (for the considered family of distributions).</p>
      </subsection>
    </subsection>
    <subsection id="uid31" level="1">
      <bodyTitle>Statistical learning</bodyTitle>
      <p>Before detailing some issues of statistical learning, let us recall the definition of a few terms.</p>
      <glosslist>
        <label>Machine learning</label>
        <li>
          <p>refers to a system capable of the autonomous acquisition and integration of knowledge. This capacity to learn from experience, analytical observation, and other means, results in a
          system that can continuously self-improve and thereby offer increased efficiency and effectiveness. (source: 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.aaai.org/AITopics/html/machine.html" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">AAAI website</ref>)</p>
        </li>
        <label>Statistical learning</label>
        <li>
          <p>is an approach to machine intelligence which is based on statistical modeling of data. With a statistical model in hand, one applies probability theory and decision theory to get an
          algorithm. This is opposed to using training data merely to select among different algorithms or using heuristics/“common sense” to design an algorithm. (source: 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.cs.wisc.edu/~hzhang/glossary.html" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://
          <allowbreak/>www.
          <allowbreak/>cs.
          <allowbreak/>wisc.
          <allowbreak/>edu/
          <allowbreak/>~hzhang/
          <allowbreak/>glossary.
          <allowbreak/>html</ref>)</p>
        </li>
        <label>Kernel method</label>
        <li>
          <p>Generally speaking, a kernel function is a function that maps a couple of points to a real value. Typically, this value is a measure of dissimilarity between the two points. Assuming a
          few properties on it, the kernel function implicitly defines a dot product in some function space. This very nice formal property as well as a bunch of others have ensured a strong appeal
          for these methods in the last 10 years in the field of function approximation. Many classical algorithms have been “kernelized”, that is, restated in a much more general way than their
          original formulation. Kernels also implicitly induce the representation of data in a certain “suitable” space where the problem to solve (classification, regression, ...) is expected to be
          simpler (non-linearity turns to linearity).</p>
        </li>
      </glosslist>
      <p>The fundamental tools used in 
      <span class="smallcap" align="left">SequeL</span>come from the field of statistical learning 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. We briefly present the most important for us to date, namely,
      kernel-based non parametric function approximation, and non parametric Bayesian models.</p>
      <subsection id="uid32" level="2">
        <bodyTitle>Kernel methods for non parametric function approximation</bodyTitle>
        <p>In statistics in general, and applied mathematics, the approximation of a multi-dimensional real function given some samples is a well-known problem (known as either regression, or
        interpolation, or function approximation, ...). Regressing a function from data is a key ingredient of our research, or at the very least, a basic component of most of our algorithms. In the
        context of sequential learning, we have to regress a function while data samples are being obtained one at a time, while keeping the constraint to be able to predict points at any step along
        the acquisition process. In sequential decision problems, we typically have to learn a value function, or a policy.</p>
        <p>Many methods have been proposed for this purpose. We are looking for suitable ones to cope with the problems we wish to solve. In reinforcement learning, the value function may have areas
        where the gradient is large; these are areas where the approximation is difficult, while these are also the areas where the accuracy of the approximation should be maximal to obtain a good
        policy (and where, otherwise, a bad choice of action may imply catastrophic consequences).</p>
        <p>We particularly favor non parametric methods since they make only a few assumptions about the function to learn. In particular, we have strong interests in 
        <span class="math"><hi rend="it">l</hi><sub>1</sub></span>-regularization, and the (kernelized-)LARS algorithm. 
        <span class="math"><hi rend="it">l</hi><sub>1</sub></span>-regularization yields sparse solutions, and the LARS approach produces the whole regularization path very efficiently, which helps solving the regularization parameter tuning
        problem.</p>
      </subsection>
      <subsection id="uid33" level="2">
        <bodyTitle>Non parametric Bayesian models</bodyTitle>
        <p>Numerous problems in signal processing may be solved efficiently by way of a Bayesian approach. The use of Monte-Carlo methods lets us handle non linear, as well as non Gaussian problems.
        In their standard form, they require the formulation of densities of probability in their parametric form. For instance, it is a common usage to use Gaussian likelihood, because it is
        handy.</p>
        <p>However, in some applications such as Bayesian filtering, or blind deconvolution, the choice of a parametric form of the density of the noise is often arbitrary. If this choice is wrong,
        it may also have dramatic consequences on the estimation.</p>
        <p>To overcome this shortcoming, non parametric methods provide another approach to this problem. In particular, mixtures of Dirichlet processes 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>provide a very powerful formalism.</p>
        <p>Mixtures of Dirichlet Processes are an extension of finite mixture models. Given a mixture density 
        <span class="math"><img align="middle" width="37" height="13" src="math_image_23.png" xylemeAttach="23" border="0" alt="Im23 ${f(\#119857 |\#952 )}$"/></span>, and 
        <span class="math"><img align="middle" width="150" height="16" src="math_image_24.png" xylemeAttach="24" border="0" alt="Im24 ${\#120230 {(d\#952 )}=\#8721 _{k=1}^\#8734 \#969 _k\#948 _U_k{(d\#952 )}}$"/></span>, a Dirichlet process 
        <footnote id="uid34" id-text="3" place="foot" anchored="yes">A Dirichlet process is an almost surely discrete random distribution, where the centroids 
        <span class="math"><hi rend="it">U</hi><sub><hi rend="it">k</hi></sub></span>are distributed along a 
        <i>base distribution</i>
        <span class="math"><img align="middle" width="28" height="13" src="math_image_25.png" xylemeAttach="25" border="0" alt="Im25 ${\#120230 _0{(·)}}$"/></span>, and where weights follow a certain 
        <i>stick breaking</i>law with parameter 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_alpha.png" alt="$ \alpha$"/></span>
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</footnote>, then we define a mixture of Dirichlet processes
        as:</p>
        <p>
          <formula type="display" id="uid35">
            <img align="middle" width="255" height="31" src="math_image_26.png" xylemeAttach="26" border="0" alt="Im26 ${\#120229 {(\#119857 )}~=~\#8747 _\#920 f{(\#119857 |\#952 )}\#120230 {(d\#952 )}~=~\munderover \#8721 {k=1}\#8734 \#969 _kf{(\#119857 |U_k)}}$"/>
          </formula>
        </p>
        <p>A mixture of Dirichlet processes is fully parameterized by the mixture density, as well as the parameters of 
        <span class="math"><img align="bottom" width="7" height="10" src="math_image_27.png" xylemeAttach="27" border="0" alt="Im27 $\#120230 $"/></span>, that is 
        <span class="math"><img align="middle" width="13" height="12" src="math_image_28.png" xylemeAttach="28" border="0" alt="Im28 $\#120230 _0$"/></span>and 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_alpha.png" alt="$ \alpha$"/></span>.</p>
        <p>The class of densities that may be written as a mixture of Dirichlet processes is very wide, so that these processes are well suited to a very large number of applications.</p>
        <p>Given a set of observations, the estimation of the parameters of a mixture of Dirichlet processes is performed by way of a 
        <i>Markov Chain Monte Carlo (MCMC)</i>algorithm.</p>
      </subsection>
    </subsection>
  </fondements>
  <domaine id="uid36">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid37" level="1">
      <bodyTitle>Outline</bodyTitle>
      <p><span class="smallcap" align="left">SequeL</span>aims at solving problems of prediction, as well as problems of optimal and adaptive control. As such, the application domains are very
      numerous.</p>
      <p>The application domains have been organized as follows:</p>
      <simplelist>
        <li id="uid38">
          <p noindent="true">adaptive control,</p>
        </li>
        <li id="uid39">
          <p noindent="true">signal analysis and processing,</p>
        </li>
        <li id="uid40">
          <p noindent="true">functional prediction,</p>
        </li>
        <li id="uid41">
          <p noindent="true">neurosciences.</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid42" level="1">
      <bodyTitle>Adaptive control</bodyTitle>
      <p>Adaptive control is an important application of the research being done in 
      <span class="smallcap" align="left">SequeL</span>. Reinforcement learning precisely aims at controlling the behavior of systems and may be used in situations with more or less information
      available. Of course, the more information, the better, in which case methods of (approximate) dynamic programming may be used 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. But, reinforcement learning may also handle situations where the
      dynamics of the system is unknown, situations where the system is partially observable, and non stationary situations. Indeed, in these cases, the behavior is learned by interacting with the
      environment and thus naturally adapts to the changes of the environment. Furthermore, the adaptive system may also take advantage of expert knowledge when available.</p>
      <p>Clearly, the spectrum of potential applications is very wide: whenever an agent (a human, a robot, a virtual agent) has to take a decision, in particular in cases where it lacks some
      information to take the decision, this enters the scope of our activities. To exemplify the potential applications, let us cite:</p>
      <simplelist>
        <li id="uid43">
          <p noindent="true">game software: in the 1990s, RL has been the basis of a very successful Backgammon program, TD-Gammon 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>that learned to play at an expert level by basically playing a
          very large amount of games against itself;</p>
          <p>Today, various games are studied with RL techniques.</p>
        </li>
        <li id="uid44">
          <p noindent="true">many optimization problems that are closely related to operation research, but taking into account the uncertainty, and the stochasticity of the environment: see the
          job-shop scheduling, or the cellular phone frequency allocation problems, resource allocation in general 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        </li>
        <li id="uid45">
          <p noindent="true">we can also foresee that some progress may be made by using RL to design adaptive conversational agents, or system-level as well as application-level operating systems
          that adapt to their users habits.</p>
          <p>More generally, these ideas fall into what adaptive control may bring to human beings, in making their life simpler, by being embedded in an environment that is made to help them, an
          idea phrased as “ambient intelligence”.</p>
        </li>
        <li id="uid46">
          <p noindent="true">The sensor management problem consists in determining the best way to task several sensors when each sensor has many modes and search patterns. In the detection/tracking
          applications, the tasks assigned to a sensor management system are for instance:</p>
          <simplelist>
            <li id="uid47">
              <p noindent="true">detect targets,</p>
            </li>
            <li id="uid48">
              <p noindent="true">track the targets in the case of a moving target and/or a smart target (a smart target can change its behavior when it detects that it is under analysis),</p>
            </li>
            <li id="uid49">
              <p noindent="true">combine all the detections in order to track each moving target,</p>
            </li>
            <li id="uid50">
              <p noindent="true">dynamically allocate the sensors in order to achieve the previous three tasks in an optimal way. The allocation of sensors, and their modes, thus defines the action
              space of the underlying Markov decision problem.</p>
            </li>
          </simplelist>
          <p>In the more general situation, some sensors may be localized at the same place while others are dispatched over a given volume. Tasking a sensor may include, at each moment, such choices
          as where to point and/or what mode to use. Tasking a group of sensors includes the tasking of each individual sensor but also the choice of collaborating sensors subgroups. Of course, the
          sensor management problem is related to an objective. In general, sensors must balance complex trade-offs between achieving mission goals such as detecting new targets, tracking existing
          targets, and identifying existing targets. The word “target” is used here in its most general meaning, and the potential applications are not restricted to military applications. Whatever
          the underlying application, the sensor management problem consists in choosing at each time an action within the set of available actions.</p>
        </li>
        <li id="uid51">
          <p noindent="true">sequential decision processes are also very well-known in economy. They may be used as a decision aid tool, to help in the design of social helps, or the implementation
          of plants (see 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>for such applications).</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid52" level="1">
      <bodyTitle>Signal analysis and processing</bodyTitle>
      <p>Applications of sequential learning in the field of signal processing are also very numerous. A signal is naturally sequential as it flows. It usually comes from the recording of the output
      of sensors but the recording of any sequence of numbers may be considered as a signal like the stock-exchange rates evolution with respect to time and/or place, the number of consumers at a
      mall entrance or the number of connections to a web site. Signal processing has several objectives: predict, estimate, remove noise, characterize or classify. The signal is often considered as
      sequential: we want to predict, estimate or classify a value (or a feature) at time 
      <span class="math"><hi rend="it">t</hi></span>knowing the past values of the parameter of interest or past values of data related to this parameter.</p>
      <p>Signals may be processed in several ways. One of the best ways is the time-frequency analysis in which the frequencies of each signal are analyzed with respect to time. This concept has been
      generalized to the time-scale analysis obtained by a wavelet transform. Both analyses are based on the projection of the original signal onto a well-chosen function basis. Signal processing is
      also closely related to the probability field as the uncertainty inherent to many signals leads to consider them as stochastic processes: the Bayesian framework is actually one of the main
      frameworks within which signals are processed for many purposes. However, there exist alternatives like belief functions. Belief functions were introduced by Dempster a few decades ago and have
      been successfully used in the past few years in fields where probability had, for many years, no alternative, as in classification. Belief functions can be viewed as a generalization of
      probabilities which can capture both imprecision and uncertainty. Belief functions are also closely related to data fusion where once more they can be considered as a serious alternative to
      probabilities.</p>
    </subsection>
    <subsection id="uid53" level="1">
      <bodyTitle>Functional prediction</bodyTitle>
      <p>One of the current trends in machine learning aims at dealing with data that are functions, rather than points or vectors. Generally speaking, functions represent a behavior (of a person, of
      an apparatus, or of an algorithm, or a response of a system, ...).</p>
      <p>One application of functional prediction which is particularly emphasized these days, is the understanding of client behavior, either in material shops, or in virtual shops on the web. This
      understanding may then be used for different ends, such as the management of stocks according to sales, the proposition of products according to those already bought, the “instantaneous”
      management of some resource in the shop (advisors, cashiers, instant promotions, personalized advertisement, ...).</p>
    </subsection>
    <subsection id="uid54" level="1">
      <bodyTitle>Neurosciences</bodyTitle>
      <p>Machine learning methods may be used for at least two purposes in neurosciences:</p>
      <orderedlist>
        <li id="uid55">
          <p noindent="true">as in any other (experimental) scientific domain, since machine learning methods rely heavily on statistics, they may be used to analyse experimental data,</p>
        </li>
        <li id="uid56">
          <p noindent="true">dealing with induction learning, that is the ability to generalize from facts which is an ability that is considered to be one of the basic components of “intelligence”,
          machine learning may be considered as a model of learning in living beings. In particular, the temporal difference methods for reinforcement learning have strong ties with various concepts
          of psychology (Thorndike's law of effect, and the Rescorla-Wagner law to name the two most well-known).</p>
        </li>
      </orderedlist>
    </subsection>
  </domaine>
  <logiciels id="uid57">
    <bodyTitle>Software</bodyTitle>
    <subsection id="uid58" level="1">
      <bodyTitle>Software</bodyTitle>
      <subsection id="uid59" level="2">
        <bodyTitle>Crazy Stone</bodyTitle>
        <participants>
          <person key="sequel-2006-idm391148946784">
            <firstname>Rémi</firstname>
            <lastname>Coulom</lastname>
            <moreinfo>correspondent</moreinfo>
          </person>
        </participants>
        <p>Crazy Stone is a top-level Go-playing program that has been developed by Rémi Coulom since 2005. Crazy Stone won several major international Go tournaments in the past. Because of the
        media impact of those victories, some software companies showed interest in buying licences of Crazy Stone. So, in 2009, Crazy Stone was registered with the APP (Agence pour la Protection des
        Programmes). No licence has been sold so far. Crazy Stone is not available publicly.</p>
      </subsection>
    </subsection>
  </logiciels>
  <resultats id="uid60">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid61" level="1">
      <bodyTitle>Introduction</bodyTitle>
      <p>New results are organized in the following sections:</p>
      <orderedlist>
        <li id="uid62">
          <p noindent="true">decision under uncertainty,</p>
        </li>
        <li id="uid63">
          <p noindent="true">foundations of machine learning,</p>
        </li>
        <li id="uid64">
          <p noindent="true">supervised learning,</p>
        </li>
        <li id="uid65">
          <p noindent="true">clustering,</p>
        </li>
        <li id="uid66">
          <p noindent="true">signal processing.</p>
        </li>
      </orderedlist>
    </subsection>
    <subsection id="uid67" level="1">
      <bodyTitle>Decision under uncertainty</bodyTitle>
      <participants>
        <person key="sequel-2007-idm332252484192">
          <firstname>Sébastien</firstname>
          <lastname>Bubeck</lastname>
        </person>
        <person key="sequel-2009-idm70241666752">
          <firstname>Alexandra</firstname>
          <lastname>Carpentier</lastname>
        </person>
        <person key="sequel-2006-idm391148940688">
          <firstname>Pierre-Arnaud</firstname>
          <lastname>Coquelin</lastname>
        </person>
        <person key="sequel-2006-idm391148946784">
          <firstname>Rémi</firstname>
          <lastname>Coulom</lastname>
        </person>
        <person key="sequel-2009-idm70241657440">
          <firstname>Victor</firstname>
          <lastname>Gabillon</lastname>
        </person>
        <person key="sequel-2008-idm80283914416">
          <firstname>Mohammad</firstname>
          <lastname>Ghavamzadeh</lastname>
        </person>
        <person key="sequel-2007-idm332252510144">
          <firstname>Sertan</firstname>
          <lastname>Girgin</lastname>
        </person>
        <person key="sequel-2007-idm332252490272">
          <firstname>Jean-François</firstname>
          <lastname>Hren</lastname>
        </person>
        <person key="sequel-2008-idm80283886240">
          <firstname>Alessandro</firstname>
          <lastname>Lazaric</lastname>
        </person>
        <person key="sequel-2006-idm391148935408">
          <firstname>Manuel</firstname>
          <lastname>Loth</lastname>
        </person>
        <person key="sequel-2008-idm80283857760">
          <firstname>Odalric-Ambrym</firstname>
          <lastname>Maillard</lastname>
        </person>
        <person key="sequel-2006-idm391148963008">
          <firstname>Rémi</firstname>
          <lastname>Munos</lastname>
        </person>
        <person key="sequel-2006-idm391148966784">
          <firstname>Philippe</firstname>
          <lastname>Preux</lastname>
        </person>
        <person key="sequel-2007-idm332252530992">
          <firstname>Daniil</firstname>
          <lastname>Ryabko</lastname>
        </person>
      </participants>
      <subsection id="uid68" level="2">
        <bodyTitle>Reinforcement learning and approximate dynamic programming</bodyTitle>
        <subsection id="uid69" level="3">
          <bodyTitle>Approximate Policy Iteration without Value Function Representation</bodyTitle>
          <p>There is a recent interest on approximate policy iteration algorithms in which the action-value function is not approximated over the entire state-action space 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. The main idea is to remove the policy evaluation and cast the
          policy improvement as a classification problem. The training set of this classification problem is generated by rollout estimates of the action-value function on a finite number of states.
          In 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we present a novel loss function by weighting the number of
          classification errors with the actual regret associated to each error, 
          <i>i.e.</i>, the difference between the action-values of the greedy action and the action chosen by the rollout policy, and provide convergence bounds for the resulting approximate policy
          iteration algorithm.</p>
        </subsection>
        <subsection id="uid70" level="3">
          <bodyTitle>Natural actor-critic</bodyTitle>
          <p>In 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we present four new reinforcement learning algorithms based on
          actor–critic, function approximation, and natural gradient ideas, and we provide their convergence proofs. Actor–critic reinforcement learning methods 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>are online approximations to policy iteration in which the
          value-function parameters are estimated using temporal difference learning 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid22" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>and the policy parameters are updated by stochastic gradient
          descent. Methods based on policy gradients in this way are of special interest because of their compatibility with function approximation methods, which are needed to handle large or
          infinite state spaces. The use of temporal difference learning in this way is of special interest because in many applications it dramatically reduces the variance of the gradient
          estimates. The use of the natural gradient is of interest because it can produce better conditioned parameterizations and has been shown to further reduce variance in some cases. Our
          results extend prior two-timescale convergence results for actor–critic methods by 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>(also 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid24" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) by using temporal difference learning in the actor and by
          incorporating natural gradients. Our results extend prior empirical studies of natural actor–critic methods by 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid25" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>by providing the first convergence proofs and the first fully
          incremental algorithms. We present empirical results verifying the convergence of our algorithms.</p>
        </subsection>
        <subsection id="uid71" level="3">
          <bodyTitle>Bayesian Multi-Task Reinforcement Learning</bodyTitle>
          <p>In 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid26" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we consider the problem of multi-task reinforcement learning,
          where a learner is provided with a set of tasks, for which only a small number of samples can be generated for any given policy. As the number of samples may not be enough to learn an
          accurate evaluation of the policy, it would be necessary to identify classes of tasks with similar structure and to learn them jointly. We consider the case where the tasks share structure
          in their value functions, and model this by assuming that the value functions are all sampled from a common prior. We adopt the Gaussian process temporal-difference 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid27" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>value function model and use a hierarchical Bayesian approach to
          model the distribution over the value functions. In this paper, we study two cases, where all the value functions belong to the same class and where they belong to an undefined number of
          classes. For each case, we present a hierarchical Bayesian model, and derive inference algorithms for:</p>
          <orderedlist>
            <li id="uid72">
              <p noindent="true">joint learning of the value functions, and</p>
            </li>
            <li id="uid73">
              <p noindent="true">efficient transfer of the information gained in (i) to assist learning the value function of a newly observed task.</p>
            </li>
          </orderedlist>
        </subsection>
        <subsection id="uid74" level="3">
          <bodyTitle>Regularized Fitted Q-iteration for Planning in Continuous-Space MDPs</bodyTitle>
          <p>Reinforcement learning with linear and non-linear function approximation has been studied extensively in the last decade. However, as opposed to other fields of machine learning such as
          supervised learning, the effect of finite samples has not been thoroughly addressed within the reinforcement learning framework. In this work 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid28" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we propose to use 
          <span class="math"><hi rend="it">L</hi><sup>2</sup></span>regularization to control the complexity of the value function in reinforcement learning and planning problems. We consider the regularized fitted Q-iteration algorithm and provide
          generalization bounds that account for small sample sizes. We use a realistic visual-servoing problem to illustrate the benefits of using the regularization procedure.</p>
        </subsection>
        <subsection id="uid75" level="3">
          <bodyTitle>Function approximation and representation learning</bodyTitle>
          <p>As a follow-up to the 2008 work on the issue of the representation of states, we have worked further on feature discovery in the context of sequential decision problems. Based on our
          2008 work on feature discovery in the context of reinforcement learning to discover a good (if not the best) representation of states, we have studied the use of non parametric function
          approximation in the context of approximate dynamic programming. The striking difference with the usual approach is that we use a non parametric function approximator to represent the value
          function, instead of a parametric one. See 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid29" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid30" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        </subsection>
      </subsection>
      <subsection id="uid76" level="2">
        <bodyTitle>Sensitivity analysis in HMMs</bodyTitle>
        <p>We considered a sensitivity analysis in Hidden Markov Models with continuous state and observation spaces. We proposed an Infinitesimal Perturbation Analysis (IPA) on the filtering
        distribution with respect to some parameters of the model. We described a methodology for using any algorithm that estimates the filtering density, such as Sequential Monte Carlo methods, to
        design an algorithm that estimates its gradient. The resulting IPA estimator is proven to be asymptotically unbiased, consistent and has computational complexity linear in the number of
        particles. We considered an application of this analysis to the problem of identifying unknown parameters of the model given a sequence of observations. We derived an IPA estimator for the
        gradient of the log-likelihood, which may be used in a gradient method for the purpose of likelihood maximization. See 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid31" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      </subsection>
      <subsection id="uid77" level="2">
        <bodyTitle>Exploration vs. exploitation</bodyTitle>
        <subsection id="uid78" level="3">
          <bodyTitle>Pure exploration in multi-armed bandits</bodyTitle>
          <p>We considered the framework of stochastic multi-armed bandit problems where a forecaster is assessed in terms of its simple regret, a regret notion that captures the fact that
          exploration is only constrained by the number of available rounds (not necessarily known in advance), in contrast to the case when the cumulative regret is considered and when exploitation
          needs to be performed at the same time. This performance criterion is suited to situations when the cost of pulling an arm is expressed in terms of resources rather than rewards. We
          discussed the links between the simple and the cumulative regret. Our main result is that the required exploration–exploitation trade-offs are qualitatively different, in view of a general
          lower bound on the simple regret in terms of the cumulative regret. See 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid32" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        </subsection>
        <subsection id="uid79" level="3">
          <bodyTitle>Hybrid Stochastic-Adversarial On-line Learning</bodyTitle>
          <p>Most of the research in online learning focused either on the problem of adversarial classification (
          <i>i.e.</i>, both inputs and labels are arbitrarily chosen by an adversary) or on the traditional supervised learning problem in which samples are independently generated from a fixed
          probability distribution. Nonetheless, in a number of domains the relationship between inputs and labels may be adversarial, whereas input instances are generated according to a constant
          distribution. We introduced a hybrid stochastic-adversarial classification problem, in which inputs are stochastic, while labels are adversarial. We proposed an online learning algorithm
          for its solution, and analyzed its performance. In particular, we showed that, given a hypothesis space 
          <span class="math"><img align="bottom" width="12" height="11" src="math_image_29.png" xylemeAttach="29" border="0" alt="Im29 $\mathcal{H}$"/></span>with finite VC dimension, it is possible to incrementally build a suitable finite set of hypotheses that can be used as input for an exponentially weighted forecaster achieving a
          cumulative regret of order 
          <span class="math"><img align="middle" width="119" height="17" src="math_image_30.png" xylemeAttach="30" border="0" alt="Im30 ${O(\sqrt{n\,VC(\mathcal{H})\log n})}$"/></span>with overwhelming probability. We also discussed extensions to multi-label classification, learning from experts and bandit settings with stochastic side information, and application
          to games. See 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid33" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        </subsection>
        <subsection id="uid80" level="3">
          <bodyTitle>Minimax Policies for Adversarial and Stochastic Bandits</bodyTitle>
          <p>This work deals with four classical prediction games, namely full information, bandit and label efficient (full information or bandit) games as well as three different notions of regret:
          pseudo-regret, expected regret and tracking the best expert regret. We introduced a new forecaster, INF (Implicitly Normalized Forecaster), for which we proposed a unified analysis of its
          pseudo-regret in the four games. With well-chosen parameters INF defines a new forecaster, for which we were able to remove the extraneous logarithmic factor in the pseudo-regret bounds for
          bandit games, and thus fill in a long open gap in the characterization of the minimax rate for the pseudo-regret in the bandit game. We also consider the stochastic bandit game, and prove
          that an appropriate modification of the upper confidence bound policy UCB achieves the distribution-free optimal rate while still having a distribution-dependent rate logarithmic in the
          number of plays. See 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid34" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        </subsection>
      </subsection>
      <subsection id="uid81" level="2">
        <bodyTitle>Applications</bodyTitle>
        <subsection id="uid82" level="3">
          <bodyTitle>The games of Go and Havannah</bodyTitle>
          <p>After the 2006 major breakthrough in Go realized by Rémi Coulom's Crazy Stone program, the latter has evolved further.</p>
          <p>Rémi Coulom's main research topic in 2009 was automatic parameter optimization from noisy observations, applied to his Go-playing program Crazy Stone. The performance of most
          game-playing programs depends on several parameters. In order to get optimal performance, it is necessary to tune these parameters carefully. This is a very challenging problem, because the
          number of parameters is very high, and the effect of parameters is measured with very noisy observations. Crazy Stone has thousands of parameters, and observations are binary outcomes of
          games (win or loss). Early results of using local quadratic regression were presented at the University of Electro-Communications (Japan) in January 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid35" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
          <p>From June 15th to July 31st, Rémi Coulom supervised Victor Marsault, a first-year student from ENS Cachan. The topic of this internship was the application of Monte-Carlo tree search to
          the game of Havannah. Like the game of Go, the game of Havannah is a challenging application domain, where the strongest human players still easily outperform the best computer algorithms.
          Although they did not manage to reach top human level, they investigated original Monte-Carlo tree search ideas and produced a decent artificial player 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid36" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        </subsection>
        <subsection id="uid83" level="3">
          <bodyTitle>The Ubiquitous Virtual Seller</bodyTitle>
          <p>This 18-month project aims at studying the design, and implementation, of virtual agents on selling Internet portals. The goal is that this agent will be able to recognize the visitors
          of the portal, either as regular visitors, or new visitors, and help them, provide advice, develop a selling strategy, ...</p>
          <p>Having begun in Sep. 2009, for the moment, the work has mostly been a review of relevant work in the literature, as well as getting acquainted with the other members of the project, in
          particular the marketing aspects of the project, as well as the private companies' expectations.</p>
          <p>See also the contract section (Sec. 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid112" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) of the report for specific details about the contract itself.</p>
        </subsection>
        <subsection id="uid84" level="3">
          <bodyTitle>Ad selection on web portals</bodyTitle>
          <p>In 2009, we have begun a work on the selection of displayed ads on web pages, under contract with France Telecom/Orange Labs.</p>
          <p>Of course, this problem has already received a lot of attention by major actors of the Internet. However, publicly available works on this problem have never tackled the real problem,
          with the specific real constraints. In particular, the finiteness of resources (in time, and in the number of ads to display) is not tackled, and asymptotically optimal algorithms are
          studied. But asymptotic results are not those that are sought, and the performance of these asymptotically optimal algorithms used under finite constraints of time and resource are
          typically bad. Indeed, our work has shown that handling this finiteness is necessary to obtain good strategies of ad display. We have modeled the problem as the resolution of a linear
          program, in which some crucial quantities have to be learned from data. So, we end up proposing an approach which mixes bandits to estimate these data on which linear programming is
          applied. Furthermore, this process has to be iterated to handle the fact that ad campaigns have a limited extent in time, new ad campaigns are created, and the discrepancy between the
          actual visitors of the website, and those that were planned. This work has been accepted, and will be published in 2010.</p>
          <p>See also the contract section (Sec. 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid105" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) of the report for specific details about the contract itself.</p>
        </subsection>
        <subsection id="uid85" level="3">
          <bodyTitle>Games that adapt to player skill</bodyTitle>
          <p>It has always been a challenge for computer scientists to try to defeat human experts at any game; among many other games, draughts, Othello, chess, and currently Go have challenged the
          community. However, for “standard” humans, some programs are desperately too strong; we have been arguing for years that methods of adaptive control may be useful to design new games which,
          instead of aiming at defeating any human being, at the cost of boredom, adapt to the strength of the human player.</p>
          <p>We have had the opportunity to work concretely on this idea in collaboration with the InQuest company located in Villeneuve d'Ascq. We tackled the problem of asking questions to people,
          according to their skill: the difficulty of a question depends on people, on their age, their culture, ... Jérémie Mary designed a Bayesian approach to assess the difficulty of questions
          related to a given human being, and ask his/her questions of appropriate difficulty that he/she has a reasonable probability to answer correctly. We have also worked on the inclusion of
          new, non rated, questions to the catalog of available questions (approx 
          <span class="math">10
          <sup>4</sup></span>different questions, among which a dozen are asked to a given human being: so, the skill of a given player has to be assessed very quickly with the first of these 12
          questions).</p>
          <p>See also the contract section (Sec. 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid106" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) of the report for specific details about the contract itself.</p>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid86" level="1">
      <bodyTitle>Foundations of machine learning</bodyTitle>
      <participants>
        <person key="sequel-2007-idm332252530992">
          <firstname>Daniil</firstname>
          <lastname>Ryabko</lastname>
        </person>
      </participants>
      <subsection id="uid87" level="2">
        <bodyTitle>Sequence prediction in the most general form.</bodyTitle>
        <p>The problem of sequence prediction consists in forecasting, on each step of time, the probabilities of the next outcome of the observed sequence of data. In the most general formulation of
        the problem, we assume that the data is generated by a stochastic process that belongs to a certain known class of processes 
        <span class="math"><img align="bottom" width="7" height="10" src="math_image_31.png" xylemeAttach="31" border="0" alt="Im31 $\mathcal{C}$"/></span>, and the problem is to construct a predictor that works for any (a priori unknown) process coming from 
        <span class="math"><img align="bottom" width="7" height="10" src="math_image_31.png" xylemeAttach="31" border="0" alt="Im31 $\mathcal{C}$"/></span>.</p>
        <p>This general formulation is motivated by the diversity of sequential prediction problems: they include analysis of biological, financial, textual or web-generated data, to mention a few.
        Naturally, one has to have different models for these problems, and therefore one is interested in finding a general procedure for constructing a predictor, given only some weak probabilistic
        constraints on the data; this is formalized by saying that the data-generating process comes from a known but arbitrary family 
        <span class="math"><img align="bottom" width="7" height="10" src="math_image_31.png" xylemeAttach="31" border="0" alt="Im31 $\mathcal{C}$"/></span>.</p>
        <p>Our recent breakthrough 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid37" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>in solving this general problem is in showing that, when such a
        predictor can be constructed, it can be constructed as a Bayesian predictor whose prior is concentrated on a countable subset of 
        <span class="math"><img align="bottom" width="7" height="10" src="math_image_31.png" xylemeAttach="31" border="0" alt="Im31 $\mathcal{C}$"/></span>.</p>
      </subsection>
      <subsection id="uid88" level="2">
        <bodyTitle>Statistical inference</bodyTitle>
        <p>We have developed a new theoretical framework that has allowed us to solve some classical problems of mathematical statistics in a radically more general setting. Namely, the setting is
        that the data is generated by a stationary ergodic process (or processes, depending on the problem), and no assumptions of independence, mixing rates, etc., as well as no parametric
        assumptions, are made. The obtained results include a general hypothesis testing procedure, a consistent change point estimator, and a consistent classification procedure 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid38" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Previous results on these problems concerned only much more
        restricted settings (e.g. i.i.d. data). In addition, we have shown 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid39" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>that consistent homogeneity testing is impossible in this setting,
        which means that given two growing samples of data which are only known to be generated by stationary ergodic processes, one cannot in general tell whether they are generated by the same or
        by different process distributions, even in the weakest asymptotic setting, and even if the processes are binary-valued. This is particularly remarkable in view of our result that establishes
        a consistent change point estimator.</p>
        <p>Our most recent results 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid40" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid41" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>in this direction provide a complete characterization (necessary
        and sufficient conditions) for the existence of a consistent test for membership to an arbitrary family 
        <span class="math"><hi rend="it">H</hi><sub>0</sub></span>of stationary ergodic discrete-valued processes, against 
        <span class="math"><hi rend="it">H</hi><sub>1</sub></span>which is the complement of 
        <span class="math"><hi rend="it">H</hi><sub>0</sub></span>to this class of processes. The criterion is that 
        <span class="math"><hi rend="it">H</hi><sub>0</sub></span>has to be closed in the topology of distributional distance, and closed under taking ergodic decompositions of its elements.</p>
        <p>In addition, the paper on rank tests that was mentioned in the previous report as accepted, has now been published 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid42" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      </subsection>
      <subsection id="id67057" level="2">
        <bodyTitle>Steganography</bodyTitle>
        <p>The goal of steganography is to transfer hidden information in seemingly innocuous messages (called “covertexts”), in the presence of an observer who is trying to find out whether hidden
        information is being transmitted. The innocuous messages may be, for example, photographic images, or human-written notes. They are assumed to be generated by an oracle, whose exact
        probabilistic characteristics are unknown to the communicating parties. For the case when this probabilistic process is i.i.d. or has a finite memory (which is a natural and a standard
        assumption) we have constructed 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid43" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>a universal (any distribution conforming to the above assumption)
        perfectly secure (no detection is possible) asymptotically optimal (in terms of the amount of transmitted secret information) and simple (in terms of computation) steganographic system. On
        the other hand, we have shown 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid44" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>that there exist such complicated sources of covertexts, that any
        stegosystem that meets the perfect security condition must itself have an exponential (in the size of the message) Kolmogorov complexity.</p>
      </subsection>
    </subsection>
    <subsection id="uid89" level="1">
      <bodyTitle>Supervised learning</bodyTitle>
      <participants>
        <person key="sequel-2006-idm391148952976">
          <firstname>Emmanuel</firstname>
          <lastname>Duflos</lastname>
        </person>
        <person key="sequel-2008-idm80283882784">
          <firstname>Hachem</firstname>
          <lastname>Kadri</lastname>
        </person>
        <person key="sequel-2006-idm391148935408">
          <firstname>Manuel</firstname>
          <lastname>Loth</lastname>
        </person>
        <person key="sequel-2008-idm80283857760">
          <firstname>Odalric-Ambrym</firstname>
          <lastname>Maillard</lastname>
        </person>
        <person key="sequel-2006-idm391148963008">
          <firstname>Rémi</firstname>
          <lastname>Munos</lastname>
        </person>
        <person key="sequel-2006-idm391148966784">
          <firstname>Philippe</firstname>
          <lastname>Preux</lastname>
        </person>
      </participants>
      <subsection id="uid90" level="2">
        <bodyTitle>Multi representation</bodyTitle>
        <p>This work considers the problem of semi-supervised multi-view classification, where each view corresponds to a Reproducing Kernel Hilbert Space. We propose an algorithm based on
        co-regularization methods with extra penalty terms reflecting smoothness and general agreement properties. We first provide explicit tight control on the Rademacher (L1) complexity of the
        corresponding class of learners for arbitrarily many views, then give the asymptotic behavior of the bounds when the co-regularization term increases, making explicit the relation between
        consistency of the views and reduction of the search space. Since many views involve many parameters, we third provide a parameter selection procedure, based on the stability approach with
        clustering and localization arguments. To this aim, we give an explicit bound on the variance (L2-diameter) of the class of functions. See 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid45" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      </subsection>
      <subsection id="uid91" level="2">
        <bodyTitle>New algorithms to induce classifiers, and regressors</bodyTitle>
        <subsection id="uid92" level="3">
          <bodyTitle>Compressed Least Squares Regression</bodyTitle>
          <p>We considered the problem of learning, from 
          <span class="math"><hi rend="it">K</hi></span>input data, a regression function in a function space of high dimension 
          <span class="math"><hi rend="it">N</hi></span>using projections onto a random subspace of lower dimension 
          <span class="math"><hi rend="it">M</hi></span>. From any linear approximation algorithm using empirical risk minimization (possibly penalized), we provided bounds on the excess risk of the estimate computed in the projected
          subspace (compressed domain) in terms of the excess risk of the estimate built in the high-dimensional space (initial domain). We applied the analysis to the ordinary Least-Squares
          regression and showed that by choosing 
          <span class="math"><img align="middle" width="79" height="16" src="math_image_32.png" xylemeAttach="32" border="0" alt="Im32 ${M=O(\sqrt K)}$"/></span>, the estimation error (for the quadratic loss) of the “Compressed Least Squares Regression” is 
          <span class="math"><img align="middle" width="59" height="16" src="math_image_33.png" xylemeAttach="33" border="0" alt="Im33 ${O(1/\sqrt K)}$"/></span>up to logarithmic factors. See 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid46" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
        </subsection>
        <subsection id="uid93" level="3">
          <bodyTitle>Non parametric function approximation: the Equi-Correlation Network algorithm</bodyTitle>
          <p>We have designed a new algorithm, named the Equi-Correlation Network (ECON), to perform supervised classification, and regression. ECON is a kernelized LARS-like algorithm, by which we
          mean that ECON uses an 
          <span class="math"><hi rend="it">l</hi><sub>1</sub></span>regularization to produce sparse estimators. ECON efficiently rides the regularization path to obtain the estimator associated to any value of the constant of regularization, and
          ECON represents the data by way of features induced by a feature function. The originality of ECON is that it automatically tunes the parameters of the features while riding the
          regularization path. So, ECON has the unique ability to produce optimally tuned features for each value of the constant of regularization. Experimentally, we have obtained remarkable
          performance of ECON on standard benchmark datasets in regression and supervised classification.</p>
          <p>We have also used ECON to tackle the problem of representing photometric solids in computer graphics. See the application section below, as well as 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid47" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid48" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid49" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        </subsection>
      </subsection>
      <subsection id="uid94" level="2">
        <bodyTitle>Functional regression</bodyTitle>
        <p>Functional regression deals with the setting in which the attributes of data, as well as their associated label, are functions. Traditionally, functional regression considers discretized
        attributes, and applies the classical regression techniques (see 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid50" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>for instance).</p>
        <p>We have tackled this problem considering functions as functions, whereas the traditional approach consists in dealing with discretized functions, thus vectors. We have developed a RKHS
        approach for it, kernels being now operators mapping a function to a function. We have demonstrated the basic theorems (basic properties of such functional kernel, existence of such kernel,
        representer theorem) on which a sound functional RKHS approach can be built. We have also exhibited a functional kernel, and provided preliminary experimental results.</p>
        <p>A preliminary version of this work is available as an INRIA research report 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid51" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, and a further worked version is under submission for
        publication.</p>
        <p>This work takes place under the ANR Kernsig project (see Sec. 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid120" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>).</p>
      </subsection>
      <subsection id="uid95" level="2">
        <bodyTitle>Applications</bodyTitle>
        <p>To create realistic images, photometric solids are used that represent how the energy of a wave of light of a certain wavelength is reflected in any direction. This data is available for a
        huge amount of materials. This whole data is traditionally represented in mere tables, which are thus huge, and interpolation is used to estimate the reflected energy for directions which are
        not available.</p>
        <p>In collaboration with a team working in computer graphics, we have studied the use of the machine learning technology to represent these data in a much more compact way. Mere back
        propagated neural networks have first been used, and then ECON has been used. The expected results have been obtained: having much more compact representation of these photometric solids,
        while keeping the same quality of rendered images, which is the ultimate goal in computer graphics. See 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid52" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid53" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid47" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        <p>This collaboration has shown us that the field of computer graphics is a rich field of applications of machine learning technology, yet to be exploited. This collaboration is going on.</p>
      </subsection>
    </subsection>
    <subsection id="uid96" level="1">
      <bodyTitle>Unsupervised learning</bodyTitle>
      <participants>
        <person key="sequel-2007-idm332252484192">
          <firstname>Sébastien</firstname>
          <lastname>Bubeck</lastname>
        </person>
      </participants>
      <subsection id="uid97" level="2">
        <bodyTitle>Nearest Neighbor Clustering</bodyTitle>
        <p>Clustering is often formulated as a discrete optimization problem. The objective is to find, among all partitions of the data set, the best one according to some quality measure. However,
        in the statistical setting where we assume that the finite data set has been sampled from some underlying space, the goal is not to find the best partition of the given sample, but to
        approximate the true partition of the underlying space. We argue that the discrete optimization approach usually does not achieve this goal, and instead can lead to inconsistency. We
        construct examples which provably have this behavior. As in the case of supervised learning, the cure is to restrict the size of the function classes under consideration. For appropriate
        “small” function classes we can prove very general consistency theorems for clustering optimization schemes. As one particular algorithm for clustering with a restricted function space we
        introduce “nearest neighbor clustering”. Similar to the k-nearest neighbor classifier in supervised learning, this algorithm can be seen as a general baseline algorithm to minimize arbitrary
        clustering objective functions. We prove that it is statistically consistent for all commonly used clustering objective functions. See 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid54" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      </subsection>
    </subsection>
    <subsection id="uid98" level="1">
      <bodyTitle>Sensors Networks: Tracking, Localization and Communication</bodyTitle>
      <participants>
        <person key="sequel-2008-idm80283851664">
          <firstname>Emmanuel</firstname>
          <lastname>Delande</lastname>
        </person>
        <person key="sequel-2006-idm391148952976">
          <firstname>Emmanuel</firstname>
          <lastname>Duflos</lastname>
        </person>
        <person key="sequel-2006-idm391148949872">
          <firstname>Philippe</firstname>
          <lastname>Vanheeghe</lastname>
        </person>
        <person key="sequel-2008-idm80283854720">
          <firstname>Nicolas</firstname>
          <lastname>Viandier</lastname>
        </person>
      </participants>
      <subsection id="uid99" level="2">
        <bodyTitle>The sensor management problem</bodyTitle>
        <p>This class of applications took a new turn this year with the thesis of Emmanuel Delande, supervised by Emmanuel Duflos and Philippe Vanheeghe, in collaboration with Thales Communication.
        The aim of this work is to manage a set of sensors to track vehicles or groups of people in land applications. The dynamic of each target is controlled by a velocity vector field defined over
        the area of interest. Such a modelling allows the use of particle filters to track the targets. In real application, the high dimension state is however an obstacle to an accurate estimation
        of the targets' parameters since it is well known that the estimation error increases with the number of targets. That is the reason why our work focuses today on random sets based estimation
        filter and more precisely on the PHD filter. The sensor management modelling work is still in progress. It is clear today that such an optimization problem is very close to the
        reinforcement learning problem, and current research focuses on how to model a sensor management problem as a reinforcement learning optimization problem.</p>
      </subsection>
      <subsection id="uid100" level="2">
        <bodyTitle>Sequential learning of sensors localization: application to civil engineering</bodyTitle>
        <p>This work is done in collaboration with Prof Carl Haas of the University of Waterloo (Canada). This collaboration is related to a problem occurring in civil engineering: how can we
        automatically locate the building materials on a construction site? This is a real problem because a lot of time (hence of money) is lost to find these materials that have often been moved
        away. The ability to detect dislocations automatically for tens of thousands of items can ultimately improve project performance significantly. The proposed solution is to equip each piece
        with an RFID tag and each person working on the construction site with an RFID receiver, a GPS for the localization, and a transmitter. We then learn sequentially the position of the pieces
        using the incoming detection information sent automatically by the transmitter to a central processor when the workforces walk near these pieces and detect them. RFID systems and localization
        systems as GPS allow to treat such a problem in the more general context of randomly distributed communication nodes localization. We have obtained a PICS (International Project for
        Scientific Cooperation) from the CNRS in 2008 for 3 years to work on the specific problems arising when huge amount of sensors are used in civil engineering application. This activity deals
        with both sensor management and signal analysis. The work achieved in 2009 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid55" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, is a continuation of previous research, in which we tackled the
        location estimation problem by fusing the data from a simulation model.</p>
      </subsection>
      <subsection id="uid101" level="2">
        <bodyTitle>Accurate Localization using Satellites in Urban Canyons</bodyTitle>
        <p>Today, Global Navigation Satellite Systems (GNSS) have penetrated the transport field through applications such as monitoring of containers. These applications do not necessarily request a
        high availability, integrity and accuracy of the positioning system. For safety applications (as complete guidance of autonomous vehicles), performances require to be more stringent. The
        American system GPS (Global Positioning System) is the only fully operational solution for the moment. This monopoly reduces the possibilities of measurement redundancy and diversity, thus
        limits the reachable performances. Unfortunately, most of these transport applications are mainly used in dense urban environments, highly constraining for signal propagation. Sensors may
        deliver very erroneous measurements because of such hard external conditions which reduce significantly the possibilities to receive direct signals. The consequences of environmental
        obstructions are unavailability of the service and reception of reflected signals that degrade in particular the accuracy of the positioning. Indeed, NLOS (Non Line Of Sight) signals, 
        <i>i.e.</i>signals received after reflections on the surrounding obstacles, frequently occur in dense environments and degrade localization accuracy because of the delays observed on the
        propagation time measurement creating additional error on pseudorange estimation. The worst case of reception is the alternate path. In this case the LOS signal from a satellite cannot reach
        the antenna and the receiver tracks only reflected signals. Such phenomena make the pseudorange error distribution become a non-white and non-Gaussian distribution (
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid56" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). As a consequence, the classical localization methods like
        Extended Kalman Filter (EKF), assuming that state and observation noises are white and Gaussian, are not efficient anymore and make positioning error more important. Thus, to enhance the
        localization accuracy in case of alternate path reception, the filtering part of the receiver (after correlators) must be improved. Furthermore, in order to limit costs, we have chosen to
        work only with GNSS signals. In a goal of enhanced position accuracy, we propose a new statistical filtering method based on a better definition (and use) of the observation noise for each
        satellite signal. Moreover, in a very constrained environment (like an urban environment or canyon) where reflected signals are frequent, the pseudorange noise density takes an unknown form.
        Consequently, to estimate such unknown distribution form, a mixture model can be a suitable solution. In previous works, a first approach was studied based on Jump Markov System (JMS)
        algorithm (
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid57" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid58" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). JMS switches between several observation noise models according
        to the estimated reception state of each satellite. The law parameters which describe the observation noise of each available pseudorange are next used in a particle filter to estimate the
        position. JMS showed its performances in terms of accuracy and continuity of service. However some drawbacks of JMS show that the density modeling can be improved. Indeed, the proposed JMS
        version is strongly related to the study of the close propagation environment and consequently a punctual reflection cannot be detected by the Markov Chain. This can create false detection
        and missed detection, and consequently the model chosen by the algorithm may be wrong. Moreover, we need to allocate T seconds for initialization. Another drawback is that in the context of
        dynamic models, the assumption of stationarity is wrong. And finally, the number of Gaussian components is limited in the Gaussian mixture and consequently the estimated model does not represent
        the true distribution but an approximation of it. That is why we opted for the use of Dirichlet Process Mixture (DPM). We have shown that the DPM, which is an infinite mixture model, is more
        efficient than a finite mixture model to estimate sequentially an unknown distribution. The first step of this algorithm is the sampling of hyperparameters which are a couple of parameters:
        the mean and the standard deviation of each Gaussian law which composes the infinite mixture. This sampling is performed by a Gibbs sampler. Then the hyperparameters are used as inputs of a
        Rao-Blackwellised particle filter (RBPF) to compute the position. This approach outperforms standard models commonly used to represent observation noise distributions, 
        <i>i.e.</i>white and Gaussian noise. The efficiency of this approach has been demonstrated by applying a validation step involving real GPS data. These data have been acquired in an urban
        environment and in a public transport context.</p>
      </subsection>
      <subsection id="uid102" level="2">
        <bodyTitle>Internet of Things</bodyTitle>
        <p>A new thesis, supervised by Emmanuel Duflos and Philippe Vanheeghe, has started in September within the framework of the Internet of Things. The term “Internet of Things” has come to describe
        a number of technologies and research disciplines that enable the Internet to reach out into the real world of physical objects. Technologies like RFID, short-range wireless communications,
        real-time localization and sensor networks are now becoming increasingly common, bringing the Internet of Things into commercial use. In such applications the data sent by a 
        <i>thing</i>to another may generate an impulse noise in the reception channel of objects in the neighbourhood. The noise appearing in such applications can be considered as 
        <span class="math"><img width="12" height="12" align="bottom" border="0" src="../../images/img_alpha.png" alt="$ \alpha$"/></span>-stable which means that moments higher than 2 do not exist. New estimation algorithms must therefore be developed to estimate sequentially the parameters of the probability density
        function which may vary according to time as well as the data received by each node of the network.</p>
      </subsection>
    </subsection>
  </resultats>
  <contrats id="uid103">
    <bodyTitle>Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid104" level="1">
      <bodyTitle>Contracts and Grants with Industry</bodyTitle>
      <subsection id="uid105" level="2">
        <bodyTitle>France Telecom/Orange Labs</bodyTitle>
        <p>We have had a 10 months externalized research contract (CRE) with France Telecom in 2009 on the problem of selecting ads to display on web pages. During his internship in the EPI, V.
        Gabillon has made his master thesis on this subject; J. Mary and Ph. Preux have dedicated a significant part of their time to work on this contract. Based on the very interesting results that
        were obtained during this CRE, a new contract is under negotiation for 2010 as a follow-up to this first work.</p>
        <p>More technical details are available in section 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid84" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>of this document.</p>
      </subsection>
      <subsection id="uid106" level="2">
        <bodyTitle>Inquest</bodyTitle>
        <p>We have had a collaboration with inQuest
        <footnote id="uid107" id-text="4" place="foot" anchored="yes"><ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.inquest.fr" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://
        <allowbreak/>www.
        <allowbreak/>inquest.
        <allowbreak/>fr</ref>.</footnote>, a society working on casual games, located in Villeneuve d'Ascq.</p>
        <p>These new methods should be used in production very shortly and a contract is under negotiation. See sec. 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid106" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>for more about this contract.</p>
      </subsection>
      <subsection id="uid108" level="2">
        <bodyTitle>ETO</bodyTitle>
        <p>A collaboration has been initiated with the private society ETO, located in Roubaix. ETO manages large databases of customers, and fidelity programs, for a few dozens very well-known
        commercial brands (both national, and international brands). ETO also proposes human support in order to follow and exploit these data: identification of high value customers, building of ads
        campaigns, ... Their software is called X27 and requires a lot of human intervention to tailor it to their new customers. ETO wishes to automate as many steps as possible in order to reduce
        the costs and spread their solution widely. That is the so-called A-27 project.</p>
        <p>One of the problems is to cluster customers in a sequential framework. The sequence of data is the list of the visits to a shop. In an ideal world we would model the behavior of any
        customer. This objective is impossible to reach because we do not have enough data on each customer. So, we wish to classify the customers in groups based on their habits. However, customers'
        habits change over time (they are single, then in couple, then have babies, ... they live in a flat, then in a house, ... they earn more and more, ...). One challenge here is to study and
        detect the switch of customers from one cluster to another over time.</p>
        <p>Another goal is to evaluate by simulation the impact of a new ad campaign. It would be used to help marketing to optimize its decisions.</p>
        <p>Jérémie Mary conducted some preliminary work on their data, showing these objectives may be reached. This led to the project Simul-Market between ETO, Vekia and INRIA (involving Jeremie
        Mary and Philippe Preux). Then, this project has been proposed, assessed, and labelled by the PICOM and A-27 will be funded by the Région Nord-Pas de Calais and the FEDER (basically, this
        will fund 2 years post-doc funding, and 1 year of engineer, over 2010 and 2011).</p>
      </subsection>
      <subsection id="uid109" level="2">
        <bodyTitle>Vekia Innovation</bodyTitle>
        <p>Vekia Innovation is the name of the spin-off two of us (P-A. Coquelin and M. Davy) created in 2007, originally under the name “Predict &amp; Control”.</p>
        <p spacebefore="6.0pt">We have done a work on the clustering of temporal series, with an application to the clustering of calls to call centers. A software toolbox has been implemented to
        demonstrate various algorithms.</p>
        <p>This collaboration was funded by OSEO.</p>
      </subsection>
    </subsection>
  </contrats>
  <international id="uid110">
    <bodyTitle>Other Grants and Activities</bodyTitle>
    <subsection id="uid111" level="1">
      <bodyTitle>Regional activities</bodyTitle>
      <subsection id="uid112" level="2">
        <bodyTitle>Pôle de Compétitivité “Industries du commerce”</bodyTitle>
        <participants>
          <person key="sequel-2007-idm332252510144">
            <firstname>Sertan</firstname>
            <lastname>Girgin</lastname>
          </person>
          <person key="sequel-2006-idm391148944048">
            <firstname>Jérémie</firstname>
            <lastname>Mary</lastname>
          </person>
          <person key="sequel-2006-idm391148966784">
            <firstname>Philippe</firstname>
            <lastname>Preux</lastname>
          </person>
        </participants>
        <p><span class="smallcap" align="left">SequeL</span>is taking part in a project named “Ubiquitous Virtual Seller” (VVU) of the Pôle de Compétitivité “Industrie du Commerce” (PICOM). This project
        has begun on Sep. 1
        <sup>st</sup>, 2009 and will last 2 years. The VVU project involves three computer science laboratories (Laboratoire d'Informatique Fondamentale de Lille, INRIA Lille Nord Europe, and Mines
        de Douai), a marketing school (ESC-Lille), and private companies (Becquet, Oxylane, France Telecom, Artificial Solutions, Nextstage). In this project, we are funded by the Région-Nord Pas de
        Calais, and the FEDER; funding is mostly for a post-doc over a period of 18 months. The work involves a close collaboration with other computer science teams at the Laboratoire d'Informatique
        Fondamentale de Lille, and the Mines de Douai. See sec. 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid83" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>for more details about 2009 activities on this contract.</p>
      </subsection>
    </subsection>
    <subsection id="uid113" level="1">
      <bodyTitle>National activities</bodyTitle>
      <subsection id="uid114" level="2">
        <bodyTitle>DGA / Thalès</bodyTitle>
        <participants>
          <person key="sequel-2006-idm391148952976">
            <firstname>Emmanuel</firstname>
            <lastname>Duflos</lastname>
          </person>
          <person key="sequel-2006-idm391148949872">
            <firstname>Philippe</firstname>
            <lastname>Vanheeghe</lastname>
          </person>
          <person key="sequel-2008-idm80283851664">
            <firstname>Emmanuel</firstname>
            <lastname>Delande</lastname>
          </person>
        </participants>
        <p>The work on sensor management went on this year, focusing on three main points:</p>
        <simplelist>
          <li id="uid115">
            <p noindent="true">Modelling the dynamic of the moving object for land applications</p>
          </li>
          <li id="uid116">
            <p noindent="true">Modelling the tracking problem in the Random Finite Sets framework</p>
          </li>
          <li id="uid117">
            <p noindent="true">Modelling the optimization problem as it may usually be done in reinforcement learning</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid118" level="2">
        <bodyTitle>ANR EXPLORA</bodyTitle>
        <participants>
          <person key="sequel-2007-idm332252484192">
            <firstname>Sébastien</firstname>
            <lastname>Bubeck</lastname>
          </person>
          <person key="sequel-2009-idm70241666752">
            <firstname>Alexandra</firstname>
            <lastname>Carpentier</lastname>
          </person>
          <person key="sequel-2008-idm80283851664">
            <firstname>Emmanuel</firstname>
            <lastname>Delande</lastname>
          </person>
          <person key="sequel-2009-idm70241657440">
            <firstname>Victor</firstname>
            <lastname>Gabillon</lastname>
          </person>
          <person key="sequel-2008-idm80283914416">
            <firstname>Mohammad</firstname>
            <lastname>Ghavamzadeh</lastname>
          </person>
          <person key="sequel-2007-idm332252490272">
            <firstname>Jean-François</firstname>
            <lastname>Hren</lastname>
          </person>
          <person key="sequel-2008-idm80283886240">
            <firstname>Alessandro</firstname>
            <lastname>Lazaric</lastname>
          </person>
          <person key="sequel-2006-idm391148935408">
            <firstname>Manuel</firstname>
            <lastname>Loth</lastname>
          </person>
          <person key="sequel-2006-idm391148944048">
            <firstname>Jérémie</firstname>
            <lastname>Mary</lastname>
          </person>
          <person key="sequel-2008-idm80283857760">
            <firstname>Odalric-Ambrym</firstname>
            <lastname>Maillard</lastname>
          </person>
          <person key="sequel-2006-idm391148963008">
            <firstname>Rémi</firstname>
            <lastname>Munos</lastname>
          </person>
          <person key="sequel-2006-idm391148966784">
            <firstname>Philippe</firstname>
            <lastname>Preux</lastname>
          </person>
          <person key="sequel-2007-idm332252530992">
            <firstname>Daniil</firstname>
            <lastname>Ryabko</lastname>
          </person>
        </participants>
        <p>Rémi Munos is the coordinator of the 
        <b>ANR EXPLO-RA</b>
        <footnote id="uid119" id-text="5" place="foot" anchored="yes"><ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://sites.google.com/site/anrexplora/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://
        <allowbreak/>sites.
        <allowbreak/>google.
        <allowbreak/>com/
        <allowbreak/>site/
        <allowbreak/>anrexplora/
        <allowbreak/></ref>.</footnote>(EXPLOration - EXPLOitation for efficient Resource Allocation. Applications to optimization, control, learning, and games) 3 years project which started in
        2009. This is a collaboration between 2 INRIA project-teams (
        <span class="smallcap" align="left">SequeL</span>and TAO), HEC Paris (GREGHEC), Les Ponts (CERTIS), Paris 5 (CRIP5), and the Université Paris Dauphine (LAMSADE).</p>
        <p>This project deals with the question of how to make the best possible use of available resources in order to optimize the performance of some decision-making task. In the case of simulated
        scenarios, the term resource refers to a piece of computational effort (for example CPU time, memory) devoted to the realization of some computation. Nonetheless, we will also consider the
        case of real-world scenarios where the term resource denotes some effort (real-world experiment) that has a real, 
        <i>e.g.</i>financial, cost. Making a good use of the available resources means designing an exploration strategy that would allocate the resources in a clever way such as to maximize (among
        the space of possible exploration strategies) the performance of the resulting task. Potential applications are numerous and may be found in domains where a one-shot decision or a sequence of
        decisions has to be made, such as in optimization, control, learning, and games.</p>
        <p>For that purpose we will consider several ways of combining algorithms which perform a good job in balancing resources between exploitation (making the best decision based on our current,
        but possibly imperfect, knowledge) and exploration (decisions that may appear sub-optimal but which may yield additional information about the unknown parameters, and, as a result, could
        improve the relevance of future decisions). These exploration/exploitation algorithms, also called bandit algorithms, or regret-minimization algorithms, will be the building blocks of our
        methods. They will be combined either in a hierarchical way, or as a population, either in collaborative or adversary working mode.</p>
        <p>A motivating example concerns min-max tree search in large scale games. The goal here is to explore the tree to find the best move for the next play, given a limited amount of simulation
        resources (
        <i>e.g.</i>, CPU time). Here, resource allocation means an exploration strategy that selects which branch one should explore deeper at each time step; the aim being that at the end of the
        available resources, the collected information allows making the best decision (or an almost optimal decision). Previous works in efficient tree exploration using hierarchical bandits for the
        game of go have shown very promising results (such as the MoGo program [Gelly et al., 2006] currently among the world best computer-go programs), which have motivated our research for
        extending both the theoretical analysis of the underlying ideas and their scope to a wide range of applications.</p>
        <p>We expect to develop new simulation techniques based on a clever use of available computational resources, in order to solve large scale optimization and decision making problems
        previously considered unsolvable. See sec. 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid77" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>for details about 2009 scientific activities.</p>
      </subsection>
      <subsection id="uid120" level="2">
        <bodyTitle>ANR Kernsig</bodyTitle>
        <participants>
          <person key="sequel-2006-idm391148952976">
            <firstname>Emmanuel</firstname>
            <lastname>Duflos</lastname>
          </person>
          <person key="sequel-2008-idm80283882784">
            <firstname>Hachem</firstname>
            <lastname>Kadri</lastname>
          </person>
          <person key="sequel-2006-idm391148966784">
            <firstname>Philippe</firstname>
            <lastname>Preux</lastname>
          </person>
        </participants>
        <p>The ANR Kernsig project began in 2007 and it is headed by Prof. S. Canu with the INSA-Rouen. It deals with the study of kernel methods for signal processing.</p>
        <p>See the section 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid94" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>for scientific details of 2009 activities.</p>
      </subsection>
      <subsection id="uid121" level="2">
        <bodyTitle>ANR Lampada</bodyTitle>
        <participants>
          <person key="sequel-2008-idm80283914416">
            <firstname>Mohammad</firstname>
            <lastname>Ghavamzadeh</lastname>
          </person>
          <person key="sequel-2006-idm391148944048">
            <firstname>Jérémie</firstname>
            <lastname>Mary</lastname>
          </person>
          <person key="sequel-2006-idm391148966784">
            <firstname>Philippe</firstname>
            <lastname>Preux</lastname>
          </person>
        </participants>
        <p>The ANR Lampada project has been submitted, and approved in 2009, and will officially begin in 2010. Lampada means “Learning Algorithms, Models and sPArse representations for structured
        DAta”
        <footnote id="uid122" id-text="6" place="foot" anchored="yes">project website: 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://lampada.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://
        <allowbreak/>lampada.
        <allowbreak/>gforge.
        <allowbreak/>inria.
        <allowbreak/>fr/
        <allowbreak/></ref>.</footnote>. This project involves approximately 30 people from Paris (LIP'6, P. Gallinari's group), Marseille (LIF, F. Denis' group), Saint-Étienne (LHC, M. Sebban's
        group), the Mostrare and 
        <span class="smallcap" align="left">SequeL</span>EPIs. M. Tommasi from Mostrare is the head of this ANR.</p>
        <p>Lampada is a fundamental research project on machine learning and structured data. It focuses on scaling learning algorithms to handle large sets of complex data. The main challenges
        are:</p>
        <orderedlist>
          <li id="uid123">
            <p noindent="true">high dimension learning problems,</p>
          </li>
          <li id="uid124">
            <p noindent="true">large sets of data and</p>
          </li>
          <li id="uid125">
            <p noindent="true">dynamics of data.</p>
          </li>
        </orderedlist>
        <p>Complex data we consider are evolving and composed of parts among which there are some relations. Representations of these data embed both structure and content information and are
        typically large sequences, trees and graphs. The main application domains are web2, social networks and biological data.</p>
        <p>The project proposes to study formal representations of such data together with incremental or sequential machine learning methods and similarity learning methods.</p>
        <p>The representation research topic includes condensed data representation, sampling, prototype selection and representation of streams of data. Machine learning methods include edit
        distance learning, reinforcement learning and incremental methods, density estimation of structured data and learning on streams.</p>
        <p><span class="smallcap" align="left">SequeL</span>is particularly concerned with the learning of the representation of data in high dimensional spaces, in particular the work on feature
        extraction, and non parametric supervised learning algorithms.</p>
      </subsection>
      <subsection id="uid126" level="2">
        <bodyTitle>ANR Co-Adapt</bodyTitle>
        <participants>
          <person key="sequel-2006-idm391148963008">
            <firstname>Rémi</firstname>
            <lastname>Munos</lastname>
          </person>
        </participants>
        <p>This ANR project has been submitted, and approved in 2009. Rémi Munos is the 
        <span class="smallcap" align="left">SequeL</span>coordinator of the 
        <b>ANR CO-ADAPT</b>(Brain computer co-adaptation for better interfaces) project which starts at the end of 2009 (for 4 years). This is in collaboration with the INRIA Odyssee project (Maureen
        Clerc), the INSERM U821 team (Olivier Bertrand), the Laboratory of Neurobiology of Cognition (CNRS) (Boris Burle) and the laboratory of Analysis, topology and probabilities (CNRS and
        University of Provence) (Bruno Torresani).</p>
        <subsection id="uid127" level="3">
          <bodyTitle>Workshop “Localisation Précise pour les Transports Terrestres”</bodyTitle>
          <p>Emmanuel Duflos was the main organizer, in collaboration with the LEOST, Heudiasyc and LCPC French laboratories, of a workshop on precise localization for land transportation. This
          workshop was held in Paris on June, 16
          <span class="math"><img align="bottom" width="21" height="12" src="math_image_34.png" xylemeAttach="34" border="0" alt="Im34 ${{}^\mtext thth}$"/></span>. There were more than 30 attendees. A CD-ROM has been edited (INRETS publisher) on which papers are in English to ease the spreading of the presented works.
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid128" level="1">
      <bodyTitle>International activities</bodyTitle>
      <subsection id="uid129" level="2">
        <bodyTitle>PASCAL2 Network of excellence</bodyTitle>
        <p>In 2009, 
        <span class="smallcap" align="left">SequeL</span>has joined the Pascal-2 European network of excellence dedicated to machine learning. 
        <span class="smallcap" align="left">SequeL</span>has created a new node of this NoE in collaboration with the EPI Mostrare, and Stéphane Canu's group in Rouen. R. Munos is the head of this
        node.</p>
      </subsection>
      <subsection id="uid130" level="2">
        <bodyTitle>PASCAL2 Pump-Priming Project</bodyTitle>
        <p>Pump-Priming is a program organized by the PASCAL2 network of excellence. The goal of this program is to provide support for collaborative research on novel topics that are not yet
        sufficiently mature to attract mainstream funding. Rémi Munos and Mohammad Ghavamzadeh, along with Shie Mannor, an associate professor at the department of electrical engineering at Technion,
        Haifa, Israel, submitted a proposal on “Sparse Reinforcement Learning in High Dimensions” to this program. Our proposal was accepted for funding in September 2009. This is a 2 year project
        that starts in November 2009.</p>
        <p>The main objective of this project is to find appropriate representations for value function approximation in high-dimensional spaces, and to use them to develop efficient reinforcement
        learning algorithms. By appropriate we mean representations that facilitate fast and robust learning, and by efficient we mean algorithms whose sample and computational complexities do not
        grow too rapidly with the dimension of the observations. We further intend to provide theoretical analysis for these algorithms as we believe that such results will help us refine the
        performance of such algorithms. We intend to empirically evaluate the performance of the developed algorithms in real-world applications such as a complex network management domain and a
        dogfight flight simulator.</p>
        <p>This is a fundamental research project that would also help us to establish a collaboration with a very strong research group at Technion in Israel.</p>
      </subsection>
      <subsection id="uid131" level="2">
        <bodyTitle>University of Alberta, Canada</bodyTitle>
        <p>We have continued our collaboration with the University of Alberta in Canada:</p>
        <simplelist>
          <li id="uid132">
            <p noindent="true">with Prof. Csaba Szepesvári and Amir massoud Farahmand at the University of Alberta, Canada, on the topic of 
            <i>regularities in sequential decision making problems</i>. We have published two conference papers 
            <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid59" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
            <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid28" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>and had two workshop papers accepted on this topic this
            year.</p>
          </li>
          <li id="uid133">
            <p noindent="true">with Prof. Richard Sutton from the University of Alberta, Canada, and Prof. Shalabh Bhatnagar from the Indian Institute of Science, Bangalore, India, on the topic of 
            <i>actor-critic algorithms</i>, on which we have published a journal paper 
            <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>and a technical report 
            <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#sequel-2009-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>this year.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid134" level="2">
        <bodyTitle>Russia</bodyTitle>
        <p>D. Ryabko obtained an INRIA grant in the “collaboration avec la Russie” framework, for collaboration on steganography and statistics with the Institute of Computational Technologies, Siberian
        Branch of the Russian Academy of Sciences, which funds two mutual visits. As a part of this funding scheme, D. Ryabko is also going to make a visit to Laboratoire J-V. Poncelet, Moscow, and give a
        talk on sequence prediction and statistics of processes there.</p>
      </subsection>
      <subsection id="uid135" level="2">
        <bodyTitle>MPI Tübingen</bodyTitle>
        <p>Sébastien Bubeck collaborates with U. von Luxburg on clustering.</p>
      </subsection>
      <subsection id="uid136" level="2">
        <bodyTitle>COLT workshop</bodyTitle>
        <p>A 1-day workshop “On-line Learning with Limited Feedback” (a PASCAL2-sponsored event) has been organized by Alessandro Lazaric, Rémi Munos, Daniil Ryabko, Sébastien Bubeck, Odalric Maillard,
        Jean-Yves Audibert, Peter Auer, and Csaba Szepesvári.</p>
      </subsection>
      <subsection id="uid137" level="2">
        <bodyTitle>Special session at COGIS'2009</bodyTitle>
        <p>Along with François Caron (EPI Alea, Bordeaux), E. Duflos organized a session on multi-target tracking at the conference COGIS'2009, held in Paris, Nov. 16-18
        <sup>th</sup>, 2009.</p>
      </subsection>
      <subsection id="uid138" level="2">
        <bodyTitle>Programme Interdisciplinaire de Coopération Scientifique</bodyTitle>
        <p>A “Programme Interdisciplinaire de Coopération Scientifique” (PICS) is running over the period 2008–2010 which concerns Ph. Vanheeghe, and E. Duflos, in relation with the Centre for
        Pavement and Transportation Technology (CPATT), headed by prof. Carl Haas at the University of Waterloo, Canada.</p>
        <p>The optimal use of the data provided by the sensors must necessarily lie within a dynamic process suitable to control the acquisition of information. This project proposes to define
        principles and methods for the management of multisensor systems in the frame of civil engineering. This work requires the development of specific methodological tools. These tools will be
        tested on a real civil engineering application, the characterization of new materials for highway pavement. Multisensor management is being integrated in this very ambitious Canadian civil
        engineering project. The Canadian team will carry out the instrumentation and the validation, whereas the definition of the tools and method will be carried out in tight partnership and
        controlled by the French team.</p>
      </subsection>
    </subsection>
    <subsection id="uid139" level="1">
      <bodyTitle>Visits and invitations</bodyTitle>
      <simplelist>
        <li id="uid140">
          <p noindent="true">E. Duflos and Ph. Vanheeghe visit Carl Haas, U. Waterloo, Ontario, Canada, to work further in the frame of their joint PICS (November 28
          <sup>th</sup>to December 5
          <sup>th</sup>)</p>
        </li>
        <li id="uid141">
          <p noindent="true">Daniil Ryabko visits the J-V. Poncelet laboratory in Moscow.</p>
        </li>
        <li id="uid142">
          <p noindent="true">Daniil Ryabko visits Petri Myllymaki at the University of Helsinki, Finland.</p>
        </li>
        <li id="uid143">
          <p noindent="true">Rémi Munos and Rémi Coulom were invited to the Japanese-French conference, Tokyo, Jan. 2009</p>
        </li>
      </simplelist>
    </subsection>
  </international>
  <diffusion id="uid144">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid145" level="1">
      <bodyTitle>Scientific community animation</bodyTitle>
      <simplelist>
        <li id="uid146">
          <p noindent="true">A. Lazaric presented a tutorial on “Transfer Learning in Reinforcement Learning Domains” at both conferences AAMAS'2009, and ECML'2009.</p>
        </li>
        <li id="uid147">
          <p noindent="true">participation to the program committees of international conferences:</p>
          <simplelist>
            <li id="uid148">
              <p noindent="true">R. Coulom: “Advances in Computer Games 12”</p>
            </li>
            <li id="uid149">
              <p noindent="true">E. Duflos: workshop on the Theory of Belief Function (Brest, April 1-2, 2010), Fusion 2009, Gretsi 2009</p>
            </li>
            <li id="uid150">
              <p noindent="true">M. Ghavamzadeh: International Conference on Machine Learning (ICML 2009), Annual Conference on Neural Information Processing Systems (NIPS 2009)</p>
            </li>
            <li id="uid151">
              <p noindent="true">R. Munos: NIPS 2009, ADPRL 2009, AISTATS 2009, ALT 2009, ICML 2009, JFPDA 2009</p>
            </li>
            <li id="uid152">
              <p noindent="true">Ph. Preux: ECML 2009, IJCAI 2009, ADPRL 2009, EGC 2009 and 2010</p>
            </li>
            <li id="uid153">
              <p noindent="true">D. Ryabko: “Learning from non-IID data” ECML 2009 workshop</p>
            </li>
            <li id="uid154">
              <p noindent="true">E. Duflos: Fusion 2009</p>
            </li>
          </simplelist>
        </li>
        <li id="uid155">
          <p noindent="true">international journal and conference reviewing activities (in addition to the conferences in which we belong to the PC):</p>
          <simplelist>
            <li id="uid156">
              <p noindent="true">E. Duflos: IEEE Transactions on Signal Processing, International Journal of Approximate Reasoning, Information Fusion.</p>
            </li>
            <li id="uid157">
              <p noindent="true">M. Ghavamzadeh: Machine Learning Journal (MLJ), Journal of Artificial Intelligence Research (JAIR), Journal of Autonomous Agents and Multi-Agent Systems (JAAMAS),</p>
            </li>
            <li id="uid158">
              <p noindent="true">J. Mary: Journal of Machine Learning Research (JMLR), EGC 2010</p>
            </li>
            <li id="uid159">
              <p noindent="true">R. Munos: Annals of Telecommunications, Machine Learning, Mathematics of Operations Research, Revue d'Intelligence Artificielle,</p>
            </li>
            <li id="uid160">
              <p noindent="true">Ph. Preux: Machine Learning Journal, IEEE Trans. on SMC-C, Algorithms</p>
            </li>
            <li id="uid161">
              <p noindent="true">D. Ryabko: Uncertainty in Artificial Intelligence (UAI) 2009</p>
            </li>
          </simplelist>
        </li>
        <li id="uid162">
          <p noindent="true">R. Munos and Ph. Preux have reviewed proposals in the ANR Blanc program (2009)</p>
        </li>
        <li id="uid163">
          <p noindent="true">R. Munos has reviewed proposals in the ANR Jeunes Chercheurs program (2009), and ANR COSINUS program</p>
        </li>
        <li id="uid164">
          <p noindent="true">R. Munos has been a member of the following committees:</p>
          <simplelist>
            <li id="uid165">
              <p noindent="true">INRIA Senior Researcher (DR 2) recruitment, 2009</p>
            </li>
            <li id="uid166">
              <p noindent="true">INRIA Junior Researcher (CR 2) recruitment in Nancy, 2009</p>
            </li>
            <li id="uid167">
              <p noindent="true">Scientific organizer of the INRIA evaluation seminar theme “Optimisation, apprentissage et méthodes statistiques”, scheduled in March 2010.</p>
            </li>
            <li id="uid168">
              <p noindent="true">animation committee of the INRIA theme “Mathématiques appliquées, calcul et simulation”.</p>
            </li>
            <li id="uid169">
              <p noindent="true">INRIA Evaluation committee</p>
            </li>
          </simplelist>
        </li>
        <li id="uid170">
          <p noindent="true">participation to PhD jurys:</p>
          <simplelist>
            <li id="uid171">
              <p noindent="true">R. Munos was Rapporteur for PhD thesis of Matthieu Geist (Supélec Metz), and member of the PhD defense jury of Lucian Busoniu (Delft University, the Netherlands), Emmanuel
              Rachelson (University of Toulouse), Olivier Caelen (ULB, Belgium)</p>
            </li>
            <li id="uid172">
              <p noindent="true">Ph. Preux was Rapporteur for the PhD thesis of A. Machado (Lip 6). He also serves as a member of the “Jury Gilles Kahn 2009” which aims at awarding the “best”
              computer science PhD dissertation of the year.</p>
            </li>
          </simplelist>
        </li>
        <li id="uid173">
          <p noindent="true">expertise:</p>
          <simplelist>
            <li id="uid174">
              <p noindent="true">R. Munos was a referee in:</p>
              <simplelist>
                <li id="uid175">
                  <p noindent="true">ERC starting grants evaluation Panel PE6 (Computer Science and Informatics)</p>
                </li>
                <li id="uid176">
                  <p noindent="true">Digiteo project in logiciel et systèmes complexes (Ile-de-France region)</p>
                </li>
                <li id="uid177">
                  <p noindent="true">Review for a CRC book on Reinforcement Learning</p>
                </li>
              </simplelist>
            </li>
          </simplelist>
        </li>
        <li id="uid178">
          <p noindent="true">invited talks:</p>
          <simplelist>
            <li id="uid179">
              <p noindent="true">R. Munos was invited speaker: seminars given at Delft University (the Netherlands), Imperial College of London, Université Libre de Bruxelles, Atelier PIRSTEC (Lyon).</p>
            </li>
            <li id="uid180">
              <p noindent="true">R. Coulom was invited speaker at the “Japanese-French Frontiers of Science Symposium (JFFoS'2009)”</p>
            </li>
            <li id="uid181">
              <p noindent="true">J. Mary was invited to give a Smile seminar at the “École des Mines de Paris”</p>
            </li>
            <li id="uid182">
              <p noindent="true">J. Mary gave a lecture at the Montebello high-school in Lille in a program aiming at drawing more students towards scientific studies.</p>
            </li>
          </simplelist>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid183" level="1">
      <bodyTitle>Teaching</bodyTitle>
      <p>We list the classes that are related to the research activities in 
      <span class="smallcap" align="left">SequeL</span>that happened in 2008.</p>
      <simplelist>
        <li id="uid184">
          <p noindent="true">Rémi Munos teaches a class in reinforcement learning in the M2 “Mathematics-Vision-Learning” (MVA) at the ENS-Cachan.</p>
        </li>
        <li id="uid185">
          <p noindent="true">Philippe Preux teaches:</p>
          <simplelist>
            <li id="uid186">
              <p noindent="true">in the M2 MIASHS, 2 data mining classes</p>
            </li>
            <li id="uid187">
              <p noindent="true">in the M2 of computer science at the University of Lille a class on reinforcement learning.</p>
            </li>
          </simplelist>
        </li>
        <li id="uid188">
          <p noindent="true">Jérémie Mary is head of the speciality “Informatique et Documents” of the Master MIASHS.</p>
        </li>
        <li id="uid189">
          <p noindent="true">Jérémie Mary and Rémi Coulom are teaching data mining in master MIASHS at the University of Lille.</p>
        </li>
      </simplelist>
      <p>Otherwise, each of the 4 professors and assistant professors of the 
      <span class="smallcap" align="left">SequeL</span>team teaches 192 hours per year. Taught classes include machine learning, data mining, and signal processing classes.</p>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    <biblStruct id="sequel-2009-bid77" type="article" rend="refer" n="refercite:AudibertTCS08" default="NO">
      <analytic>
        <title level="a">Tuning Bandit Algorithms in Stochastic Environments</title>
        <author>
          <persName>
            <foreName>J-Y.</foreName>
            <surname>Audibert</surname>
            <initial>J.-Y.</initial>
          </persName>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Cs.</foreName>
            <surname>Szepesvári</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">Theoretical Computer Science</title>
        <imprint>
          <dateStruct>
            <year full="yes">2008</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote" place="unspecified" anchored="yes">To appear</note>
    </biblStruct>
    <biblStruct id="sequel-2009-bid78" type="inproceedings" rend="refer" n="refercite:HOO08" default="NO">
      <analytic>
        <title level="a">Online Optimization of X-armed Bandits</title>
        <author>
          <persName key="sequel-2007-idm332252484192">
            <foreName>S.</foreName>
            <surname>Bubeck</surname>
            <initial>S.</initial>
          </persName>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName key="micmac-2006-idm511006334512">
            <foreName>G.</foreName>
            <surname>Stoltz</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Cs.</foreName>
            <surname>Szepesvári</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of Advances in Neural Information Processing Systems</title>
        <imprint>
          <biblScope type="volume">22</biblScope>
          <publisher>
            <orgName>MIT Press</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2008</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid83" type="article" rend="refer" n="refercite:caron2008" default="NO">
      <analytic>
        <title level="a">Bayesian Inference for Linear Dynamic Models With Dirichlet Process Mixtures</title>
        <author>
          <persName key="cqfd-2008-idm283607001520">
            <foreName>F.</foreName>
            <surname>Caron</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sequel-2006-idm391148956016">
            <foreName>M.</foreName>
            <surname>Davy</surname>
            <initial>M.</initial>
          </persName>
          <persName key="texmex-2006-idm152358787648">
            <foreName>A.</foreName>
            <surname>Doucet</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2006-idm391148949872">
            <foreName>Ph.</foreName>
            <surname>Vanheeghe</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="j">IEEE Transactions on Signal Processing</title>
        <imprint>
          <biblScope type="volume">56</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <month full="yes">January</month>
            <year full="yes">2008</year>
          </dateStruct>
          <biblScope type="pages">71–84</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid76" type="article" rend="refer" n="refercite:Caron-IEEESP-2006" default="NO">
      <analytic>
        <title level="a">Particle Filtering for Multisensor Data Fusion with Switching Observation Models. Application to Land Vehicle Positioning</title>
        <author>
          <persName key="cqfd-2008-idm283607001520">
            <foreName>F.</foreName>
            <surname>Caron</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sequel-2006-idm391148956016">
            <foreName>M.</foreName>
            <surname>Davy</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2006-idm391148949872">
            <foreName>Ph.</foreName>
            <surname>Vanheeghe</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">IEEE Transactions on Signal Processing</title>
        <imprint>
          <biblScope type="volume">55</biblScope>
          <biblScope type="number">6</biblScope>
          <dateStruct>
            <month full="yes">June</month>
            <year full="yes">2006</year>
          </dateStruct>
          <biblScope type="pages">2703–2719</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid82" type="article" rend="refer" n="refercite:coulom2007c" default="NO">
      <analytic>
        <title level="a">Computing Elo Ratings of Move Patterns in the Game of Go</title>
        <author>
          <persName key="sequel-2006-idm391148946784">
            <foreName>R.</foreName>
            <surname>Coulom</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">International Computer Games Association Journal</title>
        <imprint>
          <dateStruct>
            <year full="yes">2007</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid79" type="inproceedings" rend="refer" n="refercite:clsr09" default="NO">
      <analytic>
        <title level="a">Compressed Least Squares Regression</title>
        <author>
          <persName>
            <foreName>O-A.</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of Advances in Neural Information Processing Systems</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid80" type="article" rend="refer" n="refercite:Munos_SIAM08" default="NO">
      <analytic>
        <title level="a">Performance Bounds in Lp norm for Approximate Value Iteration</title>
        <author>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="j">SIAM J. Control and Optimization</title>
        <imprint>
          <biblScope type="volume">46</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <year full="yes">2008</year>
          </dateStruct>
          <biblScope type="pages">541–561</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid81" type="article" rend="refer" n="refercite:Munos_JMLR07" default="NO">
      <analytic>
        <title level="a">Finite time bounds for sampling based fitted value iteration</title>
        <author>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Cs.</foreName>
            <surname>Szepesvári</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">Journal of Machine Learning Research</title>
        <imprint>
          <dateStruct>
            <year full="yes">2007</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid74" type="article" rend="refer" n="refercite:Ryabko:08ao++" default="NO">
      <analytic>
        <title level="a">On the Possibility of Learning in Reactive Environments with Arbitrary Dependence</title>
        <author>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>M.</foreName>
            <surname>Hutter</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">Theoretical Computer Science</title>
        <imprint>
          <biblScope type="volume">405</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <year full="yes">2008</year>
          </dateStruct>
          <biblScope type="pages">274–284</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid75" type="article" rend="refer" n="refercite:Ryabko:08pqaml" default="NO">
      <analytic>
        <title level="a">Predicting Non-Stationary Processes</title>
        <author>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>M.</foreName>
            <surname>Hutter</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">Applied Mathematics Letters</title>
        <imprint>
          <biblScope type="volume">21</biblScope>
          <biblScope type="number">5</biblScope>
          <dateStruct>
            <year full="yes">2008</year>
          </dateStruct>
          <biblScope type="pages">477–482</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="5291" id="sequel-2009-bid61" type="phdthesis" rend="year" n="cite:NAH2009" default="NO">
      <monogr>
        <title level="m">Impact des multitrajets sur les performances des systèmes de navigation par satellite : Contribution à l'amélioration de la précision de localisation par modélisation
        bayésienne</title>
        <author>
          <persName>
            <foreName>F.</foreName>
            <surname>Nahimana</surname>
            <initial>F.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">École Centrale de Lille</orgName>
          </publisher>
          <dateStruct>
            <month full="yes">February</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc" place="unspecified" anchored="yes">Ph. D. Thesis</note>
    </biblStruct>
    <biblStruct dedoublkey="0637" id="sequel-2009-bid73" type="article" rend="year" n="cite:AudibertTCS08" default="NO">
      <analytic>
        <title level="a">Exploration-exploitation trade-off using variance estimates in multi-armed bandits</title>
        <author>
          <persName>
            <foreName>J-Y.</foreName>
            <surname>Audibert</surname>
            <initial>J.-Y.</initial>
          </persName>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Cs.</foreName>
            <surname>Szepesvári</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid01946" x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <idno type="issn">0304-3975</idno>
        <title level="j">Theoretical Computer Science</title>
        <imprint>
          <biblScope type="volume">410</biblScope>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">1876–1902</biblScope>
        </imprint>
      </monogr>
      <affiliation>
        <country>CA</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="1047" id="sequel-2009-bid18" type="article" rend="year" n="cite:Bhatnagar09NA" default="NO">
      <analytic>
        <title level="a">Natural Actor-Critic Algorithms</title>
        <author>
          <persName>
            <foreName>S.</foreName>
            <surname>Bhatnagar</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName key="mistis-2012-idm128610638448">
            <foreName>M.</foreName>
            <surname>Lee</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid00229" x-editorial-board="yes" x-international-audience="yes">
        <idno type="issn">0005-1098</idno>
        <title level="j">Automatica</title>
        <imprint>
          <biblScope type="volume">45</biblScope>
          <biblScope type="number">11</biblScope>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">2471–2482</biblScope>
        </imprint>
      </monogr>
      <affiliation>
        <country>CA</country>
        <country>IN</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="1050" id="sequel-2009-bid54" type="article" rend="year" n="cite:Seb_JMLR08" default="NO">
      <analytic>
        <title level="a">Nearest Neighbor Clustering: A Baseline Method for Consistent Clustering with Arbitrary Objective Functions</title>
        <author>
          <persName key="sequel-2007-idm332252484192">
            <foreName>S.</foreName>
            <surname>Bubeck</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>U.</foreName>
            <surname>von Luxburg</surname>
            <initial>U.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid01276" x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <idno type="issn">1532-4435</idno>
        <title level="j">Journal of Machine Learning Research</title>
        <imprint>
          <biblScope type="volume">10</biblScope>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">657–698</biblScope>
        </imprint>
      </monogr>
      <affiliation>
        <country>DE</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="0900" id="sequel-2009-bid71" type="article" rend="year" n="cite:Coulom-2009b" default="NO">
      <analytic>
        <title level="a">Le jeu de go et la révolution de Monte Carlo</title>
        <author>
          <persName key="sequel-2006-idm391148946784">
            <foreName>R.</foreName>
            <surname>Coulom</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid01098" x-editorial-board="yes" x-international-audience="no" x-proceedings="no">
        <idno type="issn">I-NtFd17</idno>
        <title level="j">Interstices</title>
        <imprint>
          <dateStruct>
            <month full="yes">April</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="2116" id="sequel-2009-bid53" type="incollection" rend="year" n="cite:3AI2009etendu" default="NO">
      <analytic>
        <title level="a">Light Source Storage and Interpolation for Global Illumination: a neural solution</title>
        <author>
          <persName>
            <foreName>S.</foreName>
            <surname>Delepoulle</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Ch.</foreName>
            <surname>Renaud</surname>
            <initial>C.</initial>
          </persName>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Intelligent Computer Graphics</title>
        <title level="s">Studies in Computational Intelligence</title>
        <imprint>
          <biblScope type="chapter">5</biblScope>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">87–104</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="1036" id="sequel-2009-bid60" type="article" rend="year" n="cite:Mazouni09" default="NO">
      <analytic>
        <title level="a">Multi Reaction Batch Process and Optimal Time Switching Control</title>
        <author>
          <persName key="mere-2006-idm364725807456">
            <foreName>D.</foreName>
            <surname>Mazouni</surname>
            <initial>D.</initial>
          </persName>
          <persName key="mere-2006-idm364725824832">
            <foreName>J.</foreName>
            <surname>Harmand</surname>
            <initial>J.</initial>
          </persName>
          <persName key="mere-2006-idm364725819072">
            <foreName>A.</foreName>
            <surname>Rapaport</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>H.</foreName>
            <surname>Hammouri</surname>
            <initial>H.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid01636" x-editorial-board="yes" x-international-audience="yes">
        <idno type="issn">0143-2087</idno>
        <title level="j">Journal of Optimal Control Application and Methods</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="0247" id="sequel-2009-bid43" type="article" rend="year" n="cite:Ryabko09:steg" default="NO">
      <analytic>
        <title level="a">Asymptotically Optimal Perfect Steganographic Systems</title>
        <author>
          <persName>
            <foreName>B.</foreName>
            <surname>Ryabko</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid01700" x-editorial-board="yes" x-international-audience="yes">
        <idno type="issn">0032-9460</idno>
        <title level="j">Problems of Information Transmission</title>
        <imprint>
          <biblScope type="volume">45</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">184–190</biblScope>
        </imprint>
      </monogr>
      <affiliation>
        <country>RU</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="1064" id="sequel-2009-bid38" type="article" rend="year" n="cite:Ryabko10:3s" default="NO">
      <analytic>
        <title level="a">Nonparametric Statistical Inference for Ergodic Processes</title>
        <author>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>B.</foreName>
            <surname>Ryabko</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid00819" x-editorial-board="yes" x-international-audience="yes">
        <idno type="issn">0018-9448</idno>
        <title level="j">IEEE Transactions on Information Theory</title>
        <imprint>
          <dateStruct>
            <year full="yes">2010</year>
          </dateStruct>
        </imprint>
      </monogr>
      <affiliation>
        <country>RU</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="1643" id="sequel-2009-bid42" type="article" rend="year" n="cite:Ryabko:08testsaml" default="NO">
      <analytic>
        <title level="a">Using Data Compressors to Construct Order Tests for Homogeneity and Component Independence</title>
        <author>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Schmidhuber</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid00180" x-editorial-board="yes" x-international-audience="yes">
        <idno type="issn">0893-9659</idno>
        <title level="j">Applied Mathematics Letters</title>
        <imprint>
          <biblScope type="volume">22</biblScope>
          <biblScope type="number">7</biblScope>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">1029–1032</biblScope>
        </imprint>
      </monogr>
      <affiliation>
        <country>CH</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="1284" id="sequel-2009-bid63" type="article" rend="year" n="cite:deVIL09" default="NO">
      <analytic>
        <title level="a">Radar Optimal Times Detection Allocation in Multitarget Environment</title>
        <author>
          <persName>
            <foreName>M.</foreName>
            <surname>de Vilmorin</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2006-idm391148949872">
            <foreName>Ph.</foreName>
            <surname>Vanheeghe</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid00785" x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <idno type="issn">1932-8184</idno>
        <title level="j">IEEE Systems Journal</title>
        <imprint>
          <biblScope type="volume">3</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <month full="yes">June</month>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">210–220</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="3750" id="sequel-2009-bid34" type="inproceedings" rend="year" n="cite:AB09" default="NO">
      <analytic>
        <title level="a">Minimax Policies for Adversarial and Stochastic Bandits</title>
        <author>
          <persName>
            <foreName>J-Y.</foreName>
            <surname>Audibert</surname>
            <initial>J.-Y.</initial>
          </persName>
          <persName key="sequel-2007-idm332252484192">
            <foreName>S.</foreName>
            <surname>Bubeck</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">22nd annual conference on learning theory</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid29437">
          <title>Annual Conference on Learning Theory</title>
          <num>22</num>
          <abbr type="sigle">COLT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="4220" id="sequel-2009-bid32" type="inproceedings" rend="year" n="cite:BMS09" default="NO">
      <analytic>
        <title level="a">Pure Exploration in Multi-Armed Bandits Problems</title>
        <author>
          <persName key="sequel-2007-idm332252484192">
            <foreName>S.</foreName>
            <surname>Bubeck</surname>
            <initial>S.</initial>
          </persName>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName key="micmac-2006-idm511006334512">
            <foreName>G.</foreName>
            <surname>Stoltz</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. of the 20th International Conference on Algorithmic Learning Theory</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>20</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="4431" id="sequel-2009-bid31" type="inproceedings" rend="year" n="cite:sensitivityHMM09" default="NO">
      <analytic>
        <title level="a">Sensitivity analysis in HMMs with application to likelihood maximization</title>
        <author>
          <persName>
            <foreName>P.A.</foreName>
            <surname>Coquelin</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>R.</foreName>
            <surname>Deguest</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of Advances in Neural Information Processing Systems</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>22</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="4661" id="sequel-2009-bid70" type="inproceedings" rend="year" n="cite:Coulom-2009a" default="NO">
      <analytic>
        <title level="a">The Monte-Carlo Revolution in Go</title>
        <author>
          <persName key="sequel-2006-idm391148946784">
            <foreName>R.</foreName>
            <surname>Coulom</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Japanese-French Frontiers of Science Symposium (JFFoS'2009), Shonan, Japan</title>
        <imprint>
          <dateStruct>
            <month full="yes">January</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid339884">
          <title>Japanese-French Frontiers of Science Symposium</title>
          <num>3</num>
          <abbr type="sigle">JFFoS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="2377" id="sequel-2009-bid62" type="inproceedings" rend="year" n="cite:icann09" default="NO">
      <analytic>
        <title level="a">A Model of Neuronal Specialization Using Hebbian Policy-Gradient with Slow Noise</title>
        <author>
          <persName key="sequel-2008-idm80283907440">
            <foreName>E.</foreName>
            <surname>Daucé</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. of the Int'l Conf. on Artificial Neural Networks (ICANN)</title>
        <title level="s">Lecture Notes in Computer Science (LNCS)</title>
        <imprint>
          <biblScope type="volume">5768</biblScope>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">218–228</biblScope>
        </imprint>
        <meeting id="cid338731">
          <title>International conference on Artificial Neural Networks</title>
          <num>19</num>
          <abbr type="sigle">ICANN</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="4146" id="sequel-2009-bid52" type="inproceedings" rend="year" n="cite:3ia" default="NO">
      <analytic>
        <title level="a">Photometric compression and interpolation for light source representation</title>
        <author>
          <persName>
            <foreName>S.</foreName>
            <surname>Delepoulle</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Ch.</foreName>
            <surname>Renaud</surname>
            <initial>C.</initial>
          </persName>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. 3IA, Athens, Greece</title>
        <imprint>
          <dateStruct>
            <month full="yes">May</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid17935">
          <title>3IA International Conference on Computer Graphics and Artificial Intelligence</title>
          <num>12</num>
          <abbr type="sigle">3IA</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="4319" id="sequel-2009-bid66" type="inproceedings" rend="year" n="cite:Farahmand09RR" default="NO">
      <analytic>
        <title level="a">Regularization in Reinforcement Learning</title>
        <author>
          <persName>
            <foreName>A. M.</foreName>
            <surname>Farahmand</surname>
            <initial>A. M.</initial>
          </persName>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Cs.</foreName>
            <surname>Szepesvári</surname>
            <initial>Cs.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Mannor</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="no">
        <title level="m">Multidisciplinary Symposium on Reinforcement Learning (MSRL)</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid354539">
          <title>Multidisciplinary Symposium on Reinforcement Learning</title>
          <num>2009</num>
          <abbr type="sigle">MSRL</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>CA</country>
        <country>IL</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="4320" id="sequel-2009-bid28" type="inproceedings" rend="year" n="cite:Farahmand09RF" default="NO">
      <analytic>
        <title level="a">Regularized Fitted Q-iteration for Planning in Continuous-Space Markovian Decision Problems</title>
        <author>
          <persName>
            <foreName>A. M.</foreName>
            <surname>Farahmand</surname>
            <initial>A. M.</initial>
          </persName>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Cs.</foreName>
            <surname>Szepesvári</surname>
            <initial>Cs.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Mannor</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of the American Control Conference</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid28220">
          <title>American Control Conference</title>
          <num>2009</num>
          <abbr type="sigle">ACC</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>CA</country>
        <country>IL</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="4321" id="sequel-2009-bid59" type="inproceedings" rend="year" n="cite:Farahmand09RP" default="NO">
      <analytic>
        <title level="a">Regularized Policy Iteration</title>
        <author>
          <persName>
            <foreName>A. M.</foreName>
            <surname>Farahmand</surname>
            <initial>A. M.</initial>
          </persName>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Cs.</foreName>
            <surname>Szepesvári</surname>
            <initial>Cs.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Mannor</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of Advances in Neural Information Processing Systems 21</title>
        <imprint>
          <publisher>
            <orgName>MIT Press</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">441–448</biblScope>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>23</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>CA</country>
        <country>IL</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="4369" id="sequel-2009-bid65" type="inproceedings" rend="year" n="cite:Farahmand09RL" default="NO">
      <analytic>
        <title level="a">Robot Learning with Regularized Reinforcement Learning</title>
        <author>
          <persName>
            <foreName>A. M.</foreName>
            <surname>Farahmand</surname>
            <initial>A. M.</initial>
          </persName>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Cs.</foreName>
            <surname>Szepesvári</surname>
            <initial>Cs.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Mannor</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="no">
        <title level="m">Workshop on Regression in Robotics: Approaches and Applications at Robotics: Science and Systems Conference (RSS)</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid358246">
          <title>RSS Workshop on Regression in Robotics: Approaches and Applications</title>
          <num>2009</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
      <affiliation>
        <country>CN</country>
        <country>IL</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="2683" id="sequel-2009-bid68" type="inproceedings" rend="year" n="cite:Ghavamzadeh09BA" default="NO">
      <analytic>
        <title level="a">Bayesian Actor-Critic: A Bayesian Model for Value Function Approximation and Policy Learning</title>
        <author>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Y.</foreName>
            <surname>Engel</surname>
            <initial>Y.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="no">
        <title level="m">Workshop on Regression in Robotics: Approaches and Applications at Robotics: Science and Systems Conference (RSS)</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid358246">
          <title>RSS Workshop on Regression in Robotics: Approaches and Applications</title>
          <num>2009</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
      <affiliation>
        <country>IL</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="3465" id="sequel-2009-bid67" type="inproceedings" rend="year" n="cite:Ghavamzadeh09HH" default="NO">
      <analytic>
        <title level="a">Hierarchical Hybrid Reinforcement Learning Algorithms</title>
        <author>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="no">
        <title level="m">Workshop on Bridging the Gap between High-Level Discrete Representations and Low-Level Continuous Behaviors at Robotics: Science and Systems Conference (RSS)</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid358191">
          <title>RSS Workshop on Bridging the Gap between High-Level Discrete Representations and Low-Level Continuous Behaviors</title>
          <num>2009</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="3493" id="sequel-2009-bid33" type="inproceedings" rend="year" n="cite:Estochad09" default="NO">
      <analytic>
        <title level="a">Hybrid Stochastic-Adversarial On-line Learning</title>
        <author>
          <persName key="sequel-2008-idm80283886240">
            <foreName>A.</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of Computational Learning Theory</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid29437">
          <title>Annual Conference on Learning Theory</title>
          <num>22</num>
          <abbr type="sigle">COLT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="3129" id="sequel-2009-bid47" type="inproceedings" rend="year" n="cite:icmla2009" default="NO">
      <analytic>
        <title level="a">ECON: a Kernel Basis Pursuit Algorithm with Automatic Feature Parameter Tuning, and its Application to Photometric Solids Approximation</title>
        <author>
          <persName key="sequel-2006-idm391148935408">
            <foreName>M.</foreName>
            <surname>Loth</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Delepoulle</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Ch.</foreName>
            <surname>Renaud</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. International Conference on Machine Learning and Applications (ICML-A)</title>
        <imprint>
          <publisher>
            <orgName>IEEE Press</orgName>
          </publisher>
          <dateStruct>
            <month full="yes">December</month>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">–</biblScope>
        </imprint>
        <meeting id="cid290299">
          <title>International Conference on Machine Learning and Applications</title>
          <num>7</num>
          <abbr type="sigle">ICML-A</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="2876" id="sequel-2009-bid46" type="inproceedings" rend="year" n="cite:clsr09" default="NO">
      <analytic>
        <title level="a">Compressed Least Squares Regression</title>
        <author>
          <persName>
            <foreName>O-A.</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of Advances in Neural Information Processing Systems</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>22</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="2863" id="sequel-2009-bid45" type="inproceedings" rend="year" n="cite:MV09" default="NO">
      <analytic>
        <title level="a">Complexity versus Agreement for Many Views</title>
        <author>
          <persName>
            <foreName>O-A.</foreName>
            <surname>Maillard</surname>
            <initial>O.-A.</initial>
          </persName>
          <persName>
            <foreName>Nicolas</foreName>
            <surname>Vayatis</surname>
            <initial>N.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. of the 20th International Conference on Algorithmic Learning Theory</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">232-246</biblScope>
        </imprint>
        <meeting id="cid110465">
          <title>International Conference on Algorithmic Learning Theory</title>
          <num>20</num>
          <abbr type="sigle">ALT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="3305" id="sequel-2009-bid29" type="inproceedings" rend="year" n="cite:adprl2009" default="NO">
      <analytic>
        <title level="a">Feature Discovery in Approximate Dynamic Programming</title>
        <author>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sequel-2007-idm332252510144">
            <foreName>S.</foreName>
            <surname>Girgin</surname>
            <initial>S.</initial>
          </persName>
          <persName key="sequel-2006-idm391148935408">
            <foreName>M.</foreName>
            <surname>Loth</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. IEEE Approximate Dynamic Programming and Reinforcement Learning (ADPRL)</title>
        <imprint>
          <publisher>
            <orgName>IEEE Press</orgName>
          </publisher>
          <dateStruct>
            <month full="yes">March–April</month>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">109–116</biblScope>
        </imprint>
        <meeting id="cid88347">
          <title>IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning</title>
          <num>2009</num>
          <abbr type="sigle">ADPRL</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>TR</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="4004" id="sequel-2009-bid57" type="inproceedings" rend="year" n="cite:RAB-09" default="NO">
      <analytic>
        <title level="a">On the Use of Dirichlet Process Mixtures for the Modelling of Pseudorange Errors in Multi-constellation Based Localisation</title>
        <author>
          <persName>
            <foreName>A.</foreName>
            <surname>Rabaoui</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2008-idm80283854720">
            <foreName>N.</foreName>
            <surname>Viandier</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Marais</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of the 9th International Conference on ITS Telecommunications</title>
        <imprint>
          <dateStruct>
            <month full="yes">October</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid287946">
          <title>International Conference on Intelligent Transportation System - Telecommunications</title>
          <num>9</num>
          <abbr type="sigle">ITS-T</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>TN</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="4824" id="sequel-2009-bid58" type="inproceedings" rend="year" n="cite:RAB09-2" default="NO">
      <analytic>
        <title level="a">Using Dirichlet Process Mixtures for the Modelling of GNSS Pseudorange Errors in Urban Canyon</title>
        <author>
          <persName>
            <foreName>A.</foreName>
            <surname>Rabaoui</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2008-idm80283854720">
            <foreName>N.</foreName>
            <surname>Viandier</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Marais</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of ION GNSS 2009</title>
        <imprint>
          <dateStruct>
            <month full="yes">September</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid319937">
          <title>International Technical Meeting of the Institute of Navigation Satellite Division</title>
          <num>2009</num>
          <abbr type="sigle">ION GNSS</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>TN</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="4274" id="sequel-2009-bid55" type="inproceedings" rend="year" n="cite:RAV09" default="NO">
      <analytic>
        <title level="a">Real world implementation of belief function theory to detect dislocation of materials in construction</title>
        <author>
          <persName>
            <foreName>S. N.</foreName>
            <surname>Ravazi</surname>
            <initial>S. N.</initial>
          </persName>
          <persName>
            <foreName>C.</foreName>
            <surname>Haas</surname>
            <initial>C.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2006-idm391148949872">
            <foreName>Ph.</foreName>
            <surname>Vanheeghe</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of the 12th International Conference on Information Fusion</title>
        <imprint>
          <publisher>
            <orgName>ISIF</orgName>
          </publisher>
          <dateStruct>
            <month full="yes">July</month>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">748–755</biblScope>
        </imprint>
        <meeting id="cid285701">
          <title>International Conference on Information Fusion</title>
          <num>12</num>
          <abbr type="sigle">FUSION</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>CN</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="2469" id="sequel-2009-bid39" type="inproceedings" rend="year" n="cite:Ryabko09:discr" default="NO">
      <analytic>
        <title level="a">An impossibility result for process discrimination</title>
        <author>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. 2009 IEEE International Symposium on Information Theory, Seoul, South Korea</title>
        <imprint>
          <publisher>
            <orgName>IEEE</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">1734-1738</biblScope>
        </imprint>
        <meeting id="cid89373">
          <title>IEEE International Symposium on Information Theory</title>
          <num>2009</num>
          <abbr type="sigle">ISIT</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="2779" id="sequel-2009-bid37" type="inproceedings" rend="year" n="cite:Ryabko09:pq3" default="NO">
      <analytic>
        <title level="a">Characterizing predictable classes of processes</title>
        <author>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <editor role="editor">
          <persName>
            <foreName>J.</foreName>
            <surname>Bilmes</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>A.</foreName>
            <surname>Ng</surname>
            <initial>A.</initial>
          </persName>
        </editor>
        <title level="m">Proceedings of the 25th Conference on Uncertainty in Artificial Intelligence (UAI'09), Montreal, Canada</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid49628">
          <title>Conference on Uncertainty in Artificial Intelligence</title>
          <num>25</num>
          <abbr type="sigle">UAI</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="2969" id="sequel-2009-bid41" type="inproceedings" rend="year" n="cite:Ryabko09:2c" default="NO">
      <analytic>
        <title level="a">Criteria for hypothesis testing for discrete-valued stationary processes</title>
        <author>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="no">
        <title level="m">European Meeting of Statisticians, Toulouse, France</title>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid69298">
          <title>European Meeting of Statisticians</title>
          <num>27</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="4640" id="sequel-2009-bid40" type="inproceedings" rend="year" n="cite:Ryabko10:1c" default="NO">
      <analytic>
        <title level="a">Testing composite hypotheses about discrete-valued stationary processes</title>
        <author>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. IEEE Information Theory Workshop (ITW'10), Cairo, Egypt</title>
        <imprint>
          <publisher>
            <orgName>IEEE</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2010</year>
          </dateStruct>
        </imprint>
        <meeting id="cid79788">
          <title>IEEE Information Theory Workshop</title>
          <num>2009</num>
          <abbr type="sigle">ITW</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="4827" id="sequel-2009-bid44" type="inproceedings" rend="year" n="cite:Ryabko09:skc" default="NO">
      <analytic>
        <title level="a">Using Kolmogorov Complexity for Understanding Some Limitations on Steganography</title>
        <author>
          <persName>
            <foreName>B.</foreName>
            <surname>Ryabko</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sequel-2007-idm332252530992">
            <foreName>D.</foreName>
            <surname>Ryabko</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. 2009 IEEE International Symposium on Information Theory, Seoul, South Korea</title>
        <imprint>
          <publisher>
            <orgName>IEEE</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
          <biblScope type="pages">2733-2736</biblScope>
        </imprint>
        <meeting id="cid89373">
          <title>IEEE International Symposium on Information Theory</title>
          <num>2009</num>
          <abbr type="sigle">ISIT</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>RU</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="3424" id="sequel-2009-bid64" type="inproceedings" rend="year" n="cite:VIA09-3" default="NO">
      <analytic>
        <title level="a">GNSS Accuracy enhancement in urban environments based on error modeling and sequential Monte Carlo</title>
        <author>
          <persName key="sequel-2008-idm80283854720">
            <foreName>N.</foreName>
            <surname>Viandier</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>F.</foreName>
            <surname>Nahimana</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Marais</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="no" x-international-audience="no" x-proceedings="yes">
        <title level="m">Proceedings of Workshop on Localisation Precise pour les Transports Terrestres</title>
        <imprint>
          <publisher>
            <orgName>INRETS</orgName>
          </publisher>
          <dateStruct>
            <month full="yes">June</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid369841">
          <title>Workshop "Localisation Precise pour les Transports Terrestres"</title>
          <num>2009</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="3178" id="sequel-2009-bid56" type="inproceedings" rend="year" n="cite:VIA-09" default="NO">
      <analytic>
        <title level="a">Enhancement of Galileo and multi-constellation accuracy by modeling pseudorange noises</title>
        <author>
          <persName key="sequel-2008-idm80283854720">
            <foreName>N.</foreName>
            <surname>Viandier</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>A.</foreName>
            <surname>Rabaoui</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Marais</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proceedings of the 9th International Conference on ITS Telecommunications</title>
        <imprint>
          <dateStruct>
            <month full="yes">October</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
        <meeting id="cid287946">
          <title>International Conference on Intelligent Transportation System - Telecommunications</title>
          <num>9</num>
          <abbr type="sigle">ITS-T</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>TN</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="5780" id="sequel-2009-bid19" type="techreport" rend="year" n="cite:Bhatnagar09NA2" default="NO">
      <monogr x-editorial-board="no" x-international-audience="yes">
        <title level="m">Natural Actor-Critic Algorithms</title>
        <author>
          <persName>
            <foreName>S.</foreName>
            <surname>Bhatnagar</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
          <persName key="mistis-2012-idm128610638448">
            <foreName>M.</foreName>
            <surname>Lee</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">TR09-10</biblScope>
          <publisher>
            <orgName type="institution">Department of Computing Science, University of Alberta</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc" place="unspecified" anchored="yes">Technical report</note>
      <affiliation>
        <country>IN</country>
        <country>CN</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="5486" id="sequel-2009-bid51" type="techreport" rend="year" n="cite:RR-6908" default="NO">
      <monogr>
        <title level="m">A General Framework for Nonlinear Functional Regression with Reproducing Kernel Hilbert Spaces</title>
        <author>
          <persName key="sequel-2008-idm80283882784">
            <foreName>H.</foreName>
            <surname>Kadri</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sequel-2006-idm391148952976">
            <foreName>E.</foreName>
            <surname>Duflos</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sequel-2006-idm391148956016">
            <foreName>M.</foreName>
            <surname>Davy</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Canu</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-6908</biblScope>
          <publisher>
            <orgName type="institution">INRIA</orgName>
          </publisher>
          <dateStruct>
            <month full="yes">April</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc" place="unspecified" anchored="yes">Research Report</note>
    </biblStruct>
    <biblStruct dedoublkey="5930" id="sequel-2009-bid69" type="techreport" rend="year" n="cite:RR-6794" default="NO">
      <monogr>
        <title level="m">The Equi-Correlation Network: a New Kernelized-LARS with Automatic Kernel Parameters Tuning</title>
        <author>
          <persName key="sequel-2006-idm391148935408">
            <foreName>M.</foreName>
            <surname>Loth</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-6794</biblScope>
          <publisher>
            <orgName type="institution">INRIA</orgName>
          </publisher>
          <dateStruct>
            <month full="yes">January</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc" place="unspecified" anchored="yes">Research Report</note>
    </biblStruct>
    <biblStruct dedoublkey="5026" id="sequel-2009-bid72" type="misc" rend="year" n="cite:Coulom-2009d" default="NO">
      <monogr x-international-audience="yes">
        <title level="m">Criticality: a Monte-Carlo Heuristic for Go Programs</title>
        <author>
          <persName key="sequel-2006-idm391148946784">
            <foreName>R.</foreName>
            <surname>Coulom</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month full="yes">January</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="howpublished" place="unspecified" anchored="yes">Invited presentation at the University of Electro-Communication, Tokyo, Japan</note>
    </biblStruct>
    <biblStruct dedoublkey="5080" id="sequel-2009-bid35" type="misc" rend="year" n="cite:Coulom-2009c" default="NO">
      <monogr x-international-audience="yes">
        <title level="m">Local Quadratic Logistic Regression for Stochastic Optimization of Parameters</title>
        <author>
          <persName key="sequel-2006-idm391148946784">
            <foreName>R.</foreName>
            <surname>Coulom</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month full="yes">January</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="howpublished" place="unspecified" anchored="yes">Invited presentation at the University of Electro-Communication, Tokyo, Japan</note>
    </biblStruct>
    <biblStruct dedoublkey="5006" id="sequel-2009-bid26" type="misc" rend="year" n="cite:Lazaric09BM" default="NO">
      <monogr>
        <title level="m">Bayesian Multi-Task Reinforcement Learning</title>
        <author>
          <persName key="sequel-2008-idm80283886240">
            <foreName>A.</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote" place="unspecified" anchored="yes">in preparation</note>
    </biblStruct>
    <biblStruct dedoublkey="5077" id="sequel-2009-bid48" type="misc" rend="year" n="cite:econ_sparsity_w" default="NO">
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="no">
        <title level="m"><span class="math"><hi rend="it">l</hi><sub>1</sub></span>regularization path for functional features</title>
        <author>
          <persName key="sequel-2006-idm391148935408">
            <foreName>M.</foreName>
            <surname>Loth</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month full="yes">April</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote" place="unspecified" anchored="yes">Sparsity in Machine Learning and Statistics Workshop, Cumberland Lodge, UK (1 page)</note>
    </biblStruct>
    <biblStruct dedoublkey="5004" id="sequel-2009-bid49" type="misc" rend="year" n="cite:benelearn2009" default="NO">
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="no">
        <title level="m">Automatic kernel parameter tuning for supervised learning: the ECON approach</title>
        <author>
          <persName key="sequel-2006-idm391148935408">
            <foreName>M.</foreName>
            <surname>Loth</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month full="yes">May</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote" place="unspecified" anchored="yes">Benelearn (2 pages abstract)</note>
    </biblStruct>
    <biblStruct dedoublkey="4959" id="sequel-2009-bid36" type="mastersthesis" rend="year" n="cite:Marsault-2009a" default="NO">
      <monogr>
        <title level="m">Développement d'algorithmes de planification pour le jeu de Havannah</title>
        <author>
          <persName key="sequel-2009-idm70240654624">
            <foreName>Victor</foreName>
            <surname>Marsault</surname>
            <initial>V.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">ENS Cachan</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc" place="unspecified" anchored="yes">Mémoire de Licence</note>
    </biblStruct>
    <biblStruct dedoublkey="4997" id="sequel-2009-bid17" type="misc" rend="year" n="cite:Munos09AP" default="NO">
      <monogr>
        <title level="m">Approximate Policy Iteration without Value Function Representation</title>
        <author>
          <persName key="sequel-2006-idm391148963008">
            <foreName>R.</foreName>
            <surname>Munos</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sequel-2008-idm80283886240">
            <foreName>A.</foreName>
            <surname>Lazaric</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sequel-2008-idm80283914416">
            <foreName>M.</foreName>
            <surname>Ghavamzadeh</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote" place="unspecified" anchored="yes">in preparation</note>
    </biblStruct>
    <biblStruct dedoublkey="5132" id="sequel-2009-bid30" type="misc" rend="year" n="cite:cascor_sparsity_w" default="NO">
      <monogr x-editorial-board="yes" x-international-audience="yes" x-proceedings="no">
        <title level="m">Sparsity in Adaptive Control</title>
        <author>
          <persName key="sequel-2006-idm391148966784">
            <foreName>Ph.</foreName>
            <surname>Preux</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sequel-2007-idm332252510144">
            <foreName>S.</foreName>
            <surname>Girgin</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month full="yes">April</month>
            <year full="yes">2009</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote" place="unspecified" anchored="yes">Sparsity in Machine Learning and Statistics Workshop, Cumberland Lodge, UK (1 page)</note>
    </biblStruct>
    <biblStruct id="sequel-2009-bid7" type="article" rend="foot" n="footcite:Aueretal2002" default="NO">
      <analytic>
        <title level="a">Finite-time analysis of the multi-armed bandit problem</title>
        <author>
          <persName>
            <foreName>P.</foreName>
            <surname>Auer</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>N.</foreName>
            <surname>Cesa-Bianchi</surname>
            <initial>N.</initial>
          </persName>
          <persName key="mc2-2006-idm487769908880">
            <foreName>P.</foreName>
            <surname>Fischer</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Machine Learning</title>
        <imprint>
          <biblScope type="volume">47</biblScope>
          <biblScope type="number">2/3</biblScope>
          <dateStruct>
            <year full="yes">2002</year>
          </dateStruct>
          <biblScope type="pages">235–256</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid20" type="article" rend="foot" n="footcite:Barto83NE" default="NO">
      <analytic>
        <title level="a">Neuron-Like Elements that can Solve Difficult Learning Control Problems</title>
        <author>
          <persName>
            <foreName>A.G.</foreName>
            <surname>Barto</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
          <persName key="talaris-2008-idm310216591424">
            <foreName>C.</foreName>
            <surname>Anderson</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">IEEE Transactions on Systems, Man, and Cybernetics</title>
        <imprint>
          <biblScope type="volume">13</biblScope>
          <dateStruct>
            <year full="yes">1983</year>
          </dateStruct>
          <biblScope type="pages">835-846</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid3" type="book" rend="foot" n="footcite:bellman" default="NO">
      <monogr>
        <title level="m">Dynamic Programming</title>
        <author>
          <persName>
            <foreName>R.</foreName>
            <surname>Bellman</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Princeton University Press</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">1957</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid4" type="book" rend="foot" n="footcite:bertshreve78" default="NO">
      <monogr>
        <title level="m">Stochastic Optimal Control (The Discrete Time Case)</title>
        <author>
          <persName>
            <foreName>D.P.</foreName>
            <surname>Bertsekas</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>S.E.</foreName>
            <surname>Shreve</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Academic Press, New York</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">1978</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid2" type="book" rend="foot" n="footcite:Bertsekas96" default="NO">
      <monogr>
        <title level="m">Neuro-Dynamic Programming</title>
        <author>
          <persName>
            <foreName>D.P.</foreName>
            <surname>Bertsekas</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Tsitsiklis</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Athena Scientific</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">1996</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid27" type="inproceedings" rend="foot" n="footcite:Engel05RL" default="NO">
      <analytic>
        <title level="a">Reinforcement Learning with Gaussian Processes</title>
        <author>
          <persName>
            <foreName>Y.</foreName>
            <surname>Engel</surname>
            <initial>Y.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Mannor</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>R.</foreName>
            <surname>Meir</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proceedings of the Twenty Second International Conference on Machine Learning</title>
        <imprint>
          <dateStruct>
            <year full="yes">2005</year>
          </dateStruct>
          <biblScope type="pages">201-208</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid9" type="article" rend="foot" n="footcite:ferguson1973bas" default="NO">
      <analytic>
        <title level="a">A Bayesian Analysis of Some Nonparametric Problems</title>
        <author>
          <persName>
            <foreName>T.S.</foreName>
            <surname>Ferguson</surname>
            <initial>T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">The Annals of Statistics</title>
        <imprint>
          <biblScope type="volume">1</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <year full="yes">1973</year>
          </dateStruct>
          <biblScope type="pages">209–230</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid16" type="article" rend="foot" n="footcite:Fern06AP" default="NO">
      <analytic>
        <title level="a">Approximate Policy Iteration with a Policy Language Bias: Solving Relational Markov Decision Processes</title>
        <author>
          <persName>
            <foreName>A.</foreName>
            <surname>Fern</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Yoon</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>R.</foreName>
            <surname>Givan</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Artificial Intelligence Research</title>
        <imprint>
          <biblScope type="volume">25</biblScope>
          <dateStruct>
            <year full="yes">2006</year>
          </dateStruct>
          <biblScope type="pages">85-118</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid8" type="book" rend="foot" n="footcite:EltsStatLearning" default="NO">
      <monogr>
        <title level="m">The elements of statistical learning — Data Mining, Inference, and Prediction</title>
        <author>
          <persName>
            <foreName>T.</foreName>
            <surname>Hastie</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>R.</foreName>
            <surname>Tibshirani</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Friedman</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2001</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid23" type="inproceedings" rend="foot" n="footcite:Konda00AA" default="NO">
      <analytic>
        <title level="a">Actor-Critic Algorithms</title>
        <author>
          <persName>
            <foreName>V.</foreName>
            <surname>Konda</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Tsitsiklis</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proceedings of Advances in Neural Information Processing Systems 12</title>
        <imprint>
          <dateStruct>
            <year full="yes">2000</year>
          </dateStruct>
          <biblScope type="pages">1008-1014</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid24" type="article" rend="foot" n="footcite:Konda03AA" default="NO">
      <analytic>
        <title level="a">On Actor-Critic Algorithms</title>
        <author>
          <persName>
            <foreName>V.</foreName>
            <surname>Konda</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Tsitsiklis</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">SIAM Journal on Control and Optimization</title>
        <imprint>
          <biblScope type="volume">42</biblScope>
          <biblScope type="number">4</biblScope>
          <dateStruct>
            <year full="yes">2003</year>
          </dateStruct>
          <biblScope type="pages">1143-1166</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid15" type="inproceedings" rend="foot" n="footcite:Lagoudakis03RL" default="NO">
      <analytic>
        <title level="a">Reinforcement Learning as Classification: Leveraging Modern Classifiers</title>
        <author>
          <persName>
            <foreName>M.</foreName>
            <surname>Lagoudakis</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>R.</foreName>
            <surname>Parr</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proceedings of the Twentieth International Conference on Machine Learning</title>
        <imprint>
          <dateStruct>
            <year full="yes">2003</year>
          </dateStruct>
          <biblScope type="pages">424-431</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid25" type="inproceedings" rend="foot" n="footcite:Peters05NA" default="NO">
      <analytic>
        <title level="a">Natural Actor-Critic</title>
        <author>
          <persName key="mascotte-2006-idm64999743248">
            <foreName>J.</foreName>
            <surname>Peters</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Vijayakumar</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Schaal</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proceedings of the Sixteenth European Conference on Machine Learning</title>
        <imprint>
          <dateStruct>
            <year full="yes">2005</year>
          </dateStruct>
          <biblScope type="pages">280-291</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid11" type="book" rend="foot" n="footcite:ADPpowell" default="NO">
      <monogr>
        <title level="m">Approximate Dynamic Programming</title>
        <author>
          <persName>
            <foreName>W.</foreName>
            <surname>Powell</surname>
            <initial>W.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Wiley</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2007</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid1" type="book" rend="foot" n="footcite:puterman94" default="NO">
      <monogr>
        <title level="m">Markov Decision Processes: Discrete Stochastic Dynamic Programming</title>
        <author>
          <persName>
            <foreName>M.L.</foreName>
            <surname>Puterman</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>John Wiley and Sons</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">1994</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid50" type="book" rend="foot" n="footcite:ramsey" default="NO">
      <monogr>
        <title level="m">Functional Data Analysis</title>
        <author>
          <persName>
            <foreName>J.</foreName>
            <surname>Ramsay</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>B.</foreName>
            <surname>Silverman</surname>
            <initial>B.</initial>
          </persName>
        </author>
        <edition>2nd edition</edition>
        <imprint>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2005</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid6" type="article" rend="foot" n="footcite:Robbins1952" default="NO">
      <analytic>
        <title level="a">Some aspects of the sequential design of experiments</title>
        <author>
          <persName>
            <foreName>H.</foreName>
            <surname>Robbins</surname>
            <initial>H.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Bull. Amer. Math. Soc.</title>
        <imprint>
          <biblScope type="volume">58</biblScope>
          <dateStruct>
            <year full="yes">1952</year>
          </dateStruct>
          <biblScope type="pages">527–535</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid14" type="article" rend="foot" n="footcite:rustSSMedicare" default="NO">
      <analytic>
        <title level="a">How Social Security and Medicare Affect Retirement Behavior in a World of Incomplete Market</title>
        <author>
          <persName>
            <foreName>J.</foreName>
            <surname>Rust</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Econometrica</title>
        <imprint>
          <biblScope type="volume">65</biblScope>
          <biblScope type="number">4</biblScope>
          <dateStruct>
            <month full="yes">July</month>
            <year full="yes">1997</year>
          </dateStruct>
          <biblScope type="pages">781–831</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://gemini.econ.umd.edu/jrust/research/rustphelan.pdf" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://
          <allowbreak/>gemini.
          <allowbreak/>econ.
          <allowbreak/>umd.
          <allowbreak/>edu/
          <allowbreak/>jrust/
          <allowbreak/>research/
          <allowbreak/>rustphelan.
          <allowbreak/>pdf</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid13" type="article" rend="foot" n="footcite:rustNuclearPlants" default="NO">
      <analytic>
        <title level="a">On the Optimal Lifetime of Nuclear Power Plants</title>
        <author>
          <persName>
            <foreName>J.</foreName>
            <surname>Rust</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Journal of Business &amp; Economic Statistics</title>
        <imprint>
          <biblScope type="volume">15</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <year full="yes">1997</year>
          </dateStruct>
          <biblScope type="pages">195–208</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://129.3.20.41/eprints/io/papers/9512/9512002.abs" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://
          <allowbreak/>129.
          <allowbreak/>3.
          <allowbreak/>20.
          <allowbreak/>41/
          <allowbreak/>eprints/
          <allowbreak/>io/
          <allowbreak/>papers/
          <allowbreak/>9512/
          <allowbreak/>9512002.
          <allowbreak/>abs</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid10" type="article" rend="foot" n="footcite:Sethuraman94" default="NO">
      <analytic>
        <title level="a">A constructive definition of Dirichlet priors</title>
        <author>
          <persName>
            <foreName>J.</foreName>
            <surname>Sethuraman</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Statistica Sinica</title>
        <imprint>
          <biblScope type="volume">4</biblScope>
          <dateStruct>
            <year full="yes">1994</year>
          </dateStruct>
          <biblScope type="pages">639-650</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid0" type="book" rend="foot" n="footcite:sb" default="NO">
      <monogr>
        <title level="m">Reinforcement learning: an introduction</title>
        <author>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>A.G.</foreName>
            <surname>Barto</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>MIT Press</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">1998</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid21" type="phdthesis" rend="foot" n="footcite:Sutton84TC" default="NO">
      <monogr>
        <title level="m">Temporal credit assignment in reinforcement learning</title>
        <author>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">University of Massachusetts Amherst</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">1984</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="typdoc" place="unspecified" anchored="yes">Ph. D. Thesis</note>
    </biblStruct>
    <biblStruct id="sequel-2009-bid22" type="article" rend="foot" n="footcite:Sutton88LP" default="NO">
      <analytic>
        <title level="a">Learning to Predict by the Methods of Temporal Differences</title>
        <author>
          <persName>
            <foreName>R.S.</foreName>
            <surname>Sutton</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Machine Learning</title>
        <imprint>
          <biblScope type="volume">3</biblScope>
          <dateStruct>
            <year full="yes">1988</year>
          </dateStruct>
          <biblScope type="pages">9-44</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid12" type="article" rend="foot" n="footcite:tdgammon" default="NO">
      <analytic>
        <title level="a">Temporal Difference Learning and TD-Gammon</title>
        <author>
          <persName>
            <foreName>G.</foreName>
            <surname>Tesauro</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Communications of the ACM</title>
        <imprint>
          <biblScope type="volume">38</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <month full="yes">March</month>
            <year full="yes">1995</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.research.ibm.com/massive/tdl.html" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://
          <allowbreak/>www.
          <allowbreak/>research.
          <allowbreak/>ibm.
          <allowbreak/>com/
          <allowbreak/>massive/
          <allowbreak/>tdl.
          <allowbreak/>html</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="sequel-2009-bid5" type="inbook" rend="foot" n="footcite:werbosHandbookADP" default="NO">
      <analytic>
        <author>
          <persName>
            <foreName>P.</foreName>
            <surname>Werbos</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <title level="a">ADP: Goals, Opportunities and Principles</title>
      </analytic>
      <monogr>
        <title level="m">Handbook of learning and approximate dynamic programming</title>
        <editor role="editor">
          <persName>
            <foreName>J.</foreName>
            <surname>Si</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>A.G.</foreName>
            <surname>Barto</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>W.</foreName>
            <surname>Powell</surname>
            <initial>W.</initial>
          </persName>
          <persName>
            <foreName>D.</foreName>
            <surname>Wunsch</surname>
            <initial>D.</initial>
          </persName>
        </editor>
        <imprint>
          <publisher>
            <orgName>IEEE Press</orgName>
          </publisher>
          <dateStruct>
            <year full="yes">2004</year>
          </dateStruct>
          <biblScope type="pages">3–44</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
  </biblio>
</raweb>
