<?xml version="1.0" encoding="utf-8"?>
<raweb xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" year="2018">
  <identification id="sierra" isproject="true">
    <shortname>SIERRA</shortname>
    <projectName>Statistical Machine Learning and Parsimony</projectName>
    <theme-de-recherche>Optimization, machine learning and statistical methods</theme-de-recherche>
    <domaine-de-recherche>Applied Mathematics, Computation and Simulation</domaine-de-recherche>
    <urlTeam>http://www.di.ens.fr/sierra/</urlTeam>
    <structure_exterieure type="Labs">
      <libelle>Département d'Informatique de l'Ecole Normale Supérieure</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>CNRS</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Ecole normale supérieure de Paris</libelle>
    </structure_exterieure>
    <header_dates_team>Creation of the Team: 2011 January 01, updated into Project-Team: 2012 January 01</header_dates_team>
    <LeTypeProjet>Project-Team</LeTypeProjet>
    <keywordsSdN>
      <term>A1.2.8. - Network security</term>
      <term>A3.4. - Machine learning and statistics</term>
      <term>A5.4. - Computer vision</term>
      <term>A6.2. - Scientific computing, Numerical Analysis &amp; Optimization</term>
      <term>A7.1. - Algorithms</term>
      <term>A8.2. - Optimization</term>
      <term>A9.2. - Machine learning</term>
    </keywordsSdN>
    <keywordsSecteurs>
      <term>B9.5.6. - Data science</term>
    </keywordsSecteurs>
    <UR name="Paris"/>
  </identification>
  <team id="uid1">
    <person key="sierra-2018-idp112912">
      <firstname>Francis</firstname>
      <lastname>Bach</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Team leader, Inria, Senior Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sierra-2018-idp115824">
      <firstname>Alexandre</firstname>
      <lastname>d'Aspremont</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>CNRS, Senior Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="sierra-2018-idp118672">
      <firstname>Pierre</firstname>
      <lastname>Gaillard</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="sierra-2018-idp121136">
      <firstname>Remi</firstname>
      <lastname>Leblond</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, Researcher, until Aug 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp123584">
      <firstname>Alessandro</firstname>
      <lastname>Rudi</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, Starting Research Position</moreinfo>
    </person>
    <person key="sierra-2018-idp126064">
      <firstname>Lenaic</firstname>
      <lastname>Chizat</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, until Nov 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp128528">
      <firstname>Pierre Yves</firstname>
      <lastname>Massé</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Université Technique de Prague, from Apr 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp130992">
      <firstname>Dmitrii</firstname>
      <lastname>Ostrovskii</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Feb 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp133456">
      <firstname>Adrien</firstname>
      <lastname>Taylor</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sierra-2018-idp135920">
      <firstname>Dmitry</firstname>
      <lastname>Babichev</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sierra-2018-idp138352">
      <firstname>Mathieu</firstname>
      <lastname>Barré</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole Normale Supérieure Paris, from Sep 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp140864">
      <firstname>Raphaël</firstname>
      <lastname>Berthier</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Oct 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp143296">
      <firstname>Anaël</firstname>
      <lastname>Bonneton</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole Normale Supérieure Paris</moreinfo>
    </person>
    <person key="sierra-2018-idp145776">
      <firstname>Margaux</firstname>
      <lastname>Brégère</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>EDF</moreinfo>
    </person>
    <person key="sierra-2018-idp148176">
      <firstname>Alexandre</firstname>
      <lastname>Défossez</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Facebook</moreinfo>
    </person>
    <person key="sierra-2018-idp150608">
      <firstname>Radu Alexandru</firstname>
      <lastname>Dragomir</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole polytechnique, from Sep 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp153056">
      <firstname>Thomas</firstname>
      <lastname>Kerdreux</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole polytechnique</moreinfo>
    </person>
    <person key="thoth-2018-idp164736">
      <firstname>Gregoire</firstname>
      <lastname>Mialon</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Oct 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp157920">
      <firstname>Loucas</firstname>
      <lastname>Pillaud Vivien</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ministère de l'Ecologie, de l'Energie, du Développement durable et de la Mer</moreinfo>
    </person>
    <person key="sierra-2018-idp160496">
      <firstname>Antoine</firstname>
      <lastname>Recanati</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>CNRS, until Sep 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp162928">
      <firstname>Damien</firstname>
      <lastname>Scieur</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, until Aug 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp165360">
      <firstname>Tatiana</firstname>
      <lastname>Shpakova</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="sierra-2018-idp167792">
      <firstname>Loïc</firstname>
      <lastname>Estève</lastname>
      <categoryPro>Technique</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Apr 2018</moreinfo>
    </person>
    <person key="dyogene-2018-idp171968">
      <firstname>Hadrien</firstname>
      <lastname>Hendrikx</lastname>
      <categoryPro>Technique</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Apr 2018 until Sep 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp138352">
      <firstname>Mathieu</firstname>
      <lastname>Barré</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Apr 2018 until Sep 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp140864">
      <firstname>Raphaël</firstname>
      <lastname>Berthier</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole Normale Supérieure Paris, until Sep 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp177760">
      <firstname>Vivien</firstname>
      <lastname>Cabannes</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Univ Vincennes-Saint Denis, from Sep 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp180256">
      <firstname>Florentin</firstname>
      <lastname>Guth</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole Normale Supérieure Paris, from Feb 2018 until Mar 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp182832">
      <firstname>Remi</firstname>
      <lastname>Jézequel</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole Normale Supérieure Paris, from Oct 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp185376">
      <firstname>Ulysse</firstname>
      <lastname>Marteau Ferey</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole Normale Supérieure Paris, from Apr 2018</moreinfo>
    </person>
    <person key="dyogene-2018-idp204064">
      <firstname>Helene</firstname>
      <lastname>Bessin Rousseau</lastname>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Mar 2018</moreinfo>
    </person>
    <person key="willow-2018-idp181920">
      <firstname>Sabrine</firstname>
      <lastname>Boumizy</lastname>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, until Feb 2018</moreinfo>
    </person>
    <person key="valda-2018-idp164000">
      <firstname>Sandrine</firstname>
      <lastname>Verges</lastname>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, until Jan 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp195312">
      <firstname>Vijaya</firstname>
      <lastname>Bollapragada</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Northwestern University, from Apr 2018 until Jul 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp197808">
      <firstname>Aaron</firstname>
      <lastname>Defazio</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Facebook Research, until Feb 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp200288">
      <firstname>Gauthier</firstname>
      <lastname>Gidel</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>University of Montreal, Jan 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp202768">
      <firstname>Achintya</firstname>
      <lastname>Kundu</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Ecole d'ingénieurs, from Jun 2018 until Aug 2018</moreinfo>
    </person>
    <person key="thoth-2018-idp164736">
      <firstname>Gregoire</firstname>
      <lastname>Mialon</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, Sep 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp207776">
      <firstname>Sharan</firstname>
      <lastname>Vaswani</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>University of British Columbia, from Apr 2018 until Jul 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp210288">
      <firstname>Simon</firstname>
      <lastname>Lacoste-Julien</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>University of Montreal, Aug 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp212768">
      <firstname>Alex</firstname>
      <lastname>Nowak Vila</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Oct 2018</moreinfo>
    </person>
    <person key="sierra-2018-idp212768">
      <firstname>Alex</firstname>
      <lastname>Nowak Vila</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Paris</research-centre>
      <moreinfo>Inria, from Apr 2018 until Sep 2018</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Statement</bodyTitle>
      <p>Machine learning is a recent scientific domain, positioned between
applied mathematics, statistics and computer science. Its goals are
the optimization, control, and modeling of complex systems from
examples. It applies to data from numerous engineering and scientific
fields (e.g., vision, bioinformatics, neuroscience, audio processing,
text processing, economy, finance, etc.), the ultimate goal being to
derive general theories and algorithms allowing advances in each of
these domains. Machine learning is characterized by the high quality
and quantity of the exchanges between theory, algorithms and
applications: interesting theoretical problems almost always emerge
from applications, while theoretical analysis allows the understanding
of why and when popular or successful algorithms do or do not work,
and leads to proposing significant improvements.</p>
      <p>Our academic positioning is exactly at the intersection between these
three aspects—algorithms, theory and applications—and our main
research goal is to make the link between theory and algorithms, and
between algorithms and high-impact applications in various engineering
and scientific fields, in particular computer vision, bioinformatics,
audio processing, text processing and neuro-imaging.</p>
      <p>Machine learning is now a vast field of research and the team focuses
on the following aspects: supervised learning (kernel methods,
calibration), unsupervised learning (matrix factorization, statistical
tests), parsimony (structured sparsity, theory and algorithms), and
optimization (convex optimization, bandit learning). These four
research axes are strongly interdependent, and the interplay between
them is key to successful practical applications.</p>
    </subsection>
  </presentation>
  <fondements id="uid4">
    <bodyTitle>Research Program</bodyTitle>
    <subsection id="uid5" level="1">
      <bodyTitle>Supervised Learning</bodyTitle>
      <p>This part of our research focuses on methods where, given a set of
examples of input/output pairs, the goal is to predict the output
for a new input, with research on kernel methods, calibration methods,
and multi-task learning.
</p>
    </subsection>
    <subsection id="uid6" level="1">
      <bodyTitle>Unsupervised Learning</bodyTitle>
      <p>We focus here on methods where no output is given and the goal is to
find structure of certain known types (e.g., discrete or
low-dimensional) in the data, with a focus on matrix factorization,
statistical tests, dimension reduction, and semi-supervised learning.
</p>
    </subsection>
    <subsection id="uid7" level="1">
      <bodyTitle>Parsimony</bodyTitle>
      <p>The concept of parsimony is central to many areas of science. In the
context of statistical machine learning, this takes the form of
variable or feature selection. The team focuses primarily on
structured sparsity, with theoretical and algorithmic contributions.
</p>
    </subsection>
    <subsection id="uid8" level="1">
      <bodyTitle>Optimization</bodyTitle>
      <p>Optimization in all its forms is central to machine learning, as many
of its theoretical frameworks are based at least in part on
empirical risk minimization. The team focuses primarily on convex and
bandit optimization, with a particular focus on large-scale optimization.
</p>
    </subsection>
  </fondements>
  <domaine id="uid9">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid10" level="1">
      <bodyTitle>Applications for Machine Learning</bodyTitle>
      <p>Machine learning research can be conducted from two main perspectives: the first one, which has been
dominant in the last 30 years, is to design learning algorithms and theories which are as generic as possible, the
goal being to make as few assumptions as possible regarding the problems to be solved and to let data speak
for themselves. This has led to many interesting methodological developments and successful applications.
However, we believe that this strategy has reached its limit for many application domains, such as computer
vision, bioinformatics, neuro-imaging, text and audio processing, which leads to the second perspective our
team is built on: Research in machine learning theory and algorithms should be driven by interdisciplinary
collaborations, so that specific prior knowledge may be properly introduced into the learning process, in
particular with the following fields:</p>
      <simplelist>
        <li id="uid11">
          <p noindent="true">Computer vision: object recognition, object detection, image segmentation, image/video processing, computational photography. In collaboration with the Willow project-team.</p>
        </li>
        <li id="uid12">
          <p noindent="true">Bioinformatics: cancer diagnosis, protein function prediction, virtual screening. In collaboration with Institut Curie.</p>
        </li>
        <li id="uid13">
          <p noindent="true">Text processing: document collection modeling, language models.</p>
        </li>
        <li id="uid14">
          <p noindent="true">Audio processing: source separation, speech/music processing.</p>
        </li>
        <li id="uid15">
          <p noindent="true">Neuro-imaging: brain-computer interface (fMRI, EEG, MEG).</p>
        </li>
      </simplelist>
    </subsection>
  </domaine>
  <highlights id="uid16">
    <bodyTitle>Highlights of the Year</bodyTitle>
    <subsection id="uid17" level="1">
      <bodyTitle>Highlights of the Year</bodyTitle>
      <subsection id="uid18" level="2">
        <bodyTitle>Awards</bodyTitle>
        <sanspuceslist>
          <li id="uid19">
            <p noindent="true">Francis Bach, Lagrange Prize in Continuous Optimization, Society for Industrial and Applied Mathematics 2018</p>
          </li>
          <li id="uid20">
            <p noindent="true">Francis Bach, Best Paper Award, NeurIPS 2018.</p>
          </li>
          <li id="uid21">
            <p noindent="true">Francis Bach included in the report <i>Highly cited researchers, year 2018</i>, Clarivate Analytics, 2018</p>
          </li>
          <li id="uid22">
            <p noindent="true">Nicolas Flammarion, PhD thesis award in the <i>Programme Gaspard Monge</i>, Fondation Mathématique Jacques Hadamard, 2018.</p>
          </li>
          <li id="uid23">
            <p noindent="true">Adrien Taylor, Tucker Prize (finalist) 2018 (dissertation prize by the Mathematical Optimization Society for 2015-2017).</p>
          </li>
          <li id="uid24">
            <p noindent="true">Adrien Taylor, IBM/FNRS innovation award 2018 (dissertation prize for
original contributions to informatics).</p>
          </li>
          <li id="uid25">
            <p noindent="true">Adrien Taylor, Icteam thesis award 2018 (dissertation award by the icteam
institute of UCLouvain, Belgium).</p>
          </li>
          <li id="uid26">
            <p noindent="true">Adrien Taylor, Best paper award 2018 from the journal Optimization Letters for the paper <i>On the worst-case complexity of the gradient method with exact line search for smooth strongly convex functions</i>, Etienne De Klerk, François Glineur, Adrien Taylor.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
  </highlights>
  <logiciels id="uid27">
    <bodyTitle>New Software and Platforms</bodyTitle>
    <subsection id="uid28" level="1">
      <bodyTitle>ProxASAGA</bodyTitle>
      <p><span class="smallcap" align="left">Keyword:</span> Optimization</p>
      <p noindent="true"><span class="smallcap" align="left">Functional Description:</span> A C++/Python code implementing the methods in the paper "Breaking the Nonsmooth Barrier: A Scalable Parallel Method for Composite Optimization", F. Pedregosa, R. Leblond and S. Lacoste-Julien, Advances in Neural Information Processing Systems (NIPS) 2017. Due to their simplicity and excellent performance, parallel asynchronous variants of stochastic gradient descent have become popular methods to solve a wide range of large-scale optimization problems on multi-core architectures. Yet, despite their practical success, support for nonsmooth objectives is still lacking, making them unsuitable for many problems of interest in machine learning, such as the Lasso, group Lasso or empirical risk minimization with convex constraints. In this work, we propose and analyze ProxASAGA, a fully asynchronous sparse method inspired by SAGA, a variance reduced incremental gradient algorithm. The proposed method is easy to implement and significantly outperforms the state of the art on several nonsmooth, large-scale problems. We prove that our method achieves a theoretical linear speedup with respect to the sequential version under assumptions on the sparsity of gradients and block-separability of the proximal term. Empirical benchmarks on a multi-core architecture illustrate practical speedups of up to 12x on a 20-core machine.</p>
      <simplelist>
        <li id="uid29">
          <p noindent="true">Contact: Fabian Pedregosa</p>
        </li>
        <li id="uid30">
          <p noindent="true">URL: <ref xlink:href="https://github.com/fabianp/ProxASAGA" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>github.<allowbreak/>com/<allowbreak/>fabianp/<allowbreak/>ProxASAGA</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid31" level="1">
      <bodyTitle>object-states-action</bodyTitle>
      <p><span class="smallcap" align="left">Keyword:</span> Computer vision</p>
      <p noindent="true"><span class="smallcap" align="left">Functional Description:</span> Code for the paper Joint Discovery of Object States and Manipulation Actions, ICCV 2017: Many human activities involve object manipulations
aiming to modify the object state. Examples of common
state changes include full/empty bottle, open/closed door,
and attached/detached car wheel. In this work, we seek to
automatically discover the states of objects and the associated
manipulation actions. Given a set of videos for a particular
task, we propose a joint model that learns to identify
object states and to localize state-modifying actions. Our
model is formulated as a discriminative clustering cost with
constraints. We assume a consistent temporal order for the
changes in object states and manipulation actions, and introduce
new optimization techniques to learn model parameters
without additional supervision. We demonstrate successful
discovery of seven manipulation actions and corresponding
object states on a new dataset of videos depicting
real-life object manipulations. We show that our joint formulation
results in an improvement of object state discovery
by action recognition and vice versa.</p>
      <simplelist>
        <li id="uid32">
          <p noindent="true">Participants: Jean-Baptiste Alayrac, Josef Sivic, Ivan Laptev and Simon Lacoste-Julien</p>
        </li>
        <li id="uid33">
          <p noindent="true">Contact: Jean-Baptiste Alayrac</p>
        </li>
        <li id="uid34">
          <p noindent="true">Publication: <ref xlink:href="https://hal.inria.fr/hal-01676084" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Joint Discovery of Object States and Manipulation Actions</ref></p>
        </li>
        <li id="uid35">
          <p noindent="true">URL: <ref xlink:href="https://github.com/jalayrac/object-states-action" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>github.<allowbreak/>com/<allowbreak/>jalayrac/<allowbreak/>object-states-action</ref></p>
        </li>
      </simplelist>
    </subsection>
  </logiciels>
  <resultats id="uid36">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid37" level="1">
      <bodyTitle>On the Global Convergence of Gradient Descent for Over-parameterized Models using Optimal Transport</bodyTitle>
      <p>Many tasks in machine learning and signal processing can be solved by minimizing a convex function of a measure. This includes sparse spikes deconvolution or training a neural network with a single hidden layer. For these problems, in <ref xlink:href="#sierra-2018-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we study a simple minimization method: the unknown measure is discretized into a mixture of particles and a continuous-time gradient descent is performed on their weights and positions. This is an idealization of the usual way to train neural networks with a large hidden layer. We show that, when initialized correctly and in the many-particle limit, this gradient flow, although non-convex, converges to global minimizers. The proof involves Wasserstein gradient flows, a by-product of optimal transport theory. Numerical experiments show that this asymptotic behavior is already at play for a reasonable number of particles, even in high dimension.
</p>
    </subsection>
    <subsection id="uid38" level="1">
      <bodyTitle>Sharp Analysis of Learning with Discrete Losses</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we study a least-squares framework to systematically design learning algorithms
for discrete losses, with quantitative characterizations in terms of statistical and
computational complexity. In particular we improve existing results by providing explicit
dependence on the number of labels for a wide class of losses and faster learning rates in
conditions of low-noise.
Theoretical results are complemented with experiments on real datasets, showing the
effectiveness of the proposed general approach.
</p>
    </subsection>
    <subsection id="uid39" level="1">
      <bodyTitle>Gossip of Statistical Observations using Orthogonal Polynomials</bodyTitle>
      <p>Consider a network of agents connected by communication links, where each agent holds a real value. The gossip problem consists in estimating the average of the values diffused in the network in a distributed manner. Current techniques for gossiping are designed to deal with worst-case scenarios, which is irrelevant in applications to distributed statistical learning and denoising in sensor networks. In <ref xlink:href="#sierra-2018-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we design second-order gossip methods tailor-made for the case where the real values are i.i.d. samples from the same distribution. In some regular network structures, we are able to prove optimality of our methods, and simulations suggest that they are efficient in a wide range of random networks. Our approach of gossip stems from a new acceleration framework using the family of orthogonal polynomials with respect to the spectral measure of the network graph.
</p>
    </subsection>
    <subsection id="uid40" level="1">
      <bodyTitle>Marginal Weighted Maximum Log-likelihood for Efficient Learning of Perturb-and-Map models</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we consider the structured-output prediction problem through probabilistic approaches and generalize the “perturb-and-MAP” framework to more challenging weighted Hamming losses, which are crucial in applications. While in principle our approach is a straightforward marginalization, it requires solving many related MAP inference problems. We show that for log-supermodular pairwise models these operations can be performed efficiently using the machinery of dynamic graph cuts. We also propose to use double stochastic gradient descent, both on the data and on the perturbations, for efficient learning. Our framework can naturally take weak supervision (e.g., partial labels) into account. We conduct a set of experiments on medium-scale character recognition and image segmentation, showing the benefits of our algorithms.
</p>
    </subsection>
    <subsection id="uid41" level="1">
      <bodyTitle>Slice inverse regression with score functions</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we consider non-linear regression problems where we assume that the response depends non-linearly on a linear projection of the covariates. We propose score function extensions to sliced inverse regression problems, both for the first- order and second-order score functions. We show that they provably improve estimation in the population case over the non-sliced versions and we study finite sample estimators and their consistency given the exact score functions. We also propose to learn the score function as well, in two steps, i.e., first learning the score function and then learning the effective dimension reduction space, or directly, by solving a convex optimization problem regularized by the nuclear norm. We illustrate our results on a series of experiments.
</p>
    </subsection>
    <subsection id="uid42" level="1">
      <bodyTitle>Constant Step Size Stochastic Gradient Descent for Probabilistic Modeling</bodyTitle>
      <p>Stochastic gradient methods enable learning probabilistic models from large amounts of data. While large step-sizes (learning rates) have shown to be best for least-squares (e.g., Gaussian noise) once combined with parameter averaging, these are not leading to convergent algorithms in general. In this paper, we consider generalized linear models, that is, conditional models based on exponential families. In <ref xlink:href="#sierra-2018-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we propose averaging moment parameters instead of natural parameters for constant-step-size stochastic gradient descent. For finite-dimensional models, we show that this can sometimes (and surprisingly) lead to better predictions than the best linear model. For infinite-dimensional models, we show that it always converges to optimal predictions, while averaging natural parameters never does. We illustrate our findings with simulations on synthetic data and classical benchmarks with many observations.
</p>
    </subsection>
    <subsection id="uid43" level="1">
      <bodyTitle>Nonlinear Acceleration of Momentum and Primal-Dual Algorithms</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we describe a convergence acceleration scheme for multistep optimization algorithms. The extrapolated solution is written as a nonlinear average of the iterates produced by the original optimization algorithm. Our scheme does not need the underlying fixed-point operator to be symmetric, hence handles e.g. algorithms with momentum terms such as Nesterov's accelerated method, or primal-dual methods. The weights are computed via a simple linear system and we analyze performance in both online and offline modes. We use Crouzeix's conjecture to show that acceleration performance is controlled by the solution of a Chebyshev problem on the numerical range of a non-symmetric operator modelling the behavior of iterates near the optimum. Numerical experiments are detailed on image processing problems, logistic regression and neural network training for CIFAR10 and ImageNet.
</p>
    </subsection>
    <subsection id="uid44" level="1">
      <bodyTitle>Nonlinear Acceleration of Deep Neural Networks</bodyTitle>
      <p>Regularized nonlinear acceleration (RNA) is a generic extrapolation scheme for optimization methods, with marginal computational overhead. It aims to improve convergence using only the iterates of simple iterative algorithms. However, so far its application to optimization was theoretically limited to gradient descent and other single-step algorithms. Here, we adapt RNA to a much broader setting including stochastic gradient with momentum and Nesterov's fast gradient. In <ref xlink:href="#sierra-2018-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we use it to train deep neural networks, and empirically observe that extrapolated networks are more accurate, especially in the early iterations. A straightforward application of our algorithm when training ResNet-152 on ImageNet produces a top-1 test error of 20.88, improving the reference classification pipeline by 0.8. Furthermore, the code runs offline in this case, so it never negatively affects performance.
</p>
    </subsection>
    <subsection id="uid45" level="1">
      <bodyTitle>Nonlinear Acceleration of CNNs</bodyTitle>
      <p>The Regularized Nonlinear Acceleration (RNA) algorithm is an acceleration method capable of improving the rate of convergence of many optimization schemes such as gradient descent, SAGA or SVRG. Until now, its analysis has been limited to convex problems, but empirical observations show that RNA may be extended to wider settings. In <ref xlink:href="#sierra-2018-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we investigate further the benefits of RNA when applied to neural networks, in particular for the task of image recognition on CIFAR10 and ImageNet. With very few modifications of existing frameworks, RNA slightly improves the optimization process of CNNs, after training.
</p>
    </subsection>
    <subsection id="uid46" level="1">
      <bodyTitle>Robust Seriation and Applications To Cancer Genomics</bodyTitle>
      <p>The seriation problem seeks to reorder a set of elements given pairwise similarity information,
so that elements with higher similarity are closer in the resulting sequence. When a global ordering consistent
with the similarity information exists, an exact spectral solution recovers it in the noiseless case and seriation
is equivalent to the combinatorial 2-SUM problem over permutations, for which several relaxations have been
derived. However, in applications such as DNA assembly, similarity values are often heavily corrupted, and
the solution of 2-SUM may no longer yield an approximate serial structure on the elements. In <ref xlink:href="#sierra-2018-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we introduce the
robust seriation problem and show that it is equivalent to a modified 2-SUM problem for a class of similarity
matrices modeling those observed in DNA assembly. We explore several relaxations of this modified 2-SUM
problem and compare them empirically on both synthetic matrices and real DNA data. We then introduce
the problem of seriation with duplications, which is a generalization of seriation motivated by applications
to cancer genome reconstruction. We propose an algorithm involving robust seriation to solve it, and present
preliminary results on synthetic data sets.
</p>
    </subsection>
    <subsection id="uid47" level="1">
      <bodyTitle>Reconstructing Latent Orderings by Spectral Clustering</bodyTitle>
      <p>Spectral clustering uses a graph Laplacian spectral embedding to enhance the
cluster structure of some data sets. When the embedding is one dimensional, it
can be used to sort the items (spectral ordering). A number of empirical results
also suggest that a multidimensional Laplacian embedding enhances the latent
ordering of the data, if any. This also extends to circular orderings, a case where
unidimensional embeddings fail. In <ref xlink:href="#sierra-2018-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we tackle the task of retrieving linear and circular
orderings in a unifying framework, and show how a latent ordering on the data
translates into a filamentary structure on the Laplacian embedding. We propose
a method to recover it, illustrated with numerical experiments on synthetic data
and real DNA sequencing data.
</p>
    </subsection>
    <subsection id="uid48" level="1">
      <bodyTitle>Lyapunov Functions for First-Order Methods: Tight Automated Convergence Guarantees</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we present a novel way of generating Lyapunov
functions for proving linear convergence rates
of first-order optimization methods. Our approach
provably obtains the fastest linear convergence
rate that can be verified by a quadratic
Lyapunov function (with given states), and only
relies on solving a small-sized semidefinite program.
Our approach combines the advantages of
performance estimation problems and integral quadratic
constraints,
and relies on convex interpolation.
</p>
    </subsection>
    <subsection id="uid49" level="1">
      <bodyTitle>Efficient First-order Methods for Convex Minimization:
a Constructive Approach</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we describe a novel constructive technique for devising efficient first-order methods for a wide range of large-scale convex minimization settings, including smooth, non-smooth, and strongly convex minimization. The design technique takes a method performing a series of subspace-searches and constructs a family of methods that share the same worst-case guarantees as the original method, and includes a fixed-step first-order method. We show that this technique yields optimal methods in the smooth and non-smooth cases and derive new methods for these cases, including methods that forego knowledge of the problem parameters, at the cost of a one-dimensional line search per iteration. In the strongly convex case, we show how numerical tools can be used to perform the construction, and show that the resulting method offers an improved convergence rate compared to Nesterov's celebrated fast gradient method.
</p>
    </subsection>
    <subsection id="uid50" level="1">
      <bodyTitle>Operator Splitting Performance Estimation: Tight contraction factors and optimal parameter selection</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we propose a methodology for studying the performance of common splitting methods through semidefinite programming. We prove tightness of the methodology and demonstrate its value by presenting two applications of it. First, we use the methodology as a tool for computer-assisted proofs to prove tight analytical contraction factors for Douglas–Rachford splitting that are likely too complicated for a human to find bare-handed. Second, we use the methodology as an algorithmic tool to computationally select the optimal splitting method parameters by solving a series of semidefinite programs.
</p>
    </subsection>
    <subsection id="uid51" level="1">
      <bodyTitle>Finite-sample Analysis of M-estimators using Self-concordance</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we demonstrate how <i>self-concordance</i> of the loss allows to obtain asymptotically optimal rates for <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>M</mi></math></formula>-estimators in finite-sample regimes.
We consider two classes of losses:
(i) self-concordant losses, i.e., whose third derivative is uniformly bounded with the <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>3</mn><mo>/</mo><mn>2</mn></mrow></math></formula> power of the second;
(ii) <i>pseudo</i> self-concordant losses, for which the power is removed.
These classes contain some losses arising in generalized linear models, including the logistic loss; in addition, the second class includes some common pseudo-Huber losses.
Our results consist in establishing the <i>critical sample size</i> sufficient to reach the asymptotically optimal excess risk in both cases. Denoting <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>d</mi></math></formula> the parameter dimension, and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>d</mi><mi>e</mi></msub></math></formula> the effective dimension taking into account possible model misspecification, we find the critical sample size to be <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msub><mi>d</mi><mi>e</mi></msub><mo>·</mo><mi>d</mi><mo>)</mo></mrow></math></formula> for the first class of losses, and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mi>ρ</mi><mo>·</mo><msub><mi>d</mi><mi>e</mi></msub><mo>·</mo><mi>d</mi><mo>)</mo></mrow></math></formula> for the second class, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ρ</mi></math></formula> is the problem-dependent parameter that characterizes the risk curvature at the best predictor <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>θ</mi><mo>*</mo></msub></math></formula>.
In contrast to the existing results, we only impose <i>local</i> assumptions on the data distribution, assuming that the calibrated design, i.e., the design scaled with the square root of the second derivative of the loss, is subgaussian at the best predictor.
Moreover, we obtain the improved bounds on the critical sample size, scaling <i>near-linearly</i> in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mo movablelimits="true" form="prefix">max</mo><mo>(</mo><msub><mi>d</mi><mi>e</mi></msub><mo>,</mo><mi>d</mi><mo>)</mo></mrow></math></formula>, under the extra assumption that the calibrated design is subgaussian in the Dikin ellipsoid of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>θ</mi><mo>*</mo></msub></math></formula>.
Motivated by these findings, we construct canonically self-concordant analogues of the Huber and logistic losses with improved statistical properties.
Finally, we extend some of the above results to <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>ℓ</mi><mn>1</mn></msub></math></formula>-penalized <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>M</mi></math></formula>-estimators in high-dimensional setups.
</p>
    </subsection>
    <subsection id="uid52" level="1">
      <bodyTitle>Uniform regret bounds over <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>R</mi><mi>d</mi></msup></math></formula> for the sequential linear regression problem with the square loss</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we consider the setting of online linear regression for arbitrary deterministic sequences,
with the square loss. We are interested in obtaining regret bounds that hold uniformly over all vectors in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>R</mi><mi>d</mi></msup></math></formula>. When the feature sequence is known at the beginning of the game, previous work provided closed-form regret bounds of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>2</mn><mi>d</mi><msup><mi>B</mi><mn>2</mn></msup><mo form="prefix">ln</mo><mi>T</mi><mo>+</mo><mi>O</mi><mrow><mo>(</mo><mn>1</mn><mo>)</mo></mrow></mrow></math></formula>, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>T</mi></math></formula> is the number of rounds and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>B</mi></math></formula> is a bound on the observations. Instead, we derive bounds with an optimal constant of 1 in front of the <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>d</mi><msup><mi>B</mi><mn>2</mn></msup><mo form="prefix">ln</mo><mi>T</mi></mrow></math></formula> term. In the case of sequentially revealed features, we also derive an asymptotic regret bound of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>d</mi><msup><mi>B</mi><mn>2</mn></msup><mo form="prefix">ln</mo><mi>T</mi></mrow></math></formula> for any individual sequence of features and bounded observations. All our algorithms are variants of the online nonlinear ridge regression forecaster, either with a data-dependent regularization or with almost no regularization.
</p>
    </subsection>
    <subsection id="uid53" level="1">
      <bodyTitle>Efficient online algorithms for fast-rate regret bounds under sparsity.</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we consider the problem of online convex optimization in two different settings: arbitrary and i.i.d. sequence of convex loss functions. In both settings, we provide efficient algorithms whose cumulative excess risks are controlled with fast-rate sparse bounds.
First, the excess risk bounds depend on the sparsity of the objective rather than on the dimension of the parameter space. Second, their rates are faster than the slow-rate <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>1</mn><mo>/</mo><msqrt><mi>T</mi></msqrt></mrow></math></formula>.
</p>
    </subsection>
    <subsection id="uid54" level="1">
      <bodyTitle>Exponential convergence of testing error for stochastic gradient methods</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we consider binary classification problems with positive definite kernels and square loss, and study the convergence rates of stochastic gradient methods. We show that while the excess testing loss (squared loss) converges slowly to zero as the number of observations (and thus iterations) goes to infinity, the testing error (classification error) converges exponentially fast if low-noise conditions are assumed.
</p>
    </subsection>
    <subsection id="uid55" level="1">
      <bodyTitle>Statistical Optimality of Stochastic Gradient Descent on Hard Learning Problems through Multiple Passes</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we consider stochastic gradient descent (SGD) for least-squares regression with potentially several passes over the data. While several passes have been widely reported to perform practically better in terms of predictive performance on unseen data, the existing theoretical analysis of SGD suggests that a single pass is statistically optimal. While this is true for low-dimensional easy problems, we show that for hard problems, multiple passes lead to statistically optimal predictions while single pass does not; we also show that in these hard models, the optimal number of passes over the data increases with sample size. In order to define the notion of hardness and show that our predictive performances are optimal, we consider potentially infinite-dimensional models and notions typically associated to kernel methods, namely, the decay of eigenvalues of the covariance matrix of the features and the complexity of the optimal predictor as measured through the covariance matrix. We illustrate our results on synthetic experiments with non-linear kernel methods and on a classical benchmark with a linear model.
</p>
    </subsection>
    <subsection id="uid56" level="1">
      <bodyTitle>Central Limit Theorem for stationary Fleming–Viot particle systems in finite spaces</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we consider the Fleming–Viot particle system associated with a continuous-time Markov chain in a finite space. Assuming irreducibility, it is known that the particle system possesses a unique stationary distribution, under which its empirical measure converges to the quasistationary distribution of the Markov chain. We complement this Law of Large Numbers with a Central Limit Theorem. Our proof essentially relies on elementary computations on the infinitesimal generator of the Fleming–Viot particle system, and involves the so-called <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>π</mi></math></formula>-return process in the expression of the asymptotic variance. Our work can be seen as an infinite-time version, in the setting of finite space Markov chains, of results by Del Moral and Miclo [ESAIM: Probab. Statist., 2003] and Cérou, Delyon, Guyader and Rousset [arXiv:1611.00515, arXiv:1709.06771].
</p>
    </subsection>
    <subsection id="uid57" level="1">
      <bodyTitle>SeaRNN: Improved RNN training through Global-Local Losses</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we propose SEARNN, a novel training algorithm for recurrent neural networks (RNNs) inspired by the “learning to search” (L2S) approach to structured prediction. RNNs have been widely successful in structured prediction applications such as machine translation or parsing, and are commonly trained using maximum likelihood estimation (MLE). Unfortunately, this training loss is not always an appropriate surrogate for the test error: by only maximizing the ground truth probability, it fails to exploit the wealth of information offered by structured losses. Further, it introduces discrepancies between training and predicting (such as exposure bias) that may hurt test performance. Instead, SEARNN leverages test-alike search space exploration to introduce global-local losses that are closer to the test error. We first demonstrate improved performance over MLE on two different tasks: OCR and spelling correction. Then, we propose a subsampling strategy to enable SEARNN to scale to large vocabulary sizes. This allows us to validate the benefits of our approach on a machine translation task.
</p>
    </subsection>
    <subsection id="uid58" level="1">
      <bodyTitle>Improved asynchronous parallel optimization analysis for stochastic incremental methods</bodyTitle>
      <p>As datasets continue to increase in size and multi-core computer architectures are developed, asynchronous parallel optimization algorithms become more and more essential to the field of Machine Learning. Unfortunately, conducting the theoretical analysis of asynchronous methods is difficult, notably due to the introduction of delay and inconsistency in inherently sequential algorithms. Handling these issues often requires resorting to simplifying but unrealistic assumptions. Through a novel perspective, in <ref xlink:href="#sierra-2018-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we revisit and clarify a subtle but important technical issue present in a large fraction of the recent convergence rate proofs for asynchronous parallel optimization algorithms, and propose a simplification of the recently introduced "perturbed iterate" framework that resolves it. We demonstrate the usefulness of our new framework by analyzing three distinct asynchronous parallel incremental optimization algorithms: Hogwild (asynchronous SGD), KROMAGNON (asynchronous SVRG) and ASAGA, a novel asynchronous parallel version of the incremental gradient algorithm SAGA that enjoys fast linear convergence rates. We are able to both remove problematic assumptions and obtain better theoretical results. Notably, we prove that ASAGA and KROMAGNON can obtain a theoretical linear speedup on multi-core systems even without sparsity assumptions. We present results of an implementation on a 40-core architecture illustrating the practical speedups as well as the hardware overhead. Finally, we investigate the overlap constant, an ill-understood but central quantity for the theoretical analysis of asynchronous parallel algorithms. We find that it encompasses much more complexity than suggested in previous work, and often is order-of-magnitude bigger than traditionally thought.
</p>
    </subsection>
    <subsection id="uid59" level="1">
      <bodyTitle>Asynchronous optimisation for Machine Learning</bodyTitle>
      <p>The impressive breakthroughs of the last two decades in the field of machine learning can be in large part attributed to the explosion of computing power and available data. These two limiting factors have been replaced by a new bottleneck: algorithms. The focus of this thesis <ref xlink:href="#sierra-2018-bid22" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> is thus on introducing novel methods that can take advantage of high data quantity and computing power. We present two independent contributions.</p>
      <p>First, we develop and analyze novel fast optimization algorithms which take advantage of the advances in parallel computing architecture and can handle vast amounts of data. We introduce a new framework of analysis for asynchronous parallel incremental algorithms, which enable correct and simple proofs. We then demonstrate its usefulness by performing the convergence analysis for several methods, including two novel algorithms.</p>
      <p>Asaga is a sparse asynchronous parallel variant of the variance-reduced algorithm Saga which enjoys fast linear convergence rates on smooth and strongly convex objectives. We prove that it can be linearly faster than its sequential counterpart, even without sparsity assumptions.</p>
      <p>ProxAsaga is an extension of Asaga to the more general setting where the regularizer can be non-smooth. We prove that it can also achieve a linear speedup.
We provide extensive experiments comparing our new algorithms to the current state-of-art.</p>
      <p>Second, we introduce new methods for complex structured prediction tasks. We focus on recurrent neural networks (RNNs), whose traditional training algorithm – based on maximum likelihood estimation (MLE) – suffers from several issues. The associated surrogate training loss notably ignores the information contained in structured losses and introduces discrepancies between train and test times that may hurt performance.</p>
      <p>To alleviate these problems, we propose SeaRNN, a novel training algorithm for RNNs inspired by the “learning to search” approach to structured prediction. SeaRNN leverages test-alike search space exploration to introduce global-local losses that are closer to the test error than the MLE objective.</p>
      <p>We demonstrate improved performance over MLE on three challenging tasks, and provide several subsampling strategies to enable SeaRNN to scale to large-scale tasks, such as machine translation. Finally, after contrasting the behavior of SeaRNN models to MLE models, we conduct an in-depth comparison of our new approach to the related work.
</p>
    </subsection>
    <subsection id="uid60" level="1">
      <bodyTitle><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>M</mi><mo>*</mo></msup></math></formula>-Regularized Dictionary Learning</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we derive a performance measure for dictionaries in compressed sensing, based on the <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>M</mi><mo>*</mo></msup></math></formula> of the corresponding norm. We use this measure to regularize dictionary learning algorithms and study the performance of our methods on both compression and inpainting experiments.
</p>
    </subsection>
    <subsection id="uid61" level="1">
      <bodyTitle>Optimal Algorithms for Non-Smooth Distributed Optimization in Networks</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid24" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we consider the distributed optimization of non-smooth convex functions using a network of computing units. We investigate this problem under two regularity assumptions: (1) the Lipschitz continuity of the global objective function, and (2) the Lipschitz continuity of local individual functions. Under the local regularity assumption, we provide the first optimal first-order decentralized algorithm called multi-step primal-dual (MSPD) and its corresponding optimal convergence rate. A notable aspect of this result is that, for non-smooth functions, while the dominant term of the error is in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mn>1</mn><mo>/</mo><msqrt><mi>t</mi></msqrt><mo>)</mo></mrow></math></formula>, the structure of the communication network only impacts a second-order term in <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mn>1</mn><mo>/</mo><mi>t</mi><mo>)</mo></mrow></math></formula>, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>t</mi></math></formula> is time. In other words, the error due to limits in communication resources decreases at a fast rate even in the case of non-strongly-convex objective functions. 
Under the global regularity assumption, we provide a simple yet efficient algorithm called distributed randomized smoothing (DRS) based on a local smoothing of the objective function, and show that DRS is within a <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>d</mi><mrow><mn>1</mn><mo>/</mo><mn>4</mn></mrow></msup></math></formula> multiplicative factor of the optimal convergence rate, where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>d</mi></math></formula> is the underlying dimension.
</p>
    </subsection>
    <subsection id="uid62" level="1">
      <bodyTitle>Relating Leverage Scores and Density using Regularized Christoffel Functions</bodyTitle>
      <p>Statistical leverage scores emerged as a fundamental tool for matrix sketching and column sampling with applications to low rank approximation, regression, random feature learning and quadrature. Yet, the very nature of this quantity is barely understood. Borrowing ideas from the orthogonal polynomial literature, we introduce in <ref xlink:href="#sierra-2018-bid25" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> the regularized Christoffel function associated to a positive definite kernel. This uncovers a variational formulation for leverage scores for kernel methods and allows to elucidate their relationships with the chosen kernel as well as population density. Our main result quantitatively describes a decreasing relation between leverage score and population density for a broad class of kernels on Euclidean spaces. Numerical simulations support our findings.
</p>
    </subsection>
    <subsection id="uid63" level="1">
      <bodyTitle>Averaging Stochastic Gradient Descent on Riemannian Manifolds</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid26" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we consider the minimization of a function defined on a Riemannian manifold <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>M</mi></math></formula> accessible only through unbiased estimates of its gradients. We develop a geometric framework to transform a sequence of slowly converging iterates generated from stochastic gradient descent (SGD) on <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>M</mi></math></formula> to an averaged iterate sequence with a robust and fast <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mn>1</mn><mo>/</mo><mi>n</mi><mo>)</mo></mrow></math></formula> convergence rate. We then present an application of our framework to geodesically-strongly-convex (and possibly Euclidean non-convex) problems. Finally, we demonstrate how these ideas apply to the case of streaming <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>k</mi></math></formula>-PCA, where we show how to accelerate the slow rate of the randomized power method (without requiring knowledge of the eigengap) into a robust algorithm achieving the optimal rate of convergence.
</p>
    </subsection>
    <subsection id="uid64" level="1">
      <bodyTitle>Localized Structured Prediction</bodyTitle>
      <p>Key to structured prediction is exploiting the problem structure to simplify the learning process. A major challenge arises when data exhibit a local structure (e.g., are made by "parts") that can be leveraged to better approximate the relation between (parts of) the input and (parts of) the output. Recent literature on signal processing, and in particular computer vision, has shown that capturing these aspects is indeed essential to achieve state-of-the-art performance. While such algorithms are typically derived on a case-by-case basis, in <ref xlink:href="#sierra-2018-bid27" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we propose the first theoretical framework to deal with part-based data from a general perspective. We derive a novel approach to deal with these problems and study its generalization properties within the setting of statistical learning theory. Our analysis is novel in that it explicitly quantifies the benefits of leveraging the part-based structure of the problem with respect to the learning rates of the proposed estimator.
</p>
    </subsection>
    <subsection id="uid65" level="1">
      <bodyTitle>Optimal rates for spectral algorithms with least-squares regression over Hilbert spaces</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid28" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we study regression problems over a separable Hilbert space with the square loss, covering non-parametric regression over a reproducing kernel Hilbert space. We investigate a class of spectral-regularized algorithms, including ridge regression, principal component analysis, and gradient methods. We prove optimal, high-probability convergence results in terms of variants of norms for the studied algorithms, considering a capacity assumption on the hypothesis space and a general source condition on the target function. Consequently, we obtain almost sure convergence results with optimal rates. Our results improve and generalize previous results, filling a theoretical gap for the non-attainable cases.
</p>
    </subsection>
    <subsection id="uid66" level="1">
      <bodyTitle>Differential Properties of Sinkhorn Approximation for Learning with Wasserstein Distance</bodyTitle>
      <p>Applications of optimal transport have recently gained remarkable attention thanks to the computational advantages of entropic regularization. However, in most situations the Sinkhorn approximation of the Wasserstein distance is replaced by a regularized version that is less accurate but easy to differentiate. In <ref xlink:href="#sierra-2018-bid29" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we characterize the differential properties of the original Sinkhorn distance, proving that it enjoys the same smoothness as its regularized version and we explicitly provide an efficient algorithm to compute its gradient. We show that this result benefits both theory and applications: on one hand, high order smoothness confers statistical guarantees to learning with Wasserstein approximations. On the other hand, the gradient formula allows us to efficiently solve learning and optimization problems in practice. Promising preliminary experiments complement our analysis.
</p>
    </subsection>
    <subsection id="uid67" level="1">
      <bodyTitle>Learning with SGD and Random Features</bodyTitle>
      <p>Sketching and stochastic gradient methods are arguably the most common techniques to derive efficient large scale learning algorithms. In <ref xlink:href="#sierra-2018-bid30" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we investigate their application in the context of nonparametric statistical learning. More precisely, we study the estimator defined by stochastic gradient with mini batches and random features. The latter can be seen as form of nonlinear sketching and used to define approximate kernel methods. The considered estimator is not explicitly penalized/constrained and regularization is implicit. Indeed, our study highlights how different parameters, such as number of features, iterations, step-size and mini-batch size control the learning properties of the solutions. We do this by deriving optimal finite sample bounds, under standard assumptions. The obtained results are corroborated and illustrated by numerical experiments.
</p>
    </subsection>
    <subsection id="uid68" level="1">
      <bodyTitle>Manifold Structured Prediction</bodyTitle>
      <p>Structured prediction provides a general framework to deal with supervised problems where the outputs have semantically rich structure. While classical approaches consider finite, albeit potentially huge, output spaces, in <ref xlink:href="#sierra-2018-bid31" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we discuss how structured prediction can be extended to a continuous scenario. Specifically, we study a structured prediction approach to manifold valued regression. We characterize a class of problems for which the considered approach is statistically consistent and study how geometric optimization can be used to compute the corresponding estimator. Promising experimental results on both simulated and real data complete our study.
</p>
    </subsection>
    <subsection id="uid69" level="1">
      <bodyTitle>On Fast Leverage Score Sampling and Optimal Learning</bodyTitle>
      <p>Leverage score sampling provides an appealing way to perform approximate computations for large matrices. Indeed, it allows to derive faithful approximations with a complexity adapted to the problem at hand. Yet, performing leverage scores sampling is a challenge in its own right requiring further approximations. In <ref xlink:href="#sierra-2018-bid32" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we study the problem of leverage score sampling for positive definite matrices defined by a kernel. Our contribution is twofold. First we provide a novel algorithm for leverage score sampling and second, we exploit the proposed method in statistical learning by deriving a novel solver for kernel ridge regression. Our main technical contribution is showing that the proposed algorithms are currently the most efficient and accurate for these problems.
</p>
    </subsection>
    <subsection id="uid70" level="1">
      <bodyTitle>Accelerated Decentralized Optimization with Local Updates for Smooth and Strongly Convex Objectives</bodyTitle>
      <p>In <ref xlink:href="#sierra-2018-bid33" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we study the problem of minimizing a sum of smooth and
strongly convex functions split over the nodes of a network in a
decentralized fashion. We propose a decentralized
accelerated algorithm that only requires local synchrony. Its rate
depends on the condition number <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>κ</mi></math></formula> of the local functions as well
as the network topology and delays. Under mild assumptions on the
topology of the graph, our algorithm takes a time <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><mrow><mo>(</mo><msub><mi>τ</mi><mo movablelimits="true" form="prefix">max</mo></msub><mo>+</mo><msub><mi>Δ</mi><mo movablelimits="true" form="prefix">max</mo></msub><mo>)</mo></mrow><msqrt><mrow><mi>κ</mi><mo>/</mo><mi>γ</mi></mrow></msqrt><mo form="prefix">ln</mo><mrow><mo>(</mo><msup><mi>ϵ</mi><mrow><mo>-</mo><mn>1</mn></mrow></msup><mo>)</mo></mrow><mo>)</mo></mrow></math></formula> to reach a
precision <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>ϵ</mi></math></formula> where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>γ</mi></math></formula> is the spectral gap of the graph,
<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>τ</mi><mo movablelimits="true" form="prefix">max</mo></msub></math></formula> the maximum communication delay and <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>Δ</mi><mo movablelimits="true" form="prefix">max</mo></msub></math></formula> the
maximum computation time. Therefore, it matches the rate of
SSDA, which is optimal when <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><msub><mi>τ</mi><mo movablelimits="true" form="prefix">max</mo></msub><mo>=</mo><mi>Ω</mi><mfenced separators="" open="(" close=")"><msub><mi>Δ</mi><mo movablelimits="true" form="prefix">max</mo></msub></mfenced></mrow></math></formula>. Applying our algorithm to quadratic local
functions leads to an accelerated randomized gossip algorithm of rate
<formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mi>O</mi><mo>(</mo><msqrt><mrow><msub><mi>θ</mi><mi> gossip </mi></msub><mo>/</mo><mi>n</mi></mrow></msqrt><mo>)</mo></mrow></math></formula> where <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msub><mi>θ</mi><mi> gossip </mi></msub></math></formula> is the
rate of the standard randomized gossip. To
the best of our knowledge, it is the first asynchronous gossip algorithm
with a provably improved rate of convergence of the second moment of the
error. We illustrate these results with experiments in idealized settings.
</p>
    </subsection>
  </resultats>
  <contrats id="uid71">
    <bodyTitle>Bilateral Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid72" level="1">
      <bodyTitle>Bilateral Contracts with Industry</bodyTitle>
      <p>Microsoft Research: “Structured Large-Scale Machine Learning”. Machine learning is now ubiquitous in
industry, science, engineering, and personal life. While early successes were obtained by applying off-the-shelf techniques, there are two main challenges faced by machine learning in the “big data” era: structure and
scale. The project proposes to explore three axes, from theoretical, algorithmic and practical perspectives: (1)
large-scale convex optimization, (2) large-scale combinatorial optimization and (3) sequential decision making
for structured data. The project involves two Inria sites (Paris and Grenoble) and four MSR sites (Cambridge,
New England, Redmond, New York). Project website: <ref xlink:href="http://www.msr-inria.fr/projects/structured-large-scale-machine-learning/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>msr-inria.<allowbreak/>fr/<allowbreak/>projects/<allowbreak/>structured-large-scale-machine-learning/</ref>.
</p>
    </subsection>
    <subsection id="uid73" level="1">
      <bodyTitle>Bilateral Grants with Industry</bodyTitle>
      <simplelist>
        <li id="uid74">
          <p noindent="true">Alexandre d’Aspremont, Francis Bach, Martin Jaggi (EPFL): Google Focused award.</p>
        </li>
        <li id="uid75">
          <p noindent="true">Francis Bach: Gift from Facebook AI Research.</p>
        </li>
        <li id="uid76">
          <p noindent="true">Alexandre d’Aspremont: AXA, "mécénat scientifique, chaire Havas-Dauphine", machine learning.</p>
        </li>
      </simplelist>
    </subsection>
  </contrats>
  <partenariat id="uid77">
    <bodyTitle>Partnerships and Cooperations</bodyTitle>
    <subsection id="uid78" level="1">
      <bodyTitle>National Initiatives</bodyTitle>
      <sanspuceslist>
        <li id="uid79">
          <p noindent="true">Alexandre d'Aspremont: IRIS, PSL “Science des données, données de la science”.</p>
        </li>
      </sanspuceslist>
    </subsection>
    <subsection id="uid80" level="1">
      <bodyTitle>European Initiatives</bodyTitle>
      <simplelist>
        <li id="uid81">
          <p noindent="true">
            <b>ITN Spartan</b>
          </p>
          <p noindent="true">Title: Sparse Representations and Compressed Sensing Training Network</p>
          <p noindent="true">Type: FP7</p>
          <p noindent="true">Instrument: Initial Training Network</p>
          <p noindent="true">Duration: October 2014 to October 2018</p>
          <p noindent="true">Coordinator: Mark Plumbley (University of Surrey)</p>
          <p noindent="true">Inria contact: Francis Bach</p>
          <p noindent="true">Abstract: The SpaRTaN Initial Training Network will train a new generation of interdisciplinary
researchers in sparse representations and compressed sensing, contributing to Europe’s leading role
in scientific innovation. By bringing together leading academic and industry groups with expertise in
sparse representations, compressed sensing, machine learning and optimisation, and with an interest
in applications such as hyperspectral imaging, audio signal processing and video analytics, this
project will create an interdisciplinary, trans-national and inter-sectorial training network to enhance
mobility and training of researchers in this area. SpaRTaN is funded under the FP7-PEOPLE-2013-ITN
call and is part of the Marie Curie Actions — Initial Training Networks (ITN) funding scheme:
Project number - 607290</p>
        </li>
        <li id="uid82">
          <p noindent="true">
            <b>ITN Macsenet</b>
          </p>
          <p noindent="true">Title: Machine Sensing Training Network</p>
          <p noindent="true">Type: H2020</p>
          <p noindent="true">Instrument: Initial Training Network</p>
          <p noindent="true">Duration: January 2015 - January 2019</p>
          <p noindent="true">Coordinator: Mark Plumbley (University of Surrey)</p>
          <p noindent="true">Inria contact: Francis Bach</p>
          <p noindent="true">Abstract: The aim of this Innovative Training Network is to train a new generation of creative,
entrepreneurial and innovative early stage researchers (ESRs) in the research area of measurement
and estimation of signals using knowledge or data about the underlying structure. We will develop
new robust and efficient Machine Sensing theory and algorithms, together with methods for a wide range
of signals, including: advanced brain imaging; inverse imaging problems; audio and music signals;
and non-traditional signals such as signals on graphs. We will apply these methods to real-world
problems, through work with non-Academic partners, and disseminate the results of this research
to a wide range of academic and non-academic audiences, including through publications, data,
software and public engagement events. MacSeNet is funded under the H2020-MSCA-ITN-2014
call and is part of the Marie Skłodowska-Curie Actions — Innovative Training Networks (ITN)
funding scheme.</p>
        </li>
        <li id="uid83">
          <p noindent="true"><b>ERC Sequoia</b>
Title: Robust algorithms for learning from modern data</p>
          <p noindent="true">Programm: H2020</p>
          <p noindent="true">Type: ERC</p>
          <p noindent="true">Duration: 2017-2022</p>
          <p noindent="true">Coordinator: Inria</p>
          <p noindent="true">Inria contact: Francis Bach</p>
          <p noindent="true">Abstract: Machine learning is needed and used everywhere, from science to industry, with a growing
impact on many disciplines. While first successes were due at least in part to simple supervised
learning algorithms used primarily as black boxes on medium-scale problems, modern data pose
new challenges. Scalability is an important issue of course: with large amounts of data, many
current problems far exceed the capabilities of existing algorithms despite sophisticated computing
architectures. But beyond this, the core classical model of supervised machine learning, with
the usual assumptions of independent and identically distributed data, or well-defined features,
outputs and loss functions, has reached its theoretical and practical limits. Given this new setting,
existing optimization-based algorithms are not adapted. The main objective of this project is to
push the frontiers of supervised machine learning, in terms of (a) scalability to data with massive numbers of observations, features, and tasks, (b) adaptability to modern computing environments,
in particular for parallel and distributed processing, (c) provable adaptivity and robustness to
problem and hardware specifications, and (d) robustness to non-convexities inherent in machine
learning problems. To achieve the expected breakthroughs, we will design a novel generation of
learning algorithms amenable to a tight convergence analysis with realistic assumptions and efficient
implementations. They will help transition machine learning algorithms towards the same
widespread robust use as numerical linear algebra libraries. Outcomes of the research described in this
proposal will include algorithms that come with strong convergence guarantees and are well-tested
on real-life benchmarks coming from computer vision, bioinformatics, audio processing and natural
language processing. For both distributed and non-distributed settings, we will release open-source
software, adapted to widely available computing platforms.</p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid84" level="1">
      <bodyTitle>International Initiatives</bodyTitle>
      <subsection id="uid85" level="2">
        <bodyTitle>
          <ref xlink:href="http://mllab.csa.iisc.ernet.in/indo-french.html" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">BigFOKS2 </ref>
        </bodyTitle>
        <sanspuceslist>
          <li id="uid86">
            <p noindent="true">Title: Learning from Big Data: First-Order methods for Kernels and Submodular functions</p>
          </li>
          <li id="uid87">
            <p noindent="true">International Partner (Institution - Laboratory - Researcher):</p>
            <sanspuceslist>
              <li id="uid88">
                <p noindent="true">IISc Bangalore (India)
- Computer Science Department - Chiranjib Bhattacharyya</p>
              </li>
            </sanspuceslist>
          </li>
          <li id="uid89">
            <p noindent="true">Start year: 2016</p>
          </li>
          <li id="uid90">
            <p noindent="true">See also: <ref xlink:href="http://mllab.csa.iisc.ernet.in/indo-french.html" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">mllab.<allowbreak/>csa.<allowbreak/>iisc.<allowbreak/>ernet.<allowbreak/>in/<allowbreak/>indo-french.<allowbreak/>html</ref></p>
          </li>
          <li id="uid91">
            <p noindent="true">Recent advances in sensor technologies have resulted in large amounts of data being generated in a wide array of scientific disciplines. Deriving models from such large datasets, often known as “Big Data”, is one of the important challenges facing many engineering and scientific disciplines. In this proposal we investigate the problem of learning supervised models from Big Data, which has immediate applications in Computational Biology, Computer vision, Natural language processing, Web, E-commerce, etc., where specific structure is often present and hard to take into account with current algorithms. Our focus will be on the algorithmic aspects. Often supervised learning problems can be cast as convex programs. The goal of this proposal will be to derive first-order methods which can be effective for solving such convex programs arising in the Big-Data setting. Keeping this broad goal in mind we investigate two foundational problems which are not well addressed in existing literature. The first problem investigates Stochastic Gradient Descent Algorithms in the context of First-order methods for designing algorithms for Kernel based prediction functions on Large Datasets. The second problem involves solving discrete optimization problems arising in Submodular formulations in Machine Learning, for which first-order methods have not reached the level of speed required for practical applications (notably in computer vision).</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid92" level="1">
      <bodyTitle>International Research Visitors</bodyTitle>
      <simplelist>
        <li id="uid93">
          <p noindent="true">Vijaya Bollapragada from Northwestern University, Chicago, IL, United States, Apr - Jul 2018.</p>
        </li>
        <li id="uid94">
          <p noindent="true">Aaron De Fazio from Facebook Research NY, New York, United States, Feb 2018.</p>
        </li>
        <li id="uid95">
          <p noindent="true">Gauthier Gidel from University of Montreal - MILA, Montreal, Canada, Jan 2018.</p>
        </li>
        <li id="uid96">
            <p noindent="true">Sharan Vaswani from University of British Columbia, Vancouver, Canada, Apr - Jul 2018.</p>
        </li>
        <li id="uid97">
          <p noindent="true">Simon Lacoste-Julien from University of Montreal - MILA, Montreal, Canada, Aug 2018.</p>
        </li>
      </simplelist>
    </subsection>
  </partenariat>
  <diffusion id="uid98">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid99" level="1">
      <bodyTitle>Promoting Scientific Activities</bodyTitle>
      <subsection id="uid100" level="2">
        <bodyTitle>Scientific Events Organisation</bodyTitle>
        <subsection id="uid101" level="3">
          <bodyTitle>General Chair, Scientific Chair</bodyTitle>
          <sanspuceslist>
            <li id="uid102">
              <p noindent="true">F. Bach: General Chair of ICML 2018 (Stockholm)</p>
            </li>
          </sanspuceslist>
        </subsection>
        <subsection id="uid103" level="3">
          <bodyTitle>Member of the Organizing Committees</bodyTitle>
          <sanspuceslist>
            <li id="uid104">
              <p noindent="true">Adrian Taylor, Session Organizer: <i>Computer-assisted analyses of optimization algorithms I &amp; II</i>, International Symposium on Mathematical Programming, July 2018.</p>
            </li>
            <li id="uid105">
              <p noindent="true">F. Bach: Co-organization of the workshop “Horizon Maths 2018 : Intelligence Artificielle”, November 23, 2018</p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
      <subsection id="uid106" level="2">
        <bodyTitle>Scientific Events Selection</bodyTitle>
        <subsection id="uid107" level="3">
          <bodyTitle>Chair of Conference Program Committees</bodyTitle>
          <sanspuceslist>
            <li id="uid108">
              <p noindent="true">F. Bach: Program Chair of the Journées de Statistiques (Saclay)</p>
            </li>
          </sanspuceslist>
        </subsection>
        <subsection id="uid109" level="3">
          <bodyTitle>Reviewer</bodyTitle>
          <sanspuceslist>
            <li id="uid110">
              <p noindent="true">Conference on Learning Theory (COLT 2018): Pierre Gaillard, Alessandro Rudi</p>
            </li>
            <li id="uid111">
              <p noindent="true">Symposium on Discrete Algorithms (SODA 2019): Adrien Taylor</p>
            </li>
            <li id="uid112">
              <p noindent="true">Neural Information Processing Systems (NIPS 2018): Pierre Gaillard, Alessandro Rudi</p>
            </li>
            <li id="uid113">
              <p noindent="true">Conference on Learning Theory (COLT 2018): Pierre Gaillard, Alessandro Rudi, Adrien Taylor</p>
            </li>
            <li id="uid114">
              <p noindent="true">Symposium on Discrete Algorithms (SODA 2019): Adrien Taylor</p>
            </li>
            <li id="uid115">
              <p noindent="true">International Conference of Machine Learning (ICML 2018): Pierre Gaillard, Alessandro Rudi</p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
      <subsection id="uid116" level="2">
        <bodyTitle>Journal</bodyTitle>
        <subsection id="uid117" level="3">
          <bodyTitle>Member of the Editorial Boards</bodyTitle>
          <sanspuceslist>
            <li id="uid118">
              <p noindent="true">F. Bach: Journal of Machine Learning Research, co-editor-in-chief</p>
            </li>
            <li id="uid119">
              <p noindent="true">F. Bach: Information and Inference, Associate Editor.</p>
            </li>
            <li id="uid120">
              <p noindent="true">F. Bach: Electronic Journal of Statistics, Associate Editor.</p>
            </li>
            <li id="uid121">
              <p noindent="true">F. Bach: Mathematical Programming, Associate Editor.</p>
            </li>
            <li id="uid122">
              <p noindent="true">F. Bach: Foundations of Computational Mathematics, Associate Editor.</p>
            </li>
            <li id="uid123">
              <p noindent="true">A. d’Aspremont: SIAM Journal on Optimization, Associate editor</p>
            </li>
            <li id="uid124">
              <p noindent="true">A. d’Aspremont: SIAM Journal on the Mathematics of Data Science, Associate Editor</p>
            </li>
            <li id="uid125">
              <p noindent="true">A. d’Aspremont: Mathematical Programming, Associate Editor</p>
            </li>
          </sanspuceslist>
        </subsection>
        <subsection id="uid126" level="3">
          <bodyTitle>Reviewer - Reviewing Activities</bodyTitle>
          <sanspuceslist>
            <li id="uid127">
              <p noindent="true">SIAM Journal on Optimization: Adrien Taylor</p>
            </li>
            <li id="uid128">
              <p noindent="true">Mathematical Programming: Adrien Taylor</p>
            </li>
            <li id="uid129">
              <p noindent="true">Journal of Optimization Theory and Applications: Adrien Taylor</p>
            </li>
            <li id="uid130">
              <p noindent="true">Journal of Machine Learning Research: Pierre Gaillard, Alessandro Rudi</p>
            </li>
            <li id="uid131">
              <p noindent="true">Applied Computational Harmonic Analysis: Alessandro Rudi</p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
      <subsection id="uid132" level="2">
        <bodyTitle>Invited Talks</bodyTitle>
        <sanspuceslist>
          <li id="uid133">
            <p noindent="true">F. Bach, Trends in Optimization Seminar, University of Washington, November 2018.</p>
          </li>
          <li id="uid134">
            <p noindent="true">Pierre Gaillard. <i>Distributed averaging of observations in a graph: the gossip problem</i>. MNL Conference, Paris, November 2018.</p>
          </li>
          <li id="uid135">
            <p noindent="true">Adrien Taylor, <i>Analysis and design of first-order methods via semidefinite
programming</i>, Séminaire Parisien d'Optimisation (SPO), Paris (France), November 2018.</p>
          </li>
          <li id="uid136">
            <p noindent="true">F. Bach, Frontier Research and Artificial Intelligence, European Research Council, Brussels, October 2018.</p>
          </li>
          <li id="uid137">
            <p noindent="true">F. Bach, IDSS Distinguished Speaker Seminar, MIT, October 2018.</p>
          </li>
          <li id="uid138">
            <p noindent="true">F. Bach, Mathematical Institute Colloquium, Oxford, October 2018.</p>
          </li>
          <li id="uid139">
            <p noindent="true">Adrien Taylor, <i>Convex Interpolation and Performance Estimation of First-order
Methods for Convex Optimization</i>, IBM/FNRS innovation award, Brussels (Belgium), October 2018.</p>
          </li>
          <li id="uid140">
            <p noindent="true">F. Bach, Workshop on Structural Inference in High-Dimensional Models, Moscow, September 2018.</p>
          </li>
          <li id="uid141">
            <p noindent="true">F. Bach, Symposium on Mathematical Programming (ISMP), Bordeaux, plenary talk, July 2018.</p>
          </li>
          <li id="uid142">
            <p noindent="true">Alexandre d'Aspremont, <i>Sharpness, Restart and Compressed Sensing Performance</i>, ISMP 2018, Bordeaux, July 2018.</p>
          </li>
          <li id="uid143">
            <p noindent="true">Alessandro Rudi, <i>FALKON: An optimal method for large scale learning with statistical guarantees</i>, ISMP 2018, Bordeaux, July 2018.</p>
          </li>
          <li id="uid144">
            <p noindent="true">Adrien Taylor, <i>Computer-assisted Lyapunov-based worst-case analyses of first-order
methods</i>, International Symposium on Mathematical Programming, Bordeaux (France), July 2018.</p>
          </li>
          <li id="uid145">
            <p noindent="true">F. Bach, SIAM Conference on Imaging Science, Bologna, Italy, invited talk, June 2018.</p>
          </li>
          <li id="uid146">
            <p noindent="true">Pierre Gaillard. <i>Online prediction of arbitrary time-series with application to electricity consumption</i>. Conference on nonstationarity. Cergy Pontoise University. June 2018.</p>
          </li>
          <li id="uid147">
            <p noindent="true">Adrien Taylor, <i>Convex Interpolation and Performance Estimation of First-order Methods for Convex Optimization</i>, International Symposium on Mathematical Programming: Tucker prize finalist, Bordeaux (France), July 2018.</p>
          </li>
          <li id="uid148">
            <p noindent="true">Alexandre d'Aspremont, <i>An approximate Shapley-Folkman Theorem</i>, Isaac Newton Institute, Cambridge, June 2018.</p>
          </li>
          <li id="uid149">
            <p noindent="true">F. Bach, Workshop on Future challenges in statistical scalability, Newton Institute, Cambridge, UK, June 2018.</p>
          </li>
          <li id="uid150">
            <p noindent="true">Adrien Taylor, <i>Automated design of first-order optimization methods</i>, Operation Research Seminar, UCLouvain, Louvain-la-Neuve (Belgium), May 2018.</p>
          </li>
          <li id="uid151">
            <p noindent="true">Adrien Taylor, <i>Automated design of first-order optimization methods</i>, LCCC Control Seminar, Lund University, Lund (Sweden), May 2018.</p>
          </li>
          <li id="uid152">
            <p noindent="true">Pierre Gaillard. <i>Distributed learning with orthogonal polynomials</i>. Inria DGA meetup. May 2018.</p>
          </li>
          <li id="uid153">
            <p noindent="true">F. Bach, Workshop on Optimisation and Machine Learning in Economics, London, March 2018.</p>
          </li>
          <li id="uid154">
            <p noindent="true">Pierre Gaillard. <i>An overview of Artificial Intelligence</i>. Hackaton. PSL University. March 2018.</p>
          </li>
          <li id="uid155">
            <p noindent="true">Alexandre d'Aspremont, <i>Regularized Nonlinear Acceleration</i>, US and Mexico Workshop on Optimization and its Applications, Jan 2018.</p>
          </li>
          <li id="uid156">
            <p noindent="true">Alessandro Rudi, <i>Learning with Random Features</i>, Isaac Newton Institute, Cambridge, Jan 2018.</p>
          </li>
          <li id="uid157">
            <p noindent="true">Pierre Gaillard. <i>Online nonparametric regression with adversarial data.</i> Smile seminar. Paris. Jan 2018.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid158" level="1">
      <bodyTitle>Teaching - Supervision - Juries</bodyTitle>
      <subsection id="uid159" level="2">
        <bodyTitle>Teaching</bodyTitle>
        <sanspuceslist>
          <li id="uid160">
            <p noindent="true">F. Bach (together with N. Chopin), <i>Graphical models</i>, 30h, Master M2 (MVA), ENS Cachan, France.</p>
          </li>
          <li id="uid161">
            <p noindent="true">F. Bach, <i>Optimisation et apprentissage statistique</i>, 20h, Master M2 (Mathématiques de l'aléatoire), Université Paris-Sud, France.</p>
          </li>
          <li id="uid162">
            <p noindent="true">Alexandre d'Aspremont, <i>Optimisation Combinatoire et Convexe</i>, avec Zhentao Li, (2015-Present) cours magistraux 30h, Master M1, ENS Paris.</p>
          </li>
          <li id="uid163">
            <p noindent="true">Alexandre d'Aspremont, <i>Optimisation convexe: modélisation, algorithmes et applications</i> cours magistraux 21h (2011-Present), Master M2 MVA, ENS PS.</p>
          </li>
          <li id="uid164">
            <p noindent="true">F. Bach and P. Gaillard, <i>Apprentissage statistique</i>, 35h, Master M1, Ecole Normale Supérieure, France.</p>
          </li>
          <li id="uid165">
            <p noindent="true">P. Gaillard (together with V. Perchet), <i>Prediction of individual sequences</i>, 21h, Master M2 MVA, ENS Cachan, France.</p>
          </li>
          <li id="uid166">
            <p noindent="true">Gregoire Mialon, Python for Machine Learning, 21h, M2 MASH, Dauphine-ENS-PSL, Paris.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid167" level="2">
        <bodyTitle>Supervision</bodyTitle>
        <sanspuceslist>
          <li id="uid168">
            <p noindent="true">Anaël Bonneton, PhD defended in July 2018, co-advised by Francis Bach, located in Agence nationale de la sécurité des systèmes d’information (ANSSI).</p>
          </li>
          <li id="uid169">
            <p noindent="true">Damien Scieur, PhD defended in September 2018. <i>Sur l'accélération des méthodes d’optimisation</i>, supervised by Alexandre d'Aspremont and Francis Bach.</p>
          </li>
          <li id="uid170">
            <p noindent="true">Jean-Baptiste Alayrac, PhD defended in September 2018, <i>Structured Learning from Videos and Language</i>,
supervised by Simon Lacoste-Julien, Josef Sivic and Ivan Laptev.</p>
          </li>
          <li id="uid171">
            <p noindent="true">Antoine Recanati, PhD defended in November 2018, <i>Application du problème de sériation au séquençage de l’ADN et autres relaxations convexes appliquées en bioinformatique</i>, supervised by Alexandre d'Aspremont.</p>
          </li>
          <li id="uid172">
            <p noindent="true">Rémi Leblond, PhD defended in November 2018, <i>Asynchronous Optimization for Machine Learning</i>, supervised by Simon Lacoste-Julien.</p>
          </li>
          <li id="uid173">
            <p noindent="true">Mathieu Barre, PhD in progress <i>Méthodes d'extrapolation, au-delà de la convexité</i>, supervised by Alexandre d'Aspremont.</p>
          </li>
          <li id="uid174">
            <p noindent="true">Grégoire Mialon, PhD in progress <i>Algorithmes d'optimisation, méthodes de régularisation et architectures pour les réseaux de neurones profonds dans un contexte où les données labellisées sont rares</i>, supervised by Alexandre d'Aspremont.</p>
          </li>
          <li id="uid175">
            <p noindent="true">Radu-Alexandru Dragomir, PhD in progress <i>Non-Euclidean first-order methods</i>, supervised by Alexandre d'Aspremont and Jérôme Bolte.</p>
          </li>
          <li id="uid176">
            <p noindent="true">Thomas Kerdreux, PhD in progress <i>Optimisation and machine learning</i>, supervised by Alexandre d'Aspremont.</p>
          </li>
          <li id="uid177">
            <p noindent="true">Margaux Brégère, PhD in progress started September 2017, supervised by Pierre Gaillard, Gilles Stoltz and Yannig Goude (EDF R&amp;D).</p>
          </li>
          <li id="uid178">
            <p noindent="true">Raphaël Berthier, PhD in progress started September 2017, supervised by Francis Bach and Pierre Gaillard.</p>
          </li>
          <li id="uid179">
            <p noindent="true">Loucas Pillaud-Vivien, PhD in progress, supervised by Francis Bach and Alessandro Rudi.</p>
          </li>
          <li id="uid180">
            <p noindent="true">Alex Nowak, PhD in progress, supervised by Francis Bach and Alessandro Rudi.</p>
          </li>
          <li id="uid181">
            <p noindent="true">Ulysse Marteau Ferey, PhD in progress, supervised by Francis Bach and Alessandro Rudi.</p>
          </li>
          <li id="uid182">
            <p noindent="true">Dmitry Babichev, PhD in progress, started in September 2015, co-advised by Francis Bach and Anatoly Judistky (Univ. Grenoble).</p>
          </li>
          <li id="uid183">
            <p noindent="true">Tatiana Shpakova, PhD in progress, started September 2015, advised by Francis Bach.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid184" level="2">
        <bodyTitle>Juries</bodyTitle>
        <sanspuceslist>
          <li id="uid185">
            <p noindent="true">Alexandre d'Aspremont, Habilitation à diriger des recherches of Thomas Bruls, Genoscope, Université d’Evry.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid186" level="1">
      <bodyTitle>Popularization</bodyTitle>
      <subsection id="uid187" level="2">
        <bodyTitle>Creation of media or tools for science outreach</bodyTitle>
        <p>Design and implementation of a demonstration for the permanent exhibit at Palais de la Découverte: “L’apprenti illustrateur” (J.-B. Alayrac, F. Bach)
</p>
      </subsection>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    
    <biblStruct id="sierra-2018-bid34" type="phdthesis" rend="year" n="cite:alayrac:tel-01885412">
      <identifiant type="hal" value="tel-01885412"/>
      <monogr>
        <title level="m">Structured Learning from Videos and Language</title>
        <author>
          <persName key="willow-2018-idp128128">
            <foreName>Jean-Baptiste</foreName>
            <surname>Alayrac</surname>
            <initial>J.-B.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">Ecole normale supérieure - ENS PARIS</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/tel-01885412" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>tel-01885412</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Theses</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid36" type="phdthesis" rend="year" n="cite:beaugnon:tel-01888971">
      <identifiant type="hal" value="tel-01888971"/>
      <monogr>
        <title level="m">Expert-in-the-Loop Supervised Learning for Computer Security Detection Systems</title>
        <author>
          <persName>
            <foreName>Anaël</foreName>
            <surname>Beaugnon</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">PSL Research University</orgName>
          </publisher>
          <dateStruct>
            <month>June</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/tel-01888971" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>tel-01888971</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Theses</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid22" type="phdthesis" rend="year" n="cite:leblond:tel-01950576">
      <identifiant type="hal" value="tel-01950576"/>
      <monogr>
        <title level="m">Asynchronous Optimization for Machine Learning</title>
        <author>
          <persName key="sierra-2018-idp121136">
            <foreName>Rémi</foreName>
            <surname>Leblond</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">Ecole Normale Superieure de Paris - ENS Paris</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/tel-01950576" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>tel-01950576</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Theses</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid51" type="phdthesis" rend="year" n="cite:recanati:tel-01984368">
      <identifiant type="hal" value="tel-01984368"/>
      <monogr>
        <title level="m">Relaxations of the Seriation problem and applications to de novo genome assembly</title>
        <author>
          <persName key="sierra-2018-idp160496">
            <foreName>Antoine</foreName>
            <surname>Recanati</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">PSL Research University</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/tel-01984368" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>tel-01984368</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Theses</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid39" type="phdthesis" rend="year" n="cite:scieur:tel-01887163">
      <identifiant type="hal" value="tel-01887163"/>
      <monogr>
        <title level="m">Acceleration in Optimization</title>
        <author>
          <persName key="sierra-2018-idp162928">
            <foreName>Damien</foreName>
            <surname>Scieur</surname>
            <initial>D.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">PSL Research University</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/tel-01887163" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>tel-01887163</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Theses</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid4" type="article" rend="year" n="cite:babichev:hal-01388498">
      <identifiant type="doi" value="10.1214/18-EJS1428"/>
      <identifiant type="hal" value="hal-01388498"/>
      <analytic>
        <title level="a">Slice inverse regression with score functions</title>
        <author>
          <persName key="sierra-2018-idp135920">
            <foreName>Dmitry</foreName>
            <surname>Babichev</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00518">
        <idno type="issn">1935-7524</idno>
        <title level="j">Electronic journal of statistics </title>
        <imprint>
          <biblScope type="volume">Volume 12, Number 1 (2018)</biblScope>
          <dateStruct>
            <month>May</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">1507-1543</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01388498" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01388498</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid43" type="article" rend="year" n="cite:bach:hal-01222319">
      <identifiant type="hal" value="hal-01222319"/>
      <analytic>
        <title level="a">Submodular Functions: from Discrete to Continuous Domains</title>
        <author>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01393">
        <idno type="issn">0025-5610</idno>
        <title level="j">Mathematical Programming, Series A</title>
        <imprint>
          <dateStruct>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01222319" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01222319</ref>
        </imprint>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1511.00394" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1511.<allowbreak/>00394</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid53" type="article" rend="year" n="cite:daspremont:hal-01927392">
      <identifiant type="doi" value="10.1137/17M1116842"/>
      <identifiant type="hal" value="hal-01927392"/>
      <analytic>
        <title level="a">Optimal Affine-Invariant Smooth Minimization Algorithms</title>
        <author>
          <persName>
            <foreName>Alexandre</foreName>
            <surname>D'Aspremont</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Cristobal</foreName>
            <surname>Guzman</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Martin</foreName>
            <surname>Jaggi</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01738">
        <idno type="issn">1052-6234</idno>
        <title level="j">SIAM Journal on Optimization</title>
        <imprint>
          <biblScope type="volume">28</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <month>July</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">2384 - 2405</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01927392" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01927392</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid54" type="article" rend="year" n="cite:garreau:hal-01416704">
      <identifiant type="hal" value="hal-01416704"/>
      <analytic>
        <title level="a">Consistent change-point detection with kernels</title>
        <author>
          <persName>
            <foreName>Damien</foreName>
            <surname>Garreau</surname>
            <initial>D.</initial>
          </persName>
          <persName key="select-2018-idp148944">
            <foreName>Sylvain</foreName>
            <surname>Arlot</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00518">
        <idno type="issn">1935-7524</idno>
        <title level="j">Electronic journal of statistics </title>
        <imprint>
          <biblScope type="volume">12</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">4440-4486</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01416704" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01416704</ref>
        </imprint>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1612.04740" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1612.<allowbreak/>04740</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid21" type="article" rend="year" n="cite:leblond:hal-01950558">
      <identifiant type="hal" value="hal-01950558"/>
      <analytic>
        <title level="a">Improved asynchronous parallel optimization analysis for stochastic incremental methods</title>
        <author>
          <persName key="sierra-2018-idp121136">
            <foreName>Rémi</foreName>
            <surname>Leblond</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Fabian</foreName>
            <surname>Pedregosa</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sierra-2018-idp210288">
            <foreName>Simon</foreName>
            <surname>Lacoste-Julien</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01187">
        <idno type="issn">1532-4435</idno>
        <title level="j">Journal of Machine Learning Research (JMLR)</title>
        <imprint>
          <dateStruct>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01950558" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01950558</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid19" type="article" rend="year" n="cite:lelievre:hal-01812120">
      <identifiant type="doi" value="10.30757/ALEA.v15-43"/>
      <identifiant type="hal" value="hal-01812120"/>
      <analytic>
        <title level="a">Central Limit Theorem for stationary Fleming–Viot particle systems in finite spaces</title>
        <author>
          <persName key="matherials-2018-idp130960">
            <foreName>Tony</foreName>
            <surname>Lelievre</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Loucas</foreName>
            <surname>Pillaud-Vivien</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Julien</foreName>
            <surname>Reygner</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01336">
        <idno type="issn">1980-0436</idno>
        <title level="j">ALEA : Latin American Journal of Probability and Mathematical Statistics</title>
        <imprint>
          <biblScope type="volume">15</biblScope>
          <dateStruct>
            <month>September</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">1163-1182</biblScope>
          <ref xlink:href="https://hal-enpc.archives-ouvertes.fr/hal-01812120" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal-enpc.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01812120</ref>
        </imprint>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1806.04490" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1806.<allowbreak/>04490</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid28" type="article" rend="year" n="cite:lin:hal-01958890">
      <identifiant type="hal" value="hal-01958890"/>
      <analytic>
        <title level="a">Optimal rates for spectral algorithms with least-squares regression over Hilbert spaces</title>
        <author>
          <persName>
            <foreName>Junhong</foreName>
            <surname>Lin</surname>
            <initial>J.</initial>
          </persName>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Lorenzo</foreName>
            <surname>Rosasco</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Volkan</foreName>
            <surname>Cevher</surname>
            <initial>V.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00177">
        <idno type="issn">1063-5203</idno>
        <title level="j">Applied and Computational Harmonic Analysis</title>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01958890" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01958890</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid55" type="article" rend="year" n="cite:schatz:hal-01888735">
      <identifiant type="doi" value="10.1121/1.5037615"/>
      <identifiant type="hal" value="hal-01888735"/>
      <analytic>
        <title level="a">Evaluating automatic speech recognition systems as quantitative models of cross-lingual phonetic category perception</title>
        <author>
          <persName>
            <foreName>Thomas</foreName>
            <surname>Schatz</surname>
            <initial>T.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName key="coml-2018-idp149024">
            <foreName>Emmanuel</foreName>
            <surname>Dupoux</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01300">
        <idno type="issn">0001-4966</idno>
        <title level="j">Journal of the Acoustical Society of America</title>
        <imprint>
          <biblScope type="volume">143</biblScope>
          <biblScope type="number">5</biblScope>
          <dateStruct>
            <month>May</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">EL372 - EL378</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01888735" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01888735</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid5" type="inproceedings" rend="year" n="cite:babichev:hal-01929810">
      <identifiant type="hal" value="hal-01929810"/>
      <analytic>
        <title level="a">Constant Step Size Stochastic Gradient Descent for Probabilistic Modeling</title>
        <author>
          <persName key="sierra-2018-idp135920">
            <foreName>Dmitry</foreName>
            <surname>Babichev</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">UAI 2018 - Conference on Uncertainty in Artificial Intelligence</title>
        <loc>Monterey, United States</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01929810" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01929810</ref>
        </imprint>
        <meeting id="cid49628">
          <title>Conference on Uncertainty in Artificial Intelligence</title>
          <num>2018</num>
          <abbr type="sigle">UAI</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1804.05567" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1804.<allowbreak/>05567</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid50" type="inproceedings" rend="year" n="cite:bach:hal-01569934">
      <identifiant type="hal" value="hal-01569934"/>
      <analytic>
        <title level="a">Efficient Algorithms for Non-convex Isotonic Regression through Submodular Optimization</title>
        <author>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing Systems</title>
        <loc>Montreal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01569934" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01569934</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>21</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1707.09157" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1707.<allowbreak/>09157</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid35" type="inproceedings" rend="year" n="cite:beaugnon:hal-01888983">
      <identifiant type="hal" value="hal-01888983"/>
      <analytic>
        <title level="a">End-to-End Active Learning for Computer Security Experts</title>
        <author>
          <persName>
            <foreName>Anaël</foreName>
            <surname>Beaugnon</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Pierre</foreName>
            <surname>Chifflier</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">KDD Workshop on Interactive Data Exploration and Analytics (IDEA)</title>
        <loc>Londres, United Kingdom</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01888983" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01888983</ref>
        </imprint>
        <meeting id="cid626032">
          <title>Workshop on Interactive Data Exploration and Analytics</title>
          <num>2018</num>
          <abbr type="sigle">IDEA</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid45" type="inproceedings" rend="year" n="cite:beaugnon:hal-01888976">
      <identifiant type="hal" value="hal-01888976"/>
      <analytic>
        <title level="a">End-to-End Active Learning for Computer Security Experts</title>
        <author>
          <persName>
            <foreName>Anaël</foreName>
            <surname>Beaugnon</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Pierre</foreName>
            <surname>Chifflier</surname>
            <initial>P.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AAAI Workshop on Artificial Intelligence for Cyber Security (AICS)</title>
        <loc>New Orleans, United States</loc>
        <imprint>
          <dateStruct>
            <month>February</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01888976" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01888976</ref>
        </imprint>
        <meeting id="cid626033">
          <title>Workshop on Artificial Intelligence for Cyber Security</title>
          <num>2018</num>
          <abbr type="sigle">AICS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid30" type="inproceedings" rend="year" n="cite:carratino:hal-01958906">
      <identifiant type="hal" value="hal-01958906"/>
      <analytic>
        <title level="a">Learning with SGD and Random Features</title>
        <author>
          <persName>
            <foreName>Luigi</foreName>
            <surname>Carratino</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Lorenzo</foreName>
            <surname>Rosasco</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing Systems</title>
        <loc>Montreal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">10213–10224</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01958906" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01958906</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>21</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1807.06343" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1807.<allowbreak/>06343</ref> - Spotlight</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid0" type="inproceedings" rend="year" n="cite:chizat:hal-01798792">
      <identifiant type="hal" value="hal-01798792"/>
      <analytic>
        <title level="a">On the Global Convergence of Gradient Descent for Over-parameterized Models using Optimal Transport</title>
        <author>
          <persName key="sierra-2018-idp126064">
            <foreName>Lenaic</foreName>
            <surname>Chizat</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances in Neural Information Processing Systems (NIPS)</title>
        <loc>Montréal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01798792" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01798792</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>32</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1805.09545" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1805.<allowbreak/>09545</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid52" type="inproceedings" rend="year" n="cite:defossez:hal-01899949">
      <identifiant type="hal" value="hal-01899949"/>
      <analytic>
        <title level="a">SING: Symbol-to-Instrument Neural Generator</title>
        <author>
          <persName key="sierra-2018-idp148176">
            <foreName>Alexandre</foreName>
            <surname>Défossez</surname>
            <initial>A.</initial>
          </persName>
          <persName key="coml-2018-idp164544">
            <foreName>Neil</foreName>
            <surname>Zeghidour</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Nicolas</foreName>
            <surname>Usunier</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Léon</foreName>
            <surname>Bottou</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Conference on Neural Information Processing Systems (NIPS)</title>
        <loc>Montréal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01899949" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01899949</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>32</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1810.09785" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1810.<allowbreak/>09785</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid44" type="inproceedings" rend="year" n="cite:gower:hal-01652152">
      <identifiant type="doi" value="10.07462"/>
      <identifiant type="hal" value="hal-01652152"/>
      <analytic>
        <title level="a">Tracking the gradients using the Hessian: A new look at variance reducing stochastic methods</title>
        <author>
          <persName>
            <foreName>Robert M.</foreName>
            <surname>Gower</surname>
            <initial>R. M.</initial>
          </persName>
          <persName>
            <foreName>Nicolas</foreName>
            <surname>Le Roux</surname>
            <initial>N.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">International Conference on Artificial Intelligence and Statistics (AISTATS)</title>
        <loc>Canary Islands, Spain</loc>
        <imprint>
          <dateStruct>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01652152" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01652152</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>21</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1710.07462" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1710.<allowbreak/>07462</ref> - 17 pages, 2 figures, 1 table</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid41" type="inproceedings" rend="year" n="cite:halabi:hal-01652151">
      <identifiant type="doi" value="10.06273"/>
      <identifiant type="hal" value="hal-01652151"/>
      <analytic>
        <title level="a">Combinatorial Penalties: Which structures are preserved by convex relaxations?</title>
        <author>
          <persName>
            <foreName>Marwa El</foreName>
            <surname>Halabi</surname>
            <initial>M. E.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Volkan</foreName>
            <surname>Cevher</surname>
            <initial>V.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AISTATS 2018 - 22nd International Conference on Artificial Intelligence and Statistics</title>
        <loc>Canary Islands, Spain</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01652151" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01652151</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>21</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1710.06273" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1710.<allowbreak/>06273</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid37" type="inproceedings" rend="year" n="cite:kerdreux:hal-01927391">
      <identifiant type="hal" value="hal-01927391"/>
      <analytic>
        <title level="a">Frank-Wolfe with Subsampling Oracle</title>
        <author>
          <persName key="sierra-2018-idp153056">
            <foreName>Thomas</foreName>
            <surname>Kerdreux</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Fabian</foreName>
            <surname>Pedregosa</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Alexandre</foreName>
            <surname>D'Aspremont</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ICML 2018 - 35th International Conference on Machine Learning</title>
        <loc>Stockholm, Sweden</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01927391" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01927391</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>35</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1803.07348" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1803.<allowbreak/>07348</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid42" type="inproceedings" rend="year" n="cite:kundu:hal-01652149">
      <identifiant type="doi" value="10.06465"/>
      <identifiant type="hal" value="hal-01652149"/>
      <analytic>
        <title level="a">Convex optimization over intersection of simple sets: improved convergence rate guarantees via an exact penalty approach</title>
        <author>
          <persName key="sierra-2018-idp202768">
            <foreName>Achintya</foreName>
            <surname>Kundu</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Chiranjib</foreName>
            <surname>Bhattacharyya</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AISTATS 2018 - 22nd International Conference on Artificial Intelligence and Statistics</title>
        <loc>Canary Islands, Spain</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01652149" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01652149</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>21</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1710.06465" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1710.<allowbreak/>06465</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid20" type="inproceedings" rend="year" n="cite:leblond:hal-01950555">
      <identifiant type="hal" value="hal-01950555"/>
      <analytic>
        <title level="a">SeaRNN: Training RNNs with Global-Local Losses</title>
        <author>
          <persName key="sierra-2018-idp121136">
            <foreName>Rémi</foreName>
            <surname>Leblond</surname>
            <initial>R.</initial>
          </persName>
          <persName key="willow-2018-idp128128">
            <foreName>Jean-Baptiste</foreName>
            <surname>Alayrac</surname>
            <initial>J.-B.</initial>
          </persName>
          <persName>
            <foreName>Anton</foreName>
            <surname>Osokin</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp210288">
            <foreName>Simon</foreName>
            <surname>Lacoste-Julien</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ICLR 2018 : 6th International Conference on Learning Representations</title>
        <loc>Vancouver, Canada</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01950555" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01950555</ref>
        </imprint>
        <meeting id="cid624026">
          <title>International Conference on Learning Representations</title>
          <num>6</num>
          <abbr type="sigle">ICLR</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid29" type="inproceedings" rend="year" n="cite:luise:hal-01958887">
      <identifiant type="hal" value="hal-01958887"/>
      <analytic>
        <title level="a">Differential Properties of Sinkhorn Approximation for Learning with Wasserstein Distance</title>
        <author>
          <persName>
            <foreName>Giulia</foreName>
            <surname>Luise</surname>
            <initial>G.</initial>
          </persName>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Massimiliano</foreName>
            <surname>Pontil</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Carlo</foreName>
            <surname>Ciliberto</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2018 - Advances in Neural Information Processing Systems</title>
        <loc>Montreal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">5864-5874</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01958887" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01958887</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>32</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1805.11897" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1805.<allowbreak/>11897</ref> - 26 pages, 4 figures</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid25" type="inproceedings" rend="year" n="cite:pauwels:hal-01796591">
      <identifiant type="hal" value="hal-01796591"/>
      <analytic>
        <title level="a">Relating Leverage Scores and Density using Regularized Christoffel Functions</title>
        <author>
          <persName>
            <foreName>Edouard</foreName>
            <surname>Pauwels</surname>
            <initial>E.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Jean-Philippe</foreName>
            <surname>Vert</surname>
            <initial>J.-P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Neural Information Processing Systems</title>
        <loc>Montréal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01796591" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01796591</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>32</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid17" type="inproceedings" rend="year" n="cite:pillaudvivien:hal-01662278">
      <identifiant type="hal" value="hal-01662278"/>
      <analytic>
        <title level="a">Exponential convergence of testing error for stochastic gradient methods</title>
        <author>
          <persName>
            <foreName>Loucas</foreName>
            <surname>Pillaud-Vivien</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Conference on Learning Theory (COLT)</title>
        <loc>Stockholm, Sweden</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01662278" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01662278</ref>
        </imprint>
        <meeting id="cid29437">
          <title>Annual Conference on Learning Theory</title>
          <num>31</num>
          <abbr type="sigle">COLT</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1712.04755" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1712.<allowbreak/>04755</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid18" type="inproceedings" rend="year" n="cite:pillaudvivien:hal-01799116">
      <identifiant type="hal" value="hal-01799116"/>
      <analytic>
        <title level="a">Statistical Optimality of Stochastic Gradient Descent on Hard Learning Problems through Multiple Passes</title>
        <author>
          <persName>
            <foreName>Loucas</foreName>
            <surname>Pillaud-Vivien</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Neural Information Processing Systems (NeurIPS)</title>
        <loc>Montréal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01799116" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01799116</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>32</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1805.10074" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1805.<allowbreak/>10074</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid40" type="inproceedings" rend="year" n="cite:reddi:hal-01652150">
      <identifiant type="hal" value="hal-01652150"/>
      <analytic>
        <title level="a">A Generic Approach for Escaping Saddle points</title>
        <author>
          <persName>
            <foreName>Sashank J</foreName>
            <surname>Reddi</surname>
            <initial>S. J.</initial>
          </persName>
          <persName>
            <foreName>Manzil</foreName>
            <surname>Zaheer</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Suvrit</foreName>
            <surname>Sra</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Barnabas</foreName>
            <surname>Poczos</surname>
            <initial>B.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Ruslan</foreName>
            <surname>Salakhutdinov</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Alexander J</foreName>
            <surname>Smola</surname>
            <initial>A. J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">AISTATS 2018 - 22nd International Conference on Artificial Intelligence and Statistics</title>
        <loc>Canary Islands, Spain</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01652150" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01652150</ref>
        </imprint>
        <meeting id="cid388734">
          <title>International Conference on Artificial Intelligence and Statistics</title>
          <num>21</num>
          <abbr type="sigle">AISTATS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1709.01434" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1709.<allowbreak/>01434</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid32" type="inproceedings" rend="year" n="cite:rudi:hal-01958879">
      <identifiant type="hal" value="hal-01958879"/>
      <analytic>
        <title level="a">On Fast Leverage Score Sampling and Optimal Learning</title>
        <author>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Daniele</foreName>
            <surname>Calandriello</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Luigi</foreName>
            <surname>Carratino</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Lorenzo</foreName>
            <surname>Rosasco</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NeurIPS 2018 - Thirty-second Conference on Neural Information Processing Systems</title>
        <loc>Montreal, Canada</loc>
        <title level="s">Advances in Neural Information Processing Systems - NIPS-2018</title>
        <imprint>
          <biblScope type="volume">31</biblScope>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">5677–5687</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01958879" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01958879</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>32</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1810.13258" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1810.<allowbreak/>13258</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid31" type="inproceedings" rend="year" n="cite:rudi:hal-01958900">
      <identifiant type="hal" value="hal-01958900"/>
      <analytic>
        <title level="a">Manifold Structured Prediction</title>
        <author>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Carlo</foreName>
            <surname>Ciliberto</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Gian Maria</foreName>
            <surname>Marconi</surname>
            <initial>G. M.</initial>
          </persName>
          <persName>
            <foreName>Lorenzo</foreName>
            <surname>Rosasco</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">NIPS 2018 - Neural Information Processing Systems Conference</title>
        <loc>Montreal, Canada</loc>
        <title level="s">Advances in Neural Information Processing Systems</title>
        <imprint>
          <biblScope type="volume">31</biblScope>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <biblScope type="pages">5615-5626</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01958900" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01958900</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>32</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1806.09908" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1806.<allowbreak/>09908</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid24" type="inproceedings" rend="year" n="cite:scaman:hal-01957013">
      <identifiant type="hal" value="hal-01957013"/>
      <analytic>
        <title level="a">Optimal Algorithms for Non-Smooth Distributed Optimization in Networks</title>
        <author>
          <persName>
            <foreName>Kevin</foreName>
            <surname>Scaman</surname>
            <initial>K.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Sébastien</foreName>
            <surname>Bubeck</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Yin Tat</foreName>
            <surname>Lee</surname>
            <initial>Y. T.</initial>
          </persName>
          <persName key="dyogene-2018-idp161840">
            <foreName>Laurent</foreName>
            <surname>Massoulié</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Advances In Neural Information Processing systems</title>
        <loc>Montreal, Canada</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01957013" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01957013</ref>
        </imprint>
        <meeting id="cid29560">
          <title>Annual Conference on Neural Information Processing Systems</title>
          <num>32</num>
          <abbr type="sigle">NIPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1806.00291" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1806.<allowbreak/>00291</ref> - 17 pages</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid8" type="inproceedings" rend="year" n="cite:scieur:hal-01805251">
      <identifiant type="hal" value="hal-01805251"/>
      <analytic>
        <title level="a">Nonlinear Acceleration of CNNs</title>
        <author>
          <persName key="sierra-2018-idp162928">
            <foreName>Damien</foreName>
            <surname>Scieur</surname>
            <initial>D.</initial>
          </persName>
          <persName key="galen-post-2018-idp120560">
            <foreName>Edouard</foreName>
            <surname>Oyallon</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Alexandre</foreName>
            <surname>d'Aspremont</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ICLR Workshop track</title>
        <loc>Vancouver, Canada</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01805251" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01805251</ref>
        </imprint>
        <meeting id="cid624026">
          <title>International Conference on Learning Representations</title>
          <num>6</num>
          <abbr type="sigle">ICLR</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid3" type="inproceedings" rend="year" n="cite:shpakova:hal-01939549">
      <identifiant type="hal" value="hal-01939549"/>
      <analytic>
        <title level="a">Marginal Weighted Maximum Log-likelihood for Efficient Learning of Perturb-and-Map models</title>
        <author>
          <persName key="sierra-2018-idp165360">
            <foreName>Tatiana</foreName>
            <surname>Shpakova</surname>
            <initial>T.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Anton</foreName>
            <surname>Osokin</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">UAI 2018 - Conference on Uncertainty in Artificial Intelligence 2018</title>
        <loc>Monterey, United States</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01939549" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01939549</ref>
        </imprint>
        <meeting id="cid49628">
          <title>Conference on Uncertainty in Artificial Intelligence</title>
          <num>2018</num>
          <abbr type="sigle">UAI</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1811.08725" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1811.<allowbreak/>08725</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid11" type="inproceedings" rend="year" n="cite:taylor:hal-01902068">
      <identifiant type="hal" value="hal-01902068"/>
      <analytic>
        <title level="a">Lyapunov Functions for First-Order Methods: Tight Automated Convergence Guarantees</title>
        <author>
          <persName key="sierra-2018-idp133456">
            <foreName>Adrien B.</foreName>
            <surname>Taylor</surname>
            <initial>A. B.</initial>
          </persName>
          <persName>
            <foreName>Bryan</foreName>
            <surname>Van Scoy</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Laurent</foreName>
            <surname>Lessard</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Proceedings of the 35th International Conference on Machine Learning. PMLR 80:4897-4906</title>
        <loc>Stockholm, Sweden</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01902068" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01902068</ref>
        </imprint>
        <meeting id="cid32516">
          <title>International Conference on Machine Learning</title>
          <num>35</num>
          <abbr type="sigle">ICML</abbr>
        </meeting>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1803.06073" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1803.<allowbreak/>06073</ref>
      </note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid26" type="inproceedings" rend="year" n="cite:tripuraneni:hal-01957015">
      <identifiant type="hal" value="hal-01957015"/>
      <analytic>
        <title level="a">Averaging Stochastic Gradient Descent on Riemannian Manifolds</title>
        <author>
          <persName>
            <foreName>Nilesh</foreName>
            <surname>Tripuraneni</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Nicolas</foreName>
            <surname>Flammarion</surname>
            <initial>N.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Michael I.</foreName>
            <surname>Jordan</surname>
            <initial>M. I.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Computational Learning Theory (COLT)</title>
        <loc>Stockholm, Sweden</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01957015" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01957015</ref>
        </imprint>
        <meeting id="cid29437">
          <title>Annual Conference on Learning Theory</title>
          <num>31</num>
          <abbr type="sigle">COLT</abbr>
        </meeting>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1802.09128" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1802.<allowbreak/>09128</ref> - COLT 2018</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid23" type="unpublished" rend="year" n="cite:barre:hal-01897496">
      <identifiant type="arxiv" value="1810.02748"/>
      <identifiant type="hal" value="hal-01897496"/>
      <monogr>
        <title level="m"><formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>𝐌</mi><mo>*</mo></msup></math></formula>-Regularized Dictionary Learning</title>
        <author>
          <persName key="sierra-2018-idp138352">
            <foreName>Mathieu</foreName>
            <surname>Barré</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Alexandre</foreName>
            <surname>d'Aspremont</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01897496" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01897496</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1810.02748" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1810.<allowbreak/>02748</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid2" type="unpublished" rend="year" n="cite:berthier:hal-01797016">
      <identifiant type="hal" value="hal-01797016"/>
      <monogr>
        <title level="m">Gossip of Statistical Observations using Orthogonal Polynomials</title>
        <author>
          <persName key="sierra-2018-idp140864">
            <foreName>Raphaël</foreName>
            <surname>Berthier</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sierra-2018-idp118672">
            <foreName>Pierre</foreName>
            <surname>Gaillard</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01797016" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01797016</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1805.08531" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1805.<allowbreak/>08531</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid6" type="unpublished" rend="year" n="cite:bollapragada:hal-01893921">
      <identifiant type="arxiv" value="1810.04539"/>
      <identifiant type="hal" value="hal-01893921"/>
      <monogr>
        <title level="m">Nonlinear Acceleration of Momentum and Primal-Dual Algorithms</title>
        <author>
          <persName>
            <foreName>Raghu</foreName>
            <surname>Bollapragada</surname>
            <initial>R.</initial>
          </persName>
          <persName key="sierra-2018-idp162928">
            <foreName>Damien</foreName>
            <surname>Scieur</surname>
            <initial>D.</initial>
          </persName>
          <persName key="sierra-2018-idp115824">
            <foreName>Alexandre</foreName>
            <surname>d'Aspremont</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01893921" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01893921</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1810.04539" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1810.<allowbreak/>04539</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid47" type="unpublished" rend="year" n="cite:chizat:hal-01945578">
      <identifiant type="hal" value="hal-01945578"/>
      <monogr>
        <title level="m">A Note on Lazy Training in Supervised Differentiable Programming</title>
        <author>
          <persName key="sierra-2018-idp126064">
            <foreName>Lenaic</foreName>
            <surname>Chizat</surname>
            <initial>L.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01945578" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01945578</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1812.07956" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1812.<allowbreak/>07956</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid27" type="unpublished" rend="year" n="cite:ciliberto:hal-01958863">
      <identifiant type="hal" value="hal-01958863"/>
      <monogr>
        <title level="m">Localized Structured Prediction</title>
        <author>
          <persName>
            <foreName>Carlo</foreName>
            <surname>Ciliberto</surname>
            <initial>C.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01958863" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01958863</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1806.02402" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1806.<allowbreak/>02402</ref> - 53 pages, 7 figures, 1 algorithm</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid38" type="unpublished" rend="year" n="cite:dieuleveut:hal-01565514">
      <identifiant type="hal" value="hal-01565514"/>
      <monogr>
        <title level="m">Bridging the Gap between Constant Step Size Stochastic Gradient Descent and Markov Chains</title>
        <author>
          <persName>
            <foreName>Aymeric</foreName>
            <surname>Dieuleveut</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Alain</foreName>
            <surname>Durmus</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01565514" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01565514</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1707.06386" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1707.<allowbreak/>06386</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid12" type="unpublished" rend="year" n="cite:drori:hal-01902048">
      <identifiant type="hal" value="hal-01902048"/>
      <monogr>
        <title level="m">Efficient First-order Methods for Convex Minimization: a Constructive Approach</title>
        <author>
          <persName>
            <foreName>Yoel</foreName>
            <surname>Drori</surname>
            <initial>Y.</initial>
          </persName>
          <persName key="sierra-2018-idp133456">
            <foreName>Adrien B.</foreName>
            <surname>Taylor</surname>
            <initial>A. B.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01902048" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01902048</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1803.05676" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1803.<allowbreak/>05676</ref> - Code available at https://github.com/AdrienTaylor/GreedyMethods</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid15" type="unpublished" rend="year" n="cite:gaillard:hal-01802004">
      <identifiant type="hal" value="hal-01802004"/>
      <monogr>
        <title level="m">Uniform regret bounds over <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><msup><mi>R</mi><mi>d</mi></msup></math></formula> for the sequential linear regression problem with the square loss</title>
        <author>
          <persName key="sierra-2018-idp118672">
            <foreName>Pierre</foreName>
            <surname>Gaillard</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Sébastien</foreName>
            <surname>Gerchinovitz</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Malo</foreName>
            <surname>Huard</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Gilles</foreName>
            <surname>Stoltz</surname>
            <initial>G.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>February</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01802004" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01802004</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1805.11386" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1805.<allowbreak/>11386</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid16" type="unpublished" rend="year" n="cite:gaillard:hal-01798201">
      <identifiant type="hal" value="hal-01798201"/>
      <monogr>
        <title level="m">Efficient online algorithms for fast-rate regret bounds under sparsity</title>
        <author>
          <persName key="sierra-2018-idp118672">
            <foreName>Pierre</foreName>
            <surname>Gaillard</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Olivier</foreName>
            <surname>Wintenberger</surname>
            <initial>O.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01798201" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01798201</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1805.09174" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1805.<allowbreak/>09174</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid33" type="unpublished" rend="year" n="cite:hendrikx:hal-01893568">
      <identifiant type="hal" value="hal-01893568"/>
      <monogr>
        <title level="m">Accelerated decentralized optimization with local updates for smooth and strongly convex objectives</title>
        <author>
          <persName key="dyogene-2018-idp171968">
            <foreName>Hadrien</foreName>
            <surname>Hendrikx</surname>
            <initial>H.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName key="dyogene-2018-idp161840">
            <foreName>Laurent</foreName>
            <surname>Massoulié</surname>
            <initial>L.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01893568" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01893568</ref>
        </imprint>
      </monogr>
      <note type="bnote">working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid46" type="unpublished" rend="year" n="cite:kerdreux:hal-01893922">
      <identifiant type="arxiv" value="1810.02429"/>
      <identifiant type="hal" value="hal-01893922"/>
      <monogr>
        <title level="m">Restarting Frank-Wolfe</title>
        <author>
          <persName key="sierra-2018-idp153056">
            <foreName>Thomas</foreName>
            <surname>Kerdreux</surname>
            <initial>T.</initial>
          </persName>
          <persName key="sierra-2018-idp115824">
            <foreName>Alexandre</foreName>
            <surname>d'Aspremont</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Sebastian</foreName>
            <surname>Pokutta</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01893922" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01893922</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1810.02429" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1810.<allowbreak/>02429</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid1" type="unpublished" rend="year" n="cite:nowakvila:hal-01893006">
      <identifiant type="hal" value="hal-01893006"/>
      <monogr>
        <title level="m">Sharp Analysis of Learning with Discrete Losses</title>
        <author>
          <persName>
            <foreName>Alex</foreName>
            <surname>Nowak-Vila</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName key="sierra-2018-idp123584">
            <foreName>Alessandro</foreName>
            <surname>Rudi</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01893006" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01893006</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1810.06839" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1810.<allowbreak/>06839</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid14" type="unpublished" rend="year" n="cite:ostrovskii:hal-01895127">
      <identifiant type="hal" value="hal-01895127"/>
      <monogr>
        <title level="m">Finite-sample Analysis of M-estimators using Self-concordance</title>
        <author>
          <persName key="sierra-2018-idp130992">
            <foreName>Dmitrii M.</foreName>
            <surname>Ostrovskii</surname>
            <initial>D. M.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01895127" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01895127</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1810.06838" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1810.<allowbreak/>06838</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid10" type="unpublished" rend="year" n="cite:recanati:hal-01846269">
      <identifiant type="hal" value="hal-01846269"/>
      <monogr>
        <title level="m">Reconstructing Latent Orderings by Spectral Clustering</title>
        <author>
          <persName key="sierra-2018-idp160496">
            <foreName>Antoine</foreName>
            <surname>Recanati</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp153056">
            <foreName>Thomas</foreName>
            <surname>Kerdreux</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Alexandre</foreName>
            <surname>d'Aspremont</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01846269" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01846269</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1807.07122" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1807.<allowbreak/>07122</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid9" type="unpublished" rend="year" n="cite:recanati:hal-01851960">
      <identifiant type="hal" value="hal-01851960"/>
      <monogr>
        <title level="m">Robust Seriation and Applications to Cancer Genomics</title>
        <author>
          <persName key="sierra-2018-idp160496">
            <foreName>Antoine</foreName>
            <surname>Recanati</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Nicolas</foreName>
            <surname>Servant</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Jean-Philippe</foreName>
            <surname>Vert</surname>
            <initial>J.-P.</initial>
          </persName>
          <persName>
            <foreName>Alexandre</foreName>
            <surname>d'Aspremont</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01851960" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01851960</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1806.00664" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1806.<allowbreak/>00664</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid13" type="unpublished" rend="year" n="cite:ryu:hal-01943622">
      <identifiant type="hal" value="hal-01943622"/>
      <monogr>
        <title level="m">Operator Splitting Performance Estimation: Tight contraction factors and optimal parameter selection</title>
        <author>
          <persName>
            <foreName>Ernest K.</foreName>
            <surname>Ryu</surname>
            <initial>E. K.</initial>
          </persName>
          <persName key="sierra-2018-idp133456">
            <foreName>Adrien B.</foreName>
            <surname>Taylor</surname>
            <initial>A. B.</initial>
          </persName>
          <persName>
            <foreName>Carolina</foreName>
            <surname>Bergeling</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Pontus</foreName>
            <surname>Giselsson</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01943622" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01943622</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1812.00146" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1812.<allowbreak/>00146</ref> - working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid7" type="unpublished" rend="year" n="cite:scieur:hal-01799269">
      <identifiant type="hal" value="hal-01799269"/>
      <monogr>
        <title level="m">Nonlinear Acceleration of Deep Neural Networks</title>
        <author>
          <persName key="sierra-2018-idp162928">
            <foreName>Damien</foreName>
            <surname>Scieur</surname>
            <initial>D.</initial>
          </persName>
          <persName key="galen-post-2018-idp120560">
            <foreName>Edouard</foreName>
            <surname>Oyallon</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Alexandre</foreName>
            <surname>d'Aspremont</surname>
            <initial>A.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01799269" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01799269</ref>
        </imprint>
      </monogr>
      <note type="bnote">working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid48" type="unpublished" rend="year" n="cite:tang:hal-01889990">
      <identifiant type="hal" value="hal-01889990"/>
      <monogr>
        <title level="m">Structure-Adaptive Accelerated Coordinate Descent</title>
        <author>
          <persName>
            <foreName>Junqi</foreName>
            <surname>Tang</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Mohammad</foreName>
            <surname>Golbabaee</surname>
            <initial>M.</initial>
          </persName>
          <persName key="sierra-2018-idp112912">
            <foreName>Francis</foreName>
            <surname>Bach</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Mike E.</foreName>
            <surname>Davies</surname>
            <initial>M. E.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2018</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01889990" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01889990</ref>
        </imprint>
      </monogr>
      <note type="bnote">working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="sierra-2018-bid49" type="unpublished" rend="year" n="cite:vu:hal-01980339">
      <identifiant type="hal" value="hal-01980339"/>
      <monogr>
        <title level="m">Tube-CNN: Modeling temporal evolution of appearance for object detection in video</title>
        <author>
          <persName key="willow-2018-idp162256">
            <foreName>Tuan-Hung</foreName>
            <surname>Vu</surname>
            <initial>T.-H.</initial>
          </persName>
          <persName>
            <foreName>Anton</foreName>
            <surname>Osokin</surname>
            <initial>A.</initial>
          </persName>
          <persName key="willow-2018-idp114960">
            <foreName>Ivan</foreName>
            <surname>Laptev</surname>
            <initial>I.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>January</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01980339" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01980339</ref>
        </imprint>
      </monogr>
      <note type="bnote"><ref xlink:href="https://arxiv.org/abs/1812.02619" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1812.<allowbreak/>02619</ref> - 13 pages, 8 figures, technical report</note>
    </biblStruct>
  </biblio>
</raweb>
