<?xml version="1.0" encoding="utf-8"?>
<raweb xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="" year="2019">
  <identification id="tadaam" isproject="true">
    <shortname>TADAAM</shortname>
    <projectName>Topology-aware system-scale data management for high-performance computing</projectName>
    <theme-de-recherche>Distributed and High Performance Computing</theme-de-recherche>
    <domaine-de-recherche>Networks, Systems and Services, Distributed Computing</domaine-de-recherche>
    <urlTeam>https://team.inria.fr/tadaam/</urlTeam>
    <structure_exterieure type="Labs">
      <libelle>Laboratoire Bordelais de Recherche en Informatique (LaBRI)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Institut Polytechnique de Bordeaux</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Université de Bordeaux</libelle>
    </structure_exterieure>
    <header_dates_team>Creation of the Team: 2015 January 01, updated into Project-Team: 2017 December 01</header_dates_team>
    <LeTypeProjet>Project-Team</LeTypeProjet>
    <keywordsSdN>
      <term>A1.1.1. - Multicore, Manycore</term>
      <term>A1.1.2. - Hardware accelerators (GPGPU, FPGA, etc.)</term>
      <term>A1.1.3. - Memory models</term>
      <term>A1.1.4. - High performance computing</term>
      <term>A1.1.5. - Exascale</term>
      <term>A1.1.9. - Fault tolerant systems</term>
      <term>A1.2. - Networks</term>
      <term>A2.1.7. - Distributed programming</term>
      <term>A2.2.2. - Memory models</term>
      <term>A2.2.4. - Parallel architectures</term>
      <term>A2.2.5. - Run-time systems</term>
      <term>A2.6.1. - Operating systems</term>
      <term>A2.6.2. - Middleware</term>
      <term>A3.1.2. - Data management, querying and storage</term>
      <term>A3.1.3. - Distributed data</term>
      <term>A3.1.8. - Big data (production, storage, transfer)</term>
      <term>A6.2.6. - Optimization</term>
      <term>A6.2.7. - High performance computing</term>
      <term>A6.3.3. - Data processing</term>
      <term>A7.1.1. - Distributed algorithms</term>
      <term>A7.1.2. - Parallel algorithms</term>
      <term>A7.1.3. - Graph algorithms</term>
      <term>A8.1. - Discrete mathematics, combinatorics</term>
      <term>A8.2. - Optimization</term>
      <term>A8.7. - Graph theory</term>
      <term>A8.9. - Performance evaluation</term>
    </keywordsSdN>
    <keywordsSecteurs>
      <term>B6.3.2. - Network protocols</term>
      <term>B6.3.3. - Network Management</term>
      <term>B6.5. - Information systems</term>
      <term>B9.5.1. - Computer science</term>
      <term>B9.8. - Reproducibility</term>
    </keywordsSecteurs>
    <UR name="Bordeaux"/>
  </identification>
  <team id="uid1">
    <person key="tadaam-2018-idp130608">
      <firstname>Emmanuel</firstname>
      <lastname>Jeannot</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Team leader, Inria, Senior Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="tadaam-2018-idp125248">
      <firstname>Alexandre</firstname>
      <lastname>Denis</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="tadaam-2019-idp127136">
      <firstname>Fatima</firstname>
      <lastname>El Akkary</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, from Apr 2019 to Jun 2019</moreinfo>
    </person>
    <person key="tadaam-2018-idp127712">
      <firstname>Brice</firstname>
      <lastname>Goglin</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, Senior Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="tadaam-2018-idp160832">
      <firstname>Adrien</firstname>
      <lastname>Guilbaud</lastname>
      <categoryPro>Technique</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, Engineer</moreinfo>
    </person>
    <person key="tadaam-2018-idp138848">
      <firstname>Julien</firstname>
      <lastname>Herrmann</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, until Sep 2019</moreinfo>
    </person>
    <person key="tadaam-2018-idp143744">
      <firstname>Valentin</firstname>
      <lastname>Honoré</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>U. Bordeaux</moreinfo>
    </person>
    <person key="tadaam-2018-idp173184">
      <firstname>Valentin</firstname>
      <lastname>Hoyet</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="tadaam-2019-idp142304">
      <firstname>Amaury</firstname>
      <lastname>Jacques</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, from Feb 2019 to May 2019</moreinfo>
    </person>
    <person key="tadaam-2018-idp133472">
      <firstname>Guillaume</firstname>
      <lastname>Mercier</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Institut National Polytechnique de Bordeaux, Associate Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="tadaam-2019-idp180880">
      <firstname>Guillaume</firstname>
      <lastname>Pallez Aupy</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="tadaam-2018-idp135984">
      <firstname>François</firstname>
      <lastname>Pellegrini</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>U. Bordeaux, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="tadaam-2019-idp152992">
      <firstname>Florian</firstname>
      <lastname>Reynier</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>CEA</moreinfo>
    </person>
    <person key="tadaam-2019-idp155392">
      <firstname>Philippe</firstname>
      <lastname>Swartvagher</lastname>
      <categoryPro>Stagiaire</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, from Feb 2019 to Jul 2019</moreinfo>
    </person>
    <person key="tadaam-2019-idp155392">
      <firstname>Philippe</firstname>
      <lastname>Swartvagher</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria, from Oct 2019</moreinfo>
    </person>
    <person key="tadaam-2018-idp148608">
      <firstname>Andres Xavier</firstname>
      <lastname>Rubio Proano</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="tadaam-2018-idp153472">
      <firstname>Nicolas</firstname>
      <lastname>Vidal</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="tadaam-2019-idp165168">
      <firstname>Francieli</firstname>
      <lastname>Zanon-Boito</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Bordeaux</research-centre>
      <moreinfo>U. Bordeaux, Associate Professor, from Sep 2019</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Overall Objectives</bodyTitle>
      <p>In <span class="smallcap" align="left">TADaaM</span>, we propose a new approach where we allow the
application to explicitly express its resource needs about its execution. The
application needs to express its behavior, but in a different way from
the compute-centric approach, as the additional information is not necessarily
focused on computation and on instructions execution, but follows a high-level
semantics (needs of large memory for some processes, start of a
communication phase, need to refine the granularity, beginning of a
storage access phase, description of data affinity, etc.). These needs will be
expressed to a service layer though an API. The service layer will be
system-wide (able to gather a global knowledge) and stateful
(able to take decision based on the current request but also on previous
ones). The API shall enable the application to access this service layer through
a well-defined set of functions, based on carefully designed abstractions.</p>
      <p>Hence, <b>the goal of </b><span class="smallcap" align="left">TADaaM</span><b> is to design a stateful
system-wide service layer for HPC systems, in order to optimize
applications execution according to their needs</b>.</p>
      <p>This layer will abstract low-level details of the architecture and the
software stack, and will allow applications to register their needs.
Then, according to these requests and to the environment
characteristics, this layer will feature an engine to optimize the
execution of the applications at system-scale, taking into account the
gathered global knowledge and previous requests.</p>
      <p>This approach exhibits several key characteristics:</p>
      <simplelist>
        <li id="uid4">
          <p noindent="true">It is independent from the application parallelization, the
programming model, the numerical scheme and, largely, from the data
layout. Indeed, high-level semantic requests can easily be added to
the application code after the problem has been modeled,
parallelized, and most of the time after the data layout has been
designed and optimized. Therefore, this approach is – to a large
extent – orthogonal to other optimization mechanisms and does not require
application developers to rewrite their code.</p>
        </li>
        <li id="uid5">
          <p noindent="true">Application developers are the persons who know best their code
and therefore the needs of their application. They can easily (if
the interface is well designed and the abstractions are correctly
exposed), express the application needs in terms of
resource usage and interaction with the whole environment.</p>
        </li>
        <li id="uid6">
          <p noindent="true">Being stateful and shared by all the applications in the parallel
environment, the proposed layer will therefore enable optimizations
that:</p>
          <simplelist>
            <li id="uid7">
              <p noindent="true">cannot be performed statically but require information only
known at launch- or run-time,</p>
            </li>
            <li id="uid8">
              <p noindent="true">are incremental and require minimal changes to the application
execution scheme,</p>
            </li>
            <li id="uid9">
              <p noindent="true">deal with several parts of the environment at the same time
(e.g., batch scheduler, I/O, process manager and storage),</p>
            </li>
            <li id="uid10">
              <p noindent="true">take into account the needs of several applications at the same
time and deal with their interaction. This will be useful, for
instance, to handle network contention, storage access or any other
shared resources.</p>
            </li>
          </simplelist>
        </li>
      </simplelist>
    </subsection>
  </presentation>
  <fondements id="uid11">
    <bodyTitle>Research Program</bodyTitle>
    <subsection id="uid12" level="1">
      <bodyTitle>Need for System-Scale Optimization</bodyTitle>
      <p>Firstly, in order for applications to make the best possible use of the available
resources, it is impossible to expose all the low-level details of the hardware
to the program, as it would make it impossible to achieve portability. Hence, the
standard approach is to add intermediate layers (programming models, libraries,
compilers, runtime systems, etc.) to the software stack so as to bridge the gap
between the application and the hardware. With this approach, optimizing the
application requires expressing its parallelism (within the imposed programming
model), organize the code, schedule and load-balance the computations, etc. In
other words, in this approach, the way the code is written and the way it is
executed and interpreted by the lower layers drives the optimization. In any
case, this approach is centered on how computations are performed. Such
an approach is therefore no longer sufficient, as the way an application is
executing does depend less and less on the organization of computation and more
and more on the way its data is managed.</p>
      <p>Secondly, modern large-scale parallel platforms comprise tens to hundreds of
thousand nodes <footnote id="uid13" id-text="1">More than 22,500 XE6 compute nodes for the BlueWaters
system; 5040 B510 Bullx Nodes for the Curie machine; more than 49,000
BGQ nodes for the MIRA machine.</footnote>. However, very few applications use
the whole machine. In general, an application runs only on a subset of
the nodes <footnote id="uid14" id-text="2">In 2014, the median case was 2048 nodes for the
BlueWaters system and, for the first year of the Curie machine, the
median case was 256 nodes</footnote>. Therefore, most of the time, an
application shares the network, the storage and other resources with other
applications running concurrently during its execution. Depending on
the allocated resources, it is not uncommon that the execution of one
application interferes with the execution of a neighboring one.</p>
      <p>Lastly, even if an application is running alone, each element of
the software stack often performs its own optimization
independently. For instance, when considering a hybrid <span class="smallcap" align="left">MPI</span>/OpenMP
application, one may realize that threads are concurrently used within the
OpenMP runtime system, within the <span class="smallcap" align="left">MPI</span> library for communication
progression, and possibly within the computation library (BLAS) and
even within the application itself (pthreads). However, none of these
different classes of threads are aware of the existence of the others.
Consequently, the way they are executed, scheduled, prioritized does
not depend on their relative roles, their locations in the software
stack nor on the state of the application.</p>
      <p>The above remarks show that in order to go beyond the
state-of-the-art, it is necessary to design a new set of mechanisms
allowing cross-layer and system-wide optimizations so as to optimize
the way data is allocated, accessed and transferred by the application.
</p>
    </subsection>
    <subsection id="uid15" level="1">
      <bodyTitle>Scientific Challenges and Research Issues</bodyTitle>
      <p>In <span class="smallcap" align="left">TADaaM</span>, we will tackle the problem of efficiently
executing an application, at system-scale, on an HPC machine. We
assume that the application is already optimized (efficient data
layout, use of effective libraries, usage of state-of-the-art
compilation techniques, etc.). Nevertheless, even a statically
optimized application will not be able to be executed at scale without
considering the following dynamic constraints: machine
topology, allocated resources, data movement and contention, other
running applications, access to storage, etc. Thanks to the proposed
layer, we will provide a simple and efficient way for already existing
applications, as well as new ones, to express their needs in terms of
resource usage, locality and topology, using a high-level semantic.</p>
      <p>It is important to note that we target the optimization of each
application independently but also several applications at the same
time and at system-scale, taking into account their resource
requirement, their network usage or their storage access. Furthermore,
dealing with code-coupling applications is an intermediate use-case
that will also be considered.</p>
      <p spacebefore="3.0pt">Several issues have to be considered. The first one consists in providing
relevant <b>abstractions and models to describe the topology</b> of the
available resources <b>and the application behavior</b>.</p>
      <p>Therefore, the first question we want to answer is: <b>“How to build
scalable models and efficient abstractions enabling to
understand the impact of data movement, topology and locality
on performance?”</b>
These models must be sufficiently precise to grasp the reality, tractable enough
to enable efficient solutions and algorithms, and simple enough to remain
usable by non-hardware experts. We will work on
(1) better describing the memory hierarchy, considering new memory
technologies;
(2) providing an integrated view of the nodes, the network and the storage;
(3) exhibiting qualitative knowledge;
(4) providing ways to express the multi-scale properties of the machine.
Concerning abstractions, we will work on providing general concepts to
be integrated at the application or programming model layers.
The goal is to offer means, for the application, to
express its high-level requirements in terms of data access, locality and
communication, by providing abstractions on the notion of hierarchy, mesh,
affinity, traffic metrics, etc.</p>
      <p spacebefore="3.0pt">In addition to the abstractions and the aforementioned models we need
to <b>define a clean and expressive API in a scalable way</b>, in
order for applications to express their needs (memory usage, affinity,
network, storage access, model refinement, etc.).</p>
      <p>Therefore, the second question we need to answer is: “<b>how to
build a system-scale, stateful, shared layer that can gather
applications needs expressed with a high-level semantic?</b>”. This work
will require not only to define a clean API where applications will
express their needs, but also to define how such a layer will be
shared across applications and will scale on future systems. The
API will provide a simple yet effective way to express different needs
such as: memory usage of a given portion of the code; start of a
compute intensive part; phase where the network is accessed
intensively; topology-aware affinity management; usage of storage
(in read and/or write mode); change of the data layout after mesh
refinement, etc. From an engineering point of view, the layer will
have a hierarchical design matching the hardware hierarchy, so as to
achieve scalability.</p>
      <p spacebefore="3.0pt">Once this has been done, the service layer, will have all the
information about the environment characteristics and application
requirements. We therefore need to design a set of <b>mechanisms to
optimize applications execution</b>: communication, mapping, thread
scheduling, data partitioning/mapping/movement, etc.</p>
      <p>Hence, the last scientific question we will address is: “<b>How
to design fast and efficient algorithms, mechanisms and tools to enable
execution of applications at system-scale, in a full HPC ecosystem,
taking into account topology and locality?</b>”
A first set of research is related to thread and process placement according to
the topology and the affinity. Another large field of study is related to data
placement, allocation and partitioning: optimizing the way data is accessed and
processed especially for mesh-based applications. The issues of transferring
data across the network will also be tackled, thanks to the global knowledge we
have on the application behavior and the data layout. Concerning the interaction
with other applications, several directions will be tackled. Among these
directions we will deal with matching process placement with resource
allocation given by the batch scheduler or with the storage
management: switching from a best-effort application centric strategy
to global optimization scheme.
</p>
    </subsection>
  </fondements>
  <domaine id="uid16">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid17" level="1">
      <bodyTitle>Mesh-based applications</bodyTitle>
      <p><span class="smallcap" align="left">TADaaM</span> targets scientific simulation applications on large-scale
systems, as these applications present huge challenges in terms of
performance, locality, scalability, parallelism and data management.
Many of these HPC applications use meshes as the basic model for their
computation. For instance, PDE-based simulations using finite
differences, finite volumes, or finite elements methods operate on meshes
that describe the geometry and the physical properties of the
simulated objects. This is the case for at least two thirds of the
applications selected in the 9<sup>th</sup> PRACE
call <footnote id="uid18" id-text="3"><ref xlink:href="http://www.prace-ri.eu/prace-9th-regular-call/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>prace-ri.<allowbreak/>eu/<allowbreak/>prace-9th-regular-call/</ref></footnote>,
which concern quantum mechanics, fluid mechanics, climate, material
physics, electromagnetism, etc.</p>
      <p>Mesh-based applications not only represent the majority of
HPC applications running on existing supercomputing systems, but
also feature properties that should be taken into account to
achieve scalability and performance on future large-scale systems.
These properties are the following:</p>
      <descriptionlist>
        <label>Size</label>
        <li id="uid19">
          <p noindent="true">Datasets are large: some meshes comprise hundreds of
millions of elements, or even billions.</p>
        </li>
        <label>Dynamicity</label>
        <li id="uid20">
          <p noindent="true">In many simulations, meshes are refined or coarsened
at each time step, so as to account for the evolution of the
physical simulation (moving parts, shockwaves, structural changes in
the model resulting from collisions between mesh parts, etc.).</p>
        </li>
        <label>Structure</label>
        <li id="uid21">
          <p noindent="true">Many meshes are unstructured, and require advanced data
structures so as to manage irregularity in data storage.</p>
        </li>
        <label>Topology</label>
        <li id="uid22">
          <p noindent="true">Due to their rooting in the physical world, meshes exhibit
interesting topological properties (low dimensionality embedding,
small maximum degree, large diameter, etc.). It is very important to
take advantage of these properties when laying out mesh data on
systems where communication locality matters.</p>
        </li>
      </descriptionlist>
      <p>All these features make mesh-based applications a very interesting
and challenging use-case for the research we want to carry out in this
project. Moreover, we believe that our proposed approach and solutions
will contribute to enhance these applications and allow them to
achieve the best possible usage of the available resources of future
high-end systems.
</p>
    </subsection>
  </domaine>
  <highlights id="uid23">
    <bodyTitle>Highlights of the Year</bodyTitle>
    <subsection id="uid24" level="1">
      <bodyTitle>Highlights of the Year</bodyTitle>
      <simplelist>
        <li id="uid25">
          <p noindent="true">Guillaume <span class="smallcap" align="left">Pallez</span> was an invited speaker at the Royal Society <ref xlink:href="https://royalsociety.org/science-events-and-lectures/2019/04/high-performance-computing/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>royalsociety.<allowbreak/>org/<allowbreak/>science-events-and-lectures/<allowbreak/>2019/<allowbreak/>04/<allowbreak/>high-performance-computing/</ref></p>
        </li>
        <li id="uid26">
          <p noindent="true">Brice <span class="smallcap" align="left">Goglin</span> is co-chair of the Architecture &amp; Networks area of the SuperComputing 2020 conference.</p>
        </li>
        <li id="uid27">
          <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> has been re-appointed a member of the
French <i>Commission Nationale de l'Informatique et des
Libertés</i> (French data protection authority) by the President of
the French Senate.</p>
        </li>
      </simplelist>
      <subsection id="uid28" level="2">
        <bodyTitle>Awards</bodyTitle>
        <simplelist>
          <li id="uid29">
            <p noindent="true">Guillaume <span class="smallcap" align="left">Pallez</span> was one of the recipients of the IEEE Computer Society TCHPC Early Career Researchers Award for Excellence in High Performance Computing.</p>
          </li>
          <li id="uid30">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> was bestowed <i>Chevalier dans l’Ordre des Palmes Académiques</i> (Order of Academic Palms), promotion of July 2019.</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
  </highlights>
  <logiciels id="uid31">
    <bodyTitle>New Software and Platforms</bodyTitle>
    <subsection id="uid32" level="1">
      <bodyTitle>Hsplit</bodyTitle>
      <p>
        <i>Hardware communicators split</i>
      </p>
      <p><span class="smallcap" align="left">Keywords:</span> MPI communication - Topology - Hardware platform</p>
      <p><span class="smallcap" align="left">Scientific Description:</span> Hsplit is a library that implements an abstraction allowing the programmer using MPI in their parallel applications to access the underlying hardware structure through a hierarchy of communicators.
Hsplit is based on the MPI_Comm_split_type routine and provides a new value for the split_type argument that specifically creates a hierarchy of subcommunicators where each new subcommunicator corresponds to a meaningful hardware level. The important point is that only the structure of the hardware is exploited and the number of levels or the levels names are not fixed so as to propose a solution independent from future hardware evolutions (such as new levels for instance). Another flavor of this MPI_Comm_split_type function is provided that creates a roots communicator at the same time a subcommunicator is produced, in order to ease the collective communication and/or synchronization among subcommunicators.</p>
      <p><span class="smallcap" align="left">Functional Description:</span> Hsplit implements an abstraction that allows the programmer using MPI in their parallel applications to access the underlying hardware structure through a hierarchy of communicators.
Hsplit is based on the MPI_Comm_split_type routine and provides a new value for the split_type argument that specifically creates a hierarchy of subcommunicators where each new subcommunicator corresponds to a meaningful hardware level. The important point is that only the structure of the hardware is exploited and the number of levels or the levels names are not fixed so as to propose a solution independent from future hardware evolutions (such as new levels for instance). Another flavor of this MPI_Comm_split_type function is provided that creates a roots communicator at the same time a subcommunicator is produced, in order to ease the collective communication and/or synchronization among subcommunicators.</p>
      <p><span class="smallcap" align="left">News Of The Year:</span> Most of our proposal has been officially read in front of the MPI Forum at the last physical meeting in December in Albuquerque.
This concerns the guided and the unguided mode of the split function. This now has to pass two votes in the next physical meetings
in 2020 to be part of the new version of the standard: MPI 4.0 that shall be ratified and released at the end of 2020.
Since no other MPI library currently implements the unguided mode, Hsplit will be the only software that is currently able to provide it.</p>
      <simplelist>
        <li id="uid33">
          <p noindent="true">Participants: Guillaume Mercier, Brice Goglin and Emmanuel Jeannot</p>
        </li>
        <li id="uid34">
          <p noindent="true">Contact: Guillaume Mercier</p>
        </li>
        <li id="uid35">
          <p noindent="true">Publications: <ref xlink:href="https://hal.inria.fr/hal-01621941" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A hierarchical model to manage hardware topology in MPI applications</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01538002" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A Hierarchical Model to Manage Hardware Topology in MPI Applications</ref></p>
        </li>
        <li id="uid36">
          <p noindent="true">URL: <ref xlink:href="http://mpi-topology.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>mpi-topology.<allowbreak/>gforge.<allowbreak/>inria.<allowbreak/>fr/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid37" level="1">
      <bodyTitle>hwloc</bodyTitle>
      <p>
        <i>Hardware Locality</i>
      </p>
      <p><span class="smallcap" align="left">Keywords:</span> NUMA - Multicore - GPU - Affinities - Open MPI - Topology - HPC - Locality</p>
      <p><span class="smallcap" align="left">Functional Description:</span> Hardware Locality (hwloc) is a library and set of tools aiming at discovering and exposing the topology of machines, including processors, cores, threads, shared caches, NUMA memory nodes and I/O devices. It builds a widely-portable abstraction of these resources and exposes it to applications so as to help them adapt their behavior to the hardware characteristics. They may consult the hierarchy of resources, their attributes, and bind tasks or memory on them.</p>
      <p>hwloc targets many types of high-performance computing applications, from thread scheduling to placement of MPI processes. Most existing MPI implementations, several resource managers and task schedulers, and multiple other parallel libraries already use hwloc.</p>
      <p><span class="smallcap" align="left">News Of The Year:</span> hwloc 2.1 brought support for modern multi-die processors and memory-side caches. It also enhanced memory locality in heterogeneous memory architecture (e.g. with non-volatile memory DIMMs). The visualization of many-core platforms was also improved by factorizing objects when many of them are identical.</p>
      <simplelist>
        <li id="uid38">
          <p noindent="true">Participants: Brice Goglin and Valentin Hoyet</p>
        </li>
        <li id="uid39">
          <p noindent="true">Partners: Open MPI consortium - Intel - AMD - IBM</p>
        </li>
        <li id="uid40">
          <p noindent="true">Contact: Brice Goglin</p>
        </li>
        <li id="uid41">
          <p noindent="true">Publications: <ref xlink:href="https://hal.inria.fr/inria-00429889" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">hwloc: a Generic Framework for Managing Hardware Affinities in HPC Applications</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00985096" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Managing the Topology of Heterogeneous Cluster Nodes with Hardware Locality (hwloc)</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01183083" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A Topology-Aware Performance Monitoring Tool for Shared Resource Management in Multicore Systems</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01330194" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Exposing the Locality of Heterogeneous Memory Architectures to HPC Applications</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01400264" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Towards the Structural Modeling of the Topology of next-generation heterogeneous cluster Nodes with hwloc</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01402755" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">On the Overhead of Topology Discovery for Locality-aware Scheduling in HPC</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01644087" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Memory Footprint of Locality Information on Many-Core Platforms</ref> -
<ref xlink:href="https://hal.inria.fr/hal-02266285" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">M&amp;MMs: Navigating Complex Memory Spaces with hwloc</ref></p>
        </li>
        <li id="uid42">
          <p noindent="true">URL: <ref xlink:href="http://www.open-mpi.org/projects/hwloc/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>open-mpi.<allowbreak/>org/<allowbreak/>projects/<allowbreak/>hwloc/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid43" level="1">
      <bodyTitle>NetLoc</bodyTitle>
      <p>
        <i>Network Locality</i>
      </p>
      <p><span class="smallcap" align="left">Keywords:</span> Topology - Locality - Distributed networks - HPC - Parallel computing - MPI communication</p>
      <p><span class="smallcap" align="left">Functional Description:</span> netloc (Network Locality) is a library that extends hwloc to network topology information by assembling hwloc knowledge of server internals within graphs of inter-node fabrics such as Infiniband, Intel OmniPath or Cray networks.</p>
      <p>Netloc builds a software representation of the entire cluster so as to help applications properly place their tasks on the nodes. It may also help communication libraries optimize their strategies according to the wires and switches.</p>
      <p>Netloc targets the same challenges as hwloc but focuses on a wider spectrum by enabling cluster-wide solutions such as process placement. It interoperates with the Scotch graph partitioner to do so.</p>
      <p>Netloc is distributed within hwloc releases starting with hwloc 2.0.</p>
      <simplelist>
        <li id="uid44">
          <p noindent="true">Participants: Brice Goglin, Clément Foyer and Cyril Bordage</p>
        </li>
        <li id="uid45">
          <p noindent="true">Contact: Brice Goglin</p>
        </li>
        <li id="uid46">
          <p noindent="true">Publications: <ref xlink:href="https://hal.inria.fr/hal-01010599" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">netloc: Towards a Comprehensive View of the HPC System Topology</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01614437" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Netloc: a Tool for Topology-Aware Process Mapping</ref></p>
        </li>
        <li id="uid47">
          <p noindent="true">URL: <ref xlink:href="http://www.open-mpi.org/projects/netloc/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>open-mpi.<allowbreak/>org/<allowbreak/>projects/<allowbreak/>netloc/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid48" level="1">
      <bodyTitle>NewMadeleine</bodyTitle>
      <p>
        <i>NewMadeleine: An Optimizing Communication Library for High-Performance Networks</i>
      </p>
      <p><span class="smallcap" align="left">Keywords:</span> High-performance calculation - MPI communication</p>
      <p><span class="smallcap" align="left">Functional Description:</span> NewMadeleine is the fourth incarnation of the Madeleine communication library. The new architecture aims at enabling the use of a much wider range of communication flow optimization techniques. Its design is entirely modular: drivers and optimization strategies are dynamically loadable software components, allowing experimentations with multiple approaches or on multiple issues with regard to processing communication flows.</p>
      <p>The optimizing scheduler SchedOpt targets applications with irregular, multi-flow communication schemes such as found in the increasingly common application conglomerates made of multiple programming environments and coupled pieces of code, for instance. SchedOpt itself is easily extensible through the concepts of optimization strategies (what to optimize for, what the optimization goal is) expressed in terms of tactics (how to optimize to reach the optimization goal). Tactics themselves are made of basic communication flows operations such as packet merging or reordering.</p>
      <p>The communication library is fully multi-threaded through its close integration with PIOMan. It manages concurrent communication operations from multiple libraries and from multiple threads. Its MPI implementation MadMPI fully supports the MPI_THREAD_MULTIPLE multi-threading level.</p>
      <simplelist>
        <li id="uid49">
          <p noindent="true">Participants: Alexandre Denis, Clément Foyer, Nathalie Furmento, Raymond Namyst, Adrien Guilbaud, Florian Reynier and Philippe Swartvagher</p>
        </li>
        <li id="uid50">
          <p noindent="true">Contact: Alexandre Denis</p>
        </li>
        <li id="uid51">
          <p noindent="true">Publications: <ref xlink:href="https://hal.inria.fr/inria-00127356" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">NewMadeleine: a Fast Communication Scheduling Engine for High Performance Networks</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00177230" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Ordonnancement et qualité de service pour réseaux rapides</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00177167" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Improving Reactivity and Communication Overlap in MPI using a Generic I/O Manager</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00327177" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">PIOMan : un gestionnaire d'entrées-sorties générique</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00224999" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A multithreaded communication engine for multicore architectures</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00327158" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A multicore-enabled multirail communication engine</ref> -
<ref xlink:href="https://hal.inria.fr/tel-00469488" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">About the interactions between communication and thread scheduling in clusters of multicore machines</ref> -
<ref xlink:href="https://hal.inria.fr/hal-02103700" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Scalability of the NewMadeleine Communication Library for Large Numbers of MPI Point-to-Point Requests</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00381670" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">An analysis of the impact of multi-threading on communication performance</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00408521" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A scalable and generic task scheduling system for communication libraries</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00793176" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A Generic and High Performance Approach for Fault Tolerance in Communication Library</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00586015" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A High-Performance Superpipeline Protocol for InfiniBand</ref> -
<ref xlink:href="https://hal.inria.fr/inria-00605735" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A sampling-based approach for communication libraries auto-tuning</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00716478" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">High performance checksum computation for fault-tolerant MPI over InfiniBand</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01064652" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">pioman: a Generic Framework for Asynchronous Progression and Multithreaded Communications</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01087775" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">pioman: a pthread-based Multithreaded Communication Engine</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01395299" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Updating MadMPI to MPI-3: Remote Memory Access</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01587584" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Portage de StarPU sur la bibliothèque de communication NewMadeleine</ref></p>
        </li>
        <li id="uid52">
          <p noindent="true">URL: <ref xlink:href="http://pm2.gforge.inria.fr/newmadeleine/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>pm2.<allowbreak/>gforge.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>newmadeleine/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid53" level="1">
      <bodyTitle>PaMPA</bodyTitle>
      <p>
        <i>Parallel Mesh Partitioning and Adaptation</i>
      </p>
      <p><span class="smallcap" align="left">Keywords:</span> Dynamic load balancing - Unstructured heterogeneous meshes - Parallel remeshing - Subdomain decomposition - Parallel numerical solvers</p>
      <p><span class="smallcap" align="left">Scientific Description:</span> PaMPA is a parallel library for handling, redistributing and remeshing unstructured meshes on distributed-memory architectures. PaMPA dramatically eases and speeds up the development of parallel numerical solvers for compact schemes. It provides solver writers with a distributed mesh abstraction and an API to:
- describe unstructured and possibly heterogeneous meshes, in the form of a graph of interconnected entities of different kinds (e.g. elements, faces, edges, nodes),
- attach values to the mesh entities,
- distribute such meshes across processing elements, with an overlap of variable width,
- perform synchronous or asynchronous data exchanges of values across processing elements,
- describe numerical schemes by means of iterators over mesh entities and their connected neighbors of a given kind,
- redistribute meshes so as to balance computational load,
- perform parallel dynamic remeshing, by applying adequately a user-provided sequential remesher to relevant areas of the distributed mesh.</p>
      <p>PaMPA runs concurrently multiple sequential remeshing tasks to perform dynamic parallel remeshing and redistribution of very large unstructured meshes. E.g., it can remesh a tetrahedral mesh from 43M elements to more than 1B elements on 280 Broadwell processors in 20 minutes.</p>
      <p><span class="smallcap" align="left">Functional Description:</span> Parallel library for handling, redistributing and remeshing unstructured, heterogeneous meshes on distributed-memory architectures.
PaMPA dramatically eases and speeds up the development of parallel numerical solvers for compact schemes.</p>
      <p><span class="smallcap" align="left">News Of The Year:</span> PaMPA has been used to remesh an industrial mesh of a helicopter turbine combustion chamber, up to more than 1 billion elements.</p>
      <simplelist>
        <li id="uid54">
          <p noindent="true">Participants: Cécile Dobrzynski, Cedric Lachat and François Pellegrini</p>
        </li>
        <li id="uid55">
          <p noindent="true">Partners: Université de Bordeaux - CNRS - IPB</p>
        </li>
        <li id="uid56">
          <p noindent="true">Contact: François Pellegrini</p>
        </li>
        <li id="uid57">
          <p noindent="true">URL: <ref xlink:href="http://project.inria.fr/pampa/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>project.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>pampa/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid58" level="1">
      <bodyTitle>TopoMatch</bodyTitle>
      <p><span class="smallcap" align="left">Keywords:</span> Intensive parallel computing - High-Performance Computing - Hierarchical architecture - Placement</p>
      <p><span class="smallcap" align="left">Scientific Description:</span> TreeMatch embeds a set of algorithms to map processes onto processors/cores in order to minimize the communication cost of the application.</p>
      <p>Important features are: the number of processors can be greater than the number of application processes, it assumes that the topology is a tree and does not require valuation of the topology (e.g. communication speeds), and it implements different placement algorithms that are switched according to the input size.</p>
      <p>Some core algorithms are parallel to speed up the execution. It optionally embeds Scotch for fixed-vertex mapping and enables exhaustive search if required. Several metric mappings are computed. It allows for oversubscribing of resources and is multithreaded.</p>
      <p>TreeMatch is integrated into various software such as the Charm++ programming environment as well as in both major open-source MPI implementations: Open MPI and MPICH2.</p>
      <p><span class="smallcap" align="left">Functional Description:</span> TreeMatch is a library for performing process placement based on the topology of the machine and the communication pattern of the application.</p>
      <simplelist>
        <li id="uid59">
          <p noindent="true">Participants: Adele Villiermet, Emmanuel Jeannot, François Tessier, Guillaume Mercier and Pierre Celor</p>
        </li>
        <li id="uid60">
          <p noindent="true">Partners: Université de Bordeaux - CNRS - IPB</p>
        </li>
        <li id="uid61">
          <p noindent="true">Contact: Emmanuel Jeannot</p>
        </li>
        <li id="uid62">
          <p noindent="true">URL: <ref xlink:href="http://treematch.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>treematch.<allowbreak/>gforge.<allowbreak/>inria.<allowbreak/>fr/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid63" level="1">
      <bodyTitle>SCOTCH</bodyTitle>
      <p><span class="smallcap" align="left">Keywords:</span> Mesh partitioning - Domain decomposition - Graph algorithmics - High-performance calculation - Sparse matrix ordering - Static mapping</p>
      <p><span class="smallcap" align="left">Functional Description:</span> Scotch is a graph partitioner. It helps optimise the division of a problem, by means of a graph, into a set of independent sub-problems of equivalent sizes. These sub-problems can also be solved in parallel.</p>
      <p><span class="smallcap" align="left">Release Functional Description:</span> Version 6.0 offers many new features:</p>
      <p>sequential graph repartitioning</p>
      <p>sequential graph partitioning with fixed vertices</p>
      <p>sequential graph repartitioning with fixed vertices</p>
      <p>new, fast, direct k-way partitioning and mapping algorithms</p>
      <p>multi-threaded, shared memory algorithms in the (formerly) sequential part of the library</p>
      <p>exposure in the API of many centralized and distributed graph handling routines</p>
      <p>embedded pseudo-random generator for improved reproducibility</p>
      <p>and even more...</p>
      <p><span class="smallcap" align="left">News Of The Year:</span> In 2019, several versions of Scotch have been released, from v6.0.7 up to v6.0.9. While they are mostly bugfix updates, several new features and API routines have been added, to increase its use by third-party software, notably routines handling target topologies. Also, code quality has been improved by the addition of many tests in the continuous integration process. A new graphical system has been developed by Amaury Jacques (Inria intern, Feb.-May 2019) to display differences in result quality across versions and builds. This system has been adopted by other Inria projects.</p>
      <simplelist>
        <li id="uid64">
          <p noindent="true">Participants: François Pellegrini, Sébastien Fourestier, Jun-Ho Her, Cédric Chevalier and Amaury Jacques</p>
        </li>
        <li id="uid65">
          <p noindent="true">Partners: Université de Bordeaux - IPB - CNRS - Region Aquitaine</p>
        </li>
        <li id="uid66">
          <p noindent="true">Contact: François Pellegrini</p>
        </li>
        <li id="uid67">
          <p noindent="true">Publications: <ref xlink:href="https://hal.inria.fr/hal-01671156" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Process Mapping onto Complex Architectures and Partitions Thereof</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01968358" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Multi-criteria Graph Partitioning with Scotch</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00648735" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Adaptation au repartitionnement de graphes d'une méthode d'optimisation globale par diffusion</ref> -
<ref xlink:href="https://hal.inria.fr/tel-00540581" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Contributions au partitionnement de graphes parallèle multi-niveaux</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00301427" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A parallelisable multi-level banded diffusion scheme for computing balanced partitions with smooth boundaries</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00402893" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">PT-Scotch: A tool for efficient parallel graph ordering</ref> -
<ref xlink:href="https://hal.inria.fr/tel-00410402" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Design and implementation of efficient tools for parallel partitioning and distribution of very large numerical problems</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00402946" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Improvement of the Efficiency of Genetic Algorithms for Scalable Parallel Graph Partitioning in a Multi-Level Framework</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00410408" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">PT-Scotch : Un outil pour la renumérotation parallèle efficace de grands graphes dans un contexte multi-niveaux</ref> -
<ref xlink:href="https://hal.inria.fr/hal-00410427" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">PT-Scotch: A tool for efficient parallel graph ordering</ref></p>
        </li>
        <li id="uid68">
          <p noindent="true">URL: <ref xlink:href="http://www.labri.fr/~pelegrin/scotch/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>labri.<allowbreak/>fr/<allowbreak/>~pelegrin/<allowbreak/>scotch/</ref></p>
        </li>
      </simplelist>
    </subsection>
    <subsection id="uid69" level="1">
      <bodyTitle>disk-revolve</bodyTitle>
      <p><span class="smallcap" align="left">Keywords:</span> Automatic differentiation - Gradients - Machine learning</p>
      <p><span class="smallcap" align="left">Functional Description:</span> This software provides several algorithms (Disk-Revolve, 1D-Revolve, Periodic-Disk-Revolve,...) computing the optimal checkpointing strategy when executing an adjoint chain with limited memory.
The considered architecture has a level of limited memory that is free to access (writing and reading costs are negligible) and a level of unlimited memory with non-negligible access costs.
The algorithms describe which data should be saved in the memory to minimize the number of re-computations during the execution.</p>
      <simplelist>
        <li id="uid70">
          <p noindent="true">Authors: Guillaume Aupy and Julien Herrmann</p>
        </li>
        <li id="uid71">
          <p noindent="true">Contact: Julien Herrmann</p>
        </li>
        <li id="uid72">
          <p noindent="true">Publications: <ref xlink:href="https://hal.inria.fr/hal-02080706" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">H-Revolve: A Framework for Adjoint Computation on Synchrone Hierarchical Platforms</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01654632" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Periodicity in optimal hierarchical checkpointing schemes for adjoint computations</ref> -
<ref xlink:href="https://hal.inria.fr/hal-01354902" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Optimal Multistage Algorithm for Adjoint Computation</ref></p>
        </li>
        <li id="uid73">
          <p noindent="true">URL: <ref xlink:href="https://gitlab.inria.fr/adjoint-computation/disk-revolve-public" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>gitlab.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>adjoint-computation/<allowbreak/>disk-revolve-public</ref></p>
        </li>
      </simplelist>
    </subsection>
  </logiciels>
  <resultats id="uid74">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid75" level="1">
      <bodyTitle>Management of heterogeneous and non-volatile memories in HPC</bodyTitle>
      <p>The emergence of non-volatile memory that may be used either as fast
storage or slow high-capacity memory brings many opportunities for
application developers.</p>
      <p>We studied the impact of those new technologies on the allocation of
resources in HPC platforms.
We showed that co-scheduling HPC applications with possibly
different needs in terms of storage and memory brings constraints
on the way non-volatile memory should be exposed by the hardware and
operating system to bring both flexibility and performance. <ref xlink:href="#tadaam-2019-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
      <p>We also worked with Lawrence Livermore National Lab to propose an
API to help applications choose between the different kinds of
available memory (high-bandwidth (HBM), normal (DDR), slow
(non-volatile)).
We exposed several useful criteria for selecting target memories
as well as ways to rank them. <ref xlink:href="#tadaam-2019-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>
</p>
    </subsection>
    <subsection id="uid76" level="1">
      <bodyTitle>Modeling and Visualizing Many-core HPC Platforms</bodyTitle>
      <p>As the number of cores keeps increasing inside processors, new kinds
of hierarchy are added to organize and interconnect them.
We worked with Intel to leverage new groups of cores such as
<i>Dies</i> in newest Xeon Advanced Performance models.
We also designed ways to clarify the modeling and visualisation of
those many cores by factorizing identical parts of the platforms.
</p>
    </subsection>
    <subsection id="uid77" level="1">
      <bodyTitle>Co-scheduling HPC workloads on cache-partitioned CMP platforms</bodyTitle>
      <p>Co-scheduling techniques are used to improve the throughput of
applications on chip multiprocessors (CMP), but sharing resources
often generates critical interferences.</p>
      <p>In collaboration with ENS Lyon and Georgia Tech, we looked at the
interferences in the last level of cache (LLC) and use the
<i>Cache Allocation Technology</i> (CAT) recently provided by Intel
to partition the LLC and give each co-scheduled application their
own cache area.</p>
      <p>We considered <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>m</mi></math></formula> iterative HPC applications running concurrently
and answered the following questions:
(i) how to precisely model the behavior of these applications on the
cache partitioned platform?
and (ii) how many cores and cache fractions should be assigned to
each application to maximize the platform efficiency? Here, platform
efficiency is defined as maximizing the performance either globally,
or as guaranteeing a fixed ratio of iterations per second for each
application.
Through extensive experiments using CAT, we demonstrated the impact
of cache partitioning when multiple HPC applications are co-scheduled
onto CMP platforms. <ref xlink:href="#tadaam-2019-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>
</p>
    </subsection>
    <subsection id="uid78" level="1">
      <bodyTitle>Modeling High-throughput Applications for in situ Analytics</bodyTitle>
      <p>In this work <ref xlink:href="#tadaam-2019-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we proposed to model HPC applications
in the framework of in situ analytics.
Typically, an HPC application is composed of a simulation task (data and
compute intensive), and a set of analysis tasks that post-process the data.
Currently, the performance of the I/O system in HPC platforms prohibits the
storage of all simulation data to process analysis post-mortem.
Hence, in situ frameworks propose to treat the data "on the fly", directly
where it is produced.
This reduces the amount of data to store, as we only keep the result
of the analytics phase.
However, simulation and analysis have to be scheduled in parallel and
compete for shared resources. It generates resource conflicts and can
lead to severe performance degradation for the simulation.</p>
      <p>Hence, we proposed to model both platform (number of nodes and cores, memory, etc)
and application (profile of each tasks) in order to optimize the execution
of such applications.
We propose a resource partitioning model that allocates computational
resources to the different tasks, as well as a scheduling of those tasks
in order to maximize resource usage and minimize total application makespan.
Tasks are assumed to be fully parallel to solve the partitioning problem.</p>
      <p>We evaluated different scheduling heuristics combined with the resource
partitioning model and showed important features that influence in situ analytics performance.</p>
      <p>This work is done in collaboration with Bruno <span class="smallcap" align="left">Raffin</span> from Inria team DATAMOVE of Inria Grenoble.
</p>
    </subsection>
    <subsection id="uid79" level="1">
      <bodyTitle>Modeling Non-Uniform Memory Access and Heterogeneous Memories on Large Compute Nodes with the Cache-Aware Roofline Model</bodyTitle>
      <p>The trend of increasing the number of cores on-chip is enlarging the gap between compute power and memory performance.
This issue leads to design systems with heterogeneous memories, creating new challenges for data locality.
Before the release of those memory architectures, the Cache-Aware Roofline Model  <ref xlink:href="#tadaam-2019-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> (CARM) offered an
insightful model and methodology to improve application performance with knowledge of the cache memory subsystem.</p>
      <p>With the help of the <span class="smallcap" align="left">hwloc</span> library, we are able to leverage the machine topology to extend the CARM for modeling NUMA and
heterogeneous memory systems, by evaluating the memory bandwidths between all combinations of cores and NUMA nodes.
The new Locality Aware Roofline Model <ref xlink:href="#tadaam-2019-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> (LARM) scopes most contemporary types of large
compute nodes and characterizes three bottlenecks typical of those systems, namely contention, congestion and remote access.
We also designed a hybrid memory bandwidth model to better estimate
the roof when heterogeneous memories are involved or when read and
write bandwidths differ.</p>
      <p>We also developed a hybrid bandwidth model that combines the performance of different memories
and their respective read/write bandwidth with the application memory access pattern to predict
the performance of these accesses on heterogeneous memory platforms.</p>
      <p>This work has been achieved in collaboration with the authors of the CARM from University of Lisbon.
</p>
    </subsection>
    <subsection id="uid80" level="1">
      <bodyTitle>Statistical Learning for Task and Data Placement in NUMA Architecture</bodyTitle>
      <p>Achieving high performance for multi-threaded applications requires both a careful placement
of threads on computing units and a thorough allocation of data in memory.
Finding such a placement is a hard problem to solve, because performance depends on
complex interactions in several layers of the memory hierarchy.</p>
      <p>We proposed a black-box approach to decide if an application execution time can be impacted
by the placement of its threads and data, and in such a case, to choose the best placement
strategy to adopt <ref xlink:href="#tadaam-2019-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.
We show that it is possible to reach near-optimal placement policy selection by looking at
hardware performance counters, and at counters obtained from application instrumentation.
Furthermore, solutions work across several recent processor architectures (from Haswell to Skylake),
across several applications, and decisions can be taken with a single run of low overhead profiling.</p>
      <p>This work has been achieved in collaboration with Thomas <span class="smallcap" align="left">Ropars</span> from University of Grenoble.
</p>
    </subsection>
    <subsection id="uid81" level="1">
      <bodyTitle>On-the-fly scheduling vs. reservation-based scheduling for unpredictable workflows</bodyTitle>
      <p>Scientific insights in the coming decade will clearly depend on the effective
processing of large datasets generated by dynamic heterogeneous applications
typical of workflows in large data centers or of emerging fields like
neuroscience. In this work <ref xlink:href="#tadaam-2019-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we show how these big data workflows have a unique
set of characteristics that pose challenges for leveraging HPC methodologies,
particularly in scheduling. Our findings indicate that execution times for these
workflows are highly unpredictable and are not correlated with the size of the
dataset involved or the precise functions used in the analysis. We characterize
this inherent variability and sketch the need for new scheduling approaches by
quantifying significant gaps in achievable performance. Through simulations, we
show how on-the-fly scheduling approaches can deliver benefits in both
system-level and user-level performance measures. On average, we find
improvements of up to 35% in system utilization and up to 45% in average
stretch of the applications, illustrating the potential of increasing
performance through new scheduling approaches.</p>
    </subsection>
    <subsection id="uid82" level="1">
      <bodyTitle>Scheduling strategies for stochastic jobs</bodyTitle>
      <p>Following the observations made in <ref xlink:href="#uid81" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we studied stochastic
jobs (coming from neuroscience applications) which we want to schedule on a
reservation-based platform (e.g. cloud, HPC).</p>
      <p>The execution time of jobs is modeled using a (known) probability distribution.
The platform to run the job is reservation-based, meaning that the user has to
request fixed-length time slots for its job to be executed. The aim of this
project is to study efficient strategies of reservation for a user given the
cost associated to the machine. These reservations are all paid until a job is
finally executed.</p>
      <p>As a first step we derived efficient strategies without any additional
assumptions <ref xlink:href="#tadaam-2019-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. This allowed us to properly set up the
problem. These strategies were general enough that they could take as input any
probability distributions, and performed better than any more natural
strategies.
Then we extended our strategies by including checkpoint/restart to well-chosen
reservations in order to avoid wasting the benefits of work during underestimated
reservations <ref xlink:href="#tadaam-2019-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.
We were able to develop a fully polynomial-time
approximation for continuous distributions of job execution time whose
performance we then experimentally studied.</p>
      <p>The final works of this project focused on the case without checkpointing: we
studied experimentally how the strategies developed in <ref xlink:href="#tadaam-2019-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>
would perform in a parallel setup and showed that they improve both system
utilization and job response time.
Finally we started to study the robustness of such solutions when the job
distributions were not perfectly known <ref xlink:href="#tadaam-2019-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> and observed
that the performance was still good even with a very small amount of
information.</p>
    </subsection>
    <subsection id="uid83" level="1">
      <bodyTitle>Online Prediction of Network Utilization</bodyTitle>
      <p>Stealing network bandwidth helps a variety of HPC runtimes and services to run
additional operations in the background without negatively affecting the
applications. A key ingredient to make this possible is an accurate prediction
of the future network utilization, enabling the runtime to plan the background
operations in advance, so as to avoid competing with the application for
network bandwidth. In this work <ref xlink:href="#tadaam-2019-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we have proposed a portable deep learning
predictor that only uses the information available through <span class="smallcap" align="left">MPI</span> introspection
to construct a recurrent sequence-to-sequence neural network capable of
forecasting network utilization. We leverage the fact that most HPC
applications exhibit periodic behaviors to enable predictions far into the
future (at least the length of a period). Our online approach does not have
an initial training phase; it continuously improves itself during application
execution without incurring significant computational overhead. Experimental
results show better accuracy and lower computational overhead compared with
the state-of-the-art on two representative applications.
</p>
    </subsection>
    <subsection id="uid84" level="1">
      <bodyTitle>An Introspection Monitoring Library</bodyTitle>
      <p>In this work <ref xlink:href="#tadaam-2019-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we have described how to improve communication time of <span class="smallcap" align="left">MPI</span> parallel
applications with the use of a library that enables to monitor <span class="smallcap" align="left">MPI</span> applications and allows for introspection (the program itself can query the
state of the monitoring system). Based on previous work, this library is able
to see how collective communications are decomposed into point-to-point
messages. It also features monitoring sessions that allow suspending and
restarting the monitoring, limiting it to specific portions of the
code. Experiments show that the monitoring overhead is very small and that the
proposed features allow for dynamic and efficient rank reordering enabling up
to a two-fold reduction of the communication parts of some programs.
</p>
    </subsection>
    <subsection id="uid85" level="1">
      <bodyTitle>Tag matching in constant time</bodyTitle>
      <p>Tag matching is the operation, inside an <span class="smallcap" align="left">MPI</span> library, of pairing a
packet arriving from the network, with its corresponding receive
request posted by the user. This operation is not straightforward
given that the matching criteria are the communicator, the source of
the message, a user-supplied tag, and since there are wildcards for
tag and source. State of the art algorithms are linear with the
number of pending packets and requests, or don't support wildcards.</p>
      <p>We proposed <ref xlink:href="#tadaam-2019-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> an algorithm that is able to
perform the matching operation in constant time, in all cases, even
with wildcard requests. We implemented the algorithm in our
<span class="smallcap" align="left">NewMadeleine</span> communication library, and demonstrated it actually
improves performance of Cholesky factorization with <span class="smallcap" align="left">Chameleon</span> running on top of <span class="smallcap" align="left">StarPU</span>.
</p>
    </subsection>
    <subsection id="uid86" level="1">
      <bodyTitle>Dynamic broadcasts in <span class="smallcap" align="left">StarPU</span>/<span class="smallcap" align="left">NewMadeleine</span></bodyTitle>
      <p>We worked on the improvement of broadcast performance in <span class="smallcap" align="left">StarPU</span> runtime
with <span class="smallcap" align="left">NewMadeleine</span>. Although <span class="smallcap" align="left">StarPU</span> supports <span class="smallcap" align="left">MPI</span>, its distributed and
asynchronous model to schedule tasks makes it impossible to use <span class="smallcap" align="left">MPI</span> optimized routines, such as <tt>MPI_Bcast</tt>. Indeed these functions
require that all nodes participating in the collective be synchronized and
know each other, which makes them unusable in practice for <span class="smallcap" align="left">StarPU</span>.</p>
      <p>We proposed <ref xlink:href="#tadaam-2019-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, a dynamic broadcast
algorithm that runs without synchronization among participants, and
where only the root node needs to know the others. Recipients do not
even have to know whether the message will arrive as a plain
send/receive or through a dynamic broadcast, which allows for a
seamless integration in <span class="smallcap" align="left">StarPU</span>. We implemented the algorithm in our
<span class="smallcap" align="left">NewMadeleine</span> communication library, leveraging its event-based
paradigm and background progression of communications. Preliminary
experiments using Cholesky factorization from the <span class="smallcap" align="left">Chameleon</span> library
show a noticeable performance improvement.
</p>
    </subsection>
    <subsection id="uid87" level="1">
      <bodyTitle>Task based asynchronous <span class="smallcap" align="left">MPI</span> collectives optimisation</bodyTitle>
      <p>Asynchronous collectives are more complex than plain non-blocking
point-to-point communications. They need specific mechanisms for
progression. Task based progression is a good way to improve the
performance of applications with overlap.</p>
      <p>We worked on a benchmarking tool <ref xlink:href="#tadaam-2019-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>
measuring specific collective overlapping, taking into account time
shift between different nodes. Using this tool, we were able to
experiment with different task execution policies in the
<span class="smallcap" align="left">NewMadeleine</span> communication library.</p>
      <p>We propose a progression policy consisting of dedicating a core to
progression tasks; modern processors have more and more cores, so it
is profitable on that kind of processors. The only function of this
core is to progress communications, so we use a particularly
aggressive algorithm for this progression.
</p>
    </subsection>
    <subsection id="uid88" level="1">
      <bodyTitle>Dynamic placement of progress
thread for overlapping <span class="smallcap" align="left">MPI</span> non-blocking collectives on manycore
processor</bodyTitle>
      <p>To amortize the cost of <span class="smallcap" align="left">MPI</span> collective operations, non-blocking
collectives have been proposed so as to allow communications to be
overlapped with computation. Unfortunately, collective
communications are more CPU-hungry than point-to-point
communications and running them in a communication thread on a
single dedicated CPU core makes them slow. On the other hand,
running collective communications on the application cores leads to
no overlap. To address these issues, we
proposed <ref xlink:href="#tadaam-2019-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>
an algorithm for tree-based collective operations that splits the
tree between communication cores and application cores. To get the
best of both worlds, the algorithm runs the short but heavy part of
the tree on application cores, and the long but narrow part of the
tree on one or several communication cores, so as to get a trade-off
between overlap and absolute performance. We provided a model to
study and predict its behavior and to tune its parameters. We
implemented it in the MPC framework, which is a thread-based <span class="smallcap" align="left">MPI</span> implementation. We have run benchmarks on manycore processors such
as the KNL and Skylake and got good results both in terms of
performance and overlap.
</p>
    </subsection>
    <subsection id="uid89" level="1">
      <bodyTitle>Dynamic placement of Hybrid <span class="smallcap" align="left">MPI</span> +X
coupled applications</bodyTitle>
      <p>We continued our collaboration with CERFACS in order to propose the <span class="smallcap" align="left">Hippo</span> software
that addresses the issue of dynamic placement of computing kernels that each feature
their own placement/mapping/binding policy of <span class="smallcap" align="left">MPI</span> processes and OpenMP threads.
In such a case, enforcing a global placement policy for the whole application composed
of several such kernels may be detrimental to the overall performance.
<span class="smallcap" align="left">Hippo</span> (based on our <span class="smallcap" align="left">Hsplit</span> library and the <span class="smallcap" align="left">hwloc</span> software) is able to make the selection
of the relevant resource on which some master <span class="smallcap" align="left">MPI</span> processes are going to execute and
spawn OpenMP parallel sections while the remaining <span class="smallcap" align="left">MPI</span> processes are put in a
“quiescence” state. <span class="smallcap" align="left">Hippo</span> is currently at the prototype stage and the interface and
the set of provided functionalities need some refinement; however, preliminary results
are very encouraging, especially on climate modelling applications from Météo France.
</p>
    </subsection>
    <subsection id="uid90" level="1">
      <bodyTitle>Scheduling on Two Unbounded Resources with Communication Costs</bodyTitle>
      <p>Heterogeneous computing systems are popular and powerful platforms, containing several heterogeneous computing elements (e.g. CPU+GPU). In <ref xlink:href="#tadaam-2019-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we consider a platform with two types of machines, each containing an unbounded number of elements. We want to execute an application represented as a Directed Acyclic Graph (DAG) on this platform. Each task of the application has two possible execution times, depending on the type of machine it is executed on. In addition we consider a cost to transfer data from one platform to the other between successive tasks. We aim at minimizing the execution time of the DAG (also called makespan). We show that the problem is NP-complete for graphs of depth at least three but polynomial for graphs of depth at most two. In addition, we provide polynomial-time algorithms for some usual classes of graphs (trees, series-parallel graphs).
</p>
    </subsection>
    <subsection id="uid91" level="1">
      <bodyTitle>H-Revolve: A Framework for Adjoint Computation on Synchronous Hierarchical Platforms</bodyTitle>
      <p>In this work <ref xlink:href="#tadaam-2019-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we study the problem of checkpointing strategies for adjoint
computation on synchronous hierarchical platforms. Specifically, we consider
computational platforms with several levels of storage with different writing
and reading costs. When reversing a large adjoint chain, choosing which data to
checkpoint and where is a critical decision for the overall performance of the
computation. We introduce H-Revolve, an optimal algorithm for this problem. We
make it available in a public Python library along with the implementation of
several state-of-the-art algorithms for the variant of the problem with two
levels of storage. We provide a detailed description of how one can use this
library in an adjoint computation software in the field of automatic
differentiation or backpropagation. Finally, we evaluate the performance of
H-Revolve and other checkpointing heuristics through an extensive campaign of
simulation.
</p>
    </subsection>
    <subsection id="uid92" level="1">
      <bodyTitle>Sizing and Partitioning Strategies for Burst-Buffers to Reduce IO Contention</bodyTitle>
      <p>Burst-Buffers are high throughput and small size storage which are being used as
an intermediate storage between the PFS (Parallel File System) and the
computational nodes of modern HPC systems. They can help to reduce the
contention on the PFS, a shared resource whose read and write performance
increases more slowly than processing power in HPC systems. A second usage is to
accelerate data transfers and to hide the latency to the PFS. In this work <ref xlink:href="#tadaam-2019-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we
concentrate on the first usage. We propose a model for Burst-Buffers and
application transfers. We consider the problem of dimensioning and sharing the
Burst-Buffers between several applications. This dimensioning can be done either
dynamically or statically. The dynamic allocation considers that any application
can use any available portion of the Burst-Buffers. The static allocation
considers that when a new application enters the system, it is assigned some
portion of the Burst-Buffers, which cannot be used by the other applications
until that application leaves the system and its data is purged from it. We show
that the general sharing problem to guarantee fair performance for all
applications is an NP-complete problem. We propose a polynomial-time algorithm
for the special case of finding the optimal buffer size such that no application
is slowed down due to PFS contention, both in the static and dynamic cases.
Finally, we provide evaluations of our algorithms in realistic settings. We use
those to discuss how to minimize the overhead of the static allocation of
buffers compared to the dynamic allocation.
</p>
    </subsection>
    <subsection id="uid93" level="1">
      <bodyTitle>Optimal Memory-aware Backpropagation of Deep Join Networks</bodyTitle>
      <p>Deep Learning training memory needs can prevent the user from considering large
models and large batch sizes. In our work <ref xlink:href="#tadaam-2019-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> (extended
version <ref xlink:href="#tadaam-2019-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>), we propose to use techniques from
memory-aware scheduling and Automatic Differentiation (AD) to execute a
backpropagation graph with a bounded memory requirement at the cost of extra
recomputations. The case of a single homogeneous chain, i.e. the case of a
network in which all stages are identical and form a chain, is well understood and
optimal solutions have been proposed in the AD literature. The networks
encountered in practice in the context of Deep Learning are much more diverse,
both in terms of shape and heterogeneity. In this work, we define the class of
backpropagation graphs, and extend those on which one can compute in polynomial
time a solution that minimizes the total number of recomputations. In particular
we consider join graphs which correspond to models such as Siamese or Cross
Modal Networks.
</p>
    </subsection>
    <subsection id="uid94" level="1">
      <bodyTitle>Optimal checkpointing for heterogeneous chains: how to train deep neural networks with limited memory </bodyTitle>
      <p>This work <ref xlink:href="#tadaam-2019-bid22" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> introduces a new activation checkpointing method which allows to significantly decrease memory usage when training Deep Neural Networks with the back-propagation algorithm. Similarly to checkpointing techniques coming from the literature on Automatic Differentiation, it consists in dynamically selecting the forward activations that are saved during the training phase, and then automatically recomputing missing activations from those previously recorded. We propose an original computation model that combines two types of activation savings: either only storing the layer inputs, or recording the complete history of operations that produced the outputs (this uses more memory, but requires fewer recomputations in the backward phase), and we provide an algorithm to compute the optimal computation sequence for this model. This paper also describes a PyTorch implementation that processes the entire chain, dealing with any sequential DNN whose internal layers may be arbitrarily complex and automatically executing it according to the optimal checkpointing strategy computed given a memory limit. Through extensive experiments, we show that our implementation consistently outperforms existing checkpointing approaches for a large class of networks, image sizes and batch sizes.
</p>
    </subsection>
    <subsection id="uid95" level="1">
      <bodyTitle>I/O scheduling strategy for HPC applications</bodyTitle>
      <p>With the ever-growing need of data in HPC applications, the congestion at the
I/O level becomes critical in supercomputers. Architectural enhancement such as
burst buffers and pre-fetching are added to machines, but are not sufficient to
prevent congestion. Recent online I/O scheduling strategies have been put in
place, but they add an additional congestion point and overheads in the
computation of applications.</p>
      <p>In this project, we studied application patterns (such as periodicity), in order
to develop efficient scheduling
strategies <ref xlink:href="#tadaam-2019-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, <ref xlink:href="#tadaam-2019-bid24" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> for their I/O
transfers.</p>
    </subsection>
    <subsection id="uid96" level="1">
      <bodyTitle>A New Framework for Evaluating Straggler Detection Mechanisms in MapReduce</bodyTitle>
      <p>In this work <ref xlink:href="#tadaam-2019-bid25" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> we present a new framework for evaluating
straggler detection mechanisms in MapReduce. We then show how to use it
efficiently.
</p>
    </subsection>
    <subsection id="uid97" level="1">
      <bodyTitle>Clarification of the <span class="smallcap" align="left">MPI</span> semantics</bodyTitle>
      <p>In the framework of the <span class="smallcap" align="left">MPI</span> Forum, we have been involved in several active working groups,
in particular the “Terms and Conventions” Working Group. The work carried out in this
group has led to a timely study and proposed clarifications, revisions, and enhancements
to the Message Passing Interface's (<span class="smallcap" align="left">MPI</span>'s) Semantic Terms and Conventions. To enhance <span class="smallcap" align="left">MPI</span>,
a clearer understanding of the meaning of the key terminology has proven essential, and,
surprisingly, important concepts remain underspecified, ambiguous and, in some cases,
inconsistent and/or conflicting despite 26 years of standardization.
This work <ref xlink:href="#tadaam-2019-bid26" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> addresses these concerns comprehensively and usefully informs <span class="smallcap" align="left">MPI</span> developers, implementors, those teaching and learning <span class="smallcap" align="left">MPI</span>, and power users alike about
key aspects of existing conventions, syntax, and semantics. This work will also be a useful
driver for great clarity in current and future standardization and implementation efforts for <span class="smallcap" align="left">MPI</span>.
</p>
    </subsection>
    <subsection id="uid98" level="1">
      <bodyTitle>Adaptive Request Scheduling for the I/O Forwarding Layer using Reinforcement Learning</bodyTitle>
      <p>I/O optimization techniques such as request scheduling can improve performance mainly for the access patterns they target, or they depend on the precise tune of parameters. In this work <ref xlink:href="#tadaam-2019-bid27" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we propose an approach to adapt the I/O forwarding layer of HPC systems to the application access patterns by tuning a request scheduler. Our case study is the TWINS scheduling algorithm, where performance improvements depend on the time window parameter, which depends on the current workload. Our approach uses a reinforcement learning technique — contextual bandits — to make the system capable of learning the best parameter value to each access pattern during its execution, without a previous training phase. We evaluate our proposal and demonstrate it can achieve a precision of <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>88</mn><mo>%</mo></mrow></math></formula> on the parameter selection in the first hundreds of observations of an access pattern. After having observed an access pattern for a few minutes (not necessarily contiguously), we demonstrate that the system will be able to optimize its performance for the rest of the life of the system (years).
</p>
    </subsection>
  </resultats>
  <contrats id="uid99">
    <bodyTitle>Bilateral Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid100" level="1">
      <bodyTitle>Bilateral Grants with Industry</bodyTitle>
      <subsection id="cid1" level="2">
        <bodyTitle>Intel</bodyTitle>
        <p><span class="smallcap" align="left">Intel</span> granted $30k and provided information about future many-core
platforms and memory architectures to ease the design and development
of the <span class="smallcap" align="left">hwloc</span> software with early support for next generation hardware.</p>
      </subsection>
      <subsection id="cid2" level="2">
        <bodyTitle>EDF</bodyTitle>
        <p>With Yvan Fournier from EDF R&amp;D, we co-advise the PhD thesis of Benjamin
Lorendeau under a CIFRE funding.</p>
      </subsection>
      <subsection id="cid3" level="2">
        <bodyTitle>CEA</bodyTitle>
        <p>CEA/DAM granted the CIFRE PhD thesis of Florian Reynier on non-blocking
<span class="smallcap" align="left">MPI</span> collectives.
</p>
      </subsection>
    </subsection>
  </contrats>
  <partenariat id="uid101">
    <bodyTitle>Partnerships and Cooperations</bodyTitle>
    <subsection id="uid102" level="1">
      <bodyTitle>Regional Initiatives</bodyTitle>
      <subsection id="cid4" level="2">
        <bodyTitle>CRA HPC Scalable Ecosystem, 2018-2021</bodyTitle>
        <sanspuceslist>
          <li id="uid103">
            <p noindent="true">2018 - 2021 (36 months)</p>
          </li>
          <li id="uid104">
            <p noindent="true">Coordinator: Emmanuel <span class="smallcap" align="left">Agullo</span></p>
          </li>
          <li id="uid105">
            <p noindent="true">Other partners: INRA, Institut Pprime, UPPA, Airbus, CEA, CATIE</p>
          </li>
          <li id="uid106">
            <p noindent="true">Abstract: The goal is to design a unified runtime-system for numerical
simulation at large-scale and with a large amount of data. We aim at
contributing significantly to the convergence between HPC and BigData.
<span class="smallcap" align="left">TADaaM</span> is involved in scheduling data access and managing
communication efficiently on large-scale systems.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid107" level="1">
      <bodyTitle>National Initiatives</bodyTitle>
      <subsection id="cid5" level="2">
        <bodyTitle>ANR</bodyTitle>
        <p><i>ANR SATAS</i> SAT as a Service (<ref xlink:href="http://www.agence-nationale-recherche.fr/Project-ANR-15-CE40-0017" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>agence-nationale-recherche.<allowbreak/>fr/<allowbreak/>Project-ANR-15-CE40-0017</ref>).</p>
        <sanspuceslist>
          <li id="uid108">
            <p noindent="true">AP générique 2015, 01/2016 - 12/2019 (48 months)</p>
          </li>
          <li id="uid109">
            <p noindent="true">Coordinator: Laurent Simon (LaBRI)</p>
          </li>
          <li id="uid110">
            <p noindent="true">Other partners: CRIL (Univ. Artois), Inria Lille (Spirals)</p>
          </li>
          <li id="uid111">
            <p noindent="true">Abstract:
The SATAS project aims to advance the state of the art in massively
parallel SAT solving. The final goal of the project is to provide a
“pay as you go” interface to SAT solving services and will extend
the reach of SAT solving technologies, daily used in many critical and
industrial applications, to new application areas, which were
previously considered too hard, and lower the cost of deploying
massively parallel SAT solvers on the cloud.</p>
          </li>
        </sanspuceslist>
        <p><i>ANR DASH</i> Data-Aware Scheduling at Higher scale
(<ref xlink:href="https://project.inria.fr/dash/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>project.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>dash/</ref>).</p>
        <sanspuceslist>
          <li id="uid112">
            <p noindent="true">AP générique JCJC 2017, 03/2018 - 02/2022 (48 months)</p>
          </li>
          <li id="uid113">
            <p noindent="true">Coordinator: Guillaume <span class="smallcap" align="left">Pallez</span> (Tadaam)</p>
          </li>
          <li id="uid114">
            <p noindent="true">Abstract: This project focuses on the efficient execution of I/O for
High-Performance applications. The idea is to take into account some knowledge
on the behavior of the different I/O steps to compute efficient schedules, and
to update them dynamically with the online information.</p>
          </li>
        </sanspuceslist>
        <p><i>ANR Solharis</i> SOLvers for Heterogeneous Architectures
over Runtime systems, Investigating Scalability
.</p>
        <sanspuceslist>
          <li id="uid115">
            <p noindent="true">AAPG ANR 2019, 2019 - 2023 (48 months)</p>
          </li>
          <li id="uid116">
            <p noindent="true">Coordinator: Alfredo <span class="smallcap" align="left">Buttari</span> (IRIT-INPT)</p>
          </li>
          <li id="uid117">
            <p noindent="true">Abstract: The Solharis project aims at producing scalable
methods for the solution of large sparse linear systems on large
heterogeneous supercomputers, using the <span class="smallcap" align="left">StarPU</span> runtime system, and
to address the scalability issues both in runtime systems and in
solvers.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="cid6" level="2">
        <bodyTitle>ADT - Inria Technological Development Actions</bodyTitle>
        <p>
          <i>ADT Gordon</i>
        </p>
        <sanspuceslist>
          <li id="uid118">
            <p noindent="true">10/2018 - 09/2020 (24 months)</p>
          </li>
          <li id="uid119">
            <p noindent="true">Coordinator: Emmanuel <span class="smallcap" align="left">Jeannot</span></p>
          </li>
          <li id="uid120">
            <p noindent="true">Other partners: Storm, HiePACS, PLEIADE (Inria Bordeaux)</p>
          </li>
          <li id="uid121">
            <p noindent="true">Abstract:
Teams HiePACS, Storm and Tadaam develop each a brick of an HPC
software stack, namely solver, runtime, and communication library. The
goal of the Gordon project is to consolidate the HPC stack, to
improve interfaces between each brick, and to target a better
scalability. The bioinformatics application involved in the project
has been selected so as to stress the underlying systems.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="cid7" level="2">
        <bodyTitle>IPL - Inria Project Lab</bodyTitle>
        <p>High-Performance computing and BigData</p>
        <sanspuceslist>
          <li id="uid122">
            <p noindent="true"><b>Participants:</b> Guillaume Pallez, Emmanuel Jeannot, Nicolas Vidal,
Francieli Zanon-Boito</p>
          </li>
          <li id="uid123">
            <p noindent="true">HPC and Big Data evolved with their own infrastructures (supercomputers
versus clouds), applications (scientific simulations versus data analytics)
and software tools (<span class="smallcap" align="left">MPI</span> and OpenMP versus Map/Reduce or Deep Learning
frameworks). But Big Data analytics is becoming more compute-intensive (thanks
to deep learning), while data handling is becoming a major concern for
scientific computing. The goal of this HPC-BigData IPL is to gather teams from
the HPC, Big Data and Machine Learning (ML) areas to work at the intersection
between these domains. Research is organized along three main axes: high
performance analytics for scientific computing applications, high performance
analytics for big data applications, infrastructure and resource management</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="cid8" level="2">
        <bodyTitle>Collaboration with CERFACS</bodyTitle>
        <p>Developments on the <span class="smallcap" align="left">Hippo</span> software</p>
        <sanspuceslist>
          <li id="uid124">
            <p noindent="true"><b>Participants:</b> Brice Goglin, Guillaume Mercier</p>
          </li>
          <li id="uid125">
            <p noindent="true">A Memorandum of Understanding is currently being negotiated between Inria
and CERFACS to organize the collaboration between both entities pertaining to
the developments on the <span class="smallcap" align="left">Hippo</span> software. The goal is to provide a portable
solution to address the issue of dynamic placement of hybrid coupled <span class="smallcap" align="left">MPI</span> + OpenMP
applications, especially for climate modelling. Météo France is one of the targets
of this work but other teams/institutes around the globe have expressed an interest
in <span class="smallcap" align="left">Hippo</span>. Therefore we want to create a solution that would match the needs
of the community on the whole.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid126" level="1">
      <bodyTitle>European Initiatives</bodyTitle>
      <subsection id="uid127" level="2">
        <bodyTitle>Collaborations with Major European Organizations</bodyTitle>
        <sanspuceslist>
          <li id="uid128">
            <p noindent="true">Partner 1: INESC-ID, Lisbon, (Portugal)</p>
          </li>
          <li id="uid129">
            <p noindent="true">Subject 1: Application modeling for hierarchical memory system</p>
          </li>
          <li id="uid130">
            <p noindent="true">Partner 2: University Carlos III de Madrid, (Spain)</p>
          </li>
          <li id="uid131">
            <p noindent="true">Subject 2: I/O Scheduling</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid132" level="1">
      <bodyTitle>International Initiatives</bodyTitle>
      <subsection id="uid133" level="2">
        <bodyTitle>Inria International Labs</bodyTitle>
        <p>Joint-Lab on Extreme Scale Computing (JLESC):</p>
        <sanspuceslist>
          <li id="uid134">
            <p noindent="true">Coordinators: Franck Cappello (general) and Yves Robert (Inria coordinator).</p>
          </li>
          <li id="uid135">
            <p noindent="true">Other partners: Argonne National Lab, University of Illinois at Urbana-Champaign (NCSA), Tokyo Riken, Jülich Supercomputing Center, Barcelona Supercomputing Center (BSC).</p>
          </li>
          <li id="uid136">
            <p noindent="true">Abstract: The purpose of the Joint Laboratory for Extreme Scale Computing
(JLESC) is to be an international, virtual organization whose goal is to enhance
the ability of member organizations and investigators to make the bridge between
Petascale and Extreme computing. The founding partners of the JLESC are Inria
and UIUC. Further members are ANL, BSC, JSC and RIKEN-AICS.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid137" level="2">
        <bodyTitle>Inria International Partners</bodyTitle>
        <subsection id="uid138" level="3">
          <bodyTitle>Informal International Partners</bodyTitle>
          <sanspuceslist>
            <li id="uid139">
              <p noindent="true">Partner 1: Argonne National Lab</p>
            </li>
            <li id="uid140">
              <p type="sanspuces" noindent="true">Subject 1: Binomial Checkpointing Strategies for Machine Learning
(recipient of a FACCTS grant, 2018-2020) as well as network
performance prediction.</p>
            </li>
            <li id="uid141">
              <p noindent="true">Partner 2: Vanderbilt University</p>
            </li>
            <li id="uid142">
              <p type="sanspuces" noindent="true">Subject 2: Scheduling for Neurosciences <ref xlink:href="#uid82" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
            </li>
            <li id="uid143">
              <p noindent="true">Partner 3: ICL at University of Tennessee</p>
            </li>
            <li id="uid144">
              <p type="sanspuces" noindent="true">Subject 3: on instrumenting <span class="smallcap" align="left">MPI</span> applications and
modeling platforms (works on HWLOC take place in the context
of the Open MPI consortium) and <span class="smallcap" align="left">MPI</span> and process placement</p>
            </li>
            <li id="uid145">
              <p noindent="true">Partner 4: Lawrence Livermore National Laboratory</p>
            </li>
            <li id="uid146">
              <p noindent="true">Subject 4: Exposing Heterogeneous Memory Characteristics to HPC Applications <ref xlink:href="#uid75" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid147" level="1">
      <bodyTitle>International Research Visitors</bodyTitle>
      <subsection id="uid148" level="2">
        <bodyTitle>Visits of International Scientists</bodyTitle>
        <simplelist>
          <li id="uid149">
            <p noindent="true">Ana Gainaru, Research Assistant Professor at U. Vanderbilt, visited the
team for one week in December 2019.</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
  </partenariat>
  <diffusion id="uid150">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid151" level="1">
      <bodyTitle>Promoting Scientific Activities</bodyTitle>
      <subsection id="uid152" level="2">
        <bodyTitle>Scientific Events: Organisation</bodyTitle>
        <subsection id="uid153" level="3">
          <bodyTitle>General Chair, Scientific Chair</bodyTitle>
          <simplelist>
            <li id="uid154">
              <p noindent="true">Brice <span class="smallcap" align="left">Goglin</span> and Emmanuel <span class="smallcap" align="left">Jeannot</span> organized (with
Didem <span class="smallcap" align="left">Unat</span> from Koç University, Turkey), PADAL 2019
(Fifth Workshop on Programming Abstractions for Data Locality) in
Bordeaux (workshop by invitation): 25 participants from 10 different
countries.</p>
            </li>
            <li id="uid155">
              <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> was, along with Nataliia
<span class="smallcap" align="left">Bielova</span> (from Inria team PRIVATICS), the co-chair of this
year's jury of the CNIL-Inria European prize awarded to research
scientific papers on the subject of data protection and privacy.</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid156" level="3">
          <bodyTitle>Member of the steering committee</bodyTitle>
          <simplelist>
            <li id="uid157">
              <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> is member of the steering committee of
Euro-Par and the Cluster international conference.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid158" level="2">
        <bodyTitle>Scientific Events: Selection</bodyTitle>
        <subsection id="uid159" level="3">
          <bodyTitle>Chair of Conference Program Committees</bodyTitle>
          <simplelist>
            <li id="uid160">
              <p noindent="true">Brice <span class="smallcap" align="left">Goglin</span> is the <i>Architecture &amp; Networks</i> area
co-chair of SuperComputing 2020.</p>
            </li>
            <li id="uid161">
              <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> is the track program chair of Cluster 2020
(area: application, algorithms, and libraries).</p>
            </li>
            <li id="uid162">
              <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> was the program chair of the COLOC workshop
(collocated with Euro-Par).</p>
            </li>
            <li id="uid163">
              <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> was the program chair of the RADR workshop
(collocated with IPDPS).</p>
            </li>
            <li id="uid164">
              <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> was a co-chair (along with Roberto
<span class="smallcap" align="left">Di Cosmo</span>) of the workshop on <i>Software and Open
Science: issues and opportunities</i>, National days on Open science
(JNSO 2019), Paris
(<ref xlink:href="https://jnso2019.sciencesconf.org/resource/page/id/2" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>jnso2019.<allowbreak/>sciencesconf.<allowbreak/>org/<allowbreak/>resource/<allowbreak/>page/<allowbreak/>id/<allowbreak/>2</ref>).</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid165" level="3">
          <bodyTitle>Member of Conference Program Committees</bodyTitle>
          <simplelist>
            <li id="uid166">
              <p noindent="true">Alexandre <span class="smallcap" align="left">Denis</span> was a member of the program
committee of CCGrid 2019.</p>
            </li>
            <li id="uid167">
              <p noindent="true">Brice <span class="smallcap" align="left">Goglin</span> was a member of the program committee of
ICPP 2019, EuroMPI 2019, HotInterconnects 26, ROME 2019, ROSS 2019,
RADR 2019.</p>
            </li>
            <li id="uid168">
              <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> was member of the program committee of
SuperComputing 2019, Euro-MPI 2019, ROSS 2019, Heteropar 2019.</p>
            </li>
            <li id="uid169">
              <p noindent="true">Guillaume <span class="smallcap" align="left">Mercier</span> was a member of the program committee of CCGrid 2019 and EuroMPI 2019.</p>
            </li>
            <li id="uid170">
              <p noindent="true">Guillaume <span class="smallcap" align="left">Pallez</span> was a member of the program committee of SC 2019 (Tutorials), ICPP 2019, IPDPS 2020, ICA3PP 2019, PMBS 2019.</p>
            </li>
            <li id="uid171">
              <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> was a member of the program committee of ENISA's “EU Privacy Forum”.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid172" level="2">
        <bodyTitle>Journal</bodyTitle>
        <subsection id="uid173" level="3">
          <bodyTitle>Member of the Editorial Boards</bodyTitle>
          <simplelist>
            <li id="uid174">
              <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> is associate editor of the
International Journal of Parallel, Emergent &amp; Distributed
Systems (IJPEDS).</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid175" level="3">
          <bodyTitle>Reviewer - Reviewing Activities</bodyTitle>
          <simplelist>
            <li id="uid176">
              <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> was a reviewer for JPDC, Parallel Computing,
Transaction on Computers.</p>
            </li>
            <li id="uid177">
              <p noindent="true">Guillaume <span class="smallcap" align="left">Mercier</span> was a reviewer for IEEE Transactions on
Computers and for Cluster Computing.</p>
            </li>
            <li id="uid178">
              <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> was a reviewer for journal Terminal.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid179" level="2">
        <bodyTitle>Invited Talks</bodyTitle>
        <simplelist>
          <li id="uid180">
            <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> was invited to the panel <i>Heterogeneous Computing for Energy Efficiency</i> of
10th International Green and Sustainable Computing Conference (Alexandria VA,
USA, October 2019, <ref xlink:href="https://www.igscc.org/copy-of-schedule" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>igscc.<allowbreak/>org/<allowbreak/>copy-of-schedule</ref>).</p>
          </li>
          <li id="uid181">
            <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> was invited to the panel <i>Resilience in High
Performance Computing</i> to the HiPEAC event (at Bilbao Spain, October 2019,
<ref xlink:href="https://www.hipeac.net/csw/2019/bilbao/#/schedule/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>hipeac.<allowbreak/>net/<allowbreak/>csw/<allowbreak/>2019/<allowbreak/>bilbao/<allowbreak/>#/<allowbreak/>schedule/</ref>).</p>
          </li>
          <li id="uid182">
            <p noindent="true">Guillaume <span class="smallcap" align="left">Mercier</span> was invited to make a presentation at the PADAL workshop 2019.</p>
          </li>
          <li id="uid183">
            <p noindent="true">Guillaume <span class="smallcap" align="left">Pallez</span> was invited to give a talk at the Royal Society of London, UK, as a part of the event <i>Numerical algorithms for high-performance computational science</i> <ref xlink:href="#tadaam-2019-bid28" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>
(<ref xlink:href="https://royalsociety.org/science-events-and-lectures/2019/04/high-performance-computing/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>royalsociety.<allowbreak/>org/<allowbreak/>science-events-and-lectures/<allowbreak/>2019/<allowbreak/>04/<allowbreak/>high-performance-computing/</ref>).</p>
          </li>
          <li id="uid184">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> was invited to deliver a common talk
with Emmanuel <span class="smallcap" align="left">Netter</span>, “<i>Is code law?</i>”, at the
Symposium of the Agorantic research federation, University of
Avignon
(<ref xlink:href="https://agorantic.univ-avignon.fr/symposium/symposium-agorantic-28-janvier-2019/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>agorantic.<allowbreak/>univ-avignon.<allowbreak/>fr/<allowbreak/>symposium/<allowbreak/>symposium-agorantic-28-janvier-2019/</ref>).</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid185" level="2">
        <bodyTitle>Scientific Expertise</bodyTitle>
        <simplelist>
          <li id="uid186">
            <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> was a member of the hiring committee
of an Inria junior researcher position at Nancy and at the National Level.</p>
          </li>
          <li id="uid187">
            <p noindent="true">Guillaume <span class="smallcap" align="left">Pallez</span> is an elected member of the Inria
evaluation committee.</p>
          </li>
          <li id="uid188">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> is co-chair (along with Roberto
<span class="smallcap" align="left">Di Cosmo</span>) of the workgroup on Free software of the
Permanent Secretariat for Open Science (SPSO) of the French Ministry
of Higher Education (MENESR).</p>
          </li>
          <li id="uid189">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> was heard in an expert panel
commissioned by the <i>Information mission on digital identity</i>
of the French <i>Assemblée nationale</i>, chaired by Mrs Marietta
<span class="smallcap" align="left">Karamanli</span>, assisted by Mrs Paula <span class="smallcap" align="left">Forteza</span> and
Christine <span class="smallcap" align="left">Hennion</span>.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid190" level="2">
        <bodyTitle>Research Administration</bodyTitle>
        <simplelist>
          <li id="uid191">
            <p noindent="true">Alexandre <span class="smallcap" align="left">Denis</span> is head of the Inria Bordeaux CUMI-R (IT users committee).</p>
          </li>
          <li id="uid192">
            <p noindent="true">Brice <span class="smallcap" align="left">Goglin</span> and Guillaume <span class="smallcap" align="left">Mercier</span> are elected
members of the Inria Bordeaux center committee.</p>
          </li>
          <li id="uid193">
            <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> is deputy head of science of the Inria Bordeaux
research center.</p>
          </li>
          <li id="uid194">
            <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> is member of the Inria evaluation committee.</p>
          </li>
          <li id="uid195">
            <p noindent="true">Emmanuel <span class="smallcap" align="left">Jeannot</span> is member of LaBRI scientific council and head
of the Satanas team.</p>
          </li>
          <li id="uid196">
            <p noindent="true">Guillaume <span class="smallcap" align="left">Pallez</span> is a worker representative at the
Prevention, Health, Security committee (CHSCT) for the Inria center
of Bordeaux.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid197" level="2">
        <bodyTitle>Standardization Activities</bodyTitle>
        <p><span class="smallcap" align="left">TADaaM</span> attended the <span class="smallcap" align="left">MPI</span> Forum meetings on behalf of Inria
(where the <span class="smallcap" align="left">MPI</span> standard for communication in parallel applications is developed and maintained).
Guillaume <span class="smallcap" align="left">Mercier</span> leads the <i>Hardware Topologies</i> working group.
Part of the <span class="smallcap" align="left">Hsplit</span> proposal was discussed and read at the last physical meeting in December 2019 in
Albuquerque and has been approved to enter the voting process for an eventual inclusion
in the next revision (4.0) of the <span class="smallcap" align="left">MPI</span> standard. This voting process will take place in the
first semester of 2020 and the release of the 4.0 revision is expected for the end of 2020.
Guillaume <span class="smallcap" align="left">Mercier</span> is also the chair of the standard chapter committee <i>Groups, Contexts, Communicators, Caching</i>
and member of several other chapter committees.</p>
      </subsection>
    </subsection>
    <subsection id="uid198" level="1">
      <bodyTitle>Teaching - Supervision - Juries</bodyTitle>
      <subsection id="uid199" level="2">
        <bodyTitle>Teaching</bodyTitle>
        <p>Members of the <span class="smallcap" align="left">TADaaM</span> project gave hundreds of hours of teaching at
Université de Bordeaux and the Bordeaux INP engineering school, covering a
wide range of topics from basic use of computers, introduction to algorithmics
and C programming to advanced topics such as probabilities and statistics,
scheduling, computer architecture, operating systems, parallel programming and
high-performance runtime systems, as well as software law and personal data.</p>
        <simplelist>
          <li id="uid200">
            <p noindent="true">Brice <span class="smallcap" align="left">Goglin</span> gave courses about Operating Systems to
teachers as part of the <i>Diplôme Inter Universitaire</i> to
prepare them for teaching the new Computer Science track in
high-school.</p>
          </li>
          <li id="uid201">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> did a training session on
“<i>Information science, digital technologies and law</i>” for
the continuous education of magistrates, École nationale de la
magistrature (National School for Magistrates), Paris.</p>
          </li>
          <li id="uid202">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> did two training sessions on
“<i>Strategic issues of information technologies</i>” and
“<i>Personal data law</i>” to a group of administration heads
and civil society activists of several French-speaking west-African
countries, in the context of FFGI 2019 at Ouagadougou, Burkina
Faso.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid203" level="2">
        <bodyTitle>Supervision</bodyTitle>
        <sanspuceslist>
          <li id="uid204">
            <p noindent="true">PhD: Benjamin <span class="smallcap" align="left">Lorendeau</span>, Amélioration des performances via un parallélisme multi-niveaux sur un code CFD en maillages non structurés. Defense at Université de Bordeaux on December 16th. Advisors: Yvan <span class="smallcap" align="left">Fournier</span> and Emmanuel <span class="smallcap" align="left">Jeannot</span>.</p>
          </li>
          <li id="uid205">
            <p noindent="true">PhD in progress: Valentin Honoré, Partitioning Strategies for high throughput Applications,
started in November 2017. Advisors: Guillaume <span class="smallcap" align="left">Pallez</span> and Brice <span class="smallcap" align="left">Goglin</span>.</p>
          </li>
          <li id="uid206">
            <p noindent="true">PhD in progress: Andrès <span class="smallcap" align="left">Rubio</span>, Management on heterogeneous and non-volatile memories,
started in October 2018. Advisor: Brice <span class="smallcap" align="left">Goglin</span>.</p>
          </li>
          <li id="uid207">
            <p noindent="true">PhD in progress: Nicolas <span class="smallcap" align="left">Vidal</span>, IO scheduling strategies,
started in October 2018. Advisors: Guillaume <span class="smallcap" align="left">Pallez</span> and Emmanuel <span class="smallcap" align="left">Jeannot</span>.</p>
          </li>
          <li id="uid208">
            <p noindent="true">PhD started: Philippe <span class="smallcap" align="left">Swartvagher</span>, Interactions at large scale between high performance communication
libraries and task-based runtime, started in October 2019. Advisors: Alexandre <span class="smallcap" align="left">Denis</span> and Emmanuel
<span class="smallcap" align="left">Jeannot</span>.</p>
          </li>
          <li id="uid209">
            <p noindent="true">PhD started: Florian <span class="smallcap" align="left">Reynier</span>, Task-based communication
progression, started in January 2019. Advisors: Alexandre
<span class="smallcap" align="left">Denis</span> and Emmanuel <span class="smallcap" align="left">Jeannot</span>.</p>
          </li>
          <li id="uid210">
            <p noindent="true">PhD started: Pierre <span class="smallcap" align="left">Ferenbach</span>, The legal regime of video games,
started in January 2019. Advisors: Xavier <span class="smallcap" align="left">Daverat</span> and
François <span class="smallcap" align="left">Pellegrini</span>.</p>
          </li>
          <li id="uid211">
            <p noindent="true">Master: Léa <span class="smallcap" align="left">Chevalier</span>, M2 student at Université Paris
Nanterre supervised by François <span class="smallcap" align="left">Pellegrini</span>, won the
Disney–Microsoft–Orange–TF1 prize on Media Law for her master
thesis on “<i>Artistic creations generated by automated
processing: are they works like others?</i>”.</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid212" level="2">
        <bodyTitle>Juries</bodyTitle>
        <p>Emmanuel <span class="smallcap" align="left">Jeannot</span> was member of the Ph.D defense jury of:</p>
        <simplelist>
          <li id="uid213">
            <p noindent="true">Hugo <span class="smallcap" align="left">Brunie</span>, U. Bordeaux (Member);</p>
          </li>
          <li id="uid214">
            <p noindent="true">Jean-Baptiste <span class="smallcap" align="left">Keck</span>, U. Grenoble Alpes (Reviewer);</p>
          </li>
          <li id="uid215">
            <p noindent="true">Hamza <span class="smallcap" align="left">Deroui</span>, Insa Rennes and U. Rennes (Reviewer);</p>
          </li>
          <li id="uid216">
            <p noindent="true">Arthur <span class="smallcap" align="left">Loussert</span>, U. Bordeaux (Member).</p>
          </li>
        </simplelist>
        <p>François <span class="smallcap" align="left">Pellegrini</span> was member of the Ph.D defense jury of:</p>
        <simplelist>
          <li id="uid217">
            <p noindent="true">Maximilien <span class="smallcap" align="left">Lanna</span>, U. Paris II Panthéon Assas (Member).</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid218" level="1">
      <bodyTitle>Popularization</bodyTitle>
      <subsection id="uid219" level="2">
        <bodyTitle>Internal or external Inria responsibilities</bodyTitle>
        <p>Brice <span class="smallcap" align="left">Goglin</span> is in charge of the diffusion of the scientific
culture for the Inria Research Centre of Bordeaux.
He organized several popularization activities involving colleagues.</p>
      </subsection>
      <subsection id="uid220" level="2">
        <bodyTitle>Articles and contents</bodyTitle>
        <simplelist>
          <li id="uid221">
            <p noindent="true">Guillaume <span class="smallcap" align="left">Pallez</span> wrote a blog article for <i>Binaire</i> on autonomous vehicles <ref xlink:href="#tadaam-2019-bid29" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
          </li>
          <li id="uid222">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> was interviewed on the subject of
“<i>Resisting algorithmic governance</i>” (cover page),
Expertises droit/technologies/prospectives, nr 443,
Feb. 2019 (<ref xlink:href="https://www.expertises.info/#anciens-numeros" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>expertises.<allowbreak/>info/<allowbreak/>#anciens-numeros</ref>).</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid223" level="2">
        <bodyTitle>Interventions</bodyTitle>
        <simplelist>
          <li id="uid224">
            <p noindent="true">Brice <span class="smallcap" align="left">Goglin</span> is the sponsor (<i>parrain</i>)
of the <i>Edouard Vaillant</i> middle school (Bordeaux) for their
scientific projects with the fondation <i>La main à la pâte</i>.</p>
          </li>
          <li id="uid225">
            <p noindent="true">Guillaume <span class="smallcap" align="left">Pallez</span>, Brice <span class="smallcap" align="left">Goglin</span>, Valentin
<span class="smallcap" align="left">Honoré</span>, Philippe <span class="smallcap" align="left">Swartvagher</span> and Nicolas
<span class="smallcap" align="left">Vidal</span> gave seminars and hands-on session about computer
science to schools attending <i>Fête de la Science</i>, Oct. 2019.</p>
          </li>
          <li id="uid226">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> participated in the round table
“<i>GDPR and cyber-security</i>” during the OpenS'IAE event, Pau.</p>
          </li>
          <li id="uid227">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> participated in the round table on
“<i>Legal aspects, GDPR and anonymization technologies</i>”
during the annual congress of <i>Société informatique de
France</i> (SIF), Bordeaux.</p>
          </li>
          <li id="uid228">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> gave a conference on the security of
personal data during the <i>Mars@Hack</i> event, in
Mont-de-Marsan.</p>
          </li>
          <li id="uid229">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> participated in the round table
“<i>Is our legal framework IA-compatible?</i>” during the NAIA
(<i>Nouvelle-Aquitaine Intelligence Artificielle</i>) event,
Bordeaux.</p>
          </li>
          <li id="uid230">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> participated in the round table
“<i>What alternatives and what regulations in the GAFAM
era?</i>” during the second edition of the <i>Rencontres
Culture:Tech</i>, Assemblée nationale, Paris.</p>
          </li>
          <li id="uid231">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> gave a conference on
“<i>Freedom in the digital age</i>” to students of first and
second year of all departments at ENS Cachan (250 people).</p>
          </li>
          <li id="uid232">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> participated in a round table on
“<i>The future of choice</i>” in the context of the
“de-inauguration” of the exhibition <i>Under influence, the
science of choice</i> at Espace Pierre-Gilles de Gennes (ESPGG), Paris.</p>
          </li>
          <li id="uid233">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> answered the public at the movie theater
Utopia Bordeaux after the display of the movie <i>Meeting
Snowden</i>.</p>
          </li>
          <li id="uid234">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> gave a public conference on
“<i>How does personal data processing interfere with our
privacy?</i>” during the <i>IA Pau</i> conference, Pau.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid235" level="2">
        <bodyTitle>Internal action</bodyTitle>
        <simplelist>
          <li id="uid236">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> participated in a round table on
“<i>Digital security: technical, ethical and legal aspects</i>”,
Inria scientific days 2019, Lyon
(<ref xlink:href="https://project.inria.fr/journeesscientifiques2019/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>project.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>journeesscientifiques2019/</ref>).</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid237" level="2">
        <bodyTitle>Creation of media or tools for science outreach</bodyTitle>
        <simplelist>
          <li id="uid238">
            <p noindent="true">Brice <span class="smallcap" align="left">Goglin</span> was involved in the building of the MOOC
<i>Sciences Numériques et Technologie</i> which focuses on bringing
basics about computer science to high-school teachers and general
audience.
More than 18 000 people registered to the course.</p>
          </li>
          <li id="uid239">
            <p noindent="true">François <span class="smallcap" align="left">Pellegrini</span> is one of the 14 people appearing in
the documentary “<i>LOL: Logiciel libre, une affaire
sérieuse</i>” (“LOL: Free software, a serious matter”).</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    
    <biblStruct id="tadaam-2019-bid37" type="book" rend="year" n="cite:carretero:hal-02402981">
      <identifiant type="doi" value="10.1049/PBPC024E"/>
      <identifiant type="hal" value="hal-02402981"/>
      <monogr x-scientific-popularization="no" x-international-audience="yes">
        <title level="m">Ultrascale Computing Systems</title>
        <author>
          <persName>
            <foreName>Jesus</foreName>
            <surname>Carretero</surname>
            <initial>J.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Albert</foreName>
            <surname>Zomaya</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>Institution of Engineering and Technology</orgName>
          </publisher>
          <dateStruct>
            <month>January</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02402981" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02402981</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid39" type="proceedings" rend="year" n="cite:mencagli:hal-02403078">
      <identifiant type="doi" value="10.1007/978-3-030-10549-5"/>
      <identifiant type="hal" value="hal-02403078"/>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Euro-Par 2018: Parallel Processing Workshop</title>
        <title level="s">LNCS - Lecture Notes in Computer Science</title>
        <imprint>
          <biblScope type="volume">11339</biblScope>
          <publisher>
            <orgName>Springer<address><addrLine>Turin, Italy</addrLine></address></orgName>
          </publisher>
          <dateStruct>
            <month>May</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02403078" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02403078</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid35" type="proceedings" rend="year" n="cite:pellegrini:hal-02195921">
      <identifiant type="hal" value="hal-02195921"/>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes">
        <title level="m">Actes du colloque des Convergences du Droit et du Numérique</title>
        <title level="s">Actes du colloque des Convergences du droit et du numérique</title>
        <imprint>
          <publisher>
            <orgName type="organisation">Université de Bordeaux<address><addrLine>Bordeaux, France</addrLine></address></orgName>
          </publisher>
          <dateStruct>
            <month>July</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02195921" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02195921</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid42" type="hdrthesis" rend="year" n="cite:mercier:tel-02412813">
      <identifiant type="hal" value="tel-02412813"/>
      <monogr>
        <title level="m">Évolutions du passage de messages face aux défis de la gestion des topologies matérielles hiérarchiques</title>
        <author>
          <persName key="tadaam-2018-idp133472">
            <foreName>Guillaume</foreName>
            <surname>Mercier</surname>
            <initial>G.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">Université de Bordeaux</orgName>
          </publisher>
          <dateStruct>
            <month>December</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/tel-02412813" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>tel-02412813</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Habilitation à diriger des recherches</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid2" type="article" rend="year" n="cite:aupy:hal-02093172">
      <identifiant type="doi" value="10.1177/1094342019846956"/>
      <identifiant type="hal" value="hal-02093172"/>
      <analytic>
        <title level="a">Co-scheduling HPC workloads on cache-partitioned CMP platforms</title>
        <author>
          <persName key="tadaam-2018-idp122784">
            <foreName>Guillaume</foreName>
            <surname>Aupy</surname>
            <initial>G.</initial>
          </persName>
          <persName key="roma-2018-idp131280">
            <foreName>Anne</foreName>
            <surname>Benoit</surname>
            <initial>A.</initial>
          </persName>
          <persName key="tadaam-2018-idp127712">
            <foreName>Brice</foreName>
            <surname>Goglin</surname>
            <initial>B.</initial>
          </persName>
          <persName key="roma-2018-idp164240">
            <foreName>Loïc</foreName>
            <surname>Pottier</surname>
            <initial>L.</initial>
          </persName>
          <persName key="roma-2018-idp136784">
            <foreName>Yves</foreName>
            <surname>Robert</surname>
            <initial>Y.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00913">
        <idno type="issn">1094-3420</idno>
        <title level="j">International Journal of High Performance Computing Applications</title>
        <imprint>
          <biblScope type="volume">33</biblScope>
          <biblScope type="number">6</biblScope>
          <dateStruct>
            <month>April</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">1221-1239</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02093172" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02093172</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid3" subtype="nonparu-d" type="article" rend="year" n="cite:aupy:hal-02091340">
      <identifiant type="doi" value="10.1177/1094342019847263"/>
      <identifiant type="hal" value="hal-02091340"/>
      <analytic>
        <title level="a">Modeling High-throughput Applications for in situ Analytics</title>
        <author>
          <persName key="tadaam-2018-idp122784">
            <foreName>Guillaume</foreName>
            <surname>Aupy</surname>
            <initial>G.</initial>
          </persName>
          <persName key="tadaam-2018-idp127712">
            <foreName>Brice</foreName>
            <surname>Goglin</surname>
            <initial>B.</initial>
          </persName>
          <persName key="tadaam-2018-idp143744">
            <foreName>Valentin</foreName>
            <surname>Honoré</surname>
            <initial>V.</initial>
          </persName>
          <persName key="datamove-2018-idp117856">
            <foreName>Bruno</foreName>
            <surname>Raffin</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00913">
        <idno type="issn">1094-3420</idno>
        <title level="j">International Journal of High Performance Computing Applications</title>
        <imprint>
          <biblScope type="volume">33</biblScope>
          <biblScope type="number">6</biblScope>
          <dateStruct>
            <month>April</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">1185-1200</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02091340" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02091340</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid20" subtype="nonparu-d" type="article" rend="year" n="cite:beaumont:hal-02401105">
      <identifiant type="hal" value="hal-02401105"/>
      <analytic>
        <title level="a">Optimal Memory-aware Backpropagation of Deep Join Networks</title>
        <author>
          <persName key="realopt-2018-idp120992">
            <foreName>Olivier</foreName>
            <surname>Beaumont</surname>
            <initial>O.</initial>
          </persName>
          <persName key="tadaam-2018-idp138848">
            <foreName>Julien</foreName>
            <surname>Herrmann</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
          <persName key="realopt-2018-idp166400">
            <foreName>Alena</foreName>
            <surname>Shilova</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01573">
        <idno type="issn">1364-503X</idno>
        <title level="j">Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences</title>
        <imprint>
          <dateStruct>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02401105" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02401105</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid41" type="incollection" rend="year" n="cite:costa:hal-02403121">
      <identifiant type="doi" value="10.1049/PBPC024E_ch2"/>
      <identifiant type="hal" value="hal-02403121"/>
      <analytic>
        <title level="a">Programming models and runtimes</title>
        <author>
          <persName>
            <foreName>Georges Da</foreName>
            <surname>Costa</surname>
            <initial>G. D.</initial>
          </persName>
          <persName>
            <foreName>Alexey L.</foreName>
            <surname>Lastovetsky</surname>
            <initial>A. L.</initial>
          </persName>
          <persName>
            <foreName>Jorge G.</foreName>
            <surname>Barbosa</surname>
            <initial>J. G.</initial>
          </persName>
          <persName>
            <foreName>Juan Carlos Diaz</foreName>
            <surname>Martin</surname>
            <initial>J. C. D.</initial>
          </persName>
          <persName>
            <foreName>Juan-Luis Garcia</foreName>
            <surname>Zapata</surname>
            <initial>J.-L. G.</initial>
          </persName>
          <persName>
            <foreName>Matthias</foreName>
            <surname>Janetschek</surname>
            <initial>M.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>João</foreName>
            <surname>Leitão</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Ravi Reddy</foreName>
            <surname>Manumachu</surname>
            <initial>R. R.</initial>
          </persName>
          <persName>
            <foreName>Radu</foreName>
            <surname>Prodan</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Juan A.</foreName>
            <surname>Rico-Gallego</surname>
            <initial>J. A.</initial>
          </persName>
          <persName>
            <foreName>Peter Van</foreName>
            <surname>Roy</surname>
            <initial>P. V.</initial>
          </persName>
          <persName>
            <foreName>Ali</foreName>
            <surname>Shoker</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Albert van der</foreName>
            <surname>Linde</surname>
            <initial>A. v. d.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no">
        <title level="m">Ultrascale Computing Systems</title>
        <imprint>
          <publisher>
            <orgName>Institution of Engineering and Technology</orgName>
          </publisher>
          <dateStruct>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02403121" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02403121</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid16" type="article" rend="year" n="cite:denis:hal-02400422">
      <identifiant type="doi" value="10.1177/1094342019860184"/>
      <identifiant type="hal" value="hal-02400422"/>
      <analytic>
        <title level="a">Study on progress threads placement and dedicated cores for overlapping MPI nonblocking collectives on manycore processor</title>
        <author>
          <persName key="tadaam-2018-idp125248">
            <foreName>Alexandre</foreName>
            <surname>Denis</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Julien</foreName>
            <surname>Jaeger</surname>
            <initial>J.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Marc</foreName>
            <surname>Pérache</surname>
            <initial>M.</initial>
          </persName>
          <persName key="tadaam-2018-idp151040">
            <foreName>Hugo</foreName>
            <surname>Taboada</surname>
            <initial>H.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00913">
        <idno type="issn">1094-3420</idno>
        <title level="j">International Journal of High Performance Computing Applications</title>
        <imprint>
          <biblScope type="volume">33</biblScope>
          <biblScope type="number">6</biblScope>
          <dateStruct>
            <month>May</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">1240-1254</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02400422" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02400422</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid5" type="article" rend="year" n="cite:denoyelle:hal-01924951">
      <identifiant type="doi" value="10.1109/TPDS.2018.2883056"/>
      <identifiant type="hal" value="hal-01924951"/>
      <analytic>
        <title level="a">Modeling Non-Uniform Memory Access on Large Compute Nodes with the Cache-Aware Roofline Model</title>
        <author>
          <persName key="tadaam-2018-idp141312">
            <foreName>Nicolas</foreName>
            <surname>Denoyelle</surname>
            <initial>N.</initial>
          </persName>
          <persName key="tadaam-2018-idp127712">
            <foreName>Brice</foreName>
            <surname>Goglin</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Aleksandar</foreName>
            <surname>Ilic</surname>
            <initial>A.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Leonel</foreName>
            <surname>Sousa</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00746">
        <idno type="issn">1045-9219</idno>
        <title level="j">IEEE Transactions on Parallel and Distributed Systems</title>
        <imprint>
          <biblScope type="volume">30</biblScope>
          <biblScope type="number">6</biblScope>
          <dateStruct>
            <month>June</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">1374-1389</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01924951" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01924951</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid23" subtype="nonparu-d" type="article" rend="year" n="cite:gainaru:hal-02141576">
      <identifiant type="hal" value="hal-02141576"/>
      <analytic>
        <title level="a">I/O scheduling strategy for periodic applications</title>
        <author>
          <persName>
            <foreName>Ana</foreName>
            <surname>Gainaru</surname>
            <initial>A.</initial>
          </persName>
          <persName key="roma-2018-idp154464">
            <foreName>Valentin</foreName>
            <surname>Le Fèvre</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid02846">
        <idno type="issn">2329-4949</idno>
        <title level="j">ACM Transactions on Parallel Computing</title>
        <imprint>
          <dateStruct>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02141576" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02141576</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid7" subtype="nonparu-d" type="article" rend="year" n="cite:gainaru:hal-02058290">
      <identifiant type="doi" value="10.1177/1094342019841681"/>
      <identifiant type="hal" value="hal-02058290"/>
      <analytic>
        <title level="a">On-the-fly scheduling vs. reservation-based scheduling for unpredictable workflows</title>
        <author>
          <persName>
            <foreName>Ana</foreName>
            <surname>Gainaru</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Hongyang</foreName>
            <surname>Sun</surname>
            <initial>H.</initial>
          </persName>
          <persName key="tadaam-2018-idp122784">
            <foreName>Guillaume</foreName>
            <surname>Aupy</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Yuankai</foreName>
            <surname>Huo</surname>
            <initial>Y.</initial>
          </persName>
          <persName>
            <foreName>Bennett A</foreName>
            <surname>Landman</surname>
            <initial>B. A.</initial>
          </persName>
          <persName>
            <foreName>Padma</foreName>
            <surname>Raghavan</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00913">
        <idno type="issn">1094-3420</idno>
        <title level="j">International Journal of High Performance Computing Applications</title>
        <imprint>
          <dateStruct>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02058290" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02058290</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid40" type="incollection" rend="year" n="cite:jeannot:hal-02403088">
      <identifiant type="doi" value="10.1049/PBPC024E"/>
      <identifiant type="hal" value="hal-02403088"/>
      <analytic>
        <title level="a">Conclusion</title>
        <author>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Jesus</foreName>
            <surname>Carretero</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no">
        <title level="m">Ultrascale Computing Systems</title>
        <imprint>
          <publisher>
            <orgName>Institution of Engineering and Technology</orgName>
          </publisher>
          <dateStruct>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02403088" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02403088</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid33" type="article" rend="year" n="cite:pellegrini:hal-02113563">
      <identifiant type="hal" value="hal-02113563"/>
      <analytic>
        <title level="a">Safety and digital hygiene of professionals</title>
        <author>
          <persName key="tadaam-2018-idp135984">
            <foreName>François</foreName>
            <surname>Pellegrini</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid03169">
        <idno type="issn">2493-2957</idno>
        <title level="j">Dalloz IP/IT</title>
        <imprint>
          <biblScope type="number">4</biblScope>
          <dateStruct>
            <month>April</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">233-236</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02113563" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02113563</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid31" type="incollection" rend="year" n="cite:pellegrini:hal-02069419">
      <identifiant type="hal" value="hal-02069419"/>
      <analytic>
        <title level="a">Security and digitization: Between fantasies of effectiveness and proven violations of fundamental rights</title>
        <author>
          <persName key="tadaam-2018-idp135984">
            <foreName>François</foreName>
            <surname>Pellegrini</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no">
        <editor role="editor">
          <persName>
            <foreName>Mustapha</foreName>
            <surname>Afroukh</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Christophe</foreName>
            <surname>Maubernard</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Claire</foreName>
            <surname>Val</surname>
            <initial>C.</initial>
          </persName>
        </editor>
        <title level="m">La sécurité : mutations et incertitudes</title>
        <title level="s">Collection Colloques &amp; Essais</title>
        <imprint>
          <biblScope type="number">77</biblScope>
          <publisher>
            <orgName>Institut universitaire Varenne</orgName>
          </publisher>
          <dateStruct>
            <month>January</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">89-100</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02069419" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02069419</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid25" type="article" rend="year" n="cite:phan:hal-02172590">
      <identifiant type="doi" value="10.1145/3328740"/>
      <identifiant type="hal" value="hal-02172590"/>
      <analytic>
        <title level="a">A New Framework for Evaluating Straggler Detection Mechanisms in MapReduce</title>
        <author>
          <persName>
            <foreName>Tien-Dat</foreName>
            <surname>Phan</surname>
            <initial>T.-D.</initial>
          </persName>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
          <persName key="stack-2018-idp120224">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Padma</foreName>
            <surname>Raghavan</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid03022">
        <idno type="issn">2376-3639</idno>
        <title level="j">ACM Transactions on Modeling and Performance Evaluation of Computing Systems</title>
        <imprint>
          <biblScope type="volume">X</biblScope>
          <dateStruct>
            <month>April</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">1-22</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02172590" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02172590</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid17" type="inproceedings" rend="year" n="cite:aba:hal-02141622">
      <identifiant type="hal" value="hal-02141622"/>
      <analytic>
        <title level="a">Scheduling on Two Unbounded Resources with Communication Costs</title>
        <author>
          <persName>
            <foreName>Massinissa Ait</foreName>
            <surname>Aba</surname>
            <initial>M. A.</initial>
          </persName>
          <persName>
            <foreName>Alix</foreName>
            <surname>Munier-Kordon</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Euro-Par - European Conference on Parallel Processing</title>
        <loc>Göttingen, Germany</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02141622" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02141622</ref>
        </imprint>
        <meeting id="cid306382">
          <title>International Euro-Par Conference on Parallel Processing</title>
          <num>25</num>
          <abbr type="sigle">Euro-Par</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid19" type="inproceedings" rend="year" n="cite:aupy:hal-02141616">
      <identifiant type="hal" value="hal-02141616"/>
      <analytic>
        <title level="a">Sizing and Partitioning Strategies for Burst-Buffers to Reduce IO Contention</title>
        <author>
          <persName key="tadaam-2018-idp122784">
            <foreName>Guillaume</foreName>
            <surname>Aupy</surname>
            <initial>G.</initial>
          </persName>
          <persName key="realopt-2018-idp120992">
            <foreName>Olivier</foreName>
            <surname>Beaumont</surname>
            <initial>O.</initial>
          </persName>
          <persName key="realopt-2018-idp123904">
            <foreName>Lionel</foreName>
            <surname>Eyraud-Dubois</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">IPDPS 2019 - 33rd IEEE International Parallel and Distributed Processing Symposium</title>
        <loc>Rio de Janeiro, Brazil</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02141616" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02141616</ref>
        </imprint>
        <meeting id="cid87817">
          <title>IEEE International Parallel and Distributed Processing Symposium</title>
          <num>33</num>
          <abbr type="sigle">IPDPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid8" type="inproceedings" rend="year" n="cite:aupy:hal-01968419">
      <identifiant type="doi" value="10.1109/IPDPS.2019.00027"/>
      <identifiant type="hal" value="hal-01968419"/>
      <analytic>
        <title level="a">Reservation Strategies for Stochastic Jobs</title>
        <author>
          <persName key="tadaam-2018-idp122784">
            <foreName>Guillaume</foreName>
            <surname>Aupy</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Ana</foreName>
            <surname>Gainaru</surname>
            <initial>A.</initial>
          </persName>
          <persName key="tadaam-2018-idp143744">
            <foreName>Valentin</foreName>
            <surname>Honoré</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Padma</foreName>
            <surname>Raghavan</surname>
            <initial>P.</initial>
          </persName>
          <persName key="roma-2018-idp136784">
            <foreName>Yves</foreName>
            <surname>Robert</surname>
            <initial>Y.</initial>
          </persName>
          <persName>
            <foreName>Hongyang</foreName>
            <surname>Sun</surname>
            <initial>H.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">IPDPS 2019 - 33rd IEEE International Parallel and Distributed Processing Symposium</title>
        <loc>Rio de Janeiro, Brazil</loc>
        <imprint>
          <publisher>
            <orgName>IEEE</orgName>
          </publisher>
          <dateStruct>
            <month>May</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">166-175</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01968419" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01968419</ref>
        </imprint>
        <meeting id="cid87817">
          <title>IEEE International Parallel and Distributed Processing Symposium</title>
          <num>33</num>
          <abbr type="sigle">IPDPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid26" type="inproceedings" rend="year" n="cite:bangalore:hal-02413199">
      <identifiant type="doi" value="10.1145/3343211.3343213"/>
      <identifiant type="hal" value="hal-02413199"/>
      <analytic>
        <title level="a">Exposition, clarification, and expansion of MPI semantic terms and conventions</title>
        <author>
          <persName>
            <foreName>Purushotham</foreName>
            <surname>Bangalore</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Rolf</foreName>
            <surname>Rabenseifner</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Daniel</foreName>
            <surname>Holmes</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Julien</foreName>
            <surname>Jaeger</surname>
            <initial>J.</initial>
          </persName>
          <persName key="tadaam-2018-idp133472">
            <foreName>Guillaume</foreName>
            <surname>Mercier</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Claudia</foreName>
            <surname>Blaas-Schenner</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Anthony</foreName>
            <surname>Skjellum</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">EuroMPI '19 - 26th European MPI Users' Group Meeting</title>
        <loc>Zürich, Switzerland</loc>
        <imprint>
          <publisher>
            <orgName>ACM Press</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">1-10</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02413199" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02413199</ref>
        </imprint>
        <meeting id="cid62845">
          <title>European MPI Users' Group Meeting</title>
          <num>26</num>
          <abbr type="sigle">EuroMPI</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid13" type="inproceedings" rend="year" n="cite:denis:hal-02103700">
      <identifiant type="hal" value="hal-02103700"/>
      <analytic>
        <title level="a">Scalability of the NewMadeleine Communication Library for Large Numbers of MPI Point-to-Point Requests</title>
        <author>
          <persName key="tadaam-2018-idp125248">
            <foreName>Alexandre</foreName>
            <surname>Denis</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">CCGrid 2019 - 19th Annual IEEE/ACM International Symposium on Cluster, Cloud, and Grid Computing</title>
        <loc>Larnaca, Cyprus</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02103700" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02103700</ref>
        </imprint>
        <meeting id="cid88920">
          <title>IEEE/ACM International Symposium on Cluster Computing and the Grid</title>
          <num>19</num>
          <abbr type="sigle">CCGRID</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid6" type="inproceedings" rend="year" n="cite:denoyelle:hal-02135545">
      <identifiant type="doi" value="10.1145/3337821.3337893"/>
      <identifiant type="hal" value="hal-02135545"/>
      <analytic>
        <title level="a">Data and Thread Placement in NUMA Architectures: A Statistical Learning Approach</title>
        <author>
          <persName key="tadaam-2018-idp141312">
            <foreName>Nicolas</foreName>
            <surname>Denoyelle</surname>
            <initial>N.</initial>
          </persName>
          <persName key="tadaam-2018-idp127712">
            <foreName>Brice</foreName>
            <surname>Goglin</surname>
            <initial>B.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Thomas</foreName>
            <surname>Ropars</surname>
            <initial>T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ICPP 2019 - 48th International Conference on Parallel Processing</title>
        <loc>Kyoto, Japan</loc>
        <imprint>
          <publisher>
            <orgName>ACM Press</orgName>
          </publisher>
          <dateStruct>
            <month>August</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">1-10</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02135545" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02135545</ref>
        </imprint>
        <meeting id="cid295154">
          <title>International Conference on Parallel Processing</title>
          <num>48</num>
          <abbr type="sigle">ICPP</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid10" type="inproceedings" rend="year" n="cite:gainaru:hal-02336582">
      <identifiant type="hal" value="hal-02336582"/>
      <analytic>
        <title level="a">Making Speculative Scheduling Robust to Incomplete Data</title>
        <author>
          <persName>
            <foreName>Ana</foreName>
            <surname>Gainaru</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ScalA19: 10th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems</title>
        <loc>Denver, United States</loc>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02336582" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02336582</ref>
        </imprint>
        <meeting id="cid107949">
          <title>International Conference for High Performance Computing, Networking, Storage and Analysis</title>
          <num>10</num>
          <abbr type="sigle">SC</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid36" type="inproceedings" rend="year" n="cite:gainaru:hal-02158598">
      <identifiant type="hal" value="hal-02158598"/>
      <analytic>
        <title level="a">Speculative Scheduling for Stochastic HPC Applications</title>
        <author>
          <persName>
            <foreName>Ana</foreName>
            <surname>Gainaru</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Hongyang</foreName>
            <surname>Sun</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Padma</foreName>
            <surname>Raghavan</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ICPP 2019 - 48th International Conference on Parallel Processing</title>
        <loc>Kyoto, Japan</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02158598" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02158598</ref>
        </imprint>
        <meeting id="cid295154">
          <title>International Conference on Parallel Processing</title>
          <num>48</num>
          <abbr type="sigle">ICPP</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid0" type="inproceedings" rend="year" n="cite:goglin:hal-02173336">
      <identifiant type="hal" value="hal-02173336"/>
      <analytic>
        <title level="a">Opportunities for Partitioning Non-Volatile Memory DIMMs between Co-scheduled Jobs on HPC Nodes</title>
        <author>
          <persName key="tadaam-2018-idp127712">
            <foreName>Brice</foreName>
            <surname>Goglin</surname>
            <initial>B.</initial>
          </persName>
          <persName key="tadaam-2018-idp148608">
            <foreName>Andrès</foreName>
            <surname>Rubio Proaño</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Euro-Par 2019: Parallel Processing Workshops</title>
        <loc>Göttingen, Germany</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02173336" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02173336</ref>
        </imprint>
        <meeting id="cid63151">
          <title>Euro-Par Workshops</title>
          <num>2019</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid34" type="inproceedings" rend="year" n="cite:hori:hal-02193264">
      <identifiant type="hal" value="hal-02193264"/>
      <analytic>
        <title level="a">Is Japanese HPC another Galapagos? - Interim Report of MPI International Survey -</title>
        <author>
          <persName>
            <foreName>Atsushi</foreName>
            <surname>Hori</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>George</foreName>
            <surname>Bosilca</surname>
            <initial>G.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Takahiro</foreName>
            <surname>Ogura</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Yutaka</foreName>
            <surname>Ishikawa</surname>
            <initial>Y.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="no" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Summer United Workshops on Parallel, Distributed and Cooperative Processing</title>
        <loc>Kitami, Japan</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02193264" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02193264</ref>
        </imprint>
        <meeting id="cid626378">
          <title>Summer United Workshops on Parallel, Distributed and Cooperative Processing</title>
          <num>2019</num>
          <abbr type="sigle">SWoPP</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid1" type="inproceedings" rend="year" n="cite:leon:hal-02266285">
      <identifiant type="doi" value="10.1145/3357526.3357546"/>
      <identifiant type="hal" value="hal-02266285"/>
      <analytic>
        <title level="a">M&amp;MMs: Navigating Complex Memory Spaces with hwloc</title>
        <author>
          <persName>
            <foreName>Edgar A</foreName>
            <surname>León</surname>
            <initial>E. A.</initial>
          </persName>
          <persName key="tadaam-2018-idp127712">
            <foreName>Brice</foreName>
            <surname>Goglin</surname>
            <initial>B.</initial>
          </persName>
          <persName key="tadaam-2018-idp148608">
            <foreName>Andrès</foreName>
            <surname>Rubio Proaño</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Fifth International Symposium on Memory Systems Proceedings (MEMSYS19)</title>
        <loc>Washington, DC, United States</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02266285" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02266285</ref>
        </imprint>
        <meeting id="cid625308">
          <title>ACM International Symposium on Memory Systems</title>
          <num>5</num>
          <abbr type="sigle">MEMSYS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid28" type="inproceedings" rend="year" n="cite:pallez:hal-02400746">
      <identifiant type="hal" value="hal-02400746"/>
      <analytic>
        <title level="a">Adjoint computation and Backpropagation</title>
        <author>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="yes" x-editorial-board="no">
        <title level="m">Meeting of the Royal Society – Numerical algorithms for high-performance computational science</title>
        <loc>London, United Kingdom</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02400746" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02400746</ref>
        </imprint>
        <meeting id="cid626379">
          <title>Scientific Meeting of the Royal Society</title>
          <num>2019</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid30" type="inproceedings" rend="year" n="cite:pellegrini:hal-02150857">
      <identifiant type="hal" value="hal-02150857"/>
      <analytic>
        <title level="a">Enjeux démocratiques de la protection des données à caractère personnel</title>
        <author>
          <persName key="tadaam-2018-idp135984">
            <foreName>François</foreName>
            <surname>Pellegrini</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="no" x-proceedings="no" x-invited-conference="yes" x-editorial-board="no">
        <title level="m">Journées scientifiques Inria</title>
        <loc>Lyon, France</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">Inria</orgName>
          </publisher>
          <dateStruct>
            <month>June</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02150857" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02150857</ref>
        </imprint>
        <meeting id="cid625261">
          <title>Journées Scientifiques Inria</title>
            <num>2019</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid11" type="inproceedings" rend="year" n="cite:tseng:hal-02184204">
      <identifiant type="hal" value="hal-02184204"/>
      <analytic>
        <title level="a">Towards Portable Online Prediction of Network Utilization using MPI-level Monitoring</title>
        <author>
          <persName>
            <foreName>Shu-Mei</foreName>
            <surname>Tseng</surname>
            <initial>S.-M.</initial>
          </persName>
          <persName>
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>George</foreName>
            <surname>Bosilca</surname>
            <initial>G.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Aparna</foreName>
            <surname>Chandramowlishwaran</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Franck</foreName>
            <surname>Cappello</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">EuroPar'19: 25th International European Conference on Parallel and Distributed Systems</title>
        <loc>Goettingen, Germany</loc>
        <imprint>
          <dateStruct>
            <month>August</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02184204" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02184204</ref>
        </imprint>
        <meeting id="cid306382">
          <title>International Euro-Par Conference on Parallel Processing</title>
          <num>25</num>
          <abbr type="sigle">Euro-Par</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid32" type="techreport" rend="year" n="cite:aitaba:hal-02076473">
      <identifiant type="hal" value="hal-02076473"/>
      <monogr>
        <title level="m">Scheduling on Two Unbounded Resources with Communication Costs</title>
        <author>
          <persName>
            <foreName>Massinissa</foreName>
            <surname>Ait Aba</surname>
            <initial>M.</initial>
          </persName>
          <persName key="tadaam-2018-idp122784">
            <foreName>Guillaume</foreName>
            <surname>Aupy</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Alix</foreName>
            <surname>Munier-Kordon</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-9264</biblScope>
          <publisher>
            <orgName type="institution">Inria</orgName>
          </publisher>
          <dateStruct>
            <month>March</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02076473" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02076473</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid24" type="techreport" rend="year" n="cite:aupy:hal-02021070">
      <identifiant type="hal" value="hal-02021070"/>
      <monogr>
        <title level="m">Scheduling periodic I/O access with bi-colored chains: models and algorithms</title>
        <author>
          <persName key="tadaam-2018-idp122784">
            <foreName>Guillaume</foreName>
            <surname>Aupy</surname>
            <initial>G.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName key="tadaam-2018-idp153472">
            <foreName>Nicolas</foreName>
            <surname>Vidal</surname>
            <initial>N.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-9255</biblScope>
          <publisher>
            <orgName type="institution">Inria</orgName>
          </publisher>
          <dateStruct>
            <month>February</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">25</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02021070" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02021070</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid22" type="techreport" rend="year" n="cite:beaumont:hal-02352969">
      <identifiant type="hal" value="hal-02352969"/>
      <monogr>
        <title level="m">Optimal checkpointing for heterogeneous chains: how to train deep neural networks with limited memory</title>
        <author>
          <persName key="realopt-2018-idp120992">
            <foreName>Olivier</foreName>
            <surname>Beaumont</surname>
            <initial>O.</initial>
          </persName>
          <persName key="realopt-2018-idp123904">
            <foreName>Lionel</foreName>
            <surname>Eyraud-Dubois</surname>
            <initial>L.</initial>
          </persName>
          <persName key="tadaam-2018-idp138848">
            <foreName>Julien</foreName>
            <surname>Herrmann</surname>
            <initial>J.</initial>
          </persName>
          <persName key="zenith-2018-idp122512">
            <foreName>Alexis</foreName>
            <surname>Joly</surname>
            <initial>A.</initial>
          </persName>
          <persName key="realopt-2018-idp166400">
            <foreName>Alena</foreName>
            <surname>Shilova</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-9302</biblScope>
          <publisher>
            <orgName type="institution">Inria Bordeaux Sud-Ouest</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02352969" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02352969</ref>
        </imprint>
      </monogr>
      <note type="bnote">
        <ref xlink:href="https://arxiv.org/abs/1911.13214" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>arxiv.<allowbreak/>org/<allowbreak/>abs/<allowbreak/>1911.<allowbreak/>13214</ref>
      </note>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid21" type="techreport" rend="year" n="cite:beaumont:hal-02131552">
      <identifiant type="hal" value="hal-02131552"/>
      <monogr>
        <title level="m">Optimal Memory-aware Backpropagation of Deep Join Networks</title>
        <author>
          <persName key="realopt-2018-idp120992">
            <foreName>Olivier</foreName>
            <surname>Beaumont</surname>
            <initial>O.</initial>
          </persName>
          <persName key="tadaam-2018-idp138848">
            <foreName>Julien</foreName>
            <surname>Herrmann</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
          <persName key="realopt-2018-idp166400">
            <foreName>Alena</foreName>
            <surname>Shilova</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-9273</biblScope>
          <publisher>
            <orgName type="institution">Inria</orgName>
          </publisher>
          <dateStruct>
            <month>May</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02131552" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02131552</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid9" type="techreport" rend="year" n="cite:gainaru:hal-02328013">
      <identifiant type="hal" value="hal-02328013"/>
      <monogr>
        <title level="m">Reservation and Checkpointing Strategies for Stochastic Jobs (Extended Version)</title>
        <author>
          <persName>
            <foreName>Ana</foreName>
            <surname>Gainaru</surname>
            <initial>A.</initial>
          </persName>
          <persName key="tadaam-2018-idp127712">
            <foreName>Brice</foreName>
            <surname>Goglin</surname>
            <initial>B.</initial>
          </persName>
          <persName key="tadaam-2018-idp143744">
            <foreName>Valentin</foreName>
            <surname>Honoré</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Padma</foreName>
            <surname>Raghavan</surname>
            <initial>P.</initial>
          </persName>
          <persName key="roma-2018-idp136784">
            <foreName>Yves</foreName>
            <surname>Robert</surname>
            <initial>Y.</initial>
          </persName>
          <persName>
            <foreName>Hongyang</foreName>
            <surname>Sun</surname>
            <initial>H.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-9294</biblScope>
          <publisher>
            <orgName type="institution">Inria &amp; Labri, Univ. Bordeaux ; Department of EECS, Vanderbilt University, Nashville, TN, USA ; Laboratoire LIP, ENS Lyon &amp; University of Tennessee Knoxville, Lyon, France</orgName>
          </publisher>
          <dateStruct>
            <month>October</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02328013" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02328013</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid12" type="techreport" rend="year" n="cite:jeannot:hal-02304515">
      <identifiant type="hal" value="hal-02304515"/>
      <monogr>
        <title level="m">Improving MPI Application Communication Time with an Introspection Monitoring Library</title>
        <author>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName key="tadaam-2018-idp165776">
            <foreName>Richard</foreName>
            <surname>Sartori</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-9292</biblScope>
          <publisher>
            <orgName type="institution">Inria</orgName>
          </publisher>
          <dateStruct>
            <month>October</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">23</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02304515" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02304515</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid18" type="unpublished" rend="year" n="cite:aupy:hal-02080706">
      <identifiant type="hal" value="hal-02080706"/>
      <monogr>
        <title level="m">H-Revolve: A Framework for Adjoint Computation on Synchronous Hierarchical Platforms</title>
        <author>
          <persName key="tadaam-2018-idp122784">
            <foreName>Guillaume</foreName>
            <surname>Aupy</surname>
            <initial>G.</initial>
          </persName>
          <persName key="tadaam-2018-idp138848">
            <foreName>Julien</foreName>
            <surname>Herrmann</surname>
            <initial>J.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>March</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02080706" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02080706</ref>
        </imprint>
      </monogr>
      <note type="bnote">working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid38" type="misc" rend="year" n="cite:beckman:hal-02403058">
      <identifiant type="doi" value="10.1109/IPDPSW.2019.00150"/>
      <identifiant type="hal" value="hal-02403058"/>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no">
        <title level="m">Introduction to RADR 2019</title>
        <author>
          <persName>
            <foreName>Pete</foreName>
            <surname>Beckman</surname>
            <initial>P.</initial>
          </persName>
          <persName key="tadaam-2018-idp130608">
            <foreName>Emmanuel</foreName>
            <surname>Jeannot</surname>
            <initial>E.</initial>
          </persName>
          <persName>
            <foreName>Swann</foreName>
            <surname>Perarnau</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>IEEE</orgName>
          </publisher>
          <dateStruct>
            <month>May</month>
            <year>2019</year>
          </dateStruct>
          <biblScope type="pages">908-910</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-02403058" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02403058</ref>
        </imprint>
      </monogr>
      <note type="howpublished">IPDPSW 2019 - IEEE International Parallel and Distributed Processing Symposium Workshops</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid27" type="unpublished" rend="year" n="cite:lucabez:hal-01994677">
      <identifiant type="hal" value="hal-01994677"/>
      <monogr>
        <title level="m">Adaptive Request Scheduling for the I/O Forwarding Layer using Reinforcement Learning</title>
        <author>
          <persName>
            <foreName>Jean</foreName>
            <surname>Luca Bez</surname>
            <initial>J.</initial>
          </persName>
          <persName key="datamove-2018-idp139024">
            <foreName>Francieli</foreName>
            <surname>Zanon Boito</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Ramon</foreName>
            <surname>Nou</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Alberto</foreName>
            <surname>Miranda</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Toni</foreName>
            <surname>Cortes</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Philippe O.A.</foreName>
            <surname>Navaux</surname>
            <initial>P. O.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01994677" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01994677</ref>
        </imprint>
      </monogr>
      <note type="bnote">working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid29" type="misc" rend="year" n="cite:pallez:hal-02342636">
      <identifiant type="hal" value="hal-02342636"/>
      <monogr x-scientific-popularization="yes">
        <title level="m">Le non-sens écologique des voitures autonomes</title>
        <author>
          <persName>
            <foreName>Guillaume</foreName>
            <surname>Pallez</surname>
            <initial>G.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02342636" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02342636</ref>
        </imprint>
      </monogr>
      <note type="bnote">Dans cet article de vulgarisation, je discute si l'avènement promis des véhicules autonomes serait ou non réellement un moyen de réduire la pollution (notamment dans les villes) (plutôt pas)</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid15" type="misc" rend="year" n="cite:reynier:hal-02407276">
      <identifiant type="hal" value="hal-02407276"/>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" x-proceedings="no" x-invited-conference="no">
        <title level="m">Task based progression of asynchronous communications</title>
        <author>
          <persName key="tadaam-2019-idp152992">
            <foreName>Florian</foreName>
            <surname>Reynier</surname>
            <initial>F.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02407276" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02407276</ref>
        </imprint>
      </monogr>
      <note type="howpublished">COMPAS 2019 - Conférence d'informatique en Parallélisme, Architecture et Système</note>
      <note type="bnote">Poster</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid14" type="mastersthesis" rend="year" n="cite:swartvagher:hal-02303822">
      <identifiant type="hal" value="hal-02303822"/>
      <monogr x-international-audience="yes">
        <title level="m">Opérations collectives dynamiques dans StarPU / NewMadeleine</title>
        <author>
          <persName key="tadaam-2019-idp155392">
            <foreName>Philippe</foreName>
            <surname>Swartvagher</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">ENSEIRB-MATMECA</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2019</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-02303822" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-02303822</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Masters thesis</note>
    </biblStruct>
    
    <biblStruct id="tadaam-2019-bid4" type="article" rend="foot" n="footcite:ilic2014cache">
      <analytic>
        <title level="a">Cache-aware Roofline model: Upgrading the loft</title>
        <author>
          <persName>
            <foreName>Aleksandar</foreName>
            <surname>Ilic</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Frederico</foreName>
            <surname>Pratas</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Leonel</foreName>
            <surname>Sousa</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">IEEE Computer Architecture Letters</title>
        <imprint>
          <biblScope type="volume">13</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <biblScope type="pages">21–24</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
  </biblio>
</raweb>
