<?xml version="1.0" encoding="utf-8"?>
<raweb xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" year="2016">
  <identification id="kerdata" isproject="true">
    <shortname>KERDATA</shortname>
    <projectName>Scalable Storage for Clouds and Beyond</projectName>
    <theme-de-recherche>Distributed and High Performance Computing</theme-de-recherche>
    <domaine-de-recherche>Networks, Systems and Services, Distributed Computing</domaine-de-recherche>
    <urlTeam>https://team.inria.fr/kerdata/</urlTeam>
    <structure_exterieure type="Labs">
      <libelle>Institut de recherche en informatique et systèmes aléatoires (IRISA)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Institut national des sciences appliquées de Rennes</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Université Rennes 1</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>École normale supérieure de Rennes</libelle>
    </structure_exterieure>
    <header_dates_team>Creation of the Team: 2009 July 01, updated into Project-Team: 2012 July 01</header_dates_team>
    <LeTypeProjet>Project-Team</LeTypeProjet>
    <keywordsSdN>
      <term>1.1.4. - High performance computing</term>
      <term>1.1.5. - Exascale</term>
      <term>1.1.6. - Cloud</term>
      <term>1.3. - Distributed Systems</term>
      <term>1.6. - Green Computing</term>
      <term>2.6.2. - Middleware</term>
      <term>3.1.3. - Distributed data</term>
      <term>3.1.8. - Big data (production, storage, transfer)</term>
      <term>3.3.3. - Big data analysis</term>
      <term>6.2.7. - High performance computing</term>
      <term>7.1. - Parallel and distributed algorithms</term>
    </keywordsSdN>
    <keywordsSecteurs>
      <term>1.1.2. - Molecular biology</term>
      <term>2.6.1. - Brain imaging</term>
      <term>3.2. - Climate and meteorology</term>
      <term>4.5.1. - Green computing</term>
      <term>9.4.5. - Data science</term>
    </keywordsSecteurs>
    <UR name="Rennes"/>
  </identification>
  <team id="uid1">
    <person key="kerdata-2014-idp62712">
      <firstname>Gabriel</firstname>
      <lastname>Antoniu</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Team leader, Inria, Senior Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="kerdata-2014-idp64192">
      <firstname>Shadi</firstname>
      <lastname>Ibrahim</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="kerdata-2014-idp65432">
      <firstname>Luc</firstname>
      <lastname>Bougé</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>ENS Rennes, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="kerdata-2014-idp66888">
      <firstname>Alexandru</firstname>
      <lastname>Costan</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>INSA Rennes, Associate Professor</moreinfo>
    </person>
    <person key="kerdata-2016-idp127744">
      <firstname>Hadi</firstname>
      <lastname>Salimi</lastname>
      <categoryPro>Technique</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, from April 2016</moreinfo>
    </person>
    <person key="kerdata-2015-idp118848">
      <firstname>Nathanaël</firstname>
      <lastname>Cheriere</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>ENS Rennes, from
September 2016</moreinfo>
    </person>
    <person key="kerdata-2016-idp132688">
      <firstname>Paul</firstname>
      <lastname>Le Noac'h</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>INSA Rennes, from November
2016</moreinfo>
    </person>
    <person key="kerdata-2015-idp113776">
      <firstname>Ovidiu-Cristian</firstname>
      <lastname>Marcu</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="kerdata-2014-idp82720">
      <firstname>Pierre</firstname>
      <lastname>Matri</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria and Universidad Politécnica de
Madrid</moreinfo>
    </person>
    <person key="kerdata-2016-idp140112">
      <firstname>Tien-Dat</firstname>
      <lastname>Phan</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Univ. Rennes I</moreinfo>
    </person>
    <person key="kerdata-2014-idp77520">
      <firstname>Luis Eduardo</firstname>
      <lastname>Pineda Morales</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="kerdata-2014-idp78800">
      <firstname>Lokman</firstname>
      <lastname>Rahmani</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Univ. Rennes I</moreinfo>
    </person>
    <person key="kerdata-2015-idp115016">
      <firstname>Mohammed-Yacine</firstname>
      <lastname>Taleb</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="kerdata-2014-idp81448">
      <firstname>Orçun</firstname>
      <lastname>Yildiz</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="kerdata-2016-idp152368">
      <firstname>Chi</firstname>
      <lastname>Zhou</lastname>
      <categoryPro>PostDoc</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, from May
2016</moreinfo>
    </person>
    <person key="linkmedia-2014-idp95392">
      <firstname>Aurélie</firstname>
      <lastname>Patier</lastname>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Univ. Rennes I</moreinfo>
    </person>
    <person key="kerdata-2016-idp157376">
      <firstname>Muhammad Najeeb</firstname>
      <lastname>Aslam</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Univ. Rennes I, Master
intern, from May 2016 until August 2016</moreinfo>
    </person>
    <person key="kerdata-2016-idp159952">
      <firstname>Rémi</firstname>
      <lastname>Hutin</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, Undergraduate intern, from
May 2016 until July 2016</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Context: the need for scalable data management</bodyTitle>
      <p>We are witnessing a rapidly increasing number of application areas
generating and processing very large volumes of data on a regular
basis. Such applications are called
<i>data-intensive</i>. Governmental and commercial statistics,
climate modeling, cosmology, genetics, bio-informatics, high-energy
physics are just a few examples in the scientific area. In
addition, rapidly growing amounts of data from social networks and
commercial applications are now routinely processed.</p>
      <p>In all these examples, the overall application performance is highly
dependent on the properties of the underlying data management
service. It becomes crucial to store and manipulate massive data
efficiently. However, these data are typically <i>shared</i> at a
large scale and <i>concurrently accessed</i> at a high degree. With
the emergence of recent infrastructures such as cloud computing
platforms and post-Petascale high-performance computing (HPC)
systems, achieving highly scalable data management under such
conditions has become a major challenge.</p>
      <subsection id="uid4" level="2">
        <bodyTitle>Our objective</bodyTitle>
        <p>The KerData project-team focuses on designing innovative
architectures and systems for <i>scalable data storage and
processing</i>. We target two types of infrastructures: <i>clouds</i>
and <i>post-Petascale high-performance supercomputers</i>, according to
the current needs and requirements of data-intensive applications.</p>
        <p>We are especially concerned with the applications of major
international and industrial players in cloud computing and
extreme-scale high-performance computing (HPC), which shape the
long-term agenda of the cloud
computing  <ref xlink:href="#kerdata-2016-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, <ref xlink:href="#kerdata-2016-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> and
Exascale HPC  <ref xlink:href="#kerdata-2016-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> research communities. The Big Data
area, which has recently captured a lot of attention, emphasized the
challenges related to Volume, Velocity and Variety. This is yet
another element of context that further highlights the primary
importance of designing data management systems that are efficient
at a very large scale.</p>
        <subsection id="uid5" level="3">
          <bodyTitle>Alignment with Inria's scientific strategy</bodyTitle>
          <p>Data-intensive applications exhibit several common requirements with
respect to the need for data storage and I/O processing. We focus on
some core challenges related to data management, resulting from these
requirements. Our choice is perfectly in line with Inria's strategic
plan  <ref xlink:href="#kerdata-2016-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, which acknowledges as critical the
challenges of <i>storing, exchanging, organizing, utilizing,
handling and analyzing</i> the huge volumes of data generated by an
increasing number of sources. This topic is also stated as a
scientific priority of Inria's research centre of
Rennes  <ref xlink:href="#kerdata-2016-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>: <i>Storage and utilization of
distributed big data</i>.</p>
        </subsection>
        <subsection id="uid6" level="3">
          <bodyTitle>Challenges and goals related to cloud data storage and
processing</bodyTitle>
          <p>In the area of cloud data processing, a significant milestone is the
emergence of the Map-Reduce  <ref xlink:href="#kerdata-2016-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> parallel
programming paradigm. It is currently used on most cloud platforms,
following the trend set up by Amazon  <ref xlink:href="#kerdata-2016-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. At
the core of Map-Reduce frameworks lies the storage system, a key
component which must meet a series of specific requirements that are
not fully met yet by existing solutions: the ability to provide
efficient <i>fine-grain access</i> to the files, while sustaining a
<i>high throughput</i> in spite of <i>heavy access concurrency</i>;
the need to provide a high resilience to <i>failures</i>; the need
to take <i>energy-efficiency</i> issues into account.</p>
          <p>More recently, it has become clear that data-intensive processing needs
to go beyond the frontiers of single datacenters. In this perspective,
extra challenges arise, related to the efficiency of metadata
management. This efficiency has a major impact on the access to very
large sets of small objects by Big Data processing workflows running
on large-scale infrastructures.</p>
        </subsection>
        <subsection id="uid7" level="3">
          <bodyTitle>Challenges and goals related to data-intensive HPC
applications</bodyTitle>
          <p>Key research fields such as climate modeling, solid Earth sciences
or astrophysics rely on very large-scale simulations running on
post-Petascale supercomputers. Such applications exhibit
requirements clearly identified by international panels of experts
like IESP  <ref xlink:href="#kerdata-2016-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, EESI  <ref xlink:href="#kerdata-2016-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>,
ETP4HPC  <ref xlink:href="#kerdata-2016-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. A jump of one order of magnitude in the
size of numerical simulations is required to address some of the
fundamental questions in several communities in this context. In
particular, the lack of data-intensive infrastructures and
methodologies to analyze the huge results of such simulations is a
major limiting factor.</p>
          <p>The challenge we have been addressing is to find new ways to store,
visualize and analyze massive outputs of data during and after the
simulations. Our main initial goal was to do it without impacting
the overall performance, avoiding the <i>jitter</i> generated by I/O
interference as much as possible. Recently, we started to focus
specifically on <i>in situ processing</i> approaches and we explored
approaches to <i>model and predict I/O phase occurrences</i> and to
<i>reduce intra-application and cross-application I/O
interference</i>.</p>
        </subsection>
      </subsection>
      <subsection id="uid8" level="2">
        <bodyTitle>Our approach</bodyTitle>
        <p>KerData's global approach consists in studying, designing,
implementing and evaluating distributed algorithms and software
architectures for scalable data storage and I/O management for
efficient, large-scale data processing. We target two main execution
infrastructures: cloud platforms and post-Petascale HPC
supercomputers.</p>
        <subsection id="uid9" level="3">
          <bodyTitle>Platforms and Methodology</bodyTitle>
          <p>The highly experimental nature of our research validation
methodology should be emphasized. To validate our proposed
algorithms and architectures, we build software prototypes, then
validate them at a large scale on real testbeds and experimental
platforms.</p>
          <p>We strongly rely on the Grid'5000 platform. Moreover, thanks to our
projects and partnerships, we have access to reference software and
physical infrastructures. In the cloud area, we use the Microsoft
Azure and Amazon cloud platforms. In the post-Petascale HPC area, we
are running our experiments on systems including some top-ranked
supercomputers, such as Titan, Jaguar, Kraken or Blue Waters. This
provides us with excellent opportunities to validate our results on
advanced realistic platforms.</p>
        </subsection>
        <subsection id="uid10" level="3">
          <bodyTitle>Collaboration strategy</bodyTitle>
          <p>Our collaboration portfolio includes international teams that are
active in the areas of data management for clouds and HPC systems,
both in Academia and Industry.</p>
          <p>Our academic collaborating partners include Argonne National Lab,
University of Illinois at Urbana-Champaign, Universidad Politécnica
de Madrid, Barcelona Supercomputing Center, University Politehnica
of Bucharest. In industry, we are mainly collaborating with
Microsoft and IBM.</p>
          <p>Moreover, the consortiums of our collaborative projects include
application partners in the areas of Bio-Chemistry (e.g., IBCP Lyon
in the MapReduce ANR project), Neurology and Genetics (e.g., the
Parietal team at Inria, the NeuroSpin centre in Saclay within the
A-Brain Microsoft Research-Inria project), and Climate Simulations
(e.g., the Department of Earth and Atmospheric Sciences of the
University of Michigan, within our collaboration inside
JLESC  <ref xlink:href="#kerdata-2016-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). This is an additional asset, which enables
us to take into account application requirements in the early design
phase of our solutions, and to validate those solutions with real
applications... and real users!</p>
        </subsection>
      </subsection>
    </subsection>
  </presentation>
  <fondements id="uid11">
    <bodyTitle>Research Program</bodyTitle>
    <subsection id="uid12" level="1">
      <bodyTitle>Research axis 1: Convergence of Extreme-Scale Computing and Big
Data Infrastructures</bodyTitle>
      <p>The tools and cultures of High Performance Computing and Big Data
Analytics have evolved in divergent ways. This is to the detriment
of both. However, big computations still generate and are needed to
analyze Big Data. As scientific research increasingly depends on
both high-speed computing and data analytics, the potential
interoperability and scaling convergence of these two eco-systems is
crucial to the future. Our objective for the next years is premised on
the idea that we must begin to systematically map out and account
for the ways in which the major issues associated with Big Data
intersect with, impinge upon, and potentially change the plans that
are now being laid for achieving Exascale computing.</p>
      <subsection id="uid13" level="2">
        <bodyTitle>High-performance storage for concurrent Big Data
applications</bodyTitle>
        <p>We argue that storage is a plausible pathway to convergence. In
this context, we plan to focus on the needs of concurrent Big Data
applications that require high-performance storage, as well as
transaction support. Although blobs (binary large objects) are an
increasingly popular storage model for such applications,
state-of-the-art blob storage systems offer no transaction
semantics. This requires users to coordinate data access carefully in
order to avoid race conditions, inconsistent writes, overwrites and
other problems that cause erratic behavior.</p>
        <p>We argue there is a gap between existing storage solutions and
application requirements, which limits the design of
transaction-oriented applications. In this context, one idea on
which we plan to focus our efforts is exploring how blob storage
systems could provide built-in, multi-blob transactions, while
retaining sequential consistency and high throughput under heavy
access concurrency.</p>
        <p>The early principles of this research direction have already raised
interest from our partners at ANL (Rob Ross) and UPM (María Pérez)
for potential collaborations. In this direction, the acceptance of
our paper on the Týr transactional blob storage system as a Best
Student Paper Award Finalist at the SC16
conference <ref xlink:href="#kerdata-2016-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> is a very encouraging step.</p>
      </subsection>
      <subsection id="uid14" level="2">
        <bodyTitle>Big Data analytics on Exascale HPC machines</bodyTitle>
        <p>Big Data analytics is another interesting direction that we plan to
explore, building on top of these converged storage
architectures. Specifically, we will examine the ways in which
Exascale infrastructures can be leveraged not only by HPC-centric,
but also by scientific, cloud-centric applications. Many of the
current state-of-the-art Big Data processing approaches, including
Hadoop and Spark  <ref xlink:href="#kerdata-2016-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> are optimized to run on commodity
machines. This impacts the mechanisms used to deal with failures and
the limited network bandwidth.</p>
        <p>A blind adoption of these systems on extreme-scale platforms would
result in high overheads. It would therefore prevent users from
fully benefiting from the high performance infrastructure. The
objective that we set here is to explore design and implementation
options for new data analytics systems that can exploit the features
of extreme-scale HPC machines: multi-core nodes, multiple memory and
storage technologies including a large memory, NVRAM, SSDs, etc.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid15">
            <p noindent="true">
              <i>
This axis is addressed in close collaboration with
<ref xlink:href="https://www.datsi.fi.upm.es/~mperez/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">María Pérez</ref> (UPM),
<ref xlink:href="http://www.mcs.anl.gov/person/rob-ross/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Rob Ross</ref> (ANL),
<ref xlink:href="http://people.ac.upc.es/toni/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Toni Cortes</ref> (BSC),
<ref xlink:href="http://www.bnicolae.net/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Bogdan Nicolae</ref> (formerly at IBM Research,
now at Huawei Research).</i>
            </p>
            <p>
              <i>Relevant groups with similar interests are the following ones.</i>
            </p>
            <simplelist>
              <li id="uid16">
                <p noindent="true">
                  <i>The group of
<ref xlink:href="http://www.netlib.org/utk/people/JackDongarra/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Jack
Dongarra</ref>, Innovative Computing Laboratory at University of
Tennessee/Oak Ridge National Laboratory, working on joint tools
for Exascale Computing and Big Data.</i>
                </p>
              </li>
              <li id="uid17">
                <p noindent="true">
                  <i>The group of
<ref xlink:href="http://matsu-www.is.titech.ac.jp/~matsu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Satoshi Matsuoka</ref>,
Tokyo Institute of Technology, working on system software for
Clouds and HPC.</i>
                </p>
              </li>
              <li id="uid18">
                <p noindent="true">
                  <i>The group of
<ref xlink:href="http://www.mcs.anl.gov/person/franck-cappello/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Franck
Cappello</ref> at Argonne National Laboratory/NCSA working on
on-demand data analytics and storage for extreme-scale simulations
and experiments.</i>
                </p>
              </li>
            </simplelist>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid19" level="1">
      <bodyTitle>Research axis 2: Advanced data processing on Clouds</bodyTitle>
      <p>The recent evolutions in the area of Big Data processing have
pointed out some limitations of the initial Map-Reduce model. It is
well suited for batch data processing, but less suited for real-time
processing of dynamic data streams. New types of data-intensive
applications emerge, e.g., for enterprises who need to perform
analysis on their stream data in ways that can give fast results
(i.e., in real time) at scale (e.g., click-stream analysis and
network-monitoring log analysis). Similarly, scientists require
fast and accurate data processing techniques in order to analyze
their experimental data correctly at scale (e.g., collective
analysis of large data sets distributed in multiple geographically
distributed locations).</p>
      <p>Our plan is to revisit current data management
techniques to cope with the volatile requirements of data-intensive
applications on large-scale dynamic clouds in a cost-efficient way.</p>
      <subsection id="uid20" level="2">
        <bodyTitle>Stream-oriented, Big Data processing on clouds</bodyTitle>
        <p>The state-of-the-art Hadoop Map-Reduce framework cannot deal with
stream data applications, as it requires the data to be initially
stored in a distributed file system in order to process them. To
better cope with the above-mentioned requirements, several systems
have been introduced for stream data processing such as
Flink  <ref xlink:href="#kerdata-2016-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, Spark  <ref xlink:href="#kerdata-2016-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>,
Storm  <ref xlink:href="#kerdata-2016-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, and Google
MillWheel  <ref xlink:href="#kerdata-2016-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. These systems keep computation
in memory to decrease latency, and preserve scalability by using
data-partitioning or dividing the streams into a set of
deterministic batch computations.</p>
        <p>However, they are designed to work in dedicated environments and
they do not consider the performance variability (i.e., network,
I/O, etc.) caused by resource contention in the cloud. This
variability may in turn cause high and unpredictable latency when
output streams are transmitted for further analysis. Moreover, they
overlook the dynamic nature of data streams and the volatility in
their computation requirements. Finally, they still address failures
in a best-effort manner.</p>
        <p>Our objective is to investigate new approaches for reliable, stream
Big Data processing on clouds. We will explore new mechanisms that
expose resource heterogeneity (observed variability in resource
utilization at runtime) when scheduling stream data applications. We
will also investigate how to adapt to node failures automatically,
and to adapt the failure handling techniques to the characteristics of
the running application and to the root cause of failures.</p>
      </subsection>
      <subsection id="uid21" level="2">
        <bodyTitle>Geographically distributed workflows on multi-site
clouds</bodyTitle>
        <p>Many data processing jobs in data-intensive applications are modeled
as workflows (i.e., as sets of tasks linked according to their data
and computation dependencies) to facilitate the management and
analysis of large volumes of data. With the fast growth of volumes
of data to be handled at larger and larger scales, geographically
distributed workflows are emerging as a natural data processing
paradigm. This may bring several benefits: resilience to failures,
distribution across partitions (e.g., moving computation close to
data or vice versa), elastic scaling to support usage bursts, user
proximity, etc.</p>
        <p>In this context, sharing, disseminating and analyzing the data sets
results in frequent large-scale data movements across widely
distributed sites. Studies show that the inter-datacenter traffic is
expected to triple in the following years. Our objective is to
investigate approaches to data management enabling an efficient
execution of such geographically distributed workflows running on
multi-site clouds.</p>
        <p>While in the past years we have addressed some data management
issues in this area, mainly in support of efficient task scheduling
of scientific workflows running on multi-site clouds, we will now
focus on an increasingly common scenario where workflows generate
and process a huge number of small files, which is particularly
challenging. As such workloads generate a deluge of small and
independent I/O operations, efficient data and metadata handling is
critical. We will explore specific means to better hide latency for
data and metadata access in such scenarios, as a way to improve
global performance.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid22">
            <p noindent="true">
              <i>
This axis is addressed in close collaboration with
<ref xlink:href="https://www.datsi.fi.upm.es/~mperez/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">María Pérez</ref> (UPM),
<ref xlink:href="http://www.mcs.anl.gov/person/kate-keahey" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Kate Keahey</ref>
(ANL) and <ref xlink:href="http://people.ac.upc.es/toni/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Toni Cortes</ref>
(BSC).</i>
            </p>
            <p>
              <i>Relevant groups with similar interests include the following
ones.</i>
            </p>
            <simplelist>
              <li id="uid23">
                <p noindent="true">
                  <i>The <ref xlink:href="https://amplab.cs.berkeley.edu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">AMPLab</ref>, UC
Berkeley, USA, working on scheduling stream data applications
in heterogeneous clouds.</i>
                </p>
              </li>
              <li id="uid24">
                <p noindent="true">
                  <i>The group of <ref xlink:href="https://deelman.isi.edu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Ewa Deelman</ref>,
USC Information Sciences Institute, working on resource
management for workflows in Clouds.</i>
                </p>
              </li>
              <li id="uid25">
                <p noindent="true">
                  <i>The <ref xlink:href="http://pdcc.ntu.edu.sg/xtra/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">XTRA</ref> group,
Nanyang Technological University, Singapore, working on
resource provisioning for workflows in the cloud.</i>
                </p>
              </li>
            </simplelist>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid26" level="1">
      <bodyTitle>Research axis 3: I/O management, in situ visualization and
analysis on HPC systems at extreme scales</bodyTitle>
      <p>Over the past few years, the increasing amounts of data produced
by large-scale simulations have motivated a shift from traditional
offline data analysis to in situ analysis and visualization. In
situ processing started by coupling a parallel simulation with an
analysis or visualization library, to avoid the cost of writing
data on storage and reading it back. Going beyond this simple
pairwise tight coupling, complex analysis workflows today are
graphs with one or more data sources and several interconnected
analysis components.</p>
      <subsection id="uid27" level="2">
        <bodyTitle>Toward a joint optimized architecture for in situ
visualization and advanced processing</bodyTitle>
        <p>At least four tools have emerged from Inria and ANL to address
some challenges of coupling simulations with visualization
packages or analysis workflows. Each of them focused on some
particular aspect:</p>
        <descriptionlist>
          <label>Damaris</label>
          <li id="uid28">
            <p noindent="true">(Inria, <ref xlink:href="#kerdata-2016-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>,
<ref xlink:href="#kerdata-2016-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) exploits dedicated cores to
enable jitter-free I/O and in situ visualization;</p>
          </li>
          <label>Decaf</label>
          <li id="uid29">
            <p noindent="true">(ANL, <ref xlink:href="#kerdata-2016-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) implements a coupling service
for workflows;</p>
          </li>
          <label>FlowVR</label>
          <li id="uid30">
            <p noindent="true">(Inria, <ref xlink:href="#kerdata-2016-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) connects workflow
components for in situ processing;</p>
          </li>
          <label>Swift</label>
          <li id="uid31">
            <p noindent="true">(ANL, <ref xlink:href="#kerdata-2016-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) focuses on implicitly
parallel data flows and was optimized for Big Data processing.</p>
          </li>
        </descriptionlist>
        <p>Our plan is to explore how these tools could best leverage their
respective strengths in a <i>joint optimized architecture for
in situ visualization and advanced processing</i> in the HPC
area. We published a preliminary study describing the lessons
learned from using these tools in production environments with
real applications <ref xlink:href="#kerdata-2016-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Such a joint
architecture will help address the data volume and
velocity challenges raised by data-intensive workflows, including
complex data-intensive analytics phases. It may also impact, in a
subsequent step, future data analysis pipelines for converged Big
Data and HPC architectures.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid32">
            <p noindent="true">
              <i>
This axis is worked out in close collaboration with Rob Ross
(ANL), <ref xlink:href="http://www.mcs.anl.gov/~tpeterka/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Tom Peterka</ref>
(ANL),
<ref xlink:href="http://www.mcs.anl.gov/person/matthieu-dorier" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Matthieu
Dorier</ref> (ANL), <ref xlink:href="http://people.ac.upc.es/toni/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Toni Cortes</ref>
(BSC), <ref xlink:href="http://moais.imag.fr/membres/bruno.raffin/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Bruno
Raffin</ref> (Inria). Some additional collaborations are in
discussion with other members of
<ref xlink:href="https://jlesc.github.io/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">JLESC</ref>, and with CEA and Total.</i>
            </p>
            <p>
              <i>Relevant groups with similar interests include the following ones.</i>
            </p>
            <simplelist>
              <li id="uid33">
                <p noindent="true">
                  <i>The group of <ref xlink:href="http://parashar.rutgers.edu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Manish
Parashar</ref> at Rutgers University, USA (I/O management for HPC
systems, in situ processing).</i>
                </p>
              </li>
              <li id="uid34">
                <p noindent="true">
                  <i>The group of
<ref xlink:href="https://www.eecs.utk.edu/people/faculty/klasky/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Scott
Klasky</ref> at Oak Ridge National Lab, USA (I/O management for HPC
systems, in situ processing).</i>
                </p>
              </li>
              <li id="uid35">
                <p noindent="true">
                  <i>The <ref xlink:href="http://www.lsce.ipsl.fr/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">CNRS IPSL laboratory</ref>
(Sébastien Denvil, Pôle de modélisation du climat) in Paris,
France (in situ data analytics).</i>
                </p>
              </li>
            </simplelist>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
  </fondements>
  <domaine id="uid36">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid37" level="1">
      <bodyTitle>Application Domains</bodyTitle>
      <p>Our research work aims to improve large-scale, data-intensive
applications running on clouds and extreme-scale HPC systems, with
high requirements in terms of data storage and processing. Here are
some classes of such applications.</p>
      <descriptionlist>
        <label>Extreme-scale, data-intensive science simulations.</label>
        <li id="uid38">
          <p noindent="true">A major
research topic in the context of HPC simulations running on
extreme-scale supercomputers is to explore how to record and
visualize data during the simulation efficiently, without impacting
the performance of the computation generating that data. In this
area, we explore innovative approaches to I/O management and to in
situ processing, in particular through our Damaris approach.</p>
        </li>
        <label>Map-Reduce-based data analytics.</label>
        <li id="uid39">
          <p noindent="true">As Map-Reduce emerged as a
dominant programming model for data analytics, we focus on several
related challenges: how to enable fast failure recovery in shared
Hadoop clusters; how to improve scheduling policies to favor
resource allocation fairness; how to improve performance by
detecting and mitigating stragglers.</p>
        </li>
        <label>Geographically-distributed cloud workflows.</label>
        <li id="uid40">
          <p noindent="true">With
fast-growing volumes of data to be handled at larger and larger
scales, geographically distributed workflows are emerging as a
natural data processing paradigm. They actually bring several benefits:
resilience to failures, distribution across partitions, elastic
scaling, user proximity, etc. In this context, we investigate
approaches to data management enabling an efficient execution of
such geographically distributed workflows running on multi-site
clouds. In projects like <i>ANR OverFlow</i> and
<i>Z-CloudFlow</i> we explore means to better hide latency for
data and metadata access and optimize transfers as a way of
improving the global performance.</p>
        </li>
        <label>Stream data processing.</label>
        <li id="uid41">
          <p noindent="true">The evolutions in the area of Big
Data processing, the development of cloud computing and the
success of the Map-Reduce model have fostered new types of
data-intensive applications, in which obtaining fast and timely
results is mandatory. Enterprises need to perform analysis on their
stream data that can give fast results (i.e., in real time) at
scale (e.g., click-stream analysis and network-monitoring log
analysis). Similarly, scientists require fast and accurate data
processing techniques in order to analyze their experimental data
correctly at scale (e.g., analysis of data produced by
massive-scale simulations and sensor deployments).</p>
          <p>Besides processing, we are also focusing on efficient stream data
storage. Unlike traditional storage, the main challenge of storing
stream data is the large number of small items (arriving at rates
easily reaching tens of millions per second). We
explore the plausible paths towards a dedicated storage solution.
We aim to provide on the one hand traditional storage functionality,
and on the other hand stream-like performance (i.e., low-latency
I/O access to items and ranges of items).</p>
        </li>
      </descriptionlist>
      <p>The team's projects and collaborations explicitly target concrete
use cases belonging to the above application classes, in the
following areas.</p>
      <descriptionlist>
        <label>Smart Cities and Territories.</label>
        <li id="uid42">
          <p noindent="true">In the framework of the
<i>BigStorage project</i> where the KerData team is a major
partner, we are focusing on several stream data applications in
the context of Smart cities. The goal is to optimize current
state-of-the-art processing engines to provide real-time analysis
of data collected from small sensors and devices.
This will enable smart
decisions in fields like healthcare, traffic management, water
quality, air pollution and many more.</p>
        </li>
        <label>Climate and meteorology.</label>
        <li id="uid43">
          <p noindent="true">An example is the atmospheric
simulation code CM1 (Cloud Model 1), one of the target
applications of the Blue Waters machine. We already used
this code in collaborative
research within <i>Data@Exascale</i> Associate Team, in the framework of
the <i>Joint Laboratory for Extreme-Scale Computing</i> (JLESC),
co-supported by Inria, UIUC, ANL, BSC, JSC and RIKEN/AICS.</p>
        </li>
        <label>Brain imaging.</label>
        <li id="uid44">
          <p noindent="true">In the <i>A-Brain</i> MSR-Inria project (now
completed), we applied Map-Reduce-based data analytics to
neuro-imaging genetics.</p>
        </li>
        <label>Molecular biology.</label>
        <li id="uid45">
          <p noindent="true">In the framework of the <i>MapReduce
ANR project</i> led by KerData (now completed), we have focused on
the <i>FastA</i> bioinformatics application used for massive
protein sequence similarity searching. In the context of the
<i>OverFlow ANR project</i> we are pursuing this analysis in
collaboration with the Institut Français de Bioinformatique (IFB).
We aim at using these results for drug design in an
industrial context (i.e. the identification of new druggable
protein targets and thereby the generation of new drug
candidates).</p>
        </li>
      </descriptionlist>
    </subsection>
  </domaine>
  <highlights id="uid46">
    <bodyTitle>Highlights of the Year</bodyTitle>
    <subsection id="uid47" level="1">
      <bodyTitle>Highlights of the Year</bodyTitle>
      <subsection id="uid48" level="2">
        <bodyTitle>Awards</bodyTitle>
        <descriptionlist>
          <label>SC16: Best Student Paper Finalist.</label>
          <li id="uid49">
            <p noindent="true">The paper entitled
<i>Týr: Blob Storage Meets Built-In
Transactions</i> <best><ref xlink:href="#kerdata-2016-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/></best> presented by Pierre
Matri at the
<ref xlink:href="http://sc16.supercomputing.org/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Supercomputing</ref> (SC16)
Conference was one of the 7 papers selected for the Best Student
Paper award.</p>
            <p>This work was carried out in the context of the
<ref xlink:href="http://www.bigstorage-project.eu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">BigStorage</ref> project,
under the supervision of Alexandru Costan, Gabriel Antoniu,
<ref xlink:href="https://www.datsi.fi.upm.es/~mperez/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">María Pérez</ref>, and
<ref xlink:href="http://laurel.datsi.fi.upm.es/~jmontes/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Jesús Montes</ref>.</p>
            <p>There were 442 submissions, and 81 accepted papers.</p>
          </li>
          <label>ACM Graduate Student Research Competition SC16.</label>
          <li id="uid50">
            <p noindent="true">Nathanaël
Cheriere received the third prize in the SC16
<ref xlink:href="http://sc16.supercomputing.org/studentssc/student-research-venues/acm-student-research-competition/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">ACM
Student Research Competition</ref> for his work on optimizing the
algorithms for the MPI collective <i>Scatter</i> and
<i>AllGather</i> routines on the Dragonfly
topology <ref xlink:href="#kerdata-2016-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
            <p>This work was
carried out at the Argonne National Laboratory in the context of
the <ref xlink:href="https://jlesc.github.io/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">JLESC</ref>, under the supervision of
<ref xlink:href="http://www.mcs.anl.gov/person/matthieu-dorier" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Matthieu
Dorier</ref>, <ref xlink:href="http://www.mcs.anl.gov/person/rob-ross/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Rob
Ross</ref>, Shadi Ibrahim, and Gabriel Antoniu.</p>
            <p>As many as 62 posters were submitted for the Student Research
Competition, out of which 14 have been selected in the Graduate
category. After the presentation of their posters, 4 students have
been invited to make a presentation of their work in front of a
jury.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid51" level="2">
        <bodyTitle>9 papers in international journals</bodyTitle>
        <p>This year the team published 9 papers in high-quality journals
including <i>ACM Transactions on Parallel Computing</i>, <i>IEEE
Transactions on Parallel and Distributed Systems</i>, <i>Future
Generation Computer Systems</i>, <i>Concurrency and Computation:
Practice and Experience</i> and <i>IEEE Transactions on Cloud
Computing</i>.</p>
      </subsection>
    </subsection>
  </highlights>
  <logiciels id="uid52">
    <bodyTitle>New Software and Platforms</bodyTitle>
    <subsection id="uid53" level="1">
      <bodyTitle>Týr</bodyTitle>
      <descriptionlist>
        <label>Title:</label>
        <li id="uid54">
          <p noindent="true">Týr: Blob Storage Meets Built-In Transactions.</p>
        </li>
        <label>Keywords:</label>
        <li id="uid55">
          <p noindent="true">Big Data; Transactions; Tyr; BlobSeer.</p>
        </li>
        <label>Scientific Description:</label>
        <li id="uid56">
          <p noindent="true">Týr <ref xlink:href="#kerdata-2016-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> is the
first blob storage system to provide built-in, multi-blob
transactions, while retaining sequential consistency and high
throughput under heavy access concurrency.</p>
        </li>
        <label>Functional Description:</label>
        <li id="uid57">
          <p noindent="true">Týr offers fine-grained random write access to data and in-place
atomic operations. Large-scale experiments on Microsoft Azure
with a production application from CERN LHC show Týr throughput
outperforms state-of-the-art solutions by more than 75%. Týr
leverages the approaches developed within BlobSeer, the reference
data management system for large distributed blobs, developed over
the past years in KerData.</p>
        </li>
        <label>Contact data:</label>
        <li id="uid58">
          <descriptionlist>
            <label>Participants:</label>
            <li id="uid59">
              <p noindent="true">Pierre Matri, Alexandru Costan and Gabriel
Antoniu.</p>
            </li>
            <label>Partners:</label>
            <li id="uid60">
              <p noindent="true">INSA Rennes, Universidad Politécnica de Madrid.</p>
            </li>
            <label>Contact:</label>
            <li id="uid61">
              <p noindent="true">Gabriel Antoniu.</p>
            </li>
            <label>URL:</label>
            <li id="uid62">
              <p noindent="true"><ref xlink:href="http://tyr.io/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>tyr.<allowbreak/>io/</ref>.</p>
            </li>
          </descriptionlist>
        </li>
      </descriptionlist>
    </subsection>
    <subsection id="uid63" level="1">
      <bodyTitle>Damaris</bodyTitle>
      <descriptionlist>
        <label>Title:</label>
        <li id="uid64">
          <p noindent="true">Damaris: I/O and data management for large-scale,
MPI-based HPC simulations.</p>
        </li>
        <label>Keywords:</label>
        <li id="uid65">
          <p noindent="true">I/O; HPC; Data management; Visualization; Big Data;
Exascale.</p>
        </li>
        <label>Scientific Description:</label>
        <li id="uid66">
          <p noindent="true">Damaris is a middleware for multicore SMP nodes enabling them to
efficiently handle data transfers for storage and
visualization. The key idea is to dedicate one or a few cores of
each SMP node to the application I/O. It is developed within the
framework of a collaboration between KerData and the
<ref xlink:href="https://jlesc.github.io/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">JLESC</ref>. The current version
enables efficient asynchronous I/O, hiding all I/O-related
overheads such as data compression and post-processing, as well as
direct (in situ) interactive visualization of the generated data.</p>
          <p>Damaris has been preliminarily evaluated at NCSA
(Urbana-Champaign) with the CM1 tornado simulation code. CM1 is
one of the target applications of the Blue Waters supercomputer in
production at NCSA/UIUC (USA), in the framework of the
<ref xlink:href="https://jlesc.github.io/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">JLESC</ref>. Damaris now has external
users, including (to our knowledge) visualization specialists from
NCSA, Big Data experts from the HDF group, and researchers from the
France/Brazil Associated Research Team on Parallel Computing
(joint team between Inria/LIG Grenoble and the UFRGS in
Brazil). Damaris has been successfully integrated into four
large-scale simulations (CM1, OLAM, Nek5000, CROCO). Work is in
progress to evaluate it in the context of several other
simulation codes including HACC (cosmology) and GTC (fusion).</p>
          <p>Damaris is the object of a <i>Technical Development Action</i>
(ADT) supported by Inria.</p>
        </li>
        <label>Functional Description:</label>
        <li id="uid67">
          <p noindent="true">Damaris targets large-scale HPC simulations: in situ data analysis
by some dedicated cores of the simulation platform; asynchronous
and fast data transfer from HPC simulations to Damaris;
semantic-aware dataset processing through Damaris plug-ins.</p>
        </li>
        <label>Contact data:</label>
        <li id="uid68">
          <descriptionlist>
            <label>Participants:</label>
            <li id="uid69">
              <p noindent="true"><ref xlink:href="http://www.mcs.anl.gov/person/matthieu-dorier" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Matthieu
Dorier</ref> (ANL), Lokman Rahmani, Gabriel Antoniu, Orçun Yildiz,
Hadi Salimi and Luc Bougé.</p>
            </li>
            <label>Partners:</label>
            <li id="uid70">
              <p noindent="true">ENS Rennes, Argonne National Laboratory.</p>
            </li>
            <label>Contact:</label>
            <li id="uid71">
              <p noindent="true">Gabriel Antoniu.</p>
            </li>
            <label>URL:</label>
            <li id="uid72">
              <p noindent="true"><ref xlink:href="http://damaris.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>damaris.<allowbreak/>gforge.<allowbreak/>inria.<allowbreak/>fr/</ref>.</p>
            </li>
          </descriptionlist>
        </li>
      </descriptionlist>
    </subsection>
    <subsection id="uid73" level="1">
      <bodyTitle>Other software</bodyTitle>
      <subsection id="uid74" level="2">
        <bodyTitle>JetStream</bodyTitle>
        <descriptionlist>
          <label>Title:</label>
          <li id="uid75">
            <p noindent="true">JetStream: Enabling High-Performance Event Streaming
across Cloud Data-Centers.</p>
          </li>
          <label>Keywords:</label>
          <li id="uid76">
            <p noindent="true">Big Data, streaming, data transfer, multisite
cloud.</p>
          </li>
          <label>Scientific Description.</label>
          <li id="uid77">
            <p noindent="true">JetStream is a middleware solution
for batch-based, high-performance streaming across cloud data
centers. JetStream implements a set of context-aware strategies
to optimize batch-based streaming, being able to self-adapt to
changing conditions.</p>
          </li>
          <label>Functional Description.</label>
          <li id="uid78">
            <p noindent="true">The system provides multi-route
streaming across cloud data centers for aggregating bandwidth by
leveraging the network parallelism. It enables easy deployment
across .Net frameworks and seamless binding with event processing
engines such as StreamInsight. JetStream is currently used at
Microsoft Research ATLE Munich for the management of the Azure
cloud infrastructure.</p>
          </li>
          <label>Participants:</label>
          <li id="uid79">
            <p noindent="true">Ovidiu-Cristian Marcu, Alexandru Costan,
Gabriel Antoniu.</p>
          </li>
          <label>Contact:</label>
          <li id="uid80">
            <p noindent="true">Alexandru Costan.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid81" level="2">
        <bodyTitle>Omnisc'IO</bodyTitle>
        <descriptionlist>
          <label>Title:</label>
          <li id="uid82">
            <p noindent="true">Omnisc'IO: a Grammar-Based Approach to Spatial and
Temporal I/O Patterns Prediction.</p>
          </li>
          <label>Keywords:</label>
          <li id="uid83">
            <p noindent="true">HPC, Input-Output, Prediction, Grammar.</p>
          </li>
          <label>Scientific Description.</label>
          <li id="uid84">
            <p noindent="true">Omnisc'IO is a library that aims to
be integrated into I/O middleware.</p>
          </li>
          <label>Functional Description.</label>
          <li id="uid85">
            <p noindent="true">It traces I/O operations, models the
stream of such operations using grammar-inference techniques,
and predicts when new I/O operations will be performed, as well as
where and how much data will be written.</p>
          </li>
          <label>Participants:</label>
          <li id="uid86">
            <p noindent="true"><ref xlink:href="http://www.mcs.anl.gov/person/matthieu-dorier" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Matthieu
Dorier</ref> (ANL), Gabriel Antoniu, Shadi Ibrahim.</p>
          </li>
          <label>Contact:</label>
          <li id="uid87">
            <p noindent="true">Gabriel Antoniu.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid88" level="2">
        <bodyTitle>OverFlow</bodyTitle>
        <descriptionlist>
          <label>Title:</label>
          <li id="uid89">
            <p noindent="true">OverFlow: Workflow Data Management as a Service for
Multi-Site Applications.</p>
          </li>
          <label>Keywords:</label>
          <li id="uid90">
            <p noindent="true">Small data; workflow; multi-site cloud.</p>
          </li>
          <label>Scientific Description.</label>
          <li id="uid91">
            <p noindent="true">OverFlow is a uniform data
management system for scientific workflows running across
geographically distributed sites, aiming to reap economic benefits
from this geo-diversity. The software is environment-aware, as it
monitors and models the global cloud infrastructure, offering high
and predictable performance for transfer cost and
time, within and across sites.</p>
          </li>
          <label>Functional Description.</label>
          <li id="uid92">
            <p noindent="true">OverFlow proposes a set of pluggable
services, grouped in a data-scientist cloud kit. They provide the
applications with the possibility to monitor the underlying
infrastructure, to exploit smart data compression, deduplication
and geo-replication, to evaluate data management costs, to set a
tradeoff between money and time, and optimize the transfer
strategy accordingly. Currently, OverFlow is used for data
transfers by the Microsoft Research ATLE Munich team as well as
for synthetic benchmarks at the Politehnica University of
Bucharest.</p>
          </li>
          <label>Participants:</label>
          <li id="uid93">
            <p noindent="true">Paul Le Noac'h, Ovidiu-Cristian Marcu,
Alexandru Costan and Gabriel Antoniu.</p>
          </li>
          <label>Contact:</label>
          <li id="uid94">
            <p noindent="true">Alexandru Costan.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid95" level="2">
        <bodyTitle>iHadoop</bodyTitle>
        <descriptionlist>
          <label>Title:</label>
          <li id="uid96">
            <p noindent="true">iHadoop: A Hadoop Simulator Developed In Java on Top
of SimGrid.</p>
          </li>
          <label>Keywords:</label>
          <li id="uid97">
            <p noindent="true">Simulation, Map-Reduce, Hadoop, SimGrid.</p>
          </li>
          <label>Scientific Description.</label>
          <li id="uid98">
            <p noindent="true">iHadoop is a Hadoop simulator
developed in Java on top of SimGrid. It simulates the behavior of
Hadoop and therefore accurately predicts the performance of Hadoop
in normal scenarios and under failures. iHadoop is extended to (1)
simulate the execution and predict the performance of multiple
Map-Reduce applications; (2) simulate the execution of Map-Reduce
applications under various data distributions and data skew models.</p>
          </li>
          <label>Functional Description.</label>
          <li id="uid99">
            <p noindent="true">iHadoop is an internal software
prototype, which was initially developed to validate our idea
regarding the behavior of Hadoop under failures. iHadoop has been
preliminarily evaluated within our group and it has shown very
high accuracy in predicting the execution time of Map-Reduce
applications. We intend to integrate iHadoop within the SimGrid
distribution and make it available to the SimGrid community.</p>
          </li>
          <label>Participants:</label>
          <li id="uid100">
            <p noindent="true">Shadi Ibrahim and Tien-Dat Phan.</p>
          </li>
          <label>Contact:</label>
          <li id="uid101">
            <p noindent="true">Shadi Ibrahim.</p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
  </logiciels>
  <resultats id="uid102">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid103" level="1">
      <bodyTitle>Convergence of HPC and Big Data</bodyTitle>
      <subsection id="uid104" level="2">
        <bodyTitle>Transactional storage</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp82720">
            <firstname>Pierre</firstname>
            <lastname>Matri</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Concurrent Big Data applications often require high-performance
storage, as well as ACID (Atomicity, Consistency, Isolation,
Durability) transaction support. Although blobs (binary large
objects) are an increasingly popular model for addressing the
storage needs of such applications, state-of-the-art blob storage
systems typically offer no transaction semantics. This requires
users to coordinate access to data carefully in order to avoid race
conditions, inconsistent writes, overwrites and other problems that
cause erratic behavior. We argue there is a gap between existing
storage solutions and application requirements, which limits the
design of transaction-oriented applications.</p>
        <p>Týr is the first blob storage system to provide built-in,
multi-blob transactions, while retaining sequential consistency and
high throughput under heavy access concurrency. Týr offers
fine-grained random write access to data and in-place atomic
operations.</p>
        <p>Large-scale experiments on Microsoft Azure with a production
application from CERN LHC show Týr throughput outperforms
state-of-the-art solutions by more than 75 %.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid105">
            <p noindent="true">
              <i>
This work was done in collaboration with
<ref xlink:href="https://www.datsi.fi.upm.es/~mperez/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">María Pérez</ref>, UPM,
Spain.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid106" level="2">
        <bodyTitle>Big Data on HPC</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp81448">
            <firstname>Orçun</firstname>
            <lastname>Yildiz</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Over the last decade, Map-Reduce has stood as the most powerful Big
Data processing model. The Map-Reduce model is now used by many companies
and research labs to facilitate large-scale data analysis. With the
growing needs of users and size of data, commodity-based
infrastructure (most commonly used as of now) will strain under the
heavy weight of Big Data. On the other hand, HPC systems offer a
rich set of opportunities for Big Data processing.</p>
        <p>As first steps towards Big Data processing on HPC systems, several
research efforts have been devoted to understanding Map-Reduce
performance on these systems. However, the impact of the specific
features of HPC environments has not been fully investigated yet.</p>
        <p>We conducted an experimental campaign to provide a
clearer understanding of Map-Reduce performance on HPC systems. We
use Spark, a widely adopted Map-Reduce framework, and representative
Big Data workloads on the Grid'5000 testbed to evaluate how the latency,
contention and file system's configuration can influence the
application performance.</p>
      </subsection>
      <subsection id="uid107" level="2">
        <bodyTitle>Energy vs. performance trade-offs</bodyTitle>
        <participants>
          <person key="kerdata-2015-idp115016">
            <firstname>Mohammed-Yacine</firstname>
            <lastname>Taleb</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Most large popular web applications, like Facebook and Twitter, have
been relying on large amounts of in-memory storage to cache data and
provide a low response time. As the memory capacity of clusters
and clouds increases, it becomes possible to keep most of the data
in the main memory.</p>
        <p>This motivates the introduction of in-memory storage systems. While
prior work has focused on how to exploit the low latency of
in-memory access at scale, there is still little knowledge regarding
the energy efficiency of in-memory storage systems. This is
unfortunate, as it is known that main memory is a major energy
bottleneck in many computing systems. For instance, DRAM consumes up
to 40 % of a server's power.</p>
        <p>By means of experimental evaluation, we have studied the performance
and energy-efficiency of RAMCloud — a well-known in-memory storage
system. We demonstrated that although RAMCloud is scalable for
read-only applications, it exhibits non-proportional power
consumption. We also found that the current replication scheme
implemented in RAMCloud limits the performance and results in high
energy consumption. Surprisingly enough, we also showed that
replication can even play a negative role in crash-recovery.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid108">
            <p noindent="true">
              <i>
This work was carried out in collaboration with
<ref xlink:href="http://people.ac.upc.es/toni/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Toni Cortes</ref> (BSC, Spain).</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid109" level="1">
      <bodyTitle>Efficient I/O and communication for Extreme-scale HPC systems</bodyTitle>
      <subsection id="uid110" level="2">
        <bodyTitle>Adaptive performance-constrained in situ
visualisation</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp78800">
            <firstname>Lokman</firstname>
            <lastname>Rahmani</lastname>
          </person>
        </participants>
        <p>While many parallel visualization tools now provide in situ
visualization capabilities, the trend has been to feed such tools
with large amounts of unprocessed output data and let them render
everything at the highest possible resolution. This leads to an
increased run time of simulations that still have to complete within
a fixed-length job allocation.</p>
        <p>We have been working on tackling the challenge of enabling in situ
visualization under performance constraints. Our approach shuffles
data across processes according to their contents and filters out part
of them. Thereby, the visualization pipeline is only fed with a
reorganized subset of the data produced by the simulation.</p>
        <p>Our framework, as presented in <ref xlink:href="#kerdata-2016-bid22" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, leverages
fast, generic evaluation procedures to score blocks of data, using
information theory, statistics, and linear algebra. It monitors its
own performance and dynamically adapts to achieve appropriate visual
fidelity within predefined performance constraints. Experiments on
the Blue Waters supercomputer with the CM1 simulation show that our
approach enables a 5-fold speedup with respect to the initial
visualization pipeline, and is able to meet performance constraints.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid111">
            <p noindent="true">
              <i>
This work was carried out with the collaboration of
<ref xlink:href="http://www.mcs.anl.gov/person/matthieu-dorier" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Matthieu
Dorier</ref>, ANL, USA.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid112" level="2">
        <bodyTitle>Dragonfly</bodyTitle>
        <participants>
          <person key="kerdata-2015-idp118848">
            <firstname>Nathanaël</firstname>
            <lastname>Cheriere</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>High-radix direct network topologies such as Dragonfly have been
proposed for Petascale and Exascale supercomputers. It has been
shown that they
ensure fast interconnections and reduce the cost of the network
compared to traditional network topologies. However, current
algorithms for communication do not consider the topology and thus
waste numerous opportunities for performance optimization.</p>
        <p>In our studies, we exploit the strength of the Dragonfly with
topology-aware algorithms for AllGather and Scatter operations. We
analyze existing algorithms, then propose derived algorithms, that
we evaluate using CODES, an event-driven simulator.</p>
        <p>As expected, making AllGather algorithms topology-aware does
improve the performance and reduce the link utilization. However,
simulations of various Scatter algorithms show surprising results,
and point out the important role played by hardware for the
efficiency of the algorithms. In particular, the knowledge of the
number and size of input-output buffers in routers can be exploited
to accelerate the Scatter operation by a factor of up to 2.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid113">
            <p noindent="true">
              <i>
This work was done in collaboration with
<ref xlink:href="http://www.mcs.anl.gov/person/matthieu-dorier" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Matthieu
Dorier</ref> and <ref xlink:href="http://www.mcs.anl.gov/person/rob-ross/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Rob
Ross</ref>, ANL, USA.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid114" level="2">
        <bodyTitle>Interference between HPC jobs</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp81448">
            <firstname>Orçun</firstname>
            <lastname>Yildiz</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>As we move toward the Exascale era, performance variability in HPC
systems remains a challenge. I/O interference, a major cause of this
variability, is becoming more important every day with the growing
number of concurrent applications that share larger
machines. Earlier research efforts on mitigating I/O interference
focus on a single potential cause of interference (e.g., the
network). Yet the root causes of I/O interference can be diverse.</p>
        <p>In <ref xlink:href="#kerdata-2016-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we conducted an extensive
experimental campaign to explore the various root causes of I/O
interference in HPC storage systems. We used micro-benchmarks on the
Grid'5000 testbed to evaluate how I/O interference is influenced by
the applications' access pattern, the network components, the file
system's configuration, and the backend storage devices.</p>
        <p>Our studies revealed that in many situations interference is a result
of a bad flow control in the I/O path, rather than being caused by
some single bottleneck in one of its components. We further show
that interference-free behavior is not necessarily a sign of optimal
performance. To the best of our knowledge, our work provides the
first deep insight into the role of each of the potential root
causes of interference and their interplay. Our findings can help
developers and platform owners improve I/O performance and motivate
further research addressing the problem across all components of the
I/O stack.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid115">
            <p noindent="true">
              <i>
This work was done in collaboration with
<ref xlink:href="http://www.mcs.anl.gov/person/matthieu-dorier" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Matthieu
Dorier</ref> and <ref xlink:href="http://www.mcs.anl.gov/person/rob-ross/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Rob
Ross</ref>, ANL, USA.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid116" level="1">
      <bodyTitle>Workflow on clouds </bodyTitle>
      <subsection id="uid117" level="2">
        <bodyTitle>Managing hot metadata for scientific workflows on
multisite clouds</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp77520">
            <firstname>Luis Eduardo</firstname>
            <lastname>Pineda Morales</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Large-scale scientific applications are often expressed as workflows
that help define data dependencies between their different
components. Such workflows may incur huge storage and computation
requirements, so that they need to be processed in multiple
(cloud-federated) datacenters. A major challenge in such multisite
clouds is the long latency of the network links between datacenters, which
limits the performance of multisite applications. Moreover, it has
been shown that poor metadata handling can further impact the
efficiency of computing systems. Many efforts have been made to
improve metadata management; however, most of them concern only
single-site, HPC systems to date.</p>
        <p>In <ref xlink:href="#kerdata-2016-bid24" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we assert that some workflow
metadata are more frequently accessed than others, and thus should be
handled with higher priority during the workflow's execution. We
call them <i>hot metadata</i>. We present a hybrid
decentralized/distributed model for handling hot metadata in
<i>multisite</i> architectures. We couple our model with a
scientific workflow management system (SWfMS) to validate and tune
its applicability to various real-life scientific scenarios. We
show that efficient management of hot metadata improves the
performance of SWfMS, reducing the workflow execution time by up to
50 % for highly parallel jobs by enabling timely data provisioning
and avoiding unnecessary <i>cold</i> metadata operations.</p>
      </subsection>
      <subsection id="uid118" level="2">
        <bodyTitle>Probabilistic optimizations for resource provisioning
of cloud workflows</bodyTitle>
        <participants>
          <person key="kerdata-2016-idp152368">
            <firstname>Chi</firstname>
            <lastname>Zhou</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
        </participants>
        <p>In many data-intensive applications, data management routines can be
represented as workflows, where tasks are organized according to
data and computation dependencies. Recently, the optimal
provisioning of resources (e.g., VMs) for workflows running in the
cloud has attracted a lot of attention. Most resource provisioning
solutions overlook the important factor of cloud dynamics, e.g., the
fluctuation of I/O, network performance, and system failures. In our
experiments on the Amazon EC2 cloud, these issues significantly
impact resource allocation quality. Therefore, we study how cloud
dynamics should be incorporated into the resource provisioning
process.</p>
        <p>Our approach models cloud dynamics as time-dependent random
variables (e.g., a probability distribution of workflow execution
times) and performs probabilistic optimizations for resource
provisioning problems using those random variables as optimization
input. This solution yields more effective resource provisioning for
cloud workflows. However, it involves heavy computation effort due
to the complex structures of workflows and the large number of
probability calculations.</p>
        <p>To overcome this problem, we develop a three-stage pruning process,
which simplifies workflow structure and reduces probability
evaluation overhead. We have also implemented our techniques in a
runtime library, which allows users to integrate our techniques into
their existing resource provisioning methods. Experiments on two
common resource provisioning problems show that probabilistic
solutions can improve the performance by 51 %–70 % compared with
state-of-the-art, static solutions.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid119">
            <p noindent="true">
              <i>
This work was done in collaboration with
<ref xlink:href="http://www.comp.nus.edu.sg/~hebs/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Bingsheng He</ref>, NUS,
Singapore.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid120" level="2">
        <bodyTitle>A taxonomy and survey of scientific computing in the
cloud</bodyTitle>
        <participants>
          <person key="kerdata-2016-idp152368">
            <firstname>Chi</firstname>
            <lastname>Zhou</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
        </participants>
        <p>Cloud computing has evolved as a popular computing infrastructure
for many applications. With (big) data acquiring a crucial role in
eScience, efforts have been made recently to
develop and deploy scientific applications efficiently on the
unprecedentedly scalable cloud infrastructures.</p>
        <p>In <ref xlink:href="#kerdata-2016-bid25" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we review recent efforts in developing
and deploying scientific computing applications in the cloud. In
particular, we introduce a taxonomy specifically designed for
scientific computing in the cloud, and further review the taxonomy
with four major kinds of science applications, including life
sciences, physical sciences, social and humanities sciences, and
climate and earth sciences.</p>
        <p>Due to the large data size in most scientific applications, the
performance of I/O operations can greatly affect the overall
performance of the applications. As a consequence, the dynamic I/O
performance of the cloud has made resource provisioning an
important and complex problem for scientific applications in the
cloud.</p>
        <p>We present our efforts on improving the resource provisioning
efficiency and effectiveness of scientific applications in the
cloud. Finally, we present the open problems for developing the
next-generation eScience applications and systems in the cloud and
give our conclusions.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid121">
            <p noindent="true">
              <i>
This work was done in collaboration with
<ref xlink:href="http://www.comp.nus.edu.sg/~hebs/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Bingsheng He</ref>, NUS,
Singapore.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid122" level="1">
      <bodyTitle>Fault tolerant data processing</bodyTitle>
      <subsection id="uid123" level="2">
        <bodyTitle>Fast recovery</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp81448">
            <firstname>Orçun</firstname>
            <lastname>Yildiz</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Hadoop has emerged as a prominent tool for Big Data processing in
large-scale clouds. Failures are inevitable in large-scale systems,
especially in shared environments. Consequently, Hadoop was designed
with hardware failures in mind. In particular, Hadoop handles
machine failures by re-executing all the tasks of the failed
machine. Unfortunately, the efforts to handle failures are entirely
entrusted to the core of Hadoop and hidden from Hadoop
schedulers. This may prevent Hadoop schedulers from meeting their
objectives (e.g., fairness, job priority, performance) and can
significantly impact the performance of the applications.</p>
        <p>In our previous work, we addressed this issue through the design and
implementation of a new scheduling strategy called Chronos. Chronos
is conducive to improving the performance of Map-Reduce applications
by enabling an early action upon failure detection. Chronos tries to
launch recovery tasks immediately by preempting tasks belonging to
low priority jobs, thus avoiding the need to wait until slots are
freed.</p>
        <p>In <ref xlink:href="#kerdata-2016-bid26" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we further investigated the potential
benefit of launching local recovery tasks by implementing and
evaluating Chronos*. To this end, we slightly changed the smart slot
allocation strategy of Chronos into an aggressive slot allocation
strategy. With Chronos, recovery tasks with higher priority would
preempt the selected tasks with less priority. With Chronos*, we
also allow recovery tasks to preempt the selected tasks with the
same priority (e.g., recovery tasks belonging to the same job as the
selected tasks). The experimental results indicate that Chronos*
results in 100 % locality execution for recovery tasks thanks to
its aggressive slot allocation strategy. Moreover, Chronos* improves
the completion time of the jobs by up to 17 %.</p>
      </subsection>
      <subsection id="uid124" level="2">
        <bodyTitle>Dynamic replica placement</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp82720">
            <firstname>Pierre</firstname>
            <lastname>Matri</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Large-scale applications are ever-increasingly geo-distributed.
Maintaining the highest possible <i>data locality</i> is crucial to
ensure high performance of such applications. Dynamic replication
addresses this problem by dynamically creating replicas of
frequently accessed data close to the clients. This data is often
stored in decentralized storage systems such as Dynamo or Voldemort,
which offer support for <i>mutable data</i>.</p>
        <p>However, existing approaches to dynamic replication for such mutable
data remain centralized, thus incompatible with these systems. We
introduce a write-enabled dynamic replication scheme that leverages
the decentralized architecture of such storage systems. We propose
an algorithm enabling clients to locate tentatively the closest data
replica without prior request to any metadata node. Large-scale
experiments show a read latency decrease of up to 42% compared to
other state-of-the-art, caching-based solutions.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid125">
            <p noindent="true">
              <i>
This work was done in collaboration with
<ref xlink:href="https://www.datsi.fi.upm.es/~mperez/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">María Pérez</ref>, UPM,
Spain.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid126" level="1">
      <bodyTitle>Advanced data management on clouds</bodyTitle>
      <subsection id="uid127" level="2">
        <bodyTitle>Benchmarking Spark and Flink</bodyTitle>
        <participants>
          <person key="kerdata-2015-idp113776">
            <firstname>Ovidiu-Cristian</firstname>
            <lastname>Marcu</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Spark and Flink are two Apache-hosted data analytics frameworks that
represent the state of the art in modern in-memory Map-Reduce
processing. They facilitate the development of multi-step data
pipelines using directed acyclic graph (DAG) patterns. In the
framework of our BigStorage project, we performed a comparative
study <ref xlink:href="#kerdata-2016-bid27" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> which evaluates the performance of
Spark versus Flink. The objective is to identify and explain the
impact of the different architectural choices and the parameter
configurations on the perceived end-to-end performance.</p>
        <p>Based on empirical evidence, the study points out that in Big Data
processing there is not a single framework for all data types, sizes
and job patterns and emphasizes a set of design choices that play an
important role in the behaviour of a Big Data framework: memory
management, pipelined execution, optimizations and parameter
configuration easiness. What raises our attention is that a
streaming engine (i.e., Flink) delivers in many benchmarks better
performance than a batch-based engine (i.e., Spark), showing that a
more general Big Data architecture (treating batches as finite sets
of streamed data) is plausible and may subsume both streaming and
batching use cases.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid128">
            <p noindent="true">
              <i>
This work was done in collaboration with
<ref xlink:href="https://www.datsi.fi.upm.es/~mperez/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">María Pérez</ref>, UPM,
Spain.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid129" level="2">
        <bodyTitle>Geo-distributed graph processing</bodyTitle>
        <participants>
          <person key="kerdata-2016-idp152368">
            <firstname>Chi</firstname>
            <lastname>Zhou</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
        </participants>
        <p>Graph processing is an emerging model adopted by a wide range of
applications to easily parallelize the computations over graph
data. Partitioning graph processing workloads to multiple machines
is an important task for reducing the communication cost and
improving the performance of graph processing jobs. Recently, many
real-world applications store their data on multiple geographically
distributed datacenters (DCs) to ensure flexible and low-latency
services. Due to the limited Wide Area Network (WAN) bandwidths and
the network heterogeneity of the geo-distributed DCs, existing graph
partitioning methods need to be redesigned to improve the
performance of graph processing jobs in geo-distributed DCs.</p>
        <p>To address the above challenges, we propose a heterogeneity-aware
graph partitioning method named G-Cut, which aims at minimizing the
runtime of graph processing jobs in geo-distributed DCs while
satisfying the WAN usage budget. G-Cut is a two-stage graph
partitioning method. In the traffic-aware graph partitioning stage,
we adopt the one-pass edge assignment to place edges into different
partitions while minimizing the inter-DC data traffic size. In the
network-aware partition refinement stage, we map the partitions
obtained in the first stage onto different DCs in order to minimize
the inter-DC data transfer time. We evaluate the effectiveness and
efficiency of G-Cut using real-world graphs and the evaluation
results show that G-Cut can achieve both lower WAN usage and shorter
inter-DC data transfer time compared to state-of-the-art graph
partitioning methods.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid130">
            <p noindent="true">
              <i>
This work was done in collaboration with
<ref xlink:href="http://www.comp.nus.edu.sg/~hebs/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Bingsheng He</ref>, NUS,
Singapore.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid131" level="2">
        <bodyTitle>Fairness and scheduling</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp81448">
            <firstname>Orçun</firstname>
            <lastname>Yildiz</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Recently, Map-Reduce and its open-source implementation Hadoop have
emerged as prevalent tools for big data analysis in the cloud. Fair
resource allocation in-between jobs and users is an important issue,
especially in multi-tenant environments such as clouds. Several
scheduling policies have been developed to preserve fairness in
multi-tenant Hadoop clusters. At the core of these schedulers,
simple (non-) preemptive approaches are employed to free resources
for tasks belonging to jobs with less share. For example, Hadoop
Fair Scheduler is equipped with two approaches: wait and kill. While
wait may introduce a serious violation in fairness, kill may result
in a huge waste of resources. Yet, recently some works have
introduced a preemption approach in shared Hadoop clusters.</p>
        <p>To this end, we closely examine three approaches including wait,
kill and preemption when Hadoop Fair Scheduler is employed for
ensuring fair execution between multiple concurrent jobs. We perform
extensive experiments to assess the impact of these approaches on
performance and resource utilization while ensuring fairness. Our
experimental results bring out the differences between these
approaches and illustrate that these approaches are only sub-optimal
for different workloads and cluster configurations: the efficiency
of achieving fairness and the overall performance vary with the
workload composition, resource availability and the cost of the
adopted preemption technique.</p>
      </subsection>
      <subsection id="uid132" level="2">
        <bodyTitle>Stragglers in Map-Reduce</bodyTitle>
        <participants>
          <person key="kerdata-2016-idp140112">
            <firstname>Tien-Dat</firstname>
            <lastname>Phan</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
        </participants>
        <p>Big Data systems (e.g., Map-Reduce, Hadoop, Spark) rely increasingly
on speculative execution to mask slow tasks also known as stragglers
because a job's execution time is dominated by the slowest task
instance. Big Data systems typically identify stragglers and
speculatively run copies of those tasks with the expectation that a copy
may complete faster to shorten job execution times.</p>
        <p>There is a rich body of recent results on straggler mitigation in
Map-Reduce. However, the majority of these do not consider the
problem of accurately detecting stragglers. Instead, they adopt a
particular straggler detection approach and then study its
effectiveness in terms of performance, e.g., reduction in job
completion time, or its efficiency, e.g., extra resource usage.</p>
        <p>In this work, we consider a complete framework for straggler
detection and mitigation. We start with a set of metrics that can be
used to characterize and detect stragglers such as Precision,
Recall, Detection Latency, Undetected Time and Fake Positive. We
then develop an architectural model by which these metrics can be
linked to measures of performance including execution time and
system energy overheads.</p>
        <p>We further conduct a series of experiments
to demonstrate which metrics and approaches are more effective in
detecting stragglers and are also predictive of effectiveness in
terms of performance and energy efficiency. For example, our results
indicate that the default Hadoop straggler detector could be made
more effective. In certain cases, precision is low and only 65 %
of those detected are actual stragglers and recall, i.e.,
the proportion of stragglers which are actually detected, is also relatively low at
56 %. For the same case, the hierarchical approach (i.e., a
green-driven detector based on the default one) achieves a precision
of 98 % and a recall of 33 %.</p>
        <p>Further, these increases in precision can be used to achieve
lower execution time and energy consumption, and thus higher
performance and energy efficiency. Compared to the default Hadoop
mechanism, energy consumption is reduced by almost 30 %. These
results demonstrate how our framework can offer useful insights and
be applied in practical settings to characterize and design new
straggler detection mechanisms for Map-Reduce systems.</p>
        <descriptionlist>
          <label>
            <b>Collaboration.</b>
          </label>
          <li id="uid133">
            <p noindent="true">
              <i>
This work was carried out in collaboration with
<ref xlink:href="http://gaupy.org/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Guillaume Aupy</ref> and
<ref xlink:href="http://www.cse.psu.edu/~pxr3/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Padma Raghavan</ref> whilst they
were affiliated with Vanderbilt University, USA.</i>
            </p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
  </resultats>
  <contrats id="uid134">
    <bodyTitle>Bilateral Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid135" level="1">
      <bodyTitle>Bilateral Contracts with Industry</bodyTitle>
      <descriptionlist>
        <label>Microsoft: Z-CloudFlow (2013–2016).</label>
        <li id="uid136">
          <p noindent="true">In the framework of the Joint Inria-Microsoft Research Center,
this project is a follow-up to the
<ref xlink:href="http://www.msr-inria.fr/projects/a-brain/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">A-Brain</ref> project.
The goal of this new project is to propose a framework for the
efficient processing of scientific workflows in clouds. This
approach will leverage the cloud infrastructure capabilities for
handling and processing large data volumes.</p>
          <p>In order to support data-intensive workflows, the cloud-based
solution will: adapt the workflows to the cloud environment and
exploit its capabilities; optimize data transfers to provide
reasonable times; manage data and tasks so that they can be
efficiently placed and accessed during execution.</p>
          <p>The validation will be performed using real-life applications,
first on the Grid5000 platform, then on the Azure cloud
environment, access being granted by Microsoft through an
<i>Azure for Research Award</i> received by G. Antoniu. The
project also provides funding for the PhD thesis of Luis
Pineda-Morales, started in 2014.</p>
          <descriptionlist>
            <label>
              <b>Collaboration.</b>
            </label>
            <li id="uid137">
              <p noindent="true">
                <i>
The project is being conducted in collaboration with the Zenith
team from Montpellier, led by
<ref xlink:href="http://www-sop.inria.fr/members/Patrick.Valduriez/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Patrick
Valduriez</ref>.</i>
              </p>
            </li>
          </descriptionlist>
        </li>
        <label>Huawei: HIRP Low-Latency Storage for Stream Data
(2016–2017).</label>
        <li id="uid138">
          <p noindent="true">The goal of this project is to explore the plausible paths towards
a dedicated storage solution for low-latency stream storage. Such
a solution should provide on the one hand traditional storage
functionality and on the other hand stream-like performance (i.e.,
low-latency I/O access to items and ranges of items).</p>
          <p>We plan to investigate the main requirements and challenges,
evaluate the different design choices (e.g., a standalone
component vs. an extension of an existing Big Data solution like
HDFS) and then propose an architectural overview.</p>
        </li>
      </descriptionlist>
    </subsection>
  </contrats>
  <partenariat id="uid139">
    <bodyTitle>Partnerships and Cooperations</bodyTitle>
    <subsection id="uid140" level="1">
      <bodyTitle>National Initiatives</bodyTitle>
      <subsection id="uid141" level="2">
        <bodyTitle>ANR</bodyTitle>
        <subsection id="uid142" level="3">
          <bodyTitle>OverFlow (2015–2019)</bodyTitle>
          <simplelist>
            <li id="uid143">
              <p noindent="true">Project Acronym: OverFlow.</p>
            </li>
            <li id="uid144">
              <p noindent="true">Project Title: Workflow Data Management as a Service for
Multisite Applications.</p>
            </li>
            <li id="uid145">
              <p noindent="true">Coordinator: Alexandru Costan.</p>
            </li>
            <li id="uid146">
              <p noindent="true">Duration: October 2015–October 2019.</p>
            </li>
            <li id="uid147">
              <p noindent="true">Other Partners: None (Young Researcher Project).</p>
            </li>
            <li id="uid148">
              <p noindent="true">External collaborators:
<ref xlink:href="http://www.mcs.anl.gov/person/kate-keahey/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Kate Keahey</ref>
(University of Chicago and Argonne National Laboratory),
<ref xlink:href="http://www.bnicolae.net/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Bogdan Nicolae</ref> (Huawei Research)
and <ref xlink:href="https://www.france-bioinformatique.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Christophe
Blanchet</ref> (Institut Français de Bioinformatique).</p>
            </li>
            <li id="uid149">
              <p noindent="true">Abstract: This JCJC project led by Alexandru Costan
investigates approaches to data management enabling an efficient
execution of geographically distributed workflows running on
multi-site clouds. Ultimately, OverFlow will propose a new,
pioneering paradigm: Workflow Data Management as a Service — a
general and easy-to-use, cloud-provided service that bridges for
the first time the gap between single- and multi-site workflow
data management. It aims to reap economic benefits from the
geo-diversity while accelerating the scientific discovery through
a democratization of access to globally distributed data.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid150" level="2">
        <bodyTitle>Other National Projects</bodyTitle>
        <subsection id="uid151" level="3">
          <bodyTitle>DISCOVERY (2015–2019)</bodyTitle>
          <simplelist>
            <li id="uid152">
              <p noindent="true">Project Acronym: DISCOVERY.</p>
            </li>
            <li id="uid153">
              <p noindent="true">Project Title: DIStributed and COoperative framework to
manage Virtual EnviRonments autonomicallY.</p>
            </li>
            <li id="uid154">
              <p noindent="true">Coordinator:
<ref xlink:href="http://lebre.adrien.free.fr/alebre/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Adrien Lèbre</ref>.</p>
            </li>
            <li id="uid155">
              <p noindent="true">Duration: 2015–2019.</p>
            </li>
            <li id="uid156">
              <p noindent="true">Partners: Inria Project-Teams including ASAP, ASCOLA, Avalon,
Myriads, and KerData.</p>
            </li>
            <li id="uid157">
              <p noindent="true">Abstract: An Inria Project Lab, led by
<ref xlink:href="http://lebre.adrien.free.fr/alebre/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Adrien Lèbre</ref> (ASCOLA),
that aims at exploring a new way of operating Utility Computing
(UC) resources by leveraging any facilities available through the
Internet. The goal is to deliver widely distributed platforms that can
better match the geographical dispersal of users, as well as the
unending demand.</p>
              <p>Within DISCOVERY, S. Ibrahim (KerData Inria Team) is working with
<ref xlink:href="http://graal.ens-lyon.fr/~gfedak/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Gilles Fedak</ref> (Avalon
Inria Project-Team) to address the VM images management challenge.</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid158" level="3">
          <bodyTitle>ADT Damaris</bodyTitle>
          <simplelist>
            <li id="uid159">
              <p noindent="true">Project Acronym: ADT Damaris</p>
            </li>
            <li id="uid160">
              <p noindent="true">Project Title: Technology development action for the Damaris
environment.</p>
            </li>
            <li id="uid161">
              <p noindent="true">Coordinator: Alexandru Costan.</p>
            </li>
            <li id="uid162">
              <p noindent="true">Duration: 2016–2018.</p>
            </li>
            <li id="uid163">
              <p noindent="true">Abstract: This action aims to support the
development of the Damaris software. Inria's <i>Technological
Development Office</i> (D2T, <i>Direction du Développement
Technologique</i>) provided 2 years of funding support for a senior
engineer.</p>
              <p>Hadi Salimi is funded through this project to document, test and
extend the <ref xlink:href="http://damaris.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Damaris</ref> software
and make it a safely distributable product.</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid164" level="3">
          <bodyTitle>Grid'5000.</bodyTitle>
          <p>We are members of the Grid'5000 community and run experiments on the
Grid'5000 platform on a daily basis.</p>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid165" level="1">
      <bodyTitle>European Initiatives</bodyTitle>
      <subsection id="uid166" level="2">
        <bodyTitle>FP7 and H2020 Projects</bodyTitle>
        <subsection id="uid167" level="3">
          <bodyTitle>BigStorage</bodyTitle>
          <simplelist>
            <li id="uid168">
              <p noindent="true">Title: BigStorage: Storage-based Convergence between HPC and
Cloud to handle Big Data.</p>
            </li>
            <li id="uid169">
              <p noindent="true">Programme: H2020.</p>
            </li>
            <li id="uid170">
              <p noindent="true">Duration: January 2015–December 2018.</p>
            </li>
            <li id="uid171">
              <p noindent="true">Coordinator: Universidad Politécnica de Madrid (UPM).</p>
            </li>
            <li id="uid172">
              <p noindent="true">Partners:</p>
              <simplelist>
                <li id="uid173">
                  <p noindent="true">Barcelona Supercomputing Center — Centro Nacional de
Supercomputación (Spain)</p>
                </li>
                <li id="uid174">
                  <p noindent="true">CA Technologies Development Spain (Spain)</p>
                </li>
                <li id="uid175">
                  <p noindent="true">CEA — Commissariat à l'énergie atomique et aux énergies
alternatives (France)</p>
                </li>
                <li id="uid176">
                  <p noindent="true">Deutsches Klimarechenzentrum (Germany)</p>
                </li>
                <li id="uid177">
                  <p noindent="true">Foundation for Research and Technology Hellas (Greece)</p>
                </li>
                <li id="uid178">
                  <p noindent="true">Fujitsu Technology Solutions (Germany)</p>
                </li>
                <li id="uid179">
                  <p noindent="true">Johannes Gutenberg Universitaet Mainz (Germany)</p>
                </li>
                <li id="uid180">
                  <p noindent="true">Universidad Politécnica de Madrid (Spain)</p>
                </li>
                <li id="uid181">
                  <p noindent="true">Seagate Systems UK (United Kingdom)</p>
                </li>
              </simplelist>
            </li>
            <li id="uid182">
              <p noindent="true">Inria contact: G. Antoniu and
<ref xlink:href="http://lebre.adrien.free.fr/alebre/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Adrien Lèbre</ref>.</p>
            </li>
            <li id="uid183">
              <p noindent="true">URL: <ref xlink:href="http://www.bigstorage-project.eu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>bigstorage-project.<allowbreak/>eu/</ref>.</p>
            </li>
            <li id="uid184">
              <p noindent="true">Description: BigStorage is a European Training Network (ETN)
whose main goal is to train future <i>data scientists</i>. It aims at
enabling them and us to apply holistic and interdisciplinary
approaches to take advantage of a data-overwhelmed world. This world
requires <i>HPC</i> and <i>Cloud</i> infrastructures with a
redefinition of <i>storage</i> architectures underpinning them —
focusing on meeting highly ambitious performance and <i>energy</i>
usage objectives. The KerData team will be hosting 2 <i>Early
Stage Researchers</i> in this framework.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid185" level="1">
      <bodyTitle>International
Initiatives</bodyTitle>
      <subsection id="uid186" level="2">
        <bodyTitle>Inria International Labs</bodyTitle>
        <subsection id="uid187" level="3">
          <bodyTitle>JLESC: Joint Laboratory on Extreme-Scale Computing</bodyTitle>
          <p>The <ref xlink:href="https://jlesc.github.io/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Joint Laboratory on
Extreme-Scale Computing</ref> is jointly run by Inria, UIUC, ANL, BSC,
JSC and RIKEN/AICS. It was created in 2014 as a follow-up to the
Inria-UIUC JLPC, the <i>Joint Laboratory for Petascale
Computing</i>.</p>
          <p>The KerData team has been collaborating with teams from ANL and UIUC
within this lab since 2009 on several topics in the areas of I/O,
storage, in situ processing and cloud computing. This
collaboration was initially formalized as the
<i>Data@Exascale</i> Associate Team with ANL and UIUC (2013–2015)
followed by <i>Data@Exascale 2</i> Associate Team with ANL
(2016–2018).</p>
          <p>Since 2015, Gabriel Antoniu serves as a topic leader for Inria for
the <i>I/O, Storage and In Situ Processing</i> topic.</p>
          <subsection id="uid188" level="4">
            <bodyTitle>Associate Team involved in the International Lab:
Data@Exascale 2</bodyTitle>
            <sanspuceslist>
              <li id="uid189">
                <p noindent="true">Project Acronym: Data@Exascale 2.</p>
              </li>
              <li id="uid190">
                <p noindent="true">Project Title: Convergent Data Storage and Processing
Approaches for Exascale Computing and Big Data Analytics.</p>
              </li>
              <li id="uid191">
                <p noindent="true">International Partner:</p>
                <simplelist>
                  <li id="uid192">
                    <p noindent="true">Argonne National Laboratory (United
States) — Mathematics and Computer Science Division (MCS) —
<ref xlink:href="http://www.mcs.anl.gov/person/rob-ross/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Rob Ross</ref>.</p>
                  </li>
                </simplelist>
              </li>
              <li id="uid193">
                <p noindent="true">Start year: 2013.</p>
              </li>
              <li id="uid194">
                <p noindent="true">URL: <ref xlink:href="http://www.irisa.fr/kerdata/data-at-exascale/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>irisa.<allowbreak/>fr/<allowbreak/>kerdata/<allowbreak/>data-at-exascale/</ref>.</p>
              </li>
              <li id="uid195">
                <p noindent="true">Description: In the past few years, countries including
the United States, the European Union, Japan and China have set up
aggressive plans to get closer to what appears to be the next goal
in terms of high-performance computing (HPC): Exaflop computing, a
target which is now considered reachable by the next-generation
supercomputers in 2020-2023. While these government-led
initiatives have naturally focused on the big challenges of
Exascale for the development of new hardware and software
architectures, the quite recent emergence of the Big Data
phenomenon introduces what could be called a tectonic shift that
is impacting the entire research landscape for Exascale
computing. As data generation capabilities in most science domains
are now growing substantially faster than computational
capabilities, causing these domains to become data-intensive, new
challenges appeared in terms of volumes and velocity for data to
be stored, processed and analyzed on the future Exascale machines.</p>
                <p>To face the challenges generated by the exponential data growth (a
general phenomenon in many fields), some progress has already
been made in recent years in the rapidly-developing,
industry-led field of cloud-based Big Data analytics, where
advanced tools emerged, relying on machine-learning techniques and
predictive analytics.</p>
                <p>Unfortunately, these advances cannot be
immediately applied to Exascale computing: the tools and cultures
of the two worlds, HPC (High-Performance Computing) and BDA (Big
Data Analytics) have developed in a divergent fashion (in terms of
major focus and technical approaches), to the detriment of
both. The two worlds share however multiple similar challenges and
unification now appears as essential in order to address the
future challenges of major application domains that can benefit
from both.</p>
                <p>The scientific program we propose for the Data@Exascale 2
Associate Team is defined from this new, highly-strategic
perspective and builds on the idea that the design of innovative
approaches to data I/O, storage and processing allowing Big Data
analytics techniques and the newest HPC architectures to leverage
each other clearly appears as a key catalyst factor for the
convergence process.</p>
              </li>
            </sanspuceslist>
          </subsection>
        </subsection>
      </subsection>
      <subsection id="uid196" level="2">
        <bodyTitle>Inria International Partners</bodyTitle>
        <subsection id="uid197" level="3">
          <bodyTitle>DataCloud@Work</bodyTitle>
          <sanspuceslist>
            <li id="uid198">
              <p noindent="true">Title: DataCloud@Work.</p>
            </li>
            <li id="uid199">
              <p noindent="true">International Partner:</p>
              <simplelist>
                <li id="uid200">
                  <p noindent="true">Polytechnic University of Bucharest (Romania), Computer
Science Department, Nicolae Tapus and Valentin Cristea.</p>
                </li>
              </simplelist>
            </li>
            <li id="uid201">
              <p noindent="true">Duration: 4 years.</p>
            </li>
            <li id="uid202">
              <p noindent="true">Start year: 2013. The status of IIP was established right
after the end of our former <i>DataCloud@work</i> Associate Team
(2010–2012).</p>
            </li>
            <li id="uid203">
              <p noindent="true">URL:
<ref xlink:href="https://www.irisa.fr/kerdata/doku.php?id=cloud_at_work:start" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>irisa.<allowbreak/>fr/<allowbreak/>kerdata/<allowbreak/>doku.<allowbreak/>php?id=cloud_at_work:start</ref>.</p>
            </li>
            <li id="uid204">
              <p noindent="true">Description: Our research topics address the area of
distributed data management for cloud services, focusing on
autonomic storage. The goal is to explore how to build an efficient,
secure and reliable storage IaaS for data-intensive distributed
applications running in cloud environments by enabling an
autonomic behavior.</p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
      <subsection id="uid205" level="2">
        <bodyTitle>Informal International Partners</bodyTitle>
        <sanspuceslist>
          <li id="uid206">
            <p noindent="true">National University of Singapore (NUS): We collaborate on
resource management for workflows in the cloud and optimizing
graph processing in geo-distributed data-centers.</p>
          </li>
        </sanspuceslist>
      </subsection>
    </subsection>
    <subsection id="uid207" level="1">
      <bodyTitle>International
Research Visitors</bodyTitle>
      <subsection id="uid208" level="2">
        <bodyTitle>Visits of International Scientists</bodyTitle>
        <sanspuceslist>
          <li id="uid209">
            <p noindent="true">Guillaume Aupy (Vanderbilt University) visited the KerData
team for one week (February 2016).</p>
          </li>
        </sanspuceslist>
      </subsection>
      <subsection id="uid210" level="2">
        <bodyTitle>Visits to International Teams</bodyTitle>
        <subsection id="uid211" level="3">
          <bodyTitle>Research Stays Abroad</bodyTitle>
          <sanspuceslist>
            <li id="uid212">
              <p noindent="true">CIC-IPN, Mexico:</p>
              <participants>
                <person key="kerdata-2014-idp62712">
                  <firstname>Gabriel</firstname>
                  <lastname>Antoniu</lastname>
                </person>
                <person key="kerdata-2014-idp66888">
                  <firstname>Alexandru</firstname>
                  <lastname>Costan</lastname>
                </person>
                <person key="kerdata-2014-idp77520">
                  <firstname>Luis Eduardo</firstname>
                  <lastname>Pineda Morales</lastname>
                </person>
                <person key="kerdata-2014-idp82720">
                  <firstname>Pierre</firstname>
                  <lastname>Matri</lastname>
                </person>
              </participants>
              <p>From October 31 to November 4, four members of our team visited
the Informatics Research Centre of the National Polytechnic
Institute (CIC-IPN for its acronym in Spanish) in Mexico City,
Mexico.</p>
              <p>The visit was a follow-up to previous discussions held with the
Network and Data Science Laboratory. The goal is to create a
scientific collaboration on the grounds of cloud-based big data
for smart cities, for which a proposal has been submitted in
August to the program ECOS-NORD (Mexico-France). The visit
included scientific presentations from both teams, a plenary talk
from KerData to the IPN community, as well as discussions on
future common research lines. Additionally, we held meetings with
the partnering coordinator to talk about possible funding sources
for student exchanges.</p>
            </li>
            <li id="uid213">
              <p noindent="true">ANL, USA:</p>
              <participants>
                <person key="kerdata-2015-idp118848">
                  <firstname>Nathanaël</firstname>
                  <lastname>Cheriere</lastname>
                </person>
              </participants>
              <p>Nathanaël Cheriere visited Matthieu Dorier and Rob Ross at ANL for
5.5 months, co-funded by the PUF NextGen project in the context of
the Joint Laboratory for Extreme-Scale Computing (JLESC).</p>
            </li>
            <li id="uid214">
              <p noindent="true">Vanderbilt University, USA:</p>
              <participants>
                <person key="kerdata-2016-idp140112">
                  <firstname>Tien-Dat</firstname>
                  <lastname>Phan</lastname>
                </person>
              </participants>
              <p>Tien-Dat Phan visited Guillaume Aupy and Padma Raghavan at Vanderbilt
University for 2 months, funded by Vanderbilt University.</p>
            </li>
            <li id="uid215">
              <p noindent="true">Technische Universität München and Huawei Research Center in
Munich:</p>
              <participants>
                <person key="kerdata-2015-idp113776">
                  <firstname>Ovidiu-Cristian</firstname>
                  <lastname>Marcu</lastname>
                </person>
              </participants>
              <p>Ovidiu-Cristian Marcu is doing an internship at Huawei in Munich,
Germany for 4 months, starting October 2016. The goal is to create
a framework to improve memory management for streaming systems.</p>
            </li>
            <li id="uid216">
              <p noindent="true">National University of Singapore, Singapore:</p>
              <participants>
                <person key="kerdata-2016-idp140112">
                  <firstname>Tien-Dat</firstname>
                  <lastname>Phan</lastname>
                </person>
              </participants>
              <p>Tien-Dat Phan is visiting NUS (Bingsheng He) for 3 months,
co-funded by a Mobility grant from University Bretagne Loire (UBL)
and NUS.</p>
            </li>
          </sanspuceslist>
        </subsection>
      </subsection>
    </subsection>
  </partenariat>
  <diffusion id="uid217">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid218" level="1">
      <bodyTitle>Promoting Scientific Activities</bodyTitle>
      <subsection id="uid219" level="2">
        <bodyTitle>Scientific Events Organisation</bodyTitle>
        <subsection id="uid220" level="3">
          <bodyTitle>General Chair, Scientific Chair</bodyTitle>
          <simplelist>
            <li id="uid221">
              <p noindent="true">Luc Bougé: Vice-Chair of the Steering Committee of the
<ref xlink:href="http://www.europar.org/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Euro-Par</ref> Series of conferences.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid222" level="2">
        <bodyTitle>Scientific Events Selection</bodyTitle>
        <subsection id="uid223" level="3">
          <bodyTitle>Chair of Conference Program Committees</bodyTitle>
          <simplelist>
            <li id="uid224">
              <p noindent="true">Gabriel Antoniu:
Vice-Chair of the Program Committee of the ACM/IEEE CCGrid 2016
international conference (Hybrid and Mobile Clouds Tracks),
Cartagena, May 2016.</p>
            </li>
            <li id="uid225">
              <p noindent="true">Alexandru Costan:
Program Co-Chair of the ScienceCloud 2016 international workshop held
in conjunction with HPDC 2016, Kyoto, June 2016.</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid226" level="3">
          <bodyTitle>Member of the Conference Program Committees</bodyTitle>
          <simplelist>
            <li id="uid227">
              <p noindent="true">Gabriel Antoniu: ACM HPDC 2016, IEEE Cluster 2016, PDSW-DISCS
workshop (held in conjunction with ACM/IEEE SC16 conference),
ARMS-CC 2016 workshop (held in conjunction with the PODC 2016
conference).</p>
            </li>
            <li id="uid228">
              <p noindent="true">Luc Bougé: Euro-Par 2016, IPDPS 2017, ICDE 2017, Euro-Par
2017, ISPDC 2017.</p>
            </li>
            <li id="uid229">
              <p noindent="true">Alexandru Costan: ACM/IEEE SC'16 BoF Applications Track,
ACM/IEEE CCGrid 2016, IEEE BigData 2016, ICPP 2016, ARMS-CC 2016
workshop (held in conjunction with PODC 2016), FiCLOUD 2016,
ScienceCloud 2016 workshop (held in conjunction with HPDC 2016).</p>
            </li>
            <li id="uid230">
              <p noindent="true">Shadi Ibrahim: IEEE Cluster 2016, IEEE/ACM CCGrid 2016, IEEE
ICPADS 2016, IEEE CloudCom 2016, IEEE ICA3PP 2016, SCRAMBL 2016
(held in conjunction with Euro-Par 2016).</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid231" level="3">
          <bodyTitle>Reviewer</bodyTitle>
          <simplelist>
            <li id="uid232">
              <p noindent="true">Alexandru Costan: ACM/IEEE SC16, ACM HPDC 2016, IEEE Cluster
2016.</p>
            </li>
            <li id="uid233">
              <p noindent="true">Shadi Ibrahim: ACM HPDC 2016, Euro-Par 2016.</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid234" level="2">
        <bodyTitle>Journal</bodyTitle>
        <subsection id="uid235" level="3">
          <bodyTitle>Member of the Editorial Boards</bodyTitle>
          <simplelist>
            <li id="uid236">
              <p noindent="true">Gabriel Antoniu: Future Generation Computer Systems, Special
Issue on Resource Management for Big Data Platforms.</p>
            </li>
            <li id="uid237">
              <p noindent="true">Luc Bougé: Concurrency and Computation: Practice and
Experience, Special Issues on the Euro-Par conference.</p>
            </li>
            <li id="uid238">
              <p noindent="true">Alexandru Costan: Soft Computing Journal, Special Issue on
Autonomic Computing and Big Data Platforms</p>
            </li>
          </simplelist>
        </subsection>
        <subsection id="uid239" level="3">
          <bodyTitle>Reviewer, Reviewing Activities</bodyTitle>
          <simplelist>
            <li id="uid240">
              <p noindent="true">Alexandru Costan: IEEE Transactions on Parallel and
Distributed Systems, Future Generation Computer Systems,
Concurrency and Computation Practice and Experience, IEEE
Communications, IEEE Transactions on Storage, Information Sciences</p>
            </li>
            <li id="uid241">
              <p noindent="true">Shadi Ibrahim: IEEE Transactions on Parallel and Distributed
Systems, Future Generation Computer Systems, IEEE Transactions on
Big Data, IEEE Transactions on Cloud Computing, Springer Parallel
Computing, Computers and Electrical Engineering, Journal of
Healthcare Engineering</p>
            </li>
          </simplelist>
        </subsection>
      </subsection>
      <subsection id="uid242" level="2">
        <bodyTitle>Invited Talks</bodyTitle>
        <simplelist>
          <li id="uid243">
            <p noindent="true">Gabriel Antoniu:</p>
            <simplelist>
              <li id="uid244">
                <p noindent="true">BDEC 2016: Invited keynote talk at the <i>4th Big Data
and Exascale Computing (BDEC) workshop</i>, Frankfurt, June 2016.</p>
              </li>
              <li id="uid245">
                <p noindent="true">First Chinese-French Workshop on Extreme Computing:
<i>Damaris: Jitter-Free I/O Management and In Situ
Visualization of HPC Simulations using Dedicated Cores</i>,
Guangzhou, May 2016.</p>
              </li>
              <li id="uid246">
                <p noindent="true">5th JLESC workshop: <i>Spark versus Flink:
Understanding Performance in Big Data Analytics Frameworks</i>,
Lyon, June 2016.</p>
              </li>
              <li id="uid247">
                <p noindent="true">Inria/CIC-IPN workshop: <i>Scalable Big Data Processing
on Clouds: A-Brain and Z-CloudFlow</i>, Mexico City, November
2016.</p>
              </li>
              <li id="uid248">
                <p noindent="true">Inria/Technicolor workshop: <i>Spark versus Flink:
Understanding Performance in Big Data Analytics Frameworks</i>,
Rennes, November 2016.</p>
              </li>
              <li id="uid249">
                <p noindent="true">6th JLESC workshop: <i>Storage-Based Convergence
Between HPC and Big Data</i>, Kobe, Japan, December 2016.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid250">
            <p noindent="true">Luc Bougé:</p>
            <simplelist>
              <li id="uid251">
                <p noindent="true">Comin Labs-DGA-ENSAI BigData day: <i>Support logiciel
pour la gestion de données distribuées à très grande échelle</i>,
IRISA, January 2016.</p>
              </li>
              <li id="uid252">
                <p noindent="true">Société des agrégés: <i>Teaching informatics as a
first-class subject</i>, annual meeting of the Regional Section,
April 2016.</p>
              </li>
              <li id="uid253">
                <p noindent="true">Luminy Algorithmics and Programming School: <i>Big Data:
Tremendous challenges, great solutions</i>, Preparatory school
teachers in Mathematics and Informatics, May 2016.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid254">
            <p noindent="true">Alexandru Costan:</p>
            <simplelist>
              <li id="uid255">
                <p noindent="true">UPB Scientific Days: <i>Big Data and Extreme Computing:
A Storage-Based Pathway to Convergence</i>, The UPB Research
Workshop on Distributed Systems, University Politehnica of
Bucharest, June 2016.</p>
              </li>
              <li id="uid256">
                <p noindent="true">Inria/CIC-IPN workshop: <i>Science Driven, Scalable
Data-Intensive Processing on Clouds</i>, Mexico City, November
2016.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid257">
            <p noindent="true">Shadi Ibrahim:</p>
            <simplelist>
              <li id="uid258">
                <p noindent="true">Inria Scientific Days: <i>Big Data management at scale</i>,
Rennes, June 2016.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid259">
            <p noindent="true">Chi Zhou:</p>
            <simplelist>
              <li id="uid260">
                <p noindent="true">5th JLESC workshop: <i>Incorporating Probabilistic
Optimizations for Resource Provisioning of Cloud Workflow
Processing</i>, Lyon, June 2016.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid261">
            <p noindent="true">Nathanaël Cheriere:</p>
            <simplelist>
              <li id="uid262">
                <p noindent="true">6th JLESC Workshop: <i>Accelerating the Scatter
Operation on Dragonfly Networks</i>, Kobe, Japan, December 2016.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid263">
            <p noindent="true">Orçun Yildiz:</p>
            <simplelist>
              <li id="uid264">
                <p noindent="true">Grid'5000 Winter School: <i>Investigating the Root
Causes of I/O Interference on Grid'5000</i>, Grenoble, February
2016.</p>
              </li>
              <li id="uid265">
                <p noindent="true">6th JLESC Workshop: <i>Towards Efficient Big Data
Processing in HPC Systems</i>, Kobe, Japan, December 2016.</p>
              </li>
            </simplelist>
          </li>
          <li id="uid266">
            <p noindent="true">Luis Eduardo Pineda Morales:</p>
            <simplelist>
              <li id="uid267">
                <p noindent="true">5th JLESC workshop: <i>Exploring Elastic Scaling on
Chameleon Cloud</i>, Lyon, June 2016.</p>
              </li>
              <li id="uid268">
                <p noindent="true">Inria / CIC-IPN workshop: <i>Metadata Management for
Geo-distributed Cloud Workflows</i>, Mexico City, November 2016.</p>
              </li>
            </simplelist>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid269" level="2">
        <bodyTitle>Leadership within the Scientific Community</bodyTitle>
        <simplelist>
          <li id="uid270">
            <p noindent="true">Gabriel Antoniu: Scientific leader of the KerData
project-team.</p>
          </li>
          <li id="uid271">
            <p noindent="true">Gabriel Antoniu: Topic leader for Inria for the <i>Data
storage, I/O and in situ processing</i> topic, supervising
collaboration activities in this area within the JLESC, Joint
Inria-Illinois-ANL-BSC-JSC-RIKEN/AICS Laboratory for Extreme-Scale
Computing.</p>
          </li>
          <li id="uid272">
            <p noindent="true">Luc Bougé: serves as a Vice-President of the <i>French
Society for Informatics</i> (SIF), in charge of the teaching
department.</p>
          </li>
          <li id="uid273">
            <p noindent="true">Gabriel Antoniu: Work package leader within the BigStorage
H2020 ETN project for the <i>Data Science</i> work package.</p>
          </li>
          <li id="uid274">
            <p noindent="true">Alexandru Costan: Leader of the <i>Smart Cities</i> Working
Group within the BigStorage H2020 ETN project.</p>
          </li>
          <li id="uid275">
            <p noindent="true">Shadi Ibrahim: Leader for the <i>Resource Management and
Scheduling for Data-Intensive HPC Workflows</i> activity within the
JLESC, Joint Inria-Illinois-ANL-BSC-JSC-RIKEN/AICS Laboratory for
Extreme-Scale Computing.</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid276" level="2">
        <bodyTitle>Scientific Expertise</bodyTitle>
        <simplelist>
          <li id="uid277">
            <p noindent="true">Gabriel Antoniu served as a project evaluator for the ANR
2016 call (Phase 1).</p>
          </li>
          <li id="uid278">
            <p noindent="true">Luc Bougé: Member of the jury for the <i>Agrégation de
mathématiques</i> and the <i>CAPES de mathématiques</i>. These
national committees select mathematics teachers for
secondary schools and high-schools, respectively.</p>
          </li>
          <li id="uid279">
            <p noindent="true">Luc Bougé has been solicited by the Ministry of Education to
participate in the committee in charge of designing and preparing
the new <i>Informatics track</i> in the CAPES of mathematics. It
will be offered for the 2017 session.</p>
          </li>
          <li id="uid280">
            <p noindent="true">Shadi Ibrahim served as a project evaluator in the DOE-ECP
Program 2016: The research and development in Software Technology
of the US Department of Energy's (DOE's) <i>Exascale Computing
Project</i> (ECP).</p>
          </li>
        </simplelist>
      </subsection>
      <subsection id="uid281" level="2">
        <bodyTitle>Research Administration</bodyTitle>
        <simplelist>
          <li id="uid282">
            <p noindent="true">Luc Bougé: Nominated to sit on the CNU (<i>National
University Council</i>) in the <i>Informatics</i> Section (27). His
term ended in November 2016.</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid283" level="1">
      <bodyTitle>Teaching - Supervision -
Juries</bodyTitle>
      <subsection id="uid284" level="2">
        <bodyTitle>Teaching</bodyTitle>
        <descriptionlist>
          <label>Gabriel Antoniu</label>
          <li id="uid285">
            <simplelist>
              <li id="uid286">
                <p noindent="true">Master (Engineering Degree, 5th year): Big Data, 24 hours
(lectures), M2 level, ENSAI (<i>École nationale supérieure de
la statistique et de l'analyse de l'information</i>), Bruz,
France.</p>
              </li>
              <li id="uid287">
                <p noindent="true">Master: Cloud Computing, 15 hours (lectures and lab
sessions), M2 level, ENSAI (<i>École nationale supérieure de
la statistique et de l'analyse de l'information</i>), Bruz,
France.</p>
              </li>
              <li id="uid288">
                <p noindent="true">Master: Distributed Systems, 8 hours (lectures), M2 level,
ALMA Master, Distributed Architectures module, University of
Nantes, France.</p>
              </li>
              <li id="uid289">
                <p noindent="true">Master: Scalable Distributed Systems, 12 hours (lectures),
M1 level, SDS Module, EIT ICT Labs Master School, France.</p>
              </li>
            </simplelist>
          </li>
          <label>Luc Bougé</label>
          <li id="uid290">
            <simplelist>
              <li id="uid291">
                <p noindent="true">Bachelor: Introduction to programming concepts, 36 hours
(lectures), L3 level, Informatics program, ENS Rennes, France.</p>
              </li>
              <li id="uid292">
                <p noindent="true">Master: Introduction to compilation, 24 hours (exercise and
practical classes), M1 level, Informatics program,
Univ. Rennes I, France.</p>
              </li>
            </simplelist>
          </li>
          <label>Alexandru Costan</label>
          <li id="uid293">
            <simplelist>
              <li id="uid294">
                <p noindent="true">Bachelor: Software Engineering and Java Programming,
28 hours (lab sessions), L3, INSA Rennes.</p>
              </li>
              <li id="uid295">
                <p noindent="true">Bachelor: Databases, 68 hours (lectures and lab sessions),
L2, INSA Rennes, France.</p>
              </li>
              <li id="uid296">
                <p noindent="true">Bachelor: Practical case studies, 24 hours (project), L3,
INSA Rennes.</p>
              </li>
              <li id="uid297">
                <p noindent="true">Master: Big Data and Applications, 36 hours (lectures, lab
sessions, project), M1, INSA Rennes.</p>
              </li>
            </simplelist>
          </li>
          <label>Shadi Ibrahim</label>
          <li id="uid298">
            <simplelist>
              <li id="uid299">
                <p noindent="true">Master (Engineering Degree, 5th year): Big Data, 24 hours
(lectures and lab sessions), M2 level, ENSAI (<i>École
nationale supérieure de la statistique et de l'analyse de
l'information</i>), Bruz, France.</p>
              </li>
              <li id="uid300">
                <p noindent="true">Master: Cloud Computing and Hadoop Technologies, 16.5 hours
(lectures and lab sessions), M2 level, ENSAI (<i>École
nationale supérieure de la statistique et de l'analyse de
l'information</i>), Bruz, France.</p>
              </li>
              <li id="uid301">
                <p noindent="true">Master: Cloud and Big data, 24 hours (lectures and lab
sessions), M1 level, ENS Rennes, France.</p>
              </li>
              <li id="uid302">
                <p noindent="true">Master: Cloud1, Map-Reduce,
15 hours (lectures and lab sessions), M2 level, Ecole des Mines
de Nantes (EMN Nantes), Nantes, France.</p>
              </li>
            </simplelist>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid303" level="2">
        <bodyTitle>Supervision</bodyTitle>
        <subsection id="uid304" level="3">
          <bodyTitle>PhD in progress</bodyTitle>
          <descriptionlist>
            <label>Lokman Rahmani:</label>
            <li id="uid305">
              <p noindent="true"><i>Big Data Management For Next Generation
High Performance Computing Systems</i>, thesis started in October
2013, co-advised by Gabriel Antoniu and Luc Bougé.</p>
            </li>
            <label>Luis Eduardo Pineda Morales:</label>
            <li id="uid306">
              <p noindent="true"><i>Efficient Big Data
Management for Geographically Distributed Workflows</i>, thesis
started in January 2014, co-advised by Alexandru Costan and
Gabriel Antoniu. Defense planned in Spring 2017.</p>
            </li>
            <label>Orçun Yildiz:</label>
            <li id="uid307">
              <p noindent="true"><i>Energy-Efficient Big Data Management in
Petascale Supercomputers and Beyond</i>, thesis started in
September 2014, co-advised by Shadi Ibrahim and Gabriel Antoniu.</p>
            </li>
            <label>Tien-Dat Phan:</label>
            <li id="uid308">
              <p noindent="true"><i>Green Big Data Processing in
Large-scale Clouds</i>, thesis started in October 2014, co-advised
by Shadi Ibrahim and Luc Bougé.</p>
            </li>
            <label>Pierre Matri:</label>
            <li id="uid309">
              <p noindent="true"><i>Predictive Models for Big Data</i>, thesis
started in March 2015, co-advised by María Pérez and Gabriel
Antoniu.</p>
            </li>
            <label>Mohammed-Yacine Taleb:</label>
            <li id="uid310">
              <p noindent="true"><i>Energy-impact of data
consistency management in Clouds and Beyond</i>, thesis started in
August 2015, co-advised by Shadi Ibrahim and Gabriel Antoniu.</p>
            </li>
            <label>Ovidiu-Cristian Marcu:</label>
            <li id="uid311">
              <p noindent="true"><i>Efficient data transfer and
streaming strategies for workflow-based Big Data processing</i>,
thesis started in October 2015, co-advised by Alexandru Costan and
Gabriel Antoniu.</p>
            </li>
            <label>Nathanaël Cheriere:</label>
            <li id="uid312">
              <p noindent="true"><i>Resource Management and Scheduling
for Big Data Applications in Large-scale Systems</i>, thesis
started in September 2016, co-advised by Shadi Ibrahim and Gabriel
Antoniu.</p>
            </li>
            <label>Paul Le Noac'h:</label>
            <li id="uid313">
              <p noindent="true"><i>Workflow Data Management as a Service
for Multi-Site Applications</i>, thesis started in November 2016,
co-advised by Alexandru Costan and Luc Bougé.</p>
            </li>
          </descriptionlist>
        </subsection>
      </subsection>
      <subsection id="uid314" level="2">
        <bodyTitle>Juries</bodyTitle>
        <descriptionlist>
          <label>Gabriel Antoniu:</label>
          <li id="uid315">
            <p noindent="true">Referee for the PhD thesis of Ms. Zhou Chi
at the Nanyang Technological University (NTU), Singapore (January
2016).</p>
          </li>
          <label>Luc Bougé:</label>
          <li id="uid316">
            <p noindent="true">Referee for the PhD thesis of Matthieu Perrin,
LINA, Univ. Nantes (June 2016). Member of several PhD and HDR
thesis juries in France.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid317" level="2">
        <bodyTitle>Miscellaneous</bodyTitle>
        <subsection id="idp4655744" level="3">
          <bodyTitle>Responsibilities</bodyTitle>
          <descriptionlist>
            <label>Luc Bougé:</label>
            <li id="uid318">
              <p noindent="true">Co-ordinator between ENS Rennes and the Inria
Research Center and the IRISA laboratory.</p>
            </li>
            <label>Luc Bougé:</label>
            <li id="uid319">
              <p noindent="true">In charge of the Bachelor level (L3) and of the
student seminar series at the Informatics Department of ENS
Rennes.</p>
            </li>
            <label>Alexandru Costan:</label>
            <li id="uid320">
              <p noindent="true">In charge of communication at the Computer
Science Department of INSA Rennes.</p>
            </li>
            <label>Alexandru Costan:</label>
            <li id="uid321">
              <p noindent="true">In charge of the organization of the IRISA
D1 Department Seminar.</p>
            </li>
            <label>Shadi Ibrahim:</label>
            <li id="uid322">
              <p noindent="true">Member of Grid'5000 Sites Committee:
Responsible for the Rennes site.</p>
            </li>
          </descriptionlist>
        </subsection>
        <subsection id="idp4665456" level="3">
          <bodyTitle>Tutorials</bodyTitle>
          <descriptionlist>
            <label>Gabriel Antoniu and Shadi Ibrahim</label>
            <li id="uid323">
              <p noindent="true">gave tutorials on
<i>Big Data technologies and Hadoop</i> at the BigStorage Winter
School in Barcelona (March 2016).</p>
            </li>
            <label>Shadi Ibrahim</label>
            <li id="uid324">
              <p noindent="true">gave a tutorial on <i>Green Big Data
Processing using Hadoop</i> at the Euro-Par 2016 conference,
Grenoble, France (with Anne-Cécile Orgerie).</p>
            </li>
          </descriptionlist>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid325" level="1">
      <bodyTitle>Popularization</bodyTitle>
      <descriptionlist>
        <label>Luc Bougé:</label>
        <li id="uid326">
          <descriptionlist>
            <label>Master Program, Rennes.</label>
            <li id="uid327">
              <p noindent="true">Invited presentation to the M2
students about <i>Informatics as a scientific activity:
Toward a responsible research</i> (December 2016).</p>
            </li>
          </descriptionlist>
        </li>
        <label>Alexandru Costan:</label>
        <li id="uid328">
          <descriptionlist>
            <label>Master Program, Rennes.</label>
            <li id="uid329">
              <p noindent="true">Invited presentation to the M2
students about <i>Big Data Analytics</i> (November 2016).</p>
            </li>
          </descriptionlist>
        </li>
      </descriptionlist>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    
    <biblStruct id="kerdata-2016-bid21" type="misc" rend="refer" n="refercite:cheriere:hal-01400271">
      <identifiant type="hal" value="hal-01400271"/>
      <monogr>
        <title level="m">Design and Evaluation of Topology-aware Scatter and AllGather Algorithms for Dragonfly Networks</title>
        <author>
          <persName key="kerdata-2015-idp118848">
            <foreName>Nathanaël</foreName>
            <surname>Cheriere</surname>
            <initial>N.</initial>
          </persName>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01400271" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01400271</ref>
        </imprint>
      </monogr>
      <note type="howpublished">Supercomputing 2016</note>
      <note type="bnote">Poster</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid43" type="article" rend="refer" n="refercite:costan:hal-00767034">
      <identifiant type="hal" value="hal-00767034"/>
      <analytic>
        <title level="a">TomusBlobs: Scalable Data-intensive Processing on Azure Clouds</title>
        <author>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Goetz</foreName>
            <surname>Brasche</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">CCPE - Concurrency and Computation: Practice and Experience</title>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00767034" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00767034</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid44" type="article" rend="refer" n="refercite:damota:hal-01057325">
      <identifiant type="hal" value="hal-01057325"/>
      <analytic>
        <title level="a">Machine Learning Patterns for Neuroimaging-Genetic Studies in the Cloud</title>
        <author>
          <persName>
            <foreName>Benoit</foreName>
            <surname>Da Mota</surname>
            <initial>B.</initial>
          </persName>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="parietal-2014-idm28112">
            <foreName>Gaël</foreName>
            <surname>Varoquaux</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Goetz</foreName>
            <surname>Brasche</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Patricia J.</foreName>
            <surname>Conrod</surname>
            <initial>P. J.</initial>
          </persName>
          <persName>
            <foreName>Hervé</foreName>
            <surname>Lemaitre</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Tomáš</foreName>
            <surname>Paus</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Marcella</foreName>
            <surname>Rietschel</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Vincent</foreName>
            <surname>Frouin</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Jean-Baptiste</foreName>
            <surname>Poline</surname>
            <initial>J.-B.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="parietal-2014-idm31000">
            <foreName>Bertrand</foreName>
            <surname>Thirion</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Frontiers in Neuroinformatics</title>
        <imprint>
          <biblScope type="volume">8</biblScope>
          <dateStruct>
            <month>April</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01057325" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01057325</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid16" type="inproceedings" rend="refer" n="refercite:dorier:hal-00715252">
      <identifiant type="hal" value="hal-00715252"/>
      <analytic>
        <title level="a">Damaris: How to Efficiently Leverage Multicore Parallelism to Achieve Scalable, Jitter-free I/O</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="roma-2014-idp88208">
            <foreName>Franck</foreName>
            <surname>Cappello</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Marc</foreName>
            <surname>Snir</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Leigh</foreName>
            <surname>Orf</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">CLUSTER - IEEE International Conference on Cluster Computing</title>
        <loc>Beijing, China</loc>
        <imprint>
          <publisher>
            <orgName>IEEE</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2012</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00715252" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00715252</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid45" type="inproceedings" rend="refer" n="refercite:dorier:hal-00916091">
      <identifiant type="hal" value="hal-00916091"/>
      <analytic>
        <title level="a">CALCioM: Mitigating I/O Interference in HPC Systems through Cross-Application Coordination</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Robert</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Dries</foreName>
            <surname>Kimpe</surname>
            <initial>D.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">IPDPS - International Parallel and Distributed Processing Symposium</title>
        <loc>Phoenix, United States</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00916091" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00916091</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid20" type="inproceedings" rend="refer" n="refercite:dorier:hal-01224846">
      <identifiant type="doi" value="10.1145/2828612.2828622"/>
      <identifiant type="hal" value="hal-01224846"/>
      <analytic>
        <title level="a">Lessons Learned from Building In Situ Coupling Frameworks</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="moais-2014-idp120976">
            <foreName>Matthieu</foreName>
            <surname>Dreher</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Tom</foreName>
            <surname>Peterka</surname>
            <initial>T.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="moais-2014-idp86304">
            <foreName>Bruno</foreName>
            <surname>Raffin</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Justin M.</foreName>
            <surname>Wozniak</surname>
            <initial>J. M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">ISAV 2015 - First Workshop on In Situ Infrastructures for Enabling Extreme-Scale Analysis and Visualization (held in conjunction with SC15)</title>
        <loc>Austin, United States</loc>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01224846" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01224846</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid46" type="inproceedings" rend="refer" n="refercite:dorier:hal-01025670">
      <identifiant type="hal" value="hal-01025670"/>
      <analytic>
        <title level="a">Omnisc'IO: A Grammar-Based Approach to Spatial and Temporal I/O Patterns Prediction</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Robert</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">SC14 - International Conference for High Performance Computing, Networking, Storage and Analysis</title>
        <loc>New Orleans, United States</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">IEEE, ACM</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01025670" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01025670</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid47" type="article" rend="refer" n="refercite:dorier:hal-01238103">
      <identifiant type="doi" value="10.1109/TPDS.2015.2485980"/>
      <identifiant type="hal" value="hal-01238103"/>
      <analytic>
        <title level="a">Using Formal Grammars to Predict I/O Behaviors in HPC: the Omnisc'IO Approach</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Robert</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes">
        <title level="j">TPDS - IEEE Transactions on Parallel and Distributed Systems</title>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01238103" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01238103</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid48" type="article" rend="refer" n="refercite:nicolae:2010:inria-00511414:1">
      <identifiant type="hal" value="inria-00511414"/>
      <analytic>
        <title level="a">BlobSeer: Next-Generation Data Management for Large-Scale Infrastructures</title>
        <author>
          <persName>
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp65432">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="myriads-2014-idp74496">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">JPDC - Journal of Parallel and Distributed Computing</title>
        <imprint>
          <biblScope type="volume">71</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <month>February</month>
            <year>2011</year>
          </dateStruct>
          <biblScope type="pages">169–184</biblScope>
          <ref xlink:href="http://hal.inria.fr/inria-00511414/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>inria-00511414/<allowbreak/>en/</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid49" type="inproceedings" rend="refer" n="refercite:nicolae:2011:inria-00570682:1">
      <identifiant type="hal" value="inria-00570682"/>
      <analytic>
        <title level="a">Going Back and Forth: Efficient Multi-Deployment and Multi-Snapshotting on Clouds</title>
        <author>
          <persName>
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>John</foreName>
            <surname>Bresnahan</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Kate</foreName>
            <surname>Keahey</surname>
            <initial>K.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">HPDC 2011 - The 20th International ACM Symposium on High-Performance Parallel and Distributed Computing</title>
        <loc>San José, CA, United States</loc>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2011</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/inria-00570682/en" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>inria-00570682/<allowbreak/>en</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid50" type="article" rend="refer" n="refercite:tudoran:hal-01239128">
      <identifiant type="doi" value="10.1109/TCC.2015.2440254"/>
      <identifiant type="hal" value="hal-01239128"/>
      <analytic>
        <title level="a">OverFlow: Multi-Site Aware Big Data Management for Scientific Workflows on Clouds</title>
        <author>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes">
        <title level="j">IEEE Transactions on Cloud Computing</title>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239128" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239128</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid15" type="article" rend="year" n="cite:dorier:hal-01353890">
      <identifiant type="hal" value="hal-01353890"/>
      <analytic>
        <title level="a">Damaris: Addressing Performance Variability in Data Management for Post-Petascale Simulations</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="roma-2014-idp88208">
            <foreName>Franck</foreName>
            <surname>Cappello</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Marc</foreName>
            <surname>Snir</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Robert R.</foreName>
            <surname>Sisneros</surname>
            <initial>R. R.</initial>
          </persName>
          <persName key="kerdata-2014-idp81448">
            <foreName>Orcun</foreName>
            <surname>Yildiz</surname>
            <initial>O.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Tom</foreName>
            <surname>Peterka</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Leigh G.</foreName>
            <surname>Orf</surname>
            <initial>L. G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid02846">
        <idno type="issn">2329-4949</idno>
        <title level="j">ACM Transactions on Parallel Computing</title>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01353890" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01353890</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid31" type="article" rend="year" n="cite:dorier:hal-01238103">
      <identifiant type="doi" value="10.1109/TPDS.2015.2485980"/>
      <identifiant type="hal" value="hal-01238103"/>
      <analytic>
        <title level="a">Using Formal Grammars to Predict I/O Behaviors in HPC: the Omnisc'IO Approach</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Robert</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00746">
        <idno type="issn">1045-9219</idno>
        <title level="j">IEEE Transactions on Parallel and Distributed Systems</title>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01238103" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01238103</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid37" type="article" rend="year" n="cite:dorier:hal-01330735">
      <identifiant type="doi" value="10.1016/j.future.2016.03.002"/>
      <identifiant type="hal" value="hal-01330735"/>
      <analytic>
        <title level="a">On the energy footprint of I/O management in Exascale HPC systems</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp81448">
            <foreName>Orcun</foreName>
            <surname>Yildiz</surname>
            <initial>O.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="myriads-2014-idp65360">
            <foreName>Anne-Cécile</foreName>
            <surname>Orgerie</surname>
            <initial>A.-C.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00622">
        <idno type="issn">0167-739X</idno>
        <title level="j">Future Generation Computer Systems</title>
        <imprint>
          <biblScope type="volume">62</biblScope>
          <dateStruct>
            <month>March</month>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">17–28</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01330735" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01330735</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid39" type="article" rend="year" n="cite:ibrahim:hal-01166252">
      <identifiant type="doi" value="10.1016/j.future.2015.01.005"/>
      <identifiant type="hal" value="hal-01166252"/>
      <analytic>
        <title level="a">Governing Energy Consumption in Hadoop through CPU Frequency Scaling: an Analysis</title>
        <author>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2016-idp140112">
            <foreName>Tien-Dat</foreName>
            <surname>Phan</surname>
            <initial>T.-D.</initial>
          </persName>
          <persName key="myriads-2014-idp74496">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Houssem-Eddine</foreName>
            <surname>Chihoub</surname>
            <initial>H.-E.</initial>
          </persName>
          <persName>
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00622">
        <idno type="issn">0167-739X</idno>
        <title level="j">Future Generation Computer Systems</title>
        <imprint>
          <dateStruct>
            <month>January</month>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">14</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01166252" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01166252</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid36" type="incollection" rend="year" n="cite:memishi:hal-01338393">
      <identifiant type="hal" value="hal-01338393"/>
      <analytic>
        <title level="a">On the Dynamic Shifting of the MapReduce Timeout</title>
        <author>
          <persName>
            <foreName>Bunjamin</foreName>
            <surname>Memishi</surname>
            <initial>B.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>María S.</foreName>
            <surname>Pérez-Hernández</surname>
            <initial>M. S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no">
        <editor role="editor">
          <persName>
            <foreName>Rajkumar</foreName>
            <surname>Kannan</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Raihan Ur</foreName>
            <surname>Rasool</surname>
            <initial>R. U.</initial>
          </persName>
          <persName>
            <foreName>Hai</foreName>
            <surname>Jin</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>S.R.</foreName>
            <surname>Balasundaram</surname>
            <initial>S. R.</initial>
          </persName>
        </editor>
        <title level="m">Managing and Processing Big Data in Cloud Computing</title>
        <imprint>
          <publisher>
            <orgName>IGI Global</orgName>
          </publisher>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01338393" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01338393</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid42" type="article" rend="year" n="cite:simmhan:hal-01351218">
      <identifiant type="doi" value="10.1002/cpe.3668"/>
      <identifiant type="hal" value="hal-01351218"/>
      <analytic>
        <title level="a">Cloud computing for data-driven science and engineering: Special issue on the Cloud computing for data-driven science and engineering workshop (ScienceCloud 2012)</title>
        <author>
          <persName>
            <foreName>Yogesh</foreName>
            <surname>Simmhan</surname>
            <initial>Y.</initial>
          </persName>
          <persName>
            <foreName>Lavanya</foreName>
            <surname>Ramakrishnan</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Carole</foreName>
            <surname>Goble</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00435">
        <idno type="issn">1532-0626</idno>
        <title level="j">Concurrency and Computation: Practice and Experience</title>
        <imprint>
          <biblScope type="volume">28</biblScope>
          <biblScope type="number">4</biblScope>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">947–949</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01351218" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01351218</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid33" type="article" rend="year" n="cite:tudoran:hal-01239128">
      <identifiant type="doi" value="10.1109/TCC.2015.2440254"/>
      <identifiant type="hal" value="hal-01239128"/>
      <analytic>
        <title level="a">OverFlow: Multi-Site Aware Big Data Management for Scientific Workflows on Clouds</title>
        <author>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid02851">
        <idno type="issn">2168-7161</idno>
        <title level="j">IEEE Transactions on Cloud Computing</title>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239128" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239128</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid38" type="article" rend="year" n="cite:tudoran:hal-01239124">
      <identifiant type="doi" value="10.1016/j.future.2015.01.016"/>
      <identifiant type="hal" value="hal-01239124"/>
      <analytic>
        <title level="a">JetStream: Enabling high throughput live event streaming on multi-site clouds</title>
        <author>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Olivier</foreName>
            <surname>Nano</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Ivo</foreName>
            <surname>Santos</surname>
            <initial>I.</initial>
          </persName>
          <persName>
            <foreName>Hakan</foreName>
            <surname>Soncu</surname>
            <initial>H.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00622">
        <idno type="issn">0167-739X</idno>
        <title level="j">Future Generation Computer Systems</title>
        <imprint>
          <biblScope type="volume">54</biblScope>
          <dateStruct>
            <month>January</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239124" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239124</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid41" type="article" rend="year" n="cite:wu:hal-01338404">
      <identifiant type="hal" value="hal-01338404"/>
      <analytic>
        <title level="a">iShare: Balancing I/O performance isolation and disk I/O efficiency in virtualized environments</title>
        <author>
          <persName>
            <foreName>Song</foreName>
            <surname>Wu</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Songqiao</foreName>
            <surname>Tao</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Xiao</foreName>
            <surname>Ling</surname>
            <initial>X.</initial>
          </persName>
          <persName>
            <foreName>Hao</foreName>
            <surname>Fan</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Hai</foreName>
            <surname>Jin</surname>
            <initial>H.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00435">
        <idno type="issn">1532-0626</idno>
        <title level="j">Concurrency and Computation: Practice and Experience</title>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01338404" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01338404</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid26" type="article" rend="year" n="cite:yildiz:hal-01338336">
      <identifiant type="doi" value="10.1016/j.future.2016.02.015"/>
      <identifiant type="hal" value="hal-01338336"/>
      <analytic>
        <title level="a">Enabling Fast Failure Recovery in Shared Hadoop Clusters: Towards Failure-Aware Scheduling</title>
        <author>
          <persName key="kerdata-2014-idp81448">
            <foreName>Orcun</foreName>
            <surname>Yildiz</surname>
            <initial>O.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00622">
        <idno type="issn">0167-739X</idno>
        <title level="j">Future Generation Computer Systems</title>
        <imprint>
          <dateStruct>
            <month>March</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01338336" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01338336</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid25" type="incollection" rend="year" n="cite:zhou:hal-01346745">
      <identifiant type="hal" value="hal-01346745"/>
      <analytic>
        <title level="a">A Taxonomy and Survey of Scientific Computing in the Cloud</title>
        <author>
          <persName>
            <foreName>Amelie Chi</foreName>
            <surname>Zhou</surname>
            <initial>A. C.</initial>
          </persName>
          <persName>
            <foreName>Bingsheng</foreName>
            <surname>He</surname>
            <initial>B.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no">
        <title level="m">Big Data: Principles and Paradigms</title>
        <title level="s">eScience and Big Data Workflows in Clouds: A Taxonomy and Survey</title>
        <imprint>
          <publisher>
            <orgName>Morgan Kaufmann</orgName>
          </publisher>
          <dateStruct>
            <month>June</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01346745" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01346745</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid32" type="inproceedings" rend="year" n="cite:cheriere:hal-01239341">
      <identifiant type="hal" value="hal-01239341"/>
      <analytic>
        <title level="a">On the Usability of Shortest Remaining Time First Policy in Shared Hadoop Clusters</title>
        <author>
          <persName key="kerdata-2015-idp118848">
            <foreName>Nathanaël</foreName>
            <surname>Cheriere</surname>
            <initial>N.</initial>
          </persName>
          <persName key="mutant-2016-idp159296">
            <foreName>Pierre</foreName>
            <surname>Donat-Bouillud</surname>
            <initial>P.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="myriads-2014-idp96208">
            <foreName>Matthieu</foreName>
            <surname>Simonin</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">SAC 2016 - The 31st ACM/SIGAPP Symposium on Applied Computing</title>
        <loc>Pisa, Italy</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239341" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239341</ref>
        </imprint>
        <meeting id="cid23923">
          <title>ACM Symposium on Applied Computing</title>
          <num>31</num>
          <abbr type="sigle">SAC</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid22" type="inproceedings" rend="year" n="cite:dorier:hal-01351919">
      <identifiant type="hal" value="hal-01351919"/>
      <analytic>
        <title level="a">Adaptive Performance-Constrained In Situ Visualization of Atmospheric Simulations</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Robert R.</foreName>
            <surname>Sisneros</surname>
            <initial>R. R.</initial>
          </persName>
          <persName>
            <foreName>Leonardo</foreName>
            <surname>Bautista-Gomez</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Tom</foreName>
            <surname>Peterka</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Leigh</foreName>
            <surname>Orf</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2014-idp78800">
            <foreName>Lokman</foreName>
            <surname>Rahmani</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp65432">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Cluster 2016 - The IEEE 2016 International Conference on Cluster Computing</title>
        <loc>Taipei, Taiwan</loc>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01351919" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01351919</ref>
        </imprint>
        <meeting id="cid81665">
          <title>IEEE International Conference on Cluster Computing</title>
          <num>2016</num>
          <abbr type="sigle">Cluster</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid27" type="inproceedings" rend="year" n="cite:marcu:hal-01347638">
      <identifiant type="hal" value="hal-01347638"/>
      <analytic>
        <title level="a">Spark versus Flink: Understanding Performance in Big Data Analytics Frameworks</title>
        <author>
          <persName key="kerdata-2015-idp113776">
            <foreName>Ovidiu-Cristian</foreName>
            <surname>Marcu</surname>
            <initial>O.-C.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>María S.</foreName>
            <surname>Pérez</surname>
            <initial>M. S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">Cluster 2016 - The IEEE 2016 International Conference on Cluster Computing</title>
        <loc>Taipei, Taiwan</loc>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01347638" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01347638</ref>
        </imprint>
        <meeting id="cid81665">
          <title>IEEE International Conference on Cluster Computing</title>
          <num>2016</num>
          <abbr type="sigle">Cluster</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid34" type="inproceedings" rend="year" n="cite:matri:hal-01304328">
      <identifiant type="doi" value="10.1145/2913712.2913715"/>
      <identifiant type="hal" value="hal-01304328"/>
      <analytic>
        <title level="a">Towards Efficient Location and Placement of Dynamic Replicas for Geo-Distributed Data Stores</title>
        <author>
          <persName key="kerdata-2014-idp82720">
            <foreName>Pierre</foreName>
            <surname>Matri</surname>
            <initial>P.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Jesús</foreName>
            <surname>Montes</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>María S.</foreName>
            <surname>Pérez</surname>
            <initial>M. S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ScienceCloud'16 - 7th Workshop on Scientific Cloud Computing (in conjunction with ACM HPDC 2016)</title>
        <loc>Kyoto, Japan</loc>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01304328" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01304328</ref>
        </imprint>
        <meeting id="cid625395">
          <title>Workshop on Scientific Cloud Computing</title>
          <num>7</num>
          <abbr type="sigle">ScienceCloud</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid10" type="inproceedings" rend="best" n="cite:matri:hal-01347652">
      <identifiant type="hal" value="hal-01347652"/>
      <analytic>
        <title level="a">Tyr: Blob Storage Meets Built-In Transactions</title>
        <author>
          <persName key="kerdata-2014-idp82720">
            <foreName>Pierre</foreName>
            <surname>Matri</surname>
            <initial>P.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Jesús</foreName>
            <surname>Montes</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>María S.</foreName>
            <surname>Pérez</surname>
            <initial>M. S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">IEEE ACM SC16 - The International Conference for High Performance Computing, Networking, Storage and Analysis 2016</title>
        <loc>Salt Lake City, United States</loc>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01347652" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01347652</ref>
        </imprint>
        <meeting id="cid107949">
          <title>International Conference for High Performance Computing, Networking, Storage and Analysis</title>
          <num>2016</num>
          <abbr type="sigle">SC</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid24" type="inproceedings" rend="year" n="cite:pinedamorales:hal-01395715">
      <identifiant type="hal" value="hal-01395715"/>
      <analytic>
        <title level="a">Managing Hot Metadata for Scientific Workflows on Multisite Clouds</title>
        <author>
          <persName>
            <foreName>Luis</foreName>
            <surname>Pineda-Morales</surname>
            <initial>L.</initial>
          </persName>
          <persName key="zenith-2014-idp86088">
            <foreName>Ji</foreName>
            <surname>Liu</surname>
            <initial>J.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="zenith-2014-idp70832">
            <foreName>Esther</foreName>
            <surname>Pacitti</surname>
            <initial>E.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="zenith-2014-idp68136">
            <foreName>Patrick</foreName>
            <surname>Valduriez</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Marta</foreName>
            <surname>Mattoso</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">BIGDATA 2016 - 2016 IEEE International Conference on Big Data</title>
        <loc>Washington, United States</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01395715" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01395715</ref>
        </imprint>
        <meeting id="cid624205">
          <title>IEEE International Conference on Big Data</title>
          <num>2016</num>
          <abbr type="sigle">IEEE BigData</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid23" type="inproceedings" rend="year" n="cite:yildiz:hal-01270630">
      <identifiant type="hal" value="hal-01270630"/>
      <analytic>
        <title level="a">On the Root Causes of Cross-Application I/O Interference in HPC Storage Systems</title>
        <author>
          <persName key="kerdata-2014-idp81448">
            <foreName>Orcun</foreName>
            <surname>Yildiz</surname>
            <initial>O.</initial>
          </persName>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Robert</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">IPDPS 2016 - The 30th IEEE International Parallel and Distributed Processing Symposium</title>
        <loc>Chicago, United States</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01270630" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01270630</ref>
        </imprint>
        <meeting id="cid87817">
          <title>IEEE International Parallel and Distributed Processing Symposium</title>
          <num>30</num>
          <abbr type="sigle">IPDPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid40" type="techreport" rend="year" n="cite:dorier:hal-01273718">
      <identifiant type="hal" value="hal-01273718"/>
      <monogr>
        <title level="m">Performance-Constrained In Situ Visualization of Atmospheric Simulations</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Robert R.</foreName>
            <surname>Sisneros</surname>
            <initial>R. R.</initial>
          </persName>
          <persName>
            <foreName>Leonardo</foreName>
            <surname>Bautista-Gomez</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Tom</foreName>
            <surname>Peterka</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Leigh G.</foreName>
            <surname>Orf</surname>
            <initial>L. G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Rob</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp78800">
            <foreName>Lokman</foreName>
            <surname>Rahmani</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp65432">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-8855</biblScope>
          <publisher>
            <orgName type="institution">Inria Rennes - Bretagne Atlantique</orgName>
          </publisher>
          <dateStruct>
            <month>February</month>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">27</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01273718" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01273718</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid30" type="techreport" rend="year" n="cite:matri:hal-01256563">
      <identifiant type="hal" value="hal-01256563"/>
      <monogr>
        <title level="m">Týr: Efficient Transactional Storage for Data-Intensive Applications</title>
        <author>
          <persName key="kerdata-2014-idp82720">
            <foreName>Pierre</foreName>
            <surname>Matri</surname>
            <initial>P.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Jesús</foreName>
            <surname>Montes</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>María S.</foreName>
            <surname>Pérez</surname>
            <initial>M. S.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RT-0473</biblScope>
          <publisher>
            <orgName type="institution">Inria Rennes Bretagne Atlantique ; Universidad Politécnica de Madrid</orgName>
          </publisher>
          <dateStruct>
            <month>January</month>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">25</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01256563" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01256563</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Technical Report</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid28" type="misc" rend="year" n="cite:cheriere:hal-01400271">
      <identifiant type="hal" value="hal-01400271"/>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" x-proceedings="no" x-invited-conference="no">
        <title level="m">Design and Evaluation of Topology-aware Scatter and AllGather Algorithms for Dragonfly Networks</title>
        <author>
          <persName key="kerdata-2015-idp118848">
            <foreName>Nathanaël</foreName>
            <surname>Cheriere</surname>
            <initial>N.</initial>
          </persName>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01400271" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01400271</ref>
        </imprint>
      </monogr>
      <note type="howpublished">Supercomputing 2016</note>
      <note type="bnote">Poster</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid29" type="unpublished" rend="year" n="cite:rahmani:hal-01290268">
      <identifiant type="hal" value="hal-01290268"/>
      <monogr>
        <title level="m">Towards Smart Visualization Framework for Climate Simulations</title>
        <author>
          <persName key="kerdata-2014-idp78800">
            <foreName>Lokman</foreName>
            <surname>Rahmani</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp65432">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Robert R.</foreName>
            <surname>Sisneros</surname>
            <initial>R. R.</initial>
          </persName>
          <persName>
            <foreName>Tom</foreName>
            <surname>Peterka</surname>
            <initial>T.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>March</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01290268" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01290268</ref>
        </imprint>
      </monogr>
      <note type="bnote">working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid35" type="unpublished" rend="year" n="cite:taleb:hal-01376923">
      <identifiant type="hal" value="hal-01376923"/>
      <monogr>
        <title level="m">Understanding how the network impacts performance and energy-efficiency in the RAMCloud storage system</title>
        <author>
          <persName>
            <foreName>Yacine</foreName>
            <surname>Taleb</surname>
            <initial>Y.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Toni</foreName>
            <surname>Cortes</surname>
            <initial>T.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01376923" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01376923</ref>
        </imprint>
      </monogr>
      <note type="bnote">working paper or preprint</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid14" type="article" rend="foot" n="footcite:googlestream">
      <analytic>
        <title level="a">MillWheel: fault-tolerant stream processing at internet scale</title>
        <author>
          <persName>
            <foreName>Tyler</foreName>
            <surname>Akidau</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Alex</foreName>
            <surname>Balikov</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Kaya</foreName>
            <surname>Bekiroğlu</surname>
            <initial>K.</initial>
          </persName>
          <persName>
            <foreName>Slava</foreName>
            <surname>Chernyak</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Josh</foreName>
            <surname>Haberman</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Reuven</foreName>
            <surname>Lax</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Sam</foreName>
            <surname>McVeety</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Daniel</foreName>
            <surname>Mills</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Paul</foreName>
            <surname>Nordstrom</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Sam</foreName>
            <surname>Whittle</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Proceedings of the VLDB Endowment</title>
        <imprint>
          <biblScope type="volume">6</biblScope>
          <biblScope type="number">11</biblScope>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <biblScope type="pages">1033–1044</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid5" type="article" rend="foot" n="footcite:mapreduce">
      <analytic>
        <title level="a">MapReduce: simplified data processing on large clusters</title>
        <author>
          <persName>
            <foreName>Jeffrey</foreName>
            <surname>Dean</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Sanjay</foreName>
            <surname>Ghemawat</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Communications of the ACM</title>
        <imprint>
          <biblScope type="volume">51</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2008</year>
          </dateStruct>
          <biblScope type="pages">107–113</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid19" type="article" rend="foot" n="footcite:swift">
      <identifiant type="doi" value="10.1016/j.parco.2011.05.005"/>
      <analytic>
        <title level="a">Swift: A language for distributed parallel scripting</title>
        <author>
          <persName>
            <foreName>Michael</foreName>
            <surname>Wilde</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Mihael</foreName>
            <surname>Hategan</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Justin M.</foreName>
            <surname>Wozniak</surname>
            <initial>J. M.</initial>
          </persName>
          <persName>
            <foreName>Ben</foreName>
            <surname>Clifford</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Daniel</foreName>
            <surname>Katz</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Ian T.</foreName>
            <surname>Foster</surname>
            <initial>I. T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Parallel Computing</title>
        <imprint>
          <biblScope type="volume">37</biblScope>
          <biblScope type="number">9</biblScope>
          <dateStruct>
            <year>2011</year>
          </dateStruct>
          <biblScope type="pages">633–652</biblScope>
          <ref xlink:href="http://dx.doi.org/10.1016/j.parco.2011.05.005" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>dx.<allowbreak/>doi.<allowbreak/>org/<allowbreak/>10.<allowbreak/>1016/<allowbreak/>j.<allowbreak/>parco.<allowbreak/>2011.<allowbreak/>05.<allowbreak/>005</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid6" type="misc" rend="foot" n="footcite:AmazonMapReduce">
      <monogr>
        <title level="m">Amazon Elastic Map-Reduce (EMR)</title>
        <imprint>
          <ref xlink:href="https://aws.amazon.com/emr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>aws.<allowbreak/>amazon.<allowbreak/>com/<allowbreak/>emr/</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid17" type="misc" rend="foot" n="footcite:decaf">
      <monogr>
        <title level="m">The Decaf Project</title>
        <imprint>
          <ref xlink:href="https://bitbucket.org/tpeterka1/decaf" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>bitbucket.<allowbreak/>org/<allowbreak/>tpeterka1/<allowbreak/>decaf</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid1" type="misc" rend="foot" n="footcite:digital-market-strategy">
      <monogr>
        <title level="m">Digital Single Market</title>
        <imprint>
          <dateStruct>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://ec.europa.eu/digital-single-market/en/digital-single-market" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>ec.<allowbreak/>europa.<allowbreak/>eu/<allowbreak/>digital-single-market/<allowbreak/>en/<allowbreak/>digital-single-market</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid8" type="misc" rend="foot" n="footcite:eesi">
      <monogr>
        <title level="m">European Exascale Software Initiative</title>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://www.eesi-project.eu" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>eesi-project.<allowbreak/>eu</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid2" type="misc" rend="foot" n="footcite:etp4hpc">
      <monogr>
        <title level="m">The European Technology Platform for High-Performance Computing</title>
        <imprint>
          <dateStruct>
            <year>2012</year>
          </dateStruct>
          <ref xlink:href="http://www.etp4hpc.eu" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>etp4hpc.<allowbreak/>eu</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid0" type="misc" rend="foot" n="footcite:cloud-europe">
      <monogr>
        <title level="m">European Cloud Strategy</title>
        <imprint>
          <dateStruct>
            <year>2012</year>
          </dateStruct>
          <ref xlink:href="https://ec.europa.eu/digital-single-market/en/european-cloud-computing-strategy" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>ec.<allowbreak/>europa.<allowbreak/>eu/<allowbreak/>digital-single-market/<allowbreak/>en/<allowbreak/>european-cloud-computing-strategy</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid12" type="misc" rend="foot" n="footcite:flink">
      <monogr>
        <title level="m">Apache Flink</title>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="http://flink.apache.org" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>flink.<allowbreak/>apache.<allowbreak/>org</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid7" type="misc" rend="foot" n="footcite:iesp">
      <monogr>
        <title level="m">International Exascale Software Program</title>
        <imprint>
          <dateStruct>
            <year>2011</year>
          </dateStruct>
          <ref xlink:href="http://www.exascale.org/iesp/Main_Page" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>exascale.<allowbreak/>org/<allowbreak/>iesp/<allowbreak/>Main_Page</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid4" type="misc" rend="foot" n="footcite:strategy-rennes">
      <monogr>
        <title level="m">Scientific challenges of the Inria Rennes-Bretagne Atlantique research centre</title>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://www.inria.fr/en/centre/rennes/research" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>en/<allowbreak/>centre/<allowbreak/>rennes/<allowbreak/>research</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid3" type="misc" rend="foot" n="footcite:strategic-plan">
      <monogr>
        <title level="m">Inria's strategic plan "Towards Inria 2020"</title>
        <imprint>
          <dateStruct>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://www.inria.fr/en/institute/strategy/strategic-plan" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>en/<allowbreak/>institute/<allowbreak/>strategy/<allowbreak/>strategic-plan</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid9" type="misc" rend="foot" n="footcite:jlesc">
      <monogr>
        <title level="m">Joint Laboratory for Extreme Scale Computing (JLESC)</title>
        <imprint>
          <ref xlink:href="https://jlesc.github.io" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>jlesc.<allowbreak/>github.<allowbreak/>io</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid11" type="misc" rend="foot" n="footcite:spark">
      <monogr>
        <title level="m">Apache Spark</title>
        <imprint>
          <ref xlink:href="http://spark.apache.org" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>spark.<allowbreak/>apache.<allowbreak/>org</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid13" type="misc" rend="foot" n="footcite:storm">
      <monogr>
        <title level="m">Storm</title>
        <imprint>
          <ref xlink:href="http://storm-project.net/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>storm-project.<allowbreak/>net/</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2016-bid18" type="misc" rend="foot" n="footcite:flowvr">
      <monogr>
        <title level="m">The FlowVR Project</title>
        <imprint>
          <dateStruct>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="http://flowvr.sourceforge.net/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>flowvr.<allowbreak/>sourceforge.<allowbreak/>net/</ref>
        </imprint>
      </monogr>
    </biblStruct>
  </biblio>
</raweb>
