<?xml version="1.0" encoding="utf-8"?>
<raweb xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" year="2015">
  <identification id="kerdata" isproject="true">
    <shortname>KERDATA</shortname>
    <projectName>Scalable Storage for Clouds and Beyond</projectName>
    <theme-de-recherche>Distributed and High Performance Computing</theme-de-recherche>
    <domaine-de-recherche>Networks, Systems and Services, Distributed Computing</domaine-de-recherche>
    <urlTeam>http://www.irisa.fr/kerdata/</urlTeam>
    <structure_exterieure type="Labs">
      <libelle>Institut de recherche en informatique et systèmes aléatoires (IRISA)</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Institut national des sciences appliquées de Rennes</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>Université Rennes 1</libelle>
    </structure_exterieure>
    <structure_exterieure type="Organism">
      <libelle>École normale supérieure de Rennes</libelle>
    </structure_exterieure>
    <header_dates_team>Creation of the Team: 2009 July 01, updated into Project-Team: 2012 July 01</header_dates_team>
    <LeTypeProjet>Project-Team</LeTypeProjet>
    <keywordsSdN>
      <term>1.1.4. - High performance computing</term>
      <term>1.1.6. - Cloud</term>
      <term>1.3. - Distributed Systems</term>
      <term>1.6. - Green Computing</term>
      <term>2.6.2. - Middleware</term>
      <term>3.1.3. - Distributed data</term>
      <term>3.1.8. - Big data (production, storage, transfer)</term>
      <term>3.3.3. - Big data analysis</term>
      <term>6.2.7. - High performance computing</term>
      <term>7.1. - Parallel and distributed algorithms</term>
    </keywordsSdN>
    <keywordsSecteurs>
      <term>1.1.2. - Molecular biology</term>
      <term>2.6.1. - Brain imaging</term>
      <term>3.2. - Climate and meteorology</term>
      <term>4.4.1. - Green computing</term>
    </keywordsSecteurs>
    <UR name="Rennes"/>
  </identification>
  <team id="uid1">
    <person key="kerdata-2014-idp62712">
      <firstname>Gabriel</firstname>
      <lastname>Antoniu</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Team leader, Inria, Senior
Researcher</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="kerdata-2014-idp64192">
      <firstname>Shadi</firstname>
      <lastname>Ibrahim</lastname>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, Researcher</moreinfo>
    </person>
    <person key="kerdata-2014-idp65432">
      <firstname>Luc</firstname>
      <lastname>Bougé</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>ENS Rennes, Professor</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="kerdata-2014-idp66888">
      <firstname>Alexandru</firstname>
      <lastname>Costan</lastname>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>INSA Rennes, Associate
Professor</moreinfo>
    </person>
    <person key="kerdata-2014-idp70872">
      <firstname>Loïc</firstname>
      <lastname>Cloatre</lastname>
      <categoryPro>Technique</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, until January 2015</moreinfo>
    </person>
    <person key="kerdata-2014-idp77520">
      <firstname>Luis Eduardo</firstname>
      <lastname>Pineda Morales</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Microsoft Research Inria Joint
Centre</moreinfo>
    </person>
    <person key="kerdata-2014-idp78800">
      <firstname>Lokman</firstname>
      <lastname>Rahmani</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Univ. Rennes I</moreinfo>
    </person>
    <person key="kerdata-2014-idp81448">
      <firstname>Orçun</firstname>
      <lastname>Yildiz</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria</moreinfo>
    </person>
    <person key="kerdata-2014-idp80104">
      <firstname>Tien Dat</firstname>
      <lastname>Phan</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Univ. Rennes I</moreinfo>
    </person>
    <person key="kerdata-2014-idp82720">
      <firstname>Pierre</firstname>
      <lastname>Matri</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Universidad Politécnica de Madrid, from
March 2015</moreinfo>
    </person>
    <person key="kerdata-2015-idp113776">
      <firstname>Ovidiu-Cristian</firstname>
      <lastname>Marcu</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, from October 2015</moreinfo>
    </person>
    <person key="kerdata-2015-idp115016">
      <firstname>Mohammed-Yacine</firstname>
      <lastname>Taleb</lastname>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, from August 2015</moreinfo>
    </person>
    <person key="kerdata-2015-idp116256">
      <firstname>Roxana-Ioana</firstname>
      <lastname>Roman</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Inria, Master intern,
from May 2015 until July 2015</moreinfo>
    </person>
    <person key="kerdata-2015-idp117544">
      <firstname>Antonio</firstname>
      <lastname>Aguilera</lastname>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Universidad de Granada, Invited
PhD student, from April 2015 until June 2015</moreinfo>
    </person>
    <person key="kerdata-2015-idp118848">
      <firstname>Nathanaël</firstname>
      <lastname>Cheriere</lastname>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>ENS Rennes, Master
intern, from February 2015 until July 2015</moreinfo>
    </person>
    <person key="linkmedia-2014-idp95392">
      <firstname>Aurélie</firstname>
      <lastname>Patier</lastname>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Univ. Rennes I</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Context: the need for scalable data management</bodyTitle>
      <p>We are witnessing a rapidly increasing number of application areas
generating and processing very large volumes of data on a regular
basis. Such applications are called
<i>data-intensive</i>. Governmental and commercial statistics,
climate modeling, cosmology, genetics, bio-informatics, high-energy
physics are just a few examples. In these fields, it becomes crucial
to efficiently store and manipulate massive data, which are
typically <i>shared</i> at a large scale and <i>concurrently
accessed</i>. In all these examples, the overall application
performance is highly dependent on the properties of the underlying
data management service. With the emergence of recent
infrastructures such as cloud computing platforms and post-petascale
architectures, achieving highly scalable data management has become
a critical challenge.</p>
      <p>The KerData project-team is namely focusing on <i>scalable data
storage and processing on clouds and post-petascale HPC
supercomputers</i>, according to the current needs and requirements
of data-intensive applications. We are especially concerned by the
applications of major international and industrial players in Cloud
Computing and Extreme-Scale High-Performance Computing (HPC), which
shape the long-term agenda of the Cloud Computing and Exascale HPC
research communities.</p>
    </subsection>
    <subsection id="uid4" level="1">
      <bodyTitle>Objective: efficient support for scalable data-intensive computing</bodyTitle>
      <p>Our research activities focus on the data storage and processing needs of data-intensive applications
that exhibit the need to handle:</p>
      <simplelist>
        <li id="uid5">
          <p noindent="true">Massive data BLOBs (Binary Large OBjects), in the order of
Terabytes, stored in a large number of nodes (thousands to tens of
thousands), accessed under heavy concurrency by a large number of
processes (thousands to tens of thousands at a time), with a
relatively fine access grain, in the order of Megabytes;</p>
        </li>
        <li id="uid6">
          <p noindent="true">Very large sets (millions) of small objects potentially arriving in streams, stored and processed on geographically distributed
infrastructures (e.g. multi-site clouds);</p>
        </li>
        <li id="uid7">
          <p noindent="true">Very large sets of scientific data processed on extreme-scale supercomputers.</p>
        </li>
      </simplelist>
      <p>Examples of such applications are:</p>
      <simplelist>
        <li id="uid8">
          <p noindent="true">Massively parallel data analytics for Big Data applications (e.g.,
Map-Reduce-based data analysis as currently enabled by frameworks such as Hadoop, Spark or Flink);</p>
        </li>
        <li id="uid9">
          <p noindent="true">Advanced cloud services for data storage and transfer for geographically distributed workflows
requiring efficient data sharing within and across multiple datacenters;</p>
        </li>
        <li id="uid10">
          <p noindent="true">Scalable solutions for I/O management and in situ
visualization for data-intensive scientific simulations (e.g. atmospheric simulations, computational fluid dynamics, etc.) running on
Extreme-Scale HPC systems.</p>
        </li>
      </simplelist>
    </subsection>
  </presentation>
  <fondements id="uid11">
    <bodyTitle>Research Program</bodyTitle>
    <subsection id="uid12" level="1">
      <bodyTitle>Our goals and methodology</bodyTitle>
      <p><i>Data-intensive applications</i> demonstrate common requirements
with respect to the need for data storage and I/O processing. These
requirements lead to several core challenges discussed below.</p>
      <descriptionlist>
        <label>Challenges related to cloud storage.</label>
        <li id="uid13">
          <p noindent="true">In the area of cloud data management, a significant milestone is
the emergence of the Map-Reduce  <ref xlink:href="#kerdata-2015-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> parallel
programming paradigm, currently used on most cloud platforms,
following the trend set up by Amazon 
<ref xlink:href="#kerdata-2015-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. At the core of Map-Reduce frameworks
lies the storage system, a key component which must meet a series of specific
requirements that have not fully been met yet by existing
solutions: the ability to provide efficient <i>fine-grain
access</i> to the files, while sustaining a <i>high throughput</i>
in spite of <i>heavy access concurrency</i>; the need to provide a high resilience
to <i>failures</i>; the need to take <i>energy-efficiency</i> issues into account.
More recently, as data-intensive processing needs go beyond the frontiers of single datacenters, extra challenges related to the
efficiency of metadata management concern the storage and
efficient access to very large sets of small objects by Big Data processing
workflows running on large-scale infrastructures.</p>
        </li>
        <label>Challenges related to data-intensive HPC applications.</label>
        <li id="uid14">
          <p noindent="true">The requirements exhibited by climate simulations specifically
highlight a major, more general research topic. They have been
clearly identified by international panels of experts like IESP 
<ref xlink:href="#kerdata-2015-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, EESI  <ref xlink:href="#kerdata-2015-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, ETP4HPC 
<ref xlink:href="#kerdata-2015-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> in the context of HPC simulations running on
post-petascale supercomputers. A jump of one order of magnitude in
the size of numerical simulations is required to address some of
the fundamental questions in several communities such as climate
modeling, solid earth sciences or astrophysics. In this context,
the lack of data-intensive infrastructures and methodologies to
analyze huge simulations is a growing limiting factor. The
challenge is to find new ways to store, visualize and analyze
massive outputs of data during and after the simulation without
impacting the overall performance (i.e. while avoiding as much as possible the <i>jitter</i>
generated by I/O interference). In this area, we specifically
focus on <i>in situ processing</i> approaches and we explore approaches to
<i>model and predict I/O</i> and to <i>reduce intra-application and cross-application I/O interference</i>.</p>
        </li>
      </descriptionlist>
      <p>The overall goal of the KerData project-team is to bring a
substantial contribution to the effort of the research communities in the areas of cloud computing and HPC to
address the above challenges. KerData's approach consists in designing and implementing
distributed algorithms for scalable data storage and input/output
management for efficient large-scale data processing. We target two
main execution infrastructures: cloud platforms and post-petascale
HPC supercomputers. Our collaboration portfolio includes
international teams that are active in these areas both in Academia
(e.g., Argonne National Lab, University of Illinois at
Urbana-Champaign, Barcelona Supercomputing Centre) and Industry
(Microsoft, IBM).</p>
      <p>The highly experimental nature of our research validation
methodology should be stressed. Our approach relies on building
prototypes and on validating them at a large scale on real testbeds
and experimental platforms. We strongly rely on the Grid'5000
platform. Moreover, thanks to our projects and partnerships, we have
access to reference software and physical infrastructures in the
cloud area (Microsoft Azure, Amazon clouds, Nimbus clouds); in the
post-petascale HPC area we are running our experiments on top-ranked
supercomputers, such as Titan, Jaguar, Kraken or Blue Waters. This
provides us with excellent opportunities to validate our results on
advanced realistic platforms.</p>
      <p>Moreover, the consortiums of our current projects include
application partners in the areas of Bio-Chemistry, Neurology and
Genetics, and Climate Simulations. This is an additional asset, it
enables us to take into account application requirements in the
early design phase of our solutions, and to validate those solutions
with real applications. We intend to continue increasing our
collaborations with application communities, as we believe that this
is a key to perform effective research with a high impact.</p>
    </subsection>
    <subsection id="uid15" level="1">
      <bodyTitle>Our research agenda</bodyTitle>
      <p>Three examples of motivating application scenarios will be described in detail in
the next section:</p>
      <simplelist>
        <li id="uid16">
          <p noindent="true">Joint genetic and neuroimaging data analysis on Azure clouds;</p>
        </li>
        <li id="uid17">
          <p noindent="true">Structural protein analysis on Nimbus clouds;</p>
        </li>
        <li id="uid18">
          <p noindent="true">I/O-intensive atmospheric simulations for the Blue Waters
post-petascale machine.</p>
        </li>
      </simplelist>
      <p>They illustrate the above challenges in some specific ways. They all
exhibit a common scheme: massively concurrent processes which access
massive data at a fine granularity, where data is shared and
distributed at a large scale. To address the aforementioned
challenges efficiently, we are exploring two main approaches:</p>
      <simplelist>
        <li id="uid19">
          <p noindent="true">the BlobSeer approach, which stands at the center of some of our main research
efforts in the area of cloud storage for Big Data processing. This approach relies on the design and implementation of
<i>scalable</i> distributed algorithms for data storage and
access. They combine advanced techniques for decentralized metadata
and data management, with versioning-based concurrency control to
optimize the performance of applications under heavy access
concurrency.</p>
        </li>
        <li id="uid20">
          <p noindent="true">the Damaris approach (that is totally independent of BlobSeer), which exploits
multicore parallelism in post-petascale supercomputers to enable jitter-free, low-overhead I/O management and non intrusive in situ visualization for large-scale simulations.</p>
        </li>
      </simplelist>
      <p>Our short- and medium-term research plan is devoted to storage
challenges in two main contexts: clouds and post-petascale HPC
architectures. Consequently, our research plan is split in two main
themes, which correspond to their respective challenges. For each
of those themes, we have initiated several actions through
collaborative projects coordinated by KerData, which define our
current research agenda.</p>
      <p>Based on very promising results demonstrated by BlobSeer in
preliminary experiments  <ref xlink:href="#kerdata-2015-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>,
we have initiated several collaborative projects in the area of
cloud data management, e.g., the MapReduce ANR project (aiming to improve both the performance and the fault-tolerance of the storage component of MapReduce processing frameworks to better support highly-concurrent data analytics applications); the A-Brain
Microsoft-Inria project (that leverages these improvements on Microsoft Azure clouds to the benefit of joint neuroimaging and genetics analysis); the Z-CloudFlow Microsoft-Inria
project (exploring how to efficiently manage metadata for geographically-distributed workflows). Such frameworks are for us concrete and efficient means to
work in close connection with strong partners already well
positioned in the area of cloud computing research.</p>
      <p>Similarly, Damaris is the fruit of a very successful collaborative work within
the Joint Inria-Illinois-ANL-BSC-JSC-RIKEN/AICS
Laboratory for Extreme-Scale Computing (JLESC, formerly called JLPC).
It has become a reference framework illustrating the usage of a dedicated-core approach for scalable I/O and non-intrusive in situ visualization on post-petascale HPC systems.
It led to the creation of the particularly active Data@Exascale Associate Team between Inria, ANL and UIUC,
an excellent framework for an enlarged research activity involving a large number
of young researchers and students of the KerData team and of its partners. This Associate Team serves as a basis for extended
research activities based on our approaches (including Damaris and Omnisc'IO), carried out beyond the
frontiers of our team. Our team is playing a leading role in the Big Data
and I/O research activities in the JLESC lab. This joint lab facilitates high-quality collaborations and
access to some of the most powerful supercomputers, an important
asset which already helped us produce and transfer some results of our team (e.g. Damaris).</p>
      <p>Thanks to these projects, we are now enjoying a visible scientific positioning at
the international level.</p>
    </subsection>
  </fondements>
  <domaine id="uid21">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid22" level="1">
      <bodyTitle>Joint genetic and neuroimaging data analysis on Azure clouds</bodyTitle>
      <p>Joint acquisition of neuroimaging and genetic data on large cohorts
of subjects is a new approach used to assess and understand the
variability that exists between individuals. Both neuroimaging- and genetic-domain
observations include a huge amount of variables (of the order of
millions). Performing rigorous statistical analyses on such amounts
of data is a major computational challenge that cannot be addressed
with conventional computational techniques only. On the one hand,
sophisticated regression techniques need to be used in order to
perform significant analysis on these large datasets; on the other
hand, the cost entailed by parameter optimization and statistical
validation procedures (e.g. permutation tests) is very high.</p>
      <p>To address the above challenges, the A-Brain (AzureBrain) Project
was carried out within the Microsoft Research-Inria Joint Research
Center. It was co-led by the KerData (Rennes) and Parietal (Saclay)
Inria teams. They jointly address this computational problem using
cloud related techniques on the Microsoft Azure cloud
infrastructure. The two teams brought together their complementary
expertise: KerData in the area of scalable cloud data management,
and Parietal in the field of neuroimaging and genetics data
analysis.</p>
      <p>This application scenario is a typical multi-disciplinary Data Science
project which serves as background for several on-going research
activities, beyond the end of the A-Brain project.</p>
    </subsection>
    <subsection id="uid23" level="1">
      <bodyTitle>Structural protein analysis on Nimbus and IBM clouds</bodyTitle>
      <p>In the framework of the MapReduce ANR project led by KerData (2010-2014),
we have focused on the FastA bioinformatics application used for
massive protein sequence similarity searching. This is a typical
data-intensive application that can leverage the Map-Reduce model
for a scalable execution on large-scale distributed platforms. FastA
remains an interesting use case that we are considering beyond the
end of the MapReduce project, for benchmarking our research
results in the area of optimized MapReduce processing.</p>
    </subsection>
    <subsection id="uid24" level="1">
      <bodyTitle>I/O intensive climate simulations for the Blue Waters
post-petascale machine</bodyTitle>
      <p>A major research topic in the context of HPC simulations running on
post-petascale supercomputers is to explore how to record and
visualize data during the simulation efficiently without impacting
the performance of the computation generating that data.
Conventional practice consists in storing data on disk, moving them
off-site, reading them into a workflow, and analyzing them. This
approach becomes increasingly harder to use because of the large
data volumes generated at fast rates, in contrast to limited
back-end performance. Scalable approaches to deal with these I/O
limitations are thus of utmost importance. This is one of the main
challenges explicitly stated in the roadmap of the
<ref xlink:href="http://www.ncsa.illinois.edu/BlueWaters/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Blue Waters
Project</ref>, which aims to build one of the most powerful
supercomputers in the world.</p>
      <p>In this context, the KerData project-team is exploring innovative
ways to remove the limitations mentioned above through collaborative
work in the framework of the Joint
Inria-Illinois-ANL-BSC-JSC-RIKEN/AICS Laboratory for Extreme-Scale
Computing (JLESC, formerly called JLPC), whose research activity
focuses on the Blue Waters project. An example is the atmospheric simulation code
CM1 (Cloud Model 1), one of the target applications of the Blue Waters machine. State-of-the-art I/O
approaches, which typically consist in periodically writing a very
large number of small files are inefficient: they cause bursts of I/O in the
parallel file system, leading to poor performance and extreme
variability (<i>jitter</i>). The challenge here is to investigate how to
make an efficient use of the underlying file system, by avoiding
synchronization and contention as much as possible. In collaboration
with the JLESC, we are addressing these challenges through the Damaris approach.</p>
    </subsection>
  </domaine>
  <highlights id="uid25">
    <bodyTitle>Highlights of the Year</bodyTitle>
    <subsection id="uid26" level="1">
      <bodyTitle>Highlights of the Year</bodyTitle>
      <subsection id="uid27" level="2">
        <bodyTitle>Awards</bodyTitle>
        <descriptionlist>
          <label>Gilles Kahn honorary award of the SIF and the Academy of
Science: 2nd prize for Matthieu Dorier in 2015.</label>
          <li id="uid28">
            <p noindent="true">The
<ref xlink:href="http://www.societe-informatique-de-france.fr/recherche/prix-de-these-gilles-kahn/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Gilles Kahn Honorary Award</ref> is given every year to at most the 3
best PhD theses in Computer Science in France and is jointly
delivered by the <i>Société Informatique de France</i> (SIF) and
the French Academy of Science. The candidates are judged on all
aspects of their PhD work, including fundamental contributions to
industrial transfers, publication impact, teaching, mentoring, and
scientific dissemination activities. A Grand Prize and two
<i>ex aequo</i> Accessit Prizes are given. Matthieu Dorier was
given one of the latter.</p>
          </li>
          <label>PhD award of the Fondation Rennes 1: 2nd prize for Matthieu
Dorier in the Matisse Doctoral School in 2015.</label>
          <li id="uid29">
            <p noindent="true">The
<ref xlink:href="https://fondation.univ-rennes1.fr/la-valorisation-de-la-recherche-de-pointe/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Rennes 1 Foundation PhD award</ref> from the Fondation Rennes 1 is
given every year to 8 outstanding new doctors from the 4
doctoral schools associated with the University of Rennes 1 (2
awards per doctoral school). The candidates are judged on the
innovative aspects of their PhD thesis, "innovative" being
understood in the sense of impact on socioeconomic development
and technology transfers.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid30" level="2">
        <bodyTitle>5 International Journals</bodyTitle>
        <p>This year the team published 5 papers in high-quality journals including IEEE Transactions on Parallel and Distributed Systems, IEEE Transactions on Cloud Computing, Future Generation Computer Systems (2), World Wide Web.</p>
      </subsection>
    </subsection>
  </highlights>
  <logiciels id="uid31">
    <bodyTitle>New Software and Platforms</bodyTitle>
    <subsection id="uid32" level="1">
      <bodyTitle>Major Software</bodyTitle>
      <subsection id="idp83952" level="2">
        <bodyTitle>BlobSeer</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="kerdata-2014-idp65432">
            <firstname>Luc</firstname>
            <lastname>Bougé</lastname>
          </person>
          <person key="kerdata-2014-idp70872">
            <firstname>Loïc</firstname>
            <lastname>Cloatre</lastname>
          </person>
          <p>.</p>
        </participants>
        <descriptionlist>
          <label>Contact:</label>
          <li id="uid33">
            <p noindent="true">Gabriel Antoniu.</p>
          </li>
          <label>Presentation:</label>
          <li id="uid34">
            <p noindent="true">BlobSeer is the core software platform for many
current cloud-oriented projects of the KerData team. It is a data storage service
specifically designed to deal with the requirements of large-scale,
data-intensive distributed applications that abstract data as huge
sequences of bytes, called BLOBs (Binary Large OBjects). It
provides a versatile versioning interface for manipulating BLOBs
that enables reading, writing and appending to them.</p>
            <p>BlobSeer offers both scalability and performance with respect to a
series of issues typically associated with the data-intensive
context: <i>scalable aggregation of storage space</i> from the
participating nodes with minimal overhead, ability to store
<i>huge data objects</i>, <i>efficient fine-grain access</i> to
data subsets, <i>high throughput in spite of heavy access
concurrency</i>, as well as <i>fault-tolerance</i>. This year we
have mainly focused on the deployment in production of the
BlobSeer software on IBM's cluster at Montpellier, in the context
of the ANR MapReduce project. To this end, several bugs were
solved, and several optimizations were brought to the
communication layer of BlobSeer. To showcase the benefits of
BlobSeer on this platform we focused on the Terasort
benchmark. Currently, preliminary tests on Grid5000 with this
benchmark show that BlobSeer performs better than HDFS for block
sizes lower than 2 MB. We have also improved the continuous
integration process of BlobSeer by deploying daily builds and
automatic tests on Grid5000.</p>
          </li>
          <label>Users:</label>
          <li id="uid35">
            <p noindent="true">Work is currently in progress in several formalized
projects (see previous section) to integrate and leverage BlobSeer
as a data storage back-end in the reference cloud environments: a)
Microsoft Azure; b) the Nimbus cloud toolkit developed at Argonne
National Lab (USA); and c) the OpenNebula IaaS cloud toolkit
developed at UCM (Madrid).</p>
          </li>
          <label>URL:</label>
          <li id="uid36">
            <p noindent="true">
              <ref xlink:href="http://blobseer.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>blobseer.<allowbreak/>gforge.<allowbreak/>inria.<allowbreak/>fr/</ref>
            </p>
          </li>
          <label>License:</label>
          <li id="uid37">
            <p noindent="true">GNU Lesser General Public License (LGPL) version 3.</p>
          </li>
          <label>Status:</label>
          <li id="uid38">
            <p noindent="true">This software is available on Inria's forge.
Version 1.0 (released late 2010) registered with APP:
IDDN.FR.001.310009.000.S.P.000.10700.</p>
          </li>
        </descriptionlist>
        <p>A <i>Technology Research Action</i> (ADT, <i>Action de recherche
technologique</i>) was active for two years until January 2015, aiming to
robustify the BlobSeer software and to make it a safely
distributable product. This project was funded by Inria
<i>Technological Development Office</i> (D2T, <i>Direction du
Développement Technologique</i>).</p>
      </subsection>
      <subsection id="idp2485504" level="2">
        <bodyTitle>Damaris</bodyTitle>
        <participants>
          <person key="PASUSERID">
            <firstname>Matthieu</firstname>
            <lastname>Dorier</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="kerdata-2014-idp81448">
            <firstname>Orçun</firstname>
            <lastname>Yildiz</lastname>
          </person>
          <person key="kerdata-2014-idp78800">
            <firstname>Lokman</firstname>
            <lastname>Rahmani</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
        </participants>
        <descriptionlist>
          <label>Contact:</label>
          <li id="uid39">
            <p noindent="true">Gabriel Antoniu.</p>
          </li>
          <label>Presentation:</label>
          <li id="uid40">
            <p noindent="true">Damaris is a middleware for multicore SMP nodes
enabling them to handle data transfers for storage and
visualization efficiently. The key idea is to dedicate one or a few cores of
each SMP node to the application I/O. It is developed within the
framework of a collaboration between KerData and the <i>Joint
Laboratory for Petascale Computing</i> (JLPC). Damaris enables
efficient asynchronous I/O, hiding all I/O related overheads such
as data compression and post-processing, as well as direct
(<i>in-situ</i>) interactive visualization of the generated
data. Version 1.0 was released in November 2014 and enables other
approaches such as the use of dedicated nodes instead of dedicated
cores.</p>
          </li>
          <label>Users:</label>
          <li id="uid41">
            <p noindent="true">Damaris has been preliminarily evaluated at NCSA/UIUC
(Urbana-Champaign, IL, USA) with the CM1 tornado simulation code. CM1 is
one of the target applications of the Blue Waters supercomputer in
production at NCSA, in the framework of the
Inria-UIUC-ANL Joint Lab (JLPC). Damaris now has external users,
including (to our knowledge) visualization specialists from NCSA
and researchers from the France/Brazil Associated research team on
Parallel Computing (joint team between Inria/LIG Grenoble and the
UFRGS in Brazil). Damaris has been successfully integrated into
four large-scale simulations (CM1, OLAM, Nek5000, GTC).</p>
          </li>
          <label>URL:</label>
          <li id="uid42">
            <p noindent="true">
              <ref xlink:href="http://damaris.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>damaris.<allowbreak/>gforge.<allowbreak/>inria.<allowbreak/>fr/</ref>
            </p>
          </li>
          <label>License:</label>
          <li id="uid43">
            <p noindent="true">GNU Lesser General Public License (LGPL) version 3.</p>
          </li>
          <label>Status:</label>
          <li id="uid44">
            <p noindent="true">This software is available on Inria's forge and
registered with APP. Registration of the latest version with APP
is in progress.</p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid45" level="1">
      <bodyTitle>Other Software</bodyTitle>
      <subsection id="idp2497800" level="2">
        <bodyTitle>Omnisc'IO</bodyTitle>
        <participants>
          <person key="PASUSERID">
            <firstname>Matthieu</firstname>
            <lastname>Dorier</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <descriptionlist>
          <label>Contact:</label>
          <li id="uid46">
            <p noindent="true">Matthieu Dorier.</p>
          </li>
          <label>Presentation:</label>
          <li id="uid47">
            <p noindent="true">Omnisc'IO is a middleware integrated in the
POSIX and MPI-I/O stacks to observe, model and
predict the I/O behavior of any HPC application transparently. It is based on
formal grammars, implementing a modified version of the Sequitur
algorithm. Omnisc'IO has been used on Grid'5000 with the CM1
atmospheric simulation, the LAMMPS molecular dynamics simulation,
the GTC fusion simulation and the Nek5000 CFD
simulation. Omnisc'IO was subject to a publication at SC14.</p>
          </li>
          <label>Users:</label>
          <li id="uid48">
            <p noindent="true">Omnisc'IO is currently used only within the KerData
team and at Argonne National Lab.</p>
          </li>
          <label>URL:</label>
          <li id="uid49">
            <p noindent="true">
              <ref xlink:href="http://omniscio.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>omniscio.<allowbreak/>gforge.<allowbreak/>inria.<allowbreak/>fr/</ref>
            </p>
          </li>
          <label>License:</label>
          <li id="uid50">
            <p noindent="true">GNU Lesser General Public License (LGPL) version 3.</p>
          </li>
          <label>Status:</label>
          <li id="uid51">
            <p noindent="true">Currently unavailable for distribution (subject to major changes). Version 1.0 (released in November 2015) registered with APP:
IDDN.FR.001.540003.000.S.P.2015.000.10000.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="idp2505840" level="2">
        <bodyTitle>JetStream</bodyTitle>
        <participants>
          <person key="PASUSERID">
            <firstname>Radu</firstname>
            <lastname>Tudoran</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <descriptionlist>
          <label>Contact:</label>
          <li id="uid52">
            <p noindent="true">Alexandru Costan.</p>
          </li>
          <label>Presentation:</label>
          <li id="uid53">
            <p noindent="true">JetStream is a middleware solution for
batch-based, high-performance streaming across cloud data
centers. JetStream implements a set of context-aware strategies
for optimizing batch-based streaming, being able to self-adapt to
changing conditions. Additionally, the system provides multi-route
streaming across cloud data centers for aggregating bandwidth by
leveraging the network parallelism. It enables easy deployment
across .Net frameworks and seamless binding with event processing
engines such as StreamInsight.</p>
          </li>
          <label>Users:</label>
          <li id="uid54">
            <p noindent="true">JetStream is currently used at Microsoft Research ATLE
Munich for the management of the Azure cloud infrastructure.</p>
          </li>
          <label>License:</label>
          <li id="uid55">
            <p noindent="true">Microsoft Public License.</p>
          </li>
          <label>Status:</label>
          <li id="uid56">
            <p noindent="true">Prototype and demo available.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="idp2511592" level="2">
        <bodyTitle>OverFlow</bodyTitle>
        <participants>
          <person key="PASUSERID">
            <firstname>Radu</firstname>
            <lastname>Tudoran</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <descriptionlist>
          <label>Contact:</label>
          <li id="uid57">
            <p noindent="true">Alexandru Costan.</p>
          </li>
          <label>Presentation:</label>
          <li id="uid58">
            <p noindent="true">OverFlow is a uniform data management system
for scientific workflows running across geographically distributed
sites, aiming to reap economic benefits from this
geo-diversity. The software is environment-aware, as it monitors
and models the global cloud infrastructure, offering high and
predictable data handling performance for transfer cost and time,
within and across sites. OverFlow proposes a set of pluggable
services, grouped in a data-scientist cloud kit. They provide the
applications with the possibility to monitor the underlying
infrastructure, to exploit smart data compression, deduplication
and geo-replication, to evaluate data management costs, to set a
tradeoff between money and time, and optimize the transfer
strategy accordingly. In 2015, OverFlow was extended with
support for efficient metadata operations: the newly implemented
strategies leverage workflow semantics in a 2-level metadata
partitioning hierarchy that combines distribution and replication.</p>
          </li>
          <label>Users:</label>
          <li id="uid59">
            <p noindent="true">Currently, OverFlow is used for data transfers by the
Microsoft Research ATLE Munich team as well as for synthetic
benchmarks at the Politehnica University of Bucharest.</p>
          </li>
          <label>License:</label>
          <li id="uid60">
            <p noindent="true">GNU Lesser General Public License (LGPL) version 3.</p>
          </li>
          <label>Status:</label>
          <li id="uid61">
            <p noindent="true">Registration of the latest version with APP is in progress.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="idp2519024" level="2">
        <bodyTitle>iHadoop</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp80104">
            <firstname>Tien Dat</firstname>
            <lastname>Phan</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
        </participants>
        <descriptionlist>
          <label>Contact:</label>
          <li id="uid62">
            <p noindent="true">Shadi Ibrahim.</p>
          </li>
          <label>Presentation:</label>
          <li id="uid63">
            <p noindent="true"><i>iHadoop</i> is a Hadoop simulator developed
in Java on top of SimGrid to simulate the behavior of Hadoop and
therefore accurately predict the performance of Hadoop in normal
scenarios and under failures. In 2015, iHadoop was extended to
simulate the execution and predict the performance of multiple
Map-Reduce applications, sharing the same Hadoop cluster.
Two schedulers (Fifo, Fair) are now available in iHadoop.</p>
          </li>
          <label>Users:</label>
          <li id="uid64">
            <p noindent="true">iHadoop is an internal software prototype, which was
initially developed to validate our idea for exploring the
behavior of Hadoop under failures. iHadoop has been preliminarily
evaluated within our group and it has shown very high accuracy
when predicting the execution time of a Map-Reduce application.
iHadoop was discussed with the SimGrid community during
the SimGrid user days in Lyon (June 2015). We intend to add iHadoop to
the contributions site of the SimGrid project and make it available to the
SimGrid community.</p>
          </li>
          <label>License:</label>
          <li id="uid65">
            <p noindent="true">GNU Lesser General Public License (LGPL) version 3.</p>
          </li>
          <label>Status:</label>
          <li id="uid66">
            <p noindent="true">Available on Inria's forge.
Registration of the latest version with APP
is in progress.</p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
  </logiciels>
  <resultats id="uid67">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid68" level="1">
      <bodyTitle>Efficient data management for hybrid and multi-site clouds</bodyTitle>
      <subsection id="uid69" level="2">
        <bodyTitle>JetStream: enabling high-throughput live event
streaming on multi-site clouds </bodyTitle>
        <participants>
          <person key="PASUSERID">
            <firstname>Radu</firstname>
            <lastname>Tudoran</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Scientific and commercial applications operate nowadays on tens of
cloud datacenters around the globe, following similar patterns: they
aggregate monitoring or sensor data, assess the QoS or run global
data mining queries based on inter-site event stream
processing. Enabling fast data transfers across geographically
distributed sites allows such applications to manage the continuous
streams of events in real time and quickly react to
changes. However, traditional event processing engines often
consider data resources as second-class citizens and support access
to data only as a side-effect of computation (i.e. they are not
concerned by the transfer of events from their source to the
processing site). This is an efficient approach as long as the
processing is executed in a single cluster where nodes are
interconnected by low latency networks. In a distributed
environment, consisting of multiple datacenters, with orders of
magnitude differences in capabilities and connected by a WAN, this
will undoubtedly lead to significant latency and performance
variations.</p>
        <p>This is precisely the challenge we addressed this year by proposing
JetStream <ref xlink:href="#kerdata-2015-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, a high performance
batch-based streaming middleware for efficient transfers of events
between cloud datacenters. JetStream is able to self-adapt to the
streaming conditions by modeling and monitoring a set of context
parameters. It further aggregates the available bandwidth by
enabling multi-route streaming across cloud sites, while at the same
time optimizing resource utilization and increasing cost
efficiency. The prototype was validated on tens of nodes from US and
Europe datacenters of the Windows Azure cloud with synthetic
benchmarks and a real-life application monitoring the ALICE
experiment at CERN. The results show a <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>3</mn><mo>×</mo></mrow></math></formula> increase of the transfer
rate using the adaptive multi-route streaming, compared to state of
the art solutions.</p>
      </subsection>
      <subsection id="uid70" level="2">
        <bodyTitle>Multi-site metadata management for geographically
distributed cloud workflows</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp77520">
            <firstname>Luis Eduardo</firstname>
            <lastname>Pineda Morales</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>With their globally distributed datacenters, clouds now provide an
opportunity to run complex large-scale applications on dynamically
provisioned, networked and federated infrastructures. However, there
is a lack of tools supporting data-intensive applications
(e.g. scientific workflows) on virtualized IaaS or PaaS systems
across geographically distributed sites. As a relevant example,
data-intensive scientific workflows struggle in leveraging such
distributed cloud platforms. For instance, scientific workflows
which handle many small files can easily saturate state-of-the-art
distributed filesystems based on centralized metadata servers (e.g.,
HDFS, PVFS).</p>
        <p>In <ref xlink:href="#kerdata-2015-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we explore several alternative
design strategies to efficiently support the execution of existing
workflow engines across multi-site clouds, by reducing the cost of
metadata operations. These strategies leverage workflow semantics in
a 2-level metadata partitioning hierarchy that combines distribution
and replication. The system was validated on the Microsoft Azure
cloud across 4 EU and US datacenters. The experiments were conducted
on 128 nodes using synthetic benchmarks and real-life
applications. We observe as much as 28% gain in execution time for
a parallel, geo-distributed real-world application (Montage) and up
to 50% for a metadata-intensive synthetic benchmark, compared to a
baseline centralized configuration.</p>
      </subsection>
      <subsection id="uid71" level="2">
        <bodyTitle>Understanding the performance of Big Data platforms
in hybrid and multi-site clouds </bodyTitle>
        <participants>
          <person key="kerdata-2015-idp116256">
            <firstname>Roxana-Ioana</firstname>
            <lastname>Roman</lastname>
          </person>
          <person key="kerdata-2015-idp113776">
            <firstname>Ovidiu-Cristian</firstname>
            <lastname>Marcu</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Recently, hybrid multi-site big data analytics (that combines
on-premise with off-premise resources) has gained increasing
popularity as a tool to process large amounts of data on-demand,
without additional capital investment to increase the size of a
single datacenter. However, making the most out of hybrid setups for
big data analytics is challenging because on-premise resources can
communicate with off-premise resources at significantly lower
throughput and higher latency. Understanding the impact of this
aspect is not trivial, especially in the context of modern big data
analytics frameworks that introduce complex communication patterns
and are optimized to overlap communication with computation in order
to hide data transfer latencies. This year we started to work on a
study that aims to identify and explain this impact in relationship
to the known behavior on a single cloud.</p>
        <p>A first step towards this goal consisted of analysing a
representative big data workload on a hybrid Spark
setup <ref xlink:href="#kerdata-2015-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Unlike previous experience that
emphasized low end-impact of network communications in Spark, we
found significant overhead in the shuffle phase when the bandwidth
between the on-premise and off-premise resources is sufficiently
small. We plan to continue this study by investigating additional
parameters at a finer grain and adding new platforms, like Apache
Flink.</p>
      </subsection>
    </subsection>
    <subsection id="uid72" level="1">
      <bodyTitle>Optimizing Map-Reduce</bodyTitle>
      <subsection id="uid73" level="2">
        <bodyTitle>Chronos: failure-aware scheduling in shared Hadoop
clusters</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp81448">
            <firstname>Orçun</firstname>
            <lastname>Yildiz</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Hadoop emerged as the de facto state-of-the-art system for
MapReduce-based data analytics. The reliability of Hadoop systems
depends in part on how well they handle failures. Currently, Hadoop
handles machine failures by re-executing all the tasks of the failed
machines (i.e., executing recovery tasks). Unfortunately, this
elegant solution is entirely entrusted to the core of Hadoop and
hidden from Hadoop schedulers. The unawareness of failures therefore
may prevent Hadoop schedulers from operating correctly towards
meeting their objectives (e.g., fairness, job priority) and can
significantly impact the performance of MapReduce applications.</p>
        <p>In <ref xlink:href="#kerdata-2015-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we propose Chronos, a failure-aware
scheduling strategy that enables an early yet smart action for fast
failure recovery while operating within a specific scheduler
objective. Chronos takes an early action rather than waiting an
uncertain amount of time to get a free slot (thanks to our
preemption technique). Chronos embraces a smart selection algorithm
that returns a list of tasks that need to be preempted in order to
free the necessary slots to launch recovery tasks immediately. This
selection considers three criteria: the progress scores of running
tasks, the scheduling objectives, and the recovery tasks input data
locations. In order to make room for recovery tasks rather than
waiting an uncertain amount of time, a natural solution is to kill
running tasks in order to create free slots. Although killing tasks
can free the slots easily, it wastes the work performed by the
killed tasks. Therefore, we present the design and implementation of
a novel work-conserving preemption technique that allows pausing and
resuming both map and reduce tasks without resource wasting and with
little overhead.</p>
        <p>We demonstrate the utility of Chronos by combining it with two
state-of-the-art Hadoop schedulers: Fifo and Fair schedulers. The
experimental results show that Chronos achieves almost optimal data
locality for the recovery tasks and reduces the job completion times
by up to 55% over state-of-the-art schedulers. Moreover, Chronos
recovers to a correct scheduling behavior after failure detection
within only a couple of seconds.</p>
      </subsection>
      <subsection id="uid74" level="2">
        <bodyTitle>On the usability of shortest remaining time first
policy in shared Hadoop clusters</bodyTitle>
        <participants>
          <person key="kerdata-2015-idp118848">
            <firstname>Nathanaël</firstname>
            <lastname>Cheriere</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
        </participants>
        <p>A practical problem facing the Hadoop community is how to reduce job
makespans by reducing job waiting times and execution
times. Previous Hadoop schedulers have focused on improving job
execution times, by improving data locality but not considering job
waiting times. Even worse, enforcing data locality according to the
job input sizes can be inefficient: it can lead to long waiting
times for small yet short jobs when sharing the cluster with jobs
with smaller input sizes but higher execution complexity.</p>
        <p>We have introduced hSRTF <ref xlink:href="#kerdata-2015-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, an adaptation
of the well-known Shortest Remaining Time First scheduler (i.e.,
SRTF) in shared Hadoop clusters. hSRTF embraces a simple model to
estimate the remaining time of a job and a preemption primitive
(i.e., kill) to free the resources when needed. We have implemented
hSRTF and performed extensive evaluations with Hadoop on the
Grid'5000 testbed. The results show that hSRTF can significantly
reduce the waiting times of small jobs and therefore improves their
make-spans, but at the cost of a relatively small increase in the
make-spans of large jobs. For instance, a time-based proportional
share mode of hSRTF (i.e., hSRTF-Pr) speeds up small jobs by (on
average) 45% and 26% while introducing a performance degradation
for large jobs by (on average) 10% and 0.2% compared to Fifo and
Fair schedulers, respectively.</p>
      </subsection>
      <subsection id="uid75" level="2">
        <bodyTitle>A Performance evaluation of Hadoop's schedulers under
failures</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>Recently, Hadoop has not only been used for running single batch
jobs but it has also been optimized to simultaneously support the
execution of multiple jobs belonging to multiple concurrent
users. Several schedulers (i.e., Fifo, Fair, and Capacity
schedulers) have been proposed to optimize locality executions of
tasks but do not consider failures, although, evidence in the
literature shows that faults do occur and can probably result in
performance problems.</p>
        <p>In <ref xlink:href="#kerdata-2015-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we have designed a set of
experiments to evaluate the performance of Hadoop under failure when
applying several schedulers (i.e., explore the conflict between job
scheduling, exposing locality executions, and failures). Our results
reveal several drawbacks of current Hadoop's mechanism in
prioritizing failed tasks. By trying to launch failed tasks as soon
as possible regardless of locality, it significantly increases the
execution time of jobs with failed tasks, due to two reasons: 1)
available resources might not be freed up as quickly as expected and
2) failed tasks might be re-executed on machines with no data on them,
introducing extra cost for transferring data over the network, which
is normally the scarcest resource in today's datacenters.</p>
        <p>Our preliminary study with Hadoop not only helps us to understand
the interplay between fault-tolerance and job scheduling, but also
offers useful insights into optimizing the current schedulers to be
more efficient in case of failures.</p>
      </subsection>
      <subsection id="uid76" level="2">
        <bodyTitle>Kvasir: empowering Hadoop with knowledge</bodyTitle>
        <participants>
          <person key="kerdata-2015-idp118848">
            <firstname>Nathanaël</firstname>
            <lastname>Cheriere</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
        </participants>
        <p>Most of Hadoop schedulers are based on homogeneity hypotheses about
the jobs and the nodes and therefore strongly rely on the location
of the input data when scheduling tasks. However, our study revealed
that Hadoop is a highly dynamic environment (e.g., variation in task
duration within a job and across different jobs). Even worse, clouds
are multi-tenant environments which in turn introduce more
heterogeneity and dynamicity in Hadoop clusters. As a result,
relying on static knowledge (i.e. data location) may lead to wrong
scheduling decisions.</p>
        <p>We have developed a new scheduling framework for Hadoop, named
Kvasir. Kvasir aims to provide an up-to-date knowledge that reflects
the dynamicity of the environment while being light-weight and
performance-oriented. The utility of Kvasir is demonstrated by the
implementation of several schedulers including Fifo, Fair, and SRTF
schedulers.</p>
      </subsection>
    </subsection>
    <subsection id="uid77" level="1">
      <bodyTitle>Energy-aware data management in clouds and HPC</bodyTitle>
      <subsection id="uid78" level="2">
        <bodyTitle>On understanding the energy impact of speculative
execution in Hadoop</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp80104">
            <firstname>Tien Dat</firstname>
            <lastname>Phan</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="kerdata-2014-idp65432">
            <firstname>Luc</firstname>
            <lastname>Bougé</lastname>
          </person>
        </participants>
        <p>Hadoop emerged as an important system for large-scale data
analysis. Speculative execution is a key feature in Hadoop that is
extensively leveraged in clouds: it is used to mask slow tasks
(i.e., stragglers) — resulting from resource contention and
heterogeneity in clouds — by launching speculative task copies on
other machines. However, speculative execution is not cost-free and
may result in performance degradation and extra resource and energy
consumption. While prior literature has been dedicated to improving
stragglers detection to cope with the inevitable heterogeneity in
clouds, little work is focusing on understanding the implications of
speculative execution on the performance and energy consumption in
Hadoop clusters.</p>
        <p>In <ref xlink:href="#kerdata-2015-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we have designed a set of experiments
to evaluate the impact of speculative execution on the performance
and energy consumption of Hadoop in homogeneous and heterogeneous
environments. Our studies reveal that speculative execution may
sometimes reduce, sometimes increase the energy consumption of
Hadoop clusters. This strongly depends on the reduction in the
execution time of MapReduce applications and on the extra power
consumption introduced by speculative execution. Moreover, we show
that the extra power consumption varies among applications and
stems from three main factors: the duration of speculative
tasks, the idle time, and the allocation of speculative tasks. To
the best of our knowledge, our work provides the first deep look
into the energy efficiency of speculative execution in Hadoop.</p>
      </subsection>
      <subsection id="uid79" level="2">
        <bodyTitle>On the energy footprint of I/O management in Exascale
HPC systems</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp81448">
            <firstname>Orçun</firstname>
            <lastname>Yildiz</lastname>
          </person>
          <person key="PASUSERID">
            <firstname>Matthieu</firstname>
            <lastname>Dorier</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>The advent of unprecedentedly scalable yet energy hungry Exascale
supercomputers poses a major challenge in sustaining a high
performance-per-watt ratio. With I/O management acquiring a crucial
role in supporting scientific simulations, various I/O management
approaches have been proposed to achieve high performance and
scalability. However, the details of how these approaches affect
energy consumption have not been studied yet.</p>
        <p>Therefore, we have explored how much energy a supercomputer consumes
while running scientific simulations when adopting various I/O
management approaches. In particular, we closely examined three
radically different I/O schemes including time partitioning,
dedicated cores, and dedicated nodes. To do so, we implemented the
three approaches within the Damaris I/O middleware and performed
extensive experiments with one of the target HPC applications of the
Blue Waters sustained-petaflop supercomputer project: the CM1
atmospheric model.</p>
        <p>Our experimental results obtained on the French Grid'5000 platform
highlighted the differences among these three approaches and
illustrated in which way various configurations of the application
and of the system can impact performance and energy
consumption. Considering that choosing the most energy-efficient
approach for a particular simulation on a particular machine can be
a daunting task, we provided a model to estimate the energy
consumption of a simulation under different I/O approaches. Our
proposed model gives hints to pre-select the most energy-efficient
I/O approach for a particular simulation on a particular HPC system
and therefore provides a step towards energy-efficient HPC
simulations in Exascale systems.</p>
        <p>We validated the accuracy of our proposed model using a real-life
HPC application (CM1) and two different clusters provisioned on the
Grid'5000 testbed. The estimated energy consumptions are within
5.7% of the measured ones for all I/O approaches.</p>
      </subsection>
      <subsection id="uid80" level="2">
        <bodyTitle>Exploring energy-consistency trade-offs in cloud
storage systems and beyond</bodyTitle>
        <participants>
          <person key="kerdata-2015-idp115016">
            <firstname>Mohammed-Yacine</firstname>
            <lastname>Taleb</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="kerdata-2014-idp65432">
            <firstname>Luc</firstname>
            <lastname>Bougé</lastname>
          </person>
        </participants>
        <p>Apache Cassandra is an open-source cloud storage system that offers
multiple types of operation-level consistency including eventual
consistency with multiple levels of guarantees and strong
consistency. It is being used by many datacenter applications (e.g.,
Facebook and AppScale). Most existing research efforts have been
dedicated to exploring trade-offs such as: consistency
vs. performance, consistency vs. latency and consistency
vs. monetary cost. In contrast, little work has focused on the
consistency vs. energy trade-off. As power bills have become a
substantial part of the monetary cost for operating a datacenter, we
aim to provide a clearer understanding of the interplay between
consistency and energy consumption.</p>
        <p>In <ref xlink:href="#kerdata-2015-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, a series of experiments have been
conducted to explore the implication of different factors on the
energy consumption in Cassandra. Our experiments have revealed a
noticeable variation in the energy consumption depending on the
consistency level. Furthermore, for a given consistency level, the
energy consumption of Cassandra varies with the access pattern and
the load exhibited by the application. This further analysis
indicated that the uneven distribution of the load amongst different
nodes also impacts the energy consumption in Cassandra. Finally, we
experimentally compared the impact of four storage configuration and
data partitioning policies on the energy consumption in Cassandra:
interestingly, we achieve 23% energy saving when assigning 50% of
the nodes to the hot pool for the applications with moderate ratio
of reads and writes, while applying eventual (quorum) consistency.</p>
        <p>This study points to opportunities for future research on
consistency-energy trade-offs and offers useful insight into
designing energy-efficient techniques for cloud storage
systems. This work was done in collaboration with Houssem-Eddine
Chihoub (LIG lab, Grenoble) and María Pérez (UPM, Madrid).</p>
        <p>Recently, we have been looking at in-memory storage systems. In
particular, we are investigating the current replication schemes,
data placement strategies and consistency models which are used in
in-memory storage systems. Next, an empirical study will be
performed to analyze the potential impact of the aforementioned
issues on energy consumption. At this point, we are working with
RAMCloud.</p>
      </subsection>
      <subsection id="uid81" level="2">
        <bodyTitle>Governing energy consumption in Hadoop through CPU
frequency scaling: an analysis</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp80104">
            <firstname>Tien Dat</firstname>
            <lastname>Phan</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>In <ref xlink:href="#kerdata-2015-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we studied the impact of different
existing DVFS (<i>Dynamic Voltage and Frequency Scaling</i>) governors
(i.e., performance, powersave, on-demand, conservative and
userspace) on Hadoop's performance and power
efficiency. Interestingly, our experimental results reported not
only a noticeable variation of the power consumption and performance
with different applications and under different governors, but also
demonstrated the opportunity to achieve a better tradeoff between
performance and power consumption.</p>
        <p>The primary contributions of this work are as follows: (1) it
provides an overview of the state-of-the-art techniques for
energy-efficiency in Hadoop; (2) it discusses and demonstrates the
need for exploiting DVFS techniques for energy reduction in Hadoop;
(3) it experimentally demonstrates that MapReduce applications
experience variations in performance and power consumption under
different CPU frequencies and also under different governors. A
micro-analysis section is provided to explain this variation and its
cause; (4) it illustrates in practice how the behavior of different
governors influences the execution of MapReduce applications and how
it shapes the performance of the entire cluster; (5) it also brings
out the differences between these governors and CPU frequencies and
shows that they are not only sub-optimal for different applications
but also sub-optimal for different stages of MapReduce execution;
(6) it demonstrates that achieving better energy efficiency in
Hadoop cannot be done simply by tuning the governor parameters, nor
through a naive coarse-grained tuning of the CPU frequencies or the
governors according to the running phase (i.e., map phase or reduce
phase).</p>
      </subsection>
    </subsection>
    <subsection id="uid82" level="1">
      <bodyTitle>Scalable I/Os: visualization and processing</bodyTitle>
      <subsection id="uid83" level="2">
        <bodyTitle>Modeling and predicting I/O patterns of large-scale
simulations</bodyTitle>
        <participants>
          <person key="PASUSERID">
            <firstname>Matthieu</firstname>
            <lastname>Dorier</lastname>
          </person>
          <person key="kerdata-2014-idp64192">
            <firstname>Shadi</firstname>
            <lastname>Ibrahim</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>The increasing gap between the computation performance of
post-petascale machines and the performance of their I/O subsystem
has motivated many I/O optimizations including prefetching, caching,
and scheduling. In order to further improve these techniques,
modeling and predicting spatial and temporal I/O patterns of HPC
applications as they run has become crucial. Our work in this
context focuses on Omnisc'IO, an approach that builds a
grammar-based model of the I/O behavior of HPC applications and uses
it to predict when future I/O operations will occur, and where and
how much data will be accessed. To infer grammars, Omnisc'IO is
based on StarSequitur, a novel algorithm extending Nevill-Manning's
Sequitur algorithm <ref xlink:href="#kerdata-2015-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Omnisc'IO is
transparently integrated into the POSIX and MPI I/O stacks and does
not require any modification in applications or higher-level I/O
libraries. It works without any prior knowledge of the application
and converges to accurate predictions of any <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mi>N</mi></math></formula> future I/O
operations within a couple of iterations. Its implementation is
efficient in both computation time and memory footprint.</p>
      </subsection>
      <subsection id="uid84" level="2">
        <bodyTitle>In situ analysis and visualization workflows</bodyTitle>
        <participants>
          <person key="PASUSERID">
            <firstname>Matthieu</firstname>
            <lastname>Dorier</lastname>
          </person>
          <person key="kerdata-2014-idp78800">
            <firstname>Lokman</firstname>
            <lastname>Rahmani</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>In situ visualization has been proposed in the past few years to
couple running simulations with parallel visualization and analysis
tools. While many parallel visualization tools now provide in situ
visualization capabilities, the trend has been to feed such tools
with what previously was large amounts of unprocessed output data
and let them render everything at the highest possible
resolution. This leads to an increased run time of simulations that
still have to complete within a fixed-length job allocation. In this
work, we tackle the challenge of enabling in situ visualization
under performance constraints. Our approach shuffles data across
processes according to its content and filters out part of it in
order to feed a visualization pipeline with only a reorganized
subset of the data produced by the simulation. Our framework
monitors its own performance and reconfigures itself dynamically to
achieve the best possible visual fidelity within predefined
performance constraints. Experiments on the Blue Waters
supercomputer with the CM1 simulation show that our approach enables
a <formula type="inline"><math xmlns="http://www.w3.org/1998/Math/MathML" overflow="scroll"><mrow><mn>5</mn><mo>×</mo></mrow></math></formula> speedup and is able to meet performance constraints.</p>
      </subsection>
    </subsection>
    <subsection id="uid85" level="1">
      <bodyTitle>Scalable storage for data-intensive applications</bodyTitle>
      <subsection id="uid86" level="2">
        <bodyTitle>OverFlow: multi-site aware Big Data management for
scientific workflows on clouds</bodyTitle>
        <participants>
          <person key="PASUSERID">
            <firstname>Radu</firstname>
            <lastname>Tudoran</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>The global deployment of cloud datacenters is enabling large-scale
scientific workflows to improve performance and deliver fast
responses. This unprecedented geographical distribution of the
computation is doubled by an increase in the scale of the data
handled by such applications, bringing new challenges related to the
efficient data management across sites. High throughput, low
latencies or cost-related trade-offs are just a few concerns for
both cloud providers and users when it comes to handling data across
datacenters. Existing solutions are limited to cloud-provided
storage, which offers low performance based on rigid cost
schemes. In turn, workflow engines need to improvise substitutes,
achieving performance at the cost of complex system configurations,
maintenance overheads, reduced reliability and reusability.</p>
        <p>In <ref xlink:href="#kerdata-2015-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, we introduced OverFlow, a uniform
data-management system for scientific workflows running across
geographically distributed sites, aiming to reap economic benefits
from this geo-diversity. Our solution is environment-aware, as it
monitors and models the global cloud infrastructure, offering high
and predictable data-handling performance for transfer cost and
time, within and across sites. OverFlow proposes a set of pluggable
services, grouped in a data-scientist cloud kit. They provide the
applications with the possibility to monitor the underlying
infrastructure, to exploit smart data compression, deduplication and
geo-replication, to evaluate data-management costs, to set a
tradeoff between money and time, and optimize the transfer strategy
accordingly. The system was validated on the Microsoft Azure cloud
across its 6 EU and US datacenters. The experiments were conducted
on hundreds of nodes using synthetic benchmarks and real-life
bio-informatics applications (A-Brain, BLAST). The results show that
our system is able to model the cloud performance accurately and to
leverage this for efficient data dissemination, being able to reduce
the monetary costs and transfer time by up to 3 times.</p>
      </subsection>
      <subsection id="uid87" level="2">
        <bodyTitle>Efficient transactional storage for data-intensive
applications</bodyTitle>
        <participants>
          <person key="kerdata-2014-idp82720">
            <firstname>Pierre</firstname>
            <lastname>Matri</lastname>
          </person>
          <person key="kerdata-2014-idp66888">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="kerdata-2014-idp62712">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
        </participants>
        <p>As the computational power used by large-scale applications
increases, the amount of data they need to manipulate tends to
increase as well. A wide range of such applications require robust
and flexible storage support for atomic, durable and concurrent
transactions. Historically, databases have provided the <i>de
facto</i> solution to transactional data management, but they have
forced applications to drop control over data layout and access
mechanisms, while remaining unable to meet the scale requirements of
Big Data. More recently, key-value stores have been introduced to
address these issues. However, this solution does not provide
transactions, or only restricted transaction support, constraining
users to carefully coordinate access to data in order to avoid race
conditions, partial writes, overwrites, and other hard problems that
cause erratic behavior.</p>
        <p>We argue that there is a gap between existing storage solutions and
application requirements that limits the design of
transaction-oriented data-intensive applications. We have started
working on a prototype of a massively parallel distributed
transactional blob storage system, aiming to fill this gap.</p>
      </subsection>
    </subsection>
  </resultats>
  <contrats id="uid88">
    <bodyTitle>Bilateral Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid89" level="1">
      <bodyTitle>Bilateral Contracts with Industry</bodyTitle>
      <descriptionlist>
        <label>Microsoft: Z-CloudFlow (2013–2016).</label>
        <li id="uid90">
          <p noindent="true">In the framework of the Joint Inria-Microsoft Research Center,
this project is a follow-up to the A-Brain project. The goal of
this new project is to propose a framework for the efficient
processing of scientific workflows in clouds. This approach will
leverage the cloud infrastructure capabilities for handling and
processing large data volumes. In order to support data-intensive
workflows, the cloud-based solution will: adapt the workflows to
the cloud environment and exploit its capabilities; optimize data
transfers to provide reasonable times; manage data and tasks so
that they can be efficiently placed and accessed during execution.
The validation will be performed using real-life applications,
first on the Grid'5000 platform, then on the Azure cloud
environment, access being granted by Microsoft through an
<i>Azure for Research Award</i> received by G. Antoniu. The
project also provides funding for the PhD thesis of Luis Pineda,
started in 2014. The project is being conducted in collaboration
with the Zenith team from Montpellier, led by Patrick Valduriez.</p>
        </li>
      </descriptionlist>
    </subsection>
  </contrats>
  <partenariat id="uid91">
    <bodyTitle>Partnerships and Cooperations</bodyTitle>
    <subsection id="uid92" level="1">
      <bodyTitle>National Initiatives</bodyTitle>
      <subsection id="uid93" level="2">
        <bodyTitle>ANR</bodyTitle>
        <descriptionlist>
          <label>OverFlow (2015–2019).</label>
          <li id="uid94">
            <p noindent="true">This JCJC project led by Alexandru Costan investigates approaches
to data management enabling an efficient execution of
geographically distributed workflows running on multi-site
clouds. Ultimately, OverFlow will propose a new, pioneering
paradigm: Workflow Data Management as a Service — a general and
easy-to-use, cloud-provided service that bridges for the first time
the gap between single- and multi-site workflow data management.
It aims to reap economic benefits from the geo-diversity while
accelerating the scientific discovery through a democratization of
access to globally distributed data. Within this project,
A. Costan is jointly working with Kate Keahey (University of
Chicago and Argonne National Laboratory), Bogdan Nicolae (IBM
Research) and Christophe Blanchet (Institut Français de
Bioinformatique).</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid95" level="2">
        <bodyTitle>Other National Projects</bodyTitle>
        <descriptionlist>
          <label>DISCOVERY (2015–2019).</label>
          <li id="uid96">
            <p noindent="true">An Inria Project Lab, led by Adrien Lebre (ASCOLA), that aims at
exploring a new way of operating Utility Computing (UC) resources
by leveraging any facilities available through the Internet in
order to deliver widely distributed platforms that can better
match the geographical dispersal of users as well as the unending
demand. Project-teams: ASAP, ASCOLA, Avalon, Myriads, and
KerData. Within DISCOVERY, S. Ibrahim (KerData Inria Team) is
working with Gilles Fedak (Avalon Inria Project-Team) to address
the VM images management challenge.</p>
          </li>
          <label>Grid'5000.</label>
          <li id="uid97">
<p noindent="true">We are members of the Grid'5000 community and run
experiments on the Grid'5000 platform on a daily basis.</p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid98" level="1">
      <bodyTitle>European Initiatives</bodyTitle>
      <subsection id="uid99" level="2">
        <bodyTitle>FP7 and H2020 Projects</bodyTitle>
        <subsection id="uid100" level="3">
          <bodyTitle>BigStorage</bodyTitle>
          <descriptionlist>
            <label>Title:</label>
            <li id="uid101">
              <p noindent="true"><ref xlink:href="http://www.bigstorage-project.eu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">BigStorage</ref>:
Storage-based Convergence between HPC and Cloud to handle Big Data</p>
            </li>
            <label>Program:</label>
            <li id="uid102">
              <p noindent="true">H2020</p>
            </li>
            <label>Duration:</label>
            <li id="uid103">
              <p noindent="true">January 2015–January 2019</p>
            </li>
            <label>Coordinator:</label>
            <li id="uid104">
              <p noindent="true">Universidad Politecnica de Madrid</p>
            </li>
            <label>Participants:</label>
            <li id="uid105">
              <simplelist>
                <li id="uid106">
                  <p noindent="true">Barcelona Supercomputing Center — Centro Nacional de
Supercomputacion (Spain)</p>
                </li>
                <li id="uid107">
                  <p noindent="true">CA Technologies Development Spain (Spain)</p>
                </li>
                <li id="uid108">
                  <p noindent="true">CEA — Commissariat a l'Énergie atomique et aux énergies
alternatives (France)</p>
                </li>
                <li id="uid109">
                  <p noindent="true">Deutsches Klimarechenzentrum (Germany)</p>
                </li>
                <li id="uid110">
                  <p noindent="true">Foundation for Research and Technology Hellas (Greece)</p>
                </li>
                <li id="uid111">
                  <p noindent="true">Fujitsu Technology Solutions (Germany)</p>
                </li>
                <li id="uid112">
                  <p noindent="true">Johannes Gutenberg Universitaet Mainz (Germany)</p>
                </li>
                <li id="uid113">
                  <p noindent="true">Universidad Politecnica de Madrid (Spain)</p>
                </li>
                <li id="uid114">
                  <p noindent="true">Seagate Systems UK (United Kingdom)</p>
                </li>
              </simplelist>
            </li>
            <label>URL:</label>
            <li id="uid115">
              <p noindent="true">
                <ref xlink:href="http://www.bigstorage-project.eu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>bigstorage-project.<allowbreak/>eu/</ref>
              </p>
            </li>
            <label>Inria contact:</label>
            <li id="uid116">
              <p noindent="true">Gabriel Antoniu and Adrien Lebre</p>
            </li>
          </descriptionlist>
          <p>BigStorage is a European Training Network (ETN) whose main goal is
to train future <i>data scientists</i> in order to enable them and us to
apply holistic and interdisciplinary approaches for taking advantage
of a data-overwhelmed world, which requires <i>HPC</i> and <i>Cloud</i>
infrastructures with a redefinition of <i>storage</i> architectures
underpinning them — focusing on meeting highly ambitious performance
and <i>energy</i> usage objectives. The KerData team will be hosting 2
Early Stage Researchers in this framework.</p>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid117" level="1">
      <bodyTitle>International
Initiatives</bodyTitle>
      <subsection id="uid118" level="2">
        <bodyTitle>Inria International Labs</bodyTitle>
        <subsection id="uid119" level="3">
          <bodyTitle>JLESC: Joint Laboratory on Extreme Scale Computing</bodyTitle>
          <p>The <ref xlink:href="https://publish.illinois.edu/jointlab-esc/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Joint
Laboratory on Extreme Scale Computing</ref> is jointly run by Inria,
UIUC, ANL, BSC, JSC and RIKEN. It was created in 2014 as a
follow-up of the Inria-UIUC JLPC — <i>Joint Laboratory for
Petascale Computing</i> to collaborate on concurrency-optimized I/O
for Extreme-scale platforms (see details in
Section <ref xlink:href="#uid82" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). The KerData
team has been collaborating with teams from ANL and UIUC within this lab
since 2009. This collaboration has now been formalized as the
<i>Data@Exascale</i> Associate Team with ANL and UIUC (2013–2015).</p>
          <subsection id="idp2630216" level="4">
            <bodyTitle>Associate Team involved in the International Lab:
Data@Exascale</bodyTitle>
            <descriptionlist>
              <label>Title:</label>
              <li id="uid120">
                <p noindent="true">
                  <ref xlink:href="http://www.irisa.fr/kerdata/data-at-exascale/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Ultra-scalable
I/O and storage for Exascale systems</ref>
                </p>
              </li>
              <label>International Partner:</label>
              <li id="uid121">
                <p noindent="true">Argonne National Laboratory (United States) — Mathematics
and Computer Science Division (MCS) — Robert Ross</p>
              </li>
              <label>Start year:</label>
              <li id="uid122">
                <p noindent="true">2013</p>
              </li>
              <label>URL:</label>
              <li id="uid123">
                <p noindent="true">
                  <ref xlink:href="http://www.irisa.fr/kerdata/data-at-exascale/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>irisa.<allowbreak/>fr/<allowbreak/>kerdata/<allowbreak/>data-at-exascale/</ref>
                </p>
              </li>
            </descriptionlist>
            <p>As the computational power used by large-scale scientific
applications increases, the amount of data manipulated for
subsequent analysis increases as well. Rapidly storing this data,
protecting it from loss and analyzing it to understand the results
are significant challenges, made more difficult by decades of
improvements in computation capabilities that have been unmatched in
storage. For many applications, the overall performance and
scalability clearly become driven by the performance of the I/O
subsystem. As we anticipate Exascale systems in 2020, there is a
growing consensus in the scientific community that revolutionary new
approaches are needed in computational science storage. These
challenges are at the center of the activities of the Joint
Inria-Illinois-ANL-BSC-JSC-RIKEN/AICS Laboratory for Extreme-Scale
Computing (JLESC, formerly called JLPC). This project gathers
researchers from Inria, Argonne National Lab and the University of
Illinois at Urbana Champaign to address 3 goals: 1) investigate new
storage architectures for Exascale systems; 2) investigate new
approaches to the design of I/O middleware for Exascale systems to
optimize data processing and visualization, leveraging dedicated I/O
cores and I/O forwarding techniques; 3) explore techniques enabling
adaptive cloud data services for HPC.</p>
          </subsection>
        </subsection>
      </subsection>
      <subsection id="uid124" level="2">
        <bodyTitle>Inria International Partners</bodyTitle>
        <subsection id="uid125" level="3">
          <bodyTitle>DataCloud@work</bodyTitle>
          <descriptionlist>
            <label>Title:</label>
            <li id="uid126">
              <p noindent="true"><ref xlink:href="https://www.irisa.fr/kerdata/doku.php?id=cloud_at_work:start" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">DataCloud@Work</ref>
— Distributed data management for cloud services</p>
            </li>
            <label>International Partner:</label>
            <li id="uid127">
              <p noindent="true">Politehnica University of Bucharest (Romania) — Computer
Science and Engineering Department — Valentin Cristea and
Nicolae Tapus</p>
            </li>
            <label>Start year:</label>
            <li id="uid128">
              <p noindent="true">January 2013. The status of IIP was established
right after the end of our former <i>DataCloud@work</i> Associate Team
(2010–2012).</p>
            </li>
            <label>URL:</label>
            <li id="uid129">
              <p noindent="true">
                <ref xlink:href="https://www.irisa.fr/kerdata/doku.php?id=cloud_at_work:start" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>www.<allowbreak/>irisa.<allowbreak/>fr/<allowbreak/>kerdata/<allowbreak/>doku.<allowbreak/>php?id=cloud_at_work:start</ref>
              </p>
            </li>
          </descriptionlist>
          <p>Our research topics address the area of distributed data management for cloud services, focusing on autonomic storage. The goal is to explore how to build an efficient, secure and reliable storage IaaS for data-intensive distributed applications running in cloud environments by enabling an autonomic behavior.</p>
        </subsection>
      </subsection>
    </subsection>
    <subsection id="uid130" level="1">
      <bodyTitle>International
Research Visitors</bodyTitle>
      <subsection id="uid131" level="2">
        <bodyTitle>Visits of International Scientists</bodyTitle>
        <subsection id="uid132" level="3">
          <bodyTitle>Research stays abroad</bodyTitle>
          <descriptionlist>
            <label>Luis Eduardo Pineda Morales:</label>
            <li id="uid133">
              <p noindent="true">Research visit at ANL, hosted by Kate Keahey and
Balaji Subramaniam for 3 months (June–August), funded by the
PUF NextGen project and by the Microsoft Research Inria Joint
Centre project. This work is done in the context of the Joint
Laboratory for Extreme-Scale Computing (JLESC).</p>
            </li>
            <label>Orçun Yildiz:</label>
            <li id="uid134">
              <p noindent="true">Research visit at ANL, hosted by Rob Ross and Matthieu Dorier for 3
months, funded by the PUF NextGen project and by the Data@Exascale
Associate Team. This work is done in the context of the Joint
Laboratory for Extreme-Scale Computing (JLESC).</p>
            </li>
          </descriptionlist>
        </subsection>
      </subsection>
    </subsection>
  </partenariat>
  <diffusion id="uid135">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid136" level="1">
      <bodyTitle>Promoting Scientific Activities</bodyTitle>
      <subsection id="uid137" level="2">
        <bodyTitle>Scientific events organisation</bodyTitle>
        <subsection id="uid138" level="3">
          <bodyTitle>General chair, scientific chair</bodyTitle>
          <descriptionlist>
            <label>Gabriel Antoniu:</label>
            <li id="uid139">
              <p noindent="true">Program Co-Chair of the
<ref xlink:href="https://www.eitdigital.eu/cloudsymposium2015/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">EIT Digital
Future Cloud Symposium</ref> (Rennes, October 2015).</p>
            </li>
          </descriptionlist>
        </subsection>
        <subsection id="uid140" level="3">
          <bodyTitle>Member of the organizing committees</bodyTitle>
          <descriptionlist>
            <label>Alexandru Costan:</label>
            <li id="uid141">
              <p noindent="true">Organizer of the IRISA D1 Department Day Seminar.</p>
            </li>
          </descriptionlist>
        </subsection>
      </subsection>
      <subsection id="uid142" level="2">
        <bodyTitle>Scientific events selection</bodyTitle>
        <subsection id="uid143" level="3">
          <bodyTitle>Chair of conference program committees</bodyTitle>
          <descriptionlist>
            <label>Gabriel Antoniu:</label>
            <li id="uid144">
              <p noindent="true">Track Chair for the following international conferences: IEEE
Cluster 2015 (Data, Storage, and Visualization Track - Chicago,
September 2015) and 3PGCIC 2015 (Distributed Algorithms Track -
Krakow, November 2015).</p>
            </li>
            <label>Alexandru Costan:</label>
            <li id="uid145">
              <p noindent="true">Program Co-Chair of the following international workshops:
BigDataCloud 2015 held in conjunction with the Euro-Par 2015
conference (Vienna, August 2015) and ScienceCloud 2015 held in
conjunction with HPDC 2015 (Portland, June 2015).</p>
            </li>
          </descriptionlist>
        </subsection>
        <subsection id="uid146" level="3">
          <bodyTitle>Member of the conference program committees</bodyTitle>
          <descriptionlist>
            <label>Gabriel Antoniu:</label>
            <li id="uid147">
              <p noindent="true">ACM HPDC 2015, ACM/IEEE CCGrid'2015,
ACM/IEEE SC'15, Euro-Par 2015, BigDataCloud 2015 workshop (held in
conjunction with the Euro-Par 2015 conference).</p>
            </li>
            <label>Luc Bougé:</label>
            <li id="uid148">
              <p noindent="true">BigDataCloud 2015, BigData 2015, ICA3PP 2015, CCGRID 2016.</p>
            </li>
            <label>Alexandru Costan:</label>
            <li id="uid149">
              <p noindent="true">Member of the following Program Committees: IEEE Cluster 2015,
CSCS 2015, ARMS-CC 2015, BigDataCloud 2015, ScienceCloud 2015, CSE 2015.</p>
              <p>Other reviews: IEEE BigData 2015, SC 2015, HPDC 2015, CCGrid
2015, Euro-Par 2015.</p>
            </li>
            <label>Shadi Ibrahim:</label>
            <li id="uid150">
              <p noindent="true">Member of the following Program Committees: IEEE Cluster
2015, IEEE Cloudcom 2015, ICPADS 2015, IEEE CSE 2015, IEEE FCST
2015, IEEE ICA3PP 2015, IFIP NPC 2015, MEDES 2015, BigDataCloud
2015 workshop (held in conjunction with the Euro-Par 2015
conference), SCRAMBL 2015 workshop (held in conjunction with the
CCGrid 2015 conference).</p>
              <p>Other reviews: IEEE BigData 2015, SC 2015, HPDC 2015, CCGrid
2015, Euro-Par 2015.</p>
            </li>
          </descriptionlist>
        </subsection>
      </subsection>
      <subsection id="uid151" level="2">
        <bodyTitle>Journal</bodyTitle>
        <subsection id="uid152" level="3">
          <bodyTitle>Member of the editorial boards</bodyTitle>
          <descriptionlist>
            <label>Alexandru Costan:</label>
            <li id="uid153">
              <p noindent="true">Soft Computing Journal, Special Issue on Autonomic Computing
and Big Data Platforms</p>
            </li>
          </descriptionlist>
        </subsection>
        <subsection id="uid154" level="3">
          <bodyTitle>Reviewer</bodyTitle>
          <descriptionlist>
            <label>Shadi Ibrahim:</label>
            <li id="uid155">
              <p noindent="true">IEEE Transactions on Parallel and Distributed Systems, IEEE
Transactions on Services Computing, IEEE Transactions on Cloud
Computing, ACM Transactions on Architecture and Code
Optimization, ACM Transactions on Internet Technology, Future
Generation Computer Systems, IEEE Systems Journal, Cluster
Computing.</p>
            </li>
            <label>Alexandru Costan:</label>
            <li id="uid156">
              <p noindent="true">IEEE Transactions on Parallel and
Distributed Systems, IEEE Transactions on Cloud Computing, Future
Generation Computer Systems, Concurrency and Computation Practice
and Experience.</p>
            </li>
          </descriptionlist>
        </subsection>
      </subsection>
      <subsection id="uid157" level="2">
        <bodyTitle>Invited talks</bodyTitle>
        <descriptionlist>
          <label>Gabriel Antoniu:</label>
          <li id="uid158">
            <descriptionlist>
              <label>3rd JLESC workshop:</label>
              <li id="uid159">
                <p noindent="true"><i>To Overlap or Not to Overlap:
Optimizing Incremental MapReduce Computations for On-Demand
Data Upload</i>, 3rd workshop of the Joint Laboratory for Extreme
Scale Computing, Barcelona, Spain, July 2015.</p>
              </li>
              <label>Huawei Workshop on New Directions in Algorithms and
Software,</label>
              <li id="uid160">
                <p noindent="true"><i>Scalable data-intensive processing for science
on distributed clouds: A-Brain and Z-CloudFlow</i>, Paris, March
2015.</p>
              </li>
              <label>Inria-Mexico workshop:</label>
              <li id="uid161">
                <p noindent="true"><i>Scalable data-intensive
processing for science on distributed clouds: A-Brain and
Z-CloudFlow</i>, First Inria-Mexico Workshop in Applied
Mathematics and Computer Science, Mexico City, June 2015.</p>
              </li>
              <label>IRISA Data Science Symposium:</label>
              <li id="uid162">
                <p noindent="true"><i>Damaris: Jitter-Free
I/O Management and In Situ Visualization of HPC Simulations
using Dedicated Cores</i>, Science Data Ecosystem workshop,
Rennes, November 2015.</p>
              </li>
            </descriptionlist>
          </li>
          <label>Luc Bougé:</label>
          <li id="uid163">
            <descriptionlist>
              <label>BigDataCloud 2015:</label>
              <li id="uid164">
                <p noindent="true"><i>Data computing in distributed,
very large-scale clouds: From execution models to
programming models</i>, satellite workshop of the Euro-Par 2015
conference, Vienna, August 2015.</p>
              </li>
            </descriptionlist>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid165" level="2">
        <bodyTitle>Leadership within the scientific community</bodyTitle>
        <descriptionlist>
          <label>Luc Bougé:</label>
          <li id="uid166">
            <p noindent="true">Vice-Chair of the Steering
Committee of the <ref xlink:href="http://www.europar.org/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">Euro-Par</ref>
conference.</p>
          </li>
          <label>Gabriel Antoniu:</label>
          <li id="uid167">
            <p noindent="true">Leader for the Big Data, I/O and visualization activity within the
<ref xlink:href="https://publish.illinois.edu/jointlab-esc/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">JLESC</ref> - Joint
Inria-Illinois-ANL-BSC-JSC-RIKEN/AICS Laboratory for Extreme-Scale
Computing.</p>
          </li>
          <label>Gabriel Antoniu:</label>
          <li id="uid168">
            <p noindent="true">Principal Investigator of the Z-CloudFlow Microsoft Research-Inria
Project, for which he received an Azure Research Award.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid169" level="2">
        <bodyTitle>Scientific expertise</bodyTitle>
        <descriptionlist>
          <label>Luc Bougé:</label>
          <li id="uid170">
            <p noindent="true">Member of the jury for the <i>Agrégation de
mathématiques</i>, the French national hiring committee to hire
high-school mathematics teachers at the national level.</p>
          </li>
          <label>Gabriel Antoniu:</label>
          <li id="uid171">
            <p noindent="true">Project evaluator for 12 ANR projects
submitted to the ANR 2016 call (Phase 1).</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid172" level="2">
        <bodyTitle>Research administration</bodyTitle>
        <descriptionlist>
          <label>Luc Bougé:</label>
          <li id="uid173">
            <p noindent="true">Nominated to sit on the CNU (<i>National
University Council</i>) in the <i>Informatics</i> Section (27). His
4-year term has been renewed in November 2015.</p>
          </li>
          <label>Luc Bougé:</label>
          <li id="uid174">
            <p noindent="true">Scientific Project Leader for
Fundamental Informatics at the ANR - <i>Agence Nationale de la
Recherche</i> until August 2015, for 40% of his time. It was
the end of his 3-year delegation contract.</p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid175" level="1">
      <bodyTitle>Teaching - Supervision -
Juries</bodyTitle>
      <subsection id="uid176" level="2">
        <bodyTitle>Teaching</bodyTitle>
        <descriptionlist>
          <label>Gabriel Antoniu</label>
          <li id="uid177">
            <simplelist>
              <li id="uid178">
                <p noindent="true">Master (Engineering Degree, 5th year): Big Data, 24 hours
(lectures), M2 level, ENSAI (<i>École Nationale
Supérieure de la Statistique et de l'Analyse de
l'Information</i>), Bruz, France.</p>
              </li>
              <li id="uid179">
                <p noindent="true">Master: Cloud Computing, 15 hours (lectures and lab
sessions), M2 level, ENSAI (<i>École Nationale Supérieure
de la Statistique et de l'Analyse de l'Information</i>), Bruz,
France.</p>
              </li>
              <li id="uid180">
                <p noindent="true">Master: Distributed Systems, 8 hours (lectures), M2 level,
ALMA Master, Distributed Architectures module, University of
Nantes, France.</p>
              </li>
              <li id="uid181">
                <p noindent="true">Master: Scalable Distributed Systems, 5 hours (lectures), M2
level, SDS Module, M2RI Master Program, ENS Rennes, France.</p>
              </li>
              <li id="uid182">
                <p noindent="true">Master: Scalable Distributed Systems, 12 hours (lectures),
M1 level, SDS Module, EIT ICT Labs Master School, France.</p>
              </li>
              <li id="uid183">
                <p noindent="true">Master (Engineering Degree, 5th year, <i>Big Data</i> option), 10 hours (lectures),
M2 level, INSA de Lyon, France.</p>
              </li>
            </simplelist>
          </li>
          <label>Luc Bougé</label>
          <li id="uid184">
            <simplelist>
              <li id="uid185">
                <p noindent="true">Bachelor: Introduction to programming concepts, 24 hours
(lectures), L3 level, Informatics program, ENS Rennes, France.</p>
              </li>
              <li id="uid186">
                <p noindent="true">Master: Introduction to object-oriented high-performance
programming, 24 hours (lectures), M1 level, Mathematics program,
ENS Rennes, France.</p>
              </li>
              <li id="uid187">
                <p noindent="true">Master: Introduction to compilation, 12 hours (exercise
classes), M1 level, Informatics program, Univ. Rennes 1, France.</p>
              </li>
            </simplelist>
          </li>
          <label>Shadi Ibrahim</label>
          <li id="uid188">
            <simplelist>
              <li id="uid189">
                <p noindent="true">Master (Engineering Degree, 5th year): Big Data, 24 hours (lectures and lab sessions),
M2 level, ENSAI (<i>École Nationale Supérieure de la Statistique et de l'Analyse de l'Information</i>), Bruz,
France.</p>
              </li>
              <li id="uid190">
                <p noindent="true">Master: Cloud Computing and Hadoop Technologies, 3 hours (lectures), M2 level, ENSAI
(<i>École Nationale Supérieure de la Statistique et de l'Analyse de l'Information</i>), Bruz, France.</p>
              </li>
              <li id="uid191">
                <p noindent="true">Master: Distributed Systems (cloud data management), 4 hours (lectures), M2 level, ALMA Master, Distributed Architectures module, University of
Nantes, France.</p>
              </li>
            </simplelist>
          </li>
          <label>Alexandru Costan</label>
          <li id="uid192">
            <simplelist>
              <li id="uid193">
                <p noindent="true">Bachelor: Software Engineering and Java Programming,
28 hours (lab sessions), L3, INSA Rennes.</p>
              </li>
              <li id="uid194">
                <p noindent="true">Bachelor: Databases, 68 hours (lectures and lab sessions),
L2, INSA Rennes, France.</p>
              </li>
              <li id="uid195">
                <p noindent="true">Bachelor: Practical case studies, 24 hours (project), L3,
INSA Rennes.</p>
              </li>
              <li id="uid196">
                <p noindent="true">Master: Big Data and Applications, 36 hours (lectures, lab
sessions, project), M1, INSA Rennes.</p>
              </li>
            </simplelist>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid197" level="2">
        <bodyTitle>Supervision</bodyTitle>
        <subsection id="uid198" level="3">
          <bodyTitle>PhD in progress</bodyTitle>
          <descriptionlist>
            <label> Luis Eduardo Pineda Morales:</label>
            <li id="uid199">
              <p noindent="true"><i>Efficient Big Data
Management for Geographically Distributed Workflows</i>, thesis
started in January 2014, co-advised by Alexandru Costan and
Gabriel Antoniu.</p>
            </li>
            <label>Tien-Dat Phan:</label>
            <li id="uid200">
              <p noindent="true"><i>Green Big Data
Processing in Large-scale Clouds</i>, thesis started in October
2014, co-advised by Shadi Ibrahim and Luc Bougé.</p>
            </li>
            <label>Lokman Rahmani:</label>
            <li id="uid201">
              <p noindent="true"><i>Big Data Management for
Next-Generation High-Performance Computing Systems</i>, thesis
started in October 2013, co-advised by Gabriel Antoniu and Luc
Bougé.</p>
            </li>
            <label>Orçun Yildiz:</label>
            <li id="uid202">
              <p noindent="true"><i>Energy-Efficient Big
Data Management in Petascale Supercomputers and Beyond</i>, thesis
started in September 2014, co-advised by Shadi Ibrahim and Gabriel
Antoniu.</p>
            </li>
            <label>Mohammed-Yacine Taleb:</label>
            <li id="uid203">
              <p noindent="true"><i>Energy-impact of
data consistency management in Clouds and Beyond</i>, thesis
started in August 2015, co-advised by Shadi Ibrahim and Gabriel
Antoniu.</p>
            </li>
            <label>Pierre Matri:</label>
            <li id="uid204">
              <p noindent="true"><i>Predictive Models for Big
Data</i>, thesis started in March 2015, co-advised by María
Pérez and Gabriel Antoniu.</p>
            </li>
            <label>Ovidiu-Cristian Marcu:</label>
            <li id="uid205">
              <p noindent="true"><i>Efficient data
transfer and streaming strategies for workflow-based Big Data
processing</i>, thesis started in October 2015, co-advised by
Alexandru Costan and Gabriel Antoniu.</p>
            </li>
          </descriptionlist>
        </subsection>
      </subsection>
      <subsection id="uid206" level="2">
        <bodyTitle>Juries</bodyTitle>
        <descriptionlist>
          <label>Gabriel Antoniu:</label>
          <li id="uid207">
            <p noindent="true">Referee for the PhD thesis of
Ms. Zhou Chi at the Nanyang Technological University (NTU),
Singapore (to be defended in January 2016).</p>
          </li>
          <label>Gabriel Antoniu:</label>
          <li id="uid208">
            <p noindent="true">Jury member of the PhD defense of
Ms. Safae Dahmani at the University Bretagne Sud (December 14,
2015).</p>
          </li>
          <label>Shadi Ibrahim:</label>
          <li id="uid209">
            <p noindent="true">Jury member of the PhD defense of
Ms. Karine Pires at the University Pierre et Marie Curie, Paris
(March 31, 2015).</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid210" level="2">
        <bodyTitle>Miscellaneous</bodyTitle>
        <descriptionlist>
          <label>Luc Bougé:</label>
          <li id="uid211">
            <p noindent="true">Co-ordinator between ENS Rennes
and the Inria Research Center and the IRISA laboratory.</p>
          </li>
          <label>Shadi Ibrahim:</label>
          <li id="uid212">
            <p noindent="true">Project evaluator in the STIC-AMSUD
Program 2015.</p>
          </li>
          <label>Shadi Ibrahim:</label>
          <li id="uid213">
            <p noindent="true">Leader of the BigStorage project recruitment
task force.</p>
          </li>
          <label>Gabriel Antoniu:</label>
          <li id="uid214">
            <p noindent="true">Invited to give a tutorial on <i>Big Data
Technologies</i> at the PUF Summer School co-organized with the 3rd
JLESC workshop in Barcelona (July 2015).</p>
          </li>
          <label>Shadi Ibrahim:</label>
          <li id="uid215">
            <p noindent="true">Invited to give a two-day tutorial on <i>Hadoop</i> at
IT4Innovations, Technical University Ostrava, Czech Republic
(December 2015).</p>
          </li>
          <label>Alexandru Costan:</label>
          <li id="uid216">
            <p noindent="true">In charge of communication at the
Computer Science Department of INSA Rennes.</p>
          </li>
          <label>Alexandru Costan:</label>
          <li id="uid217">
            <p noindent="true">In charge of the organization of the IRISA
D1 Department Seminar.</p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid218" level="1">
      <bodyTitle>Popularization</bodyTitle>
      <descriptionlist>
        <label>Luc Bougé:</label>
        <li id="uid219">
          <descriptionlist>
            <label>Collège international, Valbonne.</label>
            <li id="uid220">
              <p noindent="true">Invited presentation to
the students of the preparatory classes on
<i>Doing research in computer science</i> (January 2015).</p>
            </li>
            <label>Lycée Chateaubriand, Rennes.</label>
            <li id="uid221">
              <p noindent="true">Invited presentation to
the students of the preparatory classes on
<i>Science of numerics</i> (March 2015).</p>
            </li>
            <label>IRISA Conf Lunch Program.</label>
            <li id="uid222">
              <p noindent="true">Invited presentation about
<i>Surviving the Data Deluge</i> (October 2015).</p>
            </li>
            <label>Master Program, Rennes.</label>
            <li id="uid223">
              <p noindent="true">Invited presentation to the M2
students about <i>Informatics as a scientific activity:
Toward a responsible research</i> (December 2015).</p>
            </li>
            <label>IRISA Open House Days, Rennes.</label>
            <li id="uid224">
              <p noindent="true">Invited presentation about
<i>Wikipedia back stage</i>, and management of an open booth to
let visitors improve their skills about searching Wikipedia
(December 2015).</p>
            </li>
          </descriptionlist>
        </li>
        <label>Alexandru Costan:</label>
        <li id="uid225">
          <descriptionlist>
            <label>CumuloNumBio'15, Aussois.</label>
            <li id="uid226">
              <p noindent="true">Invited presentation at the
<i>Summer School of BioInformatics</i> about <i>Big Data
Management on Clouds</i> (June 2015).</p>
            </li>
            <label>IRISA, Rennes.</label>
            <li id="uid227">
              <p noindent="true">Invited presentation at the
<i>Conf'Lunch</i> about <i>Clouds and MapReduce Programming</i>
(October 2015).</p>
            </li>
            <label>EIT Digital, Rennes.</label>
            <li id="uid228">
              <p noindent="true">Invited presentation at the Future Cloud Symposium
about <i>Enhancing video gaming user
experience with Big Data analytics based on Apache Flink - a
use case</i> (October 2015).</p>
            </li>
          </descriptionlist>
        </li>
        <label>Gabriel Antoniu:</label>
        <li id="uid229">
          <descriptionlist>
            <label>EIT Digital.</label>
            <li id="uid230">
              <p noindent="true">Invited presentation at the Future Cloud Symposium
on <i>Scalable data-intensive processing for science on distributed clouds</i> (October 2015).</p>
            </li>
          </descriptionlist>
        </li>
      </descriptionlist>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    
    <biblStruct id="kerdata-2015-bid24" type="article" rend="refer" n="refercite:costan:hal-00767034">
      <identifiant type="hal" value="hal-00767034"/>
      <analytic>
        <title level="a">TomusBlobs: Scalable Data-intensive Processing on Azure Clouds</title>
        <author>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Goetz</foreName>
            <surname>Brasche</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">CCPE - Concurrency and Computation: Practice and Experience</title>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00767034" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00767034</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid25" type="article" rend="refer" n="refercite:damota:hal-01057325">
      <identifiant type="hal" value="hal-01057325"/>
      <analytic>
        <title level="a">Machine Learning Patterns for Neuroimaging-Genetic Studies in the Cloud</title>
        <author>
          <persName>
            <foreName>Benoit</foreName>
            <surname>Da Mota</surname>
            <initial>B.</initial>
          </persName>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="parietal-2014-idm28112">
            <foreName>Gaël</foreName>
            <surname>Varoquaux</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Goetz</foreName>
            <surname>Brasche</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Patricia J.</foreName>
            <surname>Conrod</surname>
            <initial>P. J.</initial>
          </persName>
          <persName>
            <foreName>Hervé</foreName>
            <surname>Lemaitre</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Tomáš</foreName>
            <surname>Paus</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>Marcella</foreName>
            <surname>Rietschel</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Vincent</foreName>
            <surname>Frouin</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Jean-Baptiste</foreName>
            <surname>Poline</surname>
            <initial>J.-B.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="parietal-2014-idm31000">
            <foreName>Bertrand</foreName>
            <surname>Thirion</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Frontiers in Neuroinformatics</title>
        <imprint>
          <biblScope type="volume">8</biblScope>
          <dateStruct>
            <month>April</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01057325" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01057325</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid26" type="inproceedings" rend="refer" n="refercite:dorier:hal-00715252">
      <identifiant type="hal" value="hal-00715252"/>
      <analytic>
        <title level="a">Damaris: How to Efficiently Leverage Multicore Parallelism to Achieve Scalable, Jitter-free I/O</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="roma-2014-idp88208">
            <foreName>Franck</foreName>
            <surname>Cappello</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Marc</foreName>
            <surname>Snir</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Leigh</foreName>
            <surname>Orf</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">CLUSTER - IEEE International Conference on Cluster Computing</title>
        <loc>Beijing, China</loc>
        <imprint>
          <publisher>
            <orgName>IEEE</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2012</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00715252" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00715252</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid27" type="inproceedings" rend="refer" n="refercite:dorier:hal-00916091">
      <identifiant type="hal" value="hal-00916091"/>
      <analytic>
        <title level="a">CALCioM: Mitigating I/O Interference in HPC Systems through Cross-Application Coordination</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Robert</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Dries</foreName>
            <surname>Kimpe</surname>
            <initial>D.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">IPDPS - International Parallel and Distributed Processing Symposium</title>
        <loc>Phoenix, United States</loc>
        <imprint>
          <dateStruct>
            <month>May</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-00916091" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00916091</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid29" type="inproceedings" rend="refer" n="refercite:dorier:hal-01025670">
      <identifiant type="hal" value="hal-01025670"/>
      <analytic>
        <title level="a">Omnisc'IO: A Grammar-Based Approach to Spatial and Temporal I/O Patterns Prediction</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Robert</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">SC'14 - International Conference for High Performance Computing, Networking, Storage and Analysis</title>
        <loc>New Orleans, United States</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">IEEE, ACM</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2014</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01025670" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01025670</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid28" type="article" rend="refer" n="refercite:dorier:hal-01238103">
      <identifiant type="doi" value="10.1109/TPDS.2015.2485980"/>
      <identifiant type="hal" value="hal-01238103"/>
      <analytic>
        <title level="a">Using Formal Grammars to Predict I/O Behaviors in HPC: the Omnisc'IO Approach</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Rob</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes">
        <title level="j">TPDS - IEEE Transactions on Parallel and Distributed Systems</title>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01238103" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01238103</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid30" type="article" rend="refer" n="refercite:nicolae:2010:inria-00511414:1">
      <identifiant type="hal" value="inria-00511414"/>
      <analytic>
        <title level="a">BlobSeer: Next-Generation Data Management for Large-Scale Infrastructures</title>
        <author>
          <persName>
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp65432">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="myriads-2014-idp74496">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">JPDC - Journal of Parallel and Distributed Computing</title>
        <imprint>
          <biblScope type="volume">71</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <month>February</month>
            <year>2011</year>
          </dateStruct>
          <biblScope type="pages">169–184</biblScope>
          <ref xlink:href="http://hal.inria.fr/inria-00511414/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>inria-00511414/<allowbreak/>en/</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid31" type="inproceedings" rend="refer" n="refercite:nicolae:2011:inria-00570682:1">
      <identifiant type="hal" value="inria-00570682"/>
      <analytic>
        <title level="a">Going Back and Forth: Efficient Multi-Deployment and Multi-Snapshotting on Clouds</title>
        <author>
          <persName>
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>John</foreName>
            <surname>Bresnahan</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Kate</foreName>
            <surname>Keahey</surname>
            <initial>K.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">HPDC 2011 - The 20th International ACM Symposium on High-Performance Parallel and Distributed Computing</title>
        <loc>San José, CA, United States</loc>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2011</year>
          </dateStruct>
          <ref xlink:href="http://hal.inria.fr/inria-00570682/en" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>inria-00570682/<allowbreak/>en</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid32" type="article" rend="refer" n="refercite:tran:hal-00640900">
      <identifiant type="hal" value="hal-00640900"/>
      <analytic>
        <title level="a">Towards Scalable Array-Oriented Active Storage: the Pyramid Approach</title>
        <author>
          <persName>
            <foreName>Viet-Trung</foreName>
            <surname>Tran</surname>
            <initial>V.-T.</initial>
          </persName>
          <persName>
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">ACM Operating Systems Review</title>
        <imprint>
          <biblScope type="volume">46</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2012</year>
          </dateStruct>
          <biblScope type="pages">19–25</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-00640900" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-00640900</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid33" type="article" rend="refer" n="refercite:tudoran:hal-01239128">
      <identifiant type="doi" value="10.1109/TCC.2015.2440254"/>
      <identifiant type="hal" value="hal-01239128"/>
      <analytic>
        <title level="a">OverFlow: Multi-Site Aware Big Data Management for Scientific Workflows on Clouds</title>
        <author>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes">
        <title level="j">IEEE Transactions on Cloud Computing</title>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239128" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239128</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid15" type="article" rend="year" n="cite:dorier:hal-01238103">
      <identifiant type="doi" value="10.1109/TPDS.2015.2485980"/>
      <identifiant type="hal" value="hal-01238103"/>
      <analytic>
        <title level="a">Using Formal Grammars to Predict I/O Behaviors in HPC: the Omnisc'IO Approach</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Rob</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00746">
        <idno type="issn">1045-9219</idno>
        <title level="j">IEEE Transactions on Parallel and Distributed Systems</title>
        <imprint>
          <dateStruct>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01238103" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01238103</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid14" type="article" rend="year" n="cite:ibrahim:hal-01166252">
      <identifiant type="doi" value="10.1016/j.future.2015.01.005"/>
      <identifiant type="hal" value="hal-01166252"/>
      <analytic>
        <title level="a">Governing Energy Consumption in Hadoop through CPU Frequency Scaling: an Analysis</title>
        <author>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp80104">
            <foreName>Tien-Dat</foreName>
            <surname>Phan</surname>
            <initial>T.-D.</initial>
          </persName>
          <persName key="myriads-2014-idp74496">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Houssem-Eddine</foreName>
            <surname>Chihoub</surname>
            <initial>H.-E.</initial>
          </persName>
          <persName>
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00622">
        <idno type="issn">0167-739X</idno>
        <title level="j">Future Generation Computer Systems</title>
        <imprint>
          <dateStruct>
            <month>February</month>
            <year>2015</year>
          </dateStruct>
          <biblScope type="pages">14</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01166252" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01166252</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid22" type="article" rend="year" n="cite:serbanescu:hal-01249152">
      <identifiant type="doi" value="10.1007/s11280-015-0334-4"/>
      <identifiant type="hal" value="hal-01249152"/>
      <analytic>
        <title level="a">A formal method for rule analysis and validation in distributed data aggregation service</title>
        <author>
          <persName>
            <foreName>Vlad Nicolae</foreName>
            <surname>Serbanescu</surname>
            <initial>V. N.</initial>
          </persName>
          <persName>
            <foreName>Florin</foreName>
            <surname>Pop</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Valentin</foreName>
            <surname>Cristea</surname>
            <initial>V.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid01916">
        <idno type="issn">1386-145X</idno>
        <title level="j">World Wide Web</title>
        <imprint>
          <biblScope type="volume">18</biblScope>
          <biblScope type="number">6</biblScope>
          <dateStruct>
            <month>November</month>
            <year>2015</year>
          </dateStruct>
          <biblScope type="pages">1717–1736</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01249152" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01249152</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid16" type="article" rend="year" n="cite:tudoran:hal-01239128">
      <identifiant type="doi" value="10.1109/TCC.2015.2440254"/>
      <identifiant type="hal" value="hal-01239128"/>
      <analytic>
        <title level="a">OverFlow: Multi-Site Aware Big Data Management for Scientific Workflows on Clouds</title>
        <author>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid02851">
        <idno type="issn">2168-7161</idno>
        <title level="j">IEEE Transactions on Cloud Computing</title>
        <imprint>
          <dateStruct>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239128" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239128</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid6" type="article" rend="year" n="cite:tudoran:hal-01239124">
      <identifiant type="doi" value="10.1016/j.future.2015.01.016"/>
      <identifiant type="hal" value="hal-01239124"/>
      <analytic>
        <title level="a">JetStream: Enabling high throughput live event streaming on multi-site clouds</title>
        <author>
          <persName key="kerdata-2014-idp73440">
            <foreName>Radu</foreName>
            <surname>Tudoran</surname>
            <initial>R.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Olivier</foreName>
            <surname>Nano</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Ivo</foreName>
            <surname>Santos</surname>
            <initial>I.</initial>
          </persName>
          <persName>
            <foreName>Hakan</foreName>
            <surname>Soncu</surname>
            <initial>H.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-editorial-board="yes" x-international-audience="yes" id="rid00622">
        <idno type="issn">0167-739X</idno>
        <title level="j">Future Generation Computer Systems</title>
        <imprint>
          <biblScope type="volume">54</biblScope>
          <dateStruct>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239124" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239124</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid10" type="inproceedings" rend="year" n="cite:cheriere:hal-01239341">
      <identifiant type="hal" value="hal-01239341"/>
      <analytic>
        <title level="a">On the Usability of Shortest Remaining Time First Policy in Shared Hadoop Clusters</title>
        <author>
          <persName key="kerdata-2015-idp118848">
            <foreName>Nathanaël</foreName>
            <surname>Cheriere</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Pierre</foreName>
            <surname>Donat-Bouillud</surname>
            <initial>P.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="myriads-2014-idp96208">
            <foreName>Matthieu</foreName>
            <surname>Simonin</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">SAC 2016-The 31st ACM/SIGAPP Symposium on Applied Computing</title>
        <loc>Pisa, Italy</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2016</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239341" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239341</ref>
        </imprint>
        <meeting id="cid23923">
          <title>ACM Symposium on Applied Computing</title>
          <num>31</num>
          <abbr type="sigle">SAC</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid13" type="inproceedings" rend="year" n="cite:chihoub:hal-01184235">
      <identifiant type="hal" value="hal-01184235"/>
      <analytic>
        <title level="a">Exploring Energy-Consistency Trade-offs in Cassandra Cloud Storage System</title>
        <author>
          <persName>
            <foreName>Houssem-Eddine</foreName>
            <surname>Chihoub</surname>
            <initial>H.-E.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="dionysos-2014-idp113296">
            <foreName>Yue</foreName>
            <surname>Li</surname>
            <initial>Y.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>María</foreName>
            <surname>Pérez</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp65432">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">SBAC-PAD'15-The 27th International Symposium on Computer Architecture and High Performance Computing</title>
        <loc>Florianopolis, Santa Catarina, Brazil</loc>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01184235" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01184235</ref>
        </imprint>
        <meeting id="cid313009">
          <title>International Symposium on Computer Architecture and High Performance Computing</title>
          <num>27</num>
          <abbr type="sigle">SBAC-PAD</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid17" type="inproceedings" rend="year" n="cite:dorier:hal-01224846">
      <identifiant type="doi" value="10.1145/2828612.2828622"/>
      <identifiant type="hal" value="hal-01224846"/>
      <analytic>
        <title level="a">Lessons Learned from Building In Situ Coupling Frameworks</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="moais-2014-idp120976">
            <foreName>Matthieu</foreName>
            <surname>Dreher</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Tom</foreName>
            <surname>Peterka</surname>
            <initial>T.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="moais-2014-idp86304">
            <foreName>Bruno</foreName>
            <surname>Raffin</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Justin M.</foreName>
            <surname>Wozniak</surname>
            <initial>J. M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">First Workshop on In Situ Infrastructures for Enabling Extreme-Scale Analysis and Visualization</title>
        <loc>Austin, United States</loc>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01224846" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01224846</ref>
        </imprint>
        <meeting id="cid624921">
          <title>Workshop on In Situ Infrastructures for Enabling Extreme-Scale Analysis and Visualization</title>
          <num>1</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid11" type="inproceedings" rend="year" n="cite:ibrahim:hal-01184236">
      <identifiant type="hal" value="hal-01184236"/>
      <analytic>
        <title level="a">An Eye on the Elephant in the Wild: A Performance Evaluation of Hadoop's Schedulers Under Failures</title>
        <author>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Tran Anh</foreName>
            <surname>Phuong</surname>
            <initial>T. A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ARMS-CC'15-The second workshop on Adaptive Resource Management and Scheduling for Cloud Computing, held in conjunction with PODC 2015</title>
        <loc>Donostia-San Sebastián, Spain</loc>
        <imprint>
          <dateStruct>
            <month>July</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01184236" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01184236</ref>
        </imprint>
        <meeting id="cid624679">
          <title>Workshop on Adaptive Resource Management and Scheduling for Cloud Computing</title>
          <num>2015</num>
          <abbr type="sigle"/>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid21" type="inproceedings" rend="year" n="cite:memishi:hal-01249151">
      <identifiant type="doi" value="10.1016/j.procs.2015.05.179"/>
      <identifiant type="hal" value="hal-01249151"/>
      <analytic>
        <title level="a">Diarchy: An Optimized Management Approach for MapReduce Masters</title>
        <author>
          <persName>
            <foreName>Bunjamin</foreName>
            <surname>Memishi</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>María S.</foreName>
            <surname>Pérez-Hernández</surname>
            <initial>M. S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">ICCS 2015: Proceedings of the International Conference on Computational Science, Computational Science at the Gates of Nature</title>
        <loc>Reykjavík, Iceland</loc>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2015</year>
          </dateStruct>
          <biblScope type="pages">9–18</biblScope>
          <ref xlink:href="https://hal.archives-ouvertes.fr/hal-01249151" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>archives-ouvertes.<allowbreak/>fr/<allowbreak/>hal-01249151</ref>
        </imprint>
        <meeting id="cid115862">
          <title>International Conference on Computational Science</title>
          <num>15</num>
          <abbr type="sigle">ICCS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid12" type="inproceedings" rend="year" n="cite:phan:hal-01238055">
      <identifiant type="hal" value="hal-01238055"/>
      <analytic>
        <title level="a">On Understanding the Energy Impact of Speculative Execution in Hadoop</title>
        <author>
          <persName key="kerdata-2014-idp80104">
            <foreName>Tien-Dat</foreName>
            <surname>Phan</surname>
            <initial>T.-D.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp65432">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">GreenCom'15-The 2015 IEEE International Conference on Green Computing and Communications</title>
        <loc>Sydney, Australia</loc>
        <imprint>
          <dateStruct>
            <month>December</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01238055" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01238055</ref>
        </imprint>
        <meeting id="cid402133">
          <title>IEEE/ACM International Conference on Green Computing and Communications (GreenCom)</title>
          <num>2015</num>
          <abbr type="sigle">GREENCOM</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid7" type="inproceedings" rend="year" n="cite:pinedamorales:hal-01239150">
      <identifiant type="doi" value="10.1109/CLUSTER.2015.49"/>
      <identifiant type="hal" value="hal-01239150"/>
      <analytic>
        <title level="a">Towards Multi-site Metadata Management for Geographically Distributed Cloud Workflows</title>
        <author>
          <persName>
            <foreName>Luis</foreName>
            <surname>Pineda-Morales</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">CLUSTER 2015 - IEEE International Conference on Cluster Computing</title>
        <loc>Chicago, United States</loc>
        <imprint>
          <dateStruct>
            <month>September</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239150" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239150</ref>
        </imprint>
        <meeting id="cid81665">
          <title>IEEE International Conference on Cluster Computing</title>
          <num>2015</num>
          <abbr type="sigle">Cluster</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid8" type="inproceedings" rend="year" n="cite:roman:hal-01239140">
      <identifiant type="hal" value="hal-01239140"/>
      <analytic>
        <title level="a">Understanding Spark Performance in Hybrid and Multi-Site Clouds</title>
        <author>
          <persName key="kerdata-2015-idp116256">
            <foreName>Roxana-Ioana</foreName>
            <surname>Roman</surname>
            <initial>R.-I.</initial>
          </persName>
          <persName>
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">6th International Workshop on Big Data Analytics: Challenges and Opportunities (BDAC-15)</title>
        <loc>Austin, TX, United States</loc>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01239140" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01239140</ref>
        </imprint>
        <meeting id="cid625032">
          <title>International Workshop on Big Data Analytics: Challenges and Opportunities</title>
          <num>6</num>
          <abbr type="sigle">BDAC</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid9" type="inproceedings" rend="year" n="cite:yildiz:hal-01203001">
      <identifiant type="hal" value="hal-01203001"/>
      <analytic>
        <title level="a">Chronos: Failure-Aware Scheduling in Shared Hadoop Clusters</title>
        <author>
          <persName key="kerdata-2014-idp81448">
            <foreName>Orcun</foreName>
            <surname>Yildiz</surname>
            <initial>O.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Tran Anh</foreName>
            <surname>Phuong</surname>
            <initial>T. A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-scientific-popularization="no" x-international-audience="yes" x-proceedings="yes" x-invited-conference="no" x-editorial-board="yes">
        <title level="m">BigData'15 - The 2015 IEEE International Conference on Big Data</title>
        <loc>Santa Clara, CA, United States</loc>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01203001" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01203001</ref>
        </imprint>
        <meeting id="cid624205">
          <title>IEEE International Conference on Big Data</title>
          <num>2015</num>
          <abbr type="sigle">IEEE BigData</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid19" type="techreport" rend="year" n="cite:dorier:hal-01149941">
      <identifiant type="hal" value="hal-01149941"/>
      <monogr>
        <title level="m">On the Use of Formal Grammars to Predict HPC I/O Behaviors</title>
        <author>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="kerdata-2014-idp64192">
            <foreName>Shadi</foreName>
            <surname>Ibrahim</surname>
            <initial>S.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp84024">
            <foreName>Rob</foreName>
            <surname>Ross</surname>
            <initial>R.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-8725</biblScope>
          <publisher>
            <orgName type="institution">ENS Rennes ; Inria Rennes Bretagne Atlantique ; Argonne National Laboratory ; Inria</orgName>
          </publisher>
          <dateStruct>
            <month>August</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01149941" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01149941</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid18" type="techreport" rend="year" n="cite:lebre:hal-01203648">
      <identifiant type="hal" value="hal-01203648"/>
      <monogr>
        <title level="m">The DISCOVERY Initiative - Overcoming Major Limitations of Traditional Server-Centric Clouds by Operating Massively Distributed IaaS Facilities</title>
        <author>
          <persName key="ascola-2014-idm25552">
            <foreName>Adrien</foreName>
            <surname>Lebre</surname>
            <initial>A.</initial>
          </persName>
          <persName key="ascola-2014-idp105552">
            <foreName>Jonathan</foreName>
            <surname>Pastor</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <surname>The DISCOVERY Consortium</surname>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RR-8779</biblScope>
          <publisher>
            <orgName type="institution">Inria</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2015</year>
          </dateStruct>
          <biblScope type="pages">14</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01203648" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01203648</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid23" type="techreport" rend="year" n="cite:matri:hal-01256563">
      <identifiant type="hal" value="hal-01256563"/>
      <monogr>
        <title level="m">Týr: Efficient Transactional Storage for Data-Intensive Applications</title>
        <author>
          <persName key="kerdata-2014-idp82720">
            <foreName>Pierre</foreName>
            <surname>Matri</surname>
            <initial>P.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Jesús</foreName>
            <surname>Montes</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>María</foreName>
            <surname>Pérez</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <biblScope type="number">RT-0473</biblScope>
          <publisher>
            <orgName type="institution">Inria Rennes Bretagne Atlantique ; Universidad Politécnica de Madrid</orgName>
          </publisher>
          <dateStruct>
            <month>January</month>
            <year>2016</year>
          </dateStruct>
          <biblScope type="pages">25</biblScope>
          <ref xlink:href="https://hal.inria.fr/hal-01256563" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01256563</ref>
        </imprint>
      </monogr>
      <note type="typdoc">Technical Report</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid20" type="misc" rend="year" n="cite:pinedamorales:hal-01241718">
      <identifiant type="hal" value="hal-01241718"/>
      <monogr x-scientific-popularization="no" x-editorial-board="no" x-international-audience="yes" x-proceedings="no" x-invited-conference="no">
        <title level="m">Scaling Smart Appliances for Spatial Data Synthesis</title>
        <author>
          <persName>
            <foreName>Luis</foreName>
            <surname>Pineda-Morales</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Balaji</foreName>
            <surname>Subramaniam</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Kate</foreName>
            <surname>Keahey</surname>
            <initial>K.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp66888">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="crypt-2014-idp82280">
            <foreName>Shaowen</foreName>
            <surname>Wang</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Anand</foreName>
            <surname>Padmanabhan</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Aiman</foreName>
            <surname>Soliman</surname>
            <initial>A.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <month>November</month>
            <year>2015</year>
          </dateStruct>
          <ref xlink:href="https://hal.inria.fr/hal-01241718" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">https://<allowbreak/>hal.<allowbreak/>inria.<allowbreak/>fr/<allowbreak/>hal-01241718</ref>
        </imprint>
      </monogr>
      <note type="howpublished">SC15 - ACM/IEEE International Conference in Supercomputing</note>
      <note type="bnote">Poster</note>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid1" type="misc" rend="foot" n="footcite:AmazonMapReduce">
      <monogr>
        <title level="m">Amazon Elastic MapReduce</title>
        <imprint>
          <dateStruct>
            <year>2010</year>
          </dateStruct>
          <ref xlink:href="http://aws.amazon.com/elasticmapreduce/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>aws.<allowbreak/>amazon.<allowbreak/>com/<allowbreak/>elasticmapreduce/</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid0" type="article" rend="foot" n="footcite:mapreduce">
      <analytic>
        <title level="a">MapReduce: simplified data processing on large clusters</title>
        <author>
          <persName>
            <foreName>Jeffrey</foreName>
            <surname>Dean</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Sanjay</foreName>
            <surname>Ghemawat</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Communications of the ACM</title>
        <imprint>
          <biblScope type="volume">51</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2008</year>
          </dateStruct>
          <biblScope type="pages">107–113</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid3" type="misc" rend="foot" n="footcite:eesi">
      <monogr>
        <title level="m">European Exascale Software Initiative</title>
        <imprint>
          <dateStruct>
            <year>2013</year>
          </dateStruct>
          <ref xlink:href="http://www.eesi-project.eu" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>eesi-project.<allowbreak/>eu</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid4" type="misc" rend="foot" n="footcite:etp4hpc">
      <monogr>
        <title level="m">The European Technology Platform for High-Performance Computing</title>
        <imprint>
          <dateStruct>
            <year>2012</year>
          </dateStruct>
          <ref xlink:href="http://www.etp4hpc.eu" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>etp4hpc.<allowbreak/>eu</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid2" type="misc" rend="foot" n="footcite:iesp">
      <monogr>
        <title level="m">International Exascale Software Program</title>
        <imprint>
          <dateStruct>
            <year>2011</year>
          </dateStruct>
          <ref xlink:href="http://www.exascale.org/iesp/Main_Page" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http://<allowbreak/>www.<allowbreak/>exascale.<allowbreak/>org/<allowbreak/>iesp/<allowbreak/>Main_Page</ref>
        </imprint>
      </monogr>
    </biblStruct>
    
    <biblStruct id="kerdata-2015-bid5" type="inproceedings" rend="foot" n="footcite:nicolae:2010:inria-00456801:1">
      <analytic>
        <title level="a">BlobSeer: Bringing High Throughput under Heavy Concurrency to Hadoop Map-Reduce Applications</title>
        <author>
          <persName>
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="kerdata-2014-idp62712">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="kerdata-2014-idp65432">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2014-idp74952">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">24th IEEE International Parallel and Distributed Processing Symposium (IPDPS 2010)</title>
        <loc>Atlanta, GA, USA</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">IEEE and ACM</orgName>
          </publisher>
          <dateStruct>
            <month>April</month>
            <year>2010</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote">A preliminary version of this paper has been published as Inria Research Report RR-7140</note>
    </biblStruct>
  </biblio>
</raweb>
