<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE raweb PUBLIC "-//INRIA//DTD raweb//EN" "raweb2.dtd">
<raweb xml:lang="en" year="2010">
  <identification id="kerdata" isproject="false">
    <shortname>KerData</shortname>
    <projectName>Scalable Storage for Clouds and
    Beyond</projectName>
    <domaine-de-recherche>Networks, Systems and Services,
    Distributed Computing</domaine-de-recherche>
    <theme-de-recherche>Distributed and High Performance
    Computing</theme-de-recherche>
    <UR name="Rennes"/>
    <moreinfo>
      <p>The KerData Team was officially created on July 1st,
      2009. It is a spin-off of the Paris Project-Team. It
      corresponds to the former “Data management” activity of the
      Paris Project-Team.</p>
    </moreinfo>
  </identification>
  <team id="uid1">
    <person key="paris-2006-idm124332495696">
      <firstname>Gabriel</firstname>
      <lastname>Antoniu</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>Chercheur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Team leader, Junior Researcher (CR1) 
      <span class="smallcap" align="left">Inria</span></moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="paris-2006-idm124332467968">
      <firstname>Luc</firstname>
      <lastname>Bougé</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>Enseignant</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Professor, 
      <span class="smallcap" align="left">Ens Cachan</span> Brittany
      Campus</moreinfo>
      <hdr>oui</hdr>
    </person>
    <person key="paris-2007-idm243644537488">
      <firstname>Bogdan</firstname>
      <lastname>Nicolae</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo><span class="smallcap" align="left">MENRT</span> Grant until
      September 30, 2010. Then, on a temporary ACET research
      position until December 31, 2010. PhD defended on November
      30, 2010.</moreinfo>
    </person>
    <person key="paris-2008-idm188504275952">
      <firstname>Alexandra</firstname>
      <lastname>Carpen-Amarie</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo><span class="smallcap" align="left">Inria</span> CORDI-S
      Grant</moreinfo>
    </person>
    <person key="paris-2008-idm188504272032">
      <firstname>Diana</firstname>
      <lastname>Moise</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo><span class="smallcap" align="left">Inria</span> and Brittany
      Regional Council Grant</moreinfo>
    </person>
    <person key="kerdata-2009-idm140027572976">
      <firstname>Viet-Trung</firstname>
      <lastname>Tran</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo><span class="smallcap" align="left">
      MENRT</span> Grant</moreinfo>
    </person>
    <person key="kerdata-2010-idm58934898256">
      <firstname>Houssem-Eddine</firstname>
      <lastname>Chihoub</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>PhD</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>European Marie-Curie Scalus Project. Thesis started
      in October 2010.</moreinfo>
    </person>
    <person key="alf-2009-idm185360614960">
      <firstname>Maryse</firstname>
      <lastname>Fouché</lastname>
      <affiliation>INRIA</affiliation>
      <categoryPro>Assistant</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Secretary (TR) 
      <span class="smallcap" align="left">Inria</span>.</moreinfo>
    </person>
    <person key="kerdata-2010-idm58934891200">
      <firstname>Cătălin</firstname>
      <lastname>Leordeanu</lastname>
      <affiliation>UnivEtrangere</affiliation>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>PhD student, Polytechnic University of Bucharest,
      3 months, supported by our bilateral contract</moreinfo>
    </person>
    <person key="kerdata-2010-idm58934887888">
      <firstname>Eliana</firstname>
      <lastname>Tîrşa</lastname>
      <affiliation>UnivEtrangere</affiliation>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>PhD student, Polytechnic University of Bucharest,
      3 months, supported by our bilateral contract</moreinfo>
    </person>
    <person key="kerdata-2009-idm140027562000">
      <firstname>Alexandru</firstname>
      <lastname>Costan</lastname>
      <affiliation>UnivEtrangere</affiliation>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>PhD student, Polytechnic University of Bucharest,
      3 months, supported by our bilateral contract</moreinfo>
    </person>
    <person key="kerdata-2010-idm58934881424">
      <firstname>Cristina</firstname>
      <lastname>Băsescu</lastname>
      <affiliation>UnivEtrangere</affiliation>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Master student, Polytechnic University of
      Bucharest, 3 months, supported by the INRIA Internship
      program</moreinfo>
    </person>
    <person key="kerdata-2010-idm58934878112">
      <firstname>Sînziana</firstname>
      <lastname>Mazilu</lastname>
      <affiliation>UnivEtrangere</affiliation>
      <categoryPro>Visiteur</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Master student, Polytechnic University of
      Bucharest, 4 months, supported by the INRIA Internship
      program</moreinfo>
    </person>
    <person key="kerdata-2010-idm58934874848">
      <firstname>Tuan-Viet</firstname>
      <lastname>Dinh</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Master Intern, ENS Cachan, supported by a grant of
      the ENS Cachan International Program</moreinfo>
    </person>
    <person key="kerdata-2010-idm58934871744">
      <firstname>Thi-Thu-Lan</firstname>
      <lastname>Trieu</lastname>
      <affiliation>UnivFr</affiliation>
      <categoryPro>AutreCategorie</categoryPro>
      <research-centre>Rennes</research-centre>
      <moreinfo>Master Intern, ENS Cachan</moreinfo>
    </person>
  </team>
  <presentation id="uid2">
    <bodyTitle>Overall Objectives</bodyTitle>
    <subsection id="uid3" level="1">
      <bodyTitle>Introduction</bodyTitle>
      <p spacebefore="36.0pt">More and more applications today
      generate and handle very large volumes of data on a regular
      basis. Such applications are called data-intensive.
      Governmental and commercial statistics, climate modeling,
      cosmology, genetics, bio-informatics, high-energy physics are
      just a few examples of fields where it becomes crucial to
      efficiently manipulate massive data, which are typically 
      <i>shared</i> at a large scale. With the emergence of the
      recent infrastructures (cloud computing platforms,
      post-Petascale architectures), achieving highly scalable data
      management is a critical challenge, as the overall
      application performance is highly dependent on the properties
      of the data management service.</p>
      <subsection id="id59887" level="2">
        <bodyTitle>Cloud data management</bodyTitle>
        <p>On Infrastructure-as-a-Service (IaaS) cloud
        infrastructures, computing resources are exploited on a
        per-need basis: instead of buying and managing hardware,
        users rent virtual machines and storage space. One
        important issue is thus the support for storing and
        processing data on externalized, virtual storage resources.
        Such needs require simultaneous investigation of important
        aspects related to performance, scalability, security and
        quality of service. Moreover, the impact of physical
        resource sharing also needs careful consideration.</p>
      </subsection>
      <subsection id="id59896" level="2">
        <bodyTitle>Data management for Post-Petascale
        systems</bodyTitle>
        <p>In parallel with the emergence of cloud infrastructures,
        considerable efforts are now under way to build 
        <i>Petascale computing systems</i>, such as Blue Waters ( 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.ncsa.illinois.edu/BlueWaters/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
        <allowbreak/>www. 
        <allowbreak/>ncsa. 
        <allowbreak/>illinois. 
        <allowbreak/>edu/ 
        <allowbreak/>BlueWaters/ 
        <allowbreak/></ref>). Such systems aim to provide
        sustained Petaflop performance to a much wider spectrum of
        science and engineering applications. On such
        infrastructures, data management is again a critical issue
        with a high impact on the application performance. Such
        supercomputers exhibit specific architectural features
        (e.g., a multi-level memory hierarchy scalable to tens to
        hundreds of thousands of cores) that are specifically
        designed to support a high degree of parallelism. In order
        to keep up with such advances, the storage service has to
        scale accordingly, which is clearly challenging.</p>
        <p>Our research activities address the area of distributed
        data management at challenging scales on various
        distributed systems, with a particular focus on 
        <i>clouds</i>, and 
        <i>Post-Petascale infrastructures</i>. We target
        data-oriented high-performance applications that exhibit
        the need to handle massive unstructured data - BLOBs:
        binary large objects (in the order of Terabytes) - stored
        in a large number of nodes (thousands to tens of
        thousands), accessed under heavy concurrency by a large
        number of clients (thousands to tens of thousands at a
        time) with a relatively fine access grain (in the order of
        Megabytes). Examples of such applications are:</p>
        <simplelist>
          <li id="uid4">
            <p noindent="true">Cloud data-mining applications
            (e.g., based on the MapReduce paradigm) handling
            massive data distributed at a large scale.</p>
          </li>
          <li id="uid5">
            <p noindent="true">Advanced (e.g.,
            concurrency-optimized, versioning-oriented) cloud
            services both for user-level data storage and for
            virtual machine image storage and management at IaaS
            level.</p>
          </li>
          <li id="uid6">
            <p noindent="true">Distributed storage for Petaflop
            computing applications.</p>
          </li>
          <li id="uid7">
            <p noindent="true">Data storage for desktop grid
            applications with high write throughput
            requirements.</p>
          </li>
        </simplelist>
      </subsection>
    </subsection>
    <subsection id="uid8" level="1">
      <bodyTitle>Highlights</bodyTitle>
      <descriptionlist>
        <label>Team leadership.</label>
        <li id="uid9">
          <p noindent="true">The KerData Team is led by
          G. Antoniu since July 2010. His mission is to submit
          a proposal to become a fully-fledged Project-Team within
          the year.</p>
        </li>
        <label>ANR Project with Argonne National Lab, UIUC and
        IBM.</label>
        <li id="uid10">
          <p noindent="true">A new project, led by G. Antoniu,
          has been accepted by the ANR ARPEGE 2010 Program on
          embedded systems and large infrastructures. This project
          is devoted to using MapReduce programming paradigm on
          clouds and hybrid infrastructures.</p>
        </li>
        <label>INRIA-Microsoft Project.</label>
        <li id="uid11">
          <p noindent="true">A new project, led by G. Antoniu
          and B. Thirion (Parietal Project-Team, 
          <span class="smallcap" align="left">Inria Saclay –
          Île-de-France</span>), has started in collaboration with
          Microsoft Research. This project, conducted within the
          framework of the Microsoft Research - INRIA Joint Research
          Center, involves Microsoft's
          <i>Azure</i> cloud computing platform.</p>
        </li>
        <label>New Associate Team created.</label>
        <li id="uid12">
          <p noindent="true">A new Associate Team led by
          G. Antoniu (DataCloud@work, 
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.irisa.fr/kerdata/doku.php?id=cloud_at_work:start" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>irisa. 
          <allowbreak/>fr/ 
          <allowbreak/>kerdata/ 
          <allowbreak/>doku. 
          <allowbreak/>php?id=cloud_at_work:start</ref>) was
          created in 2010, in partnership with the Politehnica
          University of Bucharest and with the MYRIADS INRIA
          Team.</p>
        </li>
        <label>Partnership with the INRIA-UIUC Joint
        Laboratory.</label>
        <li id="uid13">
          <p noindent="true">We have set up a partnership with the
          INRIA-UIUC Joint Laboratory for Petascale Computing at
          Urbana-Champaign. Several mutual visits and internships
          were organized in this framework and numerous
          collaborations are on track in the context of the Blue
          Waters Project, expected to become one of the world's
          most powerful supercomputers when it comes online in
          2011, with sustained Petaflop performance.</p>
        </li>
        <label>TCPP Best PhD Poster Award.</label>
        <li id="uid14">
          <p noindent="true">It has been awarded to B. Nicolae
          at the IPDPS 2010 conference in Atlanta, GA, USA.</p>
        </li>
        <label>ENS-INRIA Prize of excellence.</label>
        <li id="uid15">
          <p noindent="true">G. Antoniu and L. Bougé have
          initiated the proposal and creation of the ENS-INRIA
          Prize of excellence. It is targeted to Romanian
          high-school students who won the National Olympiad of
          Informatics. A first group of 5 students have been hosted
          in Rennes and Paris in June 2010. They visited the two
          sites of ENS Cachan (Cachan and Bruz) and two INRIA
          centers ( 
          <span class="smallcap" align="left">Inria Rennes –
          Bretagne Atlantique</span> and
          <span class="smallcap" align="left">Inria Paris –
          Rocquencourt</span>).</p>
        </li>
        <label>1 PhD thesis defended.</label>
        <li id="uid16">
          <p noindent="true">The first PhD thesis of the KerData
          Team (B. Nicolae) was defended on 30 November
          2010.</p>
        </li>
      </descriptionlist>
    </subsection>
  </presentation>
  <fondements id="uid17">
    <bodyTitle>Scientific Foundations</bodyTitle>
    <subsection id="uid18" level="1">
      <bodyTitle>Introduction</bodyTitle>
      <p>Managing data at large scales is paramount nowadays.
      Governmental and commercial statistics, climate modeling,
      cosmology, genetics, bio-informatics, etc. are just a few
      examples of fields routinely generating huge amounts of data.
      It becomes crucial to efficiently manipulate these data,
      which are typically shared at the global scale. In such a
      context, one important goal is to provide mechanisms for
      managing massive data blocks (e.g., of several terabytes),
      while providing efficient fine-grain access to small parts of
      the data. Several application areas exhibit such a need for
      efficient scaling to huge data sizes: data mining
      applications  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid0" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, multimedia applications  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, database-oriented
      applications ( 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid2" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid3" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid4" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>), bioinformatic applications,
      etc.</p>
    </subsection>
    <subsection id="uid19" level="1">
      <bodyTitle>Transparent, distributed data sharing</bodyTitle>
      <p>The management of massive data blocks naturally requires
      the use of data fragmentation and of distributed storage.
      Grid infrastructures, typically built by aggregating
      distributed resources that may belong to different
      administration domains, were built during the last years with
      the goal of providing an appropriate solution. When
      considering the existing approaches to grid data management,
      we can notice that most of them heavily rely on 
      <i>explicit</i> data localization and on
      <i>explicit</i> transfers of large amounts of data across the
      distributed architecture: GridFTP  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid5" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, LDR  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid6" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, Chirp  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid7" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, IBP  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid8" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, NeST  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid9" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, etc. Managing huge amounts of
      data in such an explicit way at a very large scale makes the
      design of grid application much more complex. One key issue
      to be addressed is therefore the 
      <i>transparency</i> with respect to data localization and data
      movements. Such a transparency is highly suitable, as it
      liberates the user from the need to handle data localization
      and transfers.</p>
      <p>Several approaches to grid data management acknowledge
      that providing a transparent data access model is important.
      They integrate this idea at the early stages of their design.
      
      <i>Grid file systems</i>, for instance, provide a familiar,
      file-oriented API for transparently accessing physically
      distributed data through globally unique, logical file paths.
      The applications simply open and access such files as if they
      were stored on a local file system. A very large distributed
      storage space is thus made available to those existing
      applications that usually use file storage, with no need for
      modifications. This approach has been taken by a few projects
      like GFarm  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid10" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, GridNFS  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid11" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, LegionFS  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid12" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, etc.</p>
      <p>On the other hand, the transparent data access model is
      equally defended by the concept of 
      <i>grid data-sharing service</i>  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid13" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, illustrated for instance by the
      JuxMem platform  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid14" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Such a service provides the
      grid applications with the abstraction of a globally shared
      memory, in which data can be easily stored and accessed
      through global identifiers. To meet this goal, the design of
      JuxMem leverages the strengths of several building blocks:
      consistency protocols inspired by Distributed Shared Memory
      (DSM) systems; algorithms for fault-tolerant distributed
      systems; protocols for scalability and volatility support
      from peer-to-peer (P2P) systems.</p>
    </subsection>
    <subsection id="uid20" level="1">
      <bodyTitle>Managing massive unstructured data under heavy
      concurrency on large-scale distributed
      infrastructures</bodyTitle>
      <subsection id="id60554" level="2">
        <bodyTitle>Massive unstructured data: BLOBs</bodyTitle>
        <p>Studies show more than 80%  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid15" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> of data globally in circulation
        is unstructured. On the other hand, data sizes increase at
        a dramatic level with more than 1 TB of data gathered
        per week in common scenarios for some production
        applications (e.g., medical experiments  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). Finally, on Post-Petascale
        HPC machines, the use of huge storage objects is also
        currently being considered as a promising alternative to
        today's dominant approaches to data management. Indeed,
        these approaches rely on very large numbers of small files,
        and using huge storage objects reduces the corresponding
        metadata overhead of the file system. Such huge
        unstructured data are stored as 
        <i>binary large objects (BLOBs)</i> that may continuously be
        updated by applications. However, traditional databases or
        file systems can hardly cope in an efficient way with BLOBs
        which grow to huge sizes.</p>
      </subsection>
      <subsection id="id60615" level="2">
        <bodyTitle>Scalable processing of massive data: heavy
        access concurrency</bodyTitle>
        <p>To address the scalability issue, specialized
        abstractions like MapReduce  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> and Pig-Latin
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid18" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> propose high-level data
        processing frameworks intended to hide the details of
        parallelization from the user. Such platforms are
        implemented on top of huge object storage platforms. They
        target high performance by optimizing the parallel
        execution of the computation. This leads to 
        <i>heavy access concurrency</i> to the BLOBs, thus the need
        for the storage layer to offer support in this regard.
        Parallel and distributed file systems also consider using
        objects for low-level storage (see next subsection  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). In other application areas,
        huge BLOBs need to be used concurrently at the highest
        level layers of applications directly: high-energy physics,
        multimedia processing  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid1" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> or astronomy.</p>
      </subsection>
      <subsection id="id60736" level="2">
        <bodyTitle>Versioning</bodyTitle>
        <p>When addressing the problem of storing and efficiently
        accessing very large unstructured data objects  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid22" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid16" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> in a distributed environment, a
        challenging case is the one where data is 
        <i>mutable</i> and potentially accessed by a very large
        number of concurrent, distributed processes. In this
        context, 
        <i>versioning</i> is an important feature. Not only does it
        allow data changes to be rolled back when desired, but it also
        enables cheap branching (possibly recursively): the same
        computation may proceed independently on different versions
        of the BLOB. Versioning should obviously not impact access
        performance to the object significantly, given that objects
        are under constant heavy access concurrency. On the other
        hand, versioning leads to increased storage space usage and
        becomes a major concern when the data size itself is huge.
        Versioning efficiency thus refers to both access
        performance under heavy load and reasonably acceptable
        overhead of storage space.</p>
      </subsection>
    </subsection>
    <subsection id="uid21" level="1">
      <bodyTitle>Towards scalable, BLOB-based distributed file
      systems</bodyTitle>
      <p>Recent research  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> emphasizes a clear move
      currently in progress from a block-based interface to an
      object-based interface in storage architectures. The goal is
      to enable scalable, self-managed storage networks by moving
      low-level functionalities such as space management to storage
      devices or to storage servers, accessed through a standard
      object interface. This move has a direct impact on the design
      of today's distributed file systems: object-based file systems
      would then store data as objects rather than as unstructured
      data blocks. According to  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid23" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, this move may eliminate nearly
      90% of the management workload, which has been the major
      obstacle limiting file systems' scalability and performance.</p>
      <p>Two approaches exploit this idea. In the first approach,
      the data objects are stored and manipulated directly by a new
      type of storage device called 
      <i>object-based storage device</i> (OSD). This approach
      requires an evolution of the hardware, in order to allow
      high-level object operations to be delegated to the storage
      device. The standard OSD interface was defined in the Storage
      Networking Industry Association (SNIA) OSD working group. The
      protocol is embodied over SCSI and defines a new set of SCSI
      commands. Recently, a second generation of the command set,
      Object-Based Storage Devices - 2 (OSD-2) has been defined.
      The distributed file systems taking the OSD approach assume
      the presence of such an OSD in the near future and currently
      rely on a software module simulating its behavior. Examples
      of parallel/distributed file systems following this approach
      are Lustre  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid24" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> and Ceph  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid20" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>. Recently, research
      efforts  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid19" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> have explored the
      feasibility and the possible benefits of integrating OSDs
      into parallel file systems, such as PVFS  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid25" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>The second approach does not rely on the presence of OSDs,
      but still tries to benefit from an object-based approach to
      improve performance and scalability: files are structured as
      a set of objects that are stored on storage servers. Google
      File System  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid21" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, and HDFS ( 
      <i>Hadoop File System</i>)  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid26" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> illustrate this approach.</p>
    </subsection>
    <subsection id="uid22" level="1">
      <bodyTitle>Emerging large-scale infrastructures for
      distributed applications</bodyTitle>
      <p>During the last few years, research and development in the
      area of large-scale distributed computing led to the clear
      emergence of several types of physical execution
      infrastructures for large-scale distributed applications.</p>
      <subsection id="id60956" level="2">
        <bodyTitle>Cloud computing infrastructures</bodyTitle>
        <p>The cloud computing model 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid27" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid28" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid29" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> is gaining serious interest
        from both industry and academia in the area of large-scale
        distributed computing. It provides a new paradigm for
        managing computing resources: instead of buying and
        managing hardware, users rent virtual machines and storage
        space. Various cloud software stacks have been proposed by
        leading industry companies, like Google, Amazon or Yahoo!.
        They aim at providing fully configurable virtual machines
        or virtual storage (IaaS: 
        <i>Infrastructure-as-a-Service</i>), higher-level services
        including programming environments such as MapReduce 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid17" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>( 
        <i>PaaS: Platform-as-a-Service</i>
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid30" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid31" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) or community-specific
        applications ( 
        <i>SaaS: Software-as-a-Service</i>
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid32" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid33" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>). On the academic side, two of
        the most visible projects in this area are Nimbus 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid34" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
         <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid35" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> from the Argonne National Lab
        (USA) and OpenNebula 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid36" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, which aim at providing a
         reference implementation for an IaaS. In parallel to these
        trends, other research efforts focused on the concept of
        grid operating system: a distributed operating system for
        large-scale wide-area dynamic infrastructure spanning
        multiple administrative domains. XtreemOS 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid37" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
         <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid38" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> is such a grid operating
        system, which provides native support for virtual
        organizations. Since both the cloud approach and the grid
        operating system approach deal with resource management on
        large-scale distributed infrastructures, the relative
        positioning of these two approaches with respect to each
         other is currently subject to ongoing investigation
        within the PARIS/MYRIADS Project-Team ( 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.irisa.fr/myriads/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
        <allowbreak/>www. 
        <allowbreak/>irisa. 
        <allowbreak/>fr/ 
        <allowbreak/>myriads/ 
        <allowbreak/></ref>) at 
        <span class="smallcap" align="left">Inria Rennes – Bretagne
        Atlantique</span>
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid39" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
        <p>In the context of the emerging cloud infrastructures,
        some of the most critical open issues relate to data
        management. Providing the users with the possibility to
        store and process data on externalized, virtual resources
        from the cloud requires simultaneously investigating
        important aspects related to security, efficiency and
        quality of service. To this purpose, it clearly becomes
        necessary to create mechanisms able to provide feedback
        about the state of the storage system along with the
        underlying physical infrastructure. The information thus
        monitored, can further be fed back into the storage system
        and used by self-managing engines, in order to enable an
        autonomic behavior  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid40" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid41" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid42" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, possibly with several goals
        such as self-configuration, self-optimization, or
        self-healing. Exploring ways to address the main challenges
        raised by data storage and management on cloud
        infrastructures is the major factor that motivated the
         creation of the KerData research team at 
        <span class="smallcap" align="left">Inria Rennes – Bretagne
        Atlantique</span>. These topics are at the heart of our
        involvement in several projects that we are leading in the
        area of cloud storage: MapReduce (see Section  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid51" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>), AzureBrain (see
        Section  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid48" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>), DataCloud@work (see
        Section  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid52" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>).</p>
      </subsection>
      <subsection id="id61347" level="2">
        <bodyTitle>Petascale infrastructures</bodyTitle>
        <p>In 2011, a new NSF-funded Petascale computing system,
        Blue Waters, will go online at the University of Illinois.
        Blue Waters is expected to be the most powerful
        supercomputer in the world for open scientific research
        when it comes online. It will be the first system of its
        kind to sustain one-Petaflop performance on a range of
        science and engineering applications. The goal of this
        facility is to open up new possibilities in science and
         engineering. It provides unheard-of computational capability.
        It makes it possible for investigators to tackle much
        larger and more complex research challenges across a wide
        spectrum of domains: predict the behavior of complex
        biological systems, understand how the cosmos evolved after
        the Big Bang, design new materials at the atomic level,
        predict the behavior of hurricanes and tornadoes, and
        simulate complex engineered systems like the power
        distribution system and airplanes and automobiles.</p>
        <p>To reach sustained-Petascale performance, machines like
         Blue Waters rely on advanced, dedicated technologies at
        several levels: processor, memory subsystem, interconnect,
        operating system, programming environment, system
        administration tools. In this context, data management is
        again a critical issue that highly impacts the application
        behavior and its overall performance. Petascale
        supercomputers exhibit specific architectural features
         (e.g., a multi-level memory hierarchy scalable to tens to
         hundreds of thousands of cores) that need to be
        specifically taken into account. Providing scalable data
        throughput on such unprecedented scales is clearly an open
        challenge today. In this context, we are investigating
        techniques to achieve concurrency-optimized I/O in
        collaboration with teams from the National Center for
        Supercomputing Applications (NCSA/UIUC) in the framework of
         the Joint INRIA-UIUC Lab for Petascale Computing (see
        Section  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid58" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>).</p>
      </subsection>
      <subsection id="id61373" level="2">
        <bodyTitle>Desktop grids</bodyTitle>
        <p>During the recent years, Desktop grids have been
        extensively investigated as an efficient way to build
        cheap, large-scale virtual supercomputers by gathering idle
        resources from a very large number of users. A possible
        approach is to rely on clusters of workstations belonging
        to institutions and interconnected through dedicated,
        high-throughput wide-area interconnect, which is the
        typical physical infrastructure for Grid Computing. In
        contrast, Desktop grids rely on desktop computers from
        individual users, interconnected through Internet, provided
        by 
        <i>volunteer users</i>. The initial, widely-spread usage of
        Desktop grids for parallel applications consisting in
        non-communicating tasks with small input/output parameters
        is a direct consequence of the physical infrastructure.
        Actually, volatile nodes and low bandwidth are not suitable
        for communication-intensive parallel applications with high
        input or output requirements. However, the increasing
        popularity of volunteer computing projects has
         progressively led to enlarging the set of application
         classes that might benefit from Desktop Grid infrastructures.
        If we consider distributed applications where tasks need
        very large input data, it is no longer feasible to rely on
        regular centralized server-based Desktop Grid
        architectures. Actually, the input data is there typically
        embedded in the job description and sent to workers. Such a
        strategy could lead to significant bottlenecks as the
        central server gets overwhelmed by download requests. To
        cope with such data-intensive applications, alternative
        approaches based on P2P techniques and Content Distribution
        Networks  
         <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid43" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/> have been proposed, with the
        goal of offloading the transfer of the input data from the
         central servers to the other nodes participating in the
        system, with potentially under-used bandwidth.</p>
        <p>In the general case, Desktop Grids rely on resources
        contributed by volunteers. Enterprise Desktop Grids are a
        particular case of Desktop Grids leveraging unused
        processing cycles and storage space available within the
        enterprise. The emergence of cloud infrastructures has
        opened new perspectives to the development of Desktop
        Grids, as new types of usage may benefit from a 
        <i>hybrid</i>, simultaneous use of these two types of
        infrastructures. In a typical scenario of this kind, an
        enterprise would not use dedicated, on-site hardware
        resources for a particular need for data-intensive
        analysis, e.g., to process commercial statistics. It would
        rather rely on free unused internal resources using the
        Enterprise Desktop Grid model, and, in extension to them,
        would rent resources from the cloud. Both architectures are
        suitable for massively parallel processing and this is why
        we intend to explore the potential advantages of using such
        hybrid infrastructures in the framework of the MapReduce
        project (see Section  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid51" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>).</p>
      </subsection>
    </subsection>
    <subsection id="uid23" level="1">
      <bodyTitle>Emerging programming models for scalable
      data-management</bodyTitle>
      <p>MapReduce is a parallel programming paradigm successfully
      used by large Internet service providers to perform
      computations on massive amounts of data. A computation takes
      a set of input key/value pairs, and produces a set of output
      key/value pairs. The user of a MapReduce library expresses
      the computation as two functions: 
      <i>map</i>, that processes a key/value pair to generate a set
      of intermediate key/value pairs, and 
      <i>reduce</i>, that merges all intermediate values associated
      with the same intermediate key. The framework takes care of
      splitting the input data, scheduling the jobs' component
      tasks, monitoring them and re-executing the failed ones.
      After being strongly promoted by Google, it has also been
      implemented by the open source community through the Hadoop
      project, maintained by the Apache Foundation and supported by
      Yahoo! and even by Google itself. This model is currently
      getting more and more popular as a solution for rapid
      implementation of distributed data-intensive applications.
      The key strength of the MapReduce model is its inherently
      high degree of potential parallelism that should enable
      processing of Petabytes of data in a couple of hours on large
      clusters consisting of several thousand nodes.</p>
      <p>At the core of the MapReduce frameworks lies a key
      component: the storage layer. To enable massively parallel
      data processing to a high degree over a large number of
      nodes, the storage layer must meet a series of specific
      requirements. Firstly, since data is stored in huge files,
      the computation will have to efficiently process small parts
      of these huge files concurrently. Thus, the storage layer is
      expected to provide efficient 
      <i>fine-grain access</i> to the files. Secondly, the storage
      layer must be able to sustain a 
      <i>high throughput</i> in spite of 
      <i>heavy access concurrency</i> to the same file, as thousands
      of clients simultaneously access data.</p>
      <p>These critical needs of data-intensive distributed
      applications have not been addressed by classical,
      POSIX-compliant distributed file systems. Therefore,
      specialized file systems have been designed, such as HDFS,
      the default storage layer of Hadoop. HDFS has however some
      difficulties in sustaining a high throughput in the case of
      concurrent accesses to the same file. Amazon's cloud
      computing initiative, Elastic MapReduce, employs Hadoop on
      their Elastic Compute Cloud infrastructure (EC2) and inherits
      these limitations. The storage back-end used by Hadoop is
      Amazon's Simple Storage Service (S3), which provides limited
      support for concurrent accesses to shared data. Moreover,
      many desirable features are missing altogether, such as the
      support for versioning and for concurrent updates to the same
      file. Finally, another important requirement for the storage
      layer is its ability to expose an interface that enables the
      application to be 
      <i>data-location aware</i>. This is critical in order to
      allow the scheduler to use this information to place
      computation tasks close to the data and thus reduce network
      traffic, contributing to a better global data throughput.
      These topics are at the core of KerData's contribution to the
      MapReduce ANR project and to the Hemera large wingspan
      project (both started in 2010, see Section  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid51" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>).</p>
    </subsection>
  </fondements>
  <domaine id="uid24">
    <bodyTitle>Application Domains</bodyTitle>
    <subsection id="uid25" level="1">
      <bodyTitle>Introduction</bodyTitle>
      <p>The research carried out within the KerData team targets
      applications that handle massive data that are fragmented,
      distributed, shared and accessed under heavy concurrency at a
      large scale.</p>
      <simplelist>
        <li id="uid26">
          <p noindent="true">Massively parallel data-mining
          applications (e.g., MapReduce-based data analysis).</p>
        </li>
        <li id="uid27">
          <p noindent="true">Advanced PaaS-level cloud data
          services requiring efficient data sharing under heavy
          concurrency.</p>
        </li>
        <li id="uid28">
          <p noindent="true">I/O-intensive scientific simulations
          for Post-Petascale infrastructures.</p>
        </li>
        <li id="uid29">
          <p noindent="true">Desktop grid applications with high
          write throughput requirements.</p>
        </li>
      </simplelist>
      <p>In the current projects started in 2010 we specifically
      work on providing concurrency-optimized data storage and
      management for the following applications.</p>
    </subsection>
    <subsection id="uid30" level="1">
      <bodyTitle>Structural protein analysis on clouds based on
      MapReduce: SuMo and MED-SuMo</bodyTitle>
      <p>In the framework of the MapReduce ANR project led by
      KerData (started in October 2010) we will validate our
      techniques for concurrency-optimized data management with an
      application study from the bioinformatics field. It will
      focus on the SuMo application proposed by Institute for
      Biology and Chemistry of the Proteins from Lyon (a partner of
      the MapReduce project). This application performs structural
      protein analysis by comparing a set of protein structures
      against a very large set of structures stored in a huge
      database. This is a typical data-intensive application that
      can leverage the MapReduce model for a scalable execution on
      large-scale distributed platforms.</p>
      <p>If the results are convincing, then they can immediately
      be applied to the derivative version of this application for
      drug design in industrial context called MED-SuMo, managed by
      the MEDIT SME (also a partner of this project). Regarding
      pharmaceutical and biotech industries, such a scalable
      implementation run over a cloud computing facility opens new
      perspectives for drug design. Rather than searching for 3D
      similarity into biostructural data, it will become possible
      to classify the entire biostructural space and to
      periodically update all derivative predictive models with new
      experimental data. The applications of that complete
      chemo-proteomic vision address the identification of new
      druggable protein target, the detection of new allosteric
      binding site suitable to increase the selectivity of a drug
      compound, the generation of new drug candidates by a
      fragment-based approach over protein-ligand biostructural
      data, and other new protocols under development at MEDIT.</p>
    </subsection>
    <subsection id="uid31" level="1">
      <bodyTitle>Joint genetic and neuroimaging data analysis on
      clouds</bodyTitle>
      <p>The AzureBrain Project started in October 2010 within the
      Microsoft Research-INRIA Joint Research Center. In this
      framework, we focus on a data-analysis application whose goal
      is to find statistically relevant correlations across two
      huge sets containing genetic data and neuroimaging data
      respectively, for large cohorts of subjects. In the genome
      dimension, genotyping DNA chips make it possible to record several
      hundreds of thousands of values per subject, whereas in the
      imaging dimension a fMRI volume may contain hundreds of
      thousands to millions of voxels. Finding the brain and genome
      regions that may be involved in this link entails a huge
      number of hypotheses, hence a drastic correction of the
      statistical significance of pairwise relationships, which in
      turn crucially reduces the sensitivity of statistical
      procedures that aim at detecting the association.</p>
      <p>We collaborate with the PARIETAL team from 
      <span class="smallcap" align="left">Inria Saclay –
      Île-de-France</span>, who works on such optimized techniques
      for joint genetic and neuroimaging analysis. We plan to
      redesign the application using a cloud-oriented programming
      model such as MapReduce, and then to adapt and evaluate the
      whole software stack (application, programming engine,
      BlobSeer-based storage components) on Microsoft's Azure
      platform. The input application data will be taken from the
      Imagen FP6 project ( 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.imagen-europe.com/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
      <allowbreak/>www. 
      <allowbreak/>imagen-europe. 
      <allowbreak/>com/ 
      <allowbreak/></ref>) that aims at investigating factors of
      addiction in a population of adolescents; Imagen's database
      contains multi-modal neuroimaging as well as genetics and
      psychological data on more than 1000 (possibly 2000
      within a few years) subjects. This database is hosted and
      processed at Neuropsin and is available to the grant partners
      for research purpose.</p>
    </subsection>
    <subsection id="uid32" level="1">
      <bodyTitle>I/O intensive tornado simulation for the Blue
      Waters post-Petascale machine</bodyTitle>
      <p>The Blue Waters machine ( 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.ncsa.illinois.edu/BlueWaters/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
      <allowbreak/>www. 
      <allowbreak/>ncsa. 
      <allowbreak/>illinois. 
      <allowbreak/>edu/ 
      <allowbreak/>BlueWaters/ 
      <allowbreak/></ref>) is expected to be one of the most
      powerful supercomputers in the world when it comes online in
      2011. It will have a peak performance of 10 Petaflops
      (10 quadrillion calculations every second) and will achieve 
      <i>sustained</i> performance of 1 Petaflop running a
      range of science and engineering codes. Research at the Joint
      INRIA-UIUC (University of Illinois at Urbana-Champaign) Lab
      for Petascale computing (JLPC) is currently in progress in
      several directions, with the global goal of efficiently
      exploiting this machine that will serve to run heavy,
      data-intensive or computation-intensive simulations.</p>
      <p>Such simulations usually require to be coupled with
      visualization tools. On supercomputers, previous studies
      already showed the need of adapting the I/O path from data
      generation to visualization. In the framework of the JLPC we
      started to investigate concurrency-optimized I/O techniques
      to achieve this goal. We focus on a particular tornado
      simulation called CM1, which is intended to be run on the
      BlueWaters machine. This simulation currently generates large
      amount of data in many files, in a way that is not adapted
      for later visualization. We started to explore the use of
      BlobSeer, a large-scale data management service designed by
      the KerData team, as an intermediate layer between the
      simulation, the filesystem and visualization tools.
      Concurrency control optimizations enabled by BlobSeer will be
      tuned to ensure efficient access to the files managed by the
      underlying file system. A preliminary study done by Matthieu
      Dorier (Master student at ENS Cachan - Brittany) during a
      3-month internship at UIUC, co-advised by Marc Snir, Franck
      Cappello and G. Antoniu, has demonstrated the benefits
      of a new approach using dedicated I/O cores (see
      Section  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid58" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>).</p>
    </subsection>
  </domaine>
  <logiciels id="uid33">
    <bodyTitle>Software</bodyTitle>
    <subsection id="uid34" level="1">
      <bodyTitle>BlobSeer</bodyTitle>
      <participants>
        <person key="paris-2006-idm124332495696">
          <firstname>Gabriel</firstname>
          <lastname>Antoniu</lastname>
        </person>
        <person key="paris-2006-idm124332467968">
          <firstname>Luc</firstname>
          <lastname>Bougé</lastname>
        </person>
        <person key="paris-2007-idm243644537488">
          <firstname>Bogdan</firstname>
          <lastname>Nicolae</lastname>
        </person>
      </participants>
      <descriptionlist>
        <label>Contacts:</label>
        <li id="uid35">
          <p noindent="true">
            <tt>Bogdan.Nicolae@inria.fr,
            Gabriel.Antoniu@inria.fr</tt>
          </p>
        </li>
        <label>URL:</label>
        <li id="uid36">
          <p noindent="true">
            <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://blobseer.gforge.inria.fr/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
            <allowbreak/>blobseer. 
            <allowbreak/>gforge. 
            <allowbreak/>inria. 
            <allowbreak/>fr/ 
            <allowbreak/></ref>
          </p>
        </li>
        <label>License:</label>
        <li id="uid37">
          <p noindent="true">GNU Lesser General Public License
          (LGPL) version 3.</p>
        </li>
        <label>Status:</label>
        <li id="uid38">
          <p noindent="true">This software is available on INRIA's
          forge. Registration of version 1.0 (released late 2010)
          with APP is in progress.</p>
        </li>
        <label>Presentation:</label>
        <li id="uid39">
          <p noindent="true">BlobSeer is a data storage service
          specifically designed to deal with the requirements of
          large-scale data-intensive distributed applications that
          abstract data as huge sequences of bytes, called BLOBs
          (Binary Large OBjects). It exports a simple, yet
          versatile versioning interface to manipulate BLOBs that
          enables reading, writing and appending to them. BlobSeer
          offers both scalability and performance with respect to a
          series of issues typically associated with the
          data-intensive context: 
           <i>scalable aggregation of storage space</i> from the
           participating nodes with minimal overhead, ability to
           store 
           <i>huge data objects</i>, 
           <i>efficient fine-grain access</i> to data subsets, 
          <i>high throughput in spite of heavy access
          concurrency</i>, as well as 
          <i>fault-tolerance</i>.</p>
          <p>Development started in January 2008. The
          implementation is built on top of the Boost collection of
          C++ libraries, Berkeley DB and libconfig. Additional
          scripting in Perl/Python handles deployment on 
          <span class="smallcap" align="left">Grid'5000</span>,
          which is done through the 
           <i>OAR</i> resource scheduler. Benchmarking so far has
          proven correctness and scalable performance with up to
          400 nodes from 3 different sites.</p>
          <p>The latest stable version of BlobSeer, v1.0, brings a
          large set of new features and improvements whose
          usefulness was experimentally validated during the course
          of 2010. Of particular importance to the user are two new
          features: (1) the support to efficiently clone BLOBs by
          using a new, dedicated primitive that was added to the
          access interface; and (2) a POSIX access interface to
          BLOBs (implemented over FUSE) that enables applications
          to access BLOBs using standard I/O calls, while retaining
          the ability to perform BLOB-specific manipulations (such
          as access to past versions and cloning) through 
          <i>ioctls</i>.</p>
        </li>
      </descriptionlist>
    </subsection>
  </logiciels>
  <resultats id="uid40">
    <bodyTitle>New Results</bodyTitle>
    <subsection id="uid41" level="1">
      <bodyTitle>BlobSeer</bodyTitle>
      <participants>
        <person key="paris-2007-idm243644537488">
          <firstname>Bogdan</firstname>
          <lastname>Nicolae</lastname>
        </person>
        <person key="paris-2006-idm124332495696">
          <firstname>Gabriel</firstname>
          <lastname>Antoniu</lastname>
        </person>
        <person key="paris-2006-idm124332467968">
          <firstname>Luc</firstname>
          <lastname>Bougé</lastname>
        </person>
        <person key="paris-2008-idm188504272032">
          <firstname>Diana</firstname>
          <lastname>Moise</lastname>
        </person>
        <person key="paris-2008-idm188504275952">
          <firstname>Alexandra</firstname>
          <lastname>Carpen-Amarie</lastname>
        </person>
      </participants>
      <p>Several contributions were achieved that relate directly
      to the core functionality of BlobSeer.</p>
      <p>First, we refined the design principles behind BlobSeer
      and placed them in the context of scalable distributed
      storage systems: if combined together, these principles can
      help designers of distributed storage systems to meet the
      need for highly scalable data management. In particular, we
      focused on the potentially large benefits of using versioning
      to improve application data access performance under heavy
      concurrency. In this context, we extended the
      versioning-based access interface of BlobSeer with new
      primitives that further enhance the potential to exploit the
      inherent parallelism of data workflows efficiently.</p>
      <p>Second, we proposed a generalization for a set of
      versioning algorithms for data management originally
      implemented in BlobSeer and published in the previous years.
      We have introduced new data structures and redesigned several
      aspects to account for better decentralized metadata
      management, fine-grain access at arbitrary offsets,
      asynchrony, fault tolerance and last but not least allow the
      user to explicitly control written data layout such that it
      is optimally distributed for reading.</p>
      <p>Third, we extended the scope of our experimental
      evaluation and performed synthetic benchmarks that push the
      system to its limits. The system demonstrated a high throughput under
      heavy access concurrency, even when metadata is replicated in
      order to provide fault tolerance. Furthermore, we extended
      the evaluation of BlobSeer as a storage back-end for Hadoop
      MapReduce and highlighted a series of improvements in the
      context of MapReduce data-intensive applications.</p>
      <p>These contributions materialized in a reference
      publication  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid44" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>about BlobSeer that provides a
      complete view over its design principles, algorithms,
      consistency and fault tolerance considerations, as well as
      experimental evaluations. A more compact overview of
      BlobSeer  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid45" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>was also published in the PhD
      Forum of IPDPS'10, where the corresponding poster, presented
      during the conference, won the TCPP Best PhD Student Poster
      Award.</p>
      <p>Complementary to these results, further work was
      undertaken to improve the usability of BlobSeer in the
      context of cloud computing. More specifically, we evaluated
      the trade-off resulting from transparently applying data
      compression to save storage space and bandwidth at the cost
      of slight computational overhead. The aim is to reduce the
      storage space and bandwidth needs with minimal impact on I/O
      throughput when under heavy access concurrency. To this end,
      we introduced a generic sampling-based compression technique
      that dynamically adapts to the heterogeneity of data and
      applied it to BlobSeer. It led to significant improvement
      over the original implementation: almost no performance
      overhead when dealing with incompressible data, as well as
      significant saving in storage space and bandwidth for
      compressible data, with the added benefit of improved
      aggregated read throughput. These results were obtained as a
      consequence of extensive experiments on the Grid'5000 testbed
      and were published in  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid46" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>Finally, B. Nicolae successfully defended his PhD
      thesis on November 30, 2010. The thesis document details the
      contributions that relate to the core of BlobSeer since the
      beginning of the project.</p>
    </subsection>
    <subsection id="uid42" level="1">
      <bodyTitle>MapReduce</bodyTitle>
      <participants>
        <person key="paris-2008-idm188504272032">
          <firstname>Diana</firstname>
          <lastname>Moise</lastname>
        </person>
        <person key="paris-2007-idm243644537488">
          <firstname>Bogdan</firstname>
          <lastname>Nicolae</lastname>
        </person>
        <person key="paris-2006-idm124332495696">
          <firstname>Gabriel</firstname>
          <lastname>Antoniu</lastname>
        </person>
        <person key="paris-2006-idm124332467968">
          <firstname>Luc</firstname>
          <lastname>Bougé</lastname>
        </person>
        <person key="kerdata-2010-idm58934871744">
          <firstname>Thi-Thu-Lan</firstname>
          <lastname>Trieu</lastname>
        </person>
      </participants>
      <p>The features exhibited by BlobSeer meet the storage needs
      of MapReduce applications. To evaluate the benefits of using
      BlobSeer as the storage back-end in such a context, we used
      Hadoop - Yahoo!'s implementation of the MapReduce framework.
      We substituted the original data storage layer of Hadoop, the
      
      <i>Hadoop Distributed File System</i>(HDFS) with our
      BlobSeer-based file system - BSFS. To measure the impact of
      our approach, we performed experiments both with synthetic
      microbenchmarks and real MapReduce applications. The results
      showed that BSFS is capable of delivering a higher throughput
      than HDFS, and of sustaining it when the number of clients
      significantly increases. This work on integrating BlobSeer
      with Hadoop  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid47" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>brought up various issues that
      could be improved in the Hadoop framework  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid48" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>One of these aspects concerns the append operation for
      which HDFS does not offer support. In  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid49" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>we show how providing the
      functionality of concurrently appending data to existing
      files can bring substantial benefits to MapReduce
      applications as well as to other classes of applications.
      Since BlobSeer efficiently supports concurrent appends, we
      modified the Hadoop MapReduce framework to use the append
      operation in the “reduce” phase of the application. Our
      experiments showed that massively concurrent append and read
      operations have a low impact on each other; furthermore,
      measurements with an application available with Hadoop showed
      that the support for concurrent appends to shared files is
      introduced with no extra cost, whereas the number of files
      managed by the MapReduce framework is substantially
      reduced.</p>
      <p>We also addressed the problem of managing intermediate
      data, which is data generated during MapReduce computations.
      In the original Hadoop MapReduce framework, intermediate data
      (data produced as output of the “map” phase and transferred
      as input to the “reduce” phase) is stored on the local file
      system of the machines executing the “map” function; in case
      of failures, the data is lost and the map computation is
      re-executed on another machine. Our approach was to store the
      intermediate data in a distributed file system, so that, when
      a failure occurs, the computation can resume on another
      machine; moreover, by using BSFS as storage for intermediate
      data, the execution time is reduced due to the high
      throughput BSFS delivers. These issues have been developed
      with the Master thesis of Lan Trieu  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid50" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
    </subsection>
    <subsection id="uid43" level="1">
      <bodyTitle>Introspective BlobSeer and security</bodyTitle>
      <participants>
        <person key="paris-2008-idm188504275952">
          <firstname>Alexandra</firstname>
          <lastname>Carpen-Amarie</lastname>
        </person>
        <person key="kerdata-2009-idm140027562000">
          <firstname>Alexandru</firstname>
          <lastname>Costan</lastname>
        </person>
        <person key="paris-2006-idm124332495696">
          <firstname>Gabriel</firstname>
          <lastname>Antoniu</lastname>
        </person>
        <person key="paris-2006-idm124332467968">
          <firstname>Luc</firstname>
          <lastname>Bougé</lastname>
        </person>
        <person key="kerdata-2010-idm58934891200">
          <firstname>Cătălin</firstname>
          <lastname>Leordeanu</lastname>
        </person>
        <person key="kerdata-2010-idm58934881424">
          <firstname>Cristina</firstname>
          <lastname>Băsescu</lastname>
        </person>
      </participants>
      <moreinfo>
        <p>This work has been done in collaboration with 
        <i>Jing (Tylor) Cai</i>, Master student at the City
        University of Hong Kong, and 
        <i>Mihaela-Camelia Vlad</i>, Master student at the
        Polytechnic University of Bucharest. Both of them visited
        the KerData Team in 2009–2010 for several months,
        supported by the INRIA Internship program.</p>
      </moreinfo>
      <p>The goal of this research direction is to enable autonomic
      storage for BlobSeer-based cloud services. This work has been
      carried out in the framework of the DataCloud@work Associated
      Team between KerData and the Computer Science Department from
      Politehnica University of Bucharest - PUB ( 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.irisa.fr/kerdata/doku.php?id=cloud_at_work:start" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
      <allowbreak/>www. 
      <allowbreak/>irisa. 
      <allowbreak/>fr/ 
      <allowbreak/>kerdata/ 
      <allowbreak/>doku. 
      <allowbreak/>php?id=cloud_at_work:start</ref>).</p>
      <p>The first step towards an autonomic data-sharing system
      was to equip the BlobSeer platform with introspection
      capabilities, which can serve as input data for a
      self-adaptive engine deployed on top of the system, possibly
      with several goals such as self-configuration,
      self-optimization, self-healing or self-protection. This work
      has been published in  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid51" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>Further, we implemented a distributed architecture for
      storing and processing monitoring data. Our solution was
      designed as a new BlobSeer component that does not interfere
      with its efficient data-access primitives. Instead, it builds
      a distributed user-activity history to obtain real-time
      information about the users in the system. Then we proposed a
      preliminary approach for enabling self-protection for the
      BlobSeer system, through a malicious client detection
      component, which analyzes protocol breaches specific to
      BlobSeer. These results have been published as INRIA research
      reports  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid52" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>, 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid53" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>We developed the self-protection direction within a
      generic security management framework allowing providers of
      Cloud data management systems to define and enforce complex
      security policies. In addition, we designed an expressive
      policy description language so as to be able to define a wide
      range of security attacks and to detect them in a security
      violation detection engine. We integrated our security
      framework with BlobSeer and we showed that we can provide a
      secure environment for data management systems without any
      significant overhead, while being able to define and detect
      complex attack scenarios. These results have been published
      in  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid54" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>Moreover, we developed a specific security mechanism which
      assigns a trust level to each client by continually
      monitoring and analyzing the client activity and the state of
      the system to detect security threats, malicious activity or
      other kinds of intrusions. Additionally, we addressed the
      problem of securely running web services on top of BlobSeer.
      We implemented mechanisms that handle authentication and
      authorization of the users, as well as secure data transfers
      for web services that use BlobSeer as a storage back-end.</p>
      <p>Another direction was to introduce self-management and
      self-adaptation facilities in BlobSeer. We enhanced BlobSeer
      with self-adaptive features by dynamically changing and
      maintaining the replication factors of the data. When a
      specific BLOB is under a heavy load (in terms of read
      operations), the system automatically increases its
      replication factor and handles all the necessary data
      transfers. In contrast, when some data is less (or never)
      used, its replication factor is transparently reduced.
      Moreover, we developed a component able to dynamically
      contract and expand the pool of storage providers based on
      the system's load, so as to adapt the resource usage to the
      needs of the clients accessing the data. Several Master
      research internships and Bachelor theses at PUB focused on
      these tasks.</p>
    </subsection>
    <subsection id="uid44" level="1">
      <bodyTitle>Concurrency-optimized I/Os for Petascale
      computing</bodyTitle>
      <participants>
        <person key="kerdata-2009-idm140027572976">
          <firstname>Viet-Trung</firstname>
          <lastname>Tran</lastname>
        </person>
        <person key="paris-2006-idm124332495696">
          <firstname>Gabriel</firstname>
          <lastname>Antoniu</lastname>
        </person>
        <person key="paris-2007-idm243644537488">
          <firstname>Bogdan</firstname>
          <lastname>Nicolae</lastname>
        </person>
        <person key="paris-2006-idm124332467968">
          <firstname>Luc</firstname>
          <lastname>Bougé</lastname>
        </person>
      </participants>
      <moreinfo>
        <p>This work has been done in collaboration with 
        <i>Matthieu Dorier</i>, student at ENS Cachan, Brittany
        Campus, during his summer 2010 internship at the INRIA-UIUC
        Joint Laboratory for Petascale Computing (JLPC) at
        Urbana-Champaign.</p>
      </moreinfo>
      <p>High-performance concurrent I/O accesses are a major
      requirement of data-intensive scientific applications,
      particularly for those applications deployed on Petascale
      infrastructures. The larger the scale of the execution
      infrastructure, the higher the potential performance
      bottlenecks that could be caused by a lack of performance of
      the data input/output (I/O) layers. We focused on specific
      scenarios that exhibit the need for efficient access to huge,
      shared data under heavy concurrency workload. We identified
      two main issues that require closer consideration.</p>
      <p>First, there is still a trade-off between high-performance
      data communication and atomic I/O capabilities of concurrent
      overlapped updates in the context of scientific applications.
      Current lock-based approaches mainly perform locking around
      the operations, imposing lock overhead and slowing down the
      overall performance. In this context, we aim to exploit the
      potential benefits of BlobSeer. By leveraging a
      versioning-based scheme, an atomic I/O operation is expected
      to be done in a lock-free manner, even when overlapped
      accesses occur. Following this direction, we conducted
      several experimental evaluations on Grid'5000 and obtained
      very promising results described in  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid55" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>In the second direction, our major research topic comes
      from the context of HPC, and targets scientific simulations
      running on Petascale machines. The goal is to explore how to
      efficiently record and visualize data during the simulation
      without impacting the performance of the corresponding
      computation generating that data. Conventional practice of
      storing data on disk, moving it off-site, reading it into a
      workflow, and analyzing it to produce scientific data becomes
      increasingly harder to use, due to large data volumes
      generated at fast rates compared to limited back-end speeds.
      Therefore, scalable approaches to deal with these I/O
      limitations are of utmost importance. We propose to adapt
      concurrency control techniques introduced in BlobSeer in
      order to optimize the level of parallelization between
      visualization and simulation with respect to I/O. It allows
      periodic data backup and online visualization to proceed
      without blocking computation, and vice versa.</p>
      <p>A first step has been taken in this direction by studying
      the behavior, with respect to I/O, of a tornado simulation
      code called CM1, targeting the next IBM supercomputer
      BlueWaters. This behavior induces large overheads due to the
      generation of many small files at the same time. We proposed
      a first solution using dedicated I/O cores as staging areas
      in order to overlap I/O with computation at the simulation
      level. Such a solution has been demonstrated to be capable of
      bringing a better balance in throughput, avoiding overheads in
      I/O phases, and performing data preprocessing efficiently.
      Coupled with the BlobSeer
      approach, we intend to provide a full solution for
      efficiently coupling simulations with visualization tools for
      very large scales. This work has been initiated during
      Matthieu Dorier's master internship at JLPC.</p>
    </subsection>
    <subsection id="uid45" level="1">
      <bodyTitle>BlobSeer-based management of virtual machines in
      Nimbus</bodyTitle>
      <participants>
        <person key="paris-2007-idm243644537488">
          <firstname>Bogdan</firstname>
          <lastname>Nicolae</lastname>
        </person>
        <person key="paris-2008-idm188504275952">
          <firstname>Alexandra</firstname>
          <lastname>Carpen-Amarie</lastname>
        </person>
        <person key="kerdata-2010-idm58934874848">
          <firstname>Tuan-Viet</firstname>
          <lastname>Dinh</lastname>
        </person>
        <person key="kerdata-2010-idm58934887888">
          <firstname>Eliana</firstname>
          <lastname>Tîrşa</lastname>
        </person>
        <person key="paris-2006-idm124332495696">
          <firstname>Gabriel</firstname>
          <lastname>Antoniu</lastname>
        </person>
      </participants>
      <p>Providing efficient virtual machine image storage
      solutions is crucial in the context of
      Infrastructure-as-a-Service (IaaS) cloud computing, as users
      rent resources in terms of virtual machines that are
      instantiated from virtual machine images. One of the
      challenges in this context is the need to deploy a large
      number (hundreds or even thousands) of VM instances
      simultaneously. Once the VM instances are deployed, another
      challenge is to simultaneously take a snapshot of many images
      and transfer them to persistent storage to support management
      tasks, such as suspend-resume and migration.</p>
      <p>During a 2-month visit at Argonne National Lab, USA,
      B. Nicolae adapted BlobSeer to address these needs. More
      specifically, a series of optimization techniques were
      proposed that minimize resource consumption (execution time,
      network traffic and storage space) which translate into lower
      end-user costs. While conventional approaches transfer the
      whole VM image contents between the persistent storage
      service and the computing nodes, we proposed a lazy transfer
      scheme based on object-versioning that transfers only the
      needed content on-demand: this greatly reduces
      execution time, network traffic and storage space. The
      benefits of this approach were demonstrated through extensive
      experiments operating on hundreds of nodes, showing
      improvements in time to boot virtual machines from a shared
      image by a factor of up to 25, while at the same time
      reducing storage and bandwidth usage by as much as 90% when
      compared with conventional approaches. This work is described
      in  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid56" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>Furthermore, the cloud users need mechanisms to upload
      Virtual Machine (VM) images into a Cloud storage service,
      before they are deployed to the physical nodes. We
      investigated this issue for the Nimbus Cloud environment, by
      replacing its default repository with BlobSeer. This work has
      been published in  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid57" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>.</p>
      <p>In the context of the Associated Team between KerData and
      the Computer Science Department from Politehnica University
      of Bucharest, we made available BlobSeer as a storage service
      on the Cloud, by integrating it within the Nimbus Cloud. We
      added mechanisms for bringing BlobSeer to a consistent state
      before stopping it and then for starting/stopping/restarting
      BlobSeer inside the Nimbus Cloud, while preserving the data
      it stored during previous runs. Additionally, we investigated
      the advantages of using BlobSeer as a storage system for
      XtreemOS, by conducting a series of performance evaluations
      targeted towards MapReduce applications. We experimented with
      Hadoop applications deployed on top of HDFS, BlobSeer and
      XtreemFS, the default file system of XtreemOS.</p>
    </subsection>
    <subsection id="uid46" level="1">
      <bodyTitle>Using Global Behavior Modeling to Improve QoS in
      Cloud Data Storage Services</bodyTitle>
      <participants>
        <person key="paris-2007-idm243644537488">
          <firstname>Bogdan</firstname>
          <lastname>Nicolae</lastname>
        </person>
        <person key="kerdata-2010-idm58934898256">
          <firstname>Houssem-Eddine</firstname>
          <lastname>Chihoub</lastname>
        </person>
        <person key="paris-2006-idm124332495696">
          <firstname>Gabriel</firstname>
          <lastname>Antoniu</lastname>
        </person>
        <person key="paris-2008-idm188504275952">
          <firstname>Alexandra</firstname>
          <lastname>Carpen-Amarie</lastname>
        </person>
      </participants>
      <p>MapReduce is emerging as a highly scalable programming
      paradigm that enables high-throughput data-intensive
      processing as a cloud service. However, the associated
      performance is highly dependent on the underlying storage
      service, responsible for efficiently supporting massively
      parallel data accesses by guaranteeing a high throughput
      under heavy access concurrency. In this context, quality of
      service plays a crucial role: the storage service needs to
      sustain a stable throughput regarding each access
      individually, in addition to achieving a high aggregated
      throughput under concurrency.</p>
      <p>We propose  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#kerdata-2010-bid58" location="biblio" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>a technique to address this
      problem using component monitoring, application-side feedback
      and behavior pattern analysis. It makes it possible to
      automatically infer useful knowledge about the causes of a
      poor quality of service, and to provide guidelines toward potential
      improvements. We apply our proposal to BlobSeer, as a
      representative data storage service specifically designed to
      achieve high aggregated throughputs. Through an extensive
      experimentation, we demonstrated substantial improvements in
      the stability of individual data read accesses under
      MapReduce workloads. Within the SCALUS Marie-Curie project
      (see Section  
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid52" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) we plan to refine this work
      using OpenNebula as an IaaS cloud environment.</p>
    </subsection>
  </resultats>
  <contrats id="uid47">
    <bodyTitle>Contracts and Grants with Industry</bodyTitle>
    <subsection id="uid48" level="1">
      <bodyTitle>AzureBrain: INRIA-Microsoft project</bodyTitle>
      <participants>
        <person key="paris-2006-idm124332495696">
          <firstname>Gabriel</firstname>
          <lastname>Antoniu</lastname>
        </person>
        <person key="paris-2006-idm124332467968">
          <firstname>Luc</firstname>
          <lastname>Bougé</lastname>
        </person>
      </participants>
      <p>Joint genetic and neuroimaging data analysis on large
      cohorts of subjects is a new approach used to assess and
      understand the variability that exists between individuals.
      This approach has remained poorly understood so far and
      brings forward very significant challenges, as progress in
      this field can open pioneering directions in biology and
      medicine. As both neuroimaging- and genetic-domain
      observations represent a huge amount of variables (of the
      order of 10^6), performing statistically rigorous analyses on
      such amounts of data represents a computational challenge
      that cannot be addressed with conventional computational
      techniques. This project started in October 2010 for two
      years in the framework of the Microsoft Research - INRIA
      Joint Research Center and aims to explore cloud computing
      techniques to address the above computational challenge. The
      project will rely on Microsoft's Azure cloud platform and
      will leverage the complementary expertise of two INRIA teams:
      KerData (Rennes) in the area of scalable cloud data
      management and PARIETAL (Saclay) in the field of
      neuroimaging. For more details, see the official press
      release 
      <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.microsoft.com/france/espace-presse/communiques-de-presse/fiche-communique.aspx?EID=75da32ee-5ed3-42b2-a847-4971f716df31" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
      <allowbreak/>www. 
      <allowbreak/>microsoft. 
      <allowbreak/>com/ 
      <allowbreak/>france/ 
      <allowbreak/>espace-presse/ 
      <allowbreak/>communiques-de-presse/ 
      <allowbreak/>fiche-communique. 
      <allowbreak/>aspx?EID=75da32ee-5ed3-42b2-a847-4971f716df31</ref>.</p>
    </subsection>
  </contrats>
  <international id="uid49">
    <bodyTitle>Other Grants and Activities</bodyTitle>
    <subsection id="uid50" level="1">
      <bodyTitle>Regional initiatives</bodyTitle>
      <subsection id="id62712" level="2">
        <bodyTitle>PhD grant</bodyTitle>
        <participants>
          <person key="paris-2008-idm188504272032">
            <firstname>Diana</firstname>
            <lastname>Moise</lastname>
          </person>
        </participants>
        <p>The Brittany Regional Council provides half of the
        financial support for the PhD thesis of D. Moise
        (GRID5000BD project). This support amounts to a total of
        around 14,000 Euros/year. This support ends in
        September 2011.</p>
      </subsection>
    </subsection>
    <subsection id="uid51" level="1">
      <bodyTitle>National initiatives</bodyTitle>
      <subsection id="id62744" level="2">
        <bodyTitle>MapReduce: an ANR project with international
        partners</bodyTitle>
        <participants>
          <person key="paris-2006-idm124332495696">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="paris-2006-idm124332467968">
            <firstname>Luc</firstname>
            <lastname>Bougé</lastname>
          </person>
          <person key="paris-2007-idm243644537488">
            <firstname>Bogdan</firstname>
            <lastname>Nicolae</lastname>
          </person>
          <person key="paris-2008-idm188504275952">
            <firstname>Alexandra</firstname>
            <lastname>Carpen-Amarie</lastname>
          </person>
          <person key="paris-2008-idm188504272032">
            <firstname>Diana</firstname>
            <lastname>Moise</lastname>
          </person>
          <person key="kerdata-2010-idm58934898256">
            <firstname>Houssem-Eddine</firstname>
            <lastname>Chihoub</lastname>
          </person>
        </participants>
        <p>KerData is leading the MapReduce project (October 2010 –
        March 2014) funded by the ANR ARPEGE 2010 Program on
        embedded systems and large infrastructures. This project is
        devoted to using MapReduce programming paradigm on clouds
        and hybrid infrastructures. It started in October 2010 in
        partnership with Argonne National Lab (USA), the University
        of Illinois at Urbana Champaign (USA), the UIUC-INRIA Joint
        Lab on Petascale Computing, IBM France, IBCP, MEDIT (SME)
        and the GRAAL INRIA project-team. In this project we
        explore advanced techniques for scalable, high-throughput,
        concurrency-optimized data and metadata management. Recent
        preliminary experiments with the BlobSeer storage platform
      designed by the KerData team have shown substantial potential
        improvements of the data throughput compared to Hadoop,
        which acts as today's reference MapReduce platform.</p>
      </subsection>
      <subsection id="id62796" level="2">
        <bodyTitle>Hemera: an 
        <span class="smallcap" align="left">
        Inria</span>large-wingspan project</bodyTitle>
        <participants>
          <person key="paris-2006-idm124332495696">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="paris-2008-idm188504272032">
            <firstname>Diana</firstname>
            <lastname>Moise</lastname>
          </person>
        </participants>
        <p>Hemera ( 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.grid5000.fr/mediawiki/index.php/Hemera" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
        <allowbreak/>www. 
        <allowbreak/>grid5000. 
        <allowbreak/>fr/ 
        <allowbreak/>mediawiki/ 
        <allowbreak/>index. 
        <allowbreak/>php/ 
        <allowbreak/>Hemera</ref>) is an INRIA Large Wingspan
        project, started in 2010. (Hemera is the Greek goddess of
        the daytime, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://en.wikipedia.org/wiki/Hemera" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
        <allowbreak/>en. 
        <allowbreak/>wikipedia. 
        <allowbreak/>org/ 
        <allowbreak/>wiki/ 
        <allowbreak/>Hemera</ref>.) It aims to demonstrate
        ambitious up-scaling techniques for large scale distributed
        computing by carrying out several dimensioning experiments
        on the Grid’5000 infrastructure. It also aims to animate
        the scientific community around Grid’5000 and to enlarge
        the Grid’5000 community by helping newcomers to make use of
        Grid’5000. It is not restricted to INRIA teams. Within
        Hemera, G. Antoniu (KerData INRIA team) and Gilles
        Fedak (GRAAL INRIA project-team) co-lead the MapReduce
        scientific challenge, whose goal is to carry out scalable
        experiments with MapReduce applications on Grid'5000.
        KerData is also involved in a working group called 
        <i>Efficient management of very large volumes of
        information for data-intensive applications</i>, co-led by
        G. Antoniu with Jean-Marc Pierson (IRIT,
        Toulouse).</p>
      </subsection>
    </subsection>
    <subsection id="uid52" level="1">
      <bodyTitle>European initiatives</bodyTitle>
      <subsection id="id62906" level="2">
        <bodyTitle>SCALUS: Marie Curie Initial Training Network
        (FP7)</bodyTitle>
        <participants>
          <person key="paris-2006-idm124332495696">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="kerdata-2010-idm58934898256">
            <firstname>Houssem-Eddine</firstname>
            <lastname>Chihoub</lastname>
          </person>
          <person key="paris-2007-idm243644537488">
            <firstname>Bogdan</firstname>
            <lastname>Nicolae</lastname>
          </person>
          <person key="paris-2008-idm188504275952">
            <firstname>Alexandra</firstname>
            <lastname>Carpen-Amarie</lastname>
          </person>
        </participants>
        <p>The SCALUS Marie Curie Initial Training Network ( 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.scalus.eu" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
        <allowbreak/>www. 
        <allowbreak/>scalus. 
        <allowbreak/>eu</ref>) project aims at elevating
        education, research, and development inside the area of
        large-scale, distributed ubiquitous storage with a focus on
        cluster, grid, and cloud storage. The vision of this MCITN
        is to deliver the foundation for ubiquitous storage
        systems, which can be scaled in arbitrary directions
        (capacity, performance, distance, security, etc.) The
        consortium's goal is to build the first interdisciplinary
        teaching and research network on storage issues. It
        consists of top European institutes and companies in
        storage and cluster technology, building a demanding but
        rewarding interdisciplinary environment for young
        researchers. This interdisciplinary research consortium is
        the foundation for young researchers to be able to perform
        the innovative research tasks outlined in this proposal.
        The academic partners include 
        <span class="smallcap" align="left">Inria Rennes – Bretagne
        Atlantique</span>, Universidad Politécnica de Madrid,
        Barcelona Supercomputing Center, University of Paderborn,
        Ruprecht-Karls-Universität Heidelberg, Durham University,
        FORTH, École des Mines de Nantes, XLAB, CERN, NEC,
        Microsoft Research, Fujitsu, Sun Microsystems. The project
        started on December 1, 2009, and will last for
        4 years. It involves the KerData and the MYRIADS
        Teams. G. Antoniu serves as a coordinator for 
        <span class="smallcap" align="left">Inria Rennes – Bretagne
        Atlantique</span>.</p>
        <p>Two parallel PhD theses funded by the SCALUS Project are
        co-advised by G. Antoniu (KerData) and María Pérez
        (Universidad Politécnica de Madrid, UPM). Both started in
        September 2010: Houssem-Eddine Chihoub, hosted by KerData,
        and Bunjamin Memishi, hosted at UPM. Both theses will
        explore ways to continue the preliminary joint work started
        by our teams involving BlobSeer and GloBeM (see
        Section  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid46" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>) in the framework of real
        cloud infrastructures, with real applications. Discussions
        and preliminary experiments are in progress on how the
        OpenNebula cloud toolkit developed at Universidad
        Complutense de Madrid could be used as a global framework
        for this work.</p>
      </subsection>
      <subsection id="id63008" level="2">
        <bodyTitle>DataCloud@Work: INRIA's Associate Team
        Programme</bodyTitle>
        <participants>
          <person key="paris-2006-idm124332495696">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="paris-2006-idm124332467968">
            <firstname>Luc</firstname>
            <lastname>Bougé</lastname>
          </person>
          <person key="paris-2008-idm188504275952">
            <firstname>Alexandra</firstname>
            <lastname>Carpen-Amarie</lastname>
          </person>
          <person key="kerdata-2009-idm140027562000">
            <firstname>Alexandru</firstname>
            <lastname>Costan</lastname>
          </person>
          <person key="paris-2008-idm188504272032">
            <firstname>Diana</firstname>
            <lastname>Moise</lastname>
          </person>
          <person key="paris-2007-idm243644537488">
            <firstname>Bogdan</firstname>
            <lastname>Nicolae</lastname>
          </person>
          <person key="kerdata-2010-idm58934891200">
            <firstname>Cătălin</firstname>
            <lastname>Leordeanu</lastname>
          </person>
          <person key="kerdata-2010-idm58934887888">
            <firstname>Eliana</firstname>
            <lastname>Tîrşa</lastname>
          </person>
          <person key="kerdata-2010-idm58934881424">
            <firstname>Cristina</firstname>
            <lastname>Băsescu</lastname>
          </person>
          <person key="kerdata-2010-idm58934878112">
            <firstname>Sînziana</firstname>
            <lastname>Mazilu</lastname>
          </person>
        </participants>
        <p>DataCloud@work was initiated in 2010 by G. Antoniu
        (KerData) as an Associate Team in partnership with
        Politehnica University of Bucharest (PUB) and the MYRIADS
        Team ( 
        <span class="smallcap" align="left">Inria Rennes – Bretagne
        Atlantique</span>). It aims to investigate ways to provide
        advanced, autonomic storage mechanisms for cloud services.
        More specifically, the goal is to explore how to build an
        efficient, secure and reliable storage service for
        data-intensive distributed applications running in cloud
        environments by enabling an autonomic behavior. A secondary
        goal is to leverage the grid operating system approach as a
        cloud technology (e.g., by relying on its OS-support for
        virtual organizations). The project builds on preliminary
        prototypes: the BlobSeer data-sharing platform (designed by
        the KerData Team), on the MonALISA monitoring framework
        (whose main technical contributor is the PUB Team), and on
        the XtreemOS grid operation system (designed under the
        leadership of the MYRIADS Team). This work uses as a
        framework the Nimbus cloud toolkit from Argonne National
        Lab.</p>
        <p>In 2010 we addressed the following topics: 1) Introduction
        of self-adaptation capabilities in BlobSeer, based on the
        MonALISA monitoring framework; 2) Design and prototype an
        implementation of a generic security management framework
        for BlobSeer-based cloud storage; 3) Design mechanisms
        facilitating the deployment of BlobSeer on XtreemOS-enabled
        IaaS clouds based on Nimbus.</p>
        <p>The main results achieved this year are described in
        detail at 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.irisa.fr/kerdata/doku.php?id=cloud_at_work:work_programme:work_programme_2010:work_programme_2010" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
        <allowbreak/>www. 
        <allowbreak/>irisa. 
        <allowbreak/>fr/ 
        <allowbreak/>kerdata/ 
        <allowbreak/>doku. 
        <allowbreak/>php?id=cloud_at_work:work_programme:work_programme_2010:work_programme_2010</ref>.
        We would like to emphasize the following facts:</p>
        <descriptionlist>
          <label>Collaboration formally extended to Argonne
          National Lab, USA:</label>
          <li id="uid53">
            <p noindent="true">B. Nicolae visited ANL (USA)
            thanks to the INRIA Explorateur Programme for 3 months
            (April to July 2010). This served as a preliminary step
            preparing the MapReduce ANR project started in October
            2010 in partnership with ANL.</p>
          </li>
          <label>Visiting PhD students:</label>
          <li id="uid54">
            <p noindent="true">In 2010, 3 PhD students from
            PUB were hosted in Rennes for 3 months each
            (9 months overall). One PhD student from Rennes
            was hosted in Bucharest twice (two weeks overall).</p>
          </li>
          <label>Publications and workshops:</label>
          <li id="uid55">
            <p noindent="true">In 2010, 3 joint publications
            involving at least 2 of the 3 partners of the
            Associate Team have been made, 2 joint
            publications with Argonne National Lab and a large
            number of Master and Bachelor theses. The results were
            presented at 3 internal workshops organized in
            Rennes.</p>
          </li>
          <label>PhD defenses:</label>
          <li id="uid56">
            <p noindent="true">In 2010, 2 PhD theses strongly
            related to the Associate Team have been defended:
            B. Nicolae (KerData) in Rennes and Alexandru
            Costan (PUB) in Bucharest. The French and Romanian
            leaders of the Associate Team participated in both PhD
            committees.</p>
          </li>
          <label>Master and Bachelor theses:</label>
          <li id="uid57">
            <p noindent="true">Overall, 6 Bachelor theses
            locally carried out in Bucharest and 4 Master
            theses in Rennes were dedicated to subtasks derived
            from the scientific schedule of the DataCloud@work
            Associate Team. Out of these, 2 Master students
            from PUB were hosted by the KerData team through
            INRIA's Internship Programme (co-funded by KerData on
            its own resources).</p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid58" level="1">
      <bodyTitle>International initiatives</bodyTitle>
      <subsection id="id63249" level="2">
        <bodyTitle>MapReduce: an ANR project with ANL (USA), UIUC
        (USA) and JLPC (France-USA)</bodyTitle>
        <p>MapReduce is an ANR project with international partners:
        Argonne National Lab (USA), the University of Illinois at
        Urbana-Champaign (UIUC, USA) and the Joint INRIA-UIUC Lab
        for Petascale Computing (JLPC). See Section  
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#uid51" location="intern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest"/>for details.</p>
      </subsection>
      <subsection id="id63280" level="2">
        <bodyTitle>INRIA-UIUC Joint Laboratory on Petascale
        Computing</bodyTitle>
        <participants>
          <person key="paris-2006-idm124332495696">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="paris-2006-idm124332467968">
            <firstname>Luc</firstname>
            <lastname>Bougé</lastname>
          </person>
          <person key="paris-2007-idm243644537488">
            <firstname>Bogdan</firstname>
            <lastname>Nicolae</lastname>
          </person>
          <person key="kerdata-2009-idm140027572976">
            <firstname>Viet-Trung</firstname>
            <lastname>Tran</lastname>
          </person>
        </participants>
        <moreinfo>
          <p>This work has been done in collaboration with 
          <i>Matthieu Dorier</i>, student at ENS Cachan, Brittany
          Campus, during his summer 2010 internship at the
          INRIA-UIUC Joint Laboratory for Petascale Computing at
          Urbana-Champaign.</p>
        </moreinfo>
        <p>Preliminary discussions have been held at the 2nd
        workshop of the INRIA-UIUC Joint Laboratory for Petascale
        Computing (JLPC, 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://jointlab.ncsa.illinois.edu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
        <allowbreak/>jointlab. 
        <allowbreak/>ncsa. 
        <allowbreak/>illinois. 
        <allowbreak/>edu/ 
        <allowbreak/></ref>) in December 2009. As a follow-up, a
        specific topic was identified for the involvement of the
        KerData team in a collaboration with JLPC in the area of
        distributed storage for Petascale architectures. It focuses
        on the Blue Waters machine ( 
        <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.ncsa.illinois.edu/BlueWaters/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
        <allowbreak/>www. 
        <allowbreak/>ncsa. 
        <allowbreak/>illinois. 
        <allowbreak/>edu/ 
        <allowbreak/>BlueWaters/ 
        <allowbreak/></ref>), expected to become one of the
        world's most powerful supercomputers in 2011.</p>
        <p>G. Antoniu and B. Nicolae visited the National
        Center for Supercomputing Applications (NCSA) at UIUC in
        April 2010 to explore how the BlobSeer BLOB-based approach
        developed by KerData could be used to optimize the
        management of concurrent data I/O requests generated by
        massively parallel simulations that run simultaneously with
        parallel visualization tools. A preliminary study in this
        context was performed by Matthieu Dorier, Master student
        (M1) at ENS Cachan/Brittany, during a 3-month internship at
        NCSA/UIUC, in collaboration with several researchers at
        NCSA/UIUC involved in the JLPC (Marc Snir, Franck Cappello,
        Dave Semeraro). This study showed the benefit of a new
        approach using dedicated I/O cores.</p>
        <p>We intend to extend this approach in two directions: 1)
        Compare the use of dedicated cores with the use of
        dedicated nodes, and model the performance of both
        approaches in order to select the best one according to
        the applications' and execution platforms' I/O
        characteristics; 2) Build a BlobSeer-based metadata
        software layer enabled to schedule I/O operations coming
        from the simulation. This work will continue during the
        master internship (M2) of Matthieu Dorier at KerData in
        2011. It is expected to be pursued further during his PhD
        thesis in the KerData Team. This topic is also part of
        INRIA's proposed contribution in the framework of an IP
        European project proposal to be submitted in January 2011.
        This IP project will involve 2 INRIA teams: KerData in
        Rennes and GRAND-LARGE in Saclay (through the JLPC at
        Urbana-Champaign).</p>
      </subsection>
      <subsection id="id63400" level="2">
        <bodyTitle>FP3C: an ANR-JST Project</bodyTitle>
        <participants>
          <person key="paris-2006-idm124332495696">
            <firstname>Gabriel</firstname>
            <lastname>Antoniu</lastname>
          </person>
          <person key="kerdata-2009-idm140027572976">
            <firstname>Viet-Trung</firstname>
            <lastname>Tran</lastname>
          </person>
          <person key="paris-2007-idm243644537488">
            <firstname>Bogdan</firstname>
            <lastname>Nicolae</lastname>
          </person>
        </participants>
        <p>FP3C (Framework and Programming for Post-Petascale
        Computing) is a joint project co-funded by the French
        National Research Agency (ANR) and by the Japan Science and
        Technology Agency (JST). It started in September 2010 for 3
        years. Its main goal is to develop a programming chain and
        associated runtime systems which will allow scientific
        end-users to efficiently execute their applications on
        Post-Petascale, highly hierarchical computing platforms
        making use of multi-core processors and accelerators. This
        project gathers major actors involved in HPC research in
        France (INRIA, CEA, CNRS) and Japan (University of Tsukuba,
        University of Tokyo, Tokyo Institute of Technology,
        University of Kyoto).</p>
        <p>Within this framework, we collaborate with Osamu Tatebe
        from the University of Tsukuba in the area of large-scale
        data-sharing. The goal of this collaboration is to design,
        implement and validate an integrated architecture for a
        Petascale storage system by weaving the best properties of
        global file systems (transparency, standard access
        interface) and RAM-based, BLOB storage systems (versioning,
        access efficiency under heavy concurrency). More
        specifically, we intend to explore how a hierarchical
        approach can be used to build a BLOB-based storage
        file system.</p>
        <p>While such an approach has been used in classical,
        non-distributed computer architecture to explore the
        combined usage of file storage and RAM storage, no
        convincing attempt has been made regarding Post-Petascale
        distributed storage systems. As a first step, our objective
        in 2011 is to specify the joint architecture for a
        BLOB-based file storage architecture.</p>
      </subsection>
    </subsection>
    <subsection id="uid59" level="1">
      <bodyTitle>Other contacts</bodyTitle>
      <subsection id="id63451" level="2">
        <bodyTitle>Orange Labs, Issy-les-Moulineaux</bodyTitle>
        <p>Several informal discussions took place with Ruby
        Krishnaswamy from Orange Labs, Issy-les-Moulineaux on
        potential collaborations in the area of cloud storage.
        Orange Labs is interested in BlobSeer-based
        concurrency-optimized storage support for virtual machine
        images and cloud application data.</p>
      </subsection>
    </subsection>
  </international>
  <diffusion id="uid60">
    <bodyTitle>Dissemination</bodyTitle>
    <subsection id="uid61" level="1">
      <bodyTitle>Committees</bodyTitle>
      <subsection id="uid62" level="2">
        <bodyTitle>Leaderships, Steering Committees and community
        service</bodyTitle>
        <descriptionlist>
          <label>Euro-Par Conference Series.</label>
          <li id="uid63">
            <p noindent="true">L. Bougé serves as a Vice-Chair
            of the 
            <i>Steering Committee</i>of the 
            <i>Euro-Par</i>annual conference series on parallel
            computing. G. Antoniu serves as a Local Chair for
            the 
            <i>Parallel and Distributed Data Management</i>topic of
            
            <i>Euro-Par 2011</i>, to be held in Bordeaux.</p>
          </li>
          <label>NAS-2010 Conference.</label>
          <li id="uid64">
            <p noindent="true">G. Antoniu served as a
            Vice-Chair of the 
            <i>Program Committee</i>for the storage track of the 
            <i>IEEE NAS</i>international conference on Networking,
            Architecture, and Storage.</p>
          </li>
          <label>MapReduce ANR Project.</label>
          <li id="uid65">
            <p noindent="true">G. Antoniu serves as a
            coordinator for the MapReduce ANR project (ARPEGE 2010
            call), started in October 2010 in collaboration with
            Argonne National Lab, the University of Illinois at
            Urbana Champaign, the UIUC/INRIA Joint Lab on Petascale
            Computing, IBM, IBCP, MEDIT and the GRAAL INRIA
            Project-Team.</p>
          </li>
          <label>AzureBrain Microsoft-INRIA Project.</label>
          <li id="uid66">
            <p noindent="true">G. Antoniu and B. Thirion
            (PARIETAL Project-Team, 
            <span class="smallcap" align="left">Inria Saclay –
            Île-de-France</span>) co-lead the AzureBrain
            Microsoft-INRIA Project started in October 2010 in the
            framework of the Microsoft Research - INRIA Joint
            Center (2010-2012).</p>
          </li>
          <label>DataCloud@work Associate Team.</label>
          <li id="uid67">
            <p noindent="true">G. Antoniu serves as a
            coordinator for the DataCloud@work Associate Team, a
            project involving the KerData and MYRIADS INRIA Teams
            in Rennes and the Distributed Systems Group from
            Politehnica University of Bucharest (2010–2012).</p>
          </li>
          <label>SCALUS Marie-Curie Initial Training Networks
          project.</label>
          <li id="uid68">
            <p noindent="true">G. Antoniu coordinates the
            involvement of the 
            <span class="smallcap" align="left">Inria Rennes –
            Bretagne Atlantique</span>Research Center in the SCALUS
            Project of the Marie-Curie Initial Training Networks
            Programme (ITN), call FP7-PEOPLE-ITN-2008
            (2009-2013).</p>
          </li>
          <label>CoreGRID ERCIM Working Group.</label>
          <li id="uid69">
            <p noindent="true">G. Antoniu coordinates the
            involvement of the 
            <span class="smallcap" align="left">Inria Rennes –
            Bretagne Atlantique</span>Research Center in the
            CoreGRID ERCIM Working Group.</p>
          </li>
          <label><i>Agrégation</i>of Mathematics.</label>
          <li id="uid70">
            <p noindent="true">L. Bougé serves as a Vice-Chair
            of the National Selection Committee for High-School
            Mathematics Teachers, Informatics Track.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid71" level="2">
        <bodyTitle>Editorial boards, direction of program
        committees</bodyTitle>
        <descriptionlist>
          <label>L. Bougé</label>
          <li id="uid72">
            <p noindent="true">is a member of the 
            <i>Editorial Advisory Board</i>of the 
            <i>Scientific Programming</i>Journal.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid73" level="2">
        <bodyTitle>Program Committees</bodyTitle>
        <descriptionlist>
          <label>G. Antoniu</label>
          <li id="uid74">
            <p noindent="true">served in the Program Committees for
            the following conferences and workshops:
            CloudCom 2010, ICPADS 2010, 3PGCIC-2010,
            MapReduce 2010, MAPRED 2010, ADiS 2010,
            SRMPDS 2010, AINA 2011, CISIS 2011,
            PDP 2011, RenPar'19, RenPar'20.</p>
          </li>
          <label>L. Bougé</label>
          <li id="uid75">
            <p noindent="true">served in the Program Committee for
            the following conferences: NPC 2010.</p>
          </li>
        </descriptionlist>
      </subsection>
      <subsection id="uid76" level="2">
        <bodyTitle>Evaluation committees, consulting</bodyTitle>
        <descriptionlist>
          <label>L. Bougé</label>
          <li id="uid77">
            <p noindent="true">served as a member of the Selection
            Committee for the 
            <i>Gilles Kahn PhD Thesis Award 2010</i>.</p>
          </li>
          <label>L. Bougé</label>
          <li id="uid78">
            <p noindent="true">was the chair of the national
            evaluation committee for the 2010 Scientific Excellence
            Award ( 
            <i>Prime d'excellence scientifique</i>, PES) targeted
            to the researchers on an academic teaching position in
            France.</p>
          </li>
        </descriptionlist>
      </subsection>
    </subsection>
    <subsection id="uid79" level="1">
      <bodyTitle>Invited talks</bodyTitle>
      <descriptionlist>
        <label>G. Antoniu</label>
        <li id="uid80">
          <p noindent="true">gave a keynote talk entitled 
          <i>Autonomic cloud storage: challenges at stake</i>at the
          ADiS workshop held in February 2010 in Krakow,
          Poland.</p>
        </li>
        <label>G. Antoniu</label>
        <li id="uid81">
          <p noindent="true">gave an invited talk entitled 
          <i>BlobSeer: Enabling Efficient Lock-Free,
          Versioning-Based Storage for Massive Data under Heavy
          Access Concurrency</i>at the Parallel@Illinois Special
          Event Series, University of Illinois at Urbana-Champaign,
          IL, USA, in April 2010.</p>
        </li>
        <label>G. Antoniu</label>
        <li id="uid82">
          <p noindent="true">gave a keynote talk entitled 
          <i>Scalable MapReduce Data Processing on Clouds: the
          BlobSeer Approach</i>at the International Conference on
          High Performance Computing and Simulation
          (HPCS 2010) conference held in June 2010 in Caen,
          France.</p>
        </li>
        <label>G. Antoniu</label>
        <li id="uid83">
          <p noindent="true">gave a talk entitled 
          <i>BlobSeer: Efficient, Versioning-Based Storage for
          Massive Data under Heavy Access Concurrency on
          Clouds</i>at Microsoft Research - INRIA Workshop on
          Extreme Operating Systems held in November 2010 in Paris,
          France.</p>
        </li>
        <label>G. Antoniu</label>
        <li id="uid84">
          <p noindent="true">gave an invited talk entitled 
          <i>Concurrency-optimized I/O for visualizing HPC
          simulations: An Approach Using Dedicated I/O cores</i>at
          the 4th workshop of the Joint Laboratory for Petascale
          Computing held in November 2010 at NCSA/UIUC,
          Urbana-Champaign, IL, USA.</p>
        </li>
      </descriptionlist>
    </subsection>
    <subsection id="uid85" level="1">
      <bodyTitle>Doctoral teaching</bodyTitle>
      <moreinfo>
        <p>Only the teaching contributions of project-team members
        on non-teaching positions are mentioned below.</p>
      </moreinfo>
      <descriptionlist>
        <label>G. Antoniu</label>
        <li id="uid86">
          <p noindent="true">gave lectures on peer-to-peer systems
          within the 
          <i>Peer-to-Peer Systems</i>Module of the Master Program
          (2nd year), 
          <span class="smallcap" align="left">University
          Rennes 1</span>. He gave lectures on Grid Data
          Management within the 
          <i>Distributed Architectures</i>Module of the ALMA Master
          Program (2nd year) of the University of Nantes. He also
          taught a full course on 
          <i>Grid Computing</i>for final year engineering students
          at the ESIEA Engineering School, Paris.</p>
        </li>
      </descriptionlist>
    </subsection>
    <subsection id="uid87" level="1">
      <bodyTitle>Administrative responsibilities</bodyTitle>
      <descriptionlist>
        <label>G. Antoniu</label>
        <li id="uid88">
          <p noindent="true">serves as the Scientific Correspondent
          for the International Relations Office of the 
          <span class="smallcap" align="left">Inria Rennes –
          Bretagne Atlantique</span>Research Center.</p>
        </li>
        <label>G. Antoniu</label>
        <li id="uid89">
          <p noindent="true">serves as the Scientific Leader of the
          KerData research team.</p>
        </li>
        <label>L. Bougé</label>
        <li id="uid90">
          <p noindent="true">chairs the Computer Science and
          Telecommunication Department ( 
          <i>Département Informatique et Télécommunications,
          DIT</i>) of the Brittany Extension of 
          <span class="smallcap" align="left">Ens Cachan</span>. He
          leads the Master Program ( 
          <i>Magistère</i>) in Computer Science at the Brittany
          Extension of 
          <span class="smallcap" align="left">Ens
          Cachan</span>.</p>
        </li>
      </descriptionlist>
    </subsection>
    <subsection id="uid91" level="1">
      <bodyTitle>Miscellaneous</bodyTitle>
      <descriptionlist>
        <label>L. Bougé</label>
        <li id="uid92">
          <p noindent="true">is a member of Scientific Committee of
          
          <span class="smallcap" align="left">Inria Rennes –
          Bretagne Atlantique</span>( 
          <i>Comité des projets</i>), standing for the 
          <span class="smallcap" align="left">Ens
          Cachan</span>partner.</p>
        </li>
        <label>G. Antoniu</label>
        <li id="uid93">
          <p noindent="true">is a member of Scientific Committee of
          
          <span class="smallcap" align="left">Inria Rennes –
          Bretagne Atlantique</span>( 
          <i>Comité des projets</i>), standing for the KerData
          research team.</p>
        </li>
      </descriptionlist>
    </subsection>
  </diffusion>
  <biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography">
    <biblStruct id="kerdata-2010-bid66" type="inproceedings" rend="refer" n="refercite:ANTONIU:2007:INRIA-00178653:1">
      <identifiant type="hal" value="inria-00178653"/>
      <analytic>
        <title level="a">Performance scalability of the JXTA P2P
        framework</title>
        <author>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124331463632">
            <foreName>Loïc</foreName>
            <surname>Cudennec</surname>
            <initial>L.</initial>
          </persName>
          <persName key="paris-2006-idm124331446512">
            <foreName>Mathieu</foreName>
            <surname>Jan</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Mike</foreName>
            <surname>Duigou</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">Proc. IEEE International Parallel and
        Distributed Processing Symposium (IPDPS 2007)</title>
        <loc>Long Beach, USA</loc>
        <imprint>
          <dateStruct>
            <year>2007</year>
          </dateStruct>
          <biblScope type="pages">108</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00178653/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00178653/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <affiliation>
        <country>US</country>
      </affiliation>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid65" type="article" rend="refer" n="refercite:ANTONIU:2006:INRIA-00000987:2">
      <identifiant type="hal" value="inria-00000987"/>
      <analytic>
        <title level="a">How to bring together fault tolerance and
        data consistency to enable grid data sharing</title>
        <author>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="caps-2006-idm215852169472">
            <foreName>Jean-François</foreName>
            <surname>Deverge</surname>
            <initial>J.-F.</initial>
          </persName>
          <persName key="paris-2006-idm124331438528">
            <foreName>Sébastien</foreName>
            <surname>Monnet</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">Concurrency and Computation: Practice and
        Experience</title>
        <imprint>
          <biblScope type="number">17</biblScope>
          <dateStruct>
            <year>2006</year>
          </dateStruct>
          <biblScope type="pages">1-19</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00000987/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00000987/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid62" type="article" rend="refer" n="refercite:nicolae:2010:inria-00511414:1">
      <identifiant type="hal" value="inria-00511414"/>
      <analytic>
        <title level="a">BlobSeer: Next Generation Data Management
        for Large Scale Infrastructures</title>
        <author>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName key="paris-2008-idm188504272032">
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="paris-2008-idm188504275952">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-editorial-board="yes" x-international-audience="yes">
        <title level="j">Journal of Parallel and Distributed
        Computing</title>
        <imprint>
          <biblScope type="volume">71</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <month>February</month>
            <year>2011</year>
          </dateStruct>
          <biblScope type="pages">169-184</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00511414/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00511414/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <note type="bnote">Special issue on data intensive computing.
      To appear</note>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid63" type="inproceedings" rend="refer" n="refercite:nicolae:2010:inria-00456801:1">
      <identifiant type="hal" value="inria-00456801"/>
      <analytic>
        <title level="a">BlobSeer: Bringing High Throughput under
        Heavy Concurrency to Hadoop Map-Reduce Applications</title>
        <author>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="paris-2008-idm188504272032">
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2009-idm140027550560">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">24th IEEE International Parallel and
        Distributed Processing Symposium (IPDPS 2010)</title>
        <loc>Atlanta</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">IEEE and ACM</orgName>
          </publisher>
          <dateStruct>
            <month>April</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00456801" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00456801</ref>
        </imprint>
      </monogr>
      <note type="bnote">A preliminary version of this paper has
      been published as INRIA Research Report RR-7140.</note>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid64" type="article" rend="refer" n="refercite:MORALES:2007:INRIA-00446067:1">
      <identifiant type="hal" value="inria-00446067"/>
      <identifiant type="doi" value="10.1109/TNSM.2007.070903"/>
      <analytic>
        <title level="a">MOve: Design and Evaluation of A Malleable
        Overlay for Group-Based Applications</title>
        <author>
          <persName>
            <foreName>Ramsés</foreName>
            <surname>Morales</surname>
            <initial>R.</initial>
          </persName>
          <persName key="paris-2006-idm124331438528">
            <foreName>Sébastien</foreName>
            <surname>Monnet</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Indranil</foreName>
            <surname>Gupta</surname>
            <initial>I.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">IEEE Transactions on Network and Service
        Management, Special Issue on Self-Management</title>
        <imprint>
          <biblScope type="volume">4</biblScope>
          <dateStruct>
            <year>2007</year>
          </dateStruct>
          <biblScope type="pages">107-116</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00446067/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00446067/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="5588" subtype="nonparu" id="kerdata-2010-bid60" type="phdthesis" rend="year" n="cite:Nic10PhD">
      <monogr>
        <title level="m">BlobSeer: Towards efficient data storage
        management for large-scale, distributed systems</title>
        <author>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">University Rennes 1</orgName>
            <address>
              <addrLine>IRISA/INRIA, Rennes, France</addrLine>
            </address>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2010</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote">To appear</note>
      <note type="typdoc">Ph. D. Thesis</note>
    </biblStruct>
    <biblStruct dedoublkey="0351" subtype="nonparu" id="kerdata-2010-bid44" type="article" rend="year" n="cite:nicolae:2010:inria-00511414:1">
      <identifiant type="hal" value="inria-00511414"/>
      <analytic>
        <title level="a">BlobSeer: Next Generation Data Management
        for Large Scale Infrastructures</title>
        <author>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName key="paris-2008-idm188504272032">
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="paris-2008-idm188504275952">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr id="rid01326" x-editorial-board="yes" x-international-audience="yes">
        <title level="j">Journal of Parallel and Distributed
        Computing</title>
        <imprint>
          <biblScope type="volume">71</biblScope>
          <biblScope type="number">2</biblScope>
          <dateStruct>
            <month>February</month>
            <year>2011</year>
          </dateStruct>
          <biblScope type="pages">169-184</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00511414/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00511414/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <note type="bnote">Special issue on data intensive computing.
      To appear</note>
    </biblStruct>
    <biblStruct dedoublkey="3982" id="kerdata-2010-bid54" type="inproceedings" rend="year" n="cite:aina2011">
      <identifiant type="hal" value="inria-00536603"/>
      <analytic>
        <title level="a">Managing Data Access on Clouds: A Generic
        Framework for Enforcing Security Policies</title>
        <author>
          <persName>
            <foreName>Cristina</foreName>
            <surname>Basescu</surname>
            <initial>C.</initial>
          </persName>
          <persName key="paris-2008-idm188504275952">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Catalin</foreName>
            <surname>Leordeanu</surname>
            <initial>C.</initial>
          </persName>
          <persName key="kerdata-2009-idm140027562000">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">The 25th International Conference on
        Advanced Information Networking and Applications
        (AINA-2011)</title>
        <loc>Singapore</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">Institute for Infocomm
            Research (I2R), in cooperation with the Singapore
            Chapter of ACM</orgName>
          </publisher>
          <dateStruct>
            <year>2011</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00536603/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00536603/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
        <meeting id="cid80562">
          <title>IEEE International Conference on Advanced Information Networking and Applications</title>
          <num>25</num>
          <abbr type="sigle">AINA</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>RO</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="2875" id="kerdata-2010-bid51" type="inproceedings" rend="year" n="cite:adis2010">
      <identifiant type="hal" value="inria-00419978"/>
      <analytic>
        <title level="a">Bringing Introspection Into the BlobSeer
        Data-Management System Using the MonALISA Distributed
        Monitoring Framework</title>
        <author>
          <persName key="paris-2008-idm188504275952">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2009-idm140027558784">
            <foreName>Jing</foreName>
            <surname>Cai</surname>
            <initial>J.</initial>
          </persName>
          <persName key="kerdata-2009-idm140027562000">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">First International Workshop on Autonomic
        Distributed Systems (ADiS 2010)</title>
        <loc>Krakow, Poland</loc>
        <imprint>
          <dateStruct>
            <year>2010</year>
          </dateStruct>
          <biblScope type="pages">508-513</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00419978/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00419978/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
        <meeting id="cid399450">
          <title>International Workshop on Autonomic Distributed Systems</title>
          <num>1</num>
          <abbr type="sigle">ADIS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">Held in conjunction with CISIS 2010
      Conference</note>
      <affiliation>
        <country>CN</country>
        <country>RO</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="3775" id="kerdata-2010-bid49" type="inproceedings" rend="year" n="cite:moise:2010:inria-00476861:1">
      <identifiant type="hal" value="inria-00476861"/>
      <analytic>
        <title level="a">Improving the Hadoop Map/Reduce Framework
        to Support Concurrent Appends through the BlobSeer BLOB
        management system</title>
        <author>
          <persName key="paris-2008-idm188504272032">
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">The First International Workshop on
        MapReduce and its Applications (MAPREDUCE'10)</title>
        <loc>Chicago, IL, USA</loc>
        <imprint>
          <dateStruct>
            <month>June</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00476861/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00476861/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
        <meeting id="cid399515">
          <title>International Workshop on MapReduce and its Applications</title>
          <num>1</num>
          <abbr type="sigle">MAPREDUCE</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="3894" id="kerdata-2010-bid48" type="inproceedings" rend="year" n="cite:moise:2010:inria-00458143:1">
      <identifiant type="hal" value="inria-00458143"/>
      <analytic>
        <title level="a">Large-Scale Distributed Storage for Highly
        Concurrent MapReduce Applications</title>
        <author>
          <persName key="paris-2008-idm188504272032">
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">PhD Forum of IPDPS '10: 24th IEEE
        International Parallel and Distributed Processing Symposium
        (IPDPS 2010)</title>
        <loc>Atlanta, GA, USA</loc>
        <imprint>
          <dateStruct>
            <month>April</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00458143/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00458143/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
        <meeting id="cid87817">
          <title>IEEE International Parallel and Distributed Processing Symposium</title>
          <num>24</num>
          <abbr type="sigle">IPDPS</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="5196" id="kerdata-2010-bid58" type="inproceedings" rend="year" n="cite:montes:2010:inria-00527650:1">
      <identifiant type="hal" value="inria-00527650"/>
      <analytic>
        <title level="a">Using Global Behavior Modeling to Improve
        QoS in Cloud Data Storage Services</title>
        <author>
          <persName key="kerdata-2009-idm140027565264">
            <foreName>Jesús</foreName>
            <surname>Montes</surname>
            <initial>J.</initial>
          </persName>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Alberto</foreName>
            <surname>Sánchez</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>María</foreName>
            <surname>Pérez Hernández</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">CloudCom'10: Proc. 2nd IEEE International
        Conference on Cloud Computing Technology and
        Science</title>
        <loc>Indianapolis, IN, USA</loc>
        <imprint>
          <dateStruct>
            <month>October</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00527650/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00527650/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
        <meeting id="cid81569">
          <title>IEEE International Conference on Cloud Computing</title>
          <num>2010</num>
          <abbr type="sigle">CLOUD</abbr>
        </meeting>
      </monogr>
      <affiliation>
        <country>ES</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="2854" id="kerdata-2010-bid47" type="inproceedings" rend="year" n="cite:nicolae:2010:inria-00456801:1">
      <identifiant type="hal" value="inria-00456801"/>
      <analytic>
        <title level="a">BlobSeer: Bringing High Throughput under
        Heavy Concurrency to Hadoop Map-Reduce Applications</title>
        <author>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="paris-2008-idm188504272032">
            <foreName>Diana</foreName>
            <surname>Moise</surname>
            <initial>D.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName key="kerdata-2009-idm140027550560">
            <foreName>Matthieu</foreName>
            <surname>Dorier</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">24th IEEE International Parallel and
        Distributed Processing Symposium (IPDPS 2010)</title>
        <loc>Atlanta, GA, USA</loc>
        <imprint>
          <publisher>
            <orgName type="organisation">IEEE and ACM</orgName>
          </publisher>
          <dateStruct>
            <month>April</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00456801/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00456801/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
        <meeting id="cid87817">
          <title>IEEE International Parallel and Distributed Processing Symposium</title>
          <num>24</num>
          <abbr type="sigle">IPDPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">A preliminary version of this paper has
      been published as INRIA Research Report RR-7140</note>
    </biblStruct>
    <biblStruct dedoublkey="2855" id="kerdata-2010-bid45" type="inproceedings" rend="year" n="cite:BlobSeer-PhdForum">
      <identifiant type="hal" value="inria-00457809"/>
      <analytic>
        <title level="a">BlobSeer: Efficient Data Management for
        Data-Intensive Applications Distributed at
        Large-Scale</title>
        <author>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">PhD Forum of IPDPS '10: 24th IEEE
        International Symposium on Parallel and Distributed
        Processing</title>
        <loc>Atlanta, GA, USA</loc>
        <imprint>
          <dateStruct>
            <year>2010</year>
          </dateStruct>
          <biblScope type="pages">1-4</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00457809/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00457809/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
        <meeting id="cid87817">
          <title>IEEE International Parallel and Distributed Processing Symposium</title>
          <num>24</num>
          <abbr type="sigle">IPDPS</abbr>
        </meeting>
      </monogr>
      <note type="bnote">Best Poster Award</note>
    </biblStruct>
    <biblStruct dedoublkey="3695" id="kerdata-2010-bid46" type="inproceedings" rend="year" n="cite:nicolae:2010:inria-00490541:1">
      <identifiant type="hal" value="inria-00490541"/>
      <analytic>
        <title level="a">High Throughput Data-Compression for Cloud
        Storage</title>
        <author>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
        </author>
      </analytic>
      <monogr x-international-audience="yes" x-proceedings="yes">
        <title level="m">3rd International Conference on Data
        Management in Grid and P2P Systems (Globe 2010)</title>
        <loc>Bilbao, Spain</loc>
        <imprint>
          <biblScope type="volume">6265</biblScope>
          <dateStruct>
            <month>June</month>
            <year>2010</year>
          </dateStruct>
          <biblScope type="pages">1-12</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00490541/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00490541/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
        <meeting id="cid119612">
          <title>International Conference on Data Management in Grid and Peer-to-Peer Systems</title>
          <num>3</num>
          <abbr type="sigle">Globe</abbr>
        </meeting>
      </monogr>
    </biblStruct>
    <biblStruct dedoublkey="5989" id="kerdata-2010-bid53" type="techreport" rend="year" n="cite:amcs">
      <identifiant type="hal" value="inria-00536556"/>
      <monogr>
        <title level="m">Bringing Introspection into BlobSeer:
        Towards a Self-Adaptative Distributed Data Management
        System</title>
        <author>
          <persName key="paris-2008-idm188504275952">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2009-idm140027558784">
            <foreName>Jing</foreName>
            <surname>Cai</surname>
            <initial>J.</initial>
          </persName>
          <persName key="kerdata-2009-idm140027562000">
            <foreName>Alexandru</foreName>
            <surname>Costan</surname>
            <initial>A.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="institution">INRIA</orgName>
          </publisher>
          <dateStruct>
            <month>November</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00536556/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00536556/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <note type="bnote">RR-7452</note>
      <note type="typdoc">Research Report</note>
      <affiliation>
        <country>CN</country>
        <country>RO</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="6061" id="kerdata-2010-bid57" type="techreport" rend="year" n="cite:viet-thesis">
      <identifiant type="hal" value="inria-00528928"/>
      <monogr>
        <title level="m">Efficient VM Storage for Clouds Based on
        the High-Throughput BlobSeer BLOB Management System</title>
        <author>
          <persName key="paris-2008-idm188504275952">
            <foreName>Alexandra</foreName>
            <surname>Carpen-Amarie</surname>
            <initial>A.</initial>
          </persName>
          <persName key="kerdata-2010-idm58934874848">
            <foreName>Tuan-Viet</foreName>
            <surname>Dinh</surname>
            <initial>T.-V.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="institution">INRIA</orgName>
          </publisher>
          <dateStruct>
            <month>October</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00528928/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00528928/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <note type="bnote">RR-7434</note>
      <note type="typdoc">Research Report</note>
    </biblStruct>
    <biblStruct dedoublkey="6397" id="kerdata-2010-bid61" type="techreport" rend="year" n="cite:montessanchez:2010:inria-00482568:1">
      <identifiant type="hal" value="inria-00482568"/>
      <monogr>
        <title level="m">Using Global Behavior Modeling to Improve
        QoS in Large-scale Distributed Data Storage
        Services</title>
        <author>
          <persName>
            <foreName>Jesús</foreName>
            <surname>Montes Sánchez</surname>
            <initial>J.</initial>
          </persName>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Alberto</foreName>
            <surname>Sánchez Campos</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>María</foreName>
            <surname>Pérez Hernández</surname>
            <initial>M.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="institution">INRIA</orgName>
          </publisher>
          <dateStruct>
            <month>May</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00482568/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00482568/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <note type="bnote">RR-7271</note>
      <note type="typdoc">Research Report</note>
      <affiliation>
        <country>ES</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="6050" id="kerdata-2010-bid52" type="techreport" rend="year" n="cite:vlad-thesis">
      <identifiant type="hal" value="inria-00531049"/>
      <monogr>
        <title level="m">Distributed Monitoring for User Accounting
        in the BlobSeer Distributed Storage System</title>
        <author>
          <persName key="kerdata-2009-idm140027554688">
            <foreName>Mihaela-Camelia</foreName>
            <surname>Vlad</surname>
            <initial>M.-C.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="institution">INRIA</orgName>
          </publisher>
          <dateStruct>
            <month>September</month>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00531049/en/" type="hal" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00531049/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <note type="bnote">RR-7436</note>
      <note type="typdoc">Research Report</note>
      <affiliation>
        <country>RO</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="6117" id="kerdata-2010-bid56" type="techreport" rend="year" n="cite:NICOLAE:2010:INRIA-00545232:1">
      <identifiant type="hal" value="inria-00545232"/>
      <monogr>
        <title level="m">Going Back and Forth: Efficient Virtual
        Machine Image Deployment and Snapshotting on IaaS
        Clouds</title>
        <author>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>John</foreName>
            <surname>Bresnahan</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Kate</foreName>
            <surname>Keahey</surname>
            <initial>K.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="institution">INRIA</orgName>
          </publisher>
          <dateStruct>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.archives-ouvertes.fr/inria-00545232/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>archives-ouvertes. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00545232/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <note type="typdoc">Technical report</note>
      <affiliation>
        <country>US</country>
      </affiliation>
    </biblStruct>
    <biblStruct dedoublkey="6060" id="kerdata-2010-bid55" type="techreport" rend="year" n="cite:TRAN:2010:INRIA-00546956:1">
      <identifiant type="hal" value="inria-00546956"/>
      <monogr>
        <title level="m">Efficient support for MPI-IO atomicity
        based on versioning</title>
        <author>
          <persName key="kerdata-2009-idm140027572976">
            <foreName>Viet-Trung</foreName>
            <surname>Tran</surname>
            <initial>V.-T.</initial>
          </persName>
          <persName key="paris-2007-idm243644537488">
            <foreName>Bogdan</foreName>
            <surname>Nicolae</surname>
            <initial>B.</initial>
          </persName>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="institution">INRIA</orgName>
          </publisher>
          <dateStruct>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.archives-ouvertes.fr/inria-00546956/en/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>archives-ouvertes. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00546956/ 
          <allowbreak/>en/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
      <note type="bnote">Submitted for publication</note>
      <note type="typdoc">Technical report</note>
    </biblStruct>
    <biblStruct dedoublkey="5356" subtype="nonparu" id="kerdata-2010-bid59" type="mastersthesis" rend="year" n="cite:Dinh10MSthesis">
      <monogr>
        <title level="m">Using BlobSeer Data Sharing Platform for
        Cloud Virtual Machine Repository</title>
        <author>
          <persName key="kerdata-2010-idm58934874848">
            <foreName>Tuan-Viet</foreName>
            <surname>Dinh</surname>
            <initial>T.-V.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">ENS Cachan/Bretagne</orgName>
            <address>
              <addrLine>IRISA/INRIA, Rennes, France</addrLine>
            </address>
          </publisher>
          <dateStruct>
            <month>June</month>
            <year>2010</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote">To appear</note>
      <note type="typdoc">Masters thesis</note>
    </biblStruct>
    <biblStruct dedoublkey="5341" subtype="nonparu" id="kerdata-2010-bid50" type="mastersthesis" rend="year" n="cite:Trieu10MSthesis">
      <monogr>
        <title level="m">Intermediate Data Management for
        Map/Reduce Applications</title>
        <author>
          <persName key="kerdata-2010-idm58934871744">
            <foreName>Thi-Thu-Lan</foreName>
            <surname>Trieu</surname>
            <initial>T.-T.-L.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName type="school">ENS Cachan/Bretagne</orgName>
            <address>
              <addrLine>IRISA/INRIA, Rennes, France</addrLine>
            </address>
          </publisher>
          <dateStruct>
            <month>June</month>
            <year>2010</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="bnote">To appear</note>
      <note type="typdoc">Masters thesis</note>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid5" type="article" rend="foot" n="footcite:AllBesBreetal02">
      <identifiant type="doi" value="10.1016/S0167-8191(02)00094-7"/>
      <analytic>
        <title level="a">Data management and transfer in
        high-performance computational grid environments</title>
        <author>
          <persName>
            <foreName>Bill</foreName>
            <surname>Allcock</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Joe</foreName>
            <surname>Bester</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>John</foreName>
            <surname>Bresnahan</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Ann L.</foreName>
            <surname>Chervenak</surname>
            <initial>A. L.</initial>
          </persName>
          <persName>
            <foreName>Ian</foreName>
            <surname>Foster</surname>
            <initial>I.</initial>
          </persName>
          <persName>
            <foreName>Carl</foreName>
            <surname>Kesselman</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Sam</foreName>
            <surname>Meder</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Veronika</foreName>
            <surname>Nefedova</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Darcy</foreName>
            <surname>Quesnel</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Steven</foreName>
            <surname>Tuecke</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Parallel Comput.</title>
        <imprint>
          <biblScope type="volume">28</biblScope>
          <biblScope type="number">5</biblScope>
          <dateStruct>
            <year>2002</year>
          </dateStruct>
          <biblScope type="pages">749–771</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1016/S0167-8191(02)00094-7" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1016/ 
          <allowbreak/>S0167-8191(02)00094-7</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid13" type="inbook" rend="foot" n="footcite:AntBerCarDesBouJanMonSen06GDS">
      <analytic>
        <author>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="asap-2006-idm429376808944">
            <foreName>Marin</foreName>
            <surname>Bertier</surname>
            <initial>M.</initial>
          </persName>
          <persName key="graal-2006-idm329937269808">
            <foreName>Eddy</foreName>
            <surname>Caron</surname>
            <initial>E.</initial>
          </persName>
          <persName key="graal-2006-idm329937294144">
            <foreName>Frédéric</foreName>
            <surname>Desprez</surname>
            <initial>F.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName key="paris-2006-idm124331446512">
            <foreName>Mathieu</foreName>
            <surname>Jan</surname>
            <initial>M.</initial>
          </persName>
          <persName key="paris-2006-idm124331438528">
            <foreName>Sébastien</foreName>
            <surname>Monnet</surname>
            <initial>S.</initial>
          </persName>
          <persName key="regal-2006-idm522926596176">
            <foreName>Pierre</foreName>
            <surname>Sens</surname>
            <initial>P.</initial>
          </persName>
        </author>
        <title level="a">GDS: An Architecture Proposal for a grid
        Data-Sharing Service</title>
      </analytic>
      <monogr>
        <title level="m">Future Generation Grids</title>
        <title level="s">CoreGRID series</title>
        <imprint>
          <publisher>
            <orgName>Springer</orgName>
          </publisher>
          <dateStruct>
            <year>2006</year>
          </dateStruct>
          <biblScope type="pages">133-152</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid14" type="article" rend="foot" n="footcite:AntBouJan05SCPE">
      <identifiant type="hal" value="inria-00000984"/>
      <analytic>
        <title level="a">JuxMem: An Adaptive Supportive Platform
        for Data Sharing on the Grid</title>
        <author>
          <persName key="paris-2006-idm124332495696">
            <foreName>Gabriel</foreName>
            <surname>Antoniu</surname>
            <initial>G.</initial>
          </persName>
          <persName key="paris-2006-idm124332467968">
            <foreName>Luc</foreName>
            <surname>Bougé</surname>
            <initial>L.</initial>
          </persName>
          <persName key="paris-2006-idm124331446512">
            <foreName>Mathieu</foreName>
            <surname>Jan</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Scalable Computing: Practice and
        Experience</title>
        <imprint>
          <biblScope type="volume">6</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <month>November</month>
            <year>2005</year>
          </dateStruct>
          <biblScope type="pages">45–55</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hal.inria.fr/inria-00000984" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hal. 
          <allowbreak/>inria. 
          <allowbreak/>fr/ 
          <allowbreak/>inria-00000984</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid8" type="inproceedings" rend="foot" n="footcite:BasBecFagetal02IBP">
      <analytic>
        <title level="a">The Internet Backplane Protocol: A Study
        in Resource Sharing</title>
        <author>
          <persName>
            <foreName>Alessandro</foreName>
            <surname>Bassi</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Micah</foreName>
            <surname>Beck</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Graham</foreName>
            <surname>Fagg</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Terry</foreName>
            <surname>Moore</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>James S.</foreName>
            <surname>Plank</surname>
            <initial>J. S.</initial>
          </persName>
          <persName>
            <foreName>Martin</foreName>
            <surname>Swany</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Rich</foreName>
            <surname>Wolski</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proc. 2nd IEEE/ACM Intl. Symp. on Cluster
        Computing and the Grid (CCGRID '02)</title>
        <loc>Washington, DC, USA</loc>
        <imprint>
          <publisher>
            <orgName>IEEE Computer Society</orgName>
          </publisher>
          <dateStruct>
            <year>2002</year>
          </dateStruct>
          <biblScope type="pages">194</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid9" type="inproceedings" rend="foot" n="footcite:BenVenLeRetal02">
      <analytic>
        <title level="a">Flexibility, Manageability, and
        Performance in a Grid Storage Appliance</title>
        <author>
          <persName>
            <foreName>John</foreName>
            <surname>Bent</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Venkateshwaran</foreName>
            <surname>Venkataramani</surname>
            <initial>V.</initial>
          </persName>
          <persName>
            <foreName>Nick</foreName>
            <surname>LeRoy</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Alain</foreName>
            <surname>Roy</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Joseph</foreName>
            <surname>Stanley</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Andrea</foreName>
            <surname>Arpaci-Dusseau</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Remzi</foreName>
            <surname>Arpaci-Dusseau</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Miron</foreName>
            <surname>Livny</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proc. 11th IEEE Symposium on High
        Performance Distributed Computing (HPDC 11)</title>
        <imprint>
          <dateStruct>
            <year>2002</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid29" type="inproceedings" rend="foot" n="footcite:BuyyaHPCC08">
      <identifiant type="doi" value="10.1109/HPCC.2008.172"/>
      <analytic>
        <title level="a">Market-Oriented Cloud Computing: Vision,
        Hype, and Reality for Delivering IT Services as Computing
        Utilities</title>
        <author>
          <persName>
            <foreName>Rajkumar</foreName>
            <surname>Buyya</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Chee S.</foreName>
            <surname>Yeo</surname>
            <initial>C. S.</initial>
          </persName>
          <persName>
            <foreName>Srikumar</foreName>
            <surname>Venugopal</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">HPCC '08: Proceedings of the 2008
        10th IEEE International Conference on High Performance
        Computing and Communications</title>
        <loc>Washington, DC, USA</loc>
        <imprint>
          <publisher>
            <orgName>IEEE Computer Society</orgName>
          </publisher>
          <dateStruct>
            <year>2008</year>
          </dateStruct>
          <biblScope type="pages">5–13</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1109/HPCC.2008.172" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1109/ 
          <allowbreak/>HPCC. 
          <allowbreak/>2008. 
          <allowbreak/>172</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid25" type="inproceedings" rend="foot" n="footcite:PVFS">
      <analytic>
        <title level="a">PVFS: A Parallel File System for Linux
        Clusters</title>
        <author>
          <persName>
            <foreName>Philip H.</foreName>
            <surname>Carns</surname>
            <initial>P. H.</initial>
          </persName>
          <persName>
            <foreName>Walter B.</foreName>
            <surname>Ligon</surname>
            <initial>W. B.</initial>
          </persName>
          <persName>
            <foreName>Robert B.</foreName>
            <surname>Ross</surname>
            <initial>R. B.</initial>
          </persName>
          <persName>
            <foreName>Rajeev</foreName>
            <surname>Thakur</surname>
            <initial>R.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">ALS '00: Proceedings of the 4th Annual
        Linux Showcase and Conference</title>
        <loc>Atlanta, GA, USA</loc>
        <imprint>
          <publisher>
            <orgName>USENIX Association</orgName>
          </publisher>
          <dateStruct>
            <year>2000</year>
          </dateStruct>
          <biblScope type="pages">317–327</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid1" type="inproceedings" rend="foot" n="footcite:casKur07Multimedia">
      <identifiant type="doi" value="10.1145/1291233.1291238"/>
      <analytic>
        <title level="a">Large data methods for multimedia</title>
        <author>
          <persName>
            <foreName>Michael A.</foreName>
            <surname>Casey</surname>
            <initial>M. A.</initial>
          </persName>
          <persName>
            <foreName>Frank</foreName>
            <surname>Kurth</surname>
            <initial>F.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proc. 15th Intl. Conf. on Multimedia
        (Multimedia '07)</title>
        <loc>New York, NY, USA</loc>
        <imprint>
          <publisher>
            <orgName>ACM</orgName>
          </publisher>
          <dateStruct>
            <year>2007</year>
          </dateStruct>
          <biblScope type="pages">6–7</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://doi.acm.org/10.1145/1291233.1291238" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>doi. 
          <allowbreak/>acm. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1145/ 
          <allowbreak/>1291233. 
          <allowbreak/>1291238</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid43" type="article" rend="foot" n="footcite:BitTorrentAndCDNDesktopGrid">
      <identifiant type="doi" value="10.1142/S0129626408003466"/>
      <analytic>
        <title level="a">Optimizing data distribution in desktop
        grid platforms</title>
        <author>
          <persName>
            <foreName>Fernando</foreName>
            <surname>Costa</surname>
            <initial>F.</initial>
          </persName>
          <persName>
            <foreName>Luis</foreName>
            <surname>Silva</surname>
            <initial>L.</initial>
          </persName>
          <persName key="grand-large-2006-idm343610702240">
            <foreName>Gilles</foreName>
            <surname>Fedak</surname>
            <initial>G.</initial>
          </persName>
          <persName>
            <foreName>Ian</foreName>
            <surname>Kelley</surname>
            <initial>I.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Parallel Processing Letters (PPL)</title>
        <imprint>
          <biblScope type="volume">18</biblScope>
          <dateStruct>
            <year>2008</year>
          </dateStruct>
          <biblScope type="pages">391 - 410</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1142/S0129626408003466" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1142/ 
          <allowbreak/>S0129626408003466</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid17" type="article" rend="foot" n="footcite:mapreduce">
      <analytic>
        <title level="a">MapReduce: simplified data processing on
        large clusters</title>
        <author>
          <persName>
            <foreName>Jeffrey</foreName>
            <surname>Dean</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Sanjay</foreName>
            <surname>Ghemawat</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Communications of the ACM</title>
        <imprint>
          <biblScope type="volume">51</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2008</year>
          </dateStruct>
          <biblScope type="pages">107–113</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid19" type="inproceedings" rend="foot" n="footcite:PVFS:OSDs">
      <identifiant type="doi" value="10.1145/1362622.1362659"/>
      <analytic>
        <title level="a">Integrating parallel file systems with
        object-based storage devices</title>
        <author>
          <persName>
            <foreName>Ananth</foreName>
            <surname>Devulapalli</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Dennis</foreName>
            <surname>Dalessandro</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>Pete</foreName>
            <surname>Wyckoff</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>Nawab</foreName>
            <surname>Ali</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>P.</foreName>
            <surname>Sadayappan</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">SC '07: Proceedings of the 2007 ACM/IEEE
        conference on Supercomputing</title>
        <loc>New York, NY, USA</loc>
        <imprint>
          <publisher>
            <orgName>ACM</orgName>
          </publisher>
          <dateStruct>
            <year>2007</year>
          </dateStruct>
          <biblScope type="pages">1–10</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1145/1362622.1362659" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1145/ 
          <allowbreak/>1362622. 
          <allowbreak/>1362659</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid2" type="book" rend="foot" n="footcite:DouDou03PostgreSQL">
      <monogr>
        <title level="m">PostgreSQL</title>
        <author>
          <persName>
            <foreName>Korry</foreName>
            <surname>Douglas</surname>
            <initial>K.</initial>
          </persName>
          <persName>
            <foreName>Susan</foreName>
            <surname>Douglas</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <publisher>
            <orgName>New Riders Publishing 
            <address><addrLine>Thousand Oaks, CA, USA</addrLine></address></orgName>
          </publisher>
          <dateStruct>
            <year>2003</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid23" type="inproceedings" rend="foot" n="footcite:Object:future">
      <identifiant type="doi" value="10.1109/LGDI.2005.1612479"/>
      <analytic>
        <title level="a">Object storage: the future building block
        for storage systems</title>
        <author>
          <persName>
            <foreName>M.</foreName>
            <surname>Factor</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>K.</foreName>
            <surname>Meth</surname>
            <initial>K.</initial>
          </persName>
          <persName>
            <foreName>D.</foreName>
            <surname>Naor</surname>
            <initial>D.</initial>
          </persName>
          <persName>
            <foreName>O.</foreName>
            <surname>Rodeh</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Satran</surname>
            <initial>J.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Local to Global Data Interoperability -
        Challenges and Technologies, 2005</title>
        <imprint>
          <dateStruct>
            <year>2005</year>
          </dateStruct>
          <biblScope type="pages">119–123</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1109/LGDI.2005.1612479" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1109/ 
          <allowbreak/>LGDI. 
          <allowbreak/>2005. 
          <allowbreak/>1612479</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid21" type="inproceedings" rend="foot" n="footcite:GoogleFS">
      <identifiant type="doi" value="10.1145/945445.945450"/>
      <analytic>
        <title level="a">The Google file system</title>
        <author>
          <persName>
            <foreName>Sanjay</foreName>
            <surname>Ghemawat</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Howard</foreName>
            <surname>Gobioff</surname>
            <initial>H.</initial>
          </persName>
          <persName>
            <foreName>Shun-Tak</foreName>
            <surname>Leung</surname>
            <initial>S.-T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">SOSP '03: Proceedings of the nineteenth
        ACM symposium on Operating systems principles</title>
        <loc>New York, NY, USA</loc>
        <imprint>
          <publisher>
            <orgName>ACM Press</orgName>
          </publisher>
          <dateStruct>
            <year>2003</year>
          </dateStruct>
          <biblScope type="pages">29–43</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1145/945445.945450" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1145/ 
          <allowbreak/>945445. 
          <allowbreak/>945450</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid15" type="misc" rend="foot" n="footcite:Gri08Unstructured">
      <monogr>
        <title level="m">Unstructured Data and the 80 Percent
        Rule</title>
        <author>
          <persName>
            <foreName>Seth</foreName>
            <surname>Grimes</surname>
            <initial>S.</initial>
          </persName>
        </author>
        <imprint>
          <dateStruct>
            <year>2008</year>
          </dateStruct>
        </imprint>
      </monogr>
      <note type="howpublished">Clarabridge Bridgepoints</note>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid11" type="inproceedings" rend="foot" n="footcite:HonAdaKee05GridNFS">
      <analytic>
        <title level="a">GridNFS: global storage for global
        collaborations</title>
        <author>
          <persName>
            <foreName>Peter</foreName>
            <surname>Honeyman</surname>
            <initial>P.</initial>
          </persName>
          <persName>
            <foreName>W. Andros</foreName>
            <surname>Adamson</surname>
            <initial>W. A.</initial>
          </persName>
          <persName>
            <foreName>Shawn</foreName>
            <surname>McKee</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proc. IEEE Intl. Symp. Local to Global Data
        Interoperability - Challenges and Technologies</title>
        <loc>Sardinia, Italy</loc>
        <imprint>
          <publisher>
            <orgName>IEEE Computer Society</orgName>
          </publisher>
          <dateStruct>
            <month>June</month>
            <year>2005</year>
          </dateStruct>
          <biblScope type="pages">111–115</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid42" type="article" rend="foot" n="footcite:IbrAnt2006">
      <identifiant type="doi" value="10.1109/DEXA.2006.57"/>
      <analytic>
        <title level="a">Exploring Adaptation &amp; Self-Adaptation
        in Autonomic Computing Systems</title>
        <author>
          <persName>
            <foreName>M.T.</foreName>
            <surname>Ibrahim</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>R.J.</foreName>
            <surname>Anthony</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>T.</foreName>
            <surname>Eymann</surname>
            <initial>T.</initial>
          </persName>
          <persName>
            <foreName>A.</foreName>
            <surname>Taleb-Bendiab</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>L.</foreName>
            <surname>Gruenwald</surname>
            <initial>L.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Database and Expert Systems Applications,
        International Workshop on</title>
        <imprint>
          <biblScope type="volume">0</biblScope>
          <dateStruct>
            <year>2006</year>
          </dateStruct>
          <biblScope type="pages">129-138</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://doi.ieeecomputersociety.org/10.1109/DEXA.2006.57" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>doi. 
          <allowbreak/>ieeecomputersociety. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1109/ 
          <allowbreak/>DEXA. 
          <allowbreak/>2006. 
          <allowbreak/>57</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid0" type="article" rend="foot" n="footcite:JinYan05DataMining">
      <identifiant type="doi" value="10.1109/TKDE.2005.18"/>
      <analytic>
        <title level="a">Shared Memory Parallelization of Data
        Mining Algorithms: Techniques, Programming Interface, and
        Performance</title>
        <author>
          <persName>
            <foreName>Ruoming</foreName>
            <surname>Jin</surname>
            <initial>R.</initial>
          </persName>
          <persName key="rccm-2008-idm328620253008">
            <foreName>Ge</foreName>
            <surname>Yang</surname>
            <initial>G.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">IEEE Trans. on Knowl. and Data
        Eng.</title>
        <imprint>
          <biblScope type="volume">17</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2005</year>
          </dateStruct>
          <biblScope type="pages">71–89</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1109/TKDE.2005.18" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1109/ 
          <allowbreak/>TKDE. 
          <allowbreak/>2005. 
          <allowbreak/>18</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid35" type="inproceedings" rend="foot" n="footcite:KeaFre2008CCA">
      <analytic>
        <title level="a">Science Clouds: Early Experiences in Cloud
        Computing for Scientific Applications</title>
        <author>
          <persName>
            <foreName>Kate</foreName>
            <surname>Keahey</surname>
            <initial>K.</initial>
          </persName>
          <persName>
            <foreName>Tim</foreName>
            <surname>Freeman</surname>
            <initial>T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Cloud Computing and Its Applications 2008
        (CCA-08)</title>
        <loc>Chicago, IL</loc>
        <imprint>
          <dateStruct>
            <year>2008</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid40" type="article" rend="foot" n="footcite:KephJeffChess2003">
      <identifiant type="doi" value="10.1109/MC.2003.1160055"/>
      <analytic>
        <title level="a">The Vision of Autonomic Computing</title>
        <author>
          <persName>
            <foreName>Jeffrey O.</foreName>
            <surname>Kephart</surname>
            <initial>J. O.</initial>
          </persName>
          <persName>
            <foreName>David M.</foreName>
            <surname>Chess</surname>
            <initial>D. M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Computer</title>
        <imprint>
          <biblScope type="volume">36</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2003</year>
          </dateStruct>
          <biblScope type="pages">41–50</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1109/MC.2003.1160055" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1109/ 
          <allowbreak/>MC. 
          <allowbreak/>2003. 
          <allowbreak/>1160055</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid28" type="inproceedings" rend="foot" n="footcite:LenKleNimTaiSan2009CLOUD">
      <analytic>
        <title level="a">What's inside the Cloud? An architectural
        map of the Cloud landscape</title>
        <author>
          <persName>
            <foreName>A.</foreName>
            <surname>Lenk</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>M.</foreName>
            <surname>Klems</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>J.</foreName>
            <surname>Nimis</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>S.</foreName>
            <surname>Tai</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>T.</foreName>
            <surname>Sandholm</surname>
            <initial>T.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Software Engineering Challenges of Cloud
        Computing (CLOUD '09)</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <biblScope type="pages">23 - 31</biblScope>
        </imprint>
      </monogr>
      <note type="bnote">ICSE Workshop</note>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid22" type="article" rend="foot" n="footcite:OSD">
      <identifiant type="doi" value="10.1109/MCOM.2003.1222722"/>
      <analytic>
        <title level="a">Object-based storage</title>
        <author>
          <persName>
            <foreName>M.</foreName>
            <surname>Mesnier</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>G. R.</foreName>
            <surname>Ganger</surname>
            <initial>G. R.</initial>
          </persName>
          <persName>
            <foreName>E.</foreName>
            <surname>Riedel</surname>
            <initial>E.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Communications Magazine, IEEE</title>
        <imprint>
          <biblScope type="volume">41</biblScope>
          <biblScope type="number">8</biblScope>
          <dateStruct>
            <year>2003</year>
          </dateStruct>
          <biblScope type="pages">84–90</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1109/MCOM.2003.1222722" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1109/ 
          <allowbreak/>MCOM. 
          <allowbreak/>2003. 
          <allowbreak/>1222722</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid39" type="article" rend="foot" n="footcite:MorGalJegRit09">
      <analytic>
        <title level="a">Clouds: a new playground for the XtreemOS
        Grid operating system</title>
        <author>
          <persName key="paris-2006-idm124332487728">
            <foreName>Christine</foreName>
            <surname>Morin</surname>
            <initial>C.</initial>
          </persName>
          <persName key="paris-2007-idm243644550464">
            <foreName>Jérôme</foreName>
            <surname>Gallard</surname>
            <initial>J.</initial>
          </persName>
          <persName key="paris-2006-idm124332493040">
            <foreName>Yvon</foreName>
            <surname>Jégou</surname>
            <initial>Y.</initial>
          </persName>
          <persName key="paris-2008-idm188504283808">
            <foreName>Pierre</foreName>
            <surname>Riteau</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">Parallel Processing Letters</title>
        <imprint>
          <biblScope type="volume">19</biblScope>
          <biblScope type="number">3</biblScope>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <biblScope type="pages">435-449</biblScope>
        </imprint>
      </monogr>
      <note type="bnote">To appear</note>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid37" type="inproceedings" rend="foot" n="footcite:Mor2007ISORC">
      <analytic>
        <title level="a">XtreemOS: a Grid Operating System Making
        your Computer Ready for Participating in Virtual
        Organizations</title>
        <author>
          <persName key="paris-2006-idm124332487728">
            <foreName>Christine</foreName>
            <surname>Morin</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">IEEE International Symposium on
        Object/component/service-oriented Real-time distributed
        Computing (ISORC)</title>
        <loc>Santorini Island, Greece</loc>
        <imprint>
          <dateStruct>
            <year>2007</year>
          </dateStruct>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid4" type="article" rend="foot" n="footcite:NicJar00IEEE">
      <identifiant type="doi" value="10.1109/69.868912"/>
      <analytic>
        <title level="a">Performance Modeling of Distributed and
        Replicated Databases</title>
        <author>
          <persName>
            <foreName>Matthias</foreName>
            <surname>Nicola</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Matthias</foreName>
            <surname>Jarke</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">IEEE Trans. on Knowl. and Data
        Eng.</title>
        <imprint>
          <biblScope type="volume">12</biblScope>
          <biblScope type="number">4</biblScope>
          <dateStruct>
            <year>2000</year>
          </dateStruct>
          <biblScope type="pages">645–672</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://dx.doi.org/10.1109/69.868912" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>dx. 
          <allowbreak/>doi. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1109/ 
          <allowbreak/>69. 
          <allowbreak/>868912</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid18" type="inproceedings" rend="foot" n="footcite:piglatin">
      <identifiant type="doi" value="10.1145/1376616.1376726"/>
      <analytic>
        <title level="a">Pig latin: a not-so-foreign language for
        data processing</title>
        <author>
          <persName>
            <foreName>Christopher</foreName>
            <surname>Olston</surname>
            <initial>C.</initial>
          </persName>
          <persName>
            <foreName>Benjamin</foreName>
            <surname>Reed</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>Utkarsh</foreName>
            <surname>Srivastava</surname>
            <initial>U.</initial>
          </persName>
          <persName>
            <foreName>Ravi</foreName>
            <surname>Kumar</surname>
            <initial>R.</initial>
          </persName>
          <persName>
            <foreName>Andrew</foreName>
            <surname>Tomkins</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">SIGMOD '08: Proceedings of the 2008 ACM
        SIGMOD international conference on Management of
        data</title>
        <loc>New York, NY, USA</loc>
        <imprint>
          <publisher>
            <orgName>ACM</orgName>
          </publisher>
          <dateStruct>
            <year>2008</year>
          </dateStruct>
          <biblScope type="pages">1099–1110</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://doi.acm.org/10.1145/1376616.1376726" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>doi. 
          <allowbreak/>acm. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1145/ 
          <allowbreak/>1376616. 
          <allowbreak/>1376726</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid41" type="inproceedings" rend="foot" n="footcite:Parashar05autonomiccomputing">
      <analytic>
        <title level="a">Autonomic computing: An overview</title>
        <author>
          <persName>
            <foreName>Manish</foreName>
            <surname>Parashar</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Salim</foreName>
            <surname>Hariri</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Unconventional Programming
        Paradigms</title>
        <imprint>
          <publisher>
            <orgName>Springer Verlag</orgName>
          </publisher>
          <dateStruct>
            <year>2005</year>
          </dateStruct>
          <biblScope type="pages">247–259</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid16" type="inproceedings" rend="foot" n="footcite:RagJinMokDebDu07">
      <identifiant type="doi" value="10.1145/1321440.1321583"/>
      <analytic>
        <title level="a">Towards efficient search on unstructured
        data: an intelligent-storage approach</title>
        <author>
          <persName>
            <foreName>Aravindan</foreName>
            <surname>Raghuveer</surname>
            <initial>A.</initial>
          </persName>
          <persName>
            <foreName>Meera</foreName>
            <surname>Jindal</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Mohamed F.</foreName>
            <surname>Mokbel</surname>
            <initial>M. F.</initial>
          </persName>
          <persName>
            <foreName>Biplob</foreName>
            <surname>Debnath</surname>
            <initial>B.</initial>
          </persName>
          <persName>
            <foreName>David</foreName>
            <surname>Du</surname>
            <initial>D.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">CIKM '07: Proceedings of the sixteenth ACM
        conference on Conference on information and knowledge
        management</title>
        <loc>New York, NY, USA</loc>
        <imprint>
          <publisher>
            <orgName>ACM</orgName>
          </publisher>
          <dateStruct>
            <year>2007</year>
          </dateStruct>
          <biblScope type="pages">951–954</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://doi.acm.org/10.1145/1321440.1321583" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>doi. 
          <allowbreak/>acm. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1145/ 
          <allowbreak/>1321440. 
          <allowbreak/>1321583</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid24" type="inproceedings" rend="foot" n="footcite:Lustre:1000">
      <analytic>
        <title level="a">Lustre: Building a file system for
        1000-node clusters</title>
        <author>
          <persName>
            <foreName>P.</foreName>
            <surname>Schwan</surname>
            <initial>P.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proceedings of the Linux Symposium</title>
        <imprint>
          <dateStruct>
            <year>2003</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.kernel.org/doc/ols/2003/ols2003-pages-380-386.pdf" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>kernel. 
          <allowbreak/>org/ 
          <allowbreak/>doc/ 
          <allowbreak/>ols/ 
          <allowbreak/>2003/ 
          <allowbreak/>ols2003-pages-380-386. 
          <allowbreak/>pdf</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid10" type="inproceedings" rend="foot" n="footcite:TatMorMatetal02Gfarm">
      <analytic>
        <title level="a">Grid Datafarm Architecture for Petascale
        Data Intensive Computing</title>
        <author>
          <persName>
            <foreName>Osamu</foreName>
            <surname>Tatebe</surname>
            <initial>O.</initial>
          </persName>
          <persName>
            <foreName>Youhei</foreName>
            <surname>Morita</surname>
            <initial>Y.</initial>
          </persName>
          <persName>
            <foreName>Satoshi</foreName>
            <surname>Matsuoka</surname>
            <initial>S.</initial>
          </persName>
          <persName>
            <foreName>Noriyuki</foreName>
            <surname>Soda</surname>
            <initial>N.</initial>
          </persName>
          <persName>
            <foreName>Satoshi</foreName>
            <surname>Sekiguchi</surname>
            <initial>S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proc. 2nd IEEE/ACM Intl. Symp. on Cluster
        Computing and the Grid (Cluster 2002)</title>
        <loc>Washington DC, USA</loc>
        <imprint>
          <publisher>
            <orgName>IEEE Computer Society</orgName>
          </publisher>
          <dateStruct>
            <year>2002</year>
          </dateStruct>
          <biblScope type="pages">102</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid3" type="article" rend="foot" n="footcite:Tho98Concurrency">
      <identifiant type="doi" value="10.1145/274440.274443"/>
      <analytic>
        <title level="a">Concurrency control: methods, performance,
        and analysis</title>
        <author>
          <persName>
            <foreName>Alexander</foreName>
            <surname>Thomasian</surname>
            <initial>A.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">ACM Computing Surveys</title>
        <imprint>
          <biblScope type="volume">30</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>1998</year>
          </dateStruct>
          <biblScope type="pages">70–119</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://doi.acm.org/10.1145/274440.274443" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>doi. 
          <allowbreak/>acm. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1145/ 
          <allowbreak/>274440. 
          <allowbreak/>274443</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid27" type="article" rend="foot" n="footcite:VaqueroCompComm09">
      <identifiant type="doi" value="10.1145/1496091.1496100"/>
      <analytic>
        <title level="a">A break in the clouds: towards a cloud
        definition</title>
        <author>
          <persName>
            <foreName>Luis M.</foreName>
            <surname>Vaquero</surname>
            <initial>L. M.</initial>
          </persName>
          <persName key="graal-2010-idm486711874240">
            <foreName>Luis</foreName>
            <surname>Rodero-Merino</surname>
            <initial>L.</initial>
          </persName>
          <persName>
            <foreName>Juan</foreName>
            <surname>Caceres</surname>
            <initial>J.</initial>
          </persName>
          <persName>
            <foreName>Maik</foreName>
            <surname>Lindner</surname>
            <initial>M.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="j">SIGCOMM Comput. Commun. Rev.</title>
        <imprint>
          <biblScope type="volume">39</biblScope>
          <biblScope type="number">1</biblScope>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <biblScope type="pages">50–55</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://doi.acm.org/10.1145/1496091.1496100" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>doi. 
          <allowbreak/>acm. 
          <allowbreak/>org/ 
          <allowbreak/>10. 
          <allowbreak/>1145/ 
          <allowbreak/>1496091. 
          <allowbreak/>1496100</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid20" type="inproceedings" rend="foot" n="footcite:ceph">
      <analytic>
        <title level="a">Ceph: a scalable, high-performance
        distributed file system</title>
        <author>
          <persName>
            <foreName>Sage A.</foreName>
            <surname>Weil</surname>
            <initial>S. A.</initial>
          </persName>
          <persName>
            <foreName>Scott A.</foreName>
            <surname>Brandt</surname>
            <initial>S. A.</initial>
          </persName>
          <persName>
            <foreName>Ethan L.</foreName>
            <surname>Miller</surname>
            <initial>E. L.</initial>
          </persName>
          <persName>
            <foreName>Darrell D. E.</foreName>
            <surname>Long</surname>
            <initial>D. D. E.</initial>
          </persName>
          <persName>
            <foreName>Carlos</foreName>
            <surname>Maltzahn</surname>
            <initial>C.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">OSDI '06: Proceedings of the 7th symposium
        on Operating systems design and implementation</title>
        <loc>Berkeley, CA, USA</loc>
        <imprint>
          <publisher>
            <orgName>USENIX Association</orgName>
          </publisher>
          <dateStruct>
            <year>2006</year>
          </dateStruct>
          <biblScope type="pages">307–320</biblScope>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://portal.acm.org/citation.cfm?id=1298455.1298485" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>portal. 
          <allowbreak/>acm. 
          <allowbreak/>org/ 
          <allowbreak/>citation. 
          <allowbreak/>cfm?id=1298455. 
          <allowbreak/>1298485</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid12" type="inproceedings" rend="foot" n="footcite:WhiWalHumGri01LegionFS">
      <analytic>
        <title level="a">LegionFS: a secure and scalable file
        system supporting cross-domain high-performance
        applications</title>
        <author>
          <persName>
            <foreName>Brian S.</foreName>
            <surname>White</surname>
            <initial>B. S.</initial>
          </persName>
          <persName key="virtual_plants-2009-idm428595684256">
            <foreName>Michael</foreName>
            <surname>Walker</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Marty</foreName>
            <surname>Humphrey</surname>
            <initial>M.</initial>
          </persName>
          <persName>
            <foreName>Andrew S.</foreName>
            <surname>Grimshaw</surname>
            <initial>A. S.</initial>
          </persName>
        </author>
      </analytic>
      <monogr>
        <title level="m">Proc. 2001 ACM/IEEE Conf. on
        Supercomputing (SC '01)</title>
        <loc>New York, NY, USA</loc>
        <imprint>
          <publisher>
            <orgName>ACM Press</orgName>
          </publisher>
          <dateStruct>
            <year>2001</year>
          </dateStruct>
          <biblScope type="pages">59–59</biblScope>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid7" type="misc" rend="foot" n="footcite:Chirp">
      <monogr>
        <title level="m">Chirp protocol specification</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.cs.wisc.edu/condor/chirp/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>cs. 
          <allowbreak/>wisc. 
          <allowbreak/>edu/ 
          <allowbreak/>condor/ 
          <allowbreak/>chirp/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid6" type="misc" rend="foot" n="footcite:ldr">
      <monogr>
        <title level="m">Lightweight Data Replicator</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.lsc-group.phys.uwm.edu/LDR/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>lsc-group. 
          <allowbreak/>phys. 
          <allowbreak/>uwm. 
          <allowbreak/>edu/ 
          <allowbreak/>LDR/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <biblStruct id="kerdata-2010-bid30" type="misc" rend="foot" n="footcite:GoogleAppWeb">
      <monogr>
        <title level="m">Google App Engine</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://code.google.com/appengine/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>code. 
          <allowbreak/>google. 
          <allowbreak/>com/ 
          <allowbreak/>appengine/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <!-- Footnote citation "GoogleDocsWeb": Google Docs tour page (2009),
         http://www.google.com/google-d-s/tour1.html -->
    <biblStruct id="kerdata-2010-bid32" type="misc" rend="foot" n="footcite:GoogleDocsWeb">
      <monogr>
        <title level="m">Google Docs</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.google.com/google-d-s/tour1.html" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>google. 
          <allowbreak/>com/ 
          <allowbreak/>google-d-s/ 
          <allowbreak/>tour1. 
          <allowbreak/>html</ref>
        </imprint>
      </monogr>
    </biblStruct>
    <!-- Footnote citation "HadoopFSWeb": HadoopFS (HDFS) documentation site (2009),
         http://hadoop.apache.org/hdfs/docs/current/ -->
    <biblStruct id="kerdata-2010-bid26" type="misc" rend="foot" n="footcite:HadoopFSWeb">
      <monogr>
        <title level="m">HadoopFS</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://hadoop.apache.org/hdfs/docs/current/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>hadoop. 
          <allowbreak/>apache. 
          <allowbreak/>org/ 
          <allowbreak/>hdfs/ 
          <allowbreak/>docs/ 
          <allowbreak/>current/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <!-- Footnote citation "MSAzureWeb": Microsoft Azure website (2009),
         http://www.microsoft.com/azure/ -->
    <biblStruct id="kerdata-2010-bid31" type="misc" rend="foot" n="footcite:MSAzureWeb">
      <monogr>
        <title level="m">Microsoft Azure</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.microsoft.com/azure/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>microsoft. 
          <allowbreak/>com/ 
          <allowbreak/>azure/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <!-- Footnote citation "MSOfficeLiveWeb": Microsoft Office Live website (2009),
         http://www.officelive.com/ -->
    <biblStruct id="kerdata-2010-bid33" type="misc" rend="foot" n="footcite:MSOfficeLiveWeb">
      <monogr>
        <title level="m">Microsoft Office Live</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.officelive.com/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>officelive. 
          <allowbreak/>com/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <!-- Footnote citation "NimbusWeb": the Nimbus project website (2009),
         hosted at http://workspace.globus.org/ (the Globus workspace service). -->
    <biblStruct id="kerdata-2010-bid34" type="misc" rend="foot" n="footcite:NimbusWeb">
      <monogr>
        <title level="m">The Nimbus project</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://workspace.globus.org/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>workspace. 
          <allowbreak/>globus. 
          <allowbreak/>org/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <!-- Footnote citation "OpenNebulaWeb": OpenNebula website (2010),
         http://www.opennebula.org/ — note: only entry in this group dated 2010. -->
    <biblStruct id="kerdata-2010-bid36" type="misc" rend="foot" n="footcite:OpenNebulaWeb">
      <monogr>
        <title level="m">OpenNebula</title>
        <imprint>
          <dateStruct>
            <year>2010</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.opennebula.org/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>opennebula. 
          <allowbreak/>org/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
    <!-- Footnote citation "XtreemOSWeb": the XtreemOS project website (2009),
         http://www.xtreemos.eu/ -->
    <biblStruct id="kerdata-2010-bid38" type="misc" rend="foot" n="footcite:XtreemOSWeb">
      <monogr>
        <title level="m">The XtreemOS project</title>
        <imprint>
          <dateStruct>
            <year>2009</year>
          </dateStruct>
          <ref xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.xtreemos.eu/" location="extern" xlink:type="simple" xlink:show="replace" xlink:actuate="onRequest">http:// 
          <allowbreak/>www. 
          <allowbreak/>xtreemos. 
          <allowbreak/>eu/ 
          <allowbreak/></ref>
        </imprint>
      </monogr>
    </biblStruct>
  </biblio>
</raweb>
