<?xml version="1.0" encoding="UTF-8"?>
<raweb xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:html="http://www.w3.org/1999/xhtml" xml:lang="en" year="2004" id="id2614750"><identification id="prima" isproject="true"><shortname id="id2614730">PRIMA</shortname><projectName id="id2614738">Perception, recognition and integration for interactive environments</projectName><theme id="id2588555">3A</theme><team id="uid1"><participants id="id2614790" category="Head_of_the_team"><person key="prima-2005-id2245512"><firstname id="id2614799">James L.</firstname><lastname id="id2614803">Crowley</lastname><moreinfo id="id2614807">Professor INPG</moreinfo></person></participants><participants id="id2614814" category="Professors"><person key="prima-2005-id2245482"><firstname id="id2614822">Augustin</firstname><lastname id="id2614826">Lux</lastname><moreinfo id="id2614830">Professor INPG</moreinfo></person><person key="prima-2005-id2245460"><firstname id="id2640092">Patrick</firstname><lastname id="id2640096">Reignier</lastname><moreinfo id="id2640100">Assistant professor UJF</moreinfo></person></participants><participants id="id2640107" category="Team_assistant"><person key="prima-2005-id2245407"><firstname id="id2640115">Natacha</firstname><lastname id="id2640120">Laugier</lastname></person></participants><participants id="id2640126" category="Expert_Engineers"><person key="prima-2005-id2245372"><firstname id="id2640135">Daniela</firstname><lastname id="id2640139">Hall</lastname></person><person key="prima-2005-id2244745"><firstname id="id2640147">Alban</firstname><lastname id="id2640152">Caporossi</lastname></person><person key="prima-2005-id2244725"><firstname id="id2640160">Alba</firstname><lastname id="id2640164">Ferrer-Biosca</lastname></person><person key="prima-2005-id2244735"><firstname id="id2640172">Sebastien</firstname><lastname id="id2640176">Pesnel</lastname></person><person key="prima-2005-id2244814"><firstname id="id2640184">Jean-Marie</firstname><lastname id="id2640189">Vallet</lastname></person></participants><participants id="id2640196" category="Post-doctoral_Researchers"><person key="prima-2005-id2245437"><firstname id="id2640205">Dominique</firstname><lastname id="id2640209">Vaufreydaz</lastname></person></participants><participants id="id2640216" category="Doctoral_Researchers"><person key="prima-2005-id2244828"><firstname id="id2640224">Stanislas</firstname><lastname id="id2640229">Borkowski</lastname><moreinfo id="id2640233">Bourse EGIDE</moreinfo></person><person key="prima-2005-id2244842"><firstname id="id2640241">Stephane</firstname><lastname id="id2640245">Guy</lastname><moreinfo id="id2640250">Bourse INRIA</moreinfo></person><person key="prima-2005-id2244855"><firstname id="id2640258">Suphot</firstname><lastname id="id2640262">Chunwiphat</lastname><moreinfo id="id2640266">Bourse gouvernement thailandais</moreinfo></person><person key="prima-2005-id2244868"><firstname id="id2640274">Matthieu</firstname><lastname id="id2640278">Anne</lastname><moreinfo id="id2640283">Bourse CIFRE - France Telecom</moreinfo></person><person key="prima-2005-id2244580"><firstname id="id2640291">Thi-Thanh-Hai</firstname><lastname id="id2640296">Tran</lastname><moreinfo id="id2640300">Bourse EGIDE</moreinfo></person><person key="prima-2005-id2244896"><firstname id="id2640308">Olivier</firstname><lastname id="id2640312">Bertrand</lastname><moreinfo id="id2640317">Solde de Normalien</moreinfo></person><person key="prima-2005-id2244909"><firstname id="id2640325">Nicolas</firstname><lastname id="id2640329">Gourier</lastname><moreinfo 
id="id2640333">Bourse INRIA</moreinfo></person><person key="prima-2005-id2244923"><firstname id="id2640342">Julien</firstname><lastname id="id2640346">Letessier</lastname><moreinfo id="id2640350">Bourse INRIA</moreinfo></person><person key="prima-2005-id2244937"><firstname id="id2640359">Jerome</firstname><lastname id="id2640363">Maisonnasse</lastname><moreinfo id="id2640367">INPG SA - Contrat France Telecom</moreinfo></person><person key="prima-2005-id2244950"><firstname id="id2640376">Oliver</firstname><lastname id="id2640380">Brdiczka</lastname><moreinfo id="id2640384">Bourse INRIA</moreinfo></person></participants></team><UR id="id2640392" name="Grenoble"/></identification><presentation id="uid3"><bodyTitle id="id2640404">Overall Objectives</bodyTitle><subsection id="uid4"><bodyTitle id="id2640413">Perception, Recognition and Integration for Interactive Environments. </bodyTitle><keyword id="id2640417">Interactive Environments</keyword><keyword id="id2640420">Computer Vision</keyword><keyword id="id2640422">Machine Perception</keyword><keyword id="id2640425">Man-Machine Interaction</keyword><keyword id="id2640428">Perceptual User Interfaces</keyword><p id="id2640433">The objective of Project PRIMA is to develop a scientific and
technological foundation for interactive environments.
An
environment is said to be "interactive" when it is capable of
perceiving, acting, and communicating with its occupants.
The
construction of such environments offers a rich set of problems
related to interpretation of sensor information, learning, machine
understanding and man-machine interaction.
Our goal is to make progress
on a theoretical foundation for cognitive or "aware" systems by using
interactive environments as a source of
example problems, as well as to develop new forms of man-machine interaction.</p><p id="id2640436">An environment is a connected volume of space. An environment is said
to be "perceptive" when
it is capable of recognizing and describing things, people and
activities within its volume.
Simple forms of application-specific
perception may be constructed using a single sensor.
However, to be
general purpose and robust, perception must integrate information
from multiple sensors and multiple modalities.
Project PRIMA
develops and employs machine perception techniques using
acoustics, speech, computer vision and mechanical sensors.</p><p id="id2640489">An environment is said to be "active" when it is capable of changing
its internal state.
Trivial forms of state change include regulating
ambient temperature and illumination.
Automatic presentation of
information and communication constitutes a challenging new form of
"action" with many applications.
The use of multiple display
surfaces coupled with location awareness of
occupants offers the possibility of automatically adapting
presentation to fit the current
activity of groups. The use of activity recognition and acoustic
topic spotting offers the possibility of providing
relevant information without disruption. The use of steerable video
projectors (with integrated
visual sensing) offers the possibility of using any surface for
presentation and interaction with information.</p><p id="id2640536">An environment may be considered "interactive" when it is capable of
responding to humans using tightly coupled perception and action.</p><p id="id2640546">Simple forms of interaction may be based on sensing the grasping and
manipulation of sensor-enabled devices, or on visual sensing of
fingers or objects
placed into projected interaction widgets.
Richer forms of
interaction require perceiving and modeling of the current task of
users.
PRIMA explores multiple forms of interaction,
including projected interaction widgets, observation of manipulation
of objects, fusion of acoustic and visual information, and
federations of systems that model interaction context in order to
predict appropriate action by the environment.</p><p id="id2640561">For the design and integration of systems for perception of humans
and their actions, PRIMA has
developed:</p><simplelist id="id2640567"><li id="uid5"><p id="id2640576">A new approach to computer vision based on local appearance,</p></li><li id="uid6"><p id="id2640588">A software architecture model for reactive control of
multi-modal vision systems.</p></li><li id="uid7"><p id="id2640600">A conceptual framework and theoretical foundation for context
aware perception.</p></li></simplelist><p id="id2640606">The experiments in project PRIMA are oriented towards perception of
human activity. The project is particularly concerned with modeling
the interaction between communicating individuals in order to provide
video-conferencing and information services.
Application domains include context aware video communications, new
forms of man-machine interaction, visual surveillance, and new forms
of information services and entertainment.</p></subsection></presentation><fondements id="uid8"><bodyTitle id="id2640624">Scientific Foundations</bodyTitle><subsection id="uid9"><bodyTitle id="id2640634">Context Aware Observation of Activity</bodyTitle><keyword id="id2640636">Context Modeling</keyword><keyword id="id2640639">Context Aware Systems</keyword><keyword id="id2640642">Observation of Human Activity</keyword><p id="id2640646">Human activity is extremely complex. Current technology allows us to
handcraft real-time perception systems for a specific perceptual
task.
However, such an approach is inadequate for building systems
that accommodate the variety of activities that is typical of human
environments.
To respond to this need, we have defined a conceptual
framework for context aware observation of human activity.
This framework and methods are used to construct systems for
observation of human activity in the PRIMA "Augmented Meeting
Environment".
Within this framework, contexts are modeled as a
network of situations.
Situation networks are interpreted as a
specification of a
federation of processes for observing the entities and relations that
define a situation.
In this section we review conceptual foundations
for such systems. In the following section we describe a
process-based software architecture for building systems for
observing activity based on this framework.</p><p id="id2640669">In the models of activity developed in project PRIMA, human activity
is represented as a network of situations.
A situation is defined as a configuration of relations computed over
observed entities.
Relations are predicate functions evaluated over the properties of
one or more entities.
Changes in relations trigger events that signal a change in situation.
The entities and relations that define situations are detected and
observed by perceptual processes.
A federation of processes is composed and coordinated by a federation
controller (a "Federator") in order to predict and observe the
situations that describe an activity, and to perform the appropriate
actions.</p><p id="id2640694">The concept of role is an important (but subtle) tool for simplifying
the network of situations.
It is common to discover a collection of
situations that have the same configuration of relations, but where
the identity of one or more entities is varied. A role is an abstract
class of person or object that combines a discriminative recognition
test with a symbolic description of functionality.
Within a situation
model, a role serves as a variable for the
entities to which the relations are applied, thus allowing an
equivalent set of situations to have the same representation.
A role
is played by an entity that can pass an acceptance test for the role.</p><p id="id2640710">In that case, it is said that the entity can play or adopt the role
for that situation.
In our framework, the relations that define a
situation are defined with respect to roles, and applied to entities
that pass the test for the relevant roles.</p><p id="id2640719">Entities are assigned to roles by role assignment processes.
A change in the assignment of an entity to a role does not change the
situation, unless the result is a change in one of the relations that define
the situation.
The context model specifies which roles are to be assigned and
launches the necessary role assignment processes.
A process federation is formed to detect and observe entities that
can play roles as well as the relations between entities.
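</p><p>
The following sketch makes the model concrete. It is a minimal
illustration in Python, not the PRIMA implementation: the class names,
the acceptance test, and the relation are invented for the example.
</p><pre>
from dataclasses import dataclass, field
from typing import Callable, Dict, List

@dataclass
class Entity:
    name: str
    properties: Dict[str, float] = field(default_factory=dict)

@dataclass
class Role:
    name: str
    acceptance_test: Callable[[Entity], bool]  # discriminative recognition test

    def assign(self, entities: List[Entity]) -> List[Entity]:
        # role assignment: keep the entities that pass the acceptance test
        return [e for e in entities if self.acceptance_test(e)]

@dataclass
class Situation:
    name: str
    # relations are predicates evaluated over the entities playing the roles
    relations: List[Callable[[Dict[str, Entity]], bool]]

    def holds(self, bindings: Dict[str, Entity]) -> bool:
        return all(rel(bindings) for rel in self.relations)

# Example: a "presentation" situation defined over a 'speaker' role.
speaker = Role("speaker", lambda e: e.properties.get("standing", 0.0) > 0.5)
near_board = lambda b: abs(b["speaker"].properties["x"] - 1.0) < 0.5
presentation = Situation("presentation", [near_board])

alice = Entity("alice", {"standing": 1.0, "x": 1.2})
candidates = speaker.assign([alice])
if candidates and presentation.holds({"speaker": candidates[0]}):
    print("situation: presentation")  # a change here would emit an event
</pre><p>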
We have found that this architecture provides a foundation for the
design of systems that act as a silent partner to assist humans in
their activities in order to provide appropriate services without
explicit commands and
configuration.</p><object id="uid10"><table id="id2640744"><tr id="id2640745"><td id="id2640747"><ressource aux="image_1.png" xylemeAttach="1" id="id2640752" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="Process-Federation" type="float" width="13cm" xyref="1822861877011" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2640773">A process supervisor launches and configures a network of
perceptual processes</caption></object></subsection><subsection id="uid11"><bodyTitle id="id2640783">A Process Architecture for Observation of Human Activity</bodyTitle><keyword id="id2640786">Process Architectures</keyword><keyword id="id2640789">Autonomic Systems</keyword><keyword id="id2640792">Reflexive Systems</keyword><keyword id="id2640795">Computer
Vision Systems</keyword><p id="id2640799">The PRIMA project has developed a data-flow architecture based on
dynamically assembled
federations <ref id="id2640806" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid0" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2640822" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid1" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. This model builds
on previous work on process-based architectures for
machine perception and computer vision <ref id="id2640841" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid2" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2640857" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid3" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>,
as well as on data flow models for software
architecture <ref id="id2640875" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid4" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
Processes are launched and configured to observe the entities and
relations that define situations. This approach provides an
architecture in which reflexive elements are dynamically composed to
form federations of processes
for observing and predicting the situations that make up a context.</p><p id="id2640899">As context changes, the federation is restructured.
Restructuring
the federation enables the system to adapt to a range of
environmental conditions and to provide services that are appropriate
over a range of activities.</p><subsection id="uid12"><bodyTitle id="id2640912">Modules and Processes</bodyTitle><p id="id2640916">Perceptual processes are composed from a collection of modules
controlled by a process supervisor as shown in
figure <ref id="id2640925" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid13" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
Processes operate in a synchronous manner within a shared address
space. Such models are related to "work-flow" models increasingly
used in modeling human
organizations. Process models have been adapted for real time
computer vision systems in the
ESPRIT BRA project "Vision as
Process" <ref id="id2640956" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid5" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2640972" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid2" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. Such models permit the
dynamic
composition of software "federations" in response to events in the
scene <ref id="id2640992" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid1" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2641008" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid6" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.</p><p id="id2641026">In our experimental system, the process supervisor is implemented as
a multi-language interpreter <ref id="id2641031" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid7" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> equipped with a dynamic
loader for precompiled libraries. This interpreter allows a process
to receive and interpret messages containing scripts, adding new
functions to the process during execution.
Inter-process communication is provided by a software bus based on
the JORAM middleware from ObjectWeb <ref id="id2641054" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid8" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. This allows us
to compose federations of processes distributed on multiple computers.</p><p id="id2590597">The modules that compose a process are formally defined as
transformations applied to a certain class of data or event.
Modules
are executed in a cyclic manner by the supervisor according to a
process schedule.
We require that transformations return an
auto-critical report that describes the
results of their execution. Examples of information contained in an
auto-critical report include elapsed execution time, confidence in
the result, and any exceptions that were encountered.
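</p><p>
A minimal sketch of such a report, assuming a dictionary-style record;
the field names are illustrative rather than the project's actual format.
</p><pre>
import time

def run_module(transform, data, **params):
    """Execute one module transformation and return (result, report)."""
    t0 = time.perf_counter()
    exceptions = []
    result, confidence = None, 0.0
    try:
        result, confidence = transform(data, **params)
    except Exception as exc:
        exceptions.append(repr(exc))           # reported, not silently dropped
    report = {
        "elapsed_s": time.perf_counter() - t0,  # elapsed execution time
        "confidence": confidence,               # confidence in the result
        "exceptions": exceptions,               # anything that went wrong
    }
    return result, report
</pre><p>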
The
auto-critical report enables a supervisory controller to adapt
parameters for the next call in order to maintain an execution cycle
time, or other quality of service.</p><object id="uid13"><table id="id2590616"><tr id="id2590618"><td id="id2590620"><ressource aux="image_2.png" xylemeAttach="2" id="id2590624" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="Process-model" type="float" width="9cm" xyref="986417682007" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2590645">A perceptual process is composed of a set of modules
controlled by a supervisor. Processes transform data streams as well
as generate and respond to events.</caption></object></subsection><subsection id="uid14"><bodyTitle id="id2590656">Reflexive Process Supervision</bodyTitle><p id="id2590659">The supervisory component of a process provides four fundamental
functions: command interpretation, execution scheduling, parameter
regulation, and reflexive description.
The supervisor acts as a
programmable interpreter, receiving snippets of code script that
determine the composition and nature of the process execution cycle
and the manner in which the process reacts to events.
The supervisor
acts as a scheduler, invoking execution of modules in a synchronous
manner.
The supervisor regulates module parameters based on the
execution results.
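</p><p>
For illustration, the regulation step might look like the following
sketch, which trades processing time against precision by resizing a
region of interest; the parameter name and the adjustment factors are
assumptions, not measured values.
</p><pre>
def regulate(params, report, target_cycle_s=0.040):
    """Adapt module parameters from the last auto-critical report."""
    if report["elapsed_s"] > target_cycle_s:
        # running late: shrink the region of interest, coarser but faster
        params["roi_scale"] = max(0.5, params["roi_scale"] * 0.9)
    elif report["confidence"] < 0.5:
        # time to spare but low confidence: widen the search, finer but slower
        params["roi_scale"] = min(2.0, params["roi_scale"] * 1.1)
    return params
</pre><p>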
Auto-critical reports from modules permit the
supervisor to dynamically adapt processing.
Finally, the supervisor
responds to external queries with a description of the current state
and capabilities.
We formalize these abilities as the autonomic
properties of auto-regulation, auto-description and auto-criticism.</p><p id="id2590679">A system requires information about the capabilities and the current
state of component processes in order to dynamically assemble and
control observational processes.
Such information can be provided by
ensuring that supervisory controllers have the reflexive capabilities
of auto-regulation,
auto-description and auto-criticism.</p><p id="id2590690">A process is auto-regulated when processing is monitored and
controlled so as to maintain a certain quality of service.
For
example, processing time and precision are two important state
variables for a
tracking process.
These two may be traded off against each other.</p><p id="id2590699">The process controllers may be instructed to give priority to either
the processing rate or precision.
The choice of priority is dictated
by a more abstract supervisory controller.</p><p id="id2590706">An auto-descriptive controller can provide a symbolic description of
its capabilities and state. The
description of the capabilities includes both the basic command set
of the controller and a set of
services that the controller may provide to a more abstract
controller. Thus when applied to the
system's context, our model provides a means for the dynamic
composition of federations of
controllers. In this view, the observational processes may be seen as
entities in the system context.
The current state of a process provides its observational variable.
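</p><p>
As the following sentences note, a controller can publish such a
description in a meta-language such as XML. A hypothetical example, with
element names invented for illustration:
</p><pre>
<controller name="skin-blob-tracker">
  <commands>start stop configure describe</commands>
  <services>
    <service role="hand-observer" confidence="0.8"/>
    <service role="face-observer" confidence="0.6"/>
  </services>
  <state cycle-time="40ms" targets="2"/>
</controller>
</pre><p>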
Supervisory controllers are formed
into hierarchical federations according to the system context. A
controller may be informed of the
possible roles that it may play using a meta-language, such as XML.</p><p id="id2590725">An auto-critical process maintains an estimate of the confidence for
its outputs. For example, the
skin-blob detection process maintains a confidence factor based on
the ratio of the sum of
probabilities to the number of pixels in the ROI. Such a confidence
factor is an important feature for
the control of processing. Associating a confidence factor to all
observations allows a higher-level
controller to detect and adapt to changing observational
circumstances. When supervisory controllers
are programmed to offer "services" to higher-level controllers, it
can be very useful to include an
estimate of the confidence for the role. A higher-level controller
can compare these responses from
several processes and determine the assignment of roles to processes.</p></subsection><subsection id="uid15"><bodyTitle id="id2590755">Tracking Processes</bodyTitle><object id="uid16"><table id="id2590765"><tr id="id2590766"><td id="id2590768"><ressource aux="image_3.png" xylemeAttach="3" id="id2590773" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="Tracking-cycle" type="float" width="15.cm" xyref="1671226161008" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2590794">Tracking is a cyclic process of four phases: Predict,
Observe, Detect and Estimate. Observation is provided by the
observation and grouping modules described above.</caption></object><p id="id2590801">Tracking provides a number of fundamentally important functions for a
perception system. Tracking aids interpretation by integrating
information over time. Tracking makes it possible to conserve
information, assuring that a label applied to an entity remains
associated with the entity at future times. Tracking provides a means
to focus attention, by predicting the region of interest and the
observation module that should be applied to a specific region of an
image. Tracking processes can be designed to provide information
about position, speed, and acceleration that can be useful in
describing situations.</p><p id="id2590770">Tracking is a cyclic process of recursive estimation applied to a
data stream. In perception systems, a tracking process is generally
composed of four phases: predict, observe, detect and estimate, as
illustrated in figure <ref id="id2590826" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid16" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. Tracking maintains a list
of entities, known as "targets". Each target is described by a unique
ID, a target type, a confidence (or probability of existence), a
vector of properties and a matrix of uncertainties (or precisions)
for the properties.</p><p id="id2590855">The prediction phase uses a temporal model (called a "process model"
in the tracking literature) to predict the properties that should be
observed at a specified time for each target. For many applications
of tracking, a simple linear model is adequate for such prediction.
A linear model maintains estimates of the temporal derivatives for
each target property and uses these to predict the observed property
values.</p><p id="id2590876">The prediction phase also updates the uncertainty (or precision
model) of properties. Uncertainty is generally represented as a
covariance matrix for errors between estimated and observed
properties. These uncertainties are assumed to arise from
imperfections in the process model as well as errors in the
observation process.
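</p><p>
A sketch of the predict step under a first-order linear process model,
in the style of a Kalman filter; the matrices and the noise level are
illustrative assumptions.
</p><pre>
import numpy as np

def predict(x, P, dt, q=1e-2):
    """x: state [position, velocity]; P: 2x2 uncertainty; dt: time step."""
    F = np.array([[1.0, dt],
                  [0.0, 1.0]])            # linear temporal model
    Q = q * np.array([[dt**3 / 3, dt**2 / 2],
                      [dt**2 / 2, dt]])   # noise from model imperfection
    x_pred = F @ x                        # predicted property and derivative
    P_pred = F @ P @ F.T + Q              # uncertainty grows until observed
    return x_pred, P_pred
</pre><p>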
Restricting processing to a region of interest (ROI) can greatly
reduce the computational load for image analysis. The predicted
position of a target determines the position of the ROI at which the
target should be found. The predicted size of the target, combined
with the uncertainties of the size and position, can be used to
estimate the appropriate size for the ROI. In the tracking
literature, this ROI is part of the "validation gate", and is used to
determine the acceptable values for properties.</p><p id="id2590909">Observation is provided by the observation and grouping modules
described above. Processing is specific for each target. A call to a
module applies a specified observation procedure for a target at a
specified ROI in order to verify the presence of the target and to
update its properties. When the detection confidence is large,
grouping the resulting pixels provides the information to update the
target properties.</p><p id="id2590921">The estimation process combines (or fuses) the observed properties
with the previously estimated properties for each target. If the
average detection confidence is low, the confidence in the existence
of a target is reduced, and the predicted values are taken as the
estimates for the next cycle. If the confidence of existence falls
below a threshold, the target is removed from the target list.</p><p id="id2590933">The detection phase is used to trigger creation of new targets. In
this phase, specified observation modules are executed within a
specified list of "trigger" regions. Trigger regions can be specified
dynamically, or recalled from a specified list. Target detection is
inhibited whenever a target has been predicted to be present within a
trigger region.</p></subsection><subsection id="uid17"><bodyTitle id="id2590963">Process Federations</bodyTitle><p id="id2590966">Perceptual processes may be organized into software federations <ref id="id2590973" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid0" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2590988" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid1" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
A federation is a collection of independent processes that cooperate
to perform a task.
A process federation is assembled as a network of processes
controlled by a federation supervisor.
Federation supervisors invoke and configure processes to perform the
transformations required to observe a context.
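</p><p>
The control cycle can be sketched as follows; the start/stop interface
and the event format are assumptions made for the example.
</p><pre>
class FederationSupervisor:
    def __init__(self, context_model):
        # context_model maps each situation to the processes it requires
        self.context_model = context_model
        self.running = {}

    def enter_situation(self, situation):
        needed = self.context_model[situation]
        # revoke processes that are no longer needed ...
        for name in list(self.running):
            if name not in needed:
                self.running.pop(name).stop()
        # ... and invoke the missing ones
        for name, factory in needed.items():
            if name not in self.running:
                self.running[name] = factory()
                self.running[name].start()

    def on_event(self, event):
        # a change in relations signals a new situation: restructure
        if event["type"] == "situation-change":
            self.enter_situation(event["situation"])
</pre><p>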
The states of processes are monitored by the supervisory controller
and process parameters are adapted in
response to events. Supervisory controllers may be assembled into
hierarchies in order to observe
human activity. The exact assembly depends on the task that the
system is to perform as described
by a model of the user's task and context.</p><p id="id2590971">A crucial problem with this model is how to provide a mechanism for
dynamically composing
federations of supervisory controllers that observe the entities and
relations relative to the user's
context. Our approach is to propose a reflexive federation
supervisor. A federation supervisor is designed
for a specific context model. The federation supervisor maintains a
model of the
situation.</p><p id="id2591028">The federation supervisor can be seen as a form of reactive expert
system. For each user context, it
invokes and revokes the corresponding highest-level supervisory
controllers. These controllers, in
turn, invoke and revoke lower level controllers, down to the level of
the lowest level observational
processes. Supervisory controllers may invoke competing lower-level
processes, informing each
process of the roles that it may play. The selection of a process for a
role can then be re-assigned
dynamically according to the quality of service estimate that each
process provides for its parent
controller.</p></subsection></subsection><subsection id="uid18"><bodyTitle id="id2591050">Describing and Matching Local Appearance</bodyTitle><keyword id="id2591053">Computer Vision</keyword><keyword id="id2591056">Appearance Based Vision</keyword><keyword id="id2591059">Object Recognition</keyword><keyword id="id2591062">Scale
Invariance</keyword><keyword id="id2591064">Receptive Fields</keyword><p id="id2591069">The appearance of something is the set of possible visual stimuli
that the thing may engender.
For computer vision, the appearance
function for an entity refers to the set of all possible images that
may be observed for that entity.
Appearance functions can be created
for objects, activities, and scenes.
Contrary to intuition, it is
possible to capture appearance functions in computational models.
Such models can be used to provide efficient processes for detecting,
tracking, and observing people and things in real world environments.</p><p id="id2591083">The members of project PRIMA have a long history in defining and
developing methods for appearance based computer vision.
Recently
the project has demonstrated a variety of new computer
vision methods based on the use of chromatic receptive fields.</p><p id="id2591092">Adopting the terminology of Schiele <ref id="id2591099" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid9" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>, a
"receptive field" is a local function defined over some domain of
image space.
The term "receptive field", drawn from psychophysics,
refers to the weighting
functions used to encode visual stimuli in biological visual systems.</p><p id="id2591122">Although it is not our intention to propose a model for biological
vision, we note in passing that Young <ref id="id2591127" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid10" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> and others
have used
Gaussian derivatives as models for the simple cells in the early
layers of the primate visual cortex <ref id="id2591146" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid11" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
PRIMA
has developed a family of receptive field functions based on
evaluating scale normalized Gaussian derivatives in a color opponent
space.
These functions provide a foundation for robust real
time processes for observing objects and agents.</p><p id="id2591169">Receptive fields may be defined over image space, color, time,
view-point or any other image formation
parameter <ref id="id2591174" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid12" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
A projection of the appearance
manifold onto a receptive field at a
position in an image provides a scalar value which describes
appearance at that position on the manifold.
Projection to a vector
of receptive fields provides a vector of features.
Such a vector
provides a concise description of the appearance within the local
neighborhood of the image.</p><p id="id2591200">Projection onto a receptive field is provided by an inner product.</p><p id="id2591204">Computing inner products at each image position is equivalent to a
convolution of the receptive field with the image.
When evaluated at
every point in an image, a receptive field is a form of linear
filter.
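</p><p>
A sketch of receptive-field responses computed as separable Gaussian
derivative filtering, with steering to an arbitrary orientation; the
library calls are standard SciPy, and the sigma value is an arbitrary
choice.
</p><pre>
import numpy as np
from scipy.ndimage import gaussian_filter

def first_order_fields(image, sigma=2.0):
    """First-order Gaussian derivative responses Gx, Gy of a 2-D image."""
    gx = gaussian_filter(image, sigma, order=(0, 1))  # derivative along x
    gy = gaussian_filter(image, sigma, order=(1, 0))  # derivative along y
    return gx, gy

def oriented_derivative(gx, gy, theta):
    """Steer the first derivative to angle theta: a weighted sum of the
    cardinal responses, with direction cosines as weights."""
    return np.cos(theta) * gx + np.sin(theta) * gy
</pre><p>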
Gaussian derivative filters
can be computed very rapidly using a variety of techniques including
separable recursive filters <ref id="id2591214" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid13" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> and Gaussian
Pyramids <ref id="id2591232" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid14" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.</p><p id="id2591250">The Hermite polynomial <ref id="id2591254" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid15" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> of Gaussian
derivatives has a variety of properties which make it
ideally suited as a basis for image description using a Taylor
series. Even ordered derivatives will
respond to symmetric structures such as spots and bars while odd
derivatives will respond to
asymmetric structures, such as edges. In two dimensions, the Gaussian
is the unique function which
is both separable and circularly symmetric. Oriented derivatives can
be defined as a convolution of
separable components.</p><p id="id2591281">Changing viewing distance changes the scale of appearance. View
invariant recognition requires
estimating and normalizing such changes. The scale equivariant
properties of Gaussian derivatives
provide a simple method to estimate changes in scale. Such
normalization provides both robustness
to changes in distance, and adaptation of the receptive field to the
most appropriate scale for
describing the appearance at each image position.</p><p id="id2591293">Local normalization of the receptive fields requires a local estimate
of the intrinsic scale and
orientation. Intrinsic scale is determined by local maxima in the
Laplacian with respect to change in
scale. Intrinsic orientation is determined by the direction of the
local gradient. With such
normalization, the Laplacian of the Gaussian computed at a range of
scales at each point provides a
"Laplacian Profile" which is equivariant with scale <ref id="id2591312" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid16" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.</p><p id="id2591330">Whenever the gradient is not close to zero, it is possible to
estimate an intrinsic orientation using the
arc-tangent of the ratio of first derivatives. The Gaussian
derivatives vector provides a basis for
synthesizing oriented derivatives <ref id="id2591338" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid17" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. This can
be shown by expressing the 1st, 2nd and 3rd order
Gaussian derivatives in polar coordinates and determining their
Fourier transforms. Rotation is equivalent to a shift in phase. Thus
oriented
Gaussian derivatives can be synthesized at arbitrary angles using
a weighted sum of the Gaussian
derivatives computed in the cardinal directions. The weights are
provided by the direction cosines of
the rotation.</p><p id="id2591364">Color is a very powerful discriminant for object recognition. Color
images are commonly acquired
in the Cartesian color space, RGB. The RGB color space has certain
advantages for image
acquisition, but is not the most appropriate space for recognizing
objects or describing their shape.
The HLS (Hue, Luminance, Saturation) color space is a commonly used
representation which
separates intensity and chrominance. In HLS, chrominance is
represented by a polar coordinate
representation in which hue is the angle and saturation is the
radius. Projection of RGB onto a polar
coordinate representation causes computational problems in the design
of receptive fields. An
alternative is to compute a Cartesian representation for chrominance,
using differences of R, G and
B.
Such differences yield color opponent receptive fields resembling
those found in biological visual
systems.</p><p id="id2591400">The intensity component may be obtained by a weighted sum of the R,
G, and B. The exact
weights depend on the camera and scene illumination and may be
adapted to the RGB filters and the
source illumination, by gain coefficients. The chromatic component
may be obtained by differences.
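</p><p>
For illustration, one widely used opponent transform is the following;
the coefficients here are an assumption for the example, since, as noted
above, the weights should be adapted to the camera and the illumination.
</p><pre>
\begin{pmatrix} L \\ C_1 \\ C_2 \end{pmatrix} =
\begin{pmatrix}
  1/\sqrt{3} &  1/\sqrt{3} &  1/\sqrt{3} \\
  1/\sqrt{2} & -1/\sqrt{2} &  0          \\
  1/\sqrt{6} &  1/\sqrt{6} & -2/\sqrt{6}
\end{pmatrix}
\begin{pmatrix} R \\ G \\ B \end{pmatrix}
</pre><p>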
Such a separation may easily be performed by multiplying the color
vector by a matrix <ref id="id2591412" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid18" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.</p><p id="id2591430">The components C1 and C2 encode the chromatic information in a
Cartesian representation.
Chromatic Gaussian receptive fields are computed by applying the
Gaussian derivatives
independently to each of the three components, (L, C1, C2). The
result is a set of color opponent
filters as shown in figure <ref id="id2591445" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid19" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. Permutations of RGB lead to
different opponent color spaces. The
choice of the most appropriate space depends on the chromatic
composition of the scene.</p><object id="uid19"><table id="id2591470"><tr id="id2591472"><td id="id2591473"><ressource aux="image_4.png" xylemeAttach="4" id="id2591477" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="Color_field" type="float" width="15.cm" xyref="2426812468002" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2591498">Chromatic Gaussian Receptive Fields (<span class="math" align="left"><hi rend="it">G</hi><sub><hi rend="it">x</hi></sub><sup><hi rend="it">L</hi></sup>, <hi rend="it">G</hi><sup><hi rend="it">C</hi><sub>1</sub></sup>, <hi rend="it">G</hi><sup><hi rend="it">C</hi><sub>2</sub></sup>, <hi rend="it">G</hi><sub><hi rend="it">x</hi></sub><sup><hi rend="it">C</hi><sub>1</sub></sup>, <hi rend="it">G</hi><sub><hi rend="it">x</hi></sub><sup><hi rend="it">C</hi><sub>2</sub></sup>, <hi rend="it">G</hi><sub><hi rend="it">x</hi><hi rend="it">x</hi></sub><sup><hi rend="it">L</hi></sup>, <hi rend="it">G</hi><sub><hi rend="it">x</hi><hi rend="it">y</hi></sub><sup><hi rend="it">L</hi></sup>, <hi rend="it">G</hi><sub><hi rend="it">y</hi><hi rend="it">y</hi></sub><sup><hi rend="it">L</hi></sup></span>).</caption></object></subsection><subsection id="uid20"><bodyTitle id="id2591687">Generic Features for Robust Tracking and Recognition</bodyTitle><keyword id="id2591690">Computer Vision</keyword><keyword id="id2591693">Robust Matching</keyword><keyword id="id2591696">Generic Features</keyword><p id="id2591701">A successful detection, tracking and classification system must have two
properties: it must be general enough to correctly assign instances of
the same class despite large intra-class variability and it must be
specific enough to reject instances that are not part of the
class. Features robust to intra-class variability can be constructed
by learning from examples. The result is a feature or
part detector that can generalize from a small number of examples to
new examples. Such a detector can provide a hypothesis about the
presence of a class instance, but it is in general not specific enough
for reliable detection and classification.</p><p id="id2591717">The relative position of distinct object features is important for
classification and needs to be modeled. Current
approaches <ref id="id2591724" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid19" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2591737" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid20" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> are computationally
expensive. In the results section, we propose an efficient method for
geometry verification. This enables an object detection and
identification module which can be applied to various object types.</p></subsection></fondements><domaine id="uid21"><bodyTitle id="id2591765">Application Domains</bodyTitle><subsection id="uid22"><bodyTitle id="id2591774">The Augmented Meeting Environment</bodyTitle><participants id="id2591778" category="None"><person key="prima-2005-id2245460"><firstname id="id2591783">Patrick</firstname><lastname id="id2591786">Reignier</lastname></person><person key="prima-2005-id2245437"><firstname id="id2591791">Dominique</firstname><lastname id="id2591794">Vaufreydaz</lastname></person><person><firstname id="id2591800">Christophe</firstname><lastname id="id2591802">Le Gal</lastname></person><person key="prima-2005-id2245512"><firstname id="id2591808">James L.</firstname><lastname id="id2591811">Crowley</lastname></person></participants><keyword id="id2591814">Augmented Reality</keyword><keyword id="id2591817">Multi-modal Interaction</keyword><keyword id="id2591820">Collaborative Work</keyword><object id="uid23"><table id="id2591830"><tr id="id2591831"><td id="id2591833"><ressource aux="image_5.png" xylemeAttach="5" id="id2591838" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="PRIMA_AME" type="float" width="12cm" xyref="2477956403011" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2591859">The augmented meeting environment is an office environment
equipped with a microphone array, wireless lapel microphones, a wide
angle surveillance camera, five steerable cameras, and three
video-interaction devices.</caption></object><p id="id2591866">In order to test and develop systems for observation of human
activity, Project PRIMA has constructed an "Augmented Meeting
Environment", show in figure <ref id="id2591873" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid23" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. The PRIMA Augmented
Meeting Environment is equipped with a microphone array, a fixed wide
angle camera, five steerable cameras, and three "video interaction
devices".
The microphone array is used as an acoustic sensor to detect, locate
and classify acoustic signals for recognizing human activities. The
wide-angle camera provides a field of view that covers the entire
room, and allows detection and tracking of individuals.
Steerable cameras are installed in each of the four corners of the
room, and used to acquire video of activities from any viewing
direction.</p><p id="id2591917">Video interaction devices associate a camera with a video projector
to provide new modes of man-machine interaction. Such devices may be
used for interaction, presentation or capture of information based on
natural activity. Examples include selecting menus and buttons with a
finger and capturing drawings from paper or a whiteboard.
Fixed video interaction devices in the AME have been constructed for
a vertical surface (a wall-mounted whiteboard) and a horizontal
desk-top work-space. Recently a steerable interaction device has
been constructed based on a tightly integrated steerable
camera-projector pair (SCP). The SCP, described below, allows any
surface to be used for interaction with information. It also offers a
range of new sensing techniques, including automatic surveillance of an
environment to discover the environment topology, as well as the use
of structured light for direct sensing of texture mapped 3D models.
</p></subsection><subsection id="uid24"><bodyTitle id="id2591942">The Steerable Camera Projector</bodyTitle><participants id="id2591946" category="None"><person key="prima-2005-id2244828"><firstname id="id2591951">Stan</firstname><lastname id="id2591954">Borkowski</lastname></person><person key="prima-2005-id2245512"><firstname id="id2591960">James L.</firstname><lastname id="id2591962">Crowley</lastname></person><person><firstname id="id2591968">Olivier</firstname><lastname id="id2591971">Riff</lastname></person></participants><keyword id="id2591974">Man-Machine Interaction</keyword><keyword id="id2591978">Interactive Environments</keyword><p id="id2591982">Surfaces dominate the physical world.
Every object is confined in space by its surface.
Surfaces are pervasive and play a predominant role in human
perception of the environment.
We believe that augmenting surfaces with information technology will
provide an interaction modality
that will be easily adopted by humans.</p><p id="id2591992">PRIMA has constructed a steerable video interaction device composed
of a tightly coupled camera and video projector. This device, known
as a Steerable Camera-Projector (or SCP) enables experiments in
which any surface in the augmented meeting environment may be used as
an interactive display for information <ref id="id2592003" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid21" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
With such a device, an interaction interface may follow a user,
automatically selecting the most appropriate surface.
The SCP provides a range of capabilities: <i id="id2592024">(a)</i> The SCP can be
used as a sensor to discover the geometry of the environment,
<i id="id2592028">(b)</i>The SCP can project interactive
surfaces anywhere in the environment and <i id="id2592033">(c)</i> The SCP can be
used to augment a mobile surface into a portable interactive display.
<i id="id2592039">(d)</i> The SCP can be used to capture text and drawings from
ordinary paper.
<i id="id2592044">(e)</i> The SCP can be used as a structured light sensor to
observe 3-D texture-mapped models of objects.</p><p id="id2592051">Current display technologies are based on planar surfaces.
Recent work on augmented reality systems has assumed simultaneous use
of multiple display
surfaces <ref id="id2592058" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid22" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2592074" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid23" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2592090" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid24" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
Displays are usually treated as access points to a common information
space, where users can
manipulate vast amounts of information with a set of common controls.
With the development of low-cost display technologies, the available
interaction surface will continue
to grow, and interfaces will migrate from a single, centralized
screen to multiple, space-distributed
interactive surfaces.
New interaction tools that accommodate multiple distributed
interaction surfaces will be required.</p><p id="id2592118">Video-projectors are increasingly used in augmented environment
systems <ref id="id2592122" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid25" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/><ref id="id2592138" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid26" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
Projecting images is a simple way of augmenting everyday objects and
offers the possibility of
changing their appearance or their function.
However, standard video-projectors have a fairly small projection
area, which significantly limits their
spatial flexibility as output devices in a pervasive system.
A certain degree of steerability can be achieved for a rigidly
mounted projector:
in particular, a sub-window can be steered within the cone of
projection for a fixed projector <ref id="id2592165" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid27" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
However, extending and/or moving the display surface requires
augmenting the range of angles to
which the projector beam may be directed.
If using fixed projectors, this means increasing the number of
projectors, which is relatively
expensive.
A natural solution is to use a steerable projector-camera assembly <ref id="id2592188" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid28" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> and <ref id="id2592206" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid29" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
With a trend towards increasingly small and inexpensive video
projectors and cameras, this approach
will become increasingly attractive.
Additionally, having the ability to modify the scene with projected
light, projector-camera systems
can be exploited as sensors,
enabling the collection of data that can be used to build a model of
the environment.</p><p id="id2592231">Projection is an ecological (i.e. non-intrusive) way of augmenting
the environment.
Projection does not change the augmented object itself, only its appearance.
This change can be used to supplement the functionality of the object
and hence its role in the
world.
However, the most common consequence of augmenting an object with
projected images is
transforming the object into an access point to the virtual information space.
In <ref id="id2592242" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid29" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> ordinary artifacts such as walls, shelves,
and cups are transformed into
informative surfaces.
Though the superimposed projected image enables the user to take
advantage of the information
provided by the virtual world, the functionality of the object itself
does not change.
The object becomes a physical support for virtual functionalities.
An example of enhancing the functionality of an object was presented
in <ref id="id2592267" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid30" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>,
where users could interact with both physical and virtual ink on a
projection-augmented
whiteboard.</p><object id="uid25"><table id="id2592291"><tr id="id2592293"><td id="id2592294"><ressource aux="image_6.png" xylemeAttach="6" id="id2641145" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="SVP2" type="float" height="4cm" xyref="242437618004" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2641166">The Steerable Camera Projector</caption></object><p id="id2641169">The Steerable Camera Projector (SCP) (figure <ref id="id2641174" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid25" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>)
platform is a device that provides a video-projector with two
mechanical degrees of freedom: pan and tilt.
The mechanical performance of the SCP is presented in
Table <ref id="id2641197" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid26" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
While somewhat bulky, our device anticipates the current trend for
projectors to become portable
devices, similar in shape to hand-held torch lamps <ref id="id2641218" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid31" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.</p><table id="uid26"><tr id="id2641242" style="border-top-style:solid;border-top-width:1px;border-bottom-style:solid; border-bottom-width:1px;"><td id="id2641241" style="text-align:left;border-right-style:solid;border-right-width:1px;border-left-style:solid;border-left-width:1px;"/><td id="id2641256" style="text-align:center;border-right-style:solid;border-right-width:1px;">Pan</td><td id="id2641263" style="text-align:center;border-right-style:solid;border-right-width:1px;">Tilt</td></tr><tr id="id2641271" style="border-bottom-style:solid; border-bottom-width:1px;"><td id="id2641270" style="text-align:left;border-right-style:solid;border-right-width:1px;border-left-style:solid;border-left-width:1px;">Rotation range</td><td id="id2641284" style="text-align:center;border-right-style:solid;border-right-width:1px;"><span class="math" align="left"><img align="middle" width="28" height="15" src="math_image_1.png" xylemeAttach="17" border="0" alt="Im1 ${\pm 177^{\circ}}$"/></span></td><td id="id2641322" style="text-align:center;border-right-style:solid;border-right-width:1px;"><span class="math" align="left"><img align="middle" width="23" height="15" src="math_image_2.png" xylemeAttach="18" border="0" alt="Im2 ${+90^{\circ}}$"/></span></td></tr><tr id="id2641357" style="border-bottom-style:solid; border-bottom-width:1px;"><td id="id2641356" style="text-align:left;border-right-style:solid;border-right-width:1px;border-left-style:solid;border-left-width:1px;">Angular resolution</td><td id="id2641362" style="text-align:center;border-right-style:solid;border-right-width:1px;"><span class="math" align="left"><img align="bottom" width="23" height="7" src="math_image_3.png" xylemeAttach="19" border="0" alt="Im3 ${0.11^{\circ}}$"/></span></td><td id="id2641406" style="text-align:center;border-right-style:solid;border-right-width:1px;"><span class="math" align="left"><img align="bottom" width="23" height="7" src="math_image_4.png" xylemeAttach="20" border="0" alt="Im4 ${0.18^{\circ}}$"/></span></td></tr><tr id="id2641444" style="border-bottom-style:solid; border-bottom-width:1px;"><td id="id2641443" style="text-align:left;border-right-style:solid;border-right-width:1px;border-left-style:solid;border-left-width:1px;">Angular velocity</td><td id="id2641455" style="text-align:center;border-right-style:solid;border-right-width:1px;"><span class="math" align="left"><img align="middle" width="30" height="20" src="math_image_5.png" xylemeAttach="21" border="0" alt="Im5 ${146\frac{deg}{s}}$"/></span></td><td id="id2641500" style="text-align:center;border-right-style:solid;border-right-width:1px;"><span class="math" align="left"><img align="middle" width="25" height="20" src="math_image_6.png" xylemeAttach="22" border="0" alt="Im6 ${80\frac{deg}{s}}$"/></span></td></tr><tr id="id2641537" style="border-bottom-style:solid; border-bottom-width:1px;"><td id="id2641535" style="text-align:left;border-right-style:solid;border-right-width:1px;border-left-style:solid;border-left-width:1px;">Response time</td><td id="id2641551" style="text-align:center;border-right-style:solid;border-right-width:1px;"><span class="math" align="left"><img align="bottom" width="29" height="7" src="math_image_7.png" xylemeAttach="23" border="0" alt="Im7 ${\sim 2ms}$"/></span></td><td id="id2641582"
style="text-align:center;border-right-style:solid;border-right-width:1px;"><span class="math" align="left"><img align="bottom" width="29" height="7" src="math_image_8.png" xylemeAttach="24" border="0" alt="Im8 ${\sim 3ms}$"/></span></td></tr><caption id="id2641608">Rotation platform mechanical performance</caption></table><p id="id2641612">Note that the SCP is not only a motorized video-projector, but a
projector-camera pair.
The camera is mounted in such a way that the projected beam overlaps
with the camera-view.
Equipping an SCP with a camera offers a number of interesting possibilities.
Users' actions can be observed within the field of view of the camera
and interpreted as input
information for the computer system.
Additionally, the system is able to provide visual feedback in
response to user actions.
In other words, associating a camera with a projector creates a
powerful actuator-sensor pair.</p><p id="id2641627">The SCP can be used as a steerable structured light sensor to
automatically discover surfaces that are suitable for interaction.
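</p><p>
A sketch of the projection side of this idea: once a planar surface has
been discovered, content can be pre-warped with a homography so that it
appears undistorted on the surface. The OpenCV calls are standard; the
corner coordinates are dummy values standing in for the SCP's
structured-light calibration.
</p><pre>
import numpy as np
import cv2

# corners of the target surface in projector coordinates (dummy values;
# in practice they come from structured-light observation)
surface_proj = np.array([[150, 60], [880, 70], [870, 700], [140, 690]],
                        dtype=np.float32)

content = np.zeros((480, 640, 3), dtype=np.uint8)   # image to display
corners = np.array([[0, 0], [639, 0], [639, 479], [0, 479]],
                   dtype=np.float32)

# homography mapping content corners onto the surface, then pre-warp
H = cv2.getPerspectiveTransform(corners, surface_proj)
frame = cv2.warpPerspective(content, H, (1024, 768))  # projector framebuffer
</pre><p>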
Figure <ref id="id2641638" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid27" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> shows automatically discovered planar surfaces
within the AME.</p><object id="uid27"><table id="id2641662"><tr id="id2641664"><td id="id2641665"><ressource aux="image_7.png" xylemeAttach="7" id="id2641669" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="Monica" type="float" width="13cm" xyref="2965865165014" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2641690">Planar surfaces in the environment</caption></object></subsection><subsection id="uid28"><bodyTitle id="id2641698">Context Aware Video Acquisition</bodyTitle><participants id="id2641702" category="None"><person key="prima-2005-id2245460"><firstname id="id2641707">Patrick</firstname><lastname id="id2641710">Reignier</lastname></person><person key="prima-2005-id2245437"><firstname id="id2641715">Dominique</firstname><lastname id="id2641718">Vaufreydaz</lastname></person><person><firstname id="id2641723">Olivier</firstname><lastname id="id2641726">Riff</lastname></person><person key="prima-2005-id2244745"><firstname id="id2641732">Alban</firstname><lastname id="id2641734">Caporossi</lastname></person><person key="prima-2005-id2244725"><firstname id="id2641740">Alba</firstname><lastname id="id2641743">Ferrer-Biosca</lastname></person><person key="prima-2005-id2245512"><firstname id="id2641748">James L.</firstname><lastname id="id2641751">Crowley</lastname></person></participants><keyword id="id2641755">Video Conferencing</keyword><keyword id="id2641758">Context Aware Systems</keyword><keyword id="id2641761">Intelligent Environments</keyword><p id="id2641764">Video communication has long been seen as a potentially powerful tool
for communications, teaching
and collaborative work. Continued exponential decreases in the cost
of communication and
computation (for coding and compression) have eliminated the cost of
bandwidth as an economic
barrier for such technology. However, there is more to video
communication than acquiring and
transmitting an image. Video communications technology is generally
found to be disruptive to the
underlying task, and thus unusable. To avoid disruption, the video
stream must be composed of the
most appropriate targets, placed at an appropriate size and position
in the image. Inappropriately
composed video communications create distraction and ultimately
degrade the ability to
communicate and collaborate.</p><p id="id2641783">During a lecture or a collaborative work activity, the most
appropriate targets, camera angle, zoom
and target position change continually. A human camera operator
understands the interactions
that are being filmed and adapts the camera angle and image
composition accordingly. However,
such human expertise is costly. The lack of an automatic video
composition and camera control
technology is the current fundamental obstacle to the widespread use
of video communications for
communication, teaching and collaborative work. One of the goals of
project PRIMA is to create a
technology that overcomes this obstacle.</p><p id="id2641799">To provide a useful service for communications, teaching and
collaborative work, a video composition system must adapt the video
composition to events in the scene.
In common terms, we say that
the system must be "aware of context". Computationally, such a
technology requires that the video composition be determined by a
model of the activity that is being observed.
As a first approach,
we propose to hand-craft such models as finite networks of states,
where each state corresponds to a situation in the scene to be filmed
and specifies a camera placement, camera target, image placement and
zoom.</p><p id="id2641803">A finite state approach is feasible in cases where human behavior
follows an established
stereotypical "script". A lecture or class room presentation provides
an example of such a case.
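For illustration only, such a hand-crafted model might be encoded along the following lines (situation names, events and camera settings are invented for this sketch):</p><pre><![CDATA[
// Sketch of a hand-crafted situation network for filming a lecture.
// Each situation prescribes a camera target and shot composition;
// arcs are traversed when the corresponding event is observed.
#include <map>
#include <string>

struct Composition {           // what a human operator would choose
    std::string target;        // e.g. "lecturer", "audience", "screen"
    double zoom;               // shot tightness
};

struct Situation {
    Composition shot;
    std::map<std::string, std::string> onEvent;   // event -> next situation
};

int main() {
    std::map<std::string, Situation> script;
    script["presentation"] = { {"screen", 1.0},
        {{"lecturer-moves", "lecture"}, {"audience-speaks", "question"}} };
    script["lecture"] = { {"lecturer", 1.5},
        {{"new-slide", "presentation"}, {"audience-speaks", "question"}} };
    script["question"] = { {"audience", 2.0},
        {{"lecturer-speaks", "lecture"}} };

    std::string current = "lecture";
    auto step = [&](const std::string& event) {   // follow an arc on an event
        auto it = script[current].onEvent.find(event);
        if (it != script[current].onEvent.end()) current = it->second;
    };
    step("audience-speaks");   // current is now "question": frame the audience
    return 0;
}
]]></pre><p>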
Lecturers and audiences share a common stereotype about the context
of a lecture. Successful video
communications require structuring the actions and interactions of
actors to a great extent. We
recognize that there will always be some number of unpredictable
cases where humans deviate from
the script. However, the number of such cases should be sufficiently
limited so as to limit the disruption.
Ultimately, we plan to investigate automatic techniques for
"learning" new situations.</p><p id="id2641867">This system described above is based on an approach to context aware
systems presented at UBICOMP in September
2002 <ref id="id2641875" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid6" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. The behavior of this system is specified
as a situation graph that is automatically compiled into rules
for a Java based supervisory process. The design process for
compiling a situation graph into a rule base for the federation
supervisors has been developed and refined within the last two years.</p><p id="id2641899">In 2004, we demonstrated a number of real systems based on
this model.
In the FAME project, we demonstrated a context aware
video acquisition system at the Barcelona Forum of Cultures during
two weeks in July 2004.
This system was also demonstrated publicly
at "Fête de la science" in Grenoble in October 2004, and exhibited at
the IST Conference in Den Haag in November 2004.
A variation of this
system has been integrated into the ContAct context aware
presentation composition system developed with XRCE (Xerox European
Research Centre), and is at the heart of the CHIL Collaborative
Workspace Service used in the IP Project CHIL. A context aware
interpretation system for video surveillance is currently under
development for the IST project CAVIAR.</p></subsection></domaine><logiciels id="uid29"><bodyTitle id="id2641946">Software</bodyTitle><subsection id="uid30"><bodyTitle id="id2641956">IMALAB</bodyTitle><participants id="id2641960" category="None"><person key="prima-2005-id2245482"><firstname id="id2641965">Augustin</firstname><lastname id="id2641968">Lux</lastname></person><person><firstname id="id2641974">Olivier</firstname><lastname id="id2641976">Riff</lastname></person><person key="prima-2005-id2244745"><firstname id="id2641982">Alban</firstname><lastname id="id2641985">Caporossi</lastname></person><person key="prima-2005-id2245372"><firstname id="id2641990">Daniela</firstname><lastname id="id2641993">Hall</lastname></person></participants><keyword id="id2641997">Computer Vision Systems</keyword><keyword id="id2641999">Software Development Environments</keyword><p id="id2642003">The Imalab system represents a longstanding effort within the Prima team
(1) to capitalize on the work of successive generations of students,
(2) to provide a coherent software framework for the development of new
research, and (3) to supply a powerful toolbox for sophisticated
applications.
In its current form, it serves as a development environment
for all researchers in the Prima team, and represents a considerable
amount of effort (probably well over 10 man-years).</p><p id="id2642017">There are two major elements of the Imalab system:
the PrimaVision library,
which is a C++ based class library for the fundamental requirements of
research in computer vision;
and the Ravi system, which is an extensible system kernel
providing an interactive programming language shell.</p><p id="id2642026">With respect to other well known computer vision systems, e.g. KHOROS <ref id="id2642034" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid3" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>, the most prominent features of Imalab are:</p><simplelist id="id2642052"><li id="uid31"><p id="id2642060">A large choice of data structures and algorithms
for the implementation of new algorithms.</p></li><li id="uid32"><p id="id2642073">A subset of C++ statements as interaction language.</p></li><li id="uid33"><p id="id2642084">Extensibility through dynamic loading.</p></li><li id="uid34"><p id="id2642096">A multi language facility including C++, Scheme, Clips, Prolog.</p></li></simplelist><p id="id2642102">The combination of these facilities is instrumental for
achieving efficiency and generality in a large Artificial
Intelligence system: efficiency is obtained through the use
of C++ coding for all critical pieces of code;
this code is seamlessly integrated with declarative programs
that strive for generality.</p><p id="id2642112">Imalab's system kernel is built on the Ravi system first described
in Bruno Zoppis's thesis <ref id="id2642119" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid32" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
The particular strength of this kernel comes from a combination
of dynamic loading and automatic program generation within an
interactive shell, in order to integrate new code, even new libraries,
in a completely automatic way.</p><p id="id2642142">The Imalab system has, in particular, been used for the development
of the BrandDetect software described below.
The Imalab system has proven to be an extremely efficient tool for the
development of systems such as BrandDetect, which require extensive performance
evaluation
as well as incremental design of a complex user interface.</p><p id="id2642151">We are currently in the process of
registering Imalab with the APP (Agence pour la Protection des Programmes).
Imalab has been distributed as shareware to several research
laboratories around Europe.
Imalab has been installed and is in use at:</p><simplelist id="id2642158"><li id="uid35"><p id="id2642168">XRCE - Xerox European Research Centre, Meylan France</p></li><li id="uid36"><p id="id2642180">JOANNEUM RESEARCH Forschungsgesellschaft mbH, Austria</p></li><li id="uid37"><p id="id2642191">HS-ART Digital Service GmbH, Austria</p></li><li id="uid38"><p id="id2642203">VIDEOCATION Fernseh-Systeme GmbH, Germany</p></li><li id="uid39"><p id="id2642214">Univ. of Edinburgh, Edinburgh, UK</p></li><li id="uid40"><p id="id2642225">Instituto Superior Tecnico, Lisbon, Portugal</p></li><li id="uid41"><p id="id2642236">Neural Networks Research Centre, Helsinki University of
Technology (HUT), Finland</p></li><li id="uid42"><p id="id2642248">Jaakko Pöyry Consulting, Helsinki, Finland</p></li><li id="uid43"><p id="id2642260">Université de Liège, Belgium</p></li><li id="uid44"><p id="id2642272">France Télécom R&amp;D,
Meylan France</p></li></simplelist></subsection><subsection id="uid45"><bodyTitle id="id2642284">BrandDetect</bodyTitle><participants id="id2642288" category="None"><person key="prima-2005-id2245482"><firstname id="id2642293">Augustin</firstname><lastname id="id2642296">Lux</lastname></person><person><firstname id="id2642301">Olivier</firstname><lastname id="id2642304">Riff</lastname></person><person key="prima-2005-id2244745"><firstname id="id2642310">Alban</firstname><lastname id="id2642312">Caporossi</lastname></person><person key="prima-2005-id2244725"><firstname id="id2642317">Alba</firstname><lastname id="id2642320">Ferrer-Biosca</lastname></person><person key="prima-2005-id2245512"><firstname id="id2642325">James L.</firstname><lastname id="id2642328">Crowley</lastname></person><person key="prima-2005-id2245372"><firstname id="id2642334">Daniela</firstname><lastname id="id2642336">Hall</lastname></person></participants><keyword id="id2642340">Digital Television</keyword><keyword id="id2642343">Video Monitoring</keyword><keyword id="id2642346">Media Metrics</keyword><object id="uid46"><table id="id2642357"><tr id="id2642358"><td id="id2642360"><ressource aux="image_8.png" xylemeAttach="8" id="id2642364" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="BrandDetect" type="float" width="15.cm" xyref="3104065329006" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2642386">BrandDetect collects statistics on appearance of publicity
panels in broadcast video</caption></object><p id="id2642391">BrandDetect is a system for detection, tracking and recognition of
corporate logos, commercial
trademarks and other publicity panels in broadcast television video
streams. BrandDetect collects
statistics on the frequency of occurrence, size, appearance and
duration of presentation of the
publicity. It is especially designed for use in the production of
broadcast video of sports events such
as football matches and formula one racing.</p><p id="id2642400">The BrandDetect software can permanently monitor streaming video
input from pre-recorded media
(MPEG, AVI and other formats) as well as from real time video.
BrandDetect looks for occurrences
of a predefined set of publicity panels in a manner that is
independent of size, rotation and position.
Once detected, a publicity panel is tracked in order to collect
statistics on duration, size, image
quality, and position relative to the center of the screen. These
statistics are used to produce an
objective report that may be used to establish the potential impact
and commercial value of publicity.
An example screen image of BrandDetect is shown in
figure <ref id="id2642430" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid46" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.</p><p id="id2642448">BrandDetect has been filed with the l'APP (Agence pour la
Protection des Programmes) the 07 Nov 03.
(IDDN.FR.450046.000.S.P.2003.000.21000.).
A license commercial exploitation has been negotiated with the
Austrian company HSArt.
</p></subsection><subsection id="uid47"><bodyTitle id="id2642460">CAR: Robust Real-Time Detection and Tracking</bodyTitle><participants id="id2642464" category="None"><person key="prima-2005-id2245512"><firstname id="id2642470">James L.</firstname><lastname id="id2642472">Crowley</lastname></person><person key="prima-2005-id2244735"><firstname id="id2642477">Sebastien</firstname><lastname id="id2642480">Pesnel</lastname></person><person key="prima-2005-id2244745"><firstname id="id2642485">Alban</firstname><lastname id="id2642488">Caporossi</lastname></person><person key="prima-2005-id2245372"><firstname id="id2642494">Daniela</firstname><lastname id="id2642496">Hall</lastname></person></participants><keyword id="id2642500">Computer Vision Systems</keyword><keyword id="id2642503">Video Surveillance</keyword><keyword id="id2642506">Monitoring</keyword><keyword id="id2642508">Robust Tracking</keyword><object id="uid48"><table id="id2642519"><tr id="id2642521"><td id="id2642522"><ressource aux="image_9.png" xylemeAttach="9" id="id2642527" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="CAR" type="float" width="13cm" xyref="361781542011" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2642548">The CAR systems integrates several detection modules with
a Kalman Filter for robust detection and tracking of entities</caption></object><p id="id2642526">Tracking is a basic enabling technology for observing and recognizing
human actions. A tracking
system integrates successive observations of targets so as to
conserve information about a target and
its history over a period of time. A tracking system makes it
possible to recognize an object using
off-line (non-video rate) processes and to associate the results of
recognition with a target when it is
available. A tracking system makes it possible to collect
spatio-temporal image sequences for a
target in order to recognize activity. A tracking system provides a
prediction of the current location
of a target which can improve the reliability, and reduce the
computational cost of observation.</p><p id="id2642568">Project PRIMA has implemented a robust real time detection and
tracking system (CAR). This
system is designed for observing the actions of individuals in a
commercial or public environment,
and is general enough to be easily integrated into
other applications.
This system has
been filed with the APP "Agence pour la Protection des Programmes"
and has the InterDeposit Digital Number
IDDN.FR.001.350009.000.R.P.2002.0000.00000.
The basic component of the CAR system is a method for robust
detection and tracking of
individuals [Schwerdt 00]. The system is robust in the sense that
multiple, complementary
detection methods are used to ensure reliable detection. Targets are
detected by pixel level detection
processes based on background subtraction, motion patterns and color
statistics. The modular
architecture permits additional detection modes to be integrated into
the process. A process
supervisor adapts the parameters of tracking so as to minimize lost
targets and to maintain real time
response.</p><p id="id2642621">Individuals are tracked using a recursive estimation process.
Predicted position and spatial extent are
used to recalculate estimates for position and size using the first
and second moments. Detection
confidence is based on the detection energy. Tracking confidence is
based on a confidence factor
maintained for each target.</p><p id="id2642631">The CAR system uses techniques based on statistical estimation theory
and robust statistics to
predict, locate and track multiple targets. The location of each target
is determined by calculating the
center of gravity of detected regions. The spatial extent of a
target is estimated by computing the
second moment (covariance) of detected regions. A form of recursive
estimator (a Kalman filter) is
used to integrate information from the multiple detection modes. All
targets, and all detections are
labeled with a confidence factor. The confidence factor is used to
control the tracking process and
the selection of detection mode.</p><p id="id2642660">The confidence factor is also used to reject outliers during estimation and to select and reinitialize detection modes.</p><p id="id2642666">In 2003, with the assistance of INRIA Transfert and the GRAIN,
Project PRIMA founded a small enterprise, Blue Eye Video, to
develop commercial applications based on the CAR system.
Blue Eye Video has been awarded an exclusive license for commercial
application of the CAR tracker.
In June 2003, Blue Eye Video was named a laureate of the national
competition for the creation of enterprises.</p></subsection></logiciels><resultats id="uid49"><bodyTitle id="id2642685">New Results</bodyTitle><subsection id="uid50"><bodyTitle id="id2642694">A Programmable Robust Tracker</bodyTitle><participants id="id2642698" category="None"><person key="prima-2005-id2245512"><firstname id="id2642704">James L.</firstname><lastname id="id2642706">Crowley</lastname></person><person key="prima-2005-id2244745"><firstname id="id2642711">Alban</firstname><lastname id="id2642714">Caporossi</lastname></person><person key="prima-2005-id2244725"><firstname id="id2642719">Alba</firstname><lastname id="id2642722">Ferrer-Biosca</lastname></person><person key="prima-2005-id2245372"><firstname id="id2642728">Daniela</firstname><lastname id="id2642730">Hall</lastname></person><person key="prima-2005-id2245460"><firstname id="id2642736">Patrick</firstname><lastname id="id2642739">Reignier</lastname></person></participants><keyword id="id2642743">Computer Vision Systems</keyword><keyword id="id2642746">Video Surveillance</keyword><keyword id="id2642748">Monitoring</keyword><keyword id="id2642751">Robust
Tracking</keyword><keyword id="id2642754">Event Detection</keyword><object id="uid51"><table id="id2642765"><tr id="id2642766"><td id="id2642768"><ressource aux="image_10.png" xylemeAttach="10" id="id2642772" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="NewTracker" type="float" width="15.cm" xyref="2463793154010" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2642794">The components and architecture for the new agent
detection and tracking process.</caption></object><object id="uid52"><table id="id2642802"><tr id="id2642803"><td id="id2642805"><ressource aux="image_11.png" xylemeAttach="11" id="id2642809" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="Tracker-body-face" type="float" width="12cm" xyref="3322264059007" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2642830">The new programmable robust tracker makes it possible to
observe composite entities</caption></object><p id="id2642835">The CAR tracker, described in the section on Software Products, was
implemented in C++ with hard-wired control. In order to support
experiments in observation of activity, a new programmable robust
tracking process has been implemented in the ImaLab environment. The
architecture for this process is shown in figure <ref id="id2642843" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid51" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.</p><p id="id2642861">The visual observation process is organized as a set of modules
controlled by an autonomic process supervisor. The supervisor
provides four functional capabilities: module execution scheduling,
parameter regulation, reflexive control, and interpretation and
response to messages. The process supervisor is implemented using a
Scheme interpreter. Scheme is a Lisp-like language with a very
small and simple implementation. Interpreters for C++ and
CLIPS rules, written in Scheme, have been added to the process
supervisor, making it possible to download and execute small snippets
of code in C++, Lisp, or CLIPS in order to program new functions
while the module is executing. This capability is currently used by
the federation supervisor to configure modules for specific tasks at
system set up.</p><p id="id2642879">The process supervisor iteratively executes seven phases of execution
described in the following list. These phases provide the autonomic
and reflexive control.</p><orderedlist id="id2642886"><li id="uid53"><p id="id2642894">Acquisition: Get the next image from the video stream</p></li><li id="uid54"><p id="id2642906">Prediction and Observation: For each current target, predict a
region of interest in the new image. Execute the detection module
specified by the target in the region of interest, update the target
parameters. This phase will delete targets whose confidence drops
below a threshold.</p></li><li id="uid55"><p id="id2642921">Detection: For a subset of the list of "detection regions",
execute the specified detection module. If a sufficient level of
average detection energy is obtained in the region, calculate the
position and size of the target using moments and add the target to
the target list.</p></li><li id="uid56"><p id="id2642943">Regulation: Examine the time elapsed for each target and for
each detection region during the current cycle. Regulate the
precision or number of targets and detection regions so as to
maintain a specified video rate or other quality of service such as
target precision or priority.</p></li><li id="uid57"><p id="id2642959">Interpretation: Recognize configurations of targets as
composite targets. For example, an agent is a composite target with a
body and a face.</p></li><li id="uid58"><p id="id2642972">Event Detection: Generate events for targets and
interpretations. Possible events include: detection and loss of a
target, entry or exit of regions, entry or exit of the scene,
overlap, split, and merge of agents or their components.</p></li><li id="uid59"><p id="id2642987">Respond to messages: Messages are text strings that may
include requests for information about process state or may be new
snippets of code to be added to the process supervision or
interpretation phase.</p></li></orderedlist><p id="id2642996">During execution of each phase, the elapsed time is recorded. During
the observation and detection phases, time is noted for each target
or each detection region. During the regulation phase, if the elapsed
time exceeds the available frame time, the resolution of tracking may
be reduced to one pixel in N, the number of targets tracked may be
reduced, or the number of detection regions may be reduced so as to
maintain video rate.</p><p id="id2643008">The available detection modules include: adaptive background
subtraction, color histogram tracking, image motion detection with
hysteresis, and receptive field histograms. A new detection module
based on learned combinations of receptive fields will soon be
added to the system.</p><p id="id2643018">Each target and each detection region includes a specification of the
detection procedure that is to be used for detection and observation.
This specification is a symbolic label that can be changed during the
process regulation phase. As an example, figure
<ref id="id2643027" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid52" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> shows an example of a face and body
detection. The body region was first detected based on motion energy
in the detection region placed at the door (white rectangle). The
body is tracked using subtraction from an adaptive background within
the body ROI shown as a blue rectangle. The position and size of the
body are shown as a red ellipse. When a body has been tracked for a
sufficient number of frames, a detection region is created for the
face at a position relative to the body. Skin color detection is run
in this region. The interpretation process notes successful detection
of the face and associates the body and face to create a composite
entity for an actor (or person). Events are generated whenever
actors are detected as well as when they enter and leave certain
regions. Events are sent to the supervisor process and are used to
signal changes in situation.
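As a rough sketch of the per-target observation step used throughout this section (detection energy inside the predicted region of interest, summarized by its first and second moments; names are hypothetical):</p><pre><![CDATA[
// Sketch: summarize a detection-energy map within a region of interest
// by its first moment (position) and second moment (spatial extent).
#include <vector>

struct Target { double x, y, cxx, cxy, cyy, energy; };

// energy[v][u] in [0,1]: output of a pixel-level detector (background
// subtraction, motion, color...) restricted to the ROI [u0,u1)x[v0,v1).
Target observe(const std::vector<std::vector<double>>& energy,
               int u0, int v0, int u1, int v1) {
    double s = 0, sx = 0, sy = 0;
    for (int v = v0; v < v1; ++v)
        for (int u = u0; u < u1; ++u) {
            s += energy[v][u]; sx += u * energy[v][u]; sy += v * energy[v][u];
        }
    Target t = {0, 0, 0, 0, 0, s};
    if (s <= 0) return t;            // nothing detected: confidence will drop
    t.x = sx / s; t.y = sy / s;
    for (int v = v0; v < v1; ++v)
        for (int u = u0; u < u1; ++u) {
            double e = energy[v][u] / s;
            t.cxx += (u - t.x) * (u - t.x) * e;
            t.cxy += (u - t.x) * (v - t.y) * e;
            t.cyy += (v - t.y) * (v - t.y) * e;
        }
    return t;   // the ellipse drawn for a target follows from the covariance
}
]]></pre><p>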
</p></subsection><subsection id="uid60"><bodyTitle id="id2643066">Audio Processes for Detection and Tracking</bodyTitle><participants id="id2643070" category="None"><person key="prima-2005-id2245437"><firstname id="id2643075">Dominique</firstname><lastname id="id2643078">Vaufreydaz</lastname></person><person key="prima-2005-id2245460"><firstname id="id2643083">Patrick</firstname><lastname id="id2643086">Reignier</lastname></person></participants><keyword id="id2643090">Acoustic Perception</keyword><keyword id="id2643092">Monitoring</keyword><keyword id="id2643095">Surveillance</keyword><object id="uid61"><table id="id2643105"><tr id="id2643106"><td id="id2643108"><ressource aux="image_12.png" xylemeAttach="12" id="id2643113" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="AcousticProcess" type="float" width="15.cm" xyref="4157948882005" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2643134">Processes for detection, recognition and tracking of
Acoustic Sources</caption></object><p id="id2643139">In addition to video tracking, Project PRIMA has also implemented
processes for recognition and tracking of acoustic sources.
For reasons of hardware compatibility, these processes are implemented under
the MS Windows environment and communicate via the software bus.
Acoustic perception is designed around a microphone array (with 4 or
more microphones) and a set of lapel microphones.
There are four modules included in AudioProcesses: "AudioRouter",
"AudioLocalization", "SpeechRecognition" and "TopicSpotting".</p><p id="id2643159">AudioRouter is in charge of recording synchronously all the audio
channels and to distribute audio data to other modules,
and of some audio pre-processing: remove hardware recording offset
and speech/non-speech classification. Speech classification
techniques are used to detect speech activities on lapel or ambient
microphones. Doing that, it is possible for example in the
FAME context to determine if a lecturer or someone in his audience is
speaking. According to the recognized context, acoustic
signals tagged as speech can be sent to the SpeechRecognition module.
The AudioRouter speech detection is based on a dynamic
combination of 2 sub-modules: an energy detector and a neural
network. The first one requires that the average signal energy
over a specified (regulated) period be greater or smaller than a
specified (regulated) threshold. All the periods and thresholds
are established during system configuration and may be regulated by
the supervisory controller. In parallel, the neural network
is used to classify signals based on several temporal and spectral
acoustic parameters (Linear Predictive Coding, zero-crossing,
etc.). The neural network detects all voiced activity, i.e. sounds
that have resonated in a human vocal tract: plosive sounds
are not recognized as speech, but the following vowel is.</p><p id="id2643111">For AudioLocalization, the microphone array is composed of 4
microphones mounted at the corners of the presentation screen within
the PRIMA Augmented Meeting Environment. Four microphones are needed
for 3D sound localization. Relative phase information is used
to estimate the source position for speech signals. Location
estimation for an acoustic source is based on the Time Difference
Of Arrivals (TDOA) <ref id="id2643198" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid33" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. The time lag between signals
received at each microphone pair is determined using a
cross-correlation
function of signal energy. The maximum of this function for each
microphone pair provides its TDOA. Then two methods
are available for estimating the position of an acoustic source. The
first method is a purely analytic approach. Given the relative
position of microphones, each possible time delay corresponds to a
sphere of positions whose distance corresponds to the distance
that sound travels during the delay. The relative TDOA of two
microphones corresponds to a hyperbolic function that is the
intersection of two spheres. Given three microphone pairs, one can
compute the intersection of these hyperbolic functions to
exactly predict the position of the acoustic source. Experience has
shown that this intersection function is extremely unstable
for most positions, due to echoes. The second method is based on
knowledge of a set of possible targets. It computes theoretical
TDOAs from the positions of the possible sources and calculates their distance to the
estimated ones. The target with the minimal
distance is then chosen. In this case, we can use the positions of video targets, given by
the supervisory controller, to determine which system target
is activated. Using a threshold, it is possible to decide that a sound
is not related to any known target. In that case, and under
some assumptions, the controller can decide whether or not to launch a new
video process in order to look for the new target.</p><p id="id2643197">The SpeechRecognition module uses state-of-the-art acoustic
parameters (Mel-Frequency Cepstral Coefficients - MFCC -,
energy, zero-crossing, variations and accelerations of these
parameters). It is based on Hidden Markov Models for the acoustic
module and on Statistical Language Model for the language modelling
part. SpeechRecognition can recognize either lapel or ambient
microphone signal. The TopicSpotting module waits for messages from
SpeechRecognition. It can use two different approaches:
a rule-based one using triggers and grammars, or a statistical one.
In all cases, using topic spotting information, the SpeechRecognition
language models can be dynamically adapted to the current interests of the
speaker(s) <ref id="id2643263" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid34" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
</p></subsection><subsection id="uid62"><bodyTitle id="id2643285">The Process Federation Tool</bodyTitle><participants id="id2643289" category="None"><person key="prima-2005-id2245460"><firstname id="id2643294">Patrick</firstname><lastname id="id2643297">Reignier</lastname></person><person key="prima-2005-id2244735"><firstname id="id2643302">Sebastien</firstname><lastname id="id2643305">Pesnel</lastname></person><person key="prima-2005-id2244745"><firstname id="id2643311">Alban</firstname><lastname id="id2643314">Caporossi</lastname></person><person key="prima-2005-id2245512"><firstname id="id2643319">James L.</firstname><lastname id="id2643322">Crowley</lastname></person></participants><p id="id2643327">The Process Federation Tool (PFT) has been defined to facilitate
experiments in both manual and automatic configuration and
supervision of perceptual processes and process federations.
This
tool allows processes and process federations to be specified,
configured and monitored during execution, thus providing an
important experimental tool for developing automatic methods for
process regulation and federation configuration.</p><p id="id2643338">A process
federation is a system of independent cooperating processes. A
process federation provides a convenient mechanism to extract and
integrate information from a network of sensors and cameras, without
the need to communicate large volumes of high-bandwidth data.
Federations can also be used to distribute processing over a network
of computing devices in an ad-hoc manner in response to changes in
operating context.</p><p id="id2643350">We have designed a middle ware environment that
allows us to dynamically launch and connect process on different
machines. In our system, processes are controlled by a federation
supervisor or "federator". The federator configures a process by
sending snippets of control script to be interpreted by the
controller. Each control script defines a command that can be
executed by a message from the federator. Processes may be
interrogated by the federator to determine their current state and
the current set of commands.</p><p id="id2643353">Federators can also launch and configure other federators so that
federations can be built up hierarchically. Each federator invokes
and controls lower level supervisors that perform the required
transformation. At the lowest level are Perceptual processes that
observe and track entities and observe the relations between
entities. These are grouped into federations as required to
observe the situations in a context.</p><p id="id2643393">The design and debugging of federators is facilitated by the Process
Federation Tool.
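To make the control mechanism concrete, a federated process can be viewed as a registry of named commands, installed and invoked by messages from the federator (a minimal sketch with invented names, not the actual PFT interface):</p><pre><![CDATA[
// Sketch: a process controller that accepts command definitions and
// invocations as messages from a federation supervisor.
#include <functional>
#include <iostream>
#include <map>
#include <string>

class ProcessController {
    std::map<std::string, std::function<std::string(const std::string&)>> commands;
public:
    // Install a command, e.g. compiled from a downloaded script snippet.
    void define(const std::string& name,
                std::function<std::string(const std::string&)> body) {
        commands[name] = body;
    }
    // Dispatch a message from the federator to the matching command.
    std::string onMessage(const std::string& name, const std::string& arg) {
        auto it = commands.find(name);
        return it == commands.end() ? "unknown command" : it->second(arg);
    }
};

int main() {
    ProcessController p;
    p.define("state", [](const std::string&) { return "tracking 3 targets"; });
    p.define("set-rate", [](const std::string& a) { return "rate=" + a; });
    std::cout << p.onMessage("state", "") << "\n";   // federator interrogation
    return 0;
}
]]></pre><p>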
The Process Federation Tool provides an interface
that allows available federations to be displayed, configured,
launched, monitored and saved.
The tool provides a convenient means
to launch, configure and monitor execution of individual processes or
process federations.
The user chooses the processes from a list of
available types and defines values for parameters.
The description
results in the creation of a "ProcesssDefinitions.scm" file.
Processes and federations configured with the tool can be saved for
further use in order to allow federations to be incrementally
constructed and refined.</p><p id="id2643416">Processes available to the PFT are described by a process description
file using an XML process description.
Process descriptions are
automatically imported from a selected process description directory,
and displayed in a process selection window.
The process selection
window allows listed processes to be selected and instantiated for
configuration.
Instantiation allows processes to be configured and
allocated to computing resources.</p><p id="id2643437">Process configuration requires knowledge of the set of available
sensors on the selected machine, as well as knowledge of the process
parameters that must be configured.
The available sensors for each
machine are described in an XML description that may be imported for
each machine.
As with processes, copies of these descriptions are
kept in a predefined directory.
A description of process parameters
is obtained by sending a message to the newly instantiated process.</p><p id="id2643450">This auto-description feature for processes was designed to allow
automatic configuration, but has been found to be very useful for
manual configuration of distributed processes.</p><p id="id2643457">Instantiated processes may be connected by communication channels to form
federations using the process federation window.
Process
federations may be specified from scratch using the currently available processes.</p><p id="id2643465">These descriptions can be named and saved using an XML format in a
process federation directory.
A list of previously specified
federations is obtained by importing the XML descriptions from this
directory.
Selecting such a description triggers instantiation and
default configuration for this federation.</p><p id="id2643475">The process federation specification window can display information
about the process federation and its completeness.
Incomplete
federations are federations in which input channels lack processes.</p><p id="id2643483">The federation tool validates communication channels and indicates
when a required communication channel is lacking.</p><subsection id="uid63"><bodyTitle id="id2643495">An Example:
Distributed Camera Net</bodyTitle><p id="id2643500">A simple example of a federation of
perceptual processes is provided by a system that detects and tracks
entities using a distributed network of cameras, each connected to a
separate computer running a separate robust tracking process, as
shown in figure <ref id="id2643510" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid64" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. Targets detected within each
robust tracking process are reported to an entity composition
process. The composition process assembles targets into composite
entities and maintains a global history of the target evolution and
trajectory. This system has been used to track vehicles and
pedestrians within the INRIA CAVIAR parking lot test-bed, combining
results from up to 6 distributed surveillance cameras.</p><object id="uid64"><table id="id2643540"><tr id="id2643541"><td id="id2643543"><ressource aux="image_13.png" xylemeAttach="13" id="id2643547" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="Federation" type="float" width="12cm" xyref="215641006029" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2643568">A simple process federation composed of entity detection
process, and a composition process</caption></object></subsection></subsection><subsection id="uid65"><bodyTitle id="id2643576">Specifying a context model</bodyTitle><participants id="id2643580" category="None"><person key="prima-2005-id2245460"><firstname id="id2643585">Patrick</firstname><lastname id="id2643588">Reignier</lastname></person><person key="prima-2005-id2245512"><firstname id="id2643594">James L.</firstname><lastname id="id2643596">Crowley</lastname></person></participants><keyword id="id2643600">Context Modeling</keyword><keyword id="id2643603">Context Aware Systems</keyword><keyword id="id2643606">Ambient Intelligence</keyword><p id="id2643610">A system exists to provide services. Providing services requires the
system to perform actions. The
results of actions are formalized by defining the output "state" of
the system. Simple examples of
actions for interactive environments include adapting the ambient
illumination and temperature in a
room. More sophisticated examples of tasks include configuring an
information display at a specific
location and orientation, or providing information or communications
services to a group of people
working on a common task.</p><p id="id2643614">The "state" of an environment is defined as a conjunction of
predicates. The environment must act so
as to render and maintain each of these predicates to be true.
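Schematically (with invented names), the environment state can be represented as a list of predicate/action pairs, with the control loop acting on whichever predicate is currently false:</p><pre><![CDATA[
// Sketch: environment state as a conjunction of predicates, each paired
// with an action the environment can take to make the predicate true.
#include <functional>
#include <vector>

struct Predicate {
    std::function<bool()> holds;    // observation, e.g. "illumination ok"
    std::function<void()> restore;  // action, e.g. adjust the lighting
};

// One control cycle: act on every predicate of the conjunction that fails.
void maintainState(const std::vector<Predicate>& state) {
    for (const auto& p : state)
        if (!p.holds()) p.restore();
}
]]></pre><p>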
Environmental predicates may be
functions of information observed in the environment, including the
position, orientation and activity
of people in the environment, as well as position, information and
state of other equipment. The
information required to maintain the environment state determines the
requirements of the
perception system.</p><p id="id2643658">The first step in building a context model is to specify the desired
system behavior. For an interactive
environment, this corresponds to the environmental states, defined in
terms of the variables to be
controlled by the environment, and predicates that should be
maintained as true. For each state, the
designer then lists a set of possible situations, where each
situation is a configuration of entities and
relations to be observed. Although a system state may correspond to
many situations, each situation
must uniquely belong to one state. Situations form a network, where
the arcs correspond to changes
in the relations between the entities that define the situation. Arcs
define events that must be detected
to observe the environment.</p><p id="id2643676">In real examples, we have noticed that there is a natural tendency
for designers to include entities
and relations that are not really relevant to the system task. Thus
it is important to define the
situations in terms of a minimal set of relations to prevent an
explosion in the complexity of the
system. This is best obtained by first specifying the environment
state, then for each state specifying
the situations, and for each situation specifying the entities and
relations. Finally for each entity and
relation, we determine the configuration of perceptual processes
that may be used.
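The resulting specification discipline can be summarized by the following nesting (schematic; field names are invented):</p><pre><![CDATA[
// Sketch: a state lists its situations; a situation lists the entities and
// relations that define it; each relation names the perceptual processes
// that can observe it. Keeping relations minimal keeps the system small.
#include <string>
#include <vector>

struct Relation  { std::string name; std::vector<std::string> processes; };
struct Situation { std::string name;
                   std::vector<std::string> entities;
                   std::vector<Relation> relations; };
struct State     { std::string name;
                   std::vector<std::string> predicates;  // to maintain true
                   std::vector<Situation> situations; };
]]></pre><p>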
</p></subsection><subsection id="uid66"><bodyTitle id="id2643697">Context model compiler</bodyTitle><participants id="id2643701" category="None"><person key="prima-2005-id2245460"><firstname id="id2643706">Patrick</firstname><lastname id="id2643709">Reignier</lastname></person></participants><p id="id2643714">PRIMA has constructed a graphical interaction tool for designing
situation graphs.
This tool allows situation graphs to be saved as an XML specification that is
automatically transformed into a computer program that can observe and
recognize situations and generate the desired actions.</p><subsection id="uid67"><bodyTitle id="id2643729">Situation graphs and temporal relations</bodyTitle><p id="id2643733">A context model is a graph of situations. Situations are connected by
arcs, representing temporal constraints between them.
They are decorated using the temporal operators defined by
Allen <ref id="id2643742" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid35" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> : <i id="id2643760">before, meets, overlaps, starts, equals,
during, finished</i>.</p><p id="id2643765">The graph structure is given by the temporal relations. A path inside
the graph is the result of the observation of the on-going situations.</p><p id="id2643771">A situation is a set of roles and relations. Based on the situation
definition, we move from situation S1 to situation S2 if a role or a
relation has changed in situation S1 (S1 is no longer valid) and roles
and relations are verified in situation S2. The transitions are
event-driven. If we associate situations to places and events to
transitions, the situation graph can be mapped on the <i id="id2643782">Synchronized Petri Nets</i> formalism. This Petri Net can then be
transformed into a computer program.</p></subsection><subsection id="uid68"><bodyTitle id="id2643795">Synchronized Petri Nets</bodyTitle><p id="id2643799">A synchronized Petri Net is a Petri Net where transitions are
associated with events. A transition can be fired only if both:</p><simplelist id="id2643808"><li id="uid69"><p id="id2643816">The preconditions on place marks are verified.</p></li><li id="uid70"><p id="id2643827">The transition event has been received.</p></li></simplelist><p id="id2643832">We have proposed for each Allen operator a corresponding Petri Net
pattern. The synchronization events are automatically calculated based
on the roles and relations of connected situations.</p></subsection><subsection id="uid71"><bodyTitle id="id2643847">Jess rule generation</bodyTitle><p id="id2643851">We have to program an event-based system. One possible solution
is to use a forward-chaining rule programming environment. An event
corresponds to a new fact in the facts database, triggering the
corresponding rules.</p><p id="id2643860">We have selected the Jess expert system shell <ref id="id2643866" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid36" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> for our
rule based
programming environment.</p><p id="id2643885" noindent="true">The generated rules are separated into three groups:</p><simplelist id="id2643892"><li id="uid72"><p id="id2643900">The rules implementing the structure of the Petri Net. They are
a direct transcription of the Petri Net evolution function.</p></li><li id="uid73"><p id="id2643913">The rules implementing the transition functions. They generate
the synchronization events based on modifications of roles and/or relations.</p></li><li id="uid74"><p id="id2643926">The rules implementing the control of the visual processes.
These rules are
based on the situation marks. When a situation mark goes to 0, we
are no longer interested in observing the entities playing the
associated roles, and we can shut down the corresponding visual
processes. When a situation mark is positive, we must configure the
visual processes to search for entities playing the roles of all the
connected situations, in order to be able to observe which situation
will be the next one.</p></li></simplelist></subsection></subsection><subsection id="uid75"><bodyTitle id="id2643946">Generic Features for detection and classification</bodyTitle><participants id="id2643951" category="None"><person key="prima-2005-id2245372"><firstname id="id2643956">Daniela</firstname><lastname id="id2643959">Hall</lastname></person><person key="prima-2005-id2244909"><firstname id="id2643964">Nicolas</firstname><lastname id="id2643967">Gourier</lastname></person></participants><keyword id="id2643970">Computer Vision</keyword><keyword id="id2643972">Generic Features</keyword><keyword id="id2643975">Robust Matching</keyword><keyword id="id2643978">Robust Tracking</keyword><p id="id2643983">Feature extraction is essential for detection and recognition
tasks. In the case of detection and identification of agents in
intelligent environments with multiple cameras, the use of raw
Gaussian derivatives tends to fail due to the large variance
introduced by changes in appearance, viewpoint and camera
parameters. Appropriate features are made robust to such changes by
learning the expected variance from examples. In this section we
propose an approach for the generation of such generic features and
show how these low level features can be integrated in an architecture
with two components (see Figure <ref id="id2644012" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid76" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>): a feature extraction
module that provides features invariant to intra-class variability and
a geometry verification module that introduces specificity, increases
the reliability of the detection and rejects false positives.</p><object id="uid76"><table id="id2644039"><tr id="id2644040"><td id="id2644042"><ressource aux="image_14.png" xylemeAttach="14" id="id2644046" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="arch" type="float" width="13cm" xyref="517389783008" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2644067">System architecture consisting of low level feature extraction and
higher level geometry verification.</caption></object><subsection id="uid77"><bodyTitle id="id2644077">Computation of class-specific feature detectors</bodyTitle><p id="id2644081">For the extraction of class-specific features, we learn the
appearance of class-specific object parts from a dense, pixelwise,
grid of features by clustering. Clustering of dense features is
similar to Leung and Malik's approach for computation of generic
features for texture classification <ref id="id2644091" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid37" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. Furthermore,
k-means clustering produces statistically correct results, because a
large number of data points is used.</p><p id="id2644112">Figure <ref id="id2644115" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid78" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> illustrates the feature extraction process. The
top right graph shows an example of the probability maps (low
probabilities are black, high probabilities are light grey). The
probability maps can be condensed to a cluster map
representation. This is an enormous data reduction, but at the same time, it
preserves the information about class specific features and their
location. This is the minimum information required for detection and
classification and makes the cluster map an ideal input for the
geometry verification module. For a detailed
description of the experiments please see <ref id="id2644155" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid38" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.</p><object id="uid78"><table id="id2644179"><tr id="id2644180"><td id="id2644182"><ressource aux="image_15.png" xylemeAttach="15" id="id2644186" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="algo" type="float" width="12cm" xyref="1125228605021" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2644206">Algorithm for raw feature extraction and mapping to most
probable class specific features. The probability maps are condensed
to a single color coded cluster map, where color <hi rend="italic">k</hi> marks points
that are assigned to
cluster <hi rend="italic">k</hi>.</caption></object></subsection><subsection id="uid79"><bodyTitle id="id2644240">Modeling spatial relations</bodyTitle><p id="id2644244">In this section we propose an automatic model generation that
learns spatial relations of generic features. This model is inspired by
Belongie's log-polar shape context <ref id="id2644252" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid39" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>.
A log-polar histogram has bins that are uniform in log-polar
space. This makes the log-polar description
appropriate for applications where the object undergoes affine
transformations.</p><p id="id2644274">A region around the query position is transformed into log-polar
representation using a lookup table for speedup. A bin of
the log-polar histogram contains the ratio of the surface covered by
the query pixel and the total surface of the histogram bin.</p><p id="id2644282">For learning the spatial relations of the target object, the user
selects a reference position within the cluster map representation of
a set of training images. This is the only user interaction required
for training. A model histogram is constructed.
For measuring the similarity between any query histogram <hi rend="italic">Q</hi> and the
model histogram <hi rend="italic">H</hi> we use the <span class="math" align="left"><img width="12" height="24" align="middle" border="0" src="/images/img_chi.png" alt="$ \chi$"/><sup>2</sup></span> divergence measure.</p><object id="uid80"><table id="id2644341"><tr id="id2644343"><td id="id2644344"><ressource aux="image_16.png" xylemeAttach="16" id="id2644348" media="WEB" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="equipe_prima1" type="float" width="12cm" xyref="3498428061019" xmlns:xlink="http://www.w3.org/1999/xlink"/></td></tr></table><caption id="id2644369"><small id="id2644370">Detection example on unconstrained image. The training is
performed on frontal faces from the AR
database <ref id="id2644375" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid40" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. Detected faces marked by
circles are characterized by the combined occurrence of facial
features. No false detections are observed.</small></caption></object><p id="id2644396">The log-polar implementation allows the modeling of spatial
relations that is sufficiently discriminant to avoid false detections
and is general enough to avoid over-fitting.
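The comparison step itself condenses to a few lines: the chi-square divergence between two normalized histograms Q and H is 0.5 * sum_k (q_k - h_k)^2 / (q_k + h_k) (a sketch; the log-polar bin construction is omitted):</p><pre><![CDATA[
// Sketch: chi-square divergence between a query and a model histogram,
// used to score log-polar, shape-context style descriptors.
#include <cstddef>
#include <vector>

double chiSquare(const std::vector<double>& q, const std::vector<double>& h) {
    double d = 0;
    for (std::size_t k = 0; k < q.size(); ++k) {
        double s = q[k] + h[k];
        if (s > 0) d += (q[k] - h[k]) * (q[k] - h[k]) / s;
    }
    return 0.5 * d;   // small d: similar spatial layout of features
}
]]></pre><p>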
In order to show the stability of our approach to images with
cluttered background and different illumination conditions, we have
performed a face detection experiment on the 435 images of the Caltech face database <ref id="id2644405" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#bid41" location="biblio" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/>. We obtain a positive detection
rate of 97.7% (425 out of 435 images are detected). In the example of
unconstrained images in Figure <ref id="id2644428" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="#uid80" location="intern" xyref="4116818600024" xmlns:xlink="http://www.w3.org/1999/xlink"/> all faces are
correctly detected despite significant differences between the
training and observed faces in head pose, hairstyle, beards and
glasses. We observe no false detections. This is a convincing result
considering the variations in scale, head pose and lighting.</p></subsection></subsection></resultats><contrats id="uid81"><bodyTitle id="id2644456">Contracts and Grants with Industry</bodyTitle><subsection id="uid82"><bodyTitle id="id2644465">European and National Projects</bodyTitle><subsection id="uid83"><bodyTitle id="id2644475">IST-2000-28323 FAME: Facilitating Agent for
Multi-Cultural Exchange</bodyTitle><p id="id2644480">European Commission project IST-2000-28323</p><p id="id2644484">Starting Date : October 2001.</p><p id="id2644488">Duration: 40 months.</p><p id="id2644493">Key Action: MultiModal Interfaces</p><p id="id2644497">Consortium Members:</p><simplelist id="id2644502"><li id="uid84"><p id="id2644511">Universitaet Karlsruhe (TH), Germany, Prof. Alex Waibel</p></li><li id="uid85"><p id="id2644523">Laboratoire GRAVIR, UMR CNRS 5527, France, Prof. James L. Crowley</p></li><li id="uid86"><p id="id2644535">Université Joseph Fourier, Laboratoire CLIPS, France, Prof.
Joëlle Coutaz</p></li><li id="uid87"><p id="id2644548">Istituto Trentino di Cultura, Italy, Marcello Federico</p></li><li id="uid88"><p id="id2644560">Universitat Politècnica de Catalunya Centre TALP, Spain, Prof.
José B. Mariño</p></li><li id="uid89"><p id="id2644573">Sony International (Europe) GmbH, Germany, Ralf Kompe</p></li><li id="uid90"><p id="id2644585">Applied Technologies on Language and Speech S. L., Germany, David Font</p></li></simplelist><p id="id2644590">The goal of IST Project FAME is to construct an intelligent agent to
facilitate communication among people from different cultures who
collaborate on solving a common problem. This agent will provide
three services: 1) facilitate human to human communication through
multimodal interaction including vision, speech and object
manipulation, 2) provide the appropriate information relevant to the
context, and 3) make possible the production and manipulation of
information blending both electronic and physical representations.
The agent will serve as an information butler to aid multicultural
communication in a transparent way. The agent will not intervene in
the conversation, but will remain in the background to provide the
appropriate support. A public demonstration is planned for the
Barcelona Cultural Fair 2004.</p><p id="id2644610">The project work plan includes management (WP1) and dissemination (WP11). Two to three demonstrators
of increasing complexity will be designed and the interfaces between
the components of the system will be specified (WP2). These
demonstrators will concern scenarios for seminar style discussion,
round-table group problem solving, and collaborative problem solving
involving remote groups. Each scenario will include assistance for
communication between people from different cultures using different
languages. A test-bed will be constructed to verify compliance with
the specifications and to provide an environment for evaluating the
performance and usability of software components (WP3). At the end of
the project, a robust demonstrator will be exhibited at the Barcelona
Cultural Fair (WP2). Constructing a facilitating agent for
multicultural communication requires technological innovations in
several areas: 1) Speech understanding to control the acquisition and presentation of
video (WP4) and audio (WP5) for communication. 2) Active control of
acquisition to allow users to speak and move freely about the room
unencumbered by microphones or wires. 3) Conversational speech
recognition to recognise unplanned speech acts in order to feed the
conversation context model with the meaning of utterances (WP6).
4) Dialog modeling (WP10) to maintain a task model that integrates
information from perceptual awareness in order to inform the
information retrieval process. 5) An information retrieval process to
provide information relevant to the current communication and problem
solving context (WP7). 6) Natural interaction, blending physical and
electronic representations of information, to enable people to speak,
write and manipulate physical objects as part of the problem solving
process (WP8). 7) Automatic speech translation to enable communication
across cultural and language barriers (WP9).</p></subsection><subsection id="uid91"><bodyTitle id="id2644657">IST-2001-32157 DETECT: Real Time Detection of Motion
Picture Content in Live Broadcasts</bodyTitle><p id="id2644662">European Commission project IST-2001-32157</p><p id="id2644666">Starting Date: November 2001</p><p id="id2644671">Duration: 27 Months</p><p id="id2644675">Project: IST-2001-32157</p><p id="id2644680">Key Action: Cognitive Vision.</p><simplelist id="id2644684"><li id="uid92"><p id="id2644694">JOANNEUM RESEARCH Forschungsgesellschaft mbH, Austria</p></li><li id="uid93"><p id="id2644705">DUVIDEO II - Profissionais de Imagem S.A., Portugal</p></li><li id="uid94"><p id="id2644717">Taurus Media Technik GmbH, Germany</p></li><li id="uid95"><p id="id2644728">HS-ART Digital Service GmbH, Austria</p></li><li id="uid96"><p id="id2644739">VIDEOCATION Fernseh-Systeme GmbH, Germany</p></li><li id="uid97"><p id="id2644750">INRIA Rhône Alpes, INRIA-GRAVIR-UMR5527, France</p></li><li id="uid98"><p id="id2644763">Institut National Polytechnique de Grenoble, INPG-GRAVIR-UMR5527, France</p></li><li id="uid99"><p id="id2644775">Centre National de la Recherche Scientifique,
CNRS-GRAVIR-UMR5527, France</p></li><li id="uid100"><p id="id2644787">Université Joseph Fourier, UJF-GRAVIR-UMR5527, France</p></li></simplelist><p id="id2644794">The principal goal of the DETECT project is to implement a general
platform for real time detection of semantic blocks and regions
within digital video streams. These detected regions are then
subjected to further analysis and processing. The project will focus
on the application problem of detecting and delimiting predefined
static and dynamic objects, a capability that is currently in large
demand for both cultural and economic reasons. DETECT is an
industrially driven project, although its nature is R&amp;D. The project
will result in a prototype that can be turned into a product after
the project. For this reason the main modules are implemented as
sample applications (ProcessingUnits) for categories of high
commercial interest (e.g. identification of race-cars and
soccer-players).</p><p id="id2644817">DETECT provides a general platform for real time analysis of
streaming video input and supports three different types of
ProcessingUnits (modules), described below; a minimal interface
sketch follows these descriptions.</p><subsection id="uid101"><bodyTitle id="id2644830">Detection of semantic blocks.</bodyTitle><p id="id2644834">A semantic block covers the temporal domain only. Thus a typical
semantic block-detector simply indicates whether or not the streamed
content is of a certain type. Within DETECT the semantic block concept
will be implemented for commercial/advertising blocks, as they appear
frequently in television broadcasts. Depending on the outcome of the
semantic block detection, a specific detection of regions of interest
(henceforth ROI) can be applied.</p></subsection><subsection id="uid102"><bodyTitle id="id2644855">Detection of ROI in order to identify static and dynamic objects.</bodyTitle><p id="id2644860">An ROI is a specific local region which, due to the nature of the
streaming input, is also related to the temporal domain. Such an ROI,
whenever identified in real time, can be used to restrict further
analysis or simply to recognize predefined objects that match the
ROI. Within DETECT the ROI concept will be applied to sports
applications (soccer and Formula 1), and therein to locally moving
objects such as soccer players and race cars, but also to static
objects like hoardings. Each detected ROI can be further analyzed
with pattern recognition tools depending on the type of the ROI.</p></subsection><subsection id="uid103"><bodyTitle id="id2644882">Motion picture analysis.</bodyTitle><p id="id2644886">The main objective here is to detect predefined (company) logos
stored in a central reference database. Such logos can be trademarks,
as in the DETECT sample application, but could also be of nearly any
other type. As the size of the reference system has to be scalable,
the logo-detection analysis will be done off-line, not as a real time
application.</p></subsection>
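<p>The following minimal sketch illustrates how such a modular ProcessingUnit architecture might be organized. It is an illustration under assumed names (ProcessingUnit, SemanticBlockDetector, ROIDetector, Pipeline), not the project's actual API.</p><html:pre>
# Hypothetical sketch of the ProcessingUnit architecture described above;
# all class and method names are illustrative, not DETECT's real interfaces.
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Tuple

@dataclass
class Frame:
    index: int        # temporal position in the stream
    pixels: bytes     # stand-in for real image data

@dataclass
class ROI:
    frame_index: int
    bbox: Tuple[int, int, int, int]   # x, y, width, height
    label: str = "unknown"            # e.g. "race-car", "soccer-player", "hoarding"

class ProcessingUnit(ABC):
    """A pluggable module operating on streaming video input."""
    @abstractmethod
    def process(self, frame: Frame) -> None: ...

class SemanticBlockDetector(ProcessingUnit):
    """Temporal domain only: flags whether the current content is of a
    certain type, e.g. a commercial/advertising block."""
    def __init__(self) -> None:
        self.in_block = False
    def process(self, frame: Frame) -> None:
        # a real detector would analyse the frame content here
        self.in_block = False

class ROIDetector(ProcessingUnit):
    """Detects regions of interest (static or moving objects) in real time."""
    def __init__(self) -> None:
        self.rois: List[ROI] = []
    def process(self, frame: Frame) -> None:
        # a real detector would segment and track candidate regions here
        self.rois = []

class Pipeline:
    """Chains ProcessingUnits; later units can be gated on earlier results,
    e.g. ROI detection applied only inside detected semantic blocks."""
    def __init__(self, units: List[ProcessingUnit]) -> None:
        self.units = units
    def feed(self, frame: Frame) -> None:
        for unit in self.units:
            unit.process(frame)
</html:pre></subsection><subsection id="uid104"><bodyTitle id="id2644904">IST 2001 37540 CAVIAR: Context Aware Vision using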
Image-based Active Recognition</bodyTitle><p id="id2644910">European Commission project: IST 2001 37540</p><p id="id2644913">Starting Date: October 1, 2002</p><p id="id2644918">Duration: 36 Months</p><p id="id2644922">Key Action: Cognitive Vision</p><p id="id2644927">Consortium:</p><simplelist id="id2644931"><li id="uid105"><p id="id2644942">Univ. of Edinburgh (United Kingdom)</p></li><li id="uid106"><p id="id2644953">Instituto Superior Tecnico, Lisbon, Portugal</p></li><li id="uid107"><p id="id2644965">INRIA Rhône Alpes, France</p></li><li id="uid108"><p id="id2644977">Institut National Polytechnique de Grenoble, France</p></li><li id="uid109"><p id="id2644989">Université Joseph Fourier, Grenoble, France</p></li><li id="uid110"><p id="id2645002">CNRS, France</p></li></simplelist><p id="id2645006">The main objective of CAVIAR is to address the scientific
question: Can rich local image descriptions from fovea and other
image sensors, selected by a hierarchical visual attention process and
guided and processed using task, scene, function and object
contextual knowledge, improve image-based recognition processes? This
question clearly addresses issues central to the cognitive vision approach.</p><p id="id2645018">The two applications that the project will address are:</p><orderedlist id="id2645023"><li id="uid111"><p id="id2645032">City centre surveillance: Many large cities have nighttime
crime and antisocial behaviour problems, such as drunkenness, fights,
vandalism, breaking and entering shop windows, etc. Often these
cities have video cameras already installed, but what is lacking is a
semi-automatic analysis of the video stream. Such analysis could
detect unusual events, such as patterns of running people, converging
people, or stationary people, and then alert human security staff.</p></li><li id="uid112"><p id="id2645052">Marketers are interested in the behaviour of potential
customers in a commercial setting, such as what sequence of locations
they visit, how long they stop at particular locations, and what
behavioural options typical customers take. Automatic
analysis of customer behaviour could enable evaluation of shop
layouts, changing displays and the effect of promotional materials.</p></li></orderedlist></subsection><subsection id="uid113"><bodyTitle id="id2645071">IST 506909 CHIL: Computers in the Human Interaction Loop</bodyTitle><p id="id2645076">European Commission project IST 506909 (Framework VI - Call 1)</p><p id="id2645081">Strategic Objective: Multi-modal Interaction</p><p id="id2645086">Start Date: 1 January 2004.</p><p id="id2645090">Duration: 36 months (renewable).</p><p id="id2645095">CHIL is an Integrated Project in the new Framework VI programme.</p><p id="id2645100">Participants:</p><simplelist id="id2645104"><li id="uid114"><p id="id2645114">Fraunhofer Institut für Informations- und Datenverarbeitung,
Karlsruhe, Germany</p></li><li id="uid115"><p id="id2645128">Universität Karlsruhe (TH), Interactive Systems Laboratories, Germany</p></li><li id="uid116"><p id="id2645142">Daimler Chrysler AG, Stuttgart, Germany</p></li><li id="uid117"><p id="id2645154">ELDA, Paris, France</p></li><li id="uid118"><p id="id2645164">IBM Czech Republic, Prague, Czech Republic</p></li><li id="uid119"><p id="id2645176">Research and Education Society in Information Systems, Athens, Greece</p></li><li id="uid120"><p id="id2645188">Institut National Polytechnique de Grenoble, France</p></li><li id="uid121"><p id="id2645200">Istituto Trentino di Cultura, Trento, Italy</p></li><li id="uid122"><p id="id2645211">Kungl Tekniska Högskolan (KTH), Stockholm, Sweden</p></li><li id="uid123"><p id="id2645224">Centre National de la Recherche Scientifique, Orsay, France</p></li><li id="uid124"><p id="id2645236">Technische Universiteit Eindhoven, Eindhoven, Netherlands</p></li><li id="uid125"><p id="id2645248">Universität Karlsruhe (TH), IPD, Karlsruhe, Germany</p></li><li id="uid126"><p id="id2645261">Universitat Politecnica de Catalunya, Barcelona, Spain</p></li><li id="uid127"><p id="id2645273">Stanford University, Stanford, USA</p></li><li id="uid128"><p id="id2645285">Carnegie Mellon University, Pittsburgh, USA</p></li></simplelist><p id="id2645289">The theme of project IP CHIL is to put Computers in the loop of
humans interacting with humans. To achieve this goal of Computers in
the Human Interaction Loop (CHIL), the computer must engage and act
on perceived human needs, intruding as little as possible, offering
only relevant information or responding to explicit requests. The computer must also
learn from its interaction with the environment and people. Finally,
the computing devices must allow for a dynamically networked and
self-healing hardware and software infrastructure. The CHIL
consortium will build prototypical, integrated environments providing:</p><p id="id2645305">Perceptually Aware Interfaces: Perceptually aware interfaces can
gather all relevant information (speech, faces, people, writing, and
emotion) to model and interpret human activity, behaviour, and
actions. To achieve this task we need a variety of core technologies
that have progressed individually over the years: speech recognition
and synthesis, people identification and tracking, computer vision,
automatic categorization and retrieval, to name a few. Perceptually
aware interfaces differ dramatically from past and present
approaches, since the machine now observes human interaction rather
than being directly addressed. This requires considerably more robust
and integrated perceptual technology, since perspectives, styles and
recording conditions are less controlled and less predictable,
leading to dramatically higher error rates.</p><p id="id2645341">Cognitive Infrastructure: The supporting infrastructure that will
allow the perceptual interfaces to provide real services to the users
needs to be dramatically advanced. Cognitive and social modeling to
understand human activities, model human workload, infer and predict
human needs has to be included in the agent and middleware technology
that supports CHIL. Further, the network infrastructure has to be
dynamic and reconfigurable, accommodating the integration of a
variety of platforms, components, and sensory systems that collaborate
seamlessly and on demand to satisfy user needs.</p><p id="id2645357">Context Aware Computing Devices: CHIL aims to change present desktop
computer systems to context aware computing devices that provide
services implicitly and autonomously. Devices will be able to utilize
the advanced perceptual interfaces developed and the infrastructure
in CHIL to free the user: instead of serving the device, the user
will be served and supported in the tasks and human-to-human
interactions on which he needs to focus. Further, human centered
design, in which artistic value, appeal, and look &amp; feel become
important, will take computing devices and human environments to the next level.</p><p id="id2645380">Novel services: The above innovations and advances in perceptual
interfaces, cognitive infrastructure and context aware computing
devices are integrated and showcased in novel services that aim at
radically changing the way humans interact with computers to achieve
their tasks in a more productive and less stressful way. These
services are based on a thorough understanding of the social setting,
the task situation, and the optimal interaction that maximizes human
control while minimizing workload. Furthermore, issues of privacy and
security are to be addressed, since the change in human-computer
interaction introduced by CHIL also touches many of the ways in which
information is shared and communicated.</p><p id="id2645398">New measures of Performance: The resulting systems should reduce
workload in measurable ways. Achieving this requires breakthroughs in
a number of component technologies, in the integrated system, and in
a better understanding of its new use in human spaces. Evaluation
must be carried out in terms of both performance and effectiveness,
to assess and track the progress of each component and of the "end to end"
integrated system(s). This will be carried out by an independent
infrastructure that would also allow any third party to benchmark its
findings against the project results after the end of the project.</p></subsection><subsection id="uid129"><bodyTitle id="id2645425">RNTL/Proact: ContAct Context management for pro-Active
computing</bodyTitle><p id="id2645430">Start Date: February 2003.</p><p id="id2645435">Duration: 24 months</p><p id="id2645439">The consortium consists of five partners:</p><simplelist id="id2645443"><li id="uid130"><p id="id2645452">Xerox Research Centre Europe (Project coordinator)</p></li><li id="uid131"><p id="id2645464">Project PRIMA, Laboratoire GRAVIR, INRIA Rhone Alpes</p></li><li id="uid132"><p id="id2645475">Neural Networks Research Centre, Helsinki University of
Technology (HUT), Finland</p></li><li id="uid133"><p id="id2645487">Jaakko Pöyry Consulting, Helsinki, Finland</p></li><li id="uid134"><p id="id2645500">Ellipse, Helsinki, Finland</p></li></simplelist><p id="id2645504">Project ContAct is one of three RNTL projects included
in the French-Finnish scientific program ProAct.</p><p id="id2645511">The aim of Project RNTL CONTACT is to explore novel approaches, based
mainly on neural nets, to the detection and manipulation of
contextual information to support proactive computing applications,
and to make the results available as part of a more extensive toolkit
for ubiquitous and proactive computing. The project will investigate
the effectiveness of neural networks as part of a "contextual
middleware". In particular the project will address two levels of
context manipulation:</p><p id="id2645529">1) Support for developing and adapting neural network classifiers to
compute context variables: potentially, an end user will be able to
specify the learning phase of the network and associate it with a
specific new context attribute to be used in the application (a
minimal sketch of such a classifier is given below). 2) The provision
of example classifiers already trained: in this case some samples of
such attributes will be developed and provided as a library. To
develop these functions a specific scenario will be used: people
planning, coordinating and reflecting on their activities in an
organization in an augmented smart building (equipped with large
screens, wireless networks, video cameras, identification sensors,
and mobile devices). The attributes will be articulated at two
different levels of abstraction: sensor oriented and application/user oriented.</p>
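<p>The sketch below illustrates, under stated assumptions, what such a learned context attribute might look like: a single logistic unit (the simplest neural classifier) trained to map invented sensor-level features to a hypothetical attribute "in_meeting". The features and the attribute are illustrative only and are not taken from project CONTACT.</p><html:pre>
# Minimal sketch: a tiny neural classifier (one logistic unit) mapping
# sensor-level readings to an application-level context attribute.
# The feature layout and the "in_meeting" attribute are invented for
# illustration; they do not reproduce the project's actual networks.
import math
from typing import List, Tuple

def sigmoid(x: float) -> float:
    return 1.0 / (1.0 + math.exp(-x))

def train(samples: List[Tuple[List[float], int]],
          epochs: int = 200, lr: float = 0.1) -> List[float]:
    """Learn weights (bias as the last weight) by stochastic gradient descent."""
    n = len(samples[0][0])
    w = [0.0] * (n + 1)
    for _ in range(epochs):
        for features, label in samples:
            x = features + [1.0]                    # append bias input
            y = sigmoid(sum(wi * xi for wi, xi in zip(w, x)))
            for i in range(n + 1):
                w[i] += lr * (label - y) * x[i]     # gradient step
    return w

def predict(w: List[float], features: List[float]) -> float:
    x = features + [1.0]
    return sigmoid(sum(wi * xi for wi, xi in zip(w, x)))

# Features: [number of people seen by a camera, audio level, screen in use]
training = [([3, 0.8, 1], 1), ([1, 0.1, 0], 0), ([4, 0.6, 1], 1), ([0, 0.0, 0], 0)]
w = train(training)
print("P(in_meeting):", predict(w, [2, 0.7, 1]))
</html:pre><p id="id2645548">To achieve these results, project CONTACT will cover four major activities: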
1) Definition of an ontology that describes context variables both at
the user and at the sensor level. 2) Definition of a platform
providing a formalism and an appropriate architecture to learn and
combine context attributes. 3) Definition of a library of context
attributes, general enough to be reusable in support of scenarios
other than the one used in the project. 4) Validation of the
contextual middleware on a pilot case. The chosen application of
personal time management will help guide the development of the
middleware and also support an evaluation of our technology on a
real-world problem.</p></subsection></subsection></contrats><international id="uid135"><bodyTitle id="id2645573">Other Grants and Activities</bodyTitle><subsection id="uid136"><bodyTitle id="id2645583">European Research Networks</bodyTitle><subsection id="uid137"><bodyTitle id="id2645594">IST-2001-35454 ECVision: European Research Network for
Cognitive AI-enabled Computer Vision Systems </bodyTitle><p id="id2645599">Project Acronym: ECVision</p><p id="id2645604">Project Full Title: European Research Network for Cognitive
AI-enabled Computer Vision Systems</p><p id="id2645610">Start Date: March 2002</p><p id="id2645614">Duration: 36 months</p><p id="id2645618">ECVision is a thematic network devoted to Cognitive Enabled Computer
Vision Systems. ECVision serves to unify the set of 8 IST projects
funded in Framework V under the EC's Cognitive Vision program.</p><p id="id2645626">The principal goal of ECVision is to promote research, education,
and application systems engineering in cognitive AI-enabled computer
vision in Europe through focussed networking, multi-disciplinary
peer-interaction, targeted identification of priority issues, and
wide-spread promotion of the area's challenges and successes within
both the academic and industrial communities.</p><p id="id2645637">The project goal will be achieved by setting up and running a
research network whose objectives will be accomplished through four
main operational goals:</p><p id="id2645644">Research Planning - identify key challenges, problems, and system
functionalities so that the community and the EC can target the
critical areas efficiently and effectively. In doing so, ECVision
will develop a 'research roadmap' which will identify the key
challenges and priority topics, together with plans and time scales
for attacking them.</p><p id="id2645655">Education and Training - identify and develop courses, curricula,
texts, material, and delivery mechanisms; promote excellence in
education at all levels, and foster exchange of ideas through
inter-institutional interaction of staff and students.
Information Dissemination - promote the visibility and profile of
cognitive vision at conferences and in journals by organizing special
sessions, workshops, tutorials, summer schools, short courses, and by
providing links to the work of those in the AI &amp; Robotics
communities.
Industrial Liaison - identify application drivers and highlight any
successes, promote research trials, addressing all types of
industries: games, entertainment, white goods manufacturers (e.g.
vigilant appliances), construction (e.g. smart buildings), medicine
(e.g. aids for the disabled), etc.</p><p id="id2645678">In addition, the network will include two support activities:</p><simplelist id="id2645683"><li id="uid138"><p id="id2645694">Provision of an Information Infrastructure for both
computer-supported cooperative work, e.g. discussion forums and email
distribution lists, and for web-based dissemination of all material
generated under the four areas identified above.</p></li><li id="uid139"><p id="id2645709">Operational management by a Network Coordinator and Area
Leaders in each of the four areas above; these people will constitute
the ECVision Executive Committee.</p></li></simplelist><p id="id2645717">James Crowley of Project prima is coordinator of Research Planning
for ECVision.</p></subsection><subsection id="uid140"><bodyTitle id="id2645729">IST-2000-26434 FGnet: Face and Gesture Recognition
Working Group</bodyTitle><p id="id2645734">Start Date: 15 October 2001</p><p id="id2645739">Duration: 36 months</p><simplelist id="id2645743"><li id="uid141"><p id="id2645752">University of Manchester, UK</p></li><li id="uid142"><p id="id2645763">Technical University of Munich, Germany</p></li><li id="uid143"><p id="id2645775">Computer Vision &amp; Media Technology Lab, Aalborg University, Denmark</p></li><li id="uid144"><p id="id2645790">Laboratoire GRAVIR, INRIA Rhône Alpes, France</p></li><li id="uid145"><p id="id2645803">The Dalle Molle Institute for Perceptual Artificial
Intelligence, Switzerland</p></li><li id="uid146"><p id="id2645815">Dept. of Computer Science, Cyprus College, Nicosia, Cyprus</p></li></simplelist><p id="id2645820">FGnet is a thematic network devoted to visual techniques for
detection, tracking and recognition of faces and gestures.
The aim of this project is to encourage technology development in the
area of face and gesture recognition. The precise goals are: (1) to
act as a focus for the workers developing face and gesture
recognition technology; (2) to create a set of foresight reports
defining development road maps and future use scenarios for the
technology in the medium (5-7 years) and long (10-20 years) term; (3)
to specify, develop and supply resources (e.g. image sets) supporting
these scenarios; and (4) to use these resources to encourage technology
development. The use of shared resources and data sets to encourage
the development of complex processing and recognition systems has been
very successful in the speech analysis and recognition field, and in
the image analysis field in the specific cases where it has been
applied. The premise of the project is that, when properly defined
and collected, such resources would also be of benefit for wider
problems in face and gesture recognition.</p><p id="id2645845">Project PRIMA is responsible for organizing dissemination workshops
for FGnet, as well as contributing to the collection of benchmark
data sets for performance evaluation. FGnet has provided resources
for organizing the PETS series of workshops (Performance Evaluation
for Tracking and Surveillance).</p></subsection></subsection></international><diffusion id="uid147"><bodyTitle id="id2645861">Dissemination</bodyTitle><subsection id="uid148"><bodyTitle id="id2645871">Contribution to the Scientific Community</bodyTitle><subsection id="uid149"><bodyTitle id="id2645881">EUSAI '04: European Symposium on Ambient Intelligence</bodyTitle><p id="id2645886">James L. Crowley was program co-chair of the 2nd European Symposium
on Ambient Intelligence, held in Eindhoven in the Netherlands, on
November 8-10, 2004.</p><p id="id2645893">EUSAI provides a venue for an emerging
multi-disciplinary community of researchers that work on Ambient
Intelligence.
Ambient Intelligence represents a vision of the future
where we shall be surrounded by electronic environments, sensitive
and responsive to people.
Ambient intelligence technologies are
expected to combine concepts of ubiquitous computing and intelligent
systems, putting humans at the center of technological developments.</p><p id="id2645914">Ambient Intelligence represents a long-term objective for European
research bringing together researchers across multiple disciplines:
computer science, electronics and mechanical engineering, design,
architecture, social sciences, software engineering.</p><p id="id2645923">EUSAI '04
presented research in the areas of Ubiquitous computing, Context
Awareness, Intelligence and Natural user-system interaction.
Conference Proceedings are available from Springer Verlag in the
Lecture Notes in Computer Science series.</p></subsection><subsection id="uid150"><bodyTitle id="id2645938">PETS '04: Performance Evaluation for Tracking and Surveillance</bodyTitle><p id="id2645943">James L. Crowley organized the Sixth IEEE International Workshop on
Performance Evaluation of Tracking and Surveillance (PETS 2004), held
in Prague on 10 May 2004 in collaboration with ECCV '04.</p><p id="id2645955">The theme for PETS '04 was the automatic observation of human
activity in public places.
Six scenarios were recorded with a
wide-angle camera lens in the entrance lobby of the INRIA Rhône-Alpes
research laboratory in Montbonnot, France, using members of the CAVIAR
project as actors.
Activities included a person walking in a
straight line (3 sequences), a person browsing at information
displays (5 sequences), behaviours while seated in a chair (3
sequences), persons abandoning packages (5 sequences), groups of
people encountering (6 sequences), and people fighting (4
sequences).
For each scenario, a ground-truth file has been constructed to
indicate a bounding box for each individual, activity labels for each
individual (appear, disappear, occluded, inactive, active, walking,
running), a scenario label for each individual (fighter role, browser
role, left victim role, leaving group role, walker role, left object
role), a situation label for each frame (moving, inactive, browsing),
and a scenario label for each frame (browsing, immobile, walking,
drop down). These ground truth files have been made public for half
of the sequences for use as training data. The PETS challenge was to
demonstrate automatic labeling for the remaining sequences.</p>
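<p>To make the structure of these ground-truth files concrete, the sketch below shows one plausible in-memory representation of a labeled frame; the field names are invented for illustration and do not reproduce the actual CAVIAR file format.</p><html:pre>
# Illustrative representation of the PETS '04 / CAVIAR ground truth
# described above; the real data sets use their own file format.
from dataclasses import dataclass
from typing import List, Tuple

ACTIVITY_LABELS = ("appear", "disappear", "occluded", "inactive",
                   "active", "walking", "running")
ROLE_LABELS = ("fighter", "browser", "left victim", "leaving group",
               "walker", "left object")

@dataclass
class IndividualAnnotation:
    track_id: int
    bbox: Tuple[int, int, int, int]   # x, y, width, height
    activity: str                     # one of ACTIVITY_LABELS
    role: str                         # one of ROLE_LABELS

@dataclass
class FrameAnnotation:
    frame_index: int
    individuals: List[IndividualAnnotation]
    situation: str   # per-frame situation: moving, inactive, browsing
    scenario: str    # per-frame scenario: browsing, immobile, walking, drop down
</html:pre><p id="id2646008">Authors were asked to describe the tracking and recognition methods,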
estimate or measure computational costs, and present error rates
obtained with the published ground truth. Authors were also invited
to propose new performance evaluation metrics that might be of
interest.</p><p id="id2646017">The workshop was attended by 27 registered participants, joined by
several unregistered "walk-ins". The program was composed of 11
presentations, selected from 12 papers submitted to the review
process. The PETS '04 workshop has helped to establish the CAVIAR
labeled data sets as a widely used benchmark for
the community.</p><p id="id2646037">Datasets are available on the FGNet web server, maintained by PRIMA
(www-prima.inrialpes.fr/FGnet).</p></subsection><subsection id="uid151"><bodyTitle id="id2646050">Pointing '04: International Workshop on Deictic Gestures</bodyTitle><p id="id2646055">Project PRIMA organized "Pointing 2004: International Workshop on
Visual Observation of Deictic Gestures" in Cambridge, UK, on the 22
August 2004, in conjunction with the International Conference on
Pattern Recognition (ICPR). The workshop was attended by 29
participants and included 8 formal presentations and extensive
informal discussion after each presentation.</p><p id="id2646080">The theme for the POINTING '04 workshop was pointing (or deictic)
gestures.
Project PRIMA has prepared video images and sequences of
people pointing at targets with their hands and faces.
Data has been
prepared by projecting a target on a wall using a steerable video
projector. Subjects were seated in a room at a distance of 3 meters
from the wall and asked to point at the target with their right hand
and with their face.
Video sequences were recorded from four cameras
placed at different positions around the subject.
Still images were
also obtained for each gesture. For each gesture a ground truth was
automatically recorded in the form of the known target position.</p><p id="id2646109">Participants have been invited to compete in recognizing or
estimating the target position with four categories of data:
Stereo
image pairs of hand pointing.
Monocular images sequences of hand
pointing, monocular images of face pointing.
A third data set has
been included in which pointing gestures are observed from a hat
mounted camera.</p><p id="id2646119">Two sets of sequences and still image sets have
been published on the workshop web site.
Ground truth data has been
provided for the first set, but withheld from the second.
For each
image or sequence, participants have been invited to determine:
precision of target estimation and probability of failure.
The workshop has been organized with three objectives:</p><orderedlist id="id2646130"><li id="uid152"><p id="id2646140">To provide encourage comparative evaluation of vision techniques.</p></li><li id="uid153"><p id="id2646152">To encourage the definition and use of new metrics for
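<p>The following sketch illustrates one plausible way to compute these two quantities from estimated versus true target positions; the error measure and the failure threshold are assumptions, not the workshop's official evaluation protocol.</p><html:pre>
# Illustrative evaluation in the spirit described above: compare estimated
# target positions on the wall against the recorded ground truth.
import math
from typing import List, Tuple

Point = Tuple[float, float]   # target position on the wall, in metres

def target_error(estimate: Point, truth: Point) -> float:
    """Euclidean distance between estimated and true target positions."""
    return math.hypot(estimate[0] - truth[0], estimate[1] - truth[1])

def evaluate(estimates: List[Point], truths: List[Point],
             failure_threshold: float = 0.25) -> Tuple[float, float]:
    """Return (mean target error, probability of failure). A trial counts
    as a failure when its error exceeds the (assumed) threshold."""
    errors = [target_error(e, t) for e, t in zip(estimates, truths)]
    failures = sum(1 for err in errors if err > failure_threshold)
    p_fail = failures / len(errors) if errors else 0.0
    mean_err = sum(errors) / len(errors) if errors else float("nan")
    return mean_err, p_fail
</html:pre><p>The workshop has been organized with three objectives:</p><orderedlist id="id2646130"><li id="uid152"><p id="id2646140">To encourage comparative evaluation of vision techniques.</p></li><li id="uid153"><p id="id2646152">To encourage the definition and use of new metrics for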
performance evaluation.</p></li><li id="uid154"><p id="id2646165">To provide a publicly available benchmark by which future
algorithms may be compared.</p></li></orderedlist><p id="id2646171">The data sets will remain available via the FGNET web site maintained
by Project PRIMA.</p></subsection><subsection id="uid155"><bodyTitle id="id2646183">RFIA '04: Reconnaissance des Formes et Intelligence Artificielle</bodyTitle><p id="id2646188">James L. Crowley was a member of the editorial board for RFIA 2004:
the ``14ème Congrès Francophone AFRIF-AFIA de Reconnaissance des
Formes et Intelligence Artificielle'', 28-30 January 2004, in
Toulouse.</p></subsection><subsection id="uid156"><bodyTitle id="id2646207">Participation on Conference Program Committees</bodyTitle><p id="id2646212">James L. Crowley served as a member of the program committee for the
following conferences.</p><simplelist id="id2646217"><li id="uid157"><p id="id2646226">CVPR 2005, IEEE International Conference on
Computer Vision and Pattern Recognition, 2005</p></li><li id="uid158"><p id="id2646238">ICRA 2005,
IEEE International Conference on Robotics and Automation, 2005</p></li><li id="uid159"><p id="id2646250">EUSAI 2004, European Symposium on Ambient Intelligence,
Eindhoven, Oct. 2004</p></li><li id="uid160"><p id="id2646262">ICMI 2004, IEEE Conf. on Multimodal Interaction, Penn State, Oct. 2004</p></li><li id="uid161"><p id="id2646274">ICPR 2004, International Conference on Pattern Recognition,
Cambridge, UK, August 2004</p></li><li id="uid162"><p id="id2646287">CVPR 2004, IEEE Conf. on Computer
Vision and Pattern Recognition, Washington, June 2004</p></li><li id="uid163"><p id="id2646299">FG 2004, International Conference on Automatic Face and Gesture
Recognition, June 2004.</p></li><li id="uid164"><p id="id2646312">ECCV 2004, European Conference on Computer Vision, Prague, June 2004.</p></li><li id="uid165"><p id="id2646324">IAS 2004, Intelligent Autonomous Systems, March 2004.</p></li><li id="uid166"><p id="id2646336">RFIA 2004, Reconnaissance des Formes et Intelligence
Artificielle, Jan 2004</p></li></simplelist></subsection><subsection id="uid167"><bodyTitle id="id2646348">Participation on Advisory Panels</bodyTitle><p id="id2646353">During April and May 2004, James L. Crowley served on the evaluation
panel for the FET proactive programme "Presence".</p></subsection><subsection id="uid168"><bodyTitle id="id2646367">Invited Plenary Presentations at Conferences</bodyTitle><simplelist id="id2646372"><li id="uid169"><p id="id2646381">"What does it mean to see? Recent progress in
Computer Vision", Plenary presentation at the Polish National
Robotics Conference, Poland, June 2004</p></li><li id="uid170"><p id="id2646399">"Context Driven Observation of Human Activity", presentation at the
colloquium PSIPS: Processing Sensory Information for Proactive
Systems, Oulu, June 2004.</p></li><li id="uid171"><p id="id2646417">"Context Driven Observation of Human Activity", invited presentation
at the France-Taiwan Colloquium on Information Technology,
Ecole Polytechnique, Avril 2004.</p></li></simplelist></subsection></subsection></diffusion><biblio id="bibliography" html="bibliography" numero="10" titre="Bibliography"><biblStruct rend="refer" n="cite:Chomat00" type="inproceedings" id="bid64" default="NO" TEIform="biblStruct"><analytic id="id2646458" TEIform="analytic"><title id="id2646464" level="a" TEIform="title">Local Scale Selection for Gaussian Based Description Techniques</title><author id="id2646475" TEIform="author"><persName TEIform="persName"><foreName id="id2646484" full="yes" TEIform="foreName">O.</foreName><surname id="id2646494" full="yes" TEIform="surname">Chomat</surname></persName><persName TEIform="persName"><foreName id="id2646510" full="yes" TEIform="foreName">V.</foreName><surname id="id2646520" full="yes" TEIform="surname">Colin de Verdière</surname></persName><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2646537" full="yes" TEIform="foreName">D.</foreName><surname id="id2646546" full="yes" TEIform="surname">Hall</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2646563" full="yes" TEIform="foreName">J.</foreName><surname id="id2646572" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2646585" TEIform="monogr"><title id="id2646592" level="m" TEIform="title">European Conference on Computer Vision, Dublin, Ireland</title><imprint id="id2646602" TEIform="imprint"><dateStruct id="id2646606" full="yes" TEIform="dateStruct"><month id="id2646615" full="yes" TEIform="month">June</month><year id="id2646624" full="yes" TEIform="year">2000</year></dateStruct><biblScope id="id2646636" type="pages" TEIform="biblScope">I 117–133</biblScope></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Crowley97b" type="inproceedings" id="bid67" default="NO" TEIform="biblStruct"><analytic id="id2646698" TEIform="analytic"><title id="id2646704" level="a" TEIform="title">Multi–Modal Tracking of Faces for Video Communications</title><author id="id2646715" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2646726" full="yes" TEIform="foreName">J.</foreName><surname id="id2646735" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2646752" full="yes" TEIform="foreName">F.</foreName><surname id="id2646761" full="yes" TEIform="surname">Bérard</surname></persName></author></analytic><monogr id="id2646774" TEIform="monogr"><title id="id2646781" level="m" TEIform="title">IEEE Conference on Computer Vision and Pattern Recognition, CVPR '97, San Juan, Puerto Rico</title><imprint id="id2646792" TEIform="imprint"><dateStruct id="id2646796" full="yes" TEIform="dateStruct"><month id="id2646805" full="yes" TEIform="month">June</month><year id="id2646814" full="yes" TEIform="year">1997</year></dateStruct><biblScope id="id2646826" type="pages" TEIform="biblScope">640–645</biblScope></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Crowley00" type="article" id="bid59" default="NO" TEIform="biblStruct"><analytic id="id2646887" TEIform="analytic"><title id="id2646893" level="a" TEIform="title">Things that See: Machine Perception for Human Computer Interaction</title><author id="id2646903" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2646914" full="yes" TEIform="foreName">J. 
L.</foreName><surname id="id2646923" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2646939" full="yes" TEIform="foreName">J.</foreName><surname id="id2646949" full="yes" TEIform="surname">Coutaz</surname></persName><persName TEIform="persName"><foreName id="id2646965" full="yes" TEIform="foreName">F.</foreName><surname id="id2646975" full="yes" TEIform="surname">Berard</surname></persName></author></analytic><monogr id="id2646987" TEIform="monogr"><title id="id2646995" level="j" TEIform="title">Communications of the A.C.M.</title><imprint id="id2647004" TEIform="imprint"><biblScope id="id2647010" type="volume" TEIform="biblScope">43</biblScope><biblScope id="id2647020" type="number" TEIform="biblScope">3</biblScope><dateStruct id="id2647028" full="yes" TEIform="dateStruct"><month id="id2647036" full="yes" TEIform="month">March</month><year id="id2647045" full="yes" TEIform="year">2000</year></dateStruct><biblScope id="id2647057" type="pages" TEIform="biblScope">54-64</biblScope></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Crowley02b" type="inproceedings" id="bid66" default="NO" TEIform="biblStruct"><analytic id="id2647118" TEIform="analytic"><title id="id2647125" level="a" TEIform="title">Using Context to Structure Perceptual Processes for Observing Activity</title><author id="id2647135" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2647145" full="yes" TEIform="foreName">J.</foreName><surname id="id2647155" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2647171" full="yes" TEIform="foreName">J.</foreName><surname id="id2647181" full="yes" TEIform="surname">Coutaz</surname></persName><persName TEIform="persName"><foreName id="id2647197" full="yes" TEIform="foreName">G.</foreName><surname id="id2647206" full="yes" TEIform="surname">Rey</surname></persName><persName key="prima-2005-id2245460" TEIform="persName"><foreName id="id2647223" full="yes" TEIform="foreName">P.</foreName><surname id="id2647232" full="yes" TEIform="surname">Reignier</surname></persName></author></analytic><monogr id="id2647245" TEIform="monogr"><title id="id2647252" level="m" TEIform="title">UBICOMP, Sweden</title><imprint id="id2647261" TEIform="imprint"><dateStruct id="id2647266" full="yes" TEIform="dateStruct"><month id="id2647274" full="yes" TEIform="month">September</month><year id="id2647284" full="yes" TEIform="year">2002</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Crowley95" type="article" id="bid62" default="NO" TEIform="biblStruct"><analytic id="id2647348" TEIform="analytic"><title id="id2647354" level="a" TEIform="title">Integration and Control of Reactive Visual Processes</title><author id="id2647364" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2647374" full="yes" TEIform="foreName">J. 
L.</foreName><surname id="id2647384" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2647396" TEIform="monogr"><title id="id2647404" level="j" TEIform="title">Robotics and Autonomous Systems</title><imprint id="id2647413" TEIform="imprint"><biblScope id="id2647419" type="volume" TEIform="biblScope">15</biblScope><biblScope id="id2647428" type="number" TEIform="biblScope">1</biblScope><dateStruct id="id2647437" full="yes" TEIform="dateStruct"><month id="id2647445" full="yes" TEIform="month">December</month><year id="id2647454" full="yes" TEIform="year">1995</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Crowley97" type="article" id="bid61" default="NO" TEIform="biblStruct"><analytic id="id2647518" TEIform="analytic"><title id="id2647524" level="a" TEIform="title">Vision for Man machine interaction</title><author id="id2647534" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2647544" full="yes" TEIform="foreName">J. L.</foreName><surname id="id2647554" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2647566" TEIform="monogr"><title id="id2647573" level="j" TEIform="title">Robotics and Autonomous Systems</title><imprint id="id2647583" TEIform="imprint"><biblScope id="id2647589" type="volume" TEIform="biblScope">19</biblScope><biblScope id="id2647598" type="number" TEIform="biblScope">3-4</biblScope><dateStruct id="id2647607" full="yes" TEIform="dateStruct"><month id="id2647615" full="yes" TEIform="month">April</month><year id="id2647624" full="yes" TEIform="year">1997</year></dateStruct><biblScope id="id2647636" type="pages" TEIform="biblScope">347-359</biblScope></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Legal01" type="article" id="bid58" default="NO" TEIform="biblStruct"><analytic id="id2647697" TEIform="analytic"><title id="id2647703" level="a" TEIform="title">Smart Office: An Intelligent Interactive Environment</title><author id="id2647713" TEIform="author"><persName TEIform="persName"><foreName id="id2647723" full="yes" TEIform="foreName">C. L.</foreName><surname id="id2647733" full="yes" TEIform="surname">Gal</surname></persName><persName TEIform="persName"><foreName id="id2647749" full="yes" TEIform="foreName">J.</foreName><surname id="id2647759" full="yes" TEIform="surname">Martin</surname></persName><persName key="prima-2005-id2245482" TEIform="persName"><foreName id="id2647775" full="yes" TEIform="foreName">A.</foreName><surname id="id2647784" full="yes" TEIform="surname">Lux</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2647801" full="yes" TEIform="foreName">J. 
L.</foreName><surname id="id2647810" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2647823" TEIform="monogr"><title id="id2647830" level="j" TEIform="title">IEEE Intelligent Systems</title><imprint id="id2647840" TEIform="imprint"><dateStruct id="id2647844" full="yes" TEIform="dateStruct"><month id="id2647852" full="yes" TEIform="month">July/August</month><year id="id2647862" full="yes" TEIform="year">2001</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Hall00" type="inproceedings" id="bid63" default="NO" TEIform="biblStruct"><analytic id="id2647926" TEIform="analytic"><title id="id2647932" level="a" TEIform="title">Object Recognition using Coloured Receptive Fields</title><author id="id2647942" TEIform="author"><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2647952" full="yes" TEIform="foreName">D.</foreName><surname id="id2647962" full="yes" TEIform="surname">Hall</surname></persName><persName TEIform="persName"><foreName id="id2647978" full="yes" TEIform="foreName">V.</foreName><surname id="id2647988" full="yes" TEIform="surname">Colin de Verdière</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2648004" full="yes" TEIform="foreName">J.</foreName><surname id="id2648014" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2648027" TEIform="monogr"><title id="id2648034" level="m" TEIform="title">European Conference on Computer Vision, Dublin, Ireland</title><imprint id="id2648044" TEIform="imprint"><dateStruct id="id2648048" full="yes" TEIform="dateStruct"><month id="id2648057" full="yes" TEIform="month">June</month><year id="id2648066" full="yes" TEIform="year">2000</year></dateStruct><biblScope id="id2648078" type="pages" TEIform="biblScope">I 164–177</biblScope></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Schiele00" type="article" id="bid60" default="NO" TEIform="biblStruct"><analytic id="id2648139" TEIform="analytic"><title id="id2648146" level="a" TEIform="title">Recognition without Correspondence using Multidimensional Receptive Field Histograms</title><author id="id2648156" TEIform="author"><persName TEIform="persName"><foreName id="id2648167" full="yes" TEIform="foreName">B.</foreName><surname id="id2648176" full="yes" TEIform="surname">Schiele</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2648192" full="yes" TEIform="foreName">J.</foreName><surname id="id2648202" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2648215" TEIform="monogr"><title id="id2648222" level="j" TEIform="title">International Journal of Computer Vision</title><imprint id="id2648231" TEIform="imprint"><biblScope id="id2648238" type="volume" TEIform="biblScope">36</biblScope><biblScope id="id2648247" type="number" TEIform="biblScope">1</biblScope><dateStruct id="id2648256" full="yes" TEIform="dateStruct"><month id="id2648263" full="yes" TEIform="month">January</month><year id="id2648273" full="yes" TEIform="year">2000</year></dateStruct><biblScope id="id2648285" type="pages" TEIform="biblScope">31–50</biblScope></imprint></monogr></biblStruct><biblStruct rend="refer" n="cite:Schwerdt00b" type="inproceedings" id="bid65" default="NO" TEIform="biblStruct"><analytic id="id2648346" TEIform="analytic"><title id="id2648352" level="a" TEIform="title">Robust Face Tracking using Color</title><author id="id2648362" 
TEIform="author"><persName TEIform="persName"><foreName id="id2648372" full="yes" TEIform="foreName">K.</foreName><surname id="id2648381" full="yes" TEIform="surname">Schwerdt</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2648398" full="yes" TEIform="foreName">J.</foreName><surname id="id2648407" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2648420" TEIform="monogr"><title id="id2648427" level="m" TEIform="title">International Conference on Automatic Face and Gesture Recognition, Grenoble, France</title><imprint id="id2648438" TEIform="imprint"><dateStruct id="id2648442" full="yes" TEIform="dateStruct"><month id="id2648451" full="yes" TEIform="month">March</month><year id="id2648460" full="yes" TEIform="year">2000</year></dateStruct><biblScope id="id2648472" type="pages" TEIform="biblScope">90–95</biblScope></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Coutaz05" type="article" subtype="nonparu" id="bid42" default="NO" TEIform="biblStruct"><analytic id="id2648533" TEIform="analytic"><title id="id2648540" level="a" TEIform="title">Contex is Key</title><author id="id2648549" TEIform="author"><persName TEIform="persName"><foreName id="id2648559" full="yes" TEIform="foreName">J.</foreName><surname id="id2648569" full="yes" TEIform="surname">Coutaz</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2648585" full="yes" TEIform="foreName">J.</foreName><surname id="id2648594" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2648611" full="yes" TEIform="foreName">S.</foreName><surname id="id2648620" full="yes" TEIform="surname">Dobson</surname></persName><persName TEIform="persName"><foreName id="id2648637" full="yes" TEIform="foreName">D.</foreName><surname id="id2648646" full="yes" TEIform="surname">Garlan</surname></persName></author></analytic><monogr id="id2648659" TEIform="monogr"><title id="id2648666" level="j" TEIform="title">Communications of the ACM, Special issue on the Disappearing Computer</title><note id="id2648677" anchored="yes" place="unspecified" type="bnote">to appear</note><imprint id="id2648688" TEIform="imprint"><dateStruct id="id2648693" full="yes" TEIform="dateStruct"><month id="id2648701" full="yes" TEIform="month">March</month><year id="id2648711" full="yes" TEIform="year">2005</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Crowley04" type="article" id="bid51" default="NO" TEIform="biblStruct"><analytic id="id2648774" TEIform="analytic"><title id="id2648781" level="a" TEIform="title">Introduction to the special issue: International Conference on Vision Systems</title><author id="id2648791" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2648801" full="yes" TEIform="foreName">J.</foreName><surname id="id2648811" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2648827" full="yes" TEIform="foreName">J.</foreName><surname id="id2648837" full="yes" TEIform="surname">Piater</surname></persName></author></analytic><monogr id="id2648849" TEIform="monogr"><title id="id2648857" level="j" TEIform="title">Machine Vision and Applications</title><imprint id="id2648866" TEIform="imprint"><biblScope id="id2648872" type="volume" TEIform="biblScope">16</biblScope><biblScope id="id2648882" type="number" TEIform="biblScope">1</biblScope><dateStruct id="id2648890" 
full="yes" TEIform="dateStruct"><year id="id2648898" full="yes" TEIform="year">2004</year></dateStruct><biblScope id="id2648910" type="pages" TEIform="biblScope">4–5</biblScope></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Hall04c" type="article" id="bid53" default="NO" TEIform="biblStruct"><analytic id="id2648971" TEIform="analytic"><title id="id2648977" level="a" TEIform="title">Brand Identification using Gaussian Derivative Histograms</title><author id="id2648987" TEIform="author"><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2648997" full="yes" TEIform="foreName">D.</foreName><surname id="id2649007" full="yes" TEIform="surname">Hall</surname></persName><persName TEIform="persName"><foreName id="id2649023" full="yes" TEIform="foreName">F.</foreName><surname id="id2649033" full="yes" TEIform="surname">Pélisson</surname></persName><persName TEIform="persName"><foreName id="id2649049" full="yes" TEIform="foreName">O.</foreName><surname id="id2649059" full="yes" TEIform="surname">Riff</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2649075" full="yes" TEIform="foreName">J.</foreName><surname id="id2649085" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2649097" TEIform="monogr"><title id="id2649104" level="j" TEIform="title">Machine Vision and Applications</title><imprint id="id2649114" TEIform="imprint"><biblScope id="id2649120" type="volume" TEIform="biblScope">16</biblScope><biblScope id="id2649129" type="number" TEIform="biblScope">1</biblScope><dateStruct id="id2649138" full="yes" TEIform="dateStruct"><year id="id2649146" full="yes" TEIform="year">2004</year></dateStruct><biblScope id="id2649158" type="pages" TEIform="biblScope">41–46</biblScope></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Lux04" type="article" id="bid52" default="NO" TEIform="biblStruct"><analytic id="id2649219" TEIform="analytic"><title id="id2649225" level="a" TEIform="title">The Imalab Method for Vision Systems</title><author id="id2649234" TEIform="author"><persName key="prima-2005-id2245482" TEIform="persName"><foreName id="id2649245" full="yes" TEIform="foreName">A.</foreName><surname id="id2649254" full="yes" TEIform="surname">Lux</surname></persName></author></analytic><monogr id="id2649267" TEIform="monogr"><title id="id2649274" level="j" TEIform="title">Machine Vision and Applications</title><imprint id="id2649284" TEIform="imprint"><biblScope id="id2649290" type="volume" TEIform="biblScope">16</biblScope><biblScope id="id2649299" type="number" TEIform="biblScope">1</biblScope><dateStruct id="id2649308" full="yes" TEIform="dateStruct"><year id="id2649316" full="yes" TEIform="year">2004</year></dateStruct><biblScope id="id2649328" type="pages" TEIform="biblScope">21–26</biblScope></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Borkowski04" type="inproceedings" id="bid48" default="NO" TEIform="biblStruct"><analytic id="id2649388" TEIform="analytic"><title id="id2649394" level="a" TEIform="title">Spatial Control of Interactive Surfaces in an Augmented Environment</title><author id="id2649403" TEIform="author"><persName key="prima-2005-id2244828" TEIform="persName"><foreName id="id2649415" full="yes" TEIform="foreName">S.</foreName><surname id="id2649424" full="yes" TEIform="surname">Borkowski</surname></persName><persName key="prima-2005-id2244923" TEIform="persName"><foreName id="id2649440" full="yes" TEIform="foreName">J.</foreName><surname 
id="id2649450" full="yes" TEIform="surname">Letessier</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2649466" full="yes" TEIform="foreName">J.</foreName><surname id="id2649476" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2649488" TEIform="monogr"><title id="id2649496" level="m" TEIform="title">European Conference on Human Computer Interaction, EHCI 04</title><imprint id="id2649506" TEIform="imprint"><dateStruct id="id2649510" full="yes" TEIform="dateStruct"><month id="id2649519" full="yes" TEIform="month">July</month><year id="id2649528" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Borkowski04b" type="inproceedings" id="bid49" default="NO" TEIform="biblStruct"><analytic id="id2649592" TEIform="analytic"><title id="id2649598" level="a" TEIform="title">Camera-projector sensing and actuating abilities</title><author id="id2649608" TEIform="author"><persName key="prima-2005-id2244828" TEIform="persName"><foreName id="id2649618" full="yes" TEIform="foreName">S.</foreName><surname id="id2649628" full="yes" TEIform="surname">Borkowski</surname></persName><persName TEIform="persName"><foreName id="id2649644" full="yes" TEIform="foreName">S.</foreName><surname id="id2649654" full="yes" TEIform="surname">Sabry</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2649670" full="yes" TEIform="foreName">J.</foreName><surname id="id2649680" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2649692" TEIform="monogr"><title id="id2649699" level="m" TEIform="title">Polish National Robotics Conference</title><imprint id="id2649709" TEIform="imprint"><dateStruct id="id2649713" full="yes" TEIform="dateStruct"><month id="id2649722" full="yes" TEIform="month">June</month><year id="id2649731" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Brdiczka05" type="inproceedings" subtype="nonparu" id="bid46" default="NO" TEIform="biblStruct"><analytic id="id2649794" TEIform="analytic"><title id="id2649801" level="a" TEIform="title">Automatic Development of an Abstract Context Model for an Intelligent Environment</title><author id="id2649811" TEIform="author"><persName key="prima-2005-id2244950" TEIform="persName"><foreName id="id2649822" full="yes" TEIform="foreName">O.</foreName><surname id="id2649831" full="yes" TEIform="surname">Brdiczka</surname></persName><persName key="prima-2005-id2245460" TEIform="persName"><foreName id="id2649847" full="yes" TEIform="foreName">P.</foreName><surname id="id2649857" full="yes" TEIform="surname">Reignier</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2649873" full="yes" TEIform="foreName">J.</foreName><surname id="id2649883" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2649895" TEIform="monogr"><title id="id2649903" level="m" TEIform="title">International Conference on Pervasive Computing, PerCom 05, Kuai, Hawaii</title><note id="id2649913" anchored="yes" place="unspecified" type="bnote">to appear</note><imprint id="id2649925" TEIform="imprint"><dateStruct id="id2649929" full="yes" TEIform="dateStruct"><month id="id2649938" full="yes" TEIform="month">March</month><year id="id2649947" full="yes" TEIform="year">2005</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" 
n="cite:Caporossi04" type="inproceedings" id="bid54" default="NO" TEIform="biblStruct"><analytic id="id2650011" TEIform="analytic"><title id="id2650017" level="a" TEIform="title">Robust visual tracking from dynamic control of processing</title><author id="id2650027" TEIform="author"><persName key="prima-2005-id2244745" TEIform="persName"><foreName id="id2650038" full="yes" TEIform="foreName">A.</foreName><surname id="id2650047" full="yes" TEIform="surname">Caporossi</surname></persName><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2650064" full="yes" TEIform="foreName">D.</foreName><surname id="id2650073" full="yes" TEIform="surname">Hall</surname></persName><persName key="prima-2005-id2245460" TEIform="persName"><foreName id="id2650089" full="yes" TEIform="foreName">P.</foreName><surname id="id2650099" full="yes" TEIform="surname">Reignier</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2650115" full="yes" TEIform="foreName">J.</foreName><surname id="id2650125" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2650137" TEIform="monogr"><title id="id2650145" level="m" TEIform="title">Performance Evaluation of Tracking and Surveillance, Prague, Czech Republic</title><imprint id="id2650155" TEIform="imprint"><dateStruct id="id2650159" full="yes" TEIform="dateStruct"><month id="id2650168" full="yes" TEIform="month">May</month><year id="id2650177" full="yes" TEIform="year">2004</year></dateStruct><biblScope id="id2650189" type="pages" TEIform="biblScope">23–31</biblScope></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Crowley04b" type="inproceedings" id="bid47" default="NO" TEIform="biblStruct"><analytic id="id2650250" TEIform="analytic"><title id="id2650256" level="a" TEIform="title">Context Aware Observation of Human Activity</title><author id="id2650265" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2650276" full="yes" TEIform="foreName">J.</foreName><surname id="id2650286" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2650298" TEIform="monogr"><title id="id2650306" level="m" TEIform="title">PSIPS, Oulu, Finland</title><imprint id="id2650315" TEIform="imprint"><dateStruct id="id2650319" full="yes" TEIform="dateStruct"><year id="id2650328" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Gourier04b" type="inproceedings" id="bid55" default="NO" TEIform="biblStruct"><analytic id="id2650391" TEIform="analytic"><title id="id2650397" level="a" TEIform="title">Estimating Face orientation from Robust Detection of Salient Facial Structures</title><author id="id2650408" TEIform="author"><persName key="prima-2005-id2244909" TEIform="persName"><foreName id="id2650418" full="yes" TEIform="foreName">N.</foreName><surname id="id2650428" full="yes" TEIform="surname">Gourier</surname></persName><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2650444" full="yes" TEIform="foreName">D.</foreName><surname id="id2650453" full="yes" TEIform="surname">Hall</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2650470" full="yes" TEIform="foreName">J.</foreName><surname id="id2650479" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2650492" TEIform="monogr"><title id="id2650499" level="m" TEIform="title">FG Net Workshop on Visual 
Observation of Deictic Gestures (POINTING)</title><imprint id="id2650509" TEIform="imprint"><dateStruct id="id2650514" full="yes" TEIform="dateStruct"><month id="id2650522" full="yes" TEIform="month">August</month><year id="id2650532" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Gourier04" type="inproceedings" id="bid56" default="NO" TEIform="biblStruct"><analytic id="id2650595" TEIform="analytic"><title id="id2650601" level="a" TEIform="title">Facial feature detection robust to pose, illumination, and identity</title><author id="id2650611" TEIform="author"><persName key="prima-2005-id2244909" TEIform="persName"><foreName id="id2650622" full="yes" TEIform="foreName">N.</foreName><surname id="id2650631" full="yes" TEIform="surname">Gourier</surname></persName><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2650648" full="yes" TEIform="foreName">D.</foreName><surname id="id2650657" full="yes" TEIform="surname">Hall</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2650673" full="yes" TEIform="foreName">J.</foreName><surname id="id2650683" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2650696" TEIform="monogr"><title id="id2650703" level="m" TEIform="title">International Conference on Systems, Man and Cybernetics, Special track on Automatic Facial Expression Analysis</title><imprint id="id2650714" TEIform="imprint"><dateStruct id="id2650718" full="yes" TEIform="dateStruct"><month id="id2650727" full="yes" TEIform="month">October</month><year id="id2650736" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Hall04" type="inproceedings" id="bid57" default="NO" TEIform="biblStruct"><analytic id="id2650799" TEIform="analytic"><title id="id2650806" level="a" TEIform="title">Détection du visage par caractéristiques génériques calculées à partir des images de luminance</title><author id="id2650818" TEIform="author"><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2650829" full="yes" TEIform="foreName">D.</foreName><surname id="id2650838" full="yes" TEIform="surname">Hall</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2650855" full="yes" TEIform="foreName">J.</foreName><surname id="id2650864" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2650877" TEIform="monogr"><title id="id2650884" level="m" TEIform="title">Reconnaissance des formes et intelligence artificelle, Toulouse, France</title><imprint id="id2650894" TEIform="imprint"><dateStruct id="id2650899" full="yes" TEIform="dateStruct"><month id="id2650907" full="yes" TEIform="month">January</month><year id="id2650917" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Letessier04" type="inproceedings" id="bid44" default="NO" TEIform="biblStruct"><analytic id="id2650980" TEIform="analytic"><title id="id2650987" level="a" TEIform="title">Visual tracking of bare fingers for interactive surfaces</title><author id="id2650997" TEIform="author"><persName key="prima-2005-id2244923" TEIform="persName"><foreName id="id2651007" full="yes" TEIform="foreName">J.</foreName><surname id="id2651016" full="yes" TEIform="surname">Letessier</surname></persName><persName TEIform="persName"><foreName id="id2651033" full="yes" 
TEIform="foreName">F.</foreName><surname id="id2651042" full="yes" TEIform="surname">Berard</surname></persName></author></analytic><monogr id="id2651055" TEIform="monogr"><editor id="id2651061" role="editor" TEIform="editor"><persName TEIform="persName"><foreName id="id2651075" full="yes" TEIform="foreName">A.</foreName><surname id="id2651084" full="yes" TEIform="surname">Press</surname></persName></editor><title id="id2651097" level="m" TEIform="title">17th annual ACM symposium on User interface software and technology (UIST'04), Santa Fe, USA</title><imprint id="id2651108" TEIform="imprint"><dateStruct id="id2651113" full="yes" TEIform="dateStruct"><month id="id2651121" full="yes" TEIform="month">October</month><year id="id2651131" full="yes" TEIform="year">2004</year></dateStruct><biblScope id="id2651143" type="pages" TEIform="biblScope">119–122</biblScope></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Letessier04b" type="inproceedings" id="bid43" default="NO" TEIform="biblStruct"><analytic id="id2651204" TEIform="analytic"><title id="id2651210" level="a" TEIform="title">Video sequence database for 3D finger pointing tasks</title><author id="id2651220" TEIform="author"><persName key="prima-2005-id2244923" TEIform="persName"><foreName id="id2651230" full="yes" TEIform="foreName">J.</foreName><surname id="id2651240" full="yes" TEIform="surname">Letessier</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2651256" full="yes" TEIform="foreName">J.</foreName><surname id="id2651266" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2651278" TEIform="monogr"><title id="id2651286" level="m" TEIform="title">ICPR Workshop on Pointing Gestures (Pointing'04), Cambridge, United Kingdom</title><imprint id="id2651295" TEIform="imprint"><dateStruct id="id2651301" full="yes" TEIform="dateStruct"><month id="id2651309" full="yes" TEIform="month">September</month><year id="id2651319" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Hai04" type="inproceedings" id="bid50" default="NO" TEIform="biblStruct"><analytic id="id2651382" TEIform="analytic"><title id="id2651388" level="a" TEIform="title">A method for ridge extraction</title><author id="id2651398" TEIform="author"><persName key="prima-2005-id2244580" TEIform="persName"><foreName id="id2651408" full="yes" TEIform="foreName">T.</foreName><surname id="id2651417" full="yes" TEIform="surname">Tran</surname></persName><persName key="prima-2005-id2245482" TEIform="persName"><foreName id="id2651434" full="yes" TEIform="foreName">A.</foreName><surname id="id2651443" full="yes" TEIform="surname">Lux</surname></persName></author></analytic><monogr id="id2651456" TEIform="monogr"><title id="id2651463" level="m" TEIform="title">Asian Conference on Computer Vision</title><imprint id="id2651473" TEIform="imprint"><dateStruct id="id2651477" full="yes" TEIform="dateStruct"><year id="id2651486" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="year" n="cite:Villasen04" type="inproceedings" id="bid45" default="NO" TEIform="biblStruct"><analytic id="id2651548" TEIform="analytic"><title id="id2651555" level="a" TEIform="title">Experiments on the Construction of a Phonetically Balanced Corpus from the Web</title><author id="id2651565" TEIform="author"><persName TEIform="persName"><foreName id="id2651576" full="yes" TEIform="foreName">L.</foreName><surname 
id="id2651585" full="yes" TEIform="surname">Villaseñor-Pineda</surname></persName><persName TEIform="persName"><foreName id="id2651602" full="yes" TEIform="foreName">M.</foreName><surname id="id2651612" full="yes" TEIform="surname">Montes-y-Gómez</surname></persName><persName key="prima-2005-id2245437" TEIform="persName"><foreName id="id2651628" full="yes" TEIform="foreName">D.</foreName><surname id="id2651638" full="yes" TEIform="surname">Vaufreydaz</surname></persName><persName TEIform="persName"><foreName id="id2651654" full="yes" TEIform="foreName">J.-F.</foreName><surname id="id2651664" full="yes" TEIform="surname">Serignat</surname></persName></author></analytic><monogr id="id2651676" TEIform="monogr"><title id="id2651684" level="m" TEIform="title">Conference on Intelligent Text Processing and Computational Linguistics CICLing-2004, Seoul, Korea</title><title id="id2651695" level="s" TEIform="title">Lecture Notes in Computer Science</title><imprint id="id2651704" TEIform="imprint"><biblScope id="id2651710" type="volume" TEIform="biblScope">2945</biblScope><publisher id="id2651719" TEIform="publisher"><orgName id="id2651724" TEIform="orgName">Springer</orgName></publisher><dateStruct id="id2651732" full="yes" TEIform="dateStruct"><month id="id2651740" full="yes" TEIform="month">February</month><year id="id2651750" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Abramowitz65" type="book" id="bid15" default="NO" TEIform="biblStruct"><monogr id="id2651813" TEIform="monogr"><title id="id2651819" level="m" TEIform="title">Handbook of Mathematical Functions</title><author id="id2651829" TEIform="author"><persName TEIform="persName"><foreName id="id2651839" full="yes" TEIform="foreName">M.</foreName><surname id="id2651848" full="yes" TEIform="surname">Abramowitz</surname></persName><persName TEIform="persName"><foreName id="id2651865" full="yes" TEIform="foreName">I.</foreName><surname id="id2651874" full="yes" TEIform="surname">Stegun</surname></persName></author><imprint id="id2651886" TEIform="imprint"><publisher id="id2651892" TEIform="publisher"><orgName id="id2651897" TEIform="orgName">MIT Press</orgName></publisher><dateStruct id="id2651905" full="yes" TEIform="dateStruct"><year id="id2651914" full="yes" TEIform="year">1965</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Agarwal02" type="inproceedings" id="bid20" default="NO" TEIform="biblStruct"><analytic id="id2651977" TEIform="analytic"><title id="id2651983" level="a" TEIform="title">Learning a sparse Reprsentation for Object Detection</title><author id="id2651993" TEIform="author"><persName TEIform="persName"><foreName id="id2652003" full="yes" TEIform="foreName">S.</foreName><surname id="id2652013" full="yes" TEIform="surname">Agarwal</surname></persName><persName TEIform="persName"><foreName id="id2652029" full="yes" TEIform="foreName">D.</foreName><surname id="id2652039" full="yes" TEIform="surname">Roth</surname></persName></author></analytic><monogr id="id2652051" TEIform="monogr"><title id="id2652059" level="m" TEIform="title">European Conference on Computer Vision</title><imprint id="id2652067" TEIform="imprint"><dateStruct id="id2652073" full="yes" TEIform="dateStruct"><year id="id2652081" full="yes" TEIform="year">2002</year></dateStruct><biblScope id="id2652093" type="pages" TEIform="biblScope">113-130</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:allen84" type="article" id="bid35" 
default="NO" TEIform="biblStruct"><analytic id="id2652154" TEIform="analytic"><title id="id2652160" level="a" TEIform="title">Towards a general theory of action and time</title><author id="id2652169" TEIform="author"><persName TEIform="persName"><foreName id="id2652180" full="yes" TEIform="foreName">J.</foreName><surname id="id2652190" full="yes" TEIform="surname">Allen</surname></persName></author></analytic><monogr id="id2652202" TEIform="monogr"><title id="id2652209" level="j" TEIform="title">Artificial Intelligence</title><imprint id="id2652219" TEIform="imprint"><biblScope id="id2652225" type="volume" TEIform="biblScope">13</biblScope><dateStruct id="id2652234" full="yes" TEIform="dateStruct"><year id="id2652241" full="yes" TEIform="year">1984</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:MagicTable-03" type="inproceedings" id="bid30" default="NO" TEIform="biblStruct"><analytic id="id2652305" TEIform="analytic"><title id="id2652311" level="a" TEIform="title">The Magic Table: Computer-Vision Based Augmentation of a Whiteboard for Creative Meetings</title><author id="id2652322" TEIform="author"><persName TEIform="persName"><foreName id="id2652332" full="yes" TEIform="foreName">F.</foreName><surname id="id2652341" full="yes" TEIform="surname">Bérard</surname></persName></author></analytic><monogr id="id2652354" TEIform="monogr"><title id="id2652361" level="m" TEIform="title">Proceedings of the ICCV Workshop on Projector-Camera Systems</title><imprint id="id2652372" TEIform="imprint"><publisher id="id2652376" TEIform="publisher"><orgName id="id2652382" TEIform="orgName">IEEE Computer Society Press</orgName></publisher><dateStruct id="id2652390" full="yes" TEIform="dateStruct"><year id="id2652398" full="yes" TEIform="year">2003</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Belongie02" type="article" id="bid39" default="NO" TEIform="biblStruct"><analytic id="id2652462" TEIform="analytic"><title id="id2652468" level="a" TEIform="title">Shape Matching and Object Recognition Using Shape Context</title><author id="id2652478" TEIform="author"><persName TEIform="persName"><foreName id="id2652488" full="yes" TEIform="foreName">S.</foreName><surname id="id2652498" full="yes" TEIform="surname">Belongie</surname></persName><persName TEIform="persName"><foreName id="id2652514" full="yes" TEIform="foreName">J.</foreName><surname id="id2652524" full="yes" TEIform="surname">Malik</surname></persName><persName TEIform="persName"><foreName id="id2652540" full="yes" TEIform="foreName">J.</foreName><surname id="id2652550" full="yes" TEIform="surname">Puzicha</surname></persName></author></analytic><monogr id="id2652562" TEIform="monogr"><title id="id2652569" level="j" TEIform="title">Pattern Analysis and Machine Intelligence</title><imprint id="id2652578" TEIform="imprint"><biblScope id="id2652585" type="volume" TEIform="biblScope">24</biblScope><biblScope id="id2652594" type="number" TEIform="biblScope">4</biblScope><dateStruct id="id2652603" full="yes" TEIform="dateStruct"><month id="id2652611" full="yes" TEIform="month">April</month><year id="id2652620" full="yes" TEIform="year">2002</year></dateStruct><biblScope id="id2652632" type="pages" TEIform="biblScope">509–522</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:PROCAMS-03" type="inproceedings" id="bid21" default="NO" TEIform="biblStruct"><analytic id="id2652693" TEIform="analytic"><title id="id2652700" level="a" TEIform="title">Projecting 
Rectified Images in an Augmented Environment</title><author id="id2652710" TEIform="author"><persName key="prima-2005-id2244828" TEIform="persName"><foreName id="id2652720" full="yes" TEIform="foreName">S.</foreName><surname id="id2652730" full="yes" TEIform="surname">Borkowski</surname></persName><persName TEIform="persName"><foreName id="id2652746" full="yes" TEIform="foreName">O.</foreName><surname id="id2652755" full="yes" TEIform="surname">Riff</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2652772" full="yes" TEIform="foreName">J. L.</foreName><surname id="id2652781" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2652794" TEIform="monogr"><title id="id2652801" level="m" TEIform="title">PROCAMS'03 Workshop</title><imprint id="id2652810" TEIform="imprint"><dateStruct id="id2652815" full="yes" TEIform="dateStruct"><year id="id2652823" full="yes" TEIform="year">2003</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Chomat99" type="inproceedings" id="bid12" default="NO" TEIform="biblStruct"><analytic id="id2652887" TEIform="analytic"><title id="id2652893" level="a" TEIform="title">Probabilistic Recognition of Activity using local appearance</title><author id="id2652903" TEIform="author"><persName TEIform="persName"><foreName id="id2652914" full="yes" TEIform="foreName">O.</foreName><surname id="id2652923" full="yes" TEIform="surname">Chomat</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2652940" full="yes" TEIform="foreName">J.</foreName><surname id="id2652949" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2652962" TEIform="monogr"><title id="id2652969" level="m" TEIform="title">Computer Vision and Pattern Recognition, Fort Collins, USA</title><imprint id="id2652979" TEIform="imprint"><dateStruct id="id2652983" full="yes" TEIform="dateStruct"><month id="id2652992" full="yes" TEIform="month">June</month><year id="id2653001" full="yes" TEIform="year">1999</year></dateStruct><biblScope id="id2653013" type="pages" TEIform="biblScope">104–109</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Chomat00" type="inproceedings" id="bid16" default="NO" TEIform="biblStruct"><analytic id="id2653074" TEIform="analytic"><title id="id2653080" level="a" TEIform="title">Local Scale Selection for Gaussian Based Description Techniques</title><author id="id2653090" TEIform="author"><persName TEIform="persName"><foreName id="id2653101" full="yes" TEIform="foreName">O.</foreName><surname id="id2653110" full="yes" TEIform="surname">Chomat</surname></persName><persName TEIform="persName"><foreName id="id2653127" full="yes" TEIform="foreName">V.</foreName><surname id="id2653136" full="yes" TEIform="surname">Colin de Verdière</surname></persName><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2653153" full="yes" TEIform="foreName">D.</foreName><surname id="id2653163" full="yes" TEIform="surname">Hall</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2653179" full="yes" TEIform="foreName">J.</foreName><surname id="id2653188" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2653201" TEIform="monogr"><title id="id2653208" level="m" TEIform="title">European Conference on Computer Vision, Dublin, Ireland</title><imprint id="id2653218" TEIform="imprint"><dateStruct 
id="id2653223" full="yes" TEIform="dateStruct"><month id="id2653231" full="yes" TEIform="month">June</month><year id="id2653241" full="yes" TEIform="year">2000</year></dateStruct><biblScope id="id2653253" type="pages" TEIform="biblScope">I 117–133</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Crowley93" type="book" id="bid5" default="NO" TEIform="biblStruct"><monogr id="id2653313" TEIform="monogr"><title id="id2653320" level="m" TEIform="title">Vision as Process</title><editor id="id2653329" role="editor" TEIform="editor"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2653342" full="yes" TEIform="foreName">J. L.</foreName><surname id="id2653351" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2653368" full="yes" TEIform="foreName">H. I.</foreName><surname id="id2653377" full="yes" TEIform="surname">Christensen</surname></persName></editor><imprint id="id2653389" TEIform="imprint"><publisher id="id2653394" TEIform="publisher"><orgName id="id2653400" TEIform="orgName">Springer Verlag</orgName></publisher><dateStruct id="id2653408" full="yes" TEIform="dateStruct"><year id="id2653417" full="yes" TEIform="year">1993</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Crowley02b" type="inproceedings" id="bid6" default="NO" TEIform="biblStruct"><analytic id="id2653480" TEIform="analytic"><title id="id2653487" level="a" TEIform="title">Using Context to Structure Perceptual Processes for Observing Activity</title><author id="id2653497" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2653507" full="yes" TEIform="foreName">J.</foreName><surname id="id2653517" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2653533" full="yes" TEIform="foreName">J.</foreName><surname id="id2653543" full="yes" TEIform="surname">Coutaz</surname></persName><persName TEIform="persName"><foreName id="id2653559" full="yes" TEIform="foreName">G.</foreName><surname id="id2653568" full="yes" TEIform="surname">Rey</surname></persName><persName key="prima-2005-id2245460" TEIform="persName"><foreName id="id2653585" full="yes" TEIform="foreName">P.</foreName><surname id="id2653594" full="yes" TEIform="surname">Reignier</surname></persName></author></analytic><monogr id="id2653607" TEIform="monogr"><title id="id2653614" level="m" TEIform="title">UBICOMP, Sweden</title><imprint id="id2653623" TEIform="imprint"><dateStruct id="id2653628" full="yes" TEIform="dateStruct"><month id="id2653636" full="yes" TEIform="month">September</month><year id="id2653646" full="yes" TEIform="year">2002</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Crowley95" type="article" id="bid2" default="NO" TEIform="biblStruct"><analytic id="id2653709" TEIform="analytic"><title id="id2653715" level="a" TEIform="title">Integration and Control of Reactive Visual Processes</title><author id="id2653725" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2653736" full="yes" TEIform="foreName">J.</foreName><surname id="id2653745" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2653758" TEIform="monogr"><title id="id2653765" level="j" TEIform="title">Robotics and Autonomous Systems</title><imprint id="id2653774" TEIform="imprint"><biblScope id="id2653780" type="volume" TEIform="biblScope">15</biblScope><biblScope 
id="id2653790" type="number" TEIform="biblScope">1</biblScope><dateStruct id="id2653799" full="yes" TEIform="dateStruct"><year id="id2653806" full="yes" TEIform="year">1995</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Crowley02" type="inproceedings" id="bid14" default="NO" TEIform="biblStruct"><analytic id="id2653870" TEIform="analytic"><title id="id2653876" level="a" TEIform="title">Fast Computation of Characteristic Scale using a Half Octave Pyramid</title><author id="id2653886" TEIform="author"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2653897" full="yes" TEIform="foreName">J.</foreName><surname id="id2653906" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2653922" full="yes" TEIform="foreName">O.</foreName><surname id="id2653932" full="yes" TEIform="surname">Riff</surname></persName><persName TEIform="persName"><foreName id="id2653948" full="yes" TEIform="foreName">J.</foreName><surname id="id2653958" full="yes" TEIform="surname">Piater</surname></persName></author></analytic><monogr id="id2653970" TEIform="monogr"><title id="id2653978" level="m" TEIform="title">International Workshop on Cognitive Computing, Zurich, Switzerland</title><imprint id="id2653988" TEIform="imprint"><dateStruct id="id2653992" full="yes" TEIform="dateStruct"><month id="id2654001" full="yes" TEIform="month">September</month><year id="id2654010" full="yes" TEIform="year">2002</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Estublier97" type="inproceedings" id="bid1" default="NO" TEIform="biblStruct"><analytic id="id2654073" TEIform="analytic"><title id="id2654080" level="a" TEIform="title">Architectures for Process Support Interoperability</title><author id="id2654090" TEIform="author"><persName TEIform="persName"><foreName id="id2654100" full="yes" TEIform="foreName">J.</foreName><surname id="id2654109" full="yes" TEIform="surname">Estublier</surname></persName><persName TEIform="persName"><foreName id="id2654126" full="yes" TEIform="foreName">P. 
Y.</foreName><surname id="id2654135" full="yes" TEIform="surname">Cunin</surname></persName><persName TEIform="persName"><foreName id="id2654152" full="yes" TEIform="foreName">N.</foreName><surname id="id2654161" full="yes" TEIform="surname">Belkhatir</surname></persName></author></analytic><monogr id="id2654174" TEIform="monogr"><title id="id2654181" level="m" TEIform="title">ICSP5</title><imprint id="id2654190" TEIform="imprint"><dateStruct id="id2654194" full="yes" TEIform="dateStruct"><year id="id2654203" full="yes" TEIform="year">1997</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Fergus03" type="inproceedings" id="bid19" default="NO" TEIform="biblStruct"><analytic id="id2654266" TEIform="analytic"><title id="id2654272" level="a" TEIform="title">Object Class Recognition by Unsupervised Scale-Invariant Learning</title><author id="id2654283" TEIform="author"><persName TEIform="persName"><foreName id="id2654293" full="yes" TEIform="foreName">R.</foreName><surname id="id2654302" full="yes" TEIform="surname">Fergus</surname></persName><persName TEIform="persName"><foreName id="id2654319" full="yes" TEIform="foreName">P.</foreName><surname id="id2654328" full="yes" TEIform="surname">Perona</surname></persName><persName TEIform="persName"><foreName id="id2654345" full="yes" TEIform="foreName">A.</foreName><surname id="id2654354" full="yes" TEIform="surname">Zisserman</surname></persName></author></analytic><monogr id="id2654367" TEIform="monogr"><title id="id2654374" level="m" TEIform="title">Computer Vision and Pattern Recognition, Madison, USA</title><imprint id="id2654384" TEIform="imprint"><dateStruct id="id2654388" full="yes" TEIform="dateStruct"><year id="id2654397" full="yes" TEIform="year">2003</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Finkelstein94" type="book" id="bid0" default="NO" TEIform="biblStruct"><monogr id="id2654460" TEIform="monogr"><title id="id2654466" level="m" TEIform="title">Software Process Modeling and Technology</title><editor id="id2654475" role="editor" TEIform="editor"><persName TEIform="persName"><foreName id="id2654489" full="yes" TEIform="foreName">A.</foreName><surname id="id2654498" full="yes" TEIform="surname">Finkelstein</surname></persName><persName TEIform="persName"><foreName id="id2654515" full="yes" TEIform="foreName">J.</foreName><surname id="id2654524" full="yes" TEIform="surname">Kramer</surname></persName><persName TEIform="persName"><foreName id="id2654541" full="yes" TEIform="foreName">B.</foreName><surname id="id2654550" full="yes" TEIform="surname">Nuseibeh</surname></persName></editor><imprint id="id2654562" TEIform="imprint"><publisher id="id2654567" TEIform="publisher"><orgName id="id2654573" TEIform="orgName">Research Studies Press, John Wiley and Sons Inc</orgName></publisher><dateStruct id="id2654582" full="yes" TEIform="dateStruct"><year id="id2654590" full="yes" TEIform="year">1994</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Freeman91" type="article" id="bid17" default="NO" TEIform="biblStruct"><analytic id="id2654654" TEIform="analytic"><title id="id2654660" level="a" TEIform="title">The Design and Use of Steerable Filters</title><author id="id2654669" TEIform="author"><persName TEIform="persName"><foreName id="id2654680" full="yes" TEIform="foreName">W.</foreName><surname id="id2654690" full="yes" TEIform="surname">Freeman</surname></persName><persName TEIform="persName"><foreName id="id2654706" full="yes" 
TEIform="foreName">E.</foreName><surname id="id2654716" full="yes" TEIform="surname">Adelson</surname></persName></author></analytic><monogr id="id2654728" TEIform="monogr"><title id="id2654736" level="j" TEIform="title">Pattern Analysis and Machine Intelligence</title><imprint id="id2654744" TEIform="imprint"><biblScope id="id2654751" type="volume" TEIform="biblScope">13</biblScope><biblScope id="id2654761" type="number" TEIform="biblScope">9</biblScope><dateStruct id="id2654769" full="yes" TEIform="dateStruct"><month id="id2654777" full="yes" TEIform="month">September</month><year id="id2654786" full="yes" TEIform="year">1991</year></dateStruct><biblScope id="id2654798" type="pages" TEIform="biblScope">891–906</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Hall04" type="inproceedings" subtype="nonparu" id="bid38" default="NO" TEIform="biblStruct"><analytic id="id2654860" TEIform="analytic"><title id="id2654866" level="a" TEIform="title">Détection du visage par caractéristiques génériques calculées à partir des images de luminance</title><author id="id2654879" TEIform="author"><persName key="prima-2005-id2245372" TEIform="persName"><foreName id="id2654889" full="yes" TEIform="foreName">D.</foreName><surname id="id2654899" full="yes" TEIform="surname">Hall</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2654915" full="yes" TEIform="foreName">J.</foreName><surname id="id2654924" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2654937" TEIform="monogr"><title id="id2654944" level="m" TEIform="title">Reconnaissance des formes et intelligence artificelle, Toulouse, France</title><note id="id2654955" anchored="yes" place="unspecified" type="bnote">to appear</note><imprint id="id2654966" TEIform="imprint"><dateStruct id="id2654971" full="yes" TEIform="dateStruct"><year id="id2654980" full="yes" TEIform="year">2004</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:jess" type="misc" id="bid36" default="NO" TEIform="biblStruct"><monogr id="id2655043" TEIform="monogr"><title id="id2655049" level="m" TEIform="title">Jess : the rule engine for the java</title><imprint id="id2655058" TEIform="imprint"/></monogr><note id="id2655065" anchored="yes" place="unspecified" type="howpublished">http://herzberg.ca.sandia.gov/jess/</note></biblStruct><biblStruct rend="foot" n="footcite:iRoom-02" type="article" id="bid22" default="NO" TEIform="biblStruct"><analytic id="id2655127" TEIform="analytic"><title id="id2655133" level="a" TEIform="title">PointRight: Experience with Flexible Input Redirection in Interactive Workspaces</title><author id="id2655143" TEIform="author"><persName TEIform="persName"><foreName id="id2655154" full="yes" TEIform="foreName">B.</foreName><surname id="id2655163" full="yes" TEIform="surname">Johanson</surname></persName><persName TEIform="persName"><foreName id="id2655180" full="yes" TEIform="foreName">G.</foreName><surname id="id2655189" full="yes" TEIform="surname">Hutchins</surname></persName><persName TEIform="persName"><foreName id="id2655205" full="yes" TEIform="foreName">T.</foreName><surname id="id2655215" full="yes" TEIform="surname">Winograd</surname></persName><persName TEIform="persName"><foreName id="id2655231" full="yes" TEIform="foreName">M.</foreName><surname id="id2655241" full="yes" TEIform="surname">Stone</surname></persName></author></analytic><monogr id="id2655253" TEIform="monogr"><title id="id2655261" level="j" 
TEIform="title">Proceedings of UIST-2002</title><imprint id="id2655270" TEIform="imprint"><dateStruct id="id2655274" full="yes" TEIform="dateStruct"><year id="id2655283" full="yes" TEIform="year">2002</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:joram" type="misc" id="bid8" default="NO" TEIform="biblStruct"><monogr id="id2655346" TEIform="monogr"><title id="id2655353" level="m" TEIform="title">JORAM</title><imprint id="id2655362" TEIform="imprint"><ref id="id2655370" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="http://joram.objectweb.org/" location="extern" xyref="241325867002" xmlns:xlink="http://www.w3.org/1999/xlink">http://joram.objectweb.org/</ref></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Kulikowski81" type="article" id="bid11" default="NO" TEIform="biblStruct"><analytic id="id2655439" TEIform="analytic"><title id="id2655445" level="a" TEIform="title">Fourier Analysis and Spatial Representation in the visual Cortex</title><author id="id2655455" TEIform="author"><persName TEIform="persName"><foreName id="id2655466" full="yes" TEIform="foreName">J. J.</foreName><surname id="id2655475" full="yes" TEIform="surname">Kulikowski</surname></persName><persName TEIform="persName"><foreName id="id2655492" full="yes" TEIform="foreName">P. O.</foreName><surname id="id2655501" full="yes" TEIform="surname">Bishop</surname></persName></author></analytic><monogr id="id2655514" TEIform="monogr"><title id="id2655521" level="j" TEIform="title">Experientia</title><imprint id="id2655530" TEIform="imprint"><biblScope id="id2655536" type="volume" TEIform="biblScope">37</biblScope><biblScope id="id2655545" type="number" TEIform="biblScope">1</biblScope><dateStruct id="id2655554" full="yes" TEIform="dateStruct"><year id="id2655562" full="yes" TEIform="year">1981</year></dateStruct><biblScope id="id2655574" type="pages" TEIform="biblScope">160–163</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Leung99" type="inproceedings" id="bid37" default="NO" TEIform="biblStruct"><analytic id="id2655635" TEIform="analytic"><title id="id2655641" level="a" TEIform="title">Recognizing Surfaces using Three-dimensional Textons</title><author id="id2655651" TEIform="author"><persName TEIform="persName"><foreName id="id2655662" full="yes" TEIform="foreName">T.</foreName><surname id="id2655671" full="yes" TEIform="surname">Leung</surname></persName><persName TEIform="persName"><foreName id="id2655688" full="yes" TEIform="foreName">J.</foreName><surname id="id2655697" full="yes" TEIform="surname">Malik</surname></persName></author></analytic><monogr id="id2655710" TEIform="monogr"><title id="id2655717" level="m" TEIform="title">International Conference on Computer Vision, Corfu, Greece</title><imprint id="id2655727" TEIform="imprint"><dateStruct id="id2655731" full="yes" TEIform="dateStruct"><month id="id2655740" full="yes" TEIform="month">September</month><year id="id2655749" full="yes" TEIform="year">1999</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Lux03" type="inproceedings" id="bid7" default="NO" TEIform="biblStruct"><analytic id="id2655813" TEIform="analytic"><title id="id2655819" level="a" TEIform="title">The Imalab Method for Vision Systems</title><author id="id2655828" TEIform="author"><persName key="prima-2005-id2245482" TEIform="persName"><foreName id="id2655839" full="yes" TEIform="foreName">A.</foreName><surname id="id2655849" full="yes" 
TEIform="surname">Lux</surname></persName></author></analytic><monogr id="id2655862" TEIform="monogr"><title id="id2655869" level="m" TEIform="title">ICVS03, Graz, Austria</title><imprint id="id2655878" TEIform="imprint"><dateStruct id="id2655883" full="yes" TEIform="dateStruct"><month id="id2655891" full="yes" TEIform="month">April</month><year id="id2655901" full="yes" TEIform="year">2003</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Martinez98" type="techreport" id="bid40" default="NO" TEIform="biblStruct"><monogr id="id2655964" TEIform="monogr"><title id="id2655971" level="m" TEIform="title">The AR Face Database</title><author id="id2655980" TEIform="author"><persName TEIform="persName"><foreName id="id2655990" full="yes" TEIform="foreName">A.</foreName><surname id="id2656000" full="yes" TEIform="surname">Martinez</surname></persName><persName TEIform="persName"><foreName id="id2656016" full="yes" TEIform="foreName">R.</foreName><surname id="id2656026" full="yes" TEIform="surname">Benavente</surname></persName></author><note id="id2656039" anchored="yes" place="unspecified" type="typdoc">Technical report</note><imprint id="id2656050" TEIform="imprint"><biblScope id="id2656057" type="number" TEIform="biblScope">24</biblScope><publisher id="id2656065" TEIform="publisher"><orgName id="id2656072" type="institution" TEIform="orgName">CVC</orgName></publisher><dateStruct id="id2656082" full="yes" TEIform="dateStruct"><month id="id2656089" full="yes" TEIform="month">June</month><year id="id2656099" full="yes" TEIform="year">1998</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Nakamura-UIST02" type="inproceedings" id="bid28" default="NO" TEIform="biblStruct"><analytic id="id2656162" TEIform="analytic"><title id="id2656168" level="a" TEIform="title">Active Projector: Image correction for moving image over uneven screens</title><author id="id2656177" TEIform="author"><persName TEIform="persName"><foreName id="id2656189" full="yes" TEIform="foreName">N.</foreName><surname id="id2656198" full="yes" TEIform="surname">Nakamura</surname></persName><persName TEIform="persName"><foreName id="id2656214" full="yes" TEIform="foreName">R.</foreName><surname id="id2656224" full="yes" TEIform="surname">Hiraike</surname></persName></author></analytic><monogr id="id2656237" TEIform="monogr"><title id="id2656244" level="m" TEIform="title">Companion of the 15th Annual ACM Symposium on User Interface Software and Technology</title><imprint id="id2656254" TEIform="imprint"><dateStruct id="id2656259" full="yes" TEIform="dateStruct"><month id="id2656267" full="yes" TEIform="month">October</month><year id="id2656277" full="yes" TEIform="year">2002</year></dateStruct><biblScope id="id2656289" type="pages" TEIform="biblScope">1–2</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Ohta94" type="inproceedings" id="bid18" default="NO" TEIform="biblStruct"><analytic id="id2656350" TEIform="analytic"><title id="id2656356" level="a" TEIform="title">Recovery of Illuminant and Surface Colors from Images Based on the CIE Daylight</title><author id="id2656367" TEIform="author"><persName TEIform="persName"><foreName id="id2656377" full="yes" TEIform="foreName">Y.</foreName><surname id="id2656387" full="yes" TEIform="surname">Ohta</surname></persName><persName TEIform="persName"><foreName id="id2656403" full="yes" TEIform="foreName">Y.</foreName><surname id="id2656413" full="yes" 
TEIform="surname">Hamashi</surname></persName></author></analytic><monogr id="id2656425" TEIform="monogr"><editor id="id2656431" role="editor" TEIform="editor"><persName TEIform="persName"><foreName id="id2656445" full="yes" TEIform="foreName">J.-O.</foreName><surname id="id2656455" full="yes" TEIform="surname">Eklundh</surname></persName></editor><title id="id2656468" level="m" TEIform="title">European Conference on Computer Vision</title><imprint id="id2656476" TEIform="imprint"><dateStruct id="id2656482" full="yes" TEIform="dateStruct"><year id="id2656490" full="yes" TEIform="year">1994</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Omologo97" type="article" id="bid33" default="NO" TEIform="biblStruct"><analytic id="id2656554" TEIform="analytic"><title id="id2656560" level="a" TEIform="title">Use of the Crossposwer-Spectrum Phase in Acoustic Event Location</title><author id="id2656570" TEIform="author"><persName TEIform="persName"><foreName id="id2656581" full="yes" TEIform="foreName">M.</foreName><surname id="id2656590" full="yes" TEIform="surname">Omologo</surname></persName><persName TEIform="persName"><foreName id="id2656607" full="yes" TEIform="foreName">P.</foreName><surname id="id2656616" full="yes" TEIform="surname">Svaizer</surname></persName></author></analytic><monogr id="id2656629" TEIform="monogr"><title id="id2656636" level="j" TEIform="title">IEEE Transaction on Speech and Audio processing</title><imprint id="id2656646" TEIform="imprint"><biblScope id="id2656652" type="volume" TEIform="biblScope">5</biblScope><biblScope id="id2656661" type="number" TEIform="biblScope">3</biblScope><dateStruct id="id2656670" full="yes" TEIform="dateStruct"><year id="id2656678" full="yes" TEIform="year">1997</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Pinhanez01" type="inproceedings" id="bid29" default="NO" TEIform="biblStruct"><analytic id="id2656740" TEIform="analytic"><title id="id2656747" level="a" TEIform="title">The Everywhere Displays Projector: A Device to Create Ubiquitous Graphical Interfaces</title><author id="id2656757" TEIform="author"><persName TEIform="persName"><foreName id="id2656768" full="yes" TEIform="foreName">C.</foreName><surname id="id2656777" full="yes" TEIform="surname">Pinhanez</surname></persName></author></analytic><monogr id="id2656790" TEIform="monogr"><title id="id2656797" level="m" TEIform="title">Proceedings of Ubiquitous Computing 2001 Conference</title><imprint id="id2656806" TEIform="imprint"><dateStruct id="id2656812" full="yes" TEIform="dateStruct"><month id="id2656820" full="yes" TEIform="month">September</month><year id="id2656830" full="yes" TEIform="year">2001</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Raskar-iLamps03" type="inproceedings" id="bid31" default="NO" TEIform="biblStruct"><analytic id="id2656893" TEIform="analytic"><title id="id2656900" level="a" TEIform="title">iLamps: Geometrically Aware and Self-Configuring Projectors</title><author id="id2656910" TEIform="author"><persName TEIform="persName"><foreName id="id2656920" full="yes" TEIform="foreName">R.</foreName><surname id="id2656929" full="yes" TEIform="surname">Raskar</surname></persName></author></analytic><monogr id="id2656942" TEIform="monogr"><title id="id2656949" level="m" TEIform="title">ACM SIGGRAPH 2003 Conference Proceedings</title><imprint id="id2656958" TEIform="imprint"><dateStruct id="id2656963" full="yes" TEIform="dateStruct"><year 
id="id2656972" full="yes" TEIform="year">2003</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Raskar-Office98" type="inproceedings" id="bid25" default="NO" TEIform="biblStruct"><analytic id="id2657035" TEIform="analytic"><title id="id2657041" level="a" TEIform="title">The Office of the Future: A Unified Approach to Image-Based Modeling and Spatially Immersive Displays</title><author id="id2657052" TEIform="author"><persName TEIform="persName"><foreName id="id2657063" full="yes" TEIform="foreName">R.</foreName><surname id="id2657072" full="yes" TEIform="surname">Raskar</surname></persName><persName TEIform="persName"><foreName id="id2657088" full="yes" TEIform="foreName">G.</foreName><surname id="id2657098" full="yes" TEIform="surname">Welch</surname></persName><persName TEIform="persName"><foreName id="id2657114" full="yes" TEIform="foreName">M.</foreName><surname id="id2657124" full="yes" TEIform="surname">Cutts</surname></persName><persName TEIform="persName"><foreName id="id2657140" full="yes" TEIform="foreName">A.</foreName><surname id="id2657150" full="yes" TEIform="surname">Lake</surname></persName><persName TEIform="persName"><foreName id="id2657166" full="yes" TEIform="foreName">L.</foreName><surname id="id2657176" full="yes" TEIform="surname">Stesin</surname></persName><persName TEIform="persName"><foreName id="id2657192" full="yes" TEIform="foreName">H.</foreName><surname id="id2657201" full="yes" TEIform="surname">Fuchs</surname></persName></author></analytic><monogr id="id2657214" TEIform="monogr"><title id="id2657221" level="m" TEIform="title">Proceedings of the ACM SIGGRAPH'98 Conference</title><imprint id="id2657231" TEIform="imprint"><dateStruct id="id2657236" full="yes" TEIform="dateStruct"><year id="id2657244" full="yes" TEIform="year">1998</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Rasure94" type="incollection" id="bid3" default="NO" TEIform="biblStruct"><analytic id="id2657307" TEIform="analytic"><title id="id2657314" level="a" TEIform="title">The Khoros Application Development Environment</title><author id="id2657323" TEIform="author"><persName TEIform="persName"><foreName id="id2657334" full="yes" TEIform="foreName">J.</foreName><surname id="id2657343" full="yes" TEIform="surname">Rasure</surname></persName><persName TEIform="persName"><foreName id="id2657360" full="yes" TEIform="foreName">S.</foreName><surname id="id2657369" full="yes" TEIform="surname">Kubica</surname></persName></author></analytic><monogr id="id2657382" TEIform="monogr"><editor id="id2657387" role="editor" TEIform="editor"><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2657402" full="yes" TEIform="foreName">J.</foreName><surname id="id2657411" full="yes" TEIform="surname">Crowley</surname></persName><persName TEIform="persName"><foreName id="id2657427" full="yes" TEIform="foreName">H.</foreName><surname id="id2657437" full="yes" TEIform="surname">Christensen</surname></persName></editor><title id="id2657450" level="m" TEIform="title">Experimental Environments for Computer Vision and Image Processing</title><title id="id2657460" level="s" TEIform="title">Machine Perception Artificial Intelligence Series</title><imprint id="id2657470" TEIform="imprint"><biblScope id="id2657476" type="volume" TEIform="biblScope">11</biblScope><biblScope id="id2657486" type="number" TEIform="biblScope">1</biblScope><publisher id="id2657494" TEIform="publisher"><orgName id="id2657499" TEIform="orgName">World 
Scientific Press</orgName></publisher><dateStruct id="id2657508" full="yes" TEIform="dateStruct"><year id="id2657516" full="yes" TEIform="year">1994</year></dateStruct><biblScope id="id2657528" type="pages" TEIform="biblScope">1–32</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Schiele00" type="article" id="bid9" default="NO" TEIform="biblStruct"><analytic id="id2657589" TEIform="analytic"><title id="id2657595" level="a" TEIform="title">Recognition without Correspondence using Multidimensional Receptive Field Histograms</title><author id="id2657606" TEIform="author"><persName TEIform="persName"><foreName id="id2657616" full="yes" TEIform="foreName">B.</foreName><surname id="id2657626" full="yes" TEIform="surname">Schiele</surname></persName><persName key="prima-2005-id2245512" TEIform="persName"><foreName id="id2657642" full="yes" TEIform="foreName">J.</foreName><surname id="id2657652" full="yes" TEIform="surname">Crowley</surname></persName></author></analytic><monogr id="id2657664" TEIform="monogr"><title id="id2657672" level="j" TEIform="title">International Journal of Computer Vision</title><imprint id="id2657680" TEIform="imprint"><biblScope id="id2657687" type="volume" TEIform="biblScope">36</biblScope><biblScope id="id2657697" type="number" TEIform="biblScope">1</biblScope><dateStruct id="id2657705" full="yes" TEIform="dateStruct"><month id="id2657713" full="yes" TEIform="month">January</month><year id="id2657722" full="yes" TEIform="year">2000</year></dateStruct><biblScope id="id2657734" type="pages" TEIform="biblScope">31–50</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Shaw96" type="book" id="bid4" default="NO" TEIform="biblStruct"><monogr id="id2657795" TEIform="monogr"><title id="id2657801" level="m" TEIform="title">Software Architecture: Perspectives on an Emerging Discipline</title><author id="id2657811" TEIform="author"><persName TEIform="persName"><foreName id="id2657822" full="yes" TEIform="foreName">M.</foreName><surname id="id2657831" full="yes" TEIform="surname">Shaw</surname></persName><persName TEIform="persName"><foreName id="id2657847" full="yes" TEIform="foreName">D.</foreName><surname id="id2657857" full="yes" TEIform="surname">Garlan</surname></persName></author><imprint id="id2657868" TEIform="imprint"><publisher id="id2657874" TEIform="publisher"><orgName id="id2657880" TEIform="orgName">Prentice Hall</orgName></publisher><dateStruct id="id2657888" full="yes" TEIform="dateStruct"><year id="id2657896" full="yes" TEIform="year">1996</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:iLand-99" type="article" id="bid23" default="NO" TEIform="biblStruct"><analytic id="id2657960" TEIform="analytic"><title id="id2657966" level="a" TEIform="title">i-LAND: An interactive Landscape for Creativity and Innovation</title><author id="id2657976" TEIform="author"><persName TEIform="persName"><foreName id="id2657987" full="yes" TEIform="foreName">N.
A.</foreName><surname id="id2657996" full="yes" TEIform="surname">Streitz</surname></persName><persName TEIform="persName"><foreName id="id2658013" full="yes" TEIform="foreName">J.</foreName><surname id="id2658022" full="yes" TEIform="surname">Geißler</surname></persName><persName TEIform="persName"><foreName id="id2658039" full="yes" TEIform="foreName">T.</foreName><surname id="id2658048" full="yes" TEIform="surname">Holmer</surname></persName><persName TEIform="persName"><foreName id="id2658064" full="yes" TEIform="foreName">S.</foreName><surname id="id2658074" full="yes" TEIform="surname">Konomi</surname></persName><persName TEIform="persName"><foreName id="id2658090" full="yes" TEIform="foreName">C.</foreName><surname id="id2658100" full="yes" TEIform="surname">Müller-Tomfelde</surname></persName><persName TEIform="persName"><foreName id="id2658117" full="yes" TEIform="foreName">W.</foreName><surname id="id2658126" full="yes" TEIform="surname">Reischl</surname></persName><persName TEIform="persName"><foreName id="id2658142" full="yes" TEIform="foreName">P.</foreName><surname id="id2658152" full="yes" TEIform="surname">Rexroth</surname></persName><persName TEIform="persName"><foreName id="id2658168" full="yes" TEIform="foreName">P.</foreName><surname id="id2658178" full="yes" TEIform="surname">Seitz</surname></persName><persName TEIform="persName"><foreName id="id2658194" full="yes" TEIform="foreName">R.</foreName><surname id="id2658204" full="yes" TEIform="surname">Steinmetz</surname></persName></author></analytic><monogr id="id2658216" TEIform="monogr"><title id="id2658224" level="j" TEIform="title">ACM Conference on Human Factors in Computing Systems</title><imprint id="id2658234" TEIform="imprint"><dateStruct id="id2658238" full="yes" TEIform="dateStruct"><year id="id2658246" full="yes" TEIform="year">1999</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Underkoffler-Pixels99" type="inproceedings" id="bid26" default="NO" TEIform="biblStruct"><analytic id="id2658309" TEIform="analytic"><title id="id2658316" level="a" TEIform="title">Emancipated Pixels: Real-World Graphics in the Luminous Room</title><author id="id2658326" TEIform="author"><persName TEIform="persName"><foreName id="id2658336" full="yes" TEIform="foreName">J. U. 
B.</foreName><surname id="id2658346" full="yes" TEIform="surname">Ullmer</surname></persName><persName TEIform="persName"><foreName id="id2658362" full="yes" TEIform="foreName">H.</foreName><surname id="id2658372" full="yes" TEIform="surname">Ishii</surname></persName></author></analytic><monogr id="id2658384" TEIform="monogr"><title id="id2658391" level="m" TEIform="title">Proceedings of ACM SIGGRAPH</title><imprint id="id2658401" TEIform="imprint"><dateStruct id="id2658405" full="yes" TEIform="dateStruct"><year id="id2658414" full="yes" TEIform="year">1999</year></dateStruct><biblScope id="id2658426" type="pages" TEIform="biblScope">385-392</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Vliet98" type="inproceedings" id="bid13" default="NO" TEIform="biblStruct"><analytic id="id2658487" TEIform="analytic"><title id="id2658493" level="a" TEIform="title">Recursive Gaussian Derivative Filters</title><author id="id2658502" TEIform="author"><persName TEIform="persName"><foreName id="id2658513" full="yes" TEIform="foreName">L.</foreName><surname id="id2658523" full="yes" TEIform="surname">van Vliet</surname></persName><persName TEIform="persName"><foreName id="id2658539" full="yes" TEIform="foreName">I.</foreName><surname id="id2658549" full="yes" TEIform="surname">Young</surname></persName><persName TEIform="persName"><foreName id="id2658565" full="yes" TEIform="foreName">P.</foreName><surname id="id2658575" full="yes" TEIform="surname">Verbeek</surname></persName></author></analytic><monogr id="id2658587" TEIform="monogr"><title id="id2658594" level="m" TEIform="title">International Conference on Pattern Recognition</title><imprint id="id2658604" TEIform="imprint"><dateStruct id="id2658609" full="yes" TEIform="dateStruct"><month id="id2658617" full="yes" TEIform="month">August</month><year id="id2658627" full="yes" TEIform="year">1998</year></dateStruct><biblScope id="id2658639" type="pages" TEIform="biblScope">509–514</biblScope></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Vaufreydaz02a" type="phdthesis" id="bid34" default="NO" TEIform="biblStruct"><monogr id="id2658700" TEIform="monogr"><title id="id2658706" level="m" TEIform="title">Modélisation statistique du langage à partir d'Internet pour la reconnaissance automatique de la parole continue</title><author id="id2658720" TEIform="author"><persName key="prima-2005-id2245437" TEIform="persName"><foreName id="id2658730" full="yes" TEIform="foreName">D.</foreName><surname id="id2658740" full="yes" TEIform="surname">Vaufreydaz</surname></persName></author><note id="id2658753" anchored="yes" place="unspecified" type="typdoc">Ph.D. 
thesis in Computer Sciences</note><imprint id="id2658764" TEIform="imprint"><publisher id="id2658769" TEIform="publisher"><orgName id="id2658777" type="school" TEIform="orgName">University Joseph Fourier</orgName><address id="id2658786" TEIform="address"><addrLine id="id2658791" TEIform="addrLine">Grenoble (France)</addrLine></address></publisher><dateStruct id="id2658800" full="yes" TEIform="dateStruct"><month id="id2658808" full="yes" TEIform="month">January</month><year id="id2658818" full="yes" TEIform="year">2002</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Vernier-CircularTable" type="inproceedings" id="bid27" default="NO" TEIform="biblStruct"><analytic id="id2658881" TEIform="analytic"><title id="id2658887" level="a" TEIform="title">Visualization Techniques for Circular Tabletop Interfaces</title><author id="id2658897" TEIform="author"><persName TEIform="persName"><foreName id="id2658908" full="yes" TEIform="foreName">F.</foreName><surname id="id2658917" full="yes" TEIform="surname">Vernier</surname></persName><persName TEIform="persName"><foreName id="id2658934" full="yes" TEIform="foreName">N.</foreName><surname id="id2658943" full="yes" TEIform="surname">Lesh</surname></persName><persName TEIform="persName"><foreName id="id2658959" full="yes" TEIform="foreName">C.</foreName><surname id="id2658969" full="yes" TEIform="surname">Shen</surname></persName></author></analytic><monogr id="id2658982" TEIform="monogr"><title id="id2658989" level="m" TEIform="title">Advanced Visual Interfaces</title><imprint id="id2658998" TEIform="imprint"><dateStruct id="id2659003" full="yes" TEIform="dateStruct"><year id="id2659011" full="yes" TEIform="year">2002</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Kimura-02" type="inproceedings" id="bid24" default="NO" TEIform="biblStruct"><analytic id="id2659075" TEIform="analytic"><title id="id2659081" level="a" TEIform="title">Integrating virtual and physical context to support knowledge workers</title><author id="id2659091" TEIform="author"><persName TEIform="persName"><foreName id="id2659102" full="yes" TEIform="foreName">S.</foreName><surname id="id2659112" full="yes" TEIform="surname">Voida</surname></persName><persName TEIform="persName"><foreName id="id2659128" full="yes" TEIform="foreName">E.</foreName><surname id="id2659138" full="yes" TEIform="surname">Mynatt</surname></persName><persName TEIform="persName"><foreName id="id2659154" full="yes" TEIform="foreName">B.</foreName><surname id="id2659163" full="yes" TEIform="surname">MacIntyre</surname></persName><persName TEIform="persName"><foreName id="id2659180" full="yes" TEIform="foreName">G.</foreName><surname id="id2659189" full="yes" TEIform="surname">Corso</surname></persName></author></analytic><monogr id="id2659202" TEIform="monogr"><title id="id2659209" level="m" TEIform="title">Proceedings of Pervasive Computing Conference</title><imprint id="id2659219" TEIform="imprint"><publisher id="id2659223" TEIform="publisher"><orgName id="id2659229" TEIform="orgName">IEEE Computer Society Press</orgName></publisher><dateStruct id="id2659237" full="yes" TEIform="dateStruct"><year id="id2659246" full="yes" TEIform="year">2002</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:CaltechFaces" type="misc" id="bid41" default="NO" TEIform="biblStruct"><monogr id="id2659310" TEIform="monogr"><title id="id2659316" level="m" TEIform="title">Frontal face dataset</title><author id="id2659325" 
TEIform="author"><persName TEIform="persName"><foreName id="id2659336" full="yes" TEIform="foreName">M.</foreName><surname id="id2659345" full="yes" TEIform="surname">Weber</surname></persName></author><imprint id="id2659357" TEIform="imprint"><dateStruct id="id2659362" full="yes" TEIform="dateStruct"><year id="id2659371" full="yes" TEIform="year">2003</year></dateStruct><ref id="id2659385" xlink:actuate="onRequest" xlink:show="replace" xlink:type="simple" xlink:href="http://www.vision.caltech.edu/html-files/archive.html" location="extern" xyref="3933586697013" xmlns:xlink="http://www.w3.org/1999/xlink">http://www.vision.caltech.edu/html-files/archive.html</ref></imprint></monogr><note id="id2659406" anchored="yes" place="unspecified" type="howpublished">internet</note></biblStruct><biblStruct rend="foot" n="footcite:Young85" type="techreport" id="bid10" default="NO" TEIform="biblStruct"><monogr id="id2659467" TEIform="monogr"><title id="id2659474" level="m" TEIform="title">The Gaussian Derivative Theory for Spatial Vision: Analysis of Cortical Cell Receptive Field Line-Weighting Profiles</title><author id="id2659485" TEIform="author"><persName TEIform="persName"><foreName id="id2659495" full="yes" TEIform="foreName">R.</foreName><surname id="id2659505" full="yes" TEIform="surname">Young</surname></persName></author><note id="id2659518" anchored="yes" place="unspecified" type="typdoc">Technical report</note><imprint id="id2659529" TEIform="imprint"><publisher id="id2659534" TEIform="publisher"><orgName id="id2659542" type="institution" TEIform="orgName">General Motors Research Laboratories</orgName></publisher><dateStruct id="id2659551" full="yes" TEIform="dateStruct"><month id="id2659560" full="yes" TEIform="month">May</month><year id="id2659569" full="yes" TEIform="year">1985</year></dateStruct></imprint></monogr></biblStruct><biblStruct rend="foot" n="footcite:Zoppis97" type="phdthesis" id="bid32" default="NO" TEIform="biblStruct"><monogr id="id2659633" TEIform="monogr"><title id="id2659639" level="m" TEIform="title">Outils pour l'Intégration et le Contrôle en Vision et Robotique Mobile</title><author id="id2659651" TEIform="author"><persName TEIform="persName"><foreName id="id2659661" full="yes" TEIform="foreName">B.</foreName><surname id="id2659670" full="yes" TEIform="surname">Zoppis</surname></persName></author><note id="id2659684" anchored="yes" place="unspecified" type="typdoc">Ph. D. Thesis</note><imprint id="id2659695" TEIform="imprint"><publisher id="id2659700" TEIform="publisher"><orgName id="id2659707" type="school" TEIform="orgName">Institut National Polytechnique de Grenoble</orgName></publisher><dateStruct id="id2659717" full="yes" TEIform="dateStruct"><month id="id2659725" full="yes" TEIform="month">June</month><year id="id2659735" full="yes" TEIform="year">1997</year></dateStruct></imprint></monogr></biblStruct></biblio></raweb>