@comment{{This file has been generated by bib2bib 1.99}}
@comment{{Command line: bib2bib -oc lm2010.keys -ob lm2010.bib -c 'export = "yes" and year=2010' lm.bib ../euprovenance.bib ../ops.bib}}
@comment{{This file has been generated by bib2bib 1.99}}
@comment{{Command line: bib2bib -ob lm.bib -oc lm.keys -c 'export = "yes"' ../lm.bib}}
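@comment{{How these commands fit together (a sketch, assuming bib2bib 1.99's standard -c/-ob/-oc filter semantics): the second command extracts every entry marked export = "yes" from ../lm.bib into lm.bib; the first then selects the year-2010 subset of lm.bib, ../euprovenance.bib and ../ops.bib, writing the selected entries to this file (-ob lm2010.bib) and their citation keys to lm2010.keys (-oc).}}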
@article{Simmhan:FGCS11,
month = dec,
title = {Special section: the Third Provenance Challenge on using the open provenance model for interoperability (Editorial)},
author = {Yogesh Simmhan and Paul Groth and Luc Moreau},
publisher = {Elsevier},
year = {2010},
pages = {737--742},
volume = {27},
number = {6},
export = {yes},
journal = {Future Generation Computer Systems},
local = {https://nms.kcl.ac.uk/luc.moreau/papers/pc3.pdf},
doi = {10.1016/j.future.2010.11.020},
eprints = {https://eprints.soton.ac.uk/410314/},
abstract = {The third provenance challenge was organized to evaluate the efficacy of the Open
Provenance Model (OPM) in representing and sharing provenance, with the goal of improving the
specification. A data loading workflow that ingests data files into a relational database for the
Pan-STARRS sky survey project was selected as a candidate for collecting provenance. Challenge
participants recorded provenance, ran queries over it, and exchanged provenance as OPM
documents with other teams. Fifteen teams participated in the challenge, which concluded at a
workshop in Amsterdam in June 2009. In this paper, we describe the challenge in detail and its outcome.}
}
@inproceedings{Aldeco-Perez:IPAW10,
abstract = {Given the significant increase in on-line services that require
personal information from users, the risk that such
information is misused has become an important concern. In
such a context, information accountability is desirable since
it allows users (and society in general) to decide, by means
of audits, whether information is used appropriately. To
ensure information accountability, information flow should be
made transparent. It has been argued that data provenance can
be used as the mechanism to underpin such
transparency. Under these conditions, the quality of an audit
depends on the quality of the captured provenance
information. Consequently, the integrity of provenance information
emerges as a decisive factor in the quality of a
provenance-based audit. The aim of this paper is to secure
provenance-based audits by including cryptographic
elements both in the communication between the entities involved
and in the provenance representation. This paper also
presents a formalisation and an automatic verification of a
set of security properties that increase the level of trust
in provenance-based audit results.},
address = {Troy, NY},
export = {yes},
author = {Aldeco-P\'{e}rez, Rocio and Moreau, Luc},
booktitle = {International Provenance and Annotation Workshop (IPAW '10)},
title = {{Securing Provenance-based Audits}},
series = {Lecture Notes in Computer Science},
volume = {6378},
eprints = {https://eprints.soton.ac.uk/271436/},
doi = {10.1007/978-3-642-17819-1_18},
local = {https://nms.kcl.ac.uk/luc.moreau/papers/aldeco-ipaw10.pdf},
year = {2010},
pages = {148--164}
}
@inproceedings{Aldeco-Perez:FIS10,
abstract = {Given the significant amount of personal information available on
the Web, verifying its correct use emerges as an important
issue. When personal information is published, its subsequent
use should conform to a set of usage policies. If these policies
are not followed, sensitive data could be exposed and used
against its owner. Under these circumstances, processing
transparency is desirable, since it allows users to decide
whether information is used appropriately. It has been argued
that data provenance can be used as the mechanism to underpin
such transparency. Thus, if the provenance of data is
available, processing becomes transparent, since that
provenance can be analysed against usage policies to
decide whether the processing complied with
them. The aim of this paper is to present a
Provenance-based Compliance Framework that uses provenance to
verify the compliance of processing with predefined information
usage policies. It consists of a provenance-based view of
past processing of information, a representation of
processing policies, and a comparison stage in which the past
processing is analysed against the processing policies. The
paper also presents an implementation based on a common
on-line activity: on-line shopping.},
address = {Berlin, Germany},
export = {yes},
author = {Aldeco-P\'{e}rez, Rocio and Moreau, Luc},
booktitle = {Future Internet Symposium (FIS 2010)},
month = sep,
title = {{A Provenance-based Compliance Framework}},
eprints = {https://eprints.soton.ac.uk/271437/},
local = {https://nms.kcl.ac.uk/luc.moreau/papers/fis10.pdf},
doi = {10.1007/978-3-642-15877-3_14},
pages = {128--137},
volume = {6369},
series = {Lecture Notes in Computer Science},
year = {2010}
}
@inproceedings{Naja:IPAW10,
booktitle = {International Provenance and Annotation Workshop (IPAW'10)},
month = jun,
title = {Provenance of Decisions in Emergency Response Environments},
author = {Iman Naja and Luc Moreau and Alex Rogers},
publisher = {Springer-Verlag},
year = {2010},
pages = {221--230},
series = {Lecture Notes in Computer Science},
volume = {6378},
export = {yes},
keywords = {provenance, intelligent agents},
doi = {10.1007/978-3-642-17819-1_25},
eprints = {https://eprints.soton.ac.uk/271440/},
local = {https://nms.kcl.ac.uk/luc.moreau/papers/naja-ipaw10.pdf},
abstract = {Mitigating the devastating ramifications of major disasters requires emergency workers to respond as efficiently as possible. Information systems can improve their efficiency by organizing their efforts and automating many of their decisions. However, if a system does not document how its decisions were made, those decisions cannot be reviewed to check the reasoning behind them or their compliance with policies. We apply the concept of provenance to decision making in emergency response situations and use the Open Provenance Model (OPM) to express the provenance produced in the RoboCup Rescue Simulation. We produce provenance DAGs using a novel OPM profile that conceptualizes decisions in the context of emergency response. Finally, we traverse the OPM DAGs to answer provenance questions about those decisions.}
}
@article{Moreau:FTWS10,
month = nov,
title = {The Foundations for Provenance on the Web},
author = {Luc Moreau},
year = {2010},
export = {yes},
volume = {2},
number = {2--3},
pages = {99--241},
journal = {Foundations and Trends in Web Science},
doi = {10.1561/1800000010},
eprints = {http://eprints.ecs.soton.ac.uk/21691/},
local = {https://nms.kcl.ac.uk/luc.moreau/papers/survey.pdf},
abstract = {Provenance, i.e., the origin or source of something, is becoming an important concern, since it offers the means to verify data products, to infer their quality, to analyse the processes that led to them, and to decide whether they can be trusted. For instance, provenance enables the reproducibility of scientific results; provenance is necessary to track attribution and credit in curated databases; and, it is essential for reasoners to make trust judgements about the information they use over the Semantic Web.
As the Web allows information sharing, discovery, aggregation, filtering and flow in an unprecedented manner, it also becomes very difficult to identify, reliably, the original source that produced an information item on the Web. Since the emerging use of provenance in niche applications is undoubtedly demonstrating the benefits of provenance, we contend that provenance can and should reliably be tracked and exploited on the Web, and we survey the necessary foundations to achieve such a vision.
Using multiple data sources, we have compiled the largest bibliographical database on provenance so far. This large corpus allows us to analyse emerging trends in the research community. Specifically, using the CiteSpace tool, we identify clusters of papers that constitute research fronts, from which we derive characteristics that we use to structure our foundational framework for provenance on the Web. We note that such an endeavour requires a multi-disciplinary approach, since it draws on contributions from many computer science sub-disciplines, but also from other, non-technical fields, given the human challenge that is anticipated.
To develop our vision, it is necessary to provide a definition of provenance that applies to the Web context. Our conceptual definition of provenance is expressed in terms of processes, and is shown to generalise various definitions of provenance commonly encountered. Furthermore, by bringing realistic distributed systems assumptions, we refine our definition as a query over assertions made by processes.
Given that the majority of work on provenance has been undertaken by the database, workflow and e-science communities, we review some of their work, contrasting approaches, and focusing on important topics we believe to be crucial for bringing provenance to the Web, such as abstraction, collections, storage, queries, workflow evolution, semantics and activities involving human interactions.
However, provenance approaches developed in the context of databases and workflows essentially deal with closed systems. By that, we mean that workflow or database management systems are in full control of the data they manage, and track their provenance within their own scope, but not beyond. In the context of the Web, a broader approach is required by which chunks of provenance representation can be brought together to describe the provenance of information flowing across multiple systems. This is the specific purpose of the Open Provenance Vision, which is an approach that consists of a controlled vocabulary, serialization formats, and interfaces that allow the provenance of individual systems to be expressed, connected in a coherent fashion, and queried seamlessly. In this context, the Open Provenance Model is an emerging community-driven representation of provenance, which has been actively used by some twenty teams to exchange provenance information according to the Open Provenance Vision.
Having identified an open approach and a model for provenance, we then look at techniques that have been proposed to expose provenance over the Web. We also study how Semantic Web technologies have been successfully exploited to express, query and reason over provenance. Symmetrically, we also identify how Semantic Web technologies such as RDF underpinning the Linked Data effort bring their own difficulties with respect to provenance.
A powerful argument for provenance is that it can help make systems transparent, so that it becomes possible to determine whether a particular use of information is appropriate under a set of rules. Such capability helps make systems and information accountable. To offer accountability, provenance itself must be authentic, and rely on security approaches that we review. We then discuss systems where provenance is the basis of an auditing mechanism to check past processes against rules or regulations. In practice, not all users want to check and audit provenance, instead, they may rely on measures of quality or trust; hence, we review emerging provenance-based approaches to compute trust and quality of data.}
}
@article{Moreau:FAOPM10,
month = dec,
title = {A Formal Account of the Open Provenance Model},
author = {Natalia Kwasnikowska and Luc Moreau and Jan {Van den Bussche}},
year = {2010},
export = {yes},
note = {Submitted for publication},
eprints = {http://eprints.ecs.soton.ac.uk/21819/},
abstract = {The Open Provenance Model (OPM) is a community data model for provenance that is designed to facilitate the meaningful interchange of provenance information between systems. Underpinning OPM is a notion of directed graph, used to represent data products and processes involved in past computations, and dependencies between them; it is complemented by inference rules allowing new dependencies to be derived. The Open Provenance Model was designed from requirements captured in two `Provenance Challenges', and tested during the third: these challenges were international, multi-disciplinary activities aiming to exchange provenance information between multiple systems and query it. The design of OPM was mostly driven by practical and pragmatic considerations. The purpose of this paper is to formalize the theory underpinning this data model. Specifically, this paper proposes a temporal semantics for OPM graphs, defined in terms of a set of ordering constraints between time-points associated with OPM constructs. OPM inferences are characterized with respect to this temporal semantics, and a novel set of patterns is introduced to establish soundness and completeness properties. Building on this novel foundation, the paper proposes new definitions for graph algebraic operations, graph refinement, and the notion of account, by which multiple descriptions of the same execution are allowed to co-exist in the same graph. Overall, this paper provides a strong theoretical underpinning for a data model being adopted by a community of users, helping its disambiguation and promoting interoperability.}
}
@techreport{Gil:XG10,
author = {Yolanda Gil and James Cheney and Paul Groth and Olaf Hartig
and Simon Miles and Luc Moreau and Paulo {Pinheiro da Silva}},
title = {Provenance XG Final Report},
institution = {World Wide Web Consortium},
year = {2010},
url = {http://www.w3.org/2005/Incubator/prov/XGR-prov-20101214/},
export = {yes},
month = dec,
abstract = {Given the increased interest in provenance in the Semantic Web area and in the Web community at large, the W3C established the Provenance Incubator Group as part of the W3C Incubator Activity with a charter to provide a state-of-the art understanding and develop a roadmap in the area of provenance and possible recommendations for standardization efforts. This document summarizes the findings of the group.}
}