<?xml version="1.0" encoding="UTF-8"?>
<!--
  +===========================================================================+
  |                                                                           |
  |                    NATIONAL GEOSPATIAL DIGITAL ARCHIVE                    |
  |                 University of California at Santa Barbara                 |
  |                                                                           |
  +===========================================================================+
-->
<!-- $Header: /export/home/gjanee/ngda/data-model/RCS/manifest.rnc,v 1.2 2006/02/15 18:00:39 gjanee Exp $ -->
<!--
  DESCRIPTION
  
      RELAX NG schema for archival object manifests.
  
      Storage note: there is a direct correspondence between an
      archival object's manifest and the representation of that object
      in the storage subsystem.  In the storage subsystem, an archival
      object is represented as a directory tree.  Files and
      subdirectories within the tree correspond one-to-one to
      components in the manifest and have the names listed in the
      manifest.  The manifest itself is stored in a file
      "manifest.xml" in the root directory of the tree.
  
  AUTHOR
  
      Greg Janee
      gjanee@alexandria.ucsb.edu
  
  HISTORY
  
      $Log: manifest.rnc,v $
      Revision 1.2  2006/02/15 18:00:39  gjanee
      Added support for representing inter-object relationships and
      lineage.  Multiple definitions per archival object and component
      are now allowed.  To reflect actual usage, relaxed the
      restriction that original filenames must be path-less; they may
      now be pathnames.  Added documentation regarding identifiers,
      restrictions, and extra-schema validation.  This schema is fully
      backward-compatible with the previous version except for the
      version change in the XML namespace.  (There is a new
      restriction on object identifiers that is not
      backward-compatible in theory, but in practice the change should
      have no implications.)
  
      Revision 1.1  2005/11/14 00:19:38  gjanee
      Initial revision
  
-->
<grammar ns="tag:ngda.org,2005:schemas/1.1/manifest" xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
  <!--
    A <manifest> describes an archival object by a persistent,
    universally unique identifier; zero or more relationships to other
    archival objects; zero or more definitions that apply to the
    object as a whole; optionally, lineage information that applies to
    the object as a whole; and finally and most fundamentally, zero or
    more components that comprise the object.
    
    The object identifier must be an absolute URI.  It must not
    contain a number sign ('#') or fragment identifier, as the latter
    is used to identify components within archival objects.  The "tag"
    URI scheme (http://www.ietf.org/rfc/rfc4151.txt) is recommended.
    
    As an ingest and validation aid, the manifest may also contain a
    reference to a template that defines the common structure of a
    class of objects.
  -->
  <start>
    <element name="manifest">
      <!-- Required: the identifier of the archival object. -->
      <element name="objectIdentifier"><data type="anyURI"/></element>
      <!-- Optional reference to a template for this class of objects. -->
      <optional>
        <element name="templateRef"><data type="anyURI"/></element>
      </optional>
      <!-- Object-level relationships, definitions and lineage, in that
           order; each list may be empty and lineage may be absent. -->
      <zeroOrMore><ref name="Relationship"/></zeroOrMore>
      <zeroOrMore><ref name="Definition"/></zeroOrMore>
      <optional><ref name="Lineage"/></optional>
      <!-- The components that comprise the object (possibly none). -->
      <zeroOrMore><ref name="Component"/></zeroOrMore>
    </element>
  </start>
  <!--
    A <relationship> records a typed (i.e., named) unidirectional
    relationship between the object and another, "target" archival
    object; the target object is specified by its identifier.
    
    It is recommended that relationship names (i.e., values of the
    "type" attribute) be URIs.
  -->
  <define name="Relationship">
    <element name="relationship">
      <!-- The target object, given by its identifier. -->
      <attribute name="targetObjectRef"><data type="anyURI"/></attribute>
      <!-- The relationship name; the schema accepts any text here. -->
      <attribute name="type"><text/></attribute>
    </element>
  </define>
  <!-- A component may be a directory or a file. -->
  <define name="Component">
    <!-- Exactly one of the two component kinds. -->
    <choice>
      <ref name="File"/>
      <ref name="Directory"/>
    </choice>
  </define>
  <!--
    Both kinds of components have the following elements in common: a
    name that uniquely identifies the component within the enclosing
    scope (either the enclosing directory or, in the case of top-level
    components, the archival object's root directory); zero or more
    definitions that apply to the component; and, optionally, lineage
    information that applies to the component.
    
    The name "manifest.xml" is reserved in the root directory.
  -->
  <define name="CommonComponentElements">
    <!-- An ordered sequence: name, then definitions, then lineage. -->
    <group>
      <!-- The component name, constrained to the NCName datatype. -->
      <element name="name"><data type="NCName"/></element>
      <!-- Any number of definitions that apply to the component. -->
      <zeroOrMore><ref name="Definition"/></zeroOrMore>
      <!-- At most one lineage record for the component. -->
      <optional><ref name="Lineage"/></optional>
    </group>
  </define>
  <!--
    A <directory> contains zero or more files and subdirectories.
    
    There are two kinds of directories.  In a "subcomponents"
    directory, the members of the directory are conceptually
    subcomponents of the archival object.  If any definitions are
    present, the definitions apply to the directory as a whole (for
    example, a directory containing the multiple files comprising a
    Shapefile might have the ESRI Shapefile specification as a
    definition).  Similarly, if lineage information is present, it
    applies to the directory as a whole.
    
    In an "alternatives" directory, the directory members are
    conceptually equivalent representations of the archival object
    (e.g., JPEG 2000 and TIFF versions of the same image).  An
    "alternatives" directory must NOT have any definitions and must
    NOT have lineage information.
    
    Recall that the <manifest> element contains zero or more
    components.  If it contains more than one component, there is an
    implicit <directory type="subcomponents"> element at the outermost
    level.
  -->
  <define name="Directory">
    <element name="directory">
      <!-- The directory kind.  Both kinds share the content model
           below; the rule that an "alternatives" directory has neither
           definitions nor lineage is not enforced by this pattern. -->
      <attribute name="type">
        <choice>
          <value>alternatives</value>
          <value>subcomponents</value>
        </choice>
      </attribute>
      <ref name="CommonComponentElements"/>
      <!-- Directory members: nested files and subdirectories. -->
      <zeroOrMore><ref name="Component"/></zeroOrMore>
    </element>
  </define>
  <!--
    A <file> describes a file by a size in bytes and a content
    signature.  The original filename or pathname may optionally be
    included.
    
    The "MD5" value of the "algorithm" attribute refers to the MD5
    message-digest algorithm (http://www.ietf.org/rfc/rfc1321.txt).
    
    Storage note: the corresponding physical file in the storage
    subsystem is named by the name, not the original filename.
  -->
  <define name="File">
    <element name="file">
      <ref name="CommonComponentElements"/>
      <!-- The original filename or pathname, as free text. -->
      <optional>
        <element name="originalFilename"><text/></element>
      </optional>
      <!-- The file size in bytes. -->
      <element name="size"><data type="nonNegativeInteger"/></element>
      <!-- The content signature as text; "MD5" is the only algorithm
           value the schema accepts. -->
      <element name="signature">
        <attribute name="algorithm"><value>MD5</value></attribute>
        <text/>
      </element>
    </element>
  </define>
  <!--
    A definition is a reference to another archival object, which is
    specified by identifier.  The other object should be some kind of
    specification (e.g., a format specification) that describes the
    syntax, semantics, or other interpretation of the component or
    archival object containing the definition.
  -->
  <define name="Definition">
    <!-- The identifier of the archival object that serves as the
         definition. -->
    <element name="definitionRef"><data type="anyURI"/></element>
  </define>
  <!--
    <lineage> describes the derivation of an archival object or
    component thereof from zero or more other archival objects or
    components thereof.  A <sourceComponentRef> may reference:
    
        Another component within the same object, by specifying the
        complete pathname to the component relative to the object's
        root directory (e.g., "path/to/component").  See <path> in
        ingest.rnc for a syntactic specification of component paths.
    
        Another archival object, by specifying the other object's
        identifier (e.g., "tag:ngda.org,2005:path/to/object").  Note
        that this case is distinguished syntactically from the
        previous case by the presence of a colon-delimited URI scheme.
    
        A component within another archival object, by specifying the
        object's identifier followed by the component's complete
        pathname relative to the object's root directory as a URI
        fragment (e.g.,
        "tag:ngda.org,2005:path/to/object#path/to/component").
    
    The directed graph induced by lineage derivations must be acyclic.
    Furthermore, an archival object or component thereof must not be
    derived from a constituent component.  Equivalently, every
    component is implicitly derived from the containing component or
    archival object, and thus a lineage derivation in the reverse
    direction would violate the aforementioned acyclicity requirement.
    
    The optional <notes> element may be used to record any other
    lineage-related information.
  -->
  <define name="Lineage">
    <element name="lineage">
      <!-- Source references come first; there may be none. -->
      <zeroOrMore>
        <element name="sourceComponentRef"><data type="anyURI"/></element>
      </zeroOrMore>
      <!-- At most one free-text notes element follows the sources. -->
      <optional>
        <element name="notes"><text/></element>
      </optional>
    </element>
  </define>
  <!--
    Summary of additional validity checks that are not specified by
    the schema itself:
    
        1. Object identifiers are absolute, fragment-less URIs.
    
        2. Component names are unique within the enclosing scope.
    
        3. The component name "manifest.xml" is reserved at the root
        level for the manifest.
    
        4. "Alternatives" directories have neither definitions nor
        lineage.
    
        5. Referenced objects (in relationships, definitions, and
        lineage derivations) and referenced components (in lineage
        derivations) exist.
    
        6. Objects and components are not derived from constituent
        components.
    
        7. The lineage derivation graph is acyclic.
  -->
</grammar>
