<!--
       '$RCSfile: eml-physical.dtd,v $'
       Copyright: 2000 Regents of the University of California and the
                  National Center for Ecological Analysis and Synthesis
     For Details: http://knb.ecoinformatics.org/

        '$Author: higgins $'
          '$Date: 2001-10-19 21:24:30 $'
      '$Revision: 1.3 $'

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-->
<!-- EML DTD document that defines the structural
     characteristics of physical objects -->
<!-- Root element. Content, in order: a required identifier and format;
     an optional characterEncoding and size; any number of authentication
     elements; optional compressionMethod, encodingMethod, numHeaderLines,
     recordDelimiter and maxRecordLength; any number of quoteCharacter and
     literalCharacter elements; then zero or more field descriptions, each
     an optional fieldStartColumn followed by either a fieldDelimiter or
     a fieldWidth. -->
<!ELEMENT eml-physical (
    identifier,
    format,
    characterEncoding?,
    size?,
    authentication*,
    compressionMethod?,
    encodingMethod?,
    numHeaderLines?,
    recordDelimiter?,
    maxRecordLength?,
    quoteCharacter*,
    literalCharacter*,
    (fieldStartColumn?, (fieldDelimiter | fieldWidth))*
)>
<!-- File identifier of the metadata document (character data).
     The optional "system" attribute is free-form CDATA. -->
<!ELEMENT identifier (#PCDATA)>
<!ATTLIST identifier system CDATA #IMPLIED>
<!-- Size of the physical object (character data). The "unit" attribute
     defaults to "bytes" when it is not given. -->
<!ELEMENT size (#PCDATA)>
<!ATTLIST size unit CDATA "bytes">
<!-- Authentication value (character data); the optional "method"
     attribute names the authentication method. -->
<!ELEMENT authentication (#PCDATA)>
<!ATTLIST authentication method CDATA #IMPLIED>
<!-- Entity format (e.g., text, name of various binary formats [TIFF]).
     Required by the eml-physical content model. -->
<!ELEMENT format (#PCDATA)>
<!-- Character encoding for text files (e.g., ASCII, UTF-8) -->
<!ELEMENT characterEncoding (#PCDATA)>
<!-- Method of compression (optional in eml-physical) -->
<!ELEMENT compressionMethod (#PCDATA)>
<!-- Method of encoding (optional in eml-physical) -->
<!ELEMENT encodingMethod (#PCDATA)>
<!-- The character used to delimit records in the entity -->
<!ELEMENT recordDelimiter (#PCDATA)>
<!-- Maximum length of a record in the entity.
     NOTE(review): the unit (characters or bytes) is not stated in this
     DTD; confirm against the consuming application. -->
<!ELEMENT maxRecordLength (#PCDATA)>
<!-- The character used to quote data values so that the
     field delimiters can be used inside a data value, typically
     " or ' -->
<!ELEMENT quoteCharacter (#PCDATA)>
<!-- The character used to escape special characters
     so that they are interpreted literally, usually \  -->
<!ELEMENT literalCharacter (#PCDATA)>
<!-- Number of header lines, i.e. lines of introductory information
     that come before the data -->
<!ELEMENT numHeaderLines (#PCDATA)>

<!--
                        Variable width format fields (attributes) can vary in their 
                        field length, thus the end of the field is 
                        delimited by a special character called a 
                        field delimiter (typically a comma or a space).

                        Data sets are generally classified as fixedWidth
                        format or variableWidth format, but we have
                        determined that this is actually a per-field
                        classification because one may encounter
                        fixedWidth fields mixed together in the same
                        data file with variableWidth fields.
                        
                        In our encoding scheme, the start of each field
                        is assumed to be the column after the last column
                        of the previous field, or the first column
                        if this is the first field in the dataset, unless 
                        the starting column is explicitly enumerated using the
                        "fieldStartColumn" element.
                        The end column for each field is determined
                        using either a special character delimiter indicated
                        using the "fieldDelimiter" element,
                        or a fixed field length indicated by using the "fieldWidth"
                        element.  The delimiter for the last field in the data set can be omitted.
                        variableWidth fields can vary in their field length, and the end of
                        the field is delimited by a special character
                        called a field delimiter, usually a comma or
                        a tab character.  fixedWidth fields have a set
                        length, and so the end of the field can always
                        be determined by adding the fieldWidth to the
                        starting column number.  Here is an example:
                        
                        Assume we have the following data in a data set:

                        May,100aaaa,1.2,
                        April,200aaaa,3.4,
                        June,300bbbb,4.6,

                        The metadata indicating the physical layout of the 4 fields would include the 
                        following:

                          <fieldDelimiter>,</fieldDelimiter>
                          <fieldWidth>3</fieldWidth>
                          <fieldDelimiter>,</fieldDelimiter>
                          <fieldDelimiter>,</fieldDelimiter>

                        In a strictly fixed format file, the metadata would be slightly different:

                        May100aaaa1.2
                        Apr200aaaa3.4
                        Jun300bbbb4.6

                          <fieldWidth>3</fieldWidth>
                          <fieldWidth>3</fieldWidth>
                          <fieldWidth>4</fieldWidth>
                          <fieldWidth>3</fieldWidth>           

                        or, one could explicitly describe the starting columns:
                
                          <fieldStartColumn>1</fieldStartColumn>
                          <fieldWidth>3</fieldWidth>
                          <fieldStartColumn>4</fieldStartColumn>
                          <fieldWidth>3</fieldWidth>
                          <fieldStartColumn>7</fieldStartColumn>
                          <fieldWidth>4</fieldWidth>
                          <fieldStartColumn>11</fieldStartColumn>
                          <fieldWidth>3</fieldWidth>                   
-->
<!-- Explicit starting column of a field; may optionally precede each
     fieldDelimiter or fieldWidth in the eml-physical content model -->
<!ELEMENT fieldStartColumn (#PCDATA)>
<!-- Special character that marks the end of a variable-width field -->
<!ELEMENT fieldDelimiter (#PCDATA)>
<!-- Length of a fixed-width field -->
<!ELEMENT fieldWidth (#PCDATA)>
<!-- End of file -->