Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  • layerTransform (literal string or DFDL expression) - XSD NCNames - all reserved. In the future this may become extensible allowing QNames to be used.
  • layerEncoding (literal string or DFDL expression)
  • layerLengthKind - can be 'implicit', 'explicit', or 'boundaryMark'. Perhaps other values in the future (e.g., 'pattern')
  • layerBoundaryMark (literal string or DFDL expression) - used with dfdl:layerLengthKind 'boundaryMark'
  • layerLength (literal string or DFDL expression) - used with dfdl:layerLengthKind 'explicit'
  • layerLengthPattern - used with dfdl:layerLengthKind 'pattern'
  • (TBD: layer properties for when dfdl:layerLengthKind is 'prefixed' - to be defined at such time as that is supported, if ever.)

...

The meaning of the values of these properties, and any constraints on those values, are essentially specified by the layer transform algorithm. For example: for layerLengthKind 'boundaryMark' and the base64 layer transform, we expect that the layerBoundaryMark must be a string, and this string has constraints beyond those we see in the regular dfdl:terminator property. In particular for base64, we expect that layerBoundaryMark cannot contain DFDL character entities of any kind. This makes it impossible, for example, for a base64 'boundaryMark' layer to be bounded by a string containing ASCII NUL (character code 0), as there is no way to express this without use of DFDL character entities. These limitations must be enforced by the algorithms themselves.

...

Code Block
languagexml
<!--
  Named format selecting the foldedLines layer transform.
  layerLengthKind is 'boundaryMark', but no layerBoundaryMark is given here.
  Properties inside dfdl:format are written without the dfdl: prefix.
-->
<dfdl:defineFormat name="foldedLines">
  <dfdl:format layerTransform="foldedLines" layerLengthKind="boundaryMark"/>
</dfdl:defineFormat>

<!--
  Named format selecting the base64 layer transform over us-ascii text.
  The layer is bounded by a boundary mark whose value is computed by an
  expression reading the 'marker' element.
-->
<dfdl:defineFormat name="base64">
  <dfdl:format layerTransform="base64" layerEncoding="us-ascii"
               layerLengthKind="boundaryMark" layerBoundaryMark="{ ./marker }"/>
  <!-- note expression above is ./marker, not ../marker -->
</dfdl:defineFormat>

 <xs:sequence dfdl:ref="tns:foldedLines">
   <xs:sequence>
     ...
     ... presumably everything here is textual, and utf-8. FoldedLines only applies sensibly to text.
     ...
     <xs:element name="marker" type="xs:string" .../>
     <xs:sequence dfdl:ref="tns:base64">
        <xs:sequence>
          ...
          ... everything here is parsed against the bytes obtained from base64 decoding
          ... which is itself decoding the output of the foldedLines transform
          ... above. Base64 requires only us-ascii, which is a subset of utf-8.
          ...
        </xs:sequence><!-- end base64 data -->
      </xs:sequence><!-- end base64 sequence e.g., framing, aligning -->
   </xs:sequence><!-- end of foldedLines data-->
 </xs:sequence><!-- end foldedLines sequence e.g., framing, aligning -->

...

Code Block
languagexml
<xs:schema ....>

 <dfdl:format separatorPosition="infix" lengthKind="delimitedboundaryMark" encoding="utf-8"
  occursCountKind="parsed" separator="" sequenceKind="ordered"
  separatorPosition="infix"/>

 <dfdl:defineFormat name="folded">
  <dfdl:format layerTransform="foldedLines" layerLengthKind="delimitedboundaryMark" layerEncoding="us-ascii"/>
  <!-- delimitedboundaryMark here means to enclosing end-of-data, as no boundary mark delimiter is defined. -->
</dfdl:defineFormat>

<!--
  Named format selecting the quotedPrintable layer transform, with the layer
  length determined by a regular expression.
-->
<dfdl:defineFormat name="qp">
  <!-- The '<' of the lookbehind is written as &lt; because a literal '<'
       is not allowed inside an XML attribute value. -->
  <dfdl:format layerTransform="quotedPrintable" layerLengthKind="pattern"
     layerLengthPattern="[^\n]*?(?=(?&lt;!=)\n)"/>

  <!-- QPs are terminated by a newline that is not preceded by an =.
       This final newline is not consumed as part of the content. -->

  <!-- NOTE(review): [^\n]*? cannot itself match across a newline, so content
       containing an =newline soft break would presumably not match as a whole.
       Confirm whether the pattern should allow such embedded newlines. -->

  <!-- Alternatively, the QP transform itself can determine the length
       by searching for this final newline (but leaving it there).
       In which case the layerLengthKind would be "implicit" -->
</dfdl:defineFormat>

 <xs:element name="VCalendar" dfdl:initiator="BEGIN:VCALENDAR%NL;" dfdl:terminator="END:VCALENDAR%NL; END:VCALENDAR">
  <xs:complexType>
    <xs:sequence dfdl:separator="%NL;" dfdl:sequenceKind="unordered">
      <xs:sequence dfdl:ref="tns:folded">
         <xs:element name="ProdID" type="xs:string" dfdl:initiator="PRODID:" minOccurs="0"/>
      </xs:sequence>
      <xs:element name="Version" type="xs:string" dfdl:initiator="VERSION:" minOccurs="0" />
      <xs:element name="VEvent" maxOccurs="unbounded" minOccurs="0" dfdl:occursCountKind="parsed"
        dfdl:initiator="BEGIN:VEVENT%NL;" dfdl:terminator="END:VEVENT">
        <xs:complexType>
          <xs:sequence dfdl:separator="%NL;" dfdl:sequenceKind="unordered">
            <xs:element name="DTStart" type="xs:string" dfdl:initiator="DTSTART:" />
            <xs:element name="DTEnd" type="xs:string" dfdl:initiator="DTEND:" />
            <!-- 
              content from here could have long lines, so must be folded 
            -->
            <xs:sequence dfdl:ref="tns:folded">
              <xs:element name="Location" type="xs:string" dfdl:initiator="LOCATION:" minOccurs="0"/>
              <xs:element name="UID" type="xs:string" dfdl:initiator="UID:" minOccurs="0"/>
              <xs:element name="Description" dfdl:initiator="DESCRIPTION:" minOccurs="0">
                <xs:complexType>
                  <xs:sequence>              
                   <xs:element name="Encoding" type="xs:string" 
                               dfdl:initiator="ENCODING=" dfdl:terminator=":" minOccurs="0" />
                     <xs:choice dfdl:choiceDispatchKey="{ if (fn:exists(./Encoding)) then ./Encoding else '' }">
                       <!-- 
                         we inspect the value of the Encoding element and decide what branch of the choice
                         based on it 
                        -->
                       <xs:sequence dfdl:choiceBranchKey="QUOTED-PRINTABLE">
                         dfdl:separator="" dfdl:sequenceKind="unordered">
                         <!--
                          Each branch starts with a distinct dummy element to satisfy the UPA rules of XML Schema
                         -->
                         <xs:element name="QP" type="xs:string" dfdl:inputValueCalc="{ '' }" />
                         <!--
                          Here notice tha tthe layerRef for the qp data is scoped to just this inner element.
                         -->
                         <xs:sequence dfdl:ref="tns:qp">
                           <xs:element name="Value" type="xs:string"/>
                         </xs:sequence><!-- end layer quoted printable -->
                       </xs:sequence>
                       <!-- 
                          repeat the above pattern for the choice branches for the various encodings 
                        -->
                    </xs:choice>
                  </xs:sequence>
                </xs:complexType>
              </xs:element>           
              <xs:element name="Summary" type="xs:string"  dfdl:initiator="SUMMARY:" minOccurs="0"/>
              <xs:element name="Priority" type="xs:string" dfdl:initiator="PRIORITY:" minOccurs="0" />
            </xs:sequence>
          </xs:complexType>
        </xs:element>
      </xs:sequence><!-- end folded layer -->
    </xs:sequence>
  </xs:complexType>
</xs:element>
</xs:schema>

...

Code Block
languagexml
<!--
  Example schema for a text file of newline-terminated lines: an optional
  header line of comma-separated titles, followed by records whose number
  of comma-separated items is computed from the number of header titles.
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:fn="http://www.w3.org/2005/xpath-functions"
  xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/" xmlns:ex="http://example.com"
  targetNamespace="http://example.com" elementFormDefault="unqualified">

  <xs:include schemaLocation="built-in-formats.xsd" />

  <xs:annotation>
    <xs:appinfo source="http://www.ogf.org/dfdl/">
      <!-- lengthKind is the regular DFDL property; 'delimited' is used because
           'boundaryMark' is a value of layerLengthKind only. -->
      <dfdl:format ref="ex:daffodilTest1" separator="" initiator=""
        terminator="" leadingSkip="0" textTrimKind="none" initiatedContent="no"
        alignment="implicit" alignmentUnits="bits" trailingSkip="0" ignoreCase="no"
        separatorPosition="infix" occursCountKind="implicit"
        emptyValueDelimiterPolicy="both" representation="text" textNumberRep="standard"
        lengthKind="delimited" encoding="ASCII" />
    </xs:appinfo>
  </xs:annotation>

  <xs:element name="file" type="ex:fileType"/>

  <!-- broke this up to provide some reusable types and groups here -->

  <xs:complexType name="fileType">
    <xs:group ref="ex:fileTypeGroup"/>
  </xs:complexType>

  <xs:group name="fileTypeGroup">
    <!-- Each line (header or record) is followed by a newline separator. -->
    <xs:sequence dfdl:separator="%NL;" dfdl:separatorPosition="postfix">
      <xs:element name="header" minOccurs="0" maxOccurs="1"
        dfdl:occursCountKind="implicit">
        <xs:complexType>
          <xs:sequence dfdl:separator=",">
            <xs:element name="title" type="xs:string" maxOccurs="unbounded" />
          </xs:sequence>
        </xs:complexType>
      </xs:element>
      <xs:element name="record" maxOccurs="unbounded">
        <xs:complexType>
          <xs:sequence dfdl:separator=",">
            <!-- The number of items per record is the number of title elements in the header. -->
            <xs:element name="item" type="xs:string" maxOccurs="unbounded"
              dfdl:occursCount="{ fn:count(../../header/title) }"
              dfdl:occursCountKind="expression" />
          </xs:sequence>
        </xs:complexType>
      </xs:element>
    </xs:sequence>
  </xs:group>

</xs:schema>

...