tags:

views:

56

answers:

1

Hi. I have input XML which I have to reorganize into another XML using XSLT. Here is the example of input file:

<?xml version="1.0" encoding="UTF-8"?>
<Report asOfDate="2010-03-31 00:00:00"
       <Record>
 <FieldValue fieldName="rapportage_nihil" fieldValue="false" fieldValueIsNull="false" fieldValueNatural="false"/>
 <FieldValue fieldName="periode" fieldValue="31-Mar-2010" fieldValueIsNull="false" fieldValueNatural="2010-03-31 00:00:00"/>
 <FieldValue fieldName="formulierid" fieldValue="9001HK4V10" fieldValueIsNull="false" fieldValueNatural="9001HK4V10"/>
 <FieldValue fieldName="versie" fieldValue="1" fieldValueIsNull="false" fieldValueNatural="1"/>
 <FieldValue fieldName="frequentie" fieldValue="M" fieldValueIsNull="false" fieldValueNatural="M"/>
 <FieldValue fieldName="variant_type" fieldValue="Landen" fieldValueIsNull="false" fieldValueNatural="Landen"/>
 <FieldValue fieldName="value" fieldValue="0003" fieldValueIsNull="false" fieldValueNatural="0003"/>
 <FieldValue fieldName="post_value" fieldValue="61,941.00" fieldValueIsNull="false" fieldValueNatural="61941.0"/>
 <FieldValue fieldName="cube" fieldValue="c01" fieldValueIsNull="false" fieldValueNatural="c01"/>
 <FieldValue fieldName="rij" fieldValue="r_06_000_0_065_0" fieldValueIsNull="false" fieldValueNatural="r_06_000_0_065_0"/>
 <FieldValue fieldName="kolom" fieldValue="c_2250_SPU" fieldValueIsNull="false" fieldValueNatural="c_2250_SPU"/>
    </Record>
    <Record>
 <FieldValue fieldName="rapportage_nihil" fieldValue="false" fieldValueIsNull="false" fieldValueNatural="false"/>
 <FieldValue fieldName="periode" fieldValue="31-Mar-2010"  fieldValueIsNull="false" fieldValueNatural="2010-03-31 00:00:00"/>
 <FieldValue fieldName="formulierid" fieldValue="9001HK1V10" fieldValueIsNull="false" fieldValueNatural="9001HK1V10"/>
 <FieldValue fieldName="versie" fieldValue="1" fieldValueIsNull="false" fieldValueNatural="1"/>
 <FieldValue fieldName="frequentie" fieldValue="M" fieldValueIsNull="false" fieldValueNatural="M"/>
 <FieldValue fieldName="variant_type" fieldValue="Landen" fieldValueIsNull="false" fieldValueNatural="Landen"/>
 <FieldValue fieldName="value" fieldValue="0003" fieldValueIsNull="false" fieldValueNatural="0003"/>
 <FieldValue fieldName="post_value" fieldValue="4,157.00" fieldValueIsNull="false" fieldValueNatural="4157.0"/>
 <FieldValue fieldName="cube" fieldValue="c01" fieldValueIsNull="false" fieldValueNatural="c01"/>
 <FieldValue fieldName="rij" fieldValue="r_06_570_0_070_0" fieldValueIsNull="false" fieldValueNatural="r_06_570_0_070_0"/>
 <FieldValue fieldName="kolom" fieldValue="c_2250_SPU" fieldValueIsNull="false" fieldValueNatural="c_2250_SPU"/>
    </Record>
    <Record>
 <FieldValue fieldName="rapportage_nihil" fieldValue="false" fieldValueIsNull="false" fieldValueNatural="false"/>
 <FieldValue fieldName="periode" fieldValue="31-Mar-2010" fieldValueIsNull="false" fieldValueNatural="2010-03-31 00:00:00"/>
 <FieldValue fieldName="formulierid" fieldValue="9001HK1V10" fieldValueIsNull="false" fieldValueNatural="9001HK1V10"/>
 <FieldValue fieldName="versie" fieldValue="1" fieldValueIsNull="false" fieldValueNatural="1"/>
 <FieldValue fieldName="frequentie" fieldValue="M" fieldValueIsNull="false" fieldValueNatural="M"/>
 <FieldValue fieldName="variant_type" fieldValue="Landen" fieldValueIsNull="false" fieldValueNatural="Landen"/>
 <FieldValue fieldName="value" fieldValue="0629" fieldValueIsNull="false" fieldValueNatural="0629"/>
 <FieldValue fieldName="post_value" fieldValue="1.00" fieldValueIsNull="false" fieldValueNatural="1.0"/>
 <FieldValue fieldName="cube" fieldValue="c01" fieldValueIsNull="false" fieldValueNatural="c01"/>
 <FieldValue fieldName="rij" fieldValue="r_24_000_1_020_0" feldValueIsNull="false" fieldValueNatural="r_24_000_1_020_0"/>
 <FieldValue fieldName="kolom" fieldValue="c_2250_SPU" fieldValueIsNull="false" fieldValueNatural="c_2250_SPU"/>
    </Record>
    <Record>
 <FieldValue fieldName="rapportage_nihil" fieldValue="false" fieldValueIsNull="false" fieldValueNatural="false"/>
 <FieldValue fieldName="periode" fieldValue="31-Mar-2010" fieldValueIsNull="false" fieldValueNatural="2010-03-31 00:00:00"/>
 <FieldValue fieldName="formulierid" fieldValue="9001HK1V10" fieldValueIsNull="false" fieldValueNatural="9001HK1V10"/>
 <FieldValue fieldName="versie" fieldValue="1" fieldValueIsNull="false" fieldValueNatural="1"/>
 <FieldValue fieldName="frequentie" fieldValue="M" fieldValueIsNull="false" fieldValueNatural="M"/>
 <FieldValue fieldName="variant_type" fieldValue="Landen" fieldValueIsNull="false" fieldValueNatural="Landen"/>
 <FieldValue fieldName="value" fieldValue="0003" fieldValueIsNull="false" fieldValueNatural="0003"/>
 <FieldValue fieldName="post_value" fieldValue="61.00" fieldValueIsNull="false" fieldValueNatural="61.0"/>
 <FieldValue fieldName="cube" fieldValue="c01" fieldValueIsNull="false" fieldValueNatural="c01"/>
 <FieldValue fieldName="rij" fieldValue="r_06_570_1_070_0" fieldValueIsNull="false" fieldValueNatural="r_06_570_1_070_0"/>
 <FieldValue fieldName="kolom" fieldValue="c_2242_SPU" fieldValueIsNull="false" fieldValueNatural="c_2242_SPU"/>
    </Record>
    <Record>
 <FieldValue fieldName="rapportage_nihil" fieldValue="false" fieldValueIsNull="false" fieldValueNatural="false"/>
 <FieldValue fieldName="periode" fieldValue="31-Mar-2010" fieldValueIsNull="false" fieldValueNatural="2010-03-31 00:00:00"/>
 <FieldValue fieldName="formulierid" fieldValue="9001HK1V10" fieldValueIsNull="false" fieldValueNatural="9001HK1V10"/>
 <FieldValue fieldName="versie" fieldValue="1" fieldValueIsNull="false" fieldValueNatural="1"/>
 <FieldValue fieldName="frequentie" fieldValue="M" fieldValueIsNull="false" fieldValueNatural="M"/>
 <FieldValue fieldName="variant_type" fieldValue="Landen" fieldValueIsNull="false" fieldValueNatural="Landen"/>
 <FieldValue fieldName="value" fieldValue="0002" fieldValueIsNull="false" fieldValueNatural="0002"/>
 <FieldValue fieldName="post_value" fieldValue="3.00" fieldValueIsNull="false" fieldValueNatural="3.0"/>
 <FieldValue fieldName="cube" fieldValue="c01" fieldValueIsNull="false" fieldValueNatural="c01"/>
 <FieldValue fieldName="rij" fieldValue="r_18_000_1_000_0" fieldValueIsNull="false" fieldValueNatural="r_18_000_1_000_0"/>
 <FieldValue fieldName="kolom" fieldValue="c_9000_SPU" fieldValueIsNull="false" fieldValueNatural="c_9000_SPU"/>
    </Record>
    <Record>
 <FieldValue fieldName="rapportage_nihil" fieldValue="false" fieldValueIsNull="false" fieldValueNatural="false"/>
 <FieldValue fieldName="periode" fieldValue="31-Mar-2010" fieldValueIsNull="false" fieldValueNatural="2010-03-31 00:00:00"/>
 <FieldValue fieldName="formulierid" fieldValue="9001HK4V10" fieldValueIsNull="false" fieldValueNatural="9001HK4V10"/>
 <FieldValue fieldName="versie" fieldValue="1" fieldValueIsNull="false" fieldValueNatural="1"/>
 <FieldValue fieldName="frequentie" fieldValue="M" fieldValueIsNull="false" fieldValueNatural="M"/>
 <FieldValue fieldName="variant_type" fieldValue="Landen" fieldValueIsNull="false" fieldValueNatural="Landen"/>
 <FieldValue fieldName="value" fieldValue="0003" fieldValueIsNull="false" fieldValueNatural="0003"/>
 <FieldValue fieldName="post_value" fieldValue="2.00" fieldValueIsNull="false" fieldValueNatural="2.0"/>
 <FieldValue fieldName="cube" fieldValue="c01" fieldValueIsNull="false" fieldValueNatural="c01"/>
 <FieldValue fieldName="rij" fieldValue="r_06_000_1_040_0" fieldValueIsNull="false" fieldValueNatural="r_06_000_1_040_0"/>
 <FieldValue fieldName="kolom" fieldValue="c_2250_SPU" fieldValueIsNull="false" fieldValueNatural="c_2250_SPU"/>
        </Record>
</Report>

The output file should look like this:

<?xml version="1.0" encoding="UTF-8"?>
<bestand registratienummer="93391">
  <rapportage nihil="false" periode="2010-03-31" formulierid="9001HK1V10" versie="1" frequentie="M">
   <variant type="Landen" value="0003"/>
   <post value="4,157.00" cube="c01" rij="r_06_570_0_070_0" kolom="c_2250_SPU"/>
   <post value="61.00" cube="c01" rij="r_06_570_1_070_0" kolom="c_2242_SPU"/>
 </rapportage>
 <rapportage nihil="false" periode="2010-03-31" formulierid="9001HK4V10" versie="1" frequentie="M">
 <variant type="Landen" value="0002"/>
 <post value="61,941.00" cube="c01" rij="r_06_000_0_065_0" kolom="c_2250_SPU"/>
 <post value="2.00" cube="c01" rij="r_06_000_1_040_0" kolom="c_2250_SPU"/>
 </rapportage>
 <rapportage nihil="false" periode="2010-03-31" formulierid="9001HK1V10" versie="1" frequentie="M">
 <variant type="Landen" value="0629"/>
 <post value="1.00" cube="c01" rij="r_24_000_1_020_0" kolom="c_2250_SPU"/>
 </rapportage>
</bestand>

Basically I need to group records by variables formulierid and value. The real file contains thousands of records and contains numerous combinations. Can anyone please provide me with a sample script that can do this? I appreciate any help. Thanks very much.

+1  A: 

This XSLT 1.0 transformation

<xsl:stylesheet 
  version="1.0" 
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
>
  <xsl:output method="xml" encoding="utf-8" />

  <xsl:key name="kRecordG2" match="Record" use="
    concat(
      FieldValue[@fieldName='formulierid']/@fieldValue,
      ':',
      FieldValue[@fieldName='value']/@fieldValue
    )
  " />

  <xsl:template match="Report">
    <bestand registratienummer="93391"><!-- the number is nowhere in the input -->
      <xsl:apply-templates mode="rapportage" select="
        Record[
          generate-id()
          = 
          generate-id(key('kRecordG2', 
            concat(
              FieldValue[@fieldName='formulierid']/@fieldValue,
              ':',
              FieldValue[@fieldName='value']/@fieldValue
            )
          )[1])
        ]
      " />
    </bestand>        
  </xsl:template>

  <xsl:template match="Record" mode="rapportage">
    <rapportage>
      <xsl:apply-templates select="FieldValue[@fieldName='rapportage_nihil']" />
      <xsl:apply-templates select="FieldValue[@fieldName='periode']" />
      <xsl:apply-templates select="FieldValue[@fieldName='formulierid']" />
      <xsl:apply-templates select="FieldValue[@fieldName='versie']" />
      <xsl:apply-templates select="FieldValue[@fieldName='frequentie']" />
      <variant type="Landen" value="{FieldValue[@fieldName='value']/@fieldValue}"/>
      <xsl:apply-templates mode="post" select="
        key('kRecordG2', 
          concat(
            FieldValue[@fieldName='formulierid']/@fieldValue,
            ':',
            FieldValue[@fieldName='value']/@fieldValue
          )
        )
      " />
    </rapportage>
  </xsl:template>

  <xsl:template match="Record" mode="post">
    <post>
      <xsl:apply-templates select="FieldValue[@fieldName='post_value']" />
      <xsl:apply-templates select="FieldValue[@fieldName='cube']" />
      <xsl:apply-templates select="FieldValue[@fieldName='rij']" />
      <xsl:apply-templates select="FieldValue[@fieldName='kolom']" />
    </post>
  </xsl:template>

  <xsl:template match="FieldValue">
    <xsl:variable name="attrname">
      <xsl:choose>
        <xsl:when test="@fieldName = 'rapportage_nihil'">nihil</xsl:when>
        <xsl:when test="@fieldName = 'post_value'">value</xsl:when>
        <xsl:otherwise><xsl:value-of select="@fieldName" /></xsl:otherwise>
      </xsl:choose>
    </xsl:variable>
    <xsl:attribute name="{$attrname}">
      <xsl:value-of select="@fieldValue" />
    </xsl:attribute>
  </xsl:template>

</xsl:stylesheet>

gives:

<bestand registratienummer="93391">
  <rapportage nihil="false" periode="31-Mar-2010" formulierid="9001HK4V10" versie="1" frequentie="M">
    <variant type="Landen" value="0003" />
    <post value="61,941.00" cube="c01" kolom="c_2250_SPU"></post>
    <post value="2.00" cube="c01" kolom="c_2250_SPU"></post>
  </rapportage>
  <rapportage nihil="false" periode="31-Mar-2010" formulierid="9001HK1V10" versie="1" frequentie="M">
    <variant type="Landen" value="0003" />
    <post value="4,157.00" cube="c01" kolom="c_2250_SPU"></post>
    <post value="61.00" cube="c01" kolom="c_2242_SPU"></post>
  </rapportage>
  <rapportage nihil="false" periode="31-Mar-2010" formulierid="9001HK1V10" versie="1" frequentie="M">
    <variant type="Landen" value="0629" />
    <post value="1.00" cube="c01" kolom="c_2250_SPU"></post>
  </rapportage>
  <rapportage nihil="false" periode="31-Mar-2010" formulierid="9001HK1V10" versie="1" frequentie="M">
    <variant type="Landen" value="0002" />
    <post value="3.00" cube="c01" kolom="c_9000_SPU"></post>
  </rapportage>
</bestand>

I'm working on a more elegant version, let's see if I can find one...


Well, I think it does not get much better than this, at least not in XSLT 1.0. A little faster for big input sets maybe, by using an extra key:

<xsl:stylesheet 
  version="1.0" 
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
>
  <xsl:output method="xml" encoding="utf-8" />

  <xsl:key name="kRecordG2" match="Record" use="
    concat(
      FieldValue[@fieldName='formulierid']/@fieldValue,
      ':',
      FieldValue[@fieldName='value']/@fieldValue
    )
  " />
  <xsl:key name="kProp" match="FieldValue" use="
    concat(generate-id(..), @fieldName)
  " />

  <xsl:template match="Report">
    <bestand registratienummer="93391"><!-- the number is nowhere in the input -->
      <xsl:apply-templates mode="rapportage" select="
        Record[
          generate-id()
          = 
          generate-id(key('kRecordG2', 
            concat(
              FieldValue[@fieldName='formulierid']/@fieldValue,
              ':',
              FieldValue[@fieldName='value']/@fieldValue
            )
          )[1])
        ]
      " />
    </bestand>
  </xsl:template>

  <xsl:template match="Record" mode="rapportage">
    <rapportage>
      <xsl:variable name="id" select="generate-id()" />
      <xsl:apply-templates select="key('kProp', concat($id, 'rapportage_nihil'))" />
      <xsl:apply-templates select="key('kProp', concat($id, 'periode'))" />
      <xsl:apply-templates select="key('kProp', concat($id, 'formulierid'))" />
      <xsl:apply-templates select="key('kProp', concat($id, 'versie'))" />
      <xsl:apply-templates select="key('kProp', concat($id, 'frequentie'))" />
      <variant type="Landen" value="{key('kProp', concat($id, 'value'))/@fieldValue}"/>
      <xsl:apply-templates mode="post" select="
        key('kRecordG2', 
          concat(
            key('kProp', concat($id, 'formulierid'))/@fieldValue,
            ':',
            key('kProp', concat($id, 'value'))/@fieldValue
          )
        )
      " />
    </rapportage>
  </xsl:template>

  <xsl:template match="Record" mode="post">
    <post>
      <xsl:variable name="id" select="generate-id()" />
      <xsl:apply-templates select="key('kProp', concat($id, 'post_value'))" />
      <xsl:apply-templates select="key('kProp', concat($id, 'cube'))" />
      <xsl:apply-templates select="key('kProp', concat($id, 'cube'))" />
      <xsl:apply-templates select="key('kProp', concat($id, 'kolom'))" />
    </post>
  </xsl:template>

  <xsl:template match="FieldValue">
    <xsl:variable name="attrname">
      <xsl:choose>
        <xsl:when test="@fieldName = 'rapportage_nihil'">nihil</xsl:when>
        <xsl:when test="@fieldName = 'post_value'">value</xsl:when>
        <xsl:otherwise><xsl:value-of select="@fieldName" /></xsl:otherwise>
      </xsl:choose>
    </xsl:variable>
    <xsl:attribute name="{$attrname}">
      <xsl:value-of select="@fieldValue" />
    </xsl:attribute>
  </xsl:template>

</xsl:stylesheet>
Tomalak
Tomalak, thanks a lot for your help. I'll try it out and will let you know how it comes out. Thanks again!
Tomalak, thanks again, XSLT you wrote works good with original input file!Thanks.
@klipa: If this works for you, please mark it as accepted! Thanks.. :)
Tomalak