tags:

views:

64

answers:

3

A simple situation: given an arbitrary XML file, I want to list the path of every element and attribute it contains, without any duplicates. For example, this document:

<root name="example">
  <child id="1">
    <grandchild/>
  </child>
  <child id="2"/>
  <child id="3"/>
</root>

Is translated to:

/root
/root/@name
/root/child
/root/child/@id
/root/child/grandchild

How can I do this using only XSLT?

+3  A: 

This transformation (133 lines and many of them commented out):

<xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:msxsl="urn:schemas-microsoft-com:xslt"
 exclude-result-prefixes="msxsl">
  <!--
    Prints one line per distinct node path found in the source document.

    Pass 1 walks the source tree and collects, in a temporary tree, one
    <path> element per visited node (built by the "buildPath" template).
    Pass 2 keeps only the first <path> for each distinct string value
    (Muenchian grouping on the kPathByVal key) and writes it followed by
    a line feed.

    Relies on the msxsl:node-set() extension function to turn the
    temporary tree into a node-set.
  -->
  <xsl:output omit-xml-declaration="yes" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Indexes the temporary <path> elements by their string value. -->
  <xsl:key name="kPathByVal" match="path" use="."/>

  <!-- Pass 1: emit a <path> for the current node, then recurse into its
       children and attributes. -->
  <xsl:template match="node()|@*">
    <path>
      <xsl:call-template name="buildPath"/>
    </path>
    <xsl:apply-templates select="node()|@*"/>
  </xsl:template>

  <!-- Entry point: collect all paths, then output each distinct one once. -->
  <xsl:template match="/">
    <xsl:variable name="vrtfPaths">
      <xsl:apply-templates/>
    </xsl:variable>

    <xsl:variable name="vPaths" select="msxsl:node-set($vrtfPaths)/*"/>

    <!-- A <path> is kept only if it is the first one the key returns for
         its own string value. -->
    <xsl:for-each select=
      "$vPaths[generate-id() = generate-id(key('kPathByVal', .)[1])]">
      <xsl:value-of select="concat(., '&#xA;')"/>
    </xsl:for-each>
  </xsl:template>

  <!--
    Builds the path of the context node as a string: for the node and each
    of its ancestors that has a parent, a "/" followed by a step name.
    Elements and attributes are written without positional predicates;
    text, comment and processing-instruction steps get a [n] predicate
    when they have siblings of the same kind.
  -->
  <xsl:template name="buildPath">
    <xsl:variable name="pNode" select="."/>
    <xsl:variable name="theResult">
      <xsl:for-each select="$pNode">
        <xsl:variable name="theNode" select="."/>
        <!-- [..] drops the document root, which has no parent; the union
             with $theNode keeps the node itself in every case. -->
        <xsl:for-each select=
          "$theNode | $theNode/ancestor-or-self::node()[..]">
          <xsl:element name="slash">/</xsl:element>
          <xsl:choose>
            <xsl:when test="self::*">
              <xsl:element name="nodeName">
                <xsl:value-of select="name()"/>
              </xsl:element>
            </xsl:when>
            <xsl:otherwise> <!-- This node is not an element -->
              <xsl:choose>
                <!-- Attribute: the node is one of its parent's attributes -->
                <xsl:when test="count(. | ../@*) = count(../@*)">
                  <xsl:element name="nodeName">
                    <xsl:value-of select="concat('@',name())"/>
                  </xsl:element>
                </xsl:when>
                <xsl:when test="self::text()">  <!-- Text -->
                  <xsl:element name="nodeName">
                    <xsl:value-of select="'text()'"/>
                    <xsl:variable name="thisPosition"
                                  select="count(preceding-sibling::text())"/>
                    <xsl:variable name="numFollowing"
                                  select="count(following-sibling::text())"/>
                    <!-- Add a position only when there are sibling text nodes -->
                    <xsl:if test="$thisPosition + $numFollowing > 0">
                      <xsl:value-of select=
                        "concat('[', $thisPosition +1, ']')"/>
                    </xsl:if>
                  </xsl:element>
                </xsl:when>
                <xsl:when test="self::processing-instruction()">
                  <!-- Processing Instruction -->
                  <xsl:element name="nodeName">
                    <xsl:value-of select="'processing-instruction()'"/>
                    <xsl:variable name="thisPosition"
                                  select="count(preceding-sibling::processing-instruction())"/>
                    <xsl:variable name="numFollowing"
                                  select="count(following-sibling::processing-instruction())"/>
                    <xsl:if test="$thisPosition + $numFollowing > 0">
                      <xsl:value-of select=
                        "concat('[', $thisPosition +1, ']')"/>
                    </xsl:if>
                  </xsl:element>
                </xsl:when>
                <xsl:when test="self::comment()">   <!-- Comment -->
                  <xsl:element name="nodeName">
                    <xsl:value-of select="'comment()'"/>
                    <xsl:variable name="thisPosition"
                                  select="count(preceding-sibling::comment())"/>
                    <xsl:variable name="numFollowing"
                                  select="count(following-sibling::comment())"/>
                    <xsl:if test="$thisPosition + $numFollowing > 0">
                      <xsl:value-of select=
                        "concat('[', $thisPosition +1, ']')"/>
                    </xsl:if>
                  </xsl:element>
                </xsl:when>
                <!-- Namespace: the node is one of its parent's namespace nodes -->
                <xsl:when test=
                  "count(. | ../namespace::*) = count(../namespace::*)">
                  <xsl:variable name="apos">'</xsl:variable>
                  <xsl:element name="nodeName">
                    <xsl:value-of select="concat('namespace::*',
                      '[local-name() = ', $apos, local-name(), $apos, ']')"/>
                  </xsl:element>
                </xsl:when>
              </xsl:choose>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each>
        <!-- <xsl:text>&#xA;</xsl:text> -->
      </xsl:for-each>
    </xsl:variable>
    <!-- The string value of the temporary tree is the concatenated path. -->
    <xsl:value-of select="$theResult"/>
  </xsl:template>
</xsl:stylesheet>

when applied on the provided XML document:

<root name="example">
  <child id="1">
    <grandchild/>
  </child>
  <child id="2"/>
  <child id="3"/>
</root>

produces the wanted, correct result:

/root
/root/@name
/root/child
/root/child/@id
/root/child/grandchild
Dimitre Novatchev
Almost! But I don't want the [1] numbering stuff in it. Only the node names themselves. (See example.)
Workshop Alex
@Workshop-Alex: Fixed now.
Dimitre Novatchev
@Workshop: Fixed now.
Dimitre Novatchev
+1 Good answer.
Alejandro
+2  A: 

I solved it myself, too! This way:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:msxsl="urn:schemas-microsoft-com:xslt" exclude-result-prefixes="fo msxsl">
  <!--
    Lists the path of every element and attribute in the source document,
    sorted and without duplicates, one path per line.

    The "Loop" mode collects one <Node> per element/attribute in a temporary
    tree; the root template then sorts the distinct <Node> values and the
    "Text" mode writes each of them followed by a line feed.
    Relies on the msxsl:node-set() extension function.
  -->
  <xsl:output method="text" encoding="UTF-8"/>
  <!-- Not read anywhere in this stylesheet; kept so that callers which
       pass these parameters keep working. -->
  <xsl:param name="Detect">false</xsl:param>
  <xsl:param name="Root"/>
  <!-- Written as a character reference: a literal line break inside an
       attribute value is normalized to a space by the XML parser. -->
  <xsl:variable name="NewLine" select="'&#xA;'"/>
  <!-- Indexes the collected <Node> elements by their path string. -->
  <xsl:key name="NodeByPath" match="Node" use="."/>
  <xsl:template match="/">
    <xsl:variable name="Nodes">
      <xsl:apply-templates select="/" mode="Loop"/>
    </xsl:variable>
    <!-- Select the <Node> elements themselves so that xsl:sort orders them,
         and keep only the first <Node> for each distinct path. -->
    <xsl:apply-templates select="msxsl:node-set($Nodes)/Node[generate-id() = generate-id(key('NodeByPath', .)[1])]" mode="Text">
      <xsl:sort select="." order="ascending" case-order="lower-first" data-type="text"/>
    </xsl:apply-templates>
  </xsl:template>
  <!-- Elements: emit this element's path, then visit its attributes and
       child elements with the path extended by this element's name. -->
  <xsl:template match="*" mode="Loop">
    <xsl:param name="Node"/>
    <Node>
      <xsl:value-of select="$Node"/>/<xsl:value-of select="name()"/>
    </Node>
    <xsl:apply-templates select="@*" mode="Loop">
      <xsl:with-param name="Node" select="concat($Node, '/', name())"/>
    </xsl:apply-templates>
    <xsl:apply-templates select="*" mode="Loop">
      <xsl:with-param name="Node" select="concat($Node, '/', name())"/>
    </xsl:apply-templates>
  </xsl:template>
  <!-- Attributes: emit the owning element's path followed by /@name. -->
  <xsl:template match="@*" mode="Loop">
    <xsl:param name="Node"/>
    <Node>
      <xsl:value-of select="$Node"/>/@<xsl:value-of select="name()"/>
    </Node>
  </xsl:template>
  <!-- Writes one path followed by a line feed. -->
  <xsl:template match="Node" mode="Text"><xsl:value-of select="concat(., $NewLine)"/></xsl:template>
</xsl:stylesheet>

Only 48 lines. :-)

Workshop Alex
My solution was 133 lines because it uses existing code that does much more than what you want -- I really had to cut out some of the code. My solution can also produce the paths to any other node: the root (`/`), text nodes, PIs, comments and namespace nodes. Also, I just needed 2 minutes to re-use this code, while writing from scratch even your simple solution would probably require at least 10 minutes and is prone to errors, so may actually require a lot longer. :)
Dimitre Novatchev
+1  A: 

Just for fun, here is a solution that does not use any extension functions.

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <!--
      Lists each distinct element/attribute path once, one per line, using
      only standard XSLT 1.0 (no extension functions).

      The paths already written are carried along in the pNames parameter
      as a line-feed-delimited string; a path is written only if it does
      not occur in that string yet.
    -->
    <xsl:output method="text"/>
    <!-- Text nodes contribute nothing to the list. -->
    <xsl:template match="text()"/>
    <xsl:template match="*|@*">
        <!-- pPath:  path of the parent node ('' at the top level).
             pNames: paths written so far, each wrapped in line feeds. -->
        <xsl:param name="pPath"/>
        <xsl:param name="pNames" select="'&#xA;'"/>
        <!-- substring('@', 1 div test) yields '@' when the test is true
             (1 div 1 = 1) and '' when it is false (1 div 0 = Infinity);
             the test is true when the node is one of its parent's
             attributes. -->
        <xsl:variable name="vPath"
                      select="concat($pPath,'/',
                                     substring('@',
                                               1 div (count(.|../@*) =
                                                      count(../@*))),
                                     name())"/>
        <!-- New paths contributed by this node, its attributes and the
             chain starting at its first child element. -->
        <xsl:variable name="vNames">
            <xsl:if test="not(contains($pNames,
                                       concat('&#xA;',$vPath,'&#xA;')))">
                <xsl:value-of select="concat($vPath,'&#xA;')"/>
            </xsl:if>
            <xsl:apply-templates select="*[1]|@*">
                <xsl:with-param name="pPath" select="$vPath"/>
                <xsl:with-param name="pNames" select="$pNames"/>
            </xsl:apply-templates>
        </xsl:variable>
        <xsl:value-of select="$vNames"/>
        <!-- Continue with the next sibling element, telling it which
             paths have been written so far. -->
        <xsl:apply-templates select="following-sibling::*[1]">
            <xsl:with-param name="pPath" select="$pPath"/>
            <xsl:with-param name="pNames" select="concat($pNames,$vNames)"/>
        </xsl:apply-templates>
    </xsl:template>
</xsl:stylesheet>

Output:

/root
/root/@name
/root/child
/root/child/@id
/root/child/grandchild

Edit: A better illustration of what XSLT/XPath 2.0 can do. This single XPath 2.0 expression produces the same list:

(: Joins the distinct element/attribute paths of the document with the
   separator given as the last argument. :)
string-join(
   distinct-values(
      (: every element and every attribute in the document :)
      (//*|//@*)
         (: build one path string per node :)
         /string-join(
            (: names of all ancestors, then the node's own name,
               prefixed with '@' when the node is an attribute :)
            (ancestor::node()/name(),
             if (self::attribute())
                then concat('@',name())
                else name()),
            '/')),
   '&#xA;')
Alejandro