views:

47

answers:

1

Dear All:

I am trying to replace specific elements in a document parsed with XmlSlurper with arbitrary XML strings. The best way I have managed to come up with to do this is:

#!/usr/bin/env groovy

import groovy.xml.StreamingMarkupBuilder

// Parse a small HTML document with XmlSlurper, using the CyberNeko HTML SAXParser
// as the underlying parser. <replacemewithxml /> is the placeholder element that
// the closure below swaps for a raw XML string.
def page=new XmlSlurper(new org.cyberneko.html.parsers.SAXParser()).parseText("""
<html>
<head></head>
<body>
<one attr1='val1'>asdf</one>
<two />
<replacemewithxml />
</body>
</html>
""".trim())

import groovy.xml.XmlUtil

// Recursively re-emits `node` and its children through the markup builder `bind`,
// substituting a raw XML string for the placeholder element.
// The variable is declared before it is assigned so the closure body can call itself.
def closure
closure={ bind,node->
  // The name is compared in upper case: parsed element names come back upper-cased
  // (see the serialized output shown after this script).
  if (node.name()=="REPLACEMEWITHXML") {
    // yieldUnescaped writes the string as-is, so it ends up as markup, not escaped text.
    bind.mkp.yieldUnescaped "<replacementxml>sometext</replacementxml>"
  } else {
    // Re-create the element under its own name with its original attributes.
    bind."${node.name()}"(node.attributes()) {
      // NOTE: text() yields the text of the whole subtree, not only this node's own
      // text, which is why "asdf" also appears under HTML and BODY in the output.
      mkp.yield node.text()
      // Recurse into each child so nested elements are rebuilt the same way.
      node.children().each { child->
 closure(bind,child)
      }
    }
  }
}
// Run the recursive closure inside a StreamingMarkupBuilder binding, starting at the
// parsed document root, and print the result after passing it through XmlUtil.serialize.
println XmlUtil.serialize(
  new StreamingMarkupBuilder().bind { bind->
    closure(bind,page)
  }
)

However, the only problem is that the text() method seems to return the text of all descendant nodes rather than just the node's own text, and thus I get:

<?xml version="1.0" encoding="UTF-8"?>
<HTML>asdf<HEAD/>
   <BODY>asdf<ONE attr1="val1">asdf</ONE>
      <TWO/>
      <replacementxml>sometext</replacementxml>
   </BODY>
</HTML>

Any ideas/help much appreciated.

Thank you! Misha

P.S. Also, out of curiosity: if I change the above to the "Groovier" notation as follows, the Groovy compiler thinks I am trying to access the ${node.name()} member of my test class. Is there a way to tell it that this is not the case without passing the builder object explicitly? Thank you! :)

// Variant of the recursive closure that does not receive the builder as a parameter;
// `mkp` and the dynamically named element method are called unqualified instead.
def closure
closure={ node->
  if (node.name()=="REPLACEMEWITHXML") {
    mkp.yieldUnescaped "<replacementxml>sometext</replacementxml>"
  } else {
    // This unqualified dynamic-name call is the one the question reports as being
    // looked up on the script class instead of on the builder.
    "${node.name()}"(node.attributes()) {
      mkp.yield node.text()
      node.children().each { child->
 closure(child)
      }
    }
  }
}
// Same driver as before, except the builder is not passed to the closure explicitly.
println XmlUtil.serialize(
  new StreamingMarkupBuilder().bind { 
    closure(page)
  }
)
A: 

OK, here is what I came up with:

#!/usr/bin/env groovy

import groovy.xml.StreamingMarkupBuilder
import groovy.xml.XmlUtil

// Prints `page` as XML: the slurper result is yielded into a StreamingMarkupBuilder
// binding, and the bound markup is passed through XmlUtil.serialize before printing.
def printSlurper={ page->
  def markup=new StreamingMarkupBuilder().bind { builder->
    mkp.yield page
  }
  println XmlUtil.serialize(markup)
}
// A single CyberNeko HTML SAXParser instance, used by both XmlSlurper parses further down.
def saxParser=new org.cyberneko.html.parsers.SAXParser()
// Switch off the SAX "namespaces" feature.
saxParser.setFeature('http://xml.org/sax/features/namespaces',false)
// Switch on the parser's balance-tags "document-fragment" feature
// (presumably so the input is treated as a fragment rather than a complete HTML page;
// TODO confirm against the NekoHTML documentation).
saxParser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment",true)

// Element name used by middleClosureHelper (presumably kept in a variable to show
// that the element name can be computed at run time).
def string="TEST"
// Given a markup builder, emits an element named by `string` whose body is a raw,
// unescaped XML string.
def middleClosureHelper={ builder->
  builder."${string}" {
    mkp.yieldUnescaped "<inner>XML</inner>"
  }
}

// Builder closure that emits a MiddleClosure element and fills it by calling
// middleClosureHelper with the current closure delegate
// (expected to be the markup builder when this runs inside a binding; TODO confirm).
def middleClosure={ 
  MiddleClosure {
    middleClosureHelper(delegate)
  }
}

// Parse the sample document whose <middle> element is going to be replaced.
def original=new XmlSlurper(saxParser).parseText("""
<original>
<middle>
</middle>
</original>
""")

// Find the element by its upper-case name and replace it with the markup produced
// by middleClosure.
original.depthFirst().find { it.name()=='MIDDLE' }.replaceNode { node->
   mkp.yield middleClosure
}

// Print the document after the replacement has been requested.
printSlurper(original)

// The replacement content cannot be found by querying `original` directly...
assert original.depthFirst().find { it.name()=='INNER' } == null
// ...so the document is serialized with StreamingMarkupBuilder and parsed again;
// the re-parsed result does contain the new INNER element.
def modified=new XmlSlurper(saxParser).parseText(new StreamingMarkupBuilder().bind {mkp.yield original}.toString())
assert modified.depthFirst().find { it.name()=='INNER' } != null

You have to serialize the result and parse it again with a new XmlSlurper before the replaced content shows up in queries, but it works!

Misha

Misha Koshelev