views:

624

answers:

4

When importing xml to a DB with Hibernate, is there a way to resolve an attribute consisting of comma-separated values to populate related table(s)?

In this (somewhat obfuscated) example I have an xml file, each row of which represents a Person. The Person has a Hobbies property which contains a comma-separated list of values. The Person-Hobby relationship is many to many. In reality I have gigs of data to process.

When importing each Person to the PEOPLE table, I would like to add each Hobby to the HOBBIES table (ignoring duplicates), then add a mapping to the PEOPLE_HOBBIES table.

I've set up my mapping files with bi-directional associations and Hibernate appears to construct the tables as I'd expect (details below), however I don't see what mechanism I can use for extracting/populating the HOBBIES and PEOPLE_HOBBIES while processing PEOPLE.

All help and/or RTFM references gratefully received.

This is the file I'm processing (people.xml):

<People>
  <Person Id="1" Name="Dave" Hobbies="drinking, walking"/>
  <Person Id="2" Name="Geoff" Hobbies="football, ballet"/>
  <Person Id="3" Name="Anne" Hobbies="walking, karate"/>
  <Person Id="4" Name="Frank" Hobbies="karate, cross-stitch"/>
</People>

The Person.hbm.xml is (omitting xml decl):

<?xml version="1.0"?><!DOCTYPE hibernate-mapping PUBLIC
 "-//Hibernate/Hibernate Mapping DTD 3.0//EN"
"http://hibernate.sourceforge.net/hibernate-mapping-3.0.dtd"&gt;
<hibernate-mapping package="name.seller.rich.hobby">  
  <class name="Person" node="Person" table="PEOPLE">
    <id name="id" node="@Id" column="PEOPLE_ID"/>
    <property name="name" node="@Name" column="NAME" type="string"/>
    <property name="hobbies" node="@Hobbies" column="HOBBIES" type="string"/>
    <set name="hobbiesSet" table="PEOPLE_HOBBIES">
      <key column="PEOPLE_ID"/>
      <many-to-many column="HOBBY" class="Hobby"/>
    </set>
  </class>
</hibernate-mapping>

The Hobby.hbm.xml is:

<hibernate-mapping package="name.seller.rich.hobby">  
  <class name="Hobby" node="Hobby" table="HOBBIES">
    <id name="hobby" column="HOBBY" type="string"/>
    <set name="people" table="PEOPLE_HOBBIES" inverse="true">
      <key column="HOBBY"/>
      <many-to-many column="PEOPLE_ID" class="Person"/>
    </set>
  </class>
</hibernate-mapping>

This is the Person class, in the setHobbies() method I populate the hobbiesSet with Hobby instances:

package name.seller.rich.hobby;

import java.util.HashSet;
import java.util.Set;

public class Person {

    private long id;

    private String name;

    private String hobbies;

    private Set hobbiesSet = new HashSet();

    public String getHobbies() {
        return hobbies;
    }

    public Set getHobbiesSet() {
        if (hobbiesSet == null) {
            hobbiesSet = new HashSet();
        }
        return hobbiesSet;
    }

    public long getId() {
        return id;
    }

    public String getName() {
        return name;
    }

    public void setHobbies(final String hobbies) {
        this.hobbies = hobbies;
    }

    public void setHobbiesSet(final Set hobbiesSet) {
        this.hobbiesSet = hobbiesSet;
    }

    public void setId(final long id) {
        this.id = id;
    }

    public void setName(final String name) {
        this.name = name;
    }
}

This is the code I'm using to process the file:

package name.seller.rich.hobby;

import java.io.File;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.hibernate.EntityMode;
import org.hibernate.HibernateException;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.hibernate.Transaction;
import org.hibernate.cfg.Configuration;
import org.hibernate.tool.hbm2ddl.SchemaExport;

public class DataImporter {

    public static void main(final String[] args) {
        File baseDir = new File("C:\\workspaces\\hobby");
        DataImporter importer = new DataImporter();
        Configuration config = importer.setupDb(baseDir);

        if (config != null) {
            importer.importContents(new File(baseDir, "people.xml"), config);
        }
    }

    private void importContents(final File file, final Configuration config) {
        SessionFactory sessionFactory = config.buildSessionFactory();
        Session session = sessionFactory.openSession();    
        Transaction tx = session.beginTransaction();
        Session dom4jSession = session.getSession(EntityMode.DOM4J);

        SAXReader saxReader = new SAXReader();
        try {
            Document document = saxReader.read(file);

            List list = document.selectNodes("//Person");
            Iterator iter = list.iterator();

            while (iter.hasNext()) {
                Object personObj = iter.next();
                dom4jSession.save(Person.class.getName(), personObj);
            }

            session.flush();
            tx.commit();
            session.close();
        } catch (HibernateException e) {
            e.printStackTrace();
        } catch (DocumentException e) {
            e.printStackTrace();
        }
    }

    private Configuration setupDb(final File baseDir) throws HibernateException {
        Configuration cfg = new Configuration();
        cfg.addFile(new File(baseDir, "name/seller/rich/hobby/Person.hbm.xml"));
        cfg.addFile(new File(baseDir, "name/seller/rich/hobby/Hobby.hbm.xml"));

        SchemaExport export = new SchemaExport(cfg);

        export.setOutputFile("hobbyDB.txt");
        export.execute(false, true, false, false);
        return cfg;
    }
}

This is the resulting content in the PEOPLE table.

PEOPLE_ID           |NAME        |HOBBIES              
-------------------------------------------------------
1                   |Dave        |drinking, walking    
2                   |Geoff       |football, ballet     
3                   |Anne        |walking, karate      
4                   |Frank       |karate, cross-stitch

...and these are the empty HOBBIES and PEOPLE_HOBBIES tables:

HOBBIES:

HOBBY
----------------------

0 rows selected

PEOPLE_HOBBIES:

PEOPLE_ID           |HOBBY
---------------------------------------

0 rows selected
A: 

When Hibernate reads the hobbies attribute, it just stores it as text directly into the Person table. It has no way of knowing about hobbiesSet at this point, since the only time that you are populating the set is when the object is read back out of the database again. But since the set was never populated in the database, it doesn't work.

The way you've configured both hobbies and hobbiesSet is confusing, and I wouldn't recommend mixing hobbies and hobbiesSet like that. I strongly suggest you read the XML into the object model yourself, including splitting the hobbies string, and then persist the manually-constructed objects to Hibernate in the normal way, using a hobbies collection.

skaffman
Hibernate 3 has support for XML mapping, so it seems reasonable to me for it to support this kind of processing. I know I can roll my own but that kind of defeats the point of using ORM.The setHobbies() implementation was part of my investigation to see how the mapping works, I take the point that it is not a good way to handle setting the property, but as you say it is not called when processing the XML (didn't know until I tried) so not really relevant. I'll remove the method contents to avoid confusion.
Rich Seller
The XML persistence mode is perfectly fine when you're storing the XML without altering its structure. But you are altering the structure, and so it makes no sense to store it in one structure and to try and retrieve it in another.
skaffman
A: 

I have found a partial solution, and thought it would be worth recording it here. Unfortunately if there are duplicate keys in the list-attribute, you need to do a merge rather than a save on the element, and this is not yet supported for EntityMode.DOM4J. This is the comment from org.hibernate.type.CollectionType.replaceElements():

// TODO: does not work for EntityMode.DOM4J yet!

You can add an ElementHandler to the SAXReader to process each element and dynamically turn the attributes into child elements, this is my implementation:

SAXReader saxReader = new SAXReader();
saxReader.addHandler("/People/Person", new ElementHandler() {

    public void onEnd(final ElementPath elementPath) {
        Element element = elementPath.getCurrent();
        Attribute hobbyAttribute = element.attribute("Hobbies");

        if (hobbyAttribute != null) {
            String hobbies = hobbyAttribute.getValue();
            Element hobbiesList = new DefaultElement("Hobbies");
            element.add(hobbiesList);
            String[] hobbiesArray = hobbies.split(",");

            for (String hobby : hobbiesArray) {
                if (hobby.trim().length() > 0) {
                    Element hobbyElement = new DefaultElement("Hobby");
                    hobbiesList.add(hobbyElement);
                    Element idElement = new DefaultElement("id");
                    hobbyElement.add(idElement);
                    idElement.setText(hobby.trim());
                }
            }
        }
    }

    public void onStart(final ElementPath elementPath) {
        //no-op;
    }
});

And the later loop is modified as follows:

while (iter.hasNext()) {
    Object peopleObj = iter.next();
    dom4jSession.merge(Person.class.getName(), peopleObj);
}

Once I'd updated the mapping files to handle the child elements and renamed the relevant methods in the domain objects it persists the related data (as long as there are no duplicates in the hobbies natch).

Updated Hobby.hbm.xml:

<?xml version="1.0"?><!DOCTYPE hibernate-mapping PUBLIC
 "-//Hibernate/Hibernate Mapping DTD 3.0//EN"
"http://hibernate.sourceforge.net/hibernate-mapping-3.0.dtd"&gt;
<hibernate-mapping package="name.seller.rich.hobby">  
  <class name="Hobby" node="Hobby" table="HOBBIES">
    <!--id name="id" column="HOBBY_ID">
      <generator class="native"/>
    </id-->
    <id name="id" column="HOBBY_ID" type="string"/>
    <set name="people" table="PEOPLE_HOBBIES" inverse="true">
      <key column="HOBBY_ID"/>
      <many-to-many column="PEOPLE_ID" class="Person"/>
    </set>
  </class>
</hibernate-mapping>

Updated Person.hbm.xml:

<hibernate-mapping package="name.seller.rich.hobby">  
  <class name="Person" node="Person" table="PEOPLE">
    <id name="id" node="@Id" column="PEOPLE_ID"/>
    <property name="name" node="@Name" column="NAME" type="string"/>
    <!-- property name="hobbies" node="@Hobbies" column="HOBBIES" type="string"/-->
    <set name="hobbies" node="Hobbies" table="PEOPLE_HOBBIES" cascade="save-update,persist">
    <key column="PEOPLE_ID"/>
    <many-to-many column="HOBBY_ID" class="Hobby"/>
    </set>
  </class>
</hibernate-mapping>
Rich Seller
+1  A: 

You might consider pre-processing your xml into something more suitable. It is generally better to represent lists of things as elements rather than comma separated attribute values.

For example:

<People>
  <Person Id="1" Name="Dave" Hobbies="drinking, walking"/>
  <Person Id="2" Name="Geoff" Hobbies="football, ballet"/>
  <Person Id="3" Name="Anne" Hobbies="walking, karate"/>
  <Person Id="4" Name="Frank" Hobbies="karate, cross-stitch"/>
</People>

Would be better as:

<People>
  <Person Id="1" Name="Dave">
    <Hobbies>
      <Hobby>drinking</Hobby>
      <Hobby>walking</Hobby>
    </Hobbies>
  </Person>

  ...
</People>

You could do this with an XSLT script - see http://stackoverflow.com/questions/584082/xslt-best-way-to-split-and-render-comma-separated-text-as-html for an example.

That should then make it easier to import into Hibernate in the manner you desire.

Richard
My "partial solution" effectively does this in memory. It seems that this is a Hibernate issue, where the support for EntityMode.DOM4J is not yet complete.
Rich Seller
I'll second the "support for EntityMode.DOM4J is not yet complete". We ran into Hibernate JIRA issues HHH-422, HHH-1039, HHH-1265, and HHH-1999 when trying to use it a couple years ago. All of these issues are still open today.
Alan Krueger
A: 

We tried using Hibernate's DOM4J and POJO entity modes in the same application a while ago. Maybe it's matured more by now, but we had nothing but problems with the DOM4J entity mode.

I'd recommend using Hibernate with your POJOs and use something like XStream or raw DOM4J for doing your XML serialization to and from the POJOs.

Alan Krueger