views:

374

answers:

2

Hi! I'm working on an intranet website based on OpenCMS and I'd like to add a tag cloud to the site. I've found some open-source tag cloud Java libraries (like OpenCloud). Do you have any experience with wiring those two (or other tag cloud libraries + OpenCMS) together?

+3  A: 

OK so I solved this partly myself in the end. I also used some code from Richard Friedman's tag cloud.

The way I do it is the following: At specified intervals, OpenCMS runs a scheduled job that reads the Lucene index, extracts all the terms from the "keywords" field (which can be filled in for every file in the VFS), generates the tag cloud and stores the result in a file that is part of my OpenCMS template. There are two Java files: Cloud.java and BuildTagCloud.java. "Cloud" reads the index and returns a List of the most common terms. "BuildTagCloud" implements the I_CmsScheduledJob interface and is registered as a scheduled job.

BuildTagCloud.java:

package mypackage;

import org.opencms.file.*;
import org.opencms.main.*;
import org.opencms.scheduler.I_CmsScheduledJob;
import java.text.SimpleDateFormat;
import java.util.*;


/**
 * Scheduled job that renders the most frequent index terms as an HTML
 * fragment and writes that fragment into a VFS resource, then publishes it.
 *
 * <p>The term statistics come from {@link Cloud}; this class only formats
 * them and stores the result.
 */
public class BuildTagCloud implements I_CmsScheduledJob {

    private final String indexaddress = "address/of/your/index/folder"; // something like ../webapps/opencms/WEB-INF/index/nameOfIndex
    private final String tagsFile = "address"; // part of my template; it's where I store the tag cloud
    private final int numTerms = 10; // number of terms in the tag cloud

    /**
     * Builds the tag cloud HTML and stores it in {@code tagsFile}.
     *
     * <p>If reading the index fails, the exception message is embedded in the
     * generated HTML instead of the term list, so the fragment is still
     * written and published.
     *
     * @param object     the CmsObject used to write and publish the resource
     * @param parameters job parameters (not used by this job)
     * @return the string {@code "OK"} once the resource has been written
     * @throws java.lang.Exception if writing or publishing the resource fails
     */
    public String launch(CmsObject object, java.util.Map parameters) throws java.lang.Exception {
        Cloud cloud = new Cloud(indexaddress, numTerms);

        Calendar cal = Calendar.getInstance();
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        // A StringBuilder avoids re-copying the whole fragment on every loop iteration.
        StringBuilder data = new StringBuilder();
        data.append("<div style=\"border-top: 3px solid #000099; padding-top: 6px; margin-top: 17px;\"><span style=\"font-weight: bold; font-size: 11px; color: #000099;\">Tag cloud</span><br />");
        data.append(sdf.format(cal.getTime())).append("<br />");

        try {
            List<TermInfo> list = cloud.getCloud();

            for (int i = 0; i < list.size(); i++) {
                TermInfo info = list.get(i);
                // Resolve the values first so a failure cannot leave a half-written entry.
                String text = info.term.text();
                int docFreq = info.docFreq;
                data.append("<br />").append(i).append(". ").append(text).append(" ... ").append(docFreq);
            }

        } catch (Exception e) {
            // Best effort: show the failure in place of the term list.
            data.append(e.getMessage());
            data.append("<br />");

        } finally {
            // Always close the wrapper element so the template markup stays balanced.
            data.append("</div>");
        }

        writeAndPublishResource(object, tagsFile, data.toString());

        return "OK";
    }

    /**
     * Replaces the content of a VFS resource and publishes it.
     *
     * <p>Logs in with the configured account, switches to the named offline
     * project when the current project is the online project, then locks,
     * rewrites and publishes the resource. The lock is released even when
     * writing or publishing fails, so a failed run does not leave the resource
     * locked.
     *
     * @param object   the CmsObject used for all VFS operations
     * @param resource VFS path of the resource to overwrite
     * @param data     new content of the resource
     * @throws java.lang.Exception if login, locking, writing or publishing fails
     */
    private void writeAndPublishResource(CmsObject object, String resource, String data) throws java.lang.Exception {
        // NOTE(review): placeholder credentials; supply them from configuration rather than source code.
        object.loginUser("administrator's user name", "and his password");

        CmsRequestContext cmsContext = object.getRequestContext();
        CmsProject curProject = cmsContext.currentProject();

        if (curProject.isOnlineProject()) {
            CmsProject offlineProject = object.readProject("Name of the project");
            cmsContext.setCurrentProject(offlineProject);
        }

        CmsResource res = object.readResource(resource);
        object.lockResource(resource);
        try {
            CmsFile file = object.readFile(res);
            // Encode with the OpenCms default encoding instead of the JVM platform charset.
            file.setContents(data.getBytes(OpenCms.getSystemInfo().getDefaultEncoding()));
            object.writeFile(file);
            OpenCms.getPublishManager().publishResource(object, resource);
        } finally {
            object.unlockResource(resource);
        }
    }

}

Cloud.java:

package mypackage;

import java.io.*;
import java.util.*;
import org.apache.lucene.index.*;

/**
 * Reads a Lucene index and collects the terms of the "keywords" field that
 * occur in the most documents.
 */
public class Cloud {

    private String indexaddress;
    private int numTerms;

    private int max;
    private int sum;

    /**
     * @param indexaddress file system path of the Lucene index directory
     * @param numTerms     maximum number of terms to return from {@link #getCloud()}
     */
    public Cloud(String indexaddress, int numTerms) {
        this.indexaddress = indexaddress;
        this.numTerms = numTerms;
        max = 0;
        sum = 0;
    }

    /**
     * Returns up to {@code numTerms} terms of the "keywords" field with the
     * highest document frequency, ordered from lowest to highest frequency.
     *
     * <p>The statistics reported by {@link #getMaxFrequency()} are recomputed
     * on every call.
     *
     * @return the selected terms; empty if {@code numTerms} is not positive or
     *         the index holds no "keywords" terms
     * @throws Exception if the index cannot be opened or read
     */
    public List<TermInfo> getCloud() throws Exception {

        // Start from clean statistics so repeated calls do not accumulate.
        max = 0;
        sum = 0;

        if (numTerms <= 0) {
            return new ArrayList<TermInfo>();
        }

        TermInfoQueue termQ = new TermInfoQueue(numTerms);

        IndexReader reader = IndexReader.open(new File(indexaddress));
        try {
            TermEnum terms = reader.terms();
            try {
                while (terms.next()) {

                    if (!"keywords".equals(terms.term().field())) continue;

                    int freq = terms.docFreq();
                    if (termQ.size() < numTerms) {
                        // Queue not full yet: every term is a candidate.
                        termQ.put(new TermInfo(terms.term(), freq));
                    } else if (freq > ((TermInfo) termQ.top()).docFreq) {
                        // Queue full: only evict the current minimum for a strictly
                        // more frequent term, otherwise a rarer term could displace it.
                        termQ.pop();
                        termQ.put(new TermInfo(terms.term(), freq));
                    }
                }
            } finally {
                terms.close();
            }
        } finally {
            // Release the index even if enumeration failed.
            reader.close();
        }

        ArrayList<TermInfo> res = new ArrayList<TermInfo>(termQ.size());
        while (termQ.size() > 0) {
            TermInfo ti = (TermInfo) termQ.pop();
            max = Math.max(max, ti.docFreq);
            sum += ti.docFreq;
            res.add(ti);
        }

        // Optionally shuffle the results, since a sorted cloud would be predictable.
        //Collections.shuffle( res );

        return res;
    }

    /**
     * @return the highest document frequency among the terms returned by the
     *         most recent {@link #getCloud()} call, or 0 if none
     */
    public int getMaxFrequency() {
        return max;
    }
}

/**
 * Simple value holder pairing an index term with the number of documents
 * that contain it.
 */
class TermInfo {

    /** The index term. */
    public Term term;

    /** Number of documents containing {@link #term}. */
    public int docFreq;

    /**
     * @param t  the index term
     * @param df the document frequency recorded for {@code t}
     */
    TermInfo(Term t, int df) {
        this.term = t;
        this.docFreq = df;
    }
}

/**
 * Priority queue of {@link TermInfo} objects ordered by document frequency,
 * so the entry with the lowest frequency compares as the smallest.
 */
class TermInfoQueue extends org.apache.lucene.util.PriorityQueue {

    /**
     * @param size capacity passed to the queue's {@code initialize} method
     */
    TermInfoQueue(int size) {
        initialize(size);
    }

    /**
     * Compares two queued entries by document frequency.
     *
     * @param a first entry, expected to be a {@link TermInfo}
     * @param b second entry, expected to be a {@link TermInfo}
     * @return {@code true} if {@code a} has a strictly lower document
     *         frequency than {@code b}
     */
    protected final boolean lessThan(Object a, Object b) {
        int left = ((TermInfo) a).docFreq;
        int right = ((TermInfo) b).docFreq;
        return left < right;
    }
}

Hope this can help someone as I spent an awful lot of time figuring it out!

John Manak
A: 

I don't understand what type of information I should set in tagsFile. Is it the name of an element of my template?

Luigi 1982