I'm trying to find a better way to implement these methods, as over very large sets they take a very long time, any ideas?
import java.util.HashMap;
import java.util.HashSet;
public class Multiset<E> extends HashSet<E> {
private static final long serialVersionUID = -9013417064272046980L;
private HashMap<E, Integer> multiplicities = new HashMap<E, Integer>();
@Override
public boolean add(E element){
if(multiplicities.containsKey(element)){
int x = (int) multiplicities.get(element);
multiplicities.put(element, ++x);
}else{
multiplicities.put(element, 1);
}
return super.add(element);
}
/**
* Adds all of the elements of another multiset to this one.
* This method allows the preservation of multiplicities
* which would not occur using the superclass's addAll().
* @param elements
* @return true if all elements were successfully added
*/
public boolean addAll(Multiset<E> elements) {
boolean flag = false;
for(E element : elements){
for(int i = 0; i < elements.multiplicity(element); i++)
flag = add(element);
}
return flag;
}
/**
* The set-view of a multiset is the ordinary set of all
* elements with multiplicity >= 1.
* @return all elements that have multiplicity >= 1
*/
public Multiset<E> setView(){
Multiset<E> set = new Multiset<E>();
for(E o : multiplicities.keySet()){
set.add(o);
}
return set;
}
/**
* provides a union of two multisets whereby the multiplicity of each
* element is the larger of the two
* @param second
* @return
*/
public Multiset<E> union(Multiset<E> second){
Multiset<E> union = new Multiset<E>();
Multiset<E> join = new Multiset<E>();
join.addAll(this);
join.addAll(second);
for(E o : join){
int i = this.multiplicity(o);
int j = second.multiplicity(o);
i = i > j ? i : j;
for(int c = 0; c < i; c++){
union.add(o);
}
}
return union;
}
/**
* provides an intersection of two multisets whereby
* the multiplicity of each element is the smaller of the two
* @param second
* @return The multiset containing the intersection of two multisets
*/
public Multiset<E> intersect(Multiset<E> second){
Multiset<E> intersection = new Multiset<E>();
for(E o : this.setView()){
if (second.setView().contains(o)) {
int i = this.multiplicity(o);
int j = second.multiplicity(o);
i = i < j ? i : j;
for(int c = 0; c < i; c++){
intersection.add(o);
}
}
}
return intersection;
}
/**
* The Multiplicity is the number of occurrences of an object
* in the multiset
* @param o
* @return number of occurrences of o
*/
public int multiplicity(E o){
return (multiplicities.containsKey(o)) ? multiplicities.get(o) : 0;
}
public int cardinality(){
int card = 0;
for(Integer i : multiplicities.values()){
card += i;
}
return card;
}
/**
* Measures the similarity between two multisets
* @param A
* @param B
* @return the cardinality of the difference of A and B
*/
public int similarityOfMultisets(Multiset<E> second){
Multiset<E> union, intersection;
int difference;
union = union(second);
intersection = intersect(second);
difference = union.cardinality() - intersection.cardinality();
return difference;
}
}
EDIT:
I believe I have found a faster way to calculate the similarityOfMultisets method:
public int similarityOfMultisets(Multiset<E> second){
int c = 0;
for(E elem: this.setView()){
c += Math.min(this.multiplicity(elem), second.multiplicity(elem));
}
Multiset<E> union = this.union(second);
return union.cardinality() - c;
}