views:

162

answers:

2

Given a java.util.Collection what is the easiest way to create an endless java.util.Iterator which returns those elements such that they show up according to a given distribution (org.apache.commons.math.distribution)?

+4  A: 
List<Object> l = new ArrayList<Object>(coll);
Iterator<Object> i = new Iterator<Object>() {
    public boolean hasNext() { return true; }

    public Object next() {
        return coll.get(distribution.nextInt(0, l.size());
    }
}

Your problem is then how to convert the Distribution classes in the apache library to implement the nextInt method. I have to say that it is far from obvious to me how you can actually do this from the Distribution interface.

One (slightly rubbish) way I can think of is to generate an EmpiricalDistribution (in the random package) dataset using the probability defined by your actual distribution and then using that emprirical dsitribution as the distribution (above)

oxbow_lakes
Is that an unmatched bracket on line 1? Did you mean to make it an anonymous class or not?
Michael Myers
Oops - cheers for the spot
oxbow_lakes
Hmm, but the mathematical aspect is the hardest part of this problem... so far the question remains practically unanswered.The temporary list should be final, shouldn't it?
Michael Locher
*@Michael* - haha yes, I hoped no-one would notice. But 4 people have upvoted me, the fools!
oxbow_lakes
+1  A: 

Solution for Gaussian distribution

import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.SortedMap;
import java.util.Map.Entry;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.ImmutableSortedMap.Builder;

/**
 * Endless sequence with gaussian distribution.
 * 
 * @param <T> the type of the elements
 * @author Michael Locher
 */
public class GaussianSequence<T> implements Iterable<T>, Iterator<T> {

  private static final int HISTOGRAMM_SAMPLES = 50000;

  private static final int HISTOGRAMM_ELEMENTS = 100;

  private static final int HISTOGRAMM_LENGTH = 80;

  private static final double DEFAULT_CUTOFF = 4.0;

  private final List<T> elements;
  private final int maxIndex;
  private final Random rnd;
  private final double scaling;
  private final double halfCount;

  /**
   * Creates this.
   * @param rnd the source of randomness to use
   * @param elements the elements to deliver
   */
  public GaussianSequence(final Random rnd, final Collection<T> elements) {
    this(rnd, DEFAULT_CUTOFF, elements);
  }

  private GaussianSequence(final Random rnd, final double tailCutOff, final Collection<T> elements) {
    super();
    this.rnd = rnd;
    this.elements = new ArrayList<T>(elements);
    if (this.elements.isEmpty()) {
      throw new IllegalArgumentException("no elements provided");
    }
    this.maxIndex = this.elements.size() - 1;
    this.halfCount = this.elements.size() / 2.0;
    this.scaling = this.halfCount / tailCutOff;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public Iterator<T> iterator() {
    return this;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean hasNext() {
    return true;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public T next() {
    return this.elements.get(sanitizeIndex(determineNextIndex()));
  }

  private int determineNextIndex() {
    final double z = this.rnd.nextGaussian();
    return (int) (this.halfCount + (this.scaling * z));
  }

  private int sanitizeIndex(final int index) {
    if (index < 0) {
      return 0;
    }
    if (index > this.maxIndex) {
      return this.maxIndex;
    }
    return index;
  }

  /**
   * Prints a histogramm to stdout.
   * @param args not used
   */
  public static void main(final String[] args) {
    final PrintWriter out = new PrintWriter(new OutputStreamWriter(System.out, Charset.forName("UTF-8")), true);
    plotHistogramm(new Random(), out);
  }

  private static void plotHistogramm(final Random rnd, final PrintWriter out) {
    // build elements
    final Multimap<Integer, Integer> results = ArrayListMultimap.create();
    final List<Integer> elements = Lists.newArrayListWithCapacity(HISTOGRAMM_ELEMENTS);
    for (int i = 1; i < HISTOGRAMM_ELEMENTS; i++) {
      elements.add(i);
    }
    // sample sequence
    final Iterator<Integer> randomSeq = new GaussianSequence<Integer>(rnd, elements);
    for (int j = 0; j < HISTOGRAMM_SAMPLES; j++) {
      final Integer sampled = randomSeq.next();
      results.put(sampled, sampled);
    }
    // count and sort results
    final Builder<Integer, Integer> r = ImmutableSortedMap.naturalOrder();
    for (final Entry<Integer, Collection<Integer>> e : results.asMap().entrySet()) {
      final int count = e.getValue().size();
      r.put(e.getKey(), count);
    }
    // plot results
    final SortedMap<Integer, Integer> sortedAndCounted = r.build();
    final double histogramScale = (double) HISTOGRAMM_LENGTH / Collections.max(sortedAndCounted.values());
    for (final Entry<Integer, Integer> e : sortedAndCounted.entrySet()) {
      out.format("%3d [%4d]", e.getKey(), e.getValue());
      final StringBuilder c = new StringBuilder();
      final int lineLength = (int) (histogramScale * e.getValue());
      for (int i = 0; i < lineLength; i++) {
        c.append('*');
      }
      out.println(c);
    }
  }

}
Michael Locher