Here's the algorithm. However, I still don't know what it's called.
#!/usr/bin/env python
"""
This module declares Histogram, a class that maintains a histogram.
It can insert and remove values from a predetermined range, and return order
statistics. Insert, remove, and query are logarithmic time in the size of the
range. The space requirement is linear in the size of the range.
"""
import numpy as np
class Histogram:
def __init__(self, size):
"""Create the data structure that holds elements in the range
[0, size)."""
self.__data = np.zeros(size, np.int32)
self.total = 0
def size(self):
return self.__data.shape[0]
def __find(self, o, a, b):
if b == a + 1:
return a
mid = (b - a) / 2 + a
if o > self.__data[mid]:
return self.__find(o - self.__data[mid], mid, b)
return self.__find(o, a, mid)
def find(self, o):
"""Return the o'th smallest element in the data structure. Takes
O(log(size)) time."""
return self.__find(o + 1, 0, self.size())
def __alter(self, x, a, b, delta):
if b == a + 1:
self.total += delta
return
mid = (b - a) / 2 + a
if x >= mid:
self.__alter(x, mid, b, delta)
else:
self.__data[mid] += delta
self.__alter(x, a, mid, delta)
def insert(self, x):
"""Inserts element x into the data structure in O(log(size)) time."""
assert(0 <= x < self.size())
self.__alter(x, 0, self.size(), +1)
def remove(self, x):
"""Removes element x from the data structure in O(log(size)) time."""
assert(0 <= x < self.size())
self.__alter(x, 0, self.size(), -1)
def display(self):
print self.__data
def histogram_test():
size = 100
total = 100
h = Histogram(size)
data = np.random.random_integers(0, size - 1, total)
for x in data:
h.insert(x)
data.sort()
for i in range(total):
assert(h.find(i) == data[i])
assert(h.find(total + 1) == size - 1)
h.display()