BK-trees do precisely what you want. Here's a good article on implementing them.
And here is a Scala implementation:
/** A node of a BK-tree: holds one value plus child subtrees indexed by their
  * distance from that value.
  *
  * Range pruning in [[query]] relies on `computeDistance` obeying the triangle
  * inequality; with a distance that does not, matches may be missed.
  *
  * @param computeDistance distance function between two values
  * @param node            the value stored at this node
  * @tparam T              type of the stored values
  */
class BKTree[T](computeDistance: (T, T) => Int, node: T) {
  /** Child subtrees, keyed by the distance between `node` and the child's value. */
  val subnodes = scala.collection.mutable.HashMap.empty[Int, BKTree[T]]

  /** Collects every stored value whose distance to `what` is at most `distance`.
    *
    * @param what     the value to search around
    * @param distance maximum accepted distance (inclusive)
    * @return matching values; this node's value (if it matches) comes first,
    *         followed by matches from the subtrees in map-iteration order
    */
  def query(what: T, distance: Int): List[T] = {
    val currentDistance = computeDistance(node, what)
    val minDistance = currentDistance - distance
    val maxDistance = currentDistance + distance
    // Only children whose key lies in [minDistance, maxDistance] are searched.
    val partialResult = subnodes.keys.toList
      .filter(key => minDistance <= key && key <= maxDistance)
      .flatMap(key => subnodes(key).query(what, distance))
    if (currentDistance <= distance) node :: partialResult else partialResult
  }

  /** Adds `what` to the tree.
    *
    * @param what the value to store
    * @return `false` if an equal value was already present on the insertion
    *         path, `true` if a new node was created
    */
  def insert(what: T): Boolean =
    if (node == what) false
    else {
      // Computed once and reused for both the lookup and the possible store.
      val key = computeDistance(node, what)
      subnodes.get(key) match {
        case Some(child) => child.insert(what)
        case None =>
          subnodes(key) = new BKTree(computeDistance, what)
          true
      }
    }

  override def toString: String = s"$node($subnodes)"
}
/** Small demo: builds a tree of characters and prints the ones closest to 'D'. */
object Test {
  def main(args: Array[String]): Unit = {
    // The distance function defined in this object is `charDistance`.
    val root = new BKTree[Char](charDistance, 'A')
    root.insert('C')
    root.insert('M')
    root.insert('Z')
    println(findClosest(root, 'D'))
  }

  /** Absolute difference between the two characters' numeric values. */
  def charDistance(a: Char, b: Char): Int = (a - b).abs

  /** Finds the values closest to `what` by querying with a growing radius.
    *
    * Starts at radius 0 and increases it by one until a query returns a
    * non-empty result, so the tree is searched once per radius tried.
    *
    * @param root the tree to search
    * @param what the value to search around
    * @return the first non-empty query result
    */
  def findClosest[T](root: BKTree[T], what: T): List[T] = {
    @scala.annotation.tailrec
    def widen(radius: Int): List[T] = {
      val hits = root.query(what, radius)
      if (hits.nonEmpty) hits else widen(radius + 1)
    }
    widen(0)
  }
}
I'll admit that it is somewhat dirty and ugly, and that I was being way too clever with the insertion algorithm. Also, it only works well when the closest match is at a small distance; otherwise you'll search the tree repeatedly. Here's an alternate implementation that does a better job of it:
/** A node of a BK-tree: holds one value plus child subtrees indexed by their
  * distance from that value.
  *
  * Pruning in [[query]] and [[findClosest]] relies on `computeDistance`
  * obeying the triangle inequality; with a distance that does not, matches
  * may be missed.
  *
  * @param computeDistance distance function between two values
  * @param node            the value stored at this node
  * @tparam T              type of the stored values
  */
class BKTree[T](computeDistance: (T, T) => Int, node: T) {
  /** Child subtrees, keyed by the distance between `node` and the child's value. */
  val subnodes = scala.collection.mutable.HashMap.empty[Int, BKTree[T]]

  /** Collects every stored value whose distance to `what` is at most `distance`.
    *
    * @param what     the value to search around
    * @param distance maximum accepted distance (inclusive)
    * @return matching values; this node's value (if it matches) comes first,
    *         followed by matches from the subtrees in map-iteration order
    */
  def query(what: T, distance: Int): List[T] = {
    val currentDistance = computeDistance(node, what)
    val minDistance = currentDistance - distance
    val maxDistance = currentDistance + distance
    // Only children whose key lies in [minDistance, maxDistance] are searched.
    val partialResult = subnodes.keys.toList
      .filter(key => minDistance <= key && key <= maxDistance)
      .flatMap(key => subnodes(key).query(what, distance))
    if (currentDistance <= distance) node :: partialResult else partialResult
  }

  /** Searches this subtree for the values nearest to `what`.
    *
    * @param what         the value to search around
    * @param bestDistance the smallest distance known so far
    * @return the best distance after searching this subtree, paired with the
    *         values found here at that distance (empty if nothing in this
    *         subtree is within `bestDistance`)
    */
  private def find(what: T, bestDistance: Int): (Int, List[T]) = {
    val currentDistance = computeDistance(node, what)
    val presentSolution = if (currentDistance <= bestDistance) List(node) else Nil
    val best = currentDistance min bestDistance
    subnodes.keys.foldLeft((best, presentSolution)) {
      case (acc @ (currentBest, currentSolution), key) =>
        // A child keyed `key` can only hold a value within `currentBest` of
        // `what` when key is in [currentDistance - currentBest,
        // currentDistance + currentBest]; skip it otherwise.
        val worthSearching =
          key <= currentDistance + currentBest && key >= currentDistance - currentBest
        if (!worthSearching) acc
        else
          subnodes(key).find(what, currentBest) match {
            case (_, Nil) => acc
            // Strictly closer: earlier candidates are discarded.
            case (better, solution) if better < currentBest => (better, solution)
            // Tie with the current best: keep both sets of candidates.
            case (_, solution) => (currentBest, currentSolution ::: solution)
          }
    }
  }

  /** Returns the stored values with the smallest distance to `what`.
    *
    * The search is seeded with this node's own distance to `what`.
    *
    * @param what the value to search around
    * @return all values tied for the minimum distance
    */
  def findClosest(what: T): List[T] = find(what, computeDistance(node, what))._2

  /** Adds `what` to the tree.
    *
    * @param what the value to store
    * @return `false` if an equal value was already present on the insertion
    *         path, `true` if a new node was created
    */
  def insert(what: T): Boolean =
    if (node == what) false
    else {
      // Computed once and reused for both the lookup and the possible store.
      val key = computeDistance(node, what)
      subnodes.get(key) match {
        case Some(child) => child.insert(what)
        case None =>
          subnodes(key) = new BKTree(computeDistance, what)
          true
      }
    }

  override def toString: String = s"$node($subnodes)"
}
/** Small demo: builds a tree of characters and prints the ones closest to 'D'. */
object Test {
  def main(args: Array[String]): Unit = {
    // The distance function defined in this object is `charDistance`.
    val root = new BKTree[Char](charDistance, 'A')
    root.insert('C')
    root.insert('E')
    root.insert('M')
    root.insert('Z')
    println(root.findClosest('D'))
  }

  /** Absolute difference between the two characters' numeric values. */
  def charDistance(a: Char, b: Char): Int = (a - b).abs
}