A naive implementation of this will be slow: O(n^2), testing for each node against each other node. Use a tree to speed it up.
This implementation uses a simple quadtree to make searching more efficient. This doesn't make any attempt to balance the tree, so a badly-ordered list of points could make it very inefficient. For a lot of uses, simply shuffling the list is likely to make it good enough; just be sure not to pass it a lot of items sorted by coordinate, since that'll reduce it to a linked list.
The optimization here is simple: if we're looking for items within a Euclidean distance of 3 units of some point, and we know that all items in a subtree are at least 3 units to the right, there's no way any points in that area could possibly be less than 3 units away.
This code is public domain. Try not to turn it in as homework.
#!/usr/bin/python
import math
def euclidean_distance(pos1, pos2):
x = math.pow(pos1[0] - pos2[0], 2)
y = math.pow(pos1[1] - pos2[1], 2)
return math.sqrt(x + y)
class QuadTreeNode(object):
def __init__(self, pos):
"""
Create a QuadTreeNode at the specified position. pos must be an (x, y) tuple.
Children are classified by quadrant.
"""
# Children of this node are ordered TL, TR, BL, BL (origin top-left).
self.children = [None, None, None, None]
self.pos = pos
def classify_node(self, pos):
"""
Return which entry in children can contain pos. If pos is equal to this
node, return None.
>>> node = QuadTreeNode((10, 20))
>>> node.classify_node((10, 20)) == None
True
>>> node.classify_node((2, 2))
0
>>> node.classify_node((50, 2))
1
>>> node.classify_node((2, 50))
2
>>> node.classify_node((50, 50))
3
X boundary condition:
>>> node.classify_node((10, 2))
0
>>> node.classify_node((10, 50))
2
Y boundary conditoin:
>>> node.classify_node((2, 20))
0
>>> node.classify_node((50, 20))
1
"""
if pos == self.pos:
return None
if pos[0] <= self.pos[0]: # Left
if pos[1] <= self.pos[1]: # Top-left
return 0
else: # Bottom-left
return 2
else: # Right
if pos[1] <= self.pos[1]: # Top-right
return 1
else: # Bottom-right
return 3
assert False, "not reached"
def add_node(self, node):
"""
Add a specified point under this node.
"""
type = self.classify_node(node.pos)
if type is None:
# node is equal to self, so this is a duplicate node. Ignore it.
return
if self.children[type] is None:
self.children[type] = node
else:
# We already have a node there; recurse and add it to the child.
self.children[type].add_node(node)
@staticmethod
def CreateQuadTree(data):
"""
Create a quad tree from the specified list of points.
"""
root = QuadTreeNode(data[0])
for val in data[1:]:
node = QuadTreeNode(val)
root.add_node(node)
return root
def distance_from_pos(self, pos):
return euclidean_distance(self.pos, pos)
def __str__(self): return str(self.pos)
def find_point_within_range(self, pos, distance):
"""
If a point exists within the specified Euclidean distance of the specified
point, return it. Otherwise, return None.
"""
if self.distance_from_pos(pos) <= distance:
return self
for axis in range(0, 4):
if self.children[axis] is None:
# We don't have a node on this axis.
continue
# If moving forward on this axis would permanently put us out of range of
# the point, short circuit the search on that axis.
if axis in (0, 2): # axis moves left on X
if self.pos[0] < pos[0] - distance:
continue
if axis in (1, 3): # axis moves right on X
if self.pos[0] > pos[0] + distance:
continue
if axis in (0, 1): # axis moves up on Y
if self.pos[1] < pos[1] - distance:
continue
if axis in (2, 3): # axis moves down on Y
if self.pos[1] > pos[1] + distance:
continue
node = self.children[axis].find_point_within_range(pos, distance)
if node is not None:
return node
return None
@staticmethod
def find_point_in_range_for_all_trees(point, trees, distance):
"""
If all QuadTreeNodes in trees contain a a point within the specified distance
of point, return True, Otherwise, return False.
"""
for tree in trees:
if tree.find_point_within_range(point, distance) is None:
return False
return True
def test_naive(data, distance):
def find_point_in_list(iter, point):
for i in iter:
if euclidean_distance(i, point) <= distance:
return True
return False
def find_point_in_all_lists(point):
for d in data:
if not find_point_in_list(d, point):
return False
return True
results = []
for d in data:
for point in d:
if find_point_in_all_lists(point):
results.append(point)
return set(results)
def test_tree(data, distance):
trees = [QuadTreeNode.CreateQuadTree(d) for d in data]
results = []
for d in data:
for point in d:
if QuadTreeNode.find_point_in_range_for_all_trees(point, trees, 3):
results.append(point)
return set(results)
def test():
sample_data = [
[(10, 20), (100, 120), (0, 5), (50, 60)],
[(13, 20), (300, 400), (100, 120), (51, 62)]
]
result1 = test_naive(sample_data, 3)
result2 = test_tree(sample_data, 3)
print result1
assert result1 == result2
# Loosely validate the tree algorithm against a lot of sample data, and compare
# performance while we're at it:
def random_data():
import random
return [(random.randint(0,1000), random.randint(0,1000)) for d in range(0,500)]
data = [random_data() for x in range(0,10)]
print "Searching (naive)..."
result1 = test_naive(data, 3)
print "Searching (tree)..."
result2 = test_tree(data, 3)
assert result1 == result2
if __name__ == "__main__":
test()
import doctest
doctest.testmod()