Let's assume the following data structur with three numpy arrays (id, parent_id) (parent_id of the root element is -1):
import numpy as np
class MyStructure(object):
def __init__(self):
"""
Default structure for now:
1
/ \
2 3
/ \
4 5
"""
self.ids = np.array([1,2,3,4,5])
self.parent_ids = np.array([-1, 1, 1, 3, 3])
def id_successors(self, idOfInterest):
"""
Return logical index.
"""
return self.parent_ids == idOfInterest
def subtree(self, newRootElement):
"""
Return logical index pointing to elements of the subtree.
"""
init_vector = np.zeros(len(self.ids), bool)
init_vector[np.where(self.ids==newRootElement)[0]] = 1
if sum(self.id_successors(newRootElement))==0:
return init_vector
else:
subtree_vec = init_vector
for sucs in self.ids[self.id_successors(newRootElement)==1]:
subtree_vec += self.subtree(sucs)
return subtree_vec
This get's really slow for many ids (>1000). Is there a faster way to implement that?