What is the most efficient way of ignoring case, punctuation, and whitespace in strings? These strings should be divided into words instead of characters should ignore the aforementioned details on comparisons, and slices of these word-strings should be as efficient as possible with speed in mind.
I was going to use case and punctuation insensitive strings for the following code, but after seeing how long it would take to evaluate class Slice: def __eq__(self, other): return self.root == other.root, I have decided to work with data = tuple(string.split()) instead. Having strings that are insensitive to case, punctuation, and spacing and that work over words instead of characters was too expensive into the computationally expensive algorithms already expressed in the code below.
class Slice:
    def __init__(self, data, offset, length):
        self.prefix = data[:offset]
        self.root = data[offset:offset+length]
        self.suffix = data[offset+length:]
    def __eq__(self, other):
        return self.root == other.root
    def __len__(self):
        return len(self.root)
################################################################################
class Match:
    def __init__(self, data, key, prefix_tree, suffix_tree):
        self.data = data
        self.key = key
        self.prefix_tree = prefix_tree
        self.suffix_tree = suffix_tree
        self.__value = len(key) + prefix_tree.value() + suffix_tree.value()
    def value(self):
        return self.__value
################################################################################
class Tree(tuple):
    def __new__(cls, nodes):
        tree = super().__new__(cls, nodes)
        tree.__value = max(map(Match.value, tree)) if tree else 0
        return tree
    def value(self):
        return self.__value
    def find(self, value):
        for index, match in enumerate(self):
            if match.value() == value:
                return index
        raise ValueError()
################################################################################
def search(data, key):
    length = 0
    nodes = []
    for d_block in shrink(data, len(key)):
        block_len = len(d_block)
        if length > block_len:
            return Tree(nodes)
        for k_block in slide(key, block_len):
            if d_block == k_block:
                length = block_len
                prefix_tree = search(d_block.prefix, k_block.prefix)
                suffix_tree = search(d_block.suffix, k_block.suffix)
                match = Match(d_block, k_block, prefix_tree, suffix_tree)
                nodes.append(match)
    return Tree(nodes)
def shrink(data, max_len):
    for length in range(min(len(data), max_len), 0, -1):
        for block in slide(data, length):
            yield block
def slide(data, length):
    for offset in range(len(data) - length + 1):
        yield Slice(data, offset, length)
################################################################################
def build_tree(nodes):
    match = nodes[nodes.find(nodes.value())]
    node = match.key
    if match.prefix_tree:
        node.prefix = build_tree(match.prefix_tree)
    if match.suffix_tree:
        node.suffix = build_tree(match.suffix_tree)
    return node
def flatten_tree(node):
    array = [0]
    _flatten(node, array)
    return tuple(array)
def _flatten(node, array):
    if isinstance(node.prefix, Slice):
        _flatten(node.prefix, array)
    else:
        array.append(node.prefix)
    array[0] += 1
    array.append((array[0], node.root))
    if isinstance(node.suffix, Slice):
        _flatten(node.suffix, array)
    else:
        array.append(node.suffix)