views: 631

answers: 4

UPDATE: Hey guys, thanks for the replies. Last night and tonight I tried a few different approaches and came up with one similar to the one laid out below by Jeff (I had even already done what he suggested in his update, and put together my own simple linked-list implementation for additional gains). Here is the code; at this point it doesn't look particularly clean anymore, but I have been over it numerous times, changing anything I could to beef up performance.

using System.Collections.Generic;

public class NewLRU2<K, V> where V : class
{
    int m_iMaxItems;
    Dictionary<K, LRUNode<K, V>> m_oMainDict;

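    // m_oHead is the most-recently-used node and m_oTail the least-recently-used;
    // Next links run from the tail toward the head.
    // Note: not thread-safe -- m_oCurrent is shared scratch state.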
    private LRUNode<K,V> m_oHead;
    private LRUNode<K,V> m_oTail;
    private LRUNode<K,V> m_oCurrent;

    public NewLRU2(int iSize)
    {
        m_iMaxItems = iSize;
        m_oMainDict = new Dictionary<K, LRUNode<K,V>>();

        m_oHead = null;
        m_oTail = null;
    }

    public V this[K key]
    {
        get
        {
            m_oCurrent = m_oMainDict[key];

            if (m_oCurrent == m_oHead)
            {
                //do nothing
            }
            else if (m_oCurrent == m_oTail)
            {
                m_oTail = m_oCurrent.Next;
                m_oTail.Prev = null;

                m_oHead.Next = m_oCurrent;
                m_oCurrent.Prev = m_oHead;
                m_oCurrent.Next = null;
                m_oHead = m_oCurrent;
            }
            else
            {
                m_oCurrent.Prev.Next = m_oCurrent.Next;
                m_oCurrent.Next.Prev = m_oCurrent.Prev;

                m_oHead.Next = m_oCurrent;
                m_oCurrent.Prev = m_oHead;
                m_oCurrent.Next = null;
                m_oHead = m_oCurrent;
            }

            return m_oCurrent.Value;
        }
    }

    public void Add(K key, V value)
    {
        if (m_oMainDict.Count >= m_iMaxItems)
        {   
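            // NOTE: assumes m_iMaxItems > 1; with a one-item cache m_oTail.Next
            // is null here and the re-link below would throw.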
            //remove old
            m_oMainDict.Remove(m_oTail.Key);

            //reuse old
            LRUNode<K, V> oNewNode = m_oTail;
            oNewNode.Key = key;
            oNewNode.Value = value;

            m_oTail = m_oTail.Next;
            m_oTail.Prev = null;

            //add new
            m_oHead.Next = oNewNode;
            oNewNode.Prev = m_oHead;
            oNewNode.Next = null;
            m_oHead = oNewNode;
            m_oMainDict.Add(key, oNewNode);
        }
        else
        {
            LRUNode<K, V> oNewNode = new LRUNode<K, V>(key, value);
            if (m_oHead == null)
            {
                m_oHead = oNewNode;
                m_oTail = oNewNode;
            }
            else
            {
                m_oHead.Next = oNewNode;
                oNewNode.Prev = m_oHead;
                m_oHead = oNewNode;
            }
            m_oMainDict.Add(key, oNewNode);
        }
    }

    public bool Contains(K key)
    {
        return m_oMainDict.ContainsKey(key);
    }
}


internal class LRUNode<K,V>
{
    public LRUNode(K key, V val)
    {
        Key = key;
        Value = val;
    }

    public K Key;
    public V Value;
    public LRUNode<K, V> Next;
    public LRUNode<K, V> Prev;
}

There are a few parts that look/feel wonky -- like reusing the old node when doing an add -- but I was able to get an appreciable boost in performance out of them. I was also slightly surprised at the difference it made to switch from actual properties on the node to just public fields, but I guess that's how it goes with this stuff. At this point the code above is almost entirely performance-limited by the dictionary operations, so I'm not sure I'd get much more out of mashing it around. I'll continue to think on it and look into some of the responses.

Explanation From Original Post: Hello all. I've written a simple, lightweight LRU implementation for use in a compression library (I'm using it to find matching byte strings in the input based on a hash, LZW-style), and I'm looking for ways to make it faster.
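For context, here is a rough, hypothetical sketch of how such a cache might sit in an LZW-style matcher. The toy 3-byte hash, the "data.bin" buffer, and the cache size are all made up for illustration; none of this is the actual compression code:

byte[] input = System.IO.File.ReadAllBytes("data.bin"); // any input buffer

// Map a toy 3-byte hash to the last position that byte sequence was seen.
// V must be a reference type ("where V : class"), so the int position is boxed.
NewLRU2<int, object> recent = new NewLRU2<int, object>(4096);

for (int pos = 0; pos + 2 < input.Length; pos++)
{
    int hash = (input[pos] << 16) | (input[pos + 1] << 8) | input[pos + 2];

    if (recent.Contains(hash))
    {
        int candidate = (int)recent[hash]; // earlier position with the same 3 bytes
        // ...compare bytes at 'candidate' and 'pos', emit a back-reference on a match...
    }
    else
    {
        recent.Add(hash, pos); // Add throws on duplicate keys, hence the Contains guard
    }
}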

+2  A: 

UPDATE #2

This eliminates the need for list traversal on a linked-list Remove. It introduces an LruCacheNode that has both the key and the value; the key is only used when you trim the cache. You could get better performance if you wrote your own linked-list implementation where each node is essentially an LruCacheNode along with Next and Prev references. This is sort of what a LinkedHashMap does (see these two questions).

using System.Collections.Generic;

public class LruCache<K, V>
{
    private readonly int m_iMaxItems;
    private readonly Dictionary<K, LinkedListNode<LruCacheNode<K, V>>> m_oMainDict;
    private readonly LinkedList<LruCacheNode<K, V>> m_oMainList;

    public LruCache(int iSize)
    {
        m_iMaxItems = iSize;
        m_oMainDict = new Dictionary<K, LinkedListNode<LruCacheNode<K, V>>>();
        m_oMainList = new LinkedList<LruCacheNode<K, V>>();
    }

    public V this[K key]
    {
        get
        {
            return BumpToFront(key).Value;
        }
        set
        {
            BumpToFront(key).Value = value;
        }
    }

    public void Add(K key, V value)
    {
        LinkedListNode<LruCacheNode<K, V>> newNode = m_oMainList.AddFirst(new LruCacheNode<K, V>(key, value));
        m_oMainDict.Add(key, newNode);

        if (m_oMainList.Count > m_iMaxItems)
        {
            m_oMainDict.Remove(m_oMainList.Last.Value.Key);
            m_oMainList.RemoveLast();
        }
    }

    private LruCacheNode<K, V> BumpToFront(K key)
    {
        LinkedListNode<LruCacheNode<K, V>> node = m_oMainDict[key];
        if (m_oMainList.First != node)
        {
            m_oMainList.Remove(node);
            m_oMainList.AddFirst(node);
        }
        return node.Value;
    }

    public bool Contains(K key)
    {
        return m_oMainDict.ContainsKey(key);
    }
}

internal sealed class LruCacheNode<K, V>
{
    private readonly K m_Key;
    private V m_Value;

    public LruCacheNode(K key, V value)
    {
        m_Key = key;
        m_Value = value;
    }

    public K Key
    {
        get { return m_Key; }
    }

    public V Value
    {
        get { return m_Value; }
        set { m_Value = value; }
    }
}

You'll have to profile things to see if this is an improvement in your environment.

Minor Update: I updated BumpToFront to check whether the node is already at the front, per the comment from Tim Stewart.

Jeff Moser
I tried this code out, and unfortunately it has demolished the performance. All other operations are now dwarfed by Contains, which now uses 96% of all execution time -- presumably due to traversing the entire list on every lookup.
David Hay
Try it again; I updated it to use a HashSet to optimize the .Contains code. If you can't use HashSet<T> because you're targeting a framework before 3.5, you can replace it with a Dictionary<K,bool>.
Jeff Moser
Very nice. @Jeff, may I suggest you use Dict<K,K> in order to enjoy better JIT? That suggestion was passed on to me: you can take advantage of a probably-already-JIT'd Dict<ref,ref> rather than Dict<ref,bool>.
sixlettervariables
Actually, after doing some runtime analysis it looks like yours is slower D: Definitely looks nicer though.
sixlettervariables
What type of access patterns did you test?
Jeff Moser
@Jeff: hit rates from 0-100% at 3 different cache sizes for 5 different sized working sets. Your update now outperforms both.
sixlettervariables
Nice implementation! BumpToFront() could be optimized by checking whether the node it's bumping is already at the front; if so, there's no need to bump.
Tim Stewart
@Tim Stewart: Fair point. I updated the code. Thanks!
Jeff Moser
A: 

With hardware caches, instead of having, say, 128 elements and maintaining the order of items 1-128, you might organize it as 32 x 4: 32 rows of 4 elements each. The first 5 bits of an address determine which of the 32 rows that address maps to; you then search only the 4 items in that row, and if the address isn't found you replace the oldest of the 4.

This is much faster, and is IIRC within 10% of the hit rate of a 1 x 128 cache.

To translate: instead of one linked list, you would have multiple shorter lists, so traversing them is much faster. You would need a way of determining which list a particular item maps to (a rough sketch follows below).

The point is that as your list grows, you get diminishing returns from maintaining the exact order of every element with perfect accuracy. You might even be better off with an unordered list, randomly replacing any element on a cache miss; it depends on the size of your list and the penalty of a miss versus the cost of maintaining the list.
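To make the row-mapping concrete, here is a minimal, untested sketch of a set-associative cache in the same C# vein as the code above. The 32 x 4 shape and all names are illustrative, not from the answer:

using System.Collections.Generic;

// The key's hash picks one of 32 rows; only that row's 4 slots are
// searched, and LRU order is maintained within the tiny row only.
class SetAssocCache<K, V>
{
    private const int NumRows = 32;
    private const int RowSize = 4;

    // Most recently used entry of each row sits at index 0.
    private readonly List<KeyValuePair<K, V>>[] m_rows;

    public SetAssocCache()
    {
        m_rows = new List<KeyValuePair<K, V>>[NumRows];
        for (int i = 0; i < NumRows; i++)
            m_rows[i] = new List<KeyValuePair<K, V>>(RowSize);
    }

    private static int RowOf(K key)
    {
        return (key.GetHashCode() & 0x7FFFFFFF) % NumRows;
    }

    public bool TryGetValue(K key, out V value)
    {
        List<KeyValuePair<K, V>> row = m_rows[RowOf(key)];
        for (int i = 0; i < row.Count; i++)
        {
            if (EqualityComparer<K>.Default.Equals(row[i].Key, key))
            {
                KeyValuePair<K, V> hit = row[i];
                row.RemoveAt(i);   // bump to the front of this row only
                row.Insert(0, hit);
                value = hit.Value;
                return true;
            }
        }
        value = default(V);
        return false;
    }

    public void Add(K key, V value)
    {
        List<KeyValuePair<K, V>> row = m_rows[RowOf(key)];
        if (row.Count >= RowSize)
            row.RemoveAt(row.Count - 1); // evict this row's least recently used
        row.Insert(0, new KeyValuePair<K, V>(key, value));
    }
}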

MikeW
+1  A: 

Isn't the point of an LRU cache to allow you to trim the cache and throw out the least-recently-used stuff? :-) I don't see any code to trim the cache. Since you most likely want high performance for the retrieve use-case, and the trim use-case is less important, why not offload the list maintenance to the trim process?

IOW, just throw the entries into the cache, but timestamp them on retrieval. Don't reorder the entries; just mark them whenever they're used. It could be a true DateTime timestamp, or a simple counter in the class where the highest number marks the most recently used. Then, in the trim process, just walk the whole collection and remove the entries with the oldest stamps.
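A minimal sketch of that stamp-and-trim idea, assuming a simple long counter rather than a DateTime (the names here are illustrative and untested):

using System.Collections.Generic;

// No reordering on reads; retrieval just records a counter stamp,
// and all the eviction work is deferred to Trim().
class StampedCache<K, V>
{
    private sealed class Entry
    {
        public V Value;
        public long Stamp; // higher stamp = more recently used
    }

    private readonly Dictionary<K, Entry> m_dict = new Dictionary<K, Entry>();
    private long m_counter;

    public void Add(K key, V value)
    {
        Entry e = new Entry();
        e.Value = value;
        e.Stamp = ++m_counter;
        m_dict.Add(key, e);
    }

    public V this[K key]
    {
        get
        {
            Entry e = m_dict[key];
            e.Stamp = ++m_counter; // the only bookkeeping a retrieval pays for
            return e.Value;
        }
    }

    // Sort by stamp and drop the oldest entries until maxItems remain.
    public void Trim(int maxItems)
    {
        if (m_dict.Count <= maxItems)
            return;

        List<KeyValuePair<K, Entry>> all = new List<KeyValuePair<K, Entry>>(m_dict);
        all.Sort(delegate(KeyValuePair<K, Entry> a, KeyValuePair<K, Entry> b)
        {
            return a.Value.Stamp.CompareTo(b.Value.Stamp);
        });

        for (int i = 0; i < all.Count - maxItems; i++)
            m_dict.Remove(all[i].Key);
    }
}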

WaldenL
+2  A: 

After considering Jeff's point, here is the solution I came up with, which should be much faster. To recap: your code was slow at list.Remove(key) operations since it needed to iterate over each item in the list. Instead, we keep KeyValuePair objects in the list and have the dictionary point to LinkedListNodes. This way we can find the list node from a dictionary entry as fast as we can find the dictionary entry from the list node.

I didn't test it, but the idea should be solid. Sorry for not adhering to your exact interface :)

using System.Collections.Generic;
using System.Diagnostics;

// not thread-safe
class LruCache<K, V>
{
    Dictionary<K, LinkedListNode<KeyValuePair<K, V>>> dict;
    LinkedList<KeyValuePair<K, V>> list;
    int internalSize;

    public LruCache(int size)
    {
        internalSize = size;
        Debug.Assert(size > 1); // 1 wouldn't work right
        dict = new Dictionary<K, LinkedListNode<KeyValuePair<K, V>>>(size);
        list = new LinkedList<KeyValuePair<K, V>>();
    }

    public V this[K key]
    {
        get
        {
            var node = dict[key];
            if (node != list.First)
            {
                list.Remove(node);
                list.AddFirst(node);
            }
            return node.Value.Value;
        }
        set
        {
            if (!dict.ContainsKey(key))
            {
                // open space for old item if cap reached
                if (dict.Count == internalSize)
                {
                    var oldNode = list.Last;
                    list.Remove(oldNode);           
                    dict.Remove(oldNode.Value.Key); 
                }
                // create new pair 
                var pair = new KeyValuePair<K, V>(key, value);
                // and wrap with a linkedlistnode object
                var node = new LinkedListNode<KeyValuePair<K, V>>(pair);
                dict.Add(key, node);
                list.AddFirst(node);
            }
            else
            {
                var node = dict[key];
                // set value
                node.Value = new KeyValuePair<K,V>(key, value);
                // just refresh the newly set key
                if (node != list.First)
                {
                    list.Remove(node);      
                    list.AddFirst(node);    
                }
            }
        }
    }
}
ssg