views:

236

answers:

5

I need to sort a doubly-linked list. According to the almighty wikipedia, mergesort is the way to go for that.

The recursive algorithm works reasonably well, but as I'm writing a general-purpose implementation, performance might be an issue.

Porting the iterative version for arrays will kill performance as rescanning the list to divide it into sublists is slow; for anyone interested - here's the code:

static void sort(struct linked_list *list,
    int (*cmp)(const void *, const void *))
{
    size_t slice_size = 1;
    for(; slice_size < list->size; slice_size *= 2)
    {
        struct node *tail = list->first;
        while(tail)
        {
            struct node *head = tail;

            size_t count = slice_size;
            while(tail && count--) // performance killer
                tail = tail->next;

            count = slice_size;
            while(head != tail && tail && count)
            {
                if(cmp(head->data, tail->data) <= 0)
                    head = head->next;
                else
                {
                    struct node *node = tail;
                    tail = tail->next;
                    remove_node(node, list);
                    insert_before(node, list, head);
                    --count;
                }
            }

            while(tail && count--) // performance killer
                tail = tail->next;
        }
    }
}

But there's another iterative version using a stack-based approach:

struct slice
{
    struct node *head;
    size_t size;
};

static void sort(struct linked_list *list,
    int (*cmp)(const void *, const void *))
{
    if(list->size < 2) return;

    struct slice stack[32];
    size_t top = -1;
    struct node *current = list->first;

    for(; current; current = current->next)
    {
        stack[++top] = (struct slice){ current, 1 };
        for(; top && stack[top-1].size <= stack[top].size; --top)
            merge_down(list, cmp, stack + top);
    }

    for(; top; --top)
        merge_down(list, cmp, stack + top);
}

This will push size 1 lists onto the stack and merges down as long as the top list is of greater or equal size than its predecessor.

Unfortunately, there's a bug somewhere as for some input lists, merge_down() will fail a sanity check:

static void merge_down(struct linked_list *list,
    int (*cmp)(const void *, const void *), struct slice *top)
{
    struct node *right = top->head;
    size_t count = top->size;

    --top;

    struct node *left = top->head;
    top->size += count;

{
    // sanity check: count nodes in right list
    int i = count;
    struct node *node = right;
    for(; i--; node = node->next) if(!node)
    {
        puts("too few right nodes");
        exit(0);
    }
}

    // determine merged list's head
    if(cmp(left->data, right->data) <= 0)
    {
        top->head = left;
        left = left->next;
    }
    else
    {
        top->head = right;
        struct node *node = right;
        right = right->next;
        remove_node(node, list);
        insert_before(node, list, left);
        --count;
    }

    while(left != right && count)
    {
        if(cmp(left->data, right->data) <= 0)
            left = left->next;
        else
        {
            struct node *node = right;
            right = right->next;
            remove_node(node, list);
            insert_before(node, list, left);
            --count;
        }
    }
}

The linked list implementation might be relevant as well:

struct node
{
    struct node *prev;
    struct node *next;
    long long data[]; // use `long long` for alignment
};

struct linked_list
{
    struct _list _list; // ignore
    size_t size;
    struct node *first;
    struct node *last;
};

static void insert_before(struct node *node, struct linked_list *list,
    struct node *ref_node)
{
    if(ref_node)
    {
        node->next = ref_node;
        node->prev = ref_node->prev;
        if(ref_node->prev) ref_node->prev->next = node;
        else list->first = node;
        ref_node->prev = node;
    }
    else // empty list
    {
        node->next = NULL;
        node->prev = NULL;
        list->first = node;
        list->last = node;
    }
    ++list->size;
}

static void remove_node(struct node *node, struct linked_list *list)
{
    if(node->prev) node->prev->next = node->next;
    else list->first = node->next;
    if(node->next) node->next->prev = node->prev;
    else list->last = node->prev;
    --list->size;
}

What am I missing here?

A: 

stack-based approach

/* ... */
    struct slice stack[32];
    size_t top = -1;
    struct node *current = list->first;

    for(; current; current = current->next)
    {
        stack[++top] = (struct slice){ current, 1 };
        for(; top && stack[top-1].size <= stack[top].size; --top)
        /*    ^^^    */
            merge_down(list, cmp, stack + top);
    }
/* ... */

top is always going to be 0 the first time through the loop, right?
The merge_down() function is never going to get called. I didn't try the code, but that doesn't look right.


Edit
32 elements for the stack is not enough... When the list has more than 32 elements in order (maybe after a few passes) you write beyond the end of stack.

pmg
afaik it's correct: you can only merge down if there are at leat two list on the stack, ie no merging on the first iteration
Christoph
Hmmm ... right! Makes sense too.
pmg
Check that `top` doesn't go over the limit: `assert (top < (sizeof stack / sizeof stack[0]) - 1);`
pmg
+1  A: 

Do you ever need to copy a node to the end of the list?
What's the insert_before() call then?

insert_before(node, list, NULL);

That would mess up list->first and node->prev.

pmg
I think such a call can't happen - I'll add some debug output to `insert_before()` to make sure...
Christoph
nope, must be something else
Christoph
+1  A: 

I've now run your code and got it to working after I commented out the line indicated below.

static void merge_down(struct linked_list *list,
    int (*cmp)(const void *, const void *), struct slice *top)
{
    struct node *right = top->head;
    size_t count = top->size;

    --top;

    struct node *left = top->head;
    top->size += count; /* possible bug? */
 /* ^^^^^^^^^^^^^^^^^^^ */

Does that work for you too?

pmg
seems to work - but I don't understand why: as I'm merging the top 2 lists and discarding the right one, shouldn't the left list's size be increased by the right one's
Christoph
Christoph
A: 

As kriss asked for it, here's the recursive version (a standard mergesort using the merge function from the other exaples):

static struct node *merge(struct linked_list *list,
    int (*cmp)(const void *, const void *),
    struct node *left, struct node *right, size_t right_count)
{
    struct node *head;
    if(cmp(left->data, right->data) <= 0)
    {
        head = left;
        left = left->next;
    }
    else
    {
        head = right;
        struct node *node = right;
        right = right->next;
        remove_node(node, list);
        insert_before(node, list, left);
        --right_count;
    }

    while(left != right && right_count)
    {
        if(cmp(left->data, right->data) <= 0)
            left = left->next;
        else
        {
            struct node *node = right;
            right = right->next;
            remove_node(node, list);
            insert_before(node, list, left);
            --right_count;
        }
    }

    return head;
}

static struct node *mergesort(struct linked_list *list,
    int (*cmp)(const void *, const void *), struct node *head, size_t size)
{
    if(size < 2) return head;
    size_t left_count = size / 2;
    size_t right_count = size - left_count;

    struct node *tail = head;
    size_t count = left_count;
    while(count--) tail = tail->next;

    return merge(list, cmp,
        mergesort(list, cmp, head, left_count),
        mergesort(list, cmp, tail, right_count),
        right_count);
}

static void sort(struct linked_list *list,
    int (*cmp)(const void *, const void *))
{
    mergesort(list, cmp, list->first, list->size);
}
Christoph
+1  A: 

I found the error myself:

for(; current; current = current->next)
{
    stack[++top] = (struct slice){ current, 1 };
    for(; top && stack[top-1].size <= stack[top].size; --top)
        merge_down(list, cmp, stack + top);
}

Here, the next value of current gets determined after the call to merge_down(), which might move the node around, ie current->next will no longer point to the correct node.

Rearranging fixes the problem:

while(current)
{
    stack[++top] = (struct slice){ current, 1 };
    current = current->next;
    for(; top && stack[top-1].size <= stack[top].size; --top)
        merge_down(list, cmp, stack + top);
}

Thanks to pmg for the effort: I added some votes for that.

Christoph
hm.. I'll have to wait two days befor being able to accept my own answer :(
Christoph
YAY! Glad you found the problem!
pmg