tags:

views:

87

answers:

1

I am trying to do merge sort using void*. If I keep numbers I want to sort to 1-byte it works perfectly. However if the number is located in more than 1 byte it doesn't work well. I believe it takes it as 2 numbers. I tested it with number 500 and at the end I have 256 and 244 not sorted ....

#include "msort.h"
#include <iostream>

using namespace std;

void Mergesort( void* base, size_t nelem, size_t width,
                    int (*fcmp)( const void*, const void* ) )
{
    if (nelem <=1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    cout << "width is: " << width << endl;

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    //ItemType* temp= new ItemType[nelem];
    int temp[20];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
         int num = fcmp( lhs, rhs ); 
         if (num <= 0)
         {
             temp[count] = *lhs;
             lhs = lhs + width;
         }
         else
         {
             temp[count] = *rhs; 
             rhs = rhs + width;
         }
         count++; 
    }

    if (lhs == midpoint)
    {
        while (rhs != end)
        {
            temp[count] = *rhs;
            rhs = rhs + width;
            count++;
        }
    }
    else
    {

        while (lhs != midpoint) 
        {
            temp[count] = *lhs; 
            lhs = lhs + width;
            count++;
        }

    }

    for (int i=0; i<nelem; i++)
    {
        lhs = (char*)(base)+ (width*i);
        *lhs = temp[i];
        lhs = lhs + width;
    }
}

main.cpp

/////// main.cpp ///////////////////////
// here is my comparison function in main.cpp

int compare(const void *one, const void *two)
{
    if (*(int*)one > *(int*)two) return 1;
    if (*(int*)one < *(int*)two) return -1;
    return 0;
}

The output for 1-byte numbers is OK:

In:    5 8 7 4 1 6 11 41 160 47 38 120 40 58 12 43 66 98 17 140
Out:   1 4 5 6 7 8 11 12 17 38 40 41 43 47 58 66 98 120 140 160

The output if there is a 2-byte number is not OK the sorting doesn't work well:

In:    500 50 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Out:   256 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 50 244
+1  A: 

Your immediate problem is that you are using char *lhs to copy the integers around. You need to convert the pointer back to an int * before doing the copying.

#include <iostream>

using namespace std;

static void Mergesort(void *base, size_t nelem, size_t width,
                      int (*fcmp)(const void *, const void *))
{
    if (nelem <= 1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    int temp[20];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
        int num = fcmp(lhs, rhs); 
        if (num <= 0)
        {
            temp[count] = *(int *)lhs;          // Here!
            lhs += width;
        }
        else
        {
            temp[count] = *(int *)rhs;          // Here!
            rhs += width;
        }
        count++; 
    }


    while (rhs != end)
    {
        temp[count] = *(int *)rhs;          // Here!
        rhs = rhs + width;
        count++;
    }
    while (lhs != midpoint) 
    {
        temp[count] = *(int *)lhs;          // Here!
        lhs = lhs + width;
        count++;
    }

    for (int i = 0; i < nelem; i++)
    {
        lhs = (char *)(base)+ (width*i);
        *(int *)lhs = temp[i];                  // Here!
        lhs = lhs + width;
    }
}

Test code

static int compare(const void *one, const void *two)
{
    if (*(int*)one > *(int*)two) return 1;
    if (*(int*)one < *(int*)two) return -1;
    return 0;
}

#define DIM(x) (sizeof(x)/sizeof(*(x)))

int array1[] = { 5, 8, 7, 4, 1, 6, 11, 41, 160, 47, 38, 120,
                 40, 58, 12, 43, 66, 98, 17, 140 };
int array2[] = { 500, 50, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0 };

static void PrintArray(int *array, size_t n_items)
{
    const char *pad = "";
    for (size_t i = 0; i < n_items; i++)
    {
        cout << pad << array[i];
        pad = ", ";
    }
    cout << endl;
}

int main()
{
    PrintArray(array1, DIM(array1));
    Mergesort(array1, DIM(array1), sizeof(array1[0]), compare);
    PrintArray(array1, DIM(array1));

    PrintArray(array2, DIM(array2));
    Mergesort(array2, DIM(array2), sizeof(array2[0]), compare);
    PrintArray(array2, DIM(array2));

    return 0;
}

You are unlucky to be using a little-endian (Intel) machine; had you been using a big-endian machine (SPARC, PPC), you would have gotten an array of zeroes for the small number test case.

There is a more serious, more deep-seated problem too. The code is actually only usable to sort arrays of up to 20 integers, because of the int temp[20]; (which limits the size to 20 and which restricts the type to int) and because of the 'fixed' assignments.

The assignments should be replaced with memory moves (probably calls to memmove()) which in turn means that the code is only going to work for POD (plain ol' data) types. The temp array would need to be allocated as an array of bytes of the appropriate size (nelem * width). And the memory allocation is nasty; it will slow the code down. The 'maybe good news' is that the final loop which copies the temp array over the original array can be replaced by a memmove().

It is not clear that this is good C++ code - I think a templated implementation would be more sensible. As C code, with the memory move issues sorted out, it is OK.

General code

static void Mergesort(void *base, size_t nelem, size_t width,
                      int (*fcmp)(const void *, const void *))
{
    if (nelem <= 1)
        return;

    int lhsnelem = nelem/2; 
    int rhsnelem = nelem - lhsnelem;
    char* midpoint = (char*)(base) + ((nelem/2)*width);

    Mergesort(base, lhsnelem, width, fcmp);
    Mergesort(midpoint, rhsnelem, width, fcmp);

    char *temp = new char[nelem * width];
    char* lhs = (char*)(base);
    char* rhs = midpoint;
    char* end = rhs + (rhsnelem*width);        
    int count = 0;

    while ((lhs != midpoint) && (rhs != end))
    {
        int num = fcmp(lhs, rhs); 
        if (num <= 0)
        {
            memmove(&temp[count], lhs, width);
            lhs += width;
        }
        else
        {
            memmove(&temp[count], rhs, width);
            rhs += width;
        }
        count += width;
    }

    while (rhs != end)
    {
        memmove(&temp[count], rhs, width);
        rhs += width;
        count += width;
    }
    while (lhs != midpoint) 
    {
        memmove(&temp[count], lhs, width);
        lhs += width;
        count += width;
    }

    memmove(base, temp, nelem * width);
    delete[] temp;
}
Jonathan Leffler
first thanks that way it works with my int array :) However, I am confused. I did it with char because I want to be able to sort any data from main.cpp ... int, float, char .. etc. THat's why I used void* and worked with the data byte by byte. my sort function should be able to sort any data just like the qsort.
@user478984: Yes, you wanted to achieve that, and the generalized solution shown works fine - I changed the PrintArray function so its signature is `template <class T> static void PrintArray(T *array, size_t n_items)`, converted the comparison function into a templated, changed the element type of array2 to `double` and added a few entries to it, and that sorted fine too.
Jonathan Leffler
Thanks Jonathan I believe my code works well now. However, it is about 8 times slower than integrated qsort :)
@user478984: the speed differential does not surprise me in the slightest - having to copy the array at the end penalizes the code enormously, not to mention the intermediate copying.
Jonathan Leffler