I've got four unsigned 32-bit integers representing an unsigned 128-bit integer, in little endian order:
typedef struct {
    unsigned int part[4];
} bigint_t;
I'd like to convert this number into its decimal string representation and output it to a file.
Right now, I'm using a bigint_divmod10 function to divide the number by 10, keeping track of the remainder.  I call this function repeatedly, outputting the remainder as a digit, until the number is zero.  It's pretty slow.  Is this the fastest way to do it?  If so, is there a clever way to implement this function that I'm not seeing?  I've tried looking at GMP's get_str.c, but I find it pretty impenetrable.
EDIT: here's the fastest code I was able to come up with for the divmod10 function:
static unsigned uint128_divmod10(uint128 *value)
{
    unsigned int a = value->word[3];
    unsigned int b = value->word[2];
    unsigned int c = value->word[1];
    unsigned int d = value->word[0];
    unsigned int diva = a / 5;
    unsigned int divb = b / 5;
    unsigned int divc = c / 5;
    unsigned int divd = d / 5;
    value->word[3] = diva;
    value->word[2] = divb;
    value->word[1] = divc;
    value->word[0] = divd;
    unsigned int moda = a - diva*5;
    unsigned int modb = b - divb*5;
    unsigned int modc = c - divc*5;
    unsigned int modd = d - divd*5;
    unsigned int mod = 0;
    mod += moda;
    unsigned int carryb = mod*858993459;
    mod += modb;
    if (mod >= 5) {
        mod -= 5;
        carryb++;
    }
    unsigned int carryc = mod*858993459;
    mod += modc;
    if (mod >= 5) {
        mod -= 5;
        carryc++;
    }
    unsigned int carryd = mod*858993459;
    mod += modd;
    if (mod >= 5) {
        mod -= 5;
        carryd++;
    }
    uint128_add(value, carryd, 0);
    uint128_add(value, carryc, 1);
    uint128_add(value, carryb, 2);
    if (value->word[0] & 1) {
        mod += 5;
    }
    uint128_shift(value, -1);
    return mod;
}
where the add function is defined as:
static void uint128_add(uint128 *value, unsigned int k, unsigned int pos)
{
    unsigned int a = value->word[pos];
    value->word[pos] += k;
    if (value->word[pos] < a) {
        // overflow
        for (int i=pos+1; i<4; i++) {
            value->word[i]++;
            if (value->word[i]) {
                break;
            }
        }
    }
}