views:

756

answers:

2

I have a UTC date-time stored without formatting in a uint64, e.g. 20090520145024798. I need to get the hours, minutes, seconds and milliseconds out of this value. I can do this very easily by converting it to a string and using substring. However, this code needs to be very fast, so I would like to avoid string manipulation. Is there a faster way, perhaps using bit manipulation, to do this? Oh, by the way, this needs to be done in C++ on Linux.

+4  A: 
uint64 u = 20090520145024798ULL;
// Split once with 64-bit arithmetic: the low 9 digits are HHMMSSmmm,
// the high 8 digits are YYYYMMDD.
unsigned long w = u % 1000000000;
unsigned millisec = w % 1000;
w /= 1000;
unsigned sec = w % 100;
w /= 100;
unsigned min = w % 100;
unsigned hour = w / 100;
// The date digits must be taken from the original value u; w only ever
// held the time-of-day digits and has already been reduced above.
unsigned long v = u / 1000000000;
unsigned day = v % 100;
v /= 100;
unsigned month = v % 100;
unsigned year = v / 100;

The reason this solution switches from the uint64 u to unsigned long w (and v) partway through is that YYYYMMDD and HHMMSSIII each fit in 32 bits, and 32-bit division is faster than 64-bit division on some systems.

pts
If you're worried about 64-bit division, you could avoid all but one (pair) of them. Split at 1E9 to start with, and put the 8-digit date in one int32 and the 9-digit time in the other.
Steve Jessop
Thanks, updated.
pts
+3  A: 

To build upon pts's and onebyone's suggestions, here's a benchmark of their approaches using 32-bit and 64-bit operations, on a Core 2 processor:

#include <stdio.h>
#include <sys/time.h>
#include <sys/resource.h>

/* 64-bit unsigned integer used to carry the packed decimal timestamp. */
typedef unsigned long long uint64;

/* Broken-down timestamp fields filled in by the tbreakdown* functions. */
struct outs {
    unsigned millisec;
    unsigned sec;
    unsigned min;
    unsigned hour;
    unsigned day;
    unsigned month;
    unsigned year;
};

/*
 * Split a packed decimal timestamp (YYYYMMDDHHMMSSmmm) into its fields.
 *
 * This variant peels the time-of-day digits directly off the 64-bit value;
 * only the date digits that remain afterwards are narrowed to unsigned long.
 *
 * u    - packed timestamp
 * outp - receives millisec, sec, min, hour, day, month and year
 */
void tbreakdown2(uint64 u, struct outs *outp) {
    uint64 rest = u;

    /* Lowest three digits are the milliseconds. */
    outp->millisec = rest % 1000;
    rest = rest / 1000;

    /* Then two digits each for seconds, minutes and hours. */
    outp->sec = rest % 100;
    rest = rest / 100;

    outp->min = rest % 100;
    rest = rest / 100;

    outp->hour = rest % 100;

    /* Everything above the hour digits is the date part. */
    unsigned long date = rest / 100;

    outp->day = date % 100;
    date = date / 100;

    outp->month = date % 100;
    outp->year = date / 100;
}


/*
 * Split a packed decimal timestamp (YYYYMMDDHHMMSSmmm) into its fields.
 *
 * This variant performs a single 64-bit divide/modulo pair at 10^9 and does
 * all remaining arithmetic on unsigned int values.
 *
 * u    - packed timestamp
 * outp - receives millisec, sec, min, hour, day, month and year
 */
void tbreakdown(uint64 u, struct outs *outp) {
    /*
     * Digit budget of the two halves, against a ceiling of ~4000000000
     * (assuming unsigned int is 32 bits wide):
     *    YYYYMMDD   date half, 8 digits
     *   HHMMSSmmm   time half, 9 digits
     */
    unsigned int date_digits = u / 1000000000ULL;
    unsigned int time_digits = u % 1000000000ULL;

    /* Date half: DD, then MM, and whatever is left is the year. */
    outp->day = date_digits % 100;
    date_digits = date_digits / 100;
    outp->month = date_digits % 100;
    date_digits = date_digits / 100;
    outp->year = date_digits;

    /* Time half: mmm, SS, MM, and whatever is left is the hour. */
    outp->millisec = time_digits % 1000;
    time_digits = time_digits / 1000;
    outp->sec = time_digits % 100;
    time_digits = time_digits / 100;
    outp->min = time_digits % 100;
    time_digits = time_digits / 100;
    outp->hour = time_digits;
}

/* Sample packed timestamp passed to both implementations by main(). */
uint64 inval = 20090520145024798ULL;

/*
 * Print the raw packed timestamp followed by its broken-down form.
 *
 * u    - packed timestamp, printed zero-padded to 18 digits
 * outp - broken-down fields to print as "YYYY-MM-DD HH:MM:SS.mmmm"
 *
 * All fields of struct outs are unsigned, so they are printed with %u
 * (the conversion must match the argument type). The millisecond field
 * keeps its 4-digit zero padding, so 798 ms is shown as ".0798".
 */
void printstruct(uint64 u, struct outs *outp) {
    printf("%018llu\n", u);
    printf("%04u-%02u-%02u %02u:%02u:%02u.%04u\n",
      outp->year, outp->month, outp->day,
      outp->hour, outp->min, outp->sec,
      outp->millisec);
}

/*
 * Print the interval between two timevals as "Elapsed time: S.UUUUUU".
 *
 * tv_begin - start of the interval
 * tv_end   - end of the interval (expected to be >= tv_begin)
 *
 * Both values are converted to whole microseconds before subtracting.
 */
void print_elapsed(struct timeval *tv_begin, struct timeval *tv_end) {
    unsigned long long mcs_begin, mcs_end, mcs_delta;

    mcs_begin = (unsigned long long)tv_begin->tv_sec * 1000000ULL;
    mcs_begin += tv_begin->tv_usec;
    mcs_end = (unsigned long long)tv_end->tv_sec * 1000000ULL;
    mcs_end += tv_end->tv_usec;

    mcs_delta = mcs_end - mcs_begin;

    /*
     * The fractional part must be zero-padded to six digits; otherwise a
     * delta of 1 s + 5 us would be printed as "1.5" instead of "1.000005".
     */
    printf("Elapsed time: %llu.%06llu\n", mcs_delta / 1000000ULL, mcs_delta % 1000000ULL);
}

/*
 * Benchmark driver.
 *
 * For each implementation: run it once and print the result, then call it
 * 100000000 times and report the user CPU time consumed (ru_utime from
 * getrusage). The __sync_synchronize() barriers around each call are there
 * to keep the compiler from optimizing the repeated calls away while still
 * letting it inline them.
 */
int main(void) {
    struct outs out;
    struct outs *outp = &out;
    struct rusage begin, end;

    __sync_synchronize();
    printf("Testing impl 1:\n");
    tbreakdown(inval, outp);
    printstruct(inval, outp);

    __sync_synchronize();
    getrusage(RUSAGE_SELF, &begin);
    for (int i = 0; i < 100000000; i++) {
        __sync_synchronize();
        tbreakdown(inval, outp);
        __sync_synchronize();
    }
    getrusage(RUSAGE_SELF, &end);
    print_elapsed(&begin.ru_utime, &end.ru_utime);

    printf("Testing impl 2:\n");
    tbreakdown2(inval, outp);
    printstruct(inval, outp);

    __sync_synchronize();
    getrusage(RUSAGE_SELF, &begin);
    for (int i = 0; i < 100000000; i++) {
        __sync_synchronize();
        tbreakdown2(inval, outp);
        __sync_synchronize();
    }
    getrusage(RUSAGE_SELF, &end);
    print_elapsed(&begin.ru_utime, &end.ru_utime);

    return 0;
}

And the output:

=====32-bit=====
Testing impl 1:
020090520145024798
2009-05-20 14:50:24.0798
Elapsed time: 6.840427
Testing impl 2:
020090520145024798
2009-05-20 14:50:24.0798
Elapsed time: 19.921245

=====64-bit=====
Testing impl 1:
020090520145024798
2009-05-20 14:50:24.0798
Elapsed time: 3.152197
Testing impl 2:
020090520145024798
2009-05-20 14:50:24.0798
Elapsed time: 4.200262

As you can see, avoiding excess 64-bit manipulations helps even in native 64-bit mode - but makes a huge difference in 32-bit.

The benchmark was run on a Core 2 Duo T7500 processor at 2.2 GHz and compiled with gcc 4.3.3 at -O3. The memory barriers you see are there to ensure the compiler doesn't optimize away the actual operation, while still allowing it to inline the call if it chooses.

bdonlan