Here's an example of a program, where coroutines really help to simplify the algorithm - imho its hardly possible to implement otherwise. I also tried to choose a useful task for the demo - this utility converts a binary file to a sequence of A-Z symbols (and back), without any significant redundancy, and it has an ability to work with any specified alphabet (see M.Init line). Basically its something like generalized base64. The code is tested and worked with MSC,IntelC and gcc/mingw.
Update: The algorithm is based on precise arithmetic coding, so its not a one-liner by default. It may be possible to cut it in half by using putc/getc file i/o (thus only a modified rangecoder class and do_process() would remain), but then it would be very limited (eg. won't be applicable to decode a memory block or network stream). Actually coroutines are used as a speed optimization here, and its the point of this post. Unfortunately I don't have any simpler application to properly demonstrate this - I could write a context modelling compressor instead, but that would be like 100 lines more.
Questions:
1) How to replace INCLUDE_PROCESS_TEMPLATE macro with proper C++ code?
2) Is there a way to implement this without coroutines?
(but still with in-memory encoding and buffered file i/o)
3) Any fixes/improvements?
#include <io.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <setjmp.h>
#define NOINLINE __declspec(noinline)
typedef unsigned int uint;
typedef unsigned char byte;
typedef unsigned long long int qword;
enum{ STKPAD=1<<16 };
struct coroutine {
volatile int state;
volatile byte* inpptr;
volatile byte* inpbeg;
volatile byte* inpend;
volatile byte* outptr;
volatile byte* outbeg;
volatile byte* outend;
jmp_buf PointA, PointB;
void yield( int value ) { if( setjmp(PointB)==0 ) { state=value; longjmp(PointA,value); } }
void chkinp( void ) { if( inpptr>=inpend ) yield(1), inpptr=*&inpptr; }
void chkout( void ) { if( outptr>=outend ) yield(2); }
template <int f> byte get( void ) { if( f ) chkinp(); return *inpptr++; }
template <int f> void put( uint c ) { *outptr++ = c; if( f ) chkout(); }
void coro_init( void ) { inpptr=inpbeg=inpend=0; outptr=outbeg=outend=0; state=0; }
void addinp( byte* inp,uint inplen ) { inpbeg=inpptr=inp; inpend=&inp[inplen]; }
void addout( byte* out,uint outlen ) { outbeg=outptr=out; outend=&out[outlen]; }
};
#define INCLUDE_PROCESS_TEMPLATE \
NOINLINE void call_do_process() { byte stktmp[STKPAD]; state=ptrdiff_t(stktmp); do_process(); } \
int coro_process( void ) { if( setjmp(PointA)==0 ) if( state ) longjmp(PointB,3); else call_do_process(); return state; }
struct Rangecoder_SH1x : coroutine {
enum { SCALElog=15, SCALE=1<<SCALElog };
enum { NUM=4, sTOP=0x01000000U, Thres=0xFF000000U };
uint f_decode; // 0=encode, 1=decode;
uint range, Cache, FFNum;
union {
struct { uint low; uint Carry; };
qword lowc;
uint code;
};
uint rc_BProcess( uint freq, uint bit ) {
uint rnew = (range>>SCALElog)*freq;
if( f_decode ) bit = (code>=rnew);
range = ((range-rnew-rnew)&(-bit)) + rnew;
rnew &= -bit;
if( f_decode ) code-=rnew; else lowc+=rnew;
if( f_decode ) while( range<sTOP ) range<<=8, (code<<=8)+=get<1>();
else while( range<sTOP ) range<<=8, ShiftLow();
return bit;
}
void ShiftLow( void ) {
if( low<Thres || Carry ) {
put<1>( Cache+Carry );
for(; FFNum!=0; FFNum-- ) put<1>( Carry-1 );
Cache=low>>24; Carry=0;
} else FFNum++;
low<<=8;
}
void rc_Init( int DECODE ) {
f_decode=DECODE; range=-1; lowc=FFNum=Cache=0;
if( f_decode ) for(int _=0; _<NUM+0; _++) (code<<=8)+=get<1>();
}
};
struct Model : Rangecoder_SH1x {
uint DECODE, f_quit;
enum{ lCNUM=8, CNUM=1<<lCNUM, ROWSIZE=80 };
uint count[2*CNUM];
enum{ inpbufsize=1<<16, outbufsize=1<<16 };
byte inpbuf[inpbufsize], outbuf[outbufsize];
void Init( const char* s ) {
uint i,j;
uint (&p)[CNUM] = (uint(&)[CNUM])count[CNUM];
for( i=0; i<2*CNUM; i++) count[i]=0;
for( i=0; s[i]; i++ ) p[byte(s[i])]++;
for( j=0; j<lCNUM; j++ ) for( i=(CNUM>>j); i<((CNUM+CNUM)>>j); i++ ) count[i>>1] += count[i];
}
INCLUDE_PROCESS_TEMPLATE
void do_process( void ) {
uint i,j;
rc_Init(1-DECODE);
for( i=0; !f_quit; i++ ) {
uint c=0, ctx=1;
if( DECODE ) do c=get<1>(); while( c==10 );
for( j=lCNUM-1; j!=-1; j-- ) {
uint freq = count[ctx*2+0]*SCALE/(count[ctx*2+0]+count[ctx*2+1]);
ctx += ctx + ((freq==0) ? 1 : (freq==SCALE) ? 0 : rc_BProcess(freq,(c>>j)&1));
}
if( !DECODE ) put<1>(ctx), (((i+1)%ROWSIZE==0)?put<1>(10),0:0);
}
yield(0);
}
void ProcessFile( uint Mode, FILE* f, FILE* g ) {
volatile uint r; volatile qword g_len=0; uint f_len=0;
DECODE=Mode; f_quit=0;
if( DECODE ) addout( (byte*)&g_len, sizeof(f_len)+1 ), r=1;
else f_len=filelength(fileno(f)), addinp( (byte*)&f_len, sizeof(f_len) ),addout(0,0), r=2;
do {
if( r==1 ) {
uint l = fread( inpbuf, 1, inpbufsize, f );
if( l>0 ) {
addinp( inpbuf, l );
} else {
if( inpbeg==inpbuf+1 ) f_quit=1;
memset( inpbuf, 0x80, inpbufsize );
addinp( inpbuf+1, 5 );
}
} else if( r==2 ) {
if( outbeg==outbuf ) fwrite( outbuf, 1, outptr-outbeg, g ); else g_len>>=8;
addout( outbuf, outbufsize );
}
r = coro_process();
} while(r);
fwrite( outbuf, 1,outptr-outbuf, g ); // flush
if( DECODE==0 ) fputc( 10, g ); else fflush(g), chsize( fileno(g), g_len );
}
} M;
int main( int argc, char** argv ) {
if( argc<4 ) return 1;
int DECODE = argv[1][0]=='d';
FILE* f = fopen( argv[2], "rb" ); if( f==0 ) return 2;
FILE* g = fopen( argv[3], "wb" ); if( g==0 ) return 3;
M.Init( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
M.ProcessFile( DECODE, f, g );
}