The problem is that when I run the code below on a single core, it sometimes runs correctly and sometimes fails with a segmentation fault. The problem will probably occur more frequently on a multi-core machine. I need to know where this non-determinism is introduced in my program and how I can resolve it. Thanks.
// Number of worker threads the Evaluator constructor creates, and the number
// of fork_sync posts main() issues per evaluation round.
int numThreads = 4;
// Forward declaration so that struct E can hold an Evaluator pointer.
class Evaluator;
// Argument bundle handed to a worker thread's start routine.
struct E {
    Evaluator* evaluator; // object whose go() the worker runs
    int id;               // worker index, passed through to go()
};
class Evaluator {
public:
    pthread_t * threads;
    sem_t* fork_sync;
    sem_t* join_sync;
    int tin;
    pthread_mutex_t tin_mut;
    double * d;
    int sz;
    int cursor;
    pthread_mutex_t c_mut;
    Evaluator(sem_t* fs, sem_t* js) {
        fork_sync = fs;
        join_sync = js;
        threads = new pthread_t[numThreads];
        tin = 0;
        pthread_mutex_init(&tin_mut,NULL);
        for(int i=0 ;i<numThreads; i++) {
            E arg;
            arg.evaluator = this;
            arg.id = i;
            pthread_create(&threads[i],NULL,(void* (*) (void*) )func,(void*)&arg);
        }
        //dummy init
        sz = 20;
        d = new double[sz];
        for(int i=0; i<sz ; i++) d[i] = .5 + i;
        cursor = 0;
        pthread_mutex_init(&c_mut,NULL);
    }
    static void func(E* e) {        
        Evaluator* eval = e -> evaluator;
        eval -> go(e -> id);
    }
    void reset() {
        cursor = 0;
    }
    void go(int id) {
        while(1) {
            sem_wait(fork_sync);
            pthread_mutex_lock(&tin_mut);
            ++tin;
            pthread_mutex_unlock(&tin_mut);
            while(1) {
                int idx;
                pthread_mutex_lock(&c_mut);
                idx = cursor++;
                pthread_mutex_unlock(&c_mut);
                if(idx >= sz ) break;
                // do the evaluation
                cout << "evaluating  index " << idx << " using thread " << id << endl;
            }
            int remain;
            pthread_mutex_lock(&tin_mut);
            remain = --tin;
            pthread_mutex_unlock(&tin_mut);
            if(remain == 0) sem_post(join_sync);
        }
    }
};
// Drives a fixed number of evaluation rounds: for each round the cursor is
// rewound, every worker is released through fork_sync, and the driver then
// blocks on join_sync until the workers report back.
int main(int argc, char *argv[]) {
    // Both semaphores start at 0 and are shared between threads of this
    // process only (pshared == 0).
    sem_t fork_sync;
    sem_t join_sync;
    sem_init(&fork_sync,0,0);
    sem_init(&join_sync,0,0);

    Evaluator e(&fork_sync,&join_sync);

    //evaluating `rounds` times
    const int rounds = 3;
    int round = 0;
    while(round < rounds) {
        cout << "---------- evaluation number :" << round << endl;
        e.reset();

        // one fork token per worker
        int released = 0;
        while(released < numThreads) {
            sem_post(&fork_sync);
            ++released;
        }

        sem_wait(&join_sync);
        cout << endl;
        ++round;
    }
    return 0;
}