Does the POSIX standard allow a named shared memory block to contain a mutex and condition variable?
We've been trying to use a mutex and condition variable to synchronise access to named shared memory by two processes on a LynuxWorks LynxOS-SE system (POSIX-conformant).
One shared memory block is called "/sync"
and contains the mutex and condition variable, the other is "/data"
and contains the actual data we are syncing access to.
We're seeing failures from pthread_cond_signal()
if both processes don't perform the mmap()
calls in exactly the same order, or if one process mmaps in some other piece of shared memory before it mmaps the "/sync"
memory.
This example code is about as short as I can make it:
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <stdlib.h>
#include <pthread.h>
#include <errno.h>
#include <iostream>
#include <string>
using namespace std;
static const string shm_name_sync("/sync");
static const string shm_name_data("/data");
struct shared_memory_sync
{
pthread_mutex_t mutex;
pthread_cond_t condition;
};
struct shared_memory_data
{
int a;
int b;
};
//Create 2 shared memory objects
// - sync contains 2 shared synchronisation objects (mutex and condition)
// - data not important
void create()
{
// Create and map 'sync' shared memory
int fd_sync = shm_open(shm_name_sync.c_str(), O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
ftruncate(fd_sync, sizeof(shared_memory_sync));
void* addr_sync = mmap(0, sizeof(shared_memory_sync), PROT_READ|PROT_WRITE, MAP_SHARED, fd_sync, 0);
shared_memory_sync* p_sync = static_cast<shared_memory_sync*> (addr_sync);
// init the cond and mutex
pthread_condattr_t cond_attr;
pthread_condattr_init(&cond_attr);
pthread_condattr_setpshared(&cond_attr, PTHREAD_PROCESS_SHARED);
pthread_cond_init(&(p_sync->condition), &cond_attr);
pthread_condattr_destroy(&cond_attr);
pthread_mutexattr_t m_attr;
pthread_mutexattr_init(&m_attr);
pthread_mutexattr_setpshared(&m_attr, PTHREAD_PROCESS_SHARED);
pthread_mutex_init(&(p_sync->mutex), &m_attr);
pthread_mutexattr_destroy(&m_attr);
// Create the 'data' shared memory
int fd_data = shm_open(shm_name_data.c_str(), O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
ftruncate(fd_data, sizeof(shared_memory_data));
void* addr_data = mmap(0, sizeof(shared_memory_data), PROT_READ|PROT_WRITE, MAP_SHARED, fd_data, 0);
shared_memory_data* p_data = static_cast<shared_memory_data*> (addr_data);
// Run the second process while it sleeps here.
sleep(10);
int res = pthread_cond_signal(&(p_sync->condition));
assert(res==0); // <--- !!!THIS ASSERT WILL FAIL ON LYNXOS!!!
munmap(addr_sync, sizeof(shared_memory_sync));
shm_unlink(shm_name_sync.c_str());
munmap(addr_data, sizeof(shared_memory_data));
shm_unlink(shm_name_data.c_str());
}
//Open the same 2 shared memory objects but in reverse order
// - data
// - sync
void open()
{
sleep(2);
int fd_data = shm_open(shm_name_data.c_str(), O_RDWR, S_IRUSR|S_IWUSR);
void* addr_data = mmap(0, sizeof(shared_memory_data), PROT_READ|PROT_WRITE, MAP_SHARED, fd_data, 0);
shared_memory_data* p_data = static_cast<shared_memory_data*> (addr_data);
int fd_sync = shm_open(shm_name_sync.c_str(), O_RDWR, S_IRUSR|S_IWUSR);
void* addr_sync = mmap(0, sizeof(shared_memory_sync), PROT_READ|PROT_WRITE, MAP_SHARED, fd_sync, 0);
shared_memory_sync* p_sync = static_cast<shared_memory_sync*> (addr_sync);
// Wait on the condvar
pthread_mutex_lock(&(p_sync->mutex));
pthread_cond_wait(&(p_sync->condition), &(p_sync->mutex));
pthread_mutex_unlock(&(p_sync->mutex));
munmap(addr_sync, sizeof(shared_memory_sync));
munmap(addr_data, sizeof(shared_memory_data));
}
int main(int argc, char** argv)
{
if(argc>1)
{
open();
}
else
{
create();
}
return (0);
}
Run this program with no args, then another copy with args, and the first one will fail at the assert checking the pthread_cond_signal()
.
But change the order of the open()
function to mmap()
the "/sync
" memory before the "/data"
and it will all work fine.
This seems like a major bug in LynxOS to me, but LynuxWorks claim that using mutex and condition variable within named shared memory in this way is not covered by the POSIX standard, so they are not interested.
Can anyone determine if this code does actually violate POSIX?
Or does anyone have any convincing documentation that it is POSIX compliant?
Edit: we know that PTHREAD_PROCESS_SHARED
is POSIX and is supported by LynxOS. The area of contention is whether mutexes and semaphores can be used within named shared memory (as we have done) or if POSIX only allows them to be used when one process creates and mmaps the shared memory and then forks the second process.