views:

133

answers:

4

I'm running into a VERY frustrating pointer issue. I previously posted here: http://stackoverflow.com/questions/3114997/tough-dealing-with-deeply-nested-pointers-in-c

But that post got overly long and is stale, so I chose to repost with more details.

Here is my header file that defines my types:

#include <string>
#include <vector>
#include <sstream>
#include <iostream>

#define USE_3D_GEOM
//#define USE_2D GEOM

#define DEBUG

#ifdef USE_3D_GEOM
 #define DIMENSIONS 3
#elif USE_2D_GEOM
 #define DIMENSIONS 2
#else
 #define DIMENSIONS 1
#endif

#ifndef _COMMON_H
#define _COMMON_H

template<class T>
inline T from_string(const std::string& s)
{
     std::istringstream stream (s);
     T t;
     stream >> t;
     return t;
};

template <class T>
inline std::string to_string (const T& t)
{
std::stringstream ss;
ss << t;
return ss.str();
}

enum e_ensemble_kind 
{
  MICROCANONICAL,
  CANONICAL,
  NVT,
  GRAND_CANONICAL,
  NPT,
  NVE
};

enum e_potential_kind 
{
  HARD_SPHERE,
  SQUARE_WELL,
  LENNARD_JONES
};

enum e_file_types
{
  MC_SIMPLE,
  NAMD,
  GROMACS,
  CHARMM
};

#ifdef USE_3D_GEOM
typedef struct s_coordinates t_coordinates;
#endif

#ifdef USE_2D_GEOM
typedef struct s_coordinates t_coordinates;
#endif

typedef struct s_particle t_particle;

typedef struct s_bond t_bond;

typedef struct s_angle t_angle;


typedef struct s_dihedral t_dihedral;

typedef struct s_molecule t_molecule;

typedef struct s_lj_param t_lj_param;

typedef struct s_bond_param t_bond_param;

typedef struct s_angle_param t_angle_param;

typedef struct s_dih_param t_dih_param;

typedef struct s_lookup_tab t_lookup_tab;

#ifdef USE_3D_GEOM
struct s_coordinates
{
  double x;
  double y;
  double z;

  s_coordinates& operator+(const s_coordinates &to_add)
  {
    x += to_add.x;
    y += to_add.y;
    z += to_add.z;
    return *this;
  }
  s_coordinates& operator-(const s_coordinates &to_subtract)
  {
    x -= to_subtract.x;
    y -= to_subtract.y;
    z -= to_subtract.z;
    return *this;
  }
  s_coordinates& operator=(const s_coordinates &to_assign)
  {
    x = to_assign.x;
    y = to_assign.y;
    z = to_assign.z;
    return *this;
  }
  bool operator==(const s_coordinates &to_assign)
  {

    return x == to_assign.x && y == to_assign.y && z == to_assign.z;
  }
};
#endif

#ifdef USE_2D_GEOM
struct s_coordinates
{
  double x;
  double y;

  s_coordinates& operator+(const s_coordinates &to_add)
  {
    x += to_add.x;
    y += to_add.y;
    return *this;
  }
  s_coordinates& operator-(const s_coordinates &to_subtract)
  {
    x -= to_subtract.x;
    y -= to_subtract.y;
    return *this;
  }
  s_coordinates& operator=(const s_coordinates &to_assign)
  {
    x = to_assign.x;
    y = to_assign.y;
    return *this;
  }
  bool operator==(const s_coordinates &to_assign)
  {

    return x == to_assign.x && y == to_assign.y;
  }
};
#endif

typedef struct s_particle
{
  t_coordinates position;
  double charge;
  double mass;
  std::string name;
  std::vector<t_lj_param>::iterator my_particle_kind_iter;

  s_particle& operator=(const s_particle &to_assign)
  {
    position = to_assign.position;
    charge = to_assign.charge;
    mass = to_assign.mass;
    name = to_assign.name;
    my_particle_kind_iter = to_assign.my_particle_kind_iter;
    return *this;
  }
} t_particle;

struct s_bond
{
  t_particle * particle_1;
  t_particle * particle_2;
  std::vector<t_bond_param>::iterator my_bond_kind_iter;

  s_bond& operator=(const s_bond &to_assign)
  {
    particle_1 = to_assign.particle_1;
    particle_2 = to_assign.particle_2;
    my_bond_kind_iter = to_assign.my_bond_kind_iter;
    return *this;
  }
};

struct s_angle
{
  t_particle * particle_1;
  t_particle * particle_2;
  t_particle * particle_3;
  std::vector<t_angle_param>::iterator my_angle_kind_iter;

  s_angle& operator=(const s_angle &to_assign)
  {
    particle_1 = to_assign.particle_1;
    particle_2 = to_assign.particle_2;
    particle_3 = to_assign.particle_3;
    my_angle_kind_iter = to_assign.my_angle_kind_iter;
    return *this;
  }
};


struct s_dihedral
{
  t_particle * particle_1;
  t_particle * particle_2;
  t_particle * particle_3;
  t_particle * particle_4;
  std::vector<t_dih_param>::iterator my_dih_kind_iter;

  s_dihedral& operator=(const s_dihedral &to_assign)
  {
    particle_1 = to_assign.particle_1;
    particle_2 = to_assign.particle_2;
    particle_3 = to_assign.particle_3;
    particle_4 = to_assign.particle_4;
    my_dih_kind_iter = to_assign.my_dih_kind_iter;
    return *this;
  }
};

struct s_molecule
{
  std::string res_name;
  std::vector<t_particle> my_particles;
  std::vector<t_bond> my_bonds;
  std::vector<t_angle> my_angles;
  std::vector<t_dihedral> my_dihedrals;

  s_molecule& operator=(const s_molecule &to_assign)
  {
    res_name = to_assign.res_name;
    my_particles = to_assign.my_particles;
    my_bonds = to_assign.my_bonds;
    my_angles = to_assign.my_angles;
    my_dihedrals = to_assign.my_dihedrals;
    return *this;
  }
};

struct s_lj_param
{
  double epsilon;
  double sigma;
  std::string atom_kind_name;
};

struct s_bond_param
{
  std::string atom_1;
  std::string atom_2;
  double bond_coeff;
  double default_length;
};

struct s_angle_param
{
  std::string atom_1;
  std::string atom_2; 
  std::string atom_3;
  double angle_coeff;
  double default_angle;
};

struct s_dih_param
{
  std::string atom_1;
  std::string atom_2; 
  std::string atom_3; 
  std::string atom_4;  
  std::vector<double> dih_coeff;
  std::vector<unsigned int> n;
  std::vector<double> delta;
};

struct s_lookup_tab {
  std::string name;
  int code;
};

#endif /*_COMMON_H*/

And here is a call that I make to add a var of type t_molecule (see above header for t_molecule's definition) to an array of molecules.

    void Molecule_Manager_Main::add_molecule(const t_molecule new_molecule)
{
    std::cout << "TYPE :" << new_molecule.res_name << std::endl; 
    std::cout << "3: BOND PARTICLE 1 : "
      << new_molecule.my_bonds[new_molecule.my_bonds.size()-1].particle_1->name
          << std::endl;  
    std::cout << "3: BOND PARTICLE 2 : "
    << new_molecule.my_bonds[new_molecule.my_bonds.size()-1].particle_2->name
          << std::endl; 
    std::cout << "3: BOND ITER CONST : "
    << new_molecule.my_bonds[new_molecule.my_bonds.size()-1].my_bond_kind_iter->bond_coeff
          << " "
    << new_molecule.my_bonds[new_molecule.my_bonds.size()-1].my_bond_kind_iter->default_length
          << std::endl;
    my_molecules.push_back(new_molecule);
    std::cout << "99: INDEX : " << my_molecules.size()-1 << std::endl;
    std::cout << "TYPE :" << my_molecules[my_molecules.size()-1].res_name << std::endl; 
    std::cout << "4: BOND PARTICLE 1 : "
          << my_molecules[my_molecules.size()-1].my_bonds[my_molecules[my_molecules.size()-1].my_bonds.size()-1].particle_1->name
          << std::endl;  
    std::cout << "4: BOND PARTICLE 2 : "
    << my_molecules[my_molecules.size()-1].my_bonds[my_molecules[my_molecules.size()-1].my_bonds.size()-1].particle_2->name
          << std::endl; 
    std::cout << "4: BOND ITER CONST : "
    << my_molecules[my_molecules.size()-1].my_bonds[my_molecules[my_molecules.size()-1].my_bonds.size()-1].my_bond_kind_iter->bond_coeff
          << " "
    << my_molecules[my_molecules.size()-1].my_bonds[my_molecules[my_molecules.size()-1].my_bonds.size()-1].my_bond_kind_iter->default_length
          << std::endl;
  add_performed = true;
}

This works perfectly... the resname string prints, and the info on the last bond in the bonds vector prints. Then once I've added all my molecules. I call this:

t_molecule * Molecule_Manager_Main::get_molecule(unsigned int index)
{
    std::cout << "TYPE :" << my_molecules[index].res_name << std::endl; 
    std::cout << "5: BOND PARTICLE 1 : "
      << my_molecules[index].my_bonds[my_molecules[index].my_bonds.size()-1].particle_1->name
          << std::endl;  
    std::cout << "5: BOND PARTICLE 2 : "
    << my_molecules[index].my_bonds[my_molecules[index].my_bonds.size()-1].particle_2->name
          << std::endl; 
    std::cout << "5: BOND ITER CONST : "
    << my_molecules[index].my_bonds[my_molecules[index].my_bonds.size()-1].my_bond_kind_iter->bond_coeff
          << " "
    << my_molecules[index].my_bonds[my_molecules[index].my_bonds.size()-1].my_bond_kind_iter->default_length
          << std::endl;
  return &(my_molecules[index]);
} 

This segfaults on the bonds line.

I can tell from the indices I print in the add step that I'm not overwriting the molecules I'm pushing onto the vector (the size is growing)..

In other words what appears to be happening is: Read sub-vector (works) -> add some more items in parent vector -> reread sub-vector (seg-faults)

These functions are the ONLY means for molecules vars to be added to the vector, and molecules vars are only added once and not modified posthumously in my current test.

Any ideas???? Thank you in advance!!

+1  A: 

Just reading that you access a varaible called my_bond_kind_iter. After you add some more items in the parent vector, it will resize. This means (assuming that you don't have C++0x rvalue-aware containers) that the child vector will also be copied, invalidating all existing pointers and references into it. So when you try to access this old iterator that is now thoroughly invalid, hello segmentation fault. This will of course also happen if you add more into the child vector.

Vector iterators are not safe, you cannot keep them around and access them later, because vectors resize which means moving memory, and this happens at the whim of the implementation.

DeadMG
cool finally a real answer! How do I fix this, when I add more items to the parent vector? Is there a tutorial somewhere online that discusses this? (It seems like a more complex issue...) Would switching my version of gcc help?
Jason R. Mick
@Jason: No, it's a fundamental algorithmic problem. You need to store locations in a vector by index, not pointer or reference or iterator. Alternatively, you could use a non-resizing container such as a linked list, a map or hash map which don't invalidate references/pointers.
DeadMG
oh one more thing... just for clarification the bond iter is to a vector of bond parameters ( `t_bond_param` ) that are all in place before a single molecule is added... so would the iterator still be invalidated if the parent vector is changing, even if the child vector pointed at stays the same?
Jason R. Mick
If this is the problem, then it's due to the way that vector is defined to work. When the size increases past the capacity, the elements are moved into a larger buffer, changing their addresses. The usual solutions are to use a different container that doesn't have this behaviour (e.g., list), to store pointers in the vectors, or to work out the size ahead of time and resize (or reserve) the vector to the right size and then take care not to increase the size.
brone
The particle pointers also are seg faulting... is that the same sort of issue? Because they're stored in a child vector?
Jason R. Mick
There is a way you know -- if you can know the size required in advance you can call reserve() [probably not quite the right name] beforehand so your iterators don't get invalidated.
Joshua
Alright I was thinking more about this and you are completely right DeadMG. I guess I never thought about it like that, but yea there'd be no way for it to resize dynamically without copying. I'm switching everything over to indices and crossing my fingers... if it works, you are officially my coding hero!! ^_^
Jason R. Mick
p.s. DeadMG, someone told me my overloaded = operator for some of my types in my common header are redundant with what gcc would autogenerate, is this true?
Jason R. Mick
@Jason: Yes. I'm also concerned because your operator- acts like operator-=, not operator-. And the particle pointers, yes that is the same issue. Also, a lot of your defines are redundant.
DeadMG
It worked!!! Thanks again. I will work on cleaning up my redundant overloads too. Awesome job, DeadMG, you really helped where a lot of others couldn't figure out the true issue...
Jason R. Mick
A: 

In the different objects you store iterators to access the elements in some vectors. These iterators get invalidated when the underlying vector is modified, for example by adding new elements. Dereferencing such an iterator is undefined behavior.

Probably you modify the vectors for which you have stored iterators when you add new molecules and using these iterators later on leads to the segmentation faults.

sth
Yep, this is the problem! I've switched to indices, as per DeadMG's advice above and my program is now working like a charm!! Hurrah!
Jason R. Mick
A: 

I would be you, I would heavily refactor this code.

Most of the time when I get a problem like yours (and it's becoming very rare) I refactor the code until I see clearly the problem. Here, there is too much repetitions that can clearly be avoided. Use typedefs, references, consts to avoid repetitions.

Refactoring allow you to reorganise your code and your thought, simplify, make problems obvious. Take time to do this and you'll find the source of the problem.

That might be out of this code.

(about refactoring, i recommend reading this : http://sourcemaking.com/refactoring )

Klaim
im reading the guide you sent as my time permits. thanks! Btw the problem was solved -- it was that the pointers in the vector's structures were getting invalidated when the vector was resized...
Jason R. Mick
A common mistake, I've been through this hell too ^^;
Klaim
A: 

I will recommend to avoid subscript operator (p. ex. my_molecules[index]) while writing trace code (but not restrictive to trace code), prefer at() member function.

tojas
how does that work? I looked in my O'Reily Pocket Reference C++ and don't see an "at" function section... Obviously typing "at function" "at c" , etc in Google is a failing venture... do you have a good resources for this? Thanks in advance!
Jason R. Mick
http://www.codeguru.com/cpp/cpp/cpp_mfc/stl/article.php/c4027
tojas