views:

104

answers:

2

I have a std::vector<DOUBLEPOINT> I'm making an application with bezier curves and the results are shown in real time. I convert bezier points into a bunch of short lines. I store the coordinates for the small lines in the vector above. Here is my issue:. When the size of my vector exceeds a cache line, things get very slow very fast. I was wondering if it would be better to have a lot of std::vector<DOUBLEPOINT> and basically every 100 points, it switches to another one. Would this solve my cache problem? Otherwise, how else could I allow the user to create as many points as needed without it becoming very very slow? all my other algorithms are lightening fast (such as polygon filling) so these are not my issues. What really slows it down is the std::vector.

Thanks

    struct SHAPECONTOUR{

        std::vector<USERFPOINT> UserPoints;
        std::vector<DOUBLEPOINT> DrawingPoints;

        SHAPEOUTLINE Outline;

    };

I call UpdateShape() every time a point is added but I assure you my other algorithms are fast...


void OGLSHAPE::UpdateShape()
{
    if(Contour.size() == 0)
    {
        return;
    }
    for(int i = 0; i < Contour.size(); ++i)
    {
        Contour[i].DrawingPoints.clear();

     if(Contour[i].UserPoints.size() < 2)
     {
         break;
     }


    Contour[i].DrawingPoints.clear();
Contour[i].DrawingPoints.reserve(1000);
     for(unsigned int x = 0; x < Contour[i].UserPoints.size() - 1; ++x)
         SetCubicBezier(
             Contour[i].UserPoints[x],
             Contour[i].UserPoints[x + 1],
             i);




     //Remove Duplicates
     for(int j = 0; j < 2; ++j)
     {
         if(Contour[i].DrawingPoints.size() > 2)
             for(unsigned int x = 0; x < Contour[i].DrawingPoints.size() - 1; ++x)
             {
                 if(Contour[i].DrawingPoints[x].point[0] ==
                     Contour[i].DrawingPoints[x + 1].point[0] &&
                     Contour[i].DrawingPoints[x].point[1] ==
                     Contour[i].DrawingPoints[x + 1].point[1] 

                 )
                     Contour[i].DrawingPoints.erase(Contour[i].DrawingPoints.begin() + x);
             }
     }


     GenerateLinePoly(Contour[i].DrawingPoints,Contour[i].Outline.OutlineWidth);
     Contour[i].Outline.OutlineSize = OutlineVec.size()  / 2;
     glBindBufferARB(GL_ARRAY_BUFFER_ARB,Contour[i].Outline.OutlineVBO);
     glBufferDataARB(GL_ARRAY_BUFFER_ARB,sizeof(GLfloat) * OutlineVec.size(),&OutlineVec[0],GL_STATIC_COPY);


    }

    gluTessNormal(PolygonTesselator.tobj, 0, 0, 1);
    PolygonTesselator.Set_Winding_Rule(WindingRule); 

    //PolygonTesselator.SetDimensions(layer[currentlayer].Shapes[i].Dimensions,layer[currentlayer].Shapes[i].minima);

    PolygonTesselator.Begin_Polygon(); 
    for(unsigned int c = 0; c < Contour.size(); ++c)
    {
            PolygonTesselator.Begin_Contour();

            for(unsigned int j = 0; j < Contour[c].DrawingPoints.size(); ++j)
            {
                gluTessVertex(PolygonTesselator.tobj,&Contour[c].DrawingPoints[j].point[0],
                    &Contour[c].DrawingPoints[j].point[0]);
            }

            PolygonTesselator.End_Contour();

    }
    PolygonTesselator.End_Polygon();



    PolygonTesselator.TransferVerticies(
        ObjectVBOInt,
        TextureCoordsVBOInt,
        ObjectVBOCount,
        TextureCoordsVBOCount);
}

void OGLSHAPE::SetCubicBezier(USERFPOINT &a,USERFPOINT &b, int &currentcontour )
{





        double dx1 = a.RightHandle.x - a.UserPoint.x;
        double dy1 = a.RightHandle.y - a.UserPoint.y;
        double dx2 = b.LeftHandle.x - a.RightHandle.x;
        double dy2 = b.LeftHandle.y - a.RightHandle.y;
        double dx3 = b.UserPoint.x - b.LeftHandle.x;
        double dy3 = b.UserPoint.y - b.LeftHandle.y;

        float len = sqrt(dx1 * dx1 + dy1 * dy1) + 
            sqrt(dx2 * dx2 + dy2 * dy2) + 
            sqrt(dx3 * dx3 + dy3 * dy3);




        int NUM_STEPS =  int(len * 0.049);

        if(NUM_STEPS > 55)
        {
            NUM_STEPS = 55;
        }
        double subdiv_step  = 1.0 / (NUM_STEPS + 1);
        double subdiv_step2 = subdiv_step*subdiv_step;
        double subdiv_step3 = subdiv_step*subdiv_step*subdiv_step;

        double pre1 = 3.0 * subdiv_step;
        double pre2 = 3.0 * subdiv_step2;
        double pre4 = 6.0 * subdiv_step2;
        double pre5 = 6.0 * subdiv_step3;



        double tmp1x = a.UserPoint.x - a.RightHandle.x * 2.0 + b.LeftHandle.x;
        double tmp1y = a.UserPoint.y - a.RightHandle.y  * 2.0 + b.LeftHandle.y;

        double tmp2x = (a.RightHandle.x - b.LeftHandle.x)*3.0 - a.UserPoint.x + b.UserPoint.x;
        double tmp2y = (a.RightHandle.y - b.LeftHandle.y)*3.0 - a.UserPoint.y + b.UserPoint.y;

        temp.point[0] = a.UserPoint.x;
        temp.point[1] = a.UserPoint.y;

        //a user
        //a right
        //b left
        //b user

        double dfx = (a.RightHandle.x - a.UserPoint.x)*pre1 + tmp1x*pre2 + tmp2x*subdiv_step3;
        double dfy = (a.RightHandle.y - a.UserPoint.y)*pre1 + tmp1y*pre2 + tmp2y*subdiv_step3;

        double ddfx = tmp1x*pre4 + tmp2x*pre5;
        double ddfy = tmp1y*pre4 + tmp2y*pre5;

        double dddfx = tmp2x*pre5;
        double dddfy = tmp2y*pre5;

        int step = NUM_STEPS;

        // Suppose, we have some abstract object Polygon which
        // has method AddVertex(x, y), similar to LineTo in
        // many graphical APIs.
        // Note, that the loop has only operation add!

        while(step--)
        {


            temp.point[0]  += dfx;
            temp.point[1]  += dfy;
            dfx  += ddfx;
            dfy  += ddfy;
            ddfx += dddfx;
            ddfy += dddfy;

            Contour[currentcontour].DrawingPoints.push_back(temp);
        }


        temp.point[0] = (GLdouble)b.UserPoint.x;
        temp.point[1] = (GLdouble)b.UserPoint.y;
        Contour[currentcontour].DrawingPoints.push_back(temp);


}

void OGLSHAPE::GenerateLinePoly(const std::vector<DOUBLEPOINT> &input, int width)
{
    OutlineVec.clear();
    if(input.size() < 2)
    {
        return;
    }



    float w = width / 2.0f;

    //glBegin(GL_TRIANGLES);
    for( size_t i = 0; i < input.size()-1; ++i )
    {
        POINTFLOAT cur;
        cur.x = input[i].point[0];
        cur.y = input[i].point[1];


        POINTFLOAT nxt;
        nxt.x = input[i+1].point[0];
        nxt.y = input[i+1].point[1];

        POINTFLOAT b;
        b.x = nxt.x - cur.x;
        b.y = nxt.y - cur.y;

        b = normalize(b);



        POINTFLOAT b_perp;
        b_perp.x = -b.y;
        b_perp.y = b.x;


        POINTFLOAT p0;
        POINTFLOAT p1;
        POINTFLOAT p2;
        POINTFLOAT p3;

        p0.x = cur.x + b_perp.x * w;
        p0.y = cur.y + b_perp.y * w;

        p1.x = cur.x - b_perp.x * w;
        p1.y = cur.y - b_perp.y * w;

        p2.x = nxt.x + b_perp.x * w;
        p2.y = nxt.y + b_perp.y * w;

        p3.x = nxt.x - b_perp.x * w;
        p3.y = nxt.y - b_perp.y * w;

        OutlineVec.push_back(p0.x);
        OutlineVec.push_back(p0.y);
        OutlineVec.push_back(p1.x);
        OutlineVec.push_back(p1.y);
        OutlineVec.push_back(p2.x);
        OutlineVec.push_back(p2.y);

        OutlineVec.push_back(p2.x);
        OutlineVec.push_back(p2.y);
        OutlineVec.push_back(p1.x);
        OutlineVec.push_back(p1.y);
        OutlineVec.push_back(p3.x);
        OutlineVec.push_back(p3.y);



        // only do joins when we have a prv
        if( i == 0 ) continue;

        POINTFLOAT prv;
        prv.x = input[i-1].point[0];
        prv.y = input[i-1].point[1];

        POINTFLOAT a;
        a.x = prv.x - cur.x;
        a.y = prv.y - cur.y;

        a = normalize(a);

        POINTFLOAT a_perp;
        a_perp.x = a.y;
        a_perp.y = -a.x;

        float det = a.x * b.y  - b.x * a.y;
        if( det > 0 )
        {
            a_perp.x = -a_perp.x;
            a_perp.y = -a_perp.y;

            b_perp.x = -b_perp.x;
            b_perp.y = -b_perp.y;
        }

        // TODO: do inner miter calculation

        // flip around normals and calculate round join points
        a_perp.x = -a_perp.x;
        a_perp.y = -a_perp.y;

        b_perp.x = -b_perp.x;
        b_perp.y = -b_perp.y;

        size_t num_pts = 4;

        std::vector< POINTFLOAT> round( 1 + num_pts + 1 );
        POINTFLOAT nc;
        nc.x = cur.x + (a_perp.x * w);
        nc.y = cur.y + (a_perp.y * w);

        round.front() = nc;

        nc.x = cur.x + (b_perp.x * w);
        nc.y = cur.y + (b_perp.y * w);

        round.back() = nc;

        for( size_t j = 1; j < num_pts+1; ++j )
        {
            float t = (float)j/(float)(num_pts+1);
            if( det > 0 )
         {
             POINTFLOAT nin;
             nin = slerp2d( b_perp, a_perp, 1.0f-t );
             nin.x *= w;
             nin.y *= w;

             nin.x += cur.x;
             nin.y += cur.y;

             round[j] = nin;
         }
            else
         {
             POINTFLOAT nin;
             nin = slerp2d( a_perp, b_perp, t );
             nin.x *= w;
             nin.y *= w;

             nin.x += cur.x;
             nin.y += cur.y;

             round[j] = nin;
         }
        }

        for( size_t j = 0; j < round.size()-1; ++j )
        {

            OutlineVec.push_back(cur.x);
            OutlineVec.push_back(cur.y);


            if( det > 0 )
         {
             OutlineVec.push_back(round[j + 1].x);
             OutlineVec.push_back(round[j + 1].y);
             OutlineVec.push_back(round[j].x);
             OutlineVec.push_back(round[j].y);
         }
            else
         {

             OutlineVec.push_back(round[j].x);
             OutlineVec.push_back(round[j].y);

             OutlineVec.push_back(round[j + 1].x);
             OutlineVec.push_back(round[j + 1].y);
         }
        }
    }

}
A: 

I don't know anything about caching, but it might help to expand your vector manually, instead of letting it expand itself one value at a time as they are added.

According to this reference, you can call reserve to increase the maximum size of your vector. I would suggest starting your vector with a length of say, 100, and once the 100th slot is filled, request 100 more slots.

This answer might not address the question, as I do not know how caching relates to std::vectors but it is possible you are facing the problem I describe.

31eee384
Sorry, this is not the problem, I already do this
Milo
A: 

This probably isn't the root of your issue. But you may want to try using iterators instead of indexing everywhere. It may help the compiler make better optimization decisions. std::for_each looks like a possible candidate for this, you can just defer to another function, for example:

void OGLSHAPE::real_update_shap(SHAPECONTOUR &contour) {
    if(contour.UserPoints.size() < 2) {
        return;
    }
    // do your thing!
}

void OGLSHAPE::UpdateShape() {
    // no need to explicitly test if empty, for_each won't do anything if the vector
    // has no elements
    std::for_each(Contour.begin(), Contour.end(), std::bind1st(std::mem_fun(real_update_shape), this));
}

Or at the very least, use some references to help the compiler out. For example, convert this:

 //Remove Duplicates
 for(int j = 0; j < 2; ++j)
 {
     if(Contour[i].DrawingPoints.size() > 2)
         for(unsigned int x = 0; x < Contour[i].DrawingPoints.size() - 1; ++x)
         {
             if(Contour[i].DrawingPoints[x].point[0] ==
                 Contour[i].DrawingPoints[x + 1].point[0] &&
                 Contour[i].DrawingPoints[x].point[1] ==
                 Contour[i].DrawingPoints[x + 1].point[1] 

             )
                 Contour[i].DrawingPoints.erase(Contour[i].DrawingPoints.begin() + x);
         }
 }

To something like this:

 //Remove Duplicates
 for(int j = 0; j < 2; ++j)
 {
     // reference to the one we care about, this may be allowed to be const
     // but I am not sure since it depends on your specific use cases, if it can be
     // it is better to shoot for const correctness as much as possible.
     // PS: types in all caps is very ugly, people will think it's a macro!
     SHAPECONTOUR &current_contour = Contour[i];
     if(current_contour.DrawingPoints.size() > 2)
         for(unsigned int x = 0; x < current_contour.DrawingPoints.size() - 1; ++x)
         {
             if( current_contour.DrawingPoints[x    ].point[0] ==
                 current_contour.DrawingPoints[x + 1].point[0] &&
                 current_contour.DrawingPoints[x    ].point[1] ==
                 current_contour.DrawingPoints[x + 1].point[1])

             current_contour.DrawingPoints.erase(current_contour.DrawingPoints.begin() + x);
         }
 }

It's one of those things that can possible help, and definitely doesn't hurt. So it's worth a try.

Evan Teran
Unfortunately I profiled my code and realized it was my polygon tessellation that was killing my speed. I don't know how to speed this up...
Milo