I am using free to free the memory allocated for a bunch of temporary arrays in a recursive function. I would post the code but it is pretty long. When I comment out these free() calls, the program runs in less than a second. However, when I am using them, the programs takes about 20 seconds to run. Why is this happening, and how can it be fixed? This is like 100 or so MB so I'd rather not just leave the memory leak.
Additionally, when I run the program that includes all of the free() calls with profiling enabled, it runs in less than a second. I don't know how that would have an effect, but it does.
After using only some of the free() calls, it seems that there are a few in particular that cause the program to slow down. The rest do not seem to have an effect.
Ok... here's the code as requested:
void KDTree::BuildBranch(int height, Mailbox** objs, int nObjects)
{
int dnObjects = nObjects * 2;
int dnmoObjects = dnObjects - 1;
//Check for termination
if(height == -1 || nObjects < minObjectsPerNode)
{
//Create leaf
tree[nodeIndex] = KDTreeNode();
if(nObjects == 1)
tree[nodeIndex].InitializeLeaf(objs[0], 1);
else
tree[nodeIndex].InitializeLeaf(objs, nObjects);
//Added a node, increment index
nodeIndex++;
return;
}
//Save this node's index and increment the current index to save space for this node
int thisNodeIndex = nodeIndex;
nodeIndex++;
//Allocate memory for split options
float* xMins = (float*)malloc(nObjects * sizeof(float));
float* yMins = (float*)malloc(nObjects * sizeof(float));
float* zMins = (float*)malloc(nObjects * sizeof(float));
float* xMaxs = (float*)malloc(nObjects * sizeof(float));
float* yMaxs = (float*)malloc(nObjects * sizeof(float));
float* zMaxs = (float*)malloc(nObjects * sizeof(float));
//Find all possible split locations
int index = 0;
BoundingBox* tempBox = new BoundingBox();
for(int i = 0; i < nObjects; i++)
{
//Get bounding box
objs[i]->prim->MakeBoundingBox(tempBox);
//Add mins to split lists
xMins[index] = tempBox->x0;
yMins[index] = tempBox->y0;
zMins[index] = tempBox->z0;
//Add maxs
xMaxs[index] = tempBox->x1;
yMaxs[index] = tempBox->y1;
zMaxs[index] = tempBox->z1;
index++;
}
//Sort lists
Util::sortFloats(xMins, nObjects);
Util::sortFloats(yMins, nObjects);
Util::sortFloats(zMins, nObjects);
Util::sortFloats(xMaxs, nObjects);
Util::sortFloats(yMaxs, nObjects);
Util::sortFloats(zMaxs, nObjects);
//Allocate bin lists
Bin* xLeft = (Bin*)malloc(dnObjects * sizeof(Bin));
Bin* xRight = (Bin*)malloc(dnObjects * sizeof(Bin));
Bin* yLeft = (Bin*)malloc(dnObjects * sizeof(Bin));
Bin* yRight = (Bin*)malloc(dnObjects * sizeof(Bin));
Bin* zLeft = (Bin*)malloc(dnObjects * sizeof(Bin));
Bin* zRight = (Bin*)malloc(dnObjects * sizeof(Bin));
//Initialize all bins
for(int i = 0; i < dnObjects; i++)
{
xLeft[i] = Bin(0, 0.0f);
xRight[i] = Bin(0, 0.0f);
yLeft[i] = Bin(0, 0.0f);
yRight[i] = Bin(0, 0.0f);
zLeft[i] = Bin(0, 0.0f);
zRight[i] = Bin(0, 0.0f);
}
//Construct min and max bins bins from split locations
//Merge min/max lists together for each axis
int minIndex = 0, maxIndex = 0;
for(int i = 0; i < dnObjects; i++)
{
if(maxIndex == nObjects || (xMins[minIndex] <= xMaxs[maxIndex] && minIndex != nObjects))
{
//Add split location to both bin lists
xLeft[i].rightEdge = xMins[minIndex];
xRight[i].rightEdge = xMins[minIndex];
//Add geometry to mins counter
xLeft[i+1].objectBoundCounter++;
minIndex++;
}
else
{
//Add split location to both bin lists
xLeft[i].rightEdge = xMaxs[maxIndex];
xRight[i].rightEdge = xMaxs[maxIndex];
//Add geometry to maxs counter
xRight[i].objectBoundCounter++;
maxIndex++;
}
}
//Repeat for y axis
minIndex = 0, maxIndex = 0;
for(int i = 0; i < dnObjects; i++)
{
if(maxIndex == nObjects || (yMins[minIndex] <= yMaxs[maxIndex] && minIndex != nObjects))
{
//Add split location to both bin lists
yLeft[i].rightEdge = yMins[minIndex];
yRight[i].rightEdge = yMins[minIndex];
//Add geometry to mins counter
yLeft[i+1].objectBoundCounter++;
minIndex++;
}
else
{
//Add split location to both bin lists
yLeft[i].rightEdge = yMaxs[maxIndex];
yRight[i].rightEdge = yMaxs[maxIndex];
//Add geometry to maxs counter
yRight[i].objectBoundCounter++;
maxIndex++;
}
}
//Repeat for z axis
minIndex = 0, maxIndex = 0;
for(int i = 0; i < dnObjects; i++)
{
if(maxIndex == nObjects || (zMins[minIndex] <= zMaxs[maxIndex] && minIndex != nObjects))
{
//Add split location to both bin lists
zLeft[i].rightEdge = zMins[minIndex];
zRight[i].rightEdge = zMins[minIndex];
//Add geometry to mins counter
zLeft[i+1].objectBoundCounter++;
minIndex++;
}
else
{
//Add split location to both bin lists
zLeft[i].rightEdge = zMaxs[maxIndex];
zRight[i].rightEdge = zMaxs[maxIndex];
//Add geometry to maxs counter
zRight[i].objectBoundCounter++;
maxIndex++;
}
}
//Free split memory
free(xMins);
free(xMaxs);
free(yMins);
free(yMaxs);
free(zMins);
free(zMaxs);
//PreCalcs
float voxelL = xRight[dnmoObjects].rightEdge - xLeft[0].rightEdge;
float voxelD = zRight[dnmoObjects].rightEdge - zLeft[0].rightEdge;
float voxelH = yRight[dnmoObjects].rightEdge - yLeft[0].rightEdge;
float voxelSA = 2.0f * voxelL * voxelD + 2.0f * voxelL * voxelH + 2.0f * voxelD * voxelH;
//Minimum cost preset to no split at all
float minCost = (float)nObjects;
float splitLoc;
int minLeftCounter = 0, minRightCounter = 0;
int axis = -1;
//---------------------------------------------------------------------------------------------
//Check costs of x-axis split planes keeping track of derivative using
//the fact that there is a minimum point on the graph costs vs split location
//Since there is one object per split plane
int splitIndex = 1;
float lastCost = nObjects * voxelL;
float tempCost;
float lastSplit = xLeft[1].rightEdge;
int leftCount = xLeft[1].objectBoundCounter, rightCount = nObjects - xRight[1].objectBoundCounter;
int lastLO = 0, lastRO = nObjects;
//Keep looping while cost is decreasing
while(splitIndex < dnObjects)
{
tempCost = leftCount * (xLeft[splitIndex].rightEdge - xLeft[0].rightEdge) + rightCount * (xLeft[dnmoObjects].rightEdge - xLeft[splitIndex].rightEdge);
if(tempCost < lastCost)
{
lastCost = tempCost;
lastSplit = xLeft[splitIndex].rightEdge;
lastLO = leftCount;
lastRO = rightCount;
}
//Update counters
splitIndex++;
leftCount += xLeft[splitIndex].objectBoundCounter;
rightCount -= xRight[splitIndex].objectBoundCounter;
}
//Calculate full SAH cost
lastCost = ((lastLO * (2 * (lastSplit - xLeft[0].rightEdge) * voxelD + 2 * (lastSplit - xLeft[0].rightEdge) * voxelH + 2 * voxelD * voxelH)) + (lastRO * (2 * (xLeft[dnmoObjects].rightEdge - lastSplit) * voxelD + 2 * (xLeft[dnmoObjects].rightEdge - lastSplit) * voxelH + 2 * voxelD * voxelH))) / voxelSA;
if(lastCost < minCost)
{
minCost = lastCost;
splitLoc = lastSplit;
minLeftCounter = lastLO;
minRightCounter = lastRO;
axis = 0;
}
//---------------------------------------------------------------------------------------------
//Repeat for y axis
splitIndex = 1;
lastCost = nObjects * voxelH;
lastSplit = yLeft[1].rightEdge;
leftCount = yLeft[1].objectBoundCounter;
rightCount = nObjects - yRight[1].objectBoundCounter;
lastLO = 0;
lastRO = nObjects;
//Keep looping while cost is decreasing
while(splitIndex < dnObjects)
{
tempCost = leftCount * (yLeft[splitIndex].rightEdge - yLeft[0].rightEdge) + rightCount * (yLeft[dnmoObjects].rightEdge - yLeft[splitIndex].rightEdge);
if(tempCost < lastCost)
{
lastCost = tempCost;
lastSplit = yLeft[splitIndex].rightEdge;
lastLO = leftCount;
lastRO = rightCount;
}
//Update counters
splitIndex++;
leftCount += yLeft[splitIndex].objectBoundCounter;
rightCount -= yRight[splitIndex].objectBoundCounter;
}
//Calculate full SAH cost
lastCost = ((lastLO * (2 * (lastSplit - yLeft[0].rightEdge) * voxelD + 2 * (lastSplit - yLeft[0].rightEdge) * voxelL + 2 * voxelD * voxelL)) + (lastRO * (2 * (yLeft[dnmoObjects].rightEdge - lastSplit) * voxelD + 2 * (yLeft[dnmoObjects].rightEdge - lastSplit) * voxelL + 2 * voxelD * voxelL))) / voxelSA;
if(lastCost < minCost)
{
minCost = lastCost;
splitLoc = lastSplit;
minLeftCounter = lastLO;
minRightCounter = lastRO;
axis = 1;
}
//---------------------------------------------------------------------------------------------
//Repeat for z axis
splitIndex = 1;
lastCost = nObjects * voxelD;
lastSplit = zLeft[1].rightEdge;
leftCount = zLeft[1].objectBoundCounter;
rightCount = nObjects - zRight[1].objectBoundCounter;
lastLO = 0;
lastRO = nObjects;
//Keep looping while cost is decreasing
while(splitIndex < dnObjects)
{
tempCost = leftCount * (zLeft[splitIndex].rightEdge - zLeft[0].rightEdge) + rightCount * (zLeft[dnmoObjects].rightEdge - zLeft[splitIndex].rightEdge);
if(tempCost < lastCost)
{
lastCost = tempCost;
lastSplit = zLeft[splitIndex].rightEdge;
lastLO = leftCount;
lastRO = rightCount;
}
//Update counters
splitIndex++;
leftCount += zLeft[splitIndex].objectBoundCounter;
rightCount -= zRight[splitIndex].objectBoundCounter;
}
//Calculate full SAH cost
lastCost = ((lastLO * (2 * (lastSplit - zLeft[0].rightEdge) * voxelL + 2 * (lastSplit - zLeft[0].rightEdge) * voxelH + 2 * voxelH * voxelL)) + (lastRO * (2 * (zLeft[dnmoObjects].rightEdge - lastSplit) * voxelL + 2 * (zLeft[dnmoObjects].rightEdge - lastSplit) * voxelH + 2 * voxelH * voxelL))) / voxelSA;
if(lastCost < minCost)
{
minCost = lastCost;
splitLoc = lastSplit;
minLeftCounter = lastLO;
minRightCounter = lastRO;
axis = 2;
}
//Free bin memory
free(xLeft);
free(xRight);
free(yLeft);
free(yRight);
free(zLeft);
free(zRight);
//---------------------------------------------------------------------------------------------
//Make sure a split is in our best interest
if(axis == -1)
{
//If not decrement the node counter
nodeIndex--;
BuildBranch(-1, objs, nObjects);
return;
}
//Allocate space for left and right lists
Mailbox** leftList = (Mailbox**)malloc(minLeftCounter * sizeof(void*));
Mailbox** rightList = (Mailbox**)malloc(minRightCounter * sizeof(void*));
//Sort objects into lists of those to the left and right of the split plane
int leftIndex = 0, rightIndex = 0;
leftCount = 0;
rightCount = 0;
switch(axis)
{
case 0:
for(int i = 0; i < nObjects; i++)
{
//Get object bounding box
objs[i]->prim->MakeBoundingBox(tempBox);
//Add to left and right lists when necessary
if(tempBox->x0 < splitLoc)
{
leftList[leftIndex++] = objs[i];
leftCount++;
}
if(tempBox->x1 > splitLoc)
{
rightList[rightIndex++] = objs[i];
rightCount++;
}
}
break;
case 1:
for(int i = 0; i < nObjects; i++)
{
//Get object bounding box
objs[i]->prim->MakeBoundingBox(tempBox);
//Add to left and right lists when necessary
if(tempBox->y0 < splitLoc)
{
leftList[leftIndex++] = objs[i];
leftCount++;
}
if(tempBox->y1 > splitLoc)
{
rightList[rightIndex++] = objs[i];
rightCount++;
}
}
break;
case 2:
for(int i = 0; i < nObjects; i++)
{
//Get object bounding box
objs[i]->prim->MakeBoundingBox(tempBox);
//Add to left and right lists when necessary
if(tempBox->z0 < splitLoc)
{
leftList[leftIndex++] = objs[i];
leftCount++;
}
if(tempBox->z1 > splitLoc)
{
rightList[rightIndex++] = objs[i];
rightCount++;
}
}
break;
};
//Delete the bounding box
delete tempBox;
//Delete old objects array
free(objs);
//Construct left and right branches
BuildBranch(height - 1, leftList, leftCount);
BuildBranch(height - 1, rightList, rightCount);
//Build this node
tree[thisNodeIndex] = KDTreeNode();
tree[thisNodeIndex].InitializeInterior(axis, splitLoc, nodeIndex - 1);
return;
}
EDIT: Ok well I tried to replace the malloc/free with new/delete and that had no effect on the speed. I also found that it is only the free() on xLeft/xRight arrays that seem to affect the execution time significantly. I was able to eliminate the problem by moving the free() calls to after the recursive calls, although I do not know why this is making a difference because I don't see anywhere that these arrays are used after the original location for free(). As for why I am using malloc... some portions of this program use cache aligned memory, so I had been using _aligned_malloc. Although there probably is a way to get new to cache align, this is the only way I know to do it.