This is the whole function:
void find_nodes(htmlNodePtr *found_nodes, int &numb_found, htmlNodePtr root_node, SearchMode mode, const char *attribute, const char *value) {
htmlNodePtr tmp_ptr;
switch (mode) {
case S_HREF:
for (htmlNodePtr current_node=root_node; current_node!=NULL; current_node=current_node->next) {
if (xmlHasProp(current_node,(xmlChar *)"href")) {
if (xmlHasProp(current_node,(xmlChar *)attribute)) {
if (strcmp(value,(char *)xmlGetProp(current_node,(xmlChar *)attribute))==0) {
found_nodes[numb_found]=current_node;
numb_found++;
}
}
}
find_nodes(found_nodes,numb_found,current_node->children,mode,attribute,value);
}
break;
case S_KEYWORD:
for (htmlNodePtr current_node=root_node; current_node!=NULL; current_node=current_node->next) {
if (xmlHasProp(current_node,(xmlChar *)"href")) {
if (strcmp(value,(char *)xmlNodeGetContent(current_node))==0) {
found_nodes[numb_found]=current_node;
numb_found++;
}
}
find_nodes(found_nodes,numb_found,current_node->children,mode,attribute,value);
}
break;
case S_TAG:
for (htmlNodePtr current_node=root_node; current_node!=NULL; current_node=current_node->next) {
if (xmlHasProp(current_node,(xmlChar *)attribute)) {
if (strcmp(value,(char *)xmlGetProp(current_node,(xmlChar *)attribute))==0) {
tmp_ptr=inner_href_seek(current_node);
if (tmp_ptr==NULL) {
find_nodes(found_nodes,numb_found,current_node->children,mode,attribute,value);
continue;
}
else {
found_nodes[numb_found]=tmp_ptr;
numb_found++;
}
}
}
find_nodes(found_nodes,numb_found,current_node->children,mode,attribute,value);
}
break;
}
}
The array is a fixed size, but it's way bigger than needed. Am I passing numb_found in the proper way?
=== EDIT ===
char** get_urls(string url, ParseTreeNode *tree_root, int &numb_found) {
numb_found=0;
char **url_list;
htmlDocPtr doc;
htmlNodePtr root_node;
string site_content;
if (get_page(url,site_content)<0) {
url_list=NULL;
return url_list;
}
// get a DOM
doc=htmlReadMemory(site_content.data(),site_content.size(),url.data(),NULL,0);
// and the root
root_node=xmlDocGetRootElement(doc);
if (tree_root==NULL) {
url_list=NULL;
return url_list;
}
LeafList *l_list;
l_list= new LeafList();
l_list->numb_leafs=0;
get_leaf_list(l_list,tree_root);
htmlNodePtr matching_nodes[256];
int numb_matching_nodes;
htmlNodePtr tmp_nodes[64];
int numb_tmp;
SearchMode tmp_rule;
for (int i=0;i<l_list->numb_leafs;i++) {
if (l_list->leaf_buff[i]->data->rule!=TAG) continue;
else {
numb_matching_nodes=0;
find_nodes(matching_nodes,numb_matching_nodes,root_node,S_TAG,l_list->leaf_buff[i]->data->attribute.data(),l_list->leaf_buff[i]->data->value.data());
if (numb_matching_nodes==0) continue;
else l_list->leaf_buff[i]->state=true;
for (int j=0;j<numb_matching_nodes;j++) {
for (int k=0;k<l_list->numb_leafs;k++) {
if (k==i) continue;
else {
switch(l_list->leaf_buff[k]->data->rule) {
case HREF:
tmp_rule=S_HREF;
break;
case TAG:
tmp_rule=S_TAG;
break;
case KEYWORD:
tmp_rule=S_KEYWORD;
break;
}
find_nodes(tmp_nodes,numb_tmp,matching_nodes[j],tmp_rule,l_list->leaf_buff[k]->data->attribute.data(),l_list->leaf_buff[i]->data->value.data());
if (numb_tmp>0) l_list->leaf_buff[k]->state=true;
else l_list->leaf_buff[k]->state=false;
}
}
if (tree_is_true(l_list)) {
url_list[numb_found]=(char *)xmlGetProp(matching_nodes[j],(xmlChar *)"href");
numb_found++;
}
}
}
}
for (int i=0;i<l_list->numb_leafs;i++) {
if (l_list->leaf_buff[i]->data->rule!=HREF) continue;
else {
numb_matching_nodes=0;
find_nodes(matching_nodes,numb_matching_nodes,root_node,S_HREF,l_list->leaf_buff[i]->data->attribute.data(),l_list->leaf_buff[i]->data->value.data());
if (numb_matching_nodes==0) continue;
else l_list->leaf_buff[i]->state=true;
for (int j=0;j<numb_matching_nodes;j++) {
for (int k=0;k<l_list->numb_leafs;k++) {
if ((k==i)||(l_list->leaf_buff[k]->data->rule==TAG)) continue;
else {
switch(l_list->leaf_buff[k]->data->rule) {
case HREF:
tmp_rule=S_HREF;
break;
case KEYWORD:
tmp_rule=S_KEYWORD;
break;
}
find_nodes(tmp_nodes,numb_tmp,matching_nodes[j],tmp_rule,l_list->leaf_buff[k]->data->attribute.data(),l_list->leaf_buff[i]->data->value.data());
if (numb_tmp>0) l_list->leaf_buff[k]->state=true;
else l_list->leaf_buff[k]->state=false;
}
}
if (tree_is_true(l_list)) {
url_list[numb_found]=(char *)xmlGetProp(matching_nodes[j],(xmlChar *)"href");
numb_found++;
}
}
}
}
for (int i=0;i<l_list->numb_leafs;i++) {
if (l_list->leaf_buff[i]->data->rule!=KEYWORD) continue;
else {
numb_matching_nodes=0;
find_nodes(matching_nodes,numb_matching_nodes,root_node,S_KEYWORD,l_list->leaf_buff[i]->data->attribute.data(),l_list->leaf_buff[i]->data->value.data());
if (numb_matching_nodes==0) continue;
else {
for (int i=0;i<numb_matching_nodes;i++) {
url_list[numb_found]=(char *)xmlGetProp(matching_nodes[i],(xmlChar *)"href");
numb_found++;
}
}
}
}
return url_list;
}