views:

435

answers:

6

Lets say I have a string:

"(aaa and bbb or (aaa or aaa or bbb))"

**for simplicity sake, this will always be the format of the string, always 3 a's followed by a space or ')' or 3b's followed by a space or ')'.

what would be the best way to replace every occurence of 'aaa' with a '1' and everyoccurrence of 'bbb' with a '0' in C. ending string should look like:

"(1 and 0 or (1 or 1 or 0))"

EDIT Let me be more specific:

char* blah = (char *) malloc (8);
sprintf(blah, "%s", "aaa bbb");

blah = replace(blah);

how can I write replace so that it allocates space and stores a new string

"1 0"
+1  A: 

For this specific case, a simple while/for loop would do the trick. But it looks like a homework problem, so I won't write it explicitly for you. Had more generic string manipulations be required, I would use pcre.

jxy
+1 for using PCRE.
A: 

You can try a loop for each replacement using the functions std::find and std::replace. You will find more informations about std::string here.

Patrice Bernassola
This is a C question, not C++.
GMan
Oki, I read to fast...
Patrice Bernassola
It's ok, we all do it.
GMan
A: 

This is in no way the worlds most elegant solution, and it also assumes that the ending string is always going to be a smaller size than the original, oh and I hardcoded the conversions, but hopefully it points you more or less in the right direction or gives you an idea to jump off from:

char* replace( char *string ) {
    char *aaa = NULL;
    char *bbb = NULL;
    char *buffer = malloc( strlen( string ) );
    int length = 0;
    aaa = strstr( string, "aaa" );
    bbb = strstr( string, "bbb" );
    while ( aaa || bbb ) {
     if ( aaa && (bbb || aaa < bbb ) ) {
      char startToHere = aaa - string;
      strncpy( buffer, string, startToHere );
      string += startToHere;
      length += startToHere;
      buffer[length] = '1';
     }
     else if ( bbb ) {
      char startToHere = aaa - string;
      strncpy( buffer, string, startToHere );
      string += startToHere;
      length += startTohere;
      buffer[length] = '0';
     }
     aaa = strstr( string, "aaa" );
     bbb = strstr( string, "bbb" );
    }
    buffer[length] = '\0';
    string = realloc( string, length );
    strcpy( string, buffer );
    free( buffer );

    return string;
}

Disclaimer, I didn't even test this, but it should be at least semi in the direction of what you want.

Morinar
+4  A: 

The most efficient way is to use regex family which is POSIX. Some implementation will build a proper automaton for the patterns. Another way is to repeatedly use KMP or Boyer-Moore search, but you have to scan the string several times, which is less efficient. In addition, what results you want given such input: aa=1, ab=2, bb=3 on string "aabb"?

By the way, when you implement this function, a cleaner solution is to allocate a new dynamic C string and not to modify the original string while replacing. You could implement a in-place replacement, but that would be much more complicated.

regex_t r; regmatch_t match[2]; int last = 0;
regcomp(&r, "(aaa|bbb)", REG_EXTENDED);
insert(hashtable, "aaa", "0"); insert(hashtable, "bbb", "1");
while (regexec(&r, oristr, 1, match, 0) != REG_NOMATCH) {
  char *val;
  strncat(newstr, oristr + last, match->rm_so);
  lookup(hashtable, oristr + match->rm_so, match->rm_eo - match->rm_so, &val);
  last = match->rm_eo;
  strncat(newstr, val);
}
strcat(newstr, oristr + last);
oristr = realloc(oristr, strlen(newstr));
strcpy(oristr, newstr); free(newstr); regfree(&r);

In practical implementation, you should change the size of newstr dynamically. You should record the end of newstr rather than using strcat/strlen. The source code may be buggy as I have not really tried it. But the idea is there. This is the most efficient implementation I can think of.

okay, i edited ? to make it allocate new space. not sure what you mean by input: aa=1,ab=2,bb=3.
I mean to replace aa with 1, ab with 2 and bb with 3. You may get "13" or "a2b".
+1 for addressing the overlapping string case
Ashwin
A: 

This is a job for a FSM!

#include <assert.h>
#include <stdio.h>
#include <string.h>

/*
//     | 0          | 1             | 2              | 3             | 4              |
// ----+------------+---------------+----------------+---------------+----------------+
// 'a' | 1          | 2             | ('1') 0        | ('b') 1       | ('bb') 1       |
// 'b' | 3          | ('a') 3       | ('aa') 3       | 4             | ('0') 0        |
// NUL | (NUL) halt | ('a'NUL) halt | ('aa'NUL) halt | ('b'NUL) halt | ('bb'NUL) halt |
// (*) | (*) 0      | ('a'*) 0      | ('aa'*) 0      | ('b'*) 0      | ('bb'*) 0      |
*/

void chg_data(char *src) {
  char *dst, ch;
  int state = 0;
  dst = src;
  for (;;) {
    ch = *src++;
    if (ch == 'a' && state == 0) {state=1;}
    else if (ch == 'a' && state == 1) {state=2;}
    else if (ch == 'a' && state == 2) {state=0; *dst++='1';}
    else if (ch == 'a' && state == 3) {state=1; *dst++='b';}
    else if (ch == 'a' && state == 4) {state=1; *dst++='b'; *dst++='b';}
    else if (ch == 'b' && state == 0) {state=3;}
    else if (ch == 'b' && state == 1) {state=3; *dst++='a';}
    else if (ch == 'b' && state == 2) {state=3; *dst++='a'; *dst++='a';}
    else if (ch == 'b' && state == 3) {state=4;}
    else if (ch == 'b' && state == 4) {state=0; *dst++='0';}
    else if (ch == '\0' && state == 0) {*dst++='\0'; break;}
    else if (ch == '\0' && state == 1) {*dst++='a'; *dst++='\0'; break;}
    else if (ch == '\0' && state == 2) {*dst++='a'; *dst++='a'; *dst++='\0'; break;}
    else if (ch == '\0' && state == 3) {*dst++='b'; *dst++='\0'; break;}
    else if (ch == '\0' && state == 4) {*dst++='b'; *dst++='b'; *dst++='\0'; break;}
    else if (state == 0) {state=0; *dst++=ch;}
    else if (state == 1) {state=0; *dst++='a'; *dst++=ch;}
    else if (state == 2) {state=0; *dst++='a'; *dst++='a'; *dst++=ch;}
    else if (state == 3) {state=0; *dst++='b'; *dst++=ch;}
    else if (state == 4) {state=0; *dst++='b'; *dst++='b'; *dst++=ch;}
    else assert(0 && "this didn't happen!");
  }
}

int main(void) {
  char data[] = "(aaa and bbb or (aaa or aaa or bbb))";
  printf("Before: %s\n", data);
  chg_data(data);
  printf(" After: %s\n", data);
  return 0;
}
pmg
A: 

Here it is without memory limitation:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* ---------------------------------------------------------------------------
  Name       : replace - Search & replace a substring by another one. 
  Creation   : Thierry Husson, Sept 2010
  Parameters :
      str    : Big string where we search
      oldstr : Substring we are looking for
      newstr : Substring we want to replace with
      count  : Optional pointer to int (input / output value). NULL to ignore.  
               Input:  Maximum replacements to be done. NULL or < 1 to do all.
               Output: Number of replacements done or -1 if not enough memory.
  Returns    : Pointer to the new string or NULL if error.
  Notes      : 
     - Case sensitive - Otherwise, replace functions "strstr" by "strcasestr"
     - Always allocate memory for the result.
--------------------------------------------------------------------------- */
char* replace(const char *str, const char *oldstr, const char *newstr, int *count)
{
   const char *tmp = str;
   char *result;
   int   found = 0;
   int   length, reslen;
   int   oldlen = strlen(oldstr);
   int   newlen = strlen(newstr);
   int   limit = (count != NULL && *count > 0) ? *count : -1; 

   tmp = str;
   while ((tmp = strstr(tmp, oldstr)) != NULL && found != limit)
      found++, tmp += oldlen;

   length = strlen(str) + found * (newlen - oldlen);
   if ( (result = (char *)malloc(length+1)) == NULL) {
      fprintf(stderr, "Not enough memory\n");
      found = -1;
   } else {
      tmp = str;
      limit = found; /* Countdown */
      reslen = 0; /* length of current result */ 
      /* Replace each old string found with new string  */
      while ((limit-- > 0) && (tmp = strstr(tmp, oldstr)) != NULL) {
         length = (tmp - str); /* Number of chars to keep intouched */
         strncpy(result + reslen, str, length); /* Original part keeped */ 
         strcpy(result + (reslen += length), newstr); /* Insert new string */
         reslen += newlen;
         tmp += oldlen;
         str = tmp;
      }
      strcpy(result + reslen, str); /* Copies last part and ending nul char */
   }
   if (count != NULL) *count = found;
   return result;
}


/* ---------------------------------------------------------------------------
   Samples
--------------------------------------------------------------------------- */
int main(void)
{
   char *str, *str2;
   int rpl;

   /* ---------------------------------------------------------------------- */
   /* Simple sample */
   rpl = 0; /* Illimited replacements */
   str = replace("Hello World!", "World", "Canada", &rpl);
   printf("Replacements: %d\tResult: [%s]\n\n", rpl, str);
   /* Replacements: 1        Result: [Hello Canada!] */
   free(str);

   /* ---------------------------------------------------------------------- */
   /* Sample with dynamic memory to clean */
   rpl = 0; /* Illimited replacements */
   str = strdup("abcdef");
   if ( (str2 = replace(str, "cd", "1234", &rpl)) != NULL ) {
      free(str);
      str = str2;
   }
   printf("Replacements: %d\tResult: [%s]\n\n", rpl, str);
   /* Replacements: 1        Result: [ab1234ef] */
   free(str);

   /* ---------------------------------------------------------------------- */
   /* Illimited replacements - Case sensitive & Smaller result */
   str = replace("XXXHello XXXX world XX salut xxx monde!XXX", "XXX", "-",NULL);
   printf("Result: [%s]\n\n", str);
   /* Result: [-Hello -X world XX salut xxx monde!-] */
   free(str);

   /* ---------------------------------------------------------------------- */
   rpl = 3; /* Limited replacements */
   str = replace("AAAAAA", "A", "*", &rpl);
   printf("Replacements: %d\tResult: [%s]\n\n", rpl, str);
   /* Replacements: 3        Result: [***AAA] */
   free(str);

  return 0;
}
Thierry H.