views:

320

answers:

2

Hi,

Wanted to write a program to implement a dictionary of words using Tries Data Structure.

Please tell me the structure for the implementation so that I could start the program, as i haven't got any article on internet, for Tries Implementation..

The confusion is, that when we search through the word, and we get through the word at the leaf node, only then is the meaning of the word would be stored.. But all the nodes in Tries data structure will be of waste. i.e. storing a variable of meaning in every internal node......

So, the basic idea, is with a help of small example as how to store dictionary, please let me know the structure for Tries Data Structure..

And please C program Implementation..

Thanks..

Compressed Tries.. This is giving me the correct Compressed Trie, as expected,,,, but there are some issues with it.... And wanted to discuss that....

1) I Build a simple trie first, and then compressed it using a function trie_compress(), Now when i want to add any word into it, it would want a changed trie_add(), also changed trie_lookup(), ok i will do this on my own, just wanted to know, is my approach correct or there could be some better way..

2) In trie_new(), I have used t->substr = (char*)malloc(10);,,,,,, This doesn't look efficient, as memory should be allocated, as in when required. Can we improve upon this.

typedef struct trie
{
   int on;
   char *substr;
   struct trie *first_child;
   struct trie *next_sibling;
}trie;

trie* trie_new()
{
   trie *t = (trie*)malloc(sizeof(trie));
   t->substr = (char*)malloc(10);
   t->on = 0;
   t->substr[0] = '\0';
   t->first_child = NULL;
   t->next_sibling = NULL;

   return t;
}

trie* trie_at_level(trie *t, char c)
{
   while(t != NULL)
   {
      if(t->substr[0] == c)
      {
         return t;
      }
      t = t->next_sibling;
   }
   return NULL;
}

void trie_add(trie *t, const char *str)
{
   const int n = strlen(str);
   int i;

   for(i=0; i<n; i++)
   {
      const char c = str[i];
      trie* parent = t;

      t = t->first_child;
      t = trie_at_level(t,c);
      if(t == NULL)
      {
         t = trie_new();
         t->substr[0] = c;
         t->substr[1] = '\0';
         t->next_sibling = parent->first_child;
         parent->first_child = t;
      }
   }
   t->on = 1;
}

int trie_lookup(trie *t, const char *str)
{
   const int n = strlen(str);
   int i;

   for(i=0; i<n; i++)
   {
      const char c = str[i];
      t = t->first_child;
      t = trie_at_level(t,c);
      if(t == NULL)
         return 0;
   }
   return t->on;
}

void trie_compress(trie *t)
{
   trie* parent = t;
   t = t->first_child;

   if(t->first_child != NULL)
      trie_compress(t);

   if(t->next_sibling == NULL)
   {
      parent->substr = strcat(parent->substr,t->substr);
      parent->first_child = t->first_child;
      parent->on = t->first_child->on;
      free(t);

      return;
   }
   else
      trie_compress(t->next_sibling);
}
A: 

Maybe this would help?

the_void
+2  A: 

Okay, I think I got it right this time around.

Compressed Trie:

#include <string.h>
#include <stdlib.h>

typedef struct trie {
  int value;
  char* key;
  struct trie* kids;
  struct trie* next;
} trie;

/* Creates an empty trie.
 */
trie* trie_new () {
  trie* t = (trie*) malloc (sizeof (trie));
  t->value = 0;
  t->key = NULL;
  t->kids = NULL;
  t->next = NULL;
  return t;
}

/* Sets |t->key| to |key|.
 */
static void trie_set_key (trie* t, const char* key) {
  char* key_copy = (char*) malloc (sizeof (char) * (strlen (key) + 1));
  strcpy (key_copy, key);
  free (t->key);
  t->key = key_copy;
}

/* Creates a trie with |->key| set to |key| whose |->value| is on.
 */
static trie* trie_new_init (const char* key) {
  trie* t = trie_new ();
  t->value = 1;
  trie_set_key (t, key);
  return t;
}

/* Frees all memory used by the trie |t|.
 */
void trie_delete (trie* t) {
  if (t == NULL) {
    return;
  }
  trie_delete (t->kids);
  trie_delete (t->next);
  free (t->key);
  free (t);
}

typedef struct trie_str_pair {
  trie* trie;
  const char* str;
} trie_str_pair;

/* Creates a trie_str_pair with the values |->trie| and |->str| set to
 *  |t| and |str|, respectively.
 */
static trie_str_pair mk_trie_str_pair (trie* t, const char* str) {
  trie_str_pair pair;
  pair.trie = t;
  pair.str = str;
  return pair;
}

/* Tries to find a sibling of |t| or |t| itself that matches the input
 *  choice function |choiceFunc|. A match occurs if |choiceFunc| returns
 *  a string other than NULL. Upon a match, the matching trie and the string
 *  are returned. Otherwise NULL values are returned in the pair struct.
 */
static trie_str_pair lookup_by (
      const char* (*choiceFunc)(const char*, trie*)
    , const char* key, trie* t
  ) {
  while (t != NULL) {
    const char* str = choiceFunc (key, t);
    if (str != NULL) {
      return mk_trie_str_pair (t, str);
    }
    t = t->next;
  }
  return mk_trie_str_pair (NULL, NULL);
}

/* If |prefix| is a prefix of |str|, returns a pointer into |str| immediately
 *  after the prefix.
 */
static const char* strip_prefix (const char* prefix, const char* str) {
  int i;
  for (i = 0; prefix [i] != '\0'; ++i) {
    if (str [i] != prefix [i]) {
      return NULL;
    }
  }
  return str + i;
}

/* If |t->key| is a prefix of |str|, returns a pointer into |str| immediately
 *  after the prefix.
 */
static const char* strip_prefix_with_key (const char* str, trie* t) {
  return strip_prefix (t->key, str);
}

/* If |str| is a prefix of |t->key|, returns a pointer into |t->key|
 *  immediately after the prefix.
 */
static const char* strip_prefix_from_key (const char* str, trie* t) {
  return strip_prefix (str, t->key);
}

/* Returns a pointer into |str1| immediately after the longest common prefix
 *  between |str1| and |str2|.
 */
static const char* strip_common_prefix (const char* str1, const char* str2) {
  int i;
  for (i = 0; str1 [i] != '\0' && str2 [i] != '\0'; ++i) {
    if (str1 [i] != str2 [i]) {
      break;
    }
  }
  if (i == 0) {
    return NULL;
  }
  return str1 + i;
}

/* Returns a pointer into |str| past the longest common prefix between
 *  |str| and |t->str|.
 */
static const char* strip_common_prefix_on_key (const char* str, trie* t) {
  return strip_common_prefix (str, t->key);
}

/* Returns 1 if |key| is in the trie |t|. Returns 0 if not.
 */
int trie_lookup (trie* t, const char* key) {
  while (key != NULL && key [0] != '\0') {
    trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
    t = pair.trie;
    if (t == NULL) {
      return 0;
    }
    key = pair.str;
  }
  return t->value;
}

/* Adds |kid| to |t|'s list of kids.
 */
static void trie_add_kid (trie* t, trie* kid) {
  kid->next = t->kids;
  t->kids = kid;
}

/* Removes |kid| from |t|'s list of kids.
 * |kid| must be in |t|'s list of kids.
 */
static void trie_remove_kid (trie* t, trie* kid) {
  if (t->kids == kid) {
    t->kids = kid->next;
  }
  else {
    t = t->kids;
    while (t->next != kid) {
      t = t->next;
    }
    t->next = kid->next;
  }
}

/* Replaces |kid| from |t|'s list of kids with |new_kid|.
 * |kid| must be in |t|'s list of kids.
 */
static void trie_replace_kid (trie* t, trie* kid, trie* new_kid) {
  trie_remove_kid (t, kid);
  trie_add_kid (t, new_kid);
}

/* If |t| has exactly one kid and no grandkids, |t| and its kid are merged
 *  into one trie node. In other words, |t|'s kid's |->key| is appended to
 *  |t->key| and |t->value| becomes that of its kid's |->value|.
 */
static void trie_try_merge_with_kids (trie* t) {
  if (t->key != NULL) {
    trie* kid = t->kids;
    if (kid != NULL && kid->next == NULL) {
      t->value = kid->value;
      t->kids = kid->kids;
      int new_len = strlen (t->key) + strlen (kid->key);
      t->key = realloc (t->key, sizeof (char) * (new_len + 1));
      strcat (t->key, kid->key);
      free (kid->key);
      free (kid);
    }
  }
}

/* Helper for trie_insert.
 */
static void trie_insert_split_key (
      trie* t
    , const char* key_prefix, const char* key_suffix
  ) {
  trie* kid = trie_new_init (key_suffix);
  trie_add_kid (t, kid);
  trie_set_key (t, key_prefix);
}

/* Helper for trie_insert.
 */
static void trie_insert_simple (trie* t, const char* key) {
  trie* kid = trie_new_init (key);
  trie_add_kid (t, kid);
}

/* Helper for trie_insert.
 */
static void trie_insert_fork (
      trie* t
    , trie* kid
    , char* key_prefix  /* Caller loses ownership of this string */
    , const char* key_suffix
    , const char* kid_key_suffix
  ) {
  trie* fork_kid = trie_new ();
  fork_kid->key = key_prefix;
  fork_kid->kids = trie_new_init (key_suffix);
  fork_kid->kids->next = kid;
  trie_replace_kid (t, kid, fork_kid);
  fork_kid->next = kid->next;
  kid->next = NULL;
  trie_set_key (kid, kid_key_suffix);
}

/* Inserts |key| into the trie |t|.
 */
void trie_insert (trie* t, const char* key) {
  if (key [0] == '\0') {
    return;
  }
  while (1) {
    trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
    trie* kid = pair.trie;
    const char* stripped = pair.str;
    if (kid != NULL) {
      if (stripped [0] == '\0') {
        kid->value = 1;
        return;
      }
      t = kid;
      key = stripped;
      continue;
    }
    pair = lookup_by (strip_prefix_from_key, key, t->kids);
    kid = pair.trie;
    stripped = pair.str;
    if (kid != NULL) {
      trie_insert_split_key (kid, key, stripped);
      return;
    }
    pair = lookup_by (strip_common_prefix_on_key, key, t->kids);
    kid = pair.trie;
    stripped = pair.str;
    if (kid == NULL) {
      trie_insert_simple (t, key);
      return;
    }
    int prefix_len = stripped - key;
    char* common_prefix = (char*) malloc (sizeof (char) * (prefix_len + 1));
    strncpy (common_prefix, key, prefix_len);
    common_prefix [prefix_len] = '\0';
    trie_insert_fork (t, kid, common_prefix, stripped, kid->key + prefix_len);
    return;
  }
}

/* Helper for trie_remove.
 */
static void trie_remove_simple (trie* t, trie* kid) {
  trie_remove_kid (t, kid);
  free (kid->key);
  free (kid);
}

/* Helper for trie_remove.
 */
static void trie_remove_merge (trie* t) {
  t->value = 0;
  trie_try_merge_with_kids (t);
}

/* Removes |key| from the trie |t|.
 */
void trie_remove (trie* t, const char* key) {
  trie_str_pair pair = lookup_by (strip_prefix_with_key, key, t->kids);
  trie* kid = pair.trie;
  const char* stripped = pair.str;
  if (kid == NULL) {
    return;
  }
  if (stripped [0] == '\0') {
    if (kid->kids == NULL) {
      trie_remove_simple (t, kid);
    }
    else {
      trie_remove_merge (kid);
    }
  }
  else {
    trie_remove (kid, stripped);
  }
  trie_try_merge_with_kids (t);
}
trinithis
How Can I delete a word from this implementation...... Also, if possible, if I want to write structure for Compressed Tries.... Please let me know your thoughts over it.... Only structure for Compressed tries.....
AGeek
Added a remove function. I'm not going to write a compressing function though.
trinithis
Thankss Sir.... U are so helpful..... Ur codes are just giving me a great insight into Tries and related structures.... As i am working all these alone, so bit helpful.. Thanks....Also, now I worked upon a code, which gave me a compressed Trie. Article http://www.heppenstall.ca/academics/doc/242/F2001Archives/Ch11_3_tries.pdfNow, the idea I followed is lets build a simple trie first, and then trie to compress the trie. Code attached in the question....
AGeek
Well, I'm bored, so I'm going to have a go at trie compression.
trinithis
Why Are you using these static and const keywords at places..... Kindly let me know the idea behind this,, IF it makes smthing works better,, than I would also like to incorporate into my codes. n thanks again that you are able to work out the problems.. Thanks Sir.
AGeek
I updated my code. I did some testing it and it SEEMS to work. Placing static in front of a function makes the function hidden outside of the file. For the use of static within a function see http://en.wikipedia.org/wiki/Local_variable#Static_local_variables. As for const, I recommend googling it.
trinithis