The Trie Data Structure

Trie is an efficient information reTrieval data structure.

Let word be a single string and let dictionary be a large set of words. If we have a dictionary, and we need to know if a single word is inside of the dictionary the tries are a data structure that can help us. 

The following figure shows a trie with the words “tree”, “trie”, “algo”, “assoc”, “all”, and “also.”




The tries can insert and find strings in O(L) time (where L represent the length of a single word). 


Every node of Trie consists of multiple branches. Each branch represents a possible character of keys. We need to mark the last node of every key as end of word node.

Structure of each node of a trie:-


// Trie node
struct TrieNode
{
     struct TrieNode *children[ALPHABET_SIZE];
     bool isEndOfWord;                       // represent end of word
};



struct TrieNode *getNode(void)
{
    struct TrieNode *pNode =  new TrieNode;
 
    pNode->isEndOfWord = false;
 
    for (int i = 0; i < ALPHABET_SIZE; i++)
        pNode->children[i] = NULL;
 
    return pNode;
}

Operations(Templates):-

Insert:

void insert(struct TrieNode *root, string key)
{
    struct TrieNode *pCrawl = root;
 
    for (int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if (!pCrawl->children[index])
            pCrawl->children[index] = getNode();
 
        pCrawl = pCrawl->children[index];
    }
 
    // mark last node as leaf
    pCrawl->isEndOfWord = true;
}


Search:

bool search(struct TrieNode *root, string key)
{
    struct TrieNode *pCrawl = root;
 
    for (int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if (!pCrawl->children[index])
            return false;
 
        pCrawl = pCrawl->children[index];
    }
 
    return (pCrawl != NULL && pCrawl->isEndOfWord);
}



 Doubts ?

  Reference Links:


 Lets Jump to some questions :
 Question 1: Link To Question

  If auto - completions exists, output all of them in lexicographical order(lowercase) else output "No  suggestions" without quotes.

  Input                                      Output

  3 fact
fact factorial
factorial factory
factory No suggestions
2
fact
pradyumn 

Solution:


#include <bits/stdc++.h>
using namespace std;

const int ALPHABET_SIZE = 26;

struct TrieNode
{
     struct TrieNode *children[ALPHABET_SIZE];
     bool isEndOfWord;                       // represent end of word
};
 
struct TrieNode *getNode(void)
{
    struct TrieNode *pNode =  new TrieNode;
    pNode->isEndOfWord = false;
    for (int i = 0; i < ALPHABET_SIZE; i++)
        pNode->children[i] = NULL;
    return pNode;
}
 
void insert(struct TrieNode *root, string key)
{
    struct TrieNode *pCrawl = root;
 
    for (int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if (!pCrawl->children[index])
            pCrawl->children[index] = getNode();
 
        pCrawl = pCrawl->children[index];
    }
 
    // mark last node as leaf
    pCrawl->isEndOfWord = true;
}
 
struct TrieNode *search(struct TrieNode *root, string key)
{
    struct TrieNode *pCrawl = root;
    for (int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if (!pCrawl->children[index])
            return NULL;
        pCrawl = pCrawl->children[index];
    }
    return pCrawl;
}

void dfs(struct TrieNode *curNode, string curString, vector<string> &res)
{
 for(int letter = 0; letter < ALPHABET_SIZE; letter++)
 {
  if(curNode->children[letter] != NULL)
  {
   string tmp = curString;
   tmp.push_back(letter + 'a');
   dfs(curNode->children[letter], tmp, res); 
  }
 }
 
 if(curNode->isEndOfWord)
  res.push_back(curString);
}
 
int main()
{
    ios::sync_with_stdio(false);
    cin.tie(0);
    
 struct TrieNode *root= getNode();
 int n,q;
 cin>>n;
 while(n--)
 {
  string s;
  cin>>s;
  transform(s.begin(), s.end(), s.begin(), ::tolower);
  insert(root,s);
 }
 
 cin>>q;
 while(q--)
 {
  string s;
  cin>>s; 
  struct TrieNode *p = search(root,s);
  if(p==NULL)
  {
   cout <<"No suggestions\n"; 
   insert(root,s);
  }
  else
  {
   vector<string> res;
   dfs(p, s, res);
   sort(res.begin(), res.end());
   for(int i = 0, L = res.size(); i < L; i++)
    cout<<res[i]<<endl;
  }
 }
 
 return 0;
}


Practice Problems:














Comments

Popular posts from this blog

Lazy Propagation - Too lazy to update all at a time

Combinatorial Game Theory

Bit Manipulation Problems