您的位置:首页 > 其它

Trie树统计词频和指定前缀的单词个数

2015-08-30 13:51 489 查看
//trie.h
#ifndef __TRIE_H__
#define __TRIE_H__
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

namespace alg {
	// Alphabet size: one outgoing edge per letter 'a'..'z'.
	const int NUMWORD = 26;

	/**
	 * Case-insensitive trie over the letters a-z that keeps, per node, how
	 * many inserted words end there and how many continue past it.
	 *
	 * Only letters that map into 'a'..'z' after tolower() can be stored.
	 * A word containing any other character is rejected as a whole by Add()
	 * and can never match in Count()/CountPrefix().
	 *
	 * The trie owns its nodes through raw pointers, so copying is disabled.
	 */
	class Trie {
		private:
			class node {
				public:
					int words;             // inserted words that end exactly at this node
					int prefixes;          // inserted words that reach this node and have more letters left
					node *edges[NUMWORD];  // child per letter, NULL when absent
					node():words(0),prefixes(0) {
						for (int i=0;i<NUMWORD;i++) {
							edges[i] = NULL;
						}
					}
					// Recursively frees the whole subtree below this node.
					~node() {
						for (int i=0;i<NUMWORD;i++) {
							delete edges[i];  // delete on NULL is a no-op
						}
					}
				private:
					// Not copyable: a shallow copy would free the children twice.
					node(const node &);
					node &operator=(const node &);
			};
			node *m_root;

			// Not copyable (declared, never defined): two Tries sharing
			// m_root would both delete it.
			Trie(const Trie &);
			Trie &operator=(const Trie &);
		public:
			Trie() {
				m_root = new node;
			}

			~Trie() {
				delete m_root;
				m_root=NULL;
			}

			/**
			 * Inserts one occurrence of str.
			 *
			 * str is lowercased IN PLACE (callers must pass a writable
			 * buffer). A NULL pointer is ignored. If str contains a
			 * character outside a-z after lowercasing, nothing is inserted.
			 * The empty string is accepted and counted at the root.
			 */
			void Add(char * str) {
				if (str == NULL) {
					return;
				}
				_lowercase(str);
				_add(m_root, str);
			}

			/**
			 * Returns how many times str was inserted (case-insensitive).
			 * Returns 0 for NULL, for unknown words and for strings that
			 * contain characters outside a-z.
			 */
			int Count(const char *str) const {
				const node *n = _walk(str);
				return n != NULL ? n->words : 0;
			}

			/**
			 * Returns how many inserted words start with prefix and are
			 * strictly longer than it (case-insensitive). A word equal to
			 * prefix is not included; Count(prefix) reports those.
			 * Returns 0 for NULL, for unknown prefixes and for strings that
			 * contain characters outside a-z.
			 */
			int CountPrefix(const char *prefix) const {
				const node *n = _walk(prefix);
				return n != NULL ? n->prefixes : 0;
			}

		private:
			// Maps a character to its edge slot (0..NUMWORD-1), ignoring case,
			// or returns -1 when it is not one of the letters a-z.
			static int _index(char c) {
				// <ctype.h> functions require a value representable as
				// unsigned char; a negative plain char would be undefined.
				const int lowered = tolower(static_cast<unsigned char>(c));
				if (lowered < 'a' || lowered > 'z') {
					return -1;
				}
				return lowered - 'a';
			}

			// Lowercases str in place.
			void _lowercase(char *str) {
				for (int i=0;str[i];i++) {
					str[i] = static_cast<char>(tolower(static_cast<unsigned char>(str[i])));
				}
			}

			// Inserts str below n, creating nodes as needed. Does nothing if
			// str holds a character that has no edge slot.
			void _add(node *n, const char * str) {
				// Validate first so a bad character in the middle of the
				// word cannot leave a half-inserted path with bumped counters.
				for (const char *p = str; *p != '\0'; ++p) {
					if (_index(*p) < 0) {
						return;
					}
				}

				for (; *str != '\0'; ++str) {
					n->prefixes++;
					const int index = _index(*str);
					if (n->edges[index]==NULL) {
						n->edges[index] = new node;
					}
					n = n->edges[index];
				}
				n->words++;
			}

			// Follows str from the root and returns the node it ends on, or
			// NULL when str is NULL, leaves the trie, or contains a
			// character that has no edge slot.
			const node *_walk(const char *str) const {
				if (str == NULL) {
					return NULL;
				}
				const node *n = m_root;
				for (; *str != '\0'; ++str) {
					const int index = _index(*str);
					if (index < 0 || n->edges[index]==NULL) {
						return NULL;
					}
					n = n->edges[index];
				}
				return n;
			}
	};
}

#endif //

#include "trie.h"

int main(void) {
    alg::Trie trie;    
    const char *strs[] = {"sap", "sat", "sad", "rat", "ram", "rag", "rap", "sat", "ram","rag", "nap", "Nat", "lap"};

    for (uint32_t i=0;i<sizeof(strs)/sizeof(char*);i++) {
        char * str = strdup(strs[i]);
        trie.Add(str);
        free(str);
    }

    printf("count of :%s %d\n", "sat", trie.Count("sat"));
    printf("count of prefix :%s %d\n", "ra", trie.CountPrefix("ra"));
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: