@article{1696,
  author = {Peng Zhu},
  title = {N-Grams Based Linguistic Search Engine},
  journal = {International Journal of Computational Linguistics Research},
  year = {2015},
  volume = {6},
  number = {1},
  doi = {},
  url = {},
  abstract = {With the rapid development of the Internet, the scalability and diversity of Web text play an important role in English research and teaching. This paper presents an N-Grams-based English search engine for words in context, which incorporates information retrieval, part-of-speech tagging, named entity recognition, word similarity, and other natural language processing technologies with web-based text data. The engine retrieves N-Grams in response to a given query, which may contain keywords, wildcards, wild parts of speech (PoS), named entity information, synonyms, and other regular expression operators. For word similarity measures, this paper proposes a method of automatically extracting similar words from a corpus using a deep-learning-based tool, Word2Vec. We train word representations to measure word similarity and evaluate them on the WordNet-based Synonymy Test (WBST); our result is comparable with that of Freitag et al. (2005) [2]. We then automatically extract the similar words and embed them in our search engine.},
}