"""Generate word vectors from a text corpus using Word2Vec (CBOW and Skip-Gram).

Reads the corpus file, strips characters other than letters, digits, spaces,
apostrophes, and periods, tokenizes it into lowercase words per sentence, then
trains two gensim Word2Vec models and prints a cosine similarity from each.
"""

import re
import warnings

# Suppress gensim/nltk deprecation noise during training.
warnings.filterwarnings(action='ignore')

import gensim
from nltk.tokenize import sent_tokenize, word_tokenize

CORPUS_PATH = "/Users/neha/AdvancePP/corpus.txt"

# Read the corpus; `with` guarantees the file handle is closed.
with open(CORPUS_PATH, "r") as sample:
    raw_text = sample.read()

# Keep only letters, digits, spaces, apostrophes, and periods
# (periods are preserved so sent_tokenize can still split sentences).
cleaned = re.sub(r"[^a-zA-Z0-9 '.]+", "", raw_text)

# One list of lowercase word tokens per sentence — the input shape
# gensim's Word2Vec expects.
data = [
    [token.lower() for token in word_tokenize(sentence)]
    for sentence in sent_tokenize(cleaned)
]

# Create CBOW model (sg defaults to 0).
# NOTE: gensim 4.x renamed `size` -> `vector_size`; vectors and similarity
# queries live on the `.wv` KeyedVectors attribute.
model1 = gensim.models.Word2Vec(data, min_count=1, vector_size=100, window=5)

print(model1.wv['depression'])
print("Cosine similarity between 'depression' "
      + "and 'great' - CBOW : ",
      model1.wv.similarity('depression', 'great'))

# Create Skip-Gram model (sg=1).
model2 = gensim.models.Word2Vec(data, min_count=1, vector_size=100,
                                window=5, sg=1)

print("Cosine similarity between 'depression' "
      + "and 'great' - Skip Gram : ",
      model2.wv.similarity('depression', 'great'))