# Demo: testing the performance of word embeddings

from gensim.models import Word2Vec

from nltk.corpus import brown

# Let's get some embedding spaces.
# First, a tiny handmade one: skip-gram (sg=1) vectors of size 300,
# trained on the sentences of the Brown corpus "romance" category.
# Only the word vectors (.wv) of the trained model are kept.
# NOTE(review): `iter` and `size` are the gensim 3.x keyword names; gensim 4
# renamed them to `epochs` and `vector_size` -- confirm the installed version.
romance_sentences = brown.sents(categories="romance")
space_romance = Word2Vec(
    romance_sentences,
    sg=1,
    size=300,
    min_count=10,
    iter=10,
).wv

# And a precomputed one.
# (The only word2vec model for English is too large to download right now,
# so I'm using GloVe instead.)
import gensim.downloader as gensim_api

# Bound to `space_large` because that is the name the rest of this script
# uses for the pretrained space; binding it to any other name leaves
# `space_large` undefined and the evaluation code fails with a NameError.
space_large = gensim_api.load("glove-wiki-gigaword-300")

# Please download wordsim353 from Canvas.
# (The official link to WordSim353 seems to have gone away.)
# Then we can test the correlation between human ratings and the
# similarity predictions of each of our spaces very comfortably with gensim.
# evaluate_word_pairs() hands back three values; the first two are each
# unpacked below into a (coefficient, p-value) pair.
pearson, spearman, oov_ratio = space_romance.evaluate_word_pairs("wordsim353.txt")
(pearson_r, pearson_p), (spearman_rho, spearman_p) = pearson, spearman

print("Evaluating our tiny romance space against WordSim353")
print("Pearson r:", pearson_r, "with p-value", pearson_p)
print("Spearman's rho:", spearman_rho, "with p-value", spearman_p)
print("Ratio of out-of-vocabulary words is gigantic:", oov_ratio)

# Same evaluation for the large pretrained space: the first two results are
# each unpacked into a (coefficient, p-value) pair.
pearson, spearman, oov_ratio = space_large.evaluate_word_pairs("wordsim353.txt")
(pearson_r, pearson_p), (spearman_rho, spearman_p) = pearson, spearman

print("Evaluating the large space against WordSim353")
print("Pearson r:", pearson_r, "with p-value", pearson_p)
print("Spearman's rho:", spearman_rho, "with p-value", spearman_p)
print("Ratio of out-of-vocabulary words is now much smaller:", oov_ratio)

##########
# Or doing the same by hand.
# Read the word pairs into a list of (word1, word2, rating) tuples, where
# the rating is converted to a float.
wordsim353 = []

with open("wordsim353.txt") as f:
    # discard legend
    f.readline()
    # and read all other lines
    for line in f:
        fields = line.split()
        if not fields:
            # A blank line (e.g. a trailing empty line at the end of the
            # file) has nothing to unpack; skip it instead of crashing.
            continue
        word1, word2, rating_string = fields
        wordsim353.append((word1, word2, float(rating_string)))

# Pulling similarity ratings from the model.
def sim_or_zero(word1, word2, model):
    """Return the model's similarity for a word pair, or 0.0 if a word is missing.

    `model` must support `word in model` membership tests and provide a
    `similarity(word1, word2)` method. If either word is not in the model,
    the pair gets a similarity of zero rather than raising an error.
    """
    if word1 not in model or word2 not in model:
        return 0.0
    return model.similarity(word1, word2)

# Model predictions for every wordsim353 pair, one per entry and in file order.
predictions = [
    sim_or_zero(first, second, space_large)
    for first, second, _ in wordsim353
]

# The matching gold ratings, in the same order as the predictions.
gold = [human_rating for _, _, human_rating in wordsim353]

# Computing the correlation between gold ratings and model predictions.
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# make `scipy.stats` available as an attribute on every SciPy version.
# `import scipy.stats` still binds the name `scipy`, so the calls below are
# unchanged.
import scipy.stats

print("Performance of the large space on predicting wordsim353 similarities:")
print("Pearson:", scipy.stats.pearsonr(gold, predictions))
print("Spearman:", scipy.stats.spearmanr(gold, predictions))