Courses > Python worksheets >

Demo: testing the performance of word embeddings


from gensim.models import Word2Vec
from nltk.corpus import brown

# Build the embedding spaces to compare.
# First, a tiny handmade one: skip-gram (sg=1) vectors with 300 dimensions,
# trained for 10 passes on the "romance" category of the Brown corpus with
# min_count=10. Only the word vectors (.wv) of the trained model are kept.
# NOTE: `size` and `iter` are the gensim 3.x parameter names; gensim 4
# renamed them to `vector_size` and `epochs`.
romance_sentences = brown.sents(categories="romance")
space_romance = Word2Vec(
    romance_sentences,
    sg=1,
    size=300,
    min_count=10,
    iter=10,
).wv

# And a precomputed one.
# (The only English word2vec model is too large to download right now,
# so this uses GloVe vectors instead.)
import gensim.downloader as gensim_api

# Bind the vectors to `space_large`: that is the name the evaluation code
# further down reads, so any other name leaves it undefined (NameError).
space_large = gensim_api.load("glove-wiki-gigaword-300")

# Please download wordsim353 from Canvas.
# (The official link to WordSim353 seems to have gone away.)
# gensim can then test, in a single call, how well the similarity
# predictions of each space correlate with the human ratings. The call
# returns three values: the Pearson result, the Spearman result, and the
# out-of-vocabulary ratio. Each correlation result unpacks into a
# (coefficient, p-value) pair, which is done directly in the assignment.
(pearson_r, pearson_p), (spearman_rho, spearman_p), oov_ratio = (
    space_romance.evaluate_word_pairs("wordsim353.txt")
)
print("Evaluating our tiny romance space against WordSim353")
print("Pearson r:", pearson_r, "with p-value", pearson_p)
print("Spearman's rho:", spearman_rho, "with p-value", spearman_p)
print("Ratio of out-of-vocabulary words is gigantic:", oov_ratio)

# Same evaluation, now for the large precomputed space.
(pearson_r, pearson_p), (spearman_rho, spearman_p), oov_ratio = (
    space_large.evaluate_word_pairs("wordsim353.txt")
)
print("Evaluating the large space against WordSim353")
print("Pearson r:", pearson_r, "with p-value", pearson_p)
print("Spearman's rho:", spearman_rho, "with p-value", spearman_p)
print("Ratio of out-of-vocabulary words is now much smaller:", oov_ratio)

##########
# Or doing the same by hand.
# Read the dataset into a list of (word1, word2, human rating) tuples.
wordsim353 = []
with open("wordsim353.txt") as f:
    # discard the legend (first line)
    f.readline()
    # and read all other lines
    for line in f:
        fields = line.split()
        if not fields:
            # A blank line (e.g. an empty line at the end of the file)
            # holds no word pair; unpacking it would raise ValueError.
            continue
        word1, word2, rating_string = fields
        wordsim353.append((word1, word2, float(rating_string)))
       
# Pulling similarity ratings from the model.
def sim_or_zero(word1, word2, model):
    """Return the model's similarity for a word pair, or 0.0 if unknown.

    `model` must support `word in model` membership tests and provide a
    `similarity(word1, word2)` method. If either word is missing from the
    model, the pair is scored 0.0 instead of raising an error.
    """
    # Guard clause: a missing word means no prediction can be made.
    if word1 not in model or word2 not in model:
        return 0.0
    return model.similarity(word1, word2)

# Model predictions for every wordsim353 pair (0.0 when a word is missing)
predictions = [
    sim_or_zero(first, second, space_large)
    for first, second, _ in wordsim353
]
# and the matching human (gold) ratings, in the same order
gold = [human_rating for _, _, human_rating in wordsim353]


# Computing correlation between the gold ratings and the predictions.
# Import the `stats` subpackage explicitly: a bare `import scipy` is not
# guaranteed to make `scipy.stats` available as an attribute.
import scipy.stats

print("Performance of the large space on predicting wordsim353 similarities:")
print("Pearson:", scipy.stats.pearsonr(gold, predictions))
print("Spearman:", scipy.stats.spearmanr(gold, predictions))




Comments