Functions that use a pretrained FlyVec model to create sparse binary representations
Simply run FlyVec.load() to download the existing model and use it as desired.
# Obtain the pretrained FlyVec model using the loader's default arguments.
model = FlyVec.load()

# Embed a single word and keep the result for the comparisons further down.
hsh = model.get_sparse_embedding("hello")
# Bare expression: echoes the result when run as the last line of a cell.
hsh
hsh['embedding'] is non-zero for the top hash_length most activated neurons in our model.
If you provide multiple words in the input string, FlyVec
will provide the word vector for the first word only.
# Embed a two-word string; the result should match the embedding of its
# first word ("hello") computed earlier as `hsh`.
hsh2 = model.get_sparse_embedding("hello world")
first_word_matches = np.all(hsh2['embedding'] == hsh['embedding'])
assert first_word_matches
def _f(x):
    """Return the sparse embedding of `x` using the model's default settings."""
    return model.get_sparse_embedding(x)


hello = _f("hello")

# Passing 50 explicitly as the second argument (presumably hash_length --
# confirm against get_sparse_embedding) must give the same result as the default.
test_eq(hello['embedding'], model.get_sparse_embedding("hello", 50)['embedding'])

# The returned record reports the token that was embedded.
test_eq(hello['token'], "hello")

# An out-of-vocabulary string yields an all-zero embedding.
unknown = _f("BOXNAFS")
assert np.all(unknown['embedding'] == 0), "Expected unknown embedding to be all zero"

# Lookup is case-insensitive.
test_eq(_f("HELLO")['embedding'], hello['embedding'])

# Only the first token of a multi-word string is embedded.
test_eq(_f("not a single token")['embedding'], _f("not")['embedding'])

# Empty input is rejected with an error mentioning "empty string".
test_fail(lambda: _f(""), contains="empty string")

# Unrecognised words share the embedding of the explicit "<UNK>" token.
test_eq(_f("NotARealWord")['embedding'], _f("<UNK>")['embedding'])