import asyncio
import torch
import os
import pandas as pd
from flair.data import Sentence
from flair.embeddings import FlairEmbeddings, DocumentPoolEmbeddings, WordEmbeddings
device = torch.device("cpu")
print(device)
# first, declare how you want to embed
embeddings = DocumentPoolEmbeddings(
[WordEmbeddings('glove'), FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward')])
path = os.getcwd()
df=pd.read_pickle(path+'/embedding_all_courses_2.pkl')
query_emd=[]
cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
query= Sentence("some text")
embeddings.embed([query])
query_emd.append(query.embedding)
async def count(index,row):
for i in query_emd:
print(words,row['course_name'],cos(i, row['embedding']))
print(index)
async def main():
await asyncio.gather(*(count(index,row) for index,row in df.iterrows()))
if __name__ == "__main__":
import time
s = time.perf_counter()
asyncio.run(main())
elapsed = time.perf_counter() - s
print(f"{__file__} executed in {elapsed:0.2f} seconds.")
Trying to run pytorch cosine similarity in asyncio package to get the parallel result. Using a flair model for embedding text. Need to compare a text with a huge dataframe and get the most similar text as a result and response should be pretty fast. Can you please suggest an alternative way also? I also need to run this code on the CPU memory only not on the GPU Cuda system
Error:
raceback (most recent call last):
File "asyn_emd.py", line 74, in <module>
asyncio.run(main())
File "/home/linuxbrew/.linuxbrew/Cellar/python/3.7.5/lib/python3.7/asyncio/runners.py", line 43, in run
return loop.run_until_complete(main)
File "/home/linuxbrew/.linuxbrew/Cellar/python/3.7.5/lib/python3.7/asyncio/base_events.py", line 579, in run_until_complete
return future.result()
File "/asyn_emd.py", line 68, in main
await asyncio.gather(*(count(index,row) for index,row in df.iterrows()))
File "/asyn_emd.py", line 61, in count
print(words,row['course_name'],cos(i, row['embedding']))
File "/home/karthickaravindan/.virtualenvs/test/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/karthickaravindan/.virtualenvs/test/lib/python3.7/site-packages/torch/nn/modules/distance.py", line 75, in forward
return F.cosine_similarity(x1, x2, self.dim, self.eps)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!