Spaces:
Runtime error
Runtime error
File size: 1,281 Bytes
dcfa2ec 92e1aef dcfa2ec 92e1aef dcfa2ec 92e1aef dcfa2ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import numpy as np
import pandas as pd
import nltk
import re
import torch
import networkx as nx
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
# Suppress all Python warnings so library notices do not clutter the output.
warnings.filterwarnings('ignore')

# Sentence-tokenizer data needed by nltk.sent_tokenize. Newer NLTK releases
# (3.8.2 and later) load the 'punkt_tab' resource instead of the pickled
# 'punkt' models, so request both. On a release whose index does not list a
# resource, nltk.download is expected to report the problem and return False
# rather than raise (default raise_on_error=False).
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)

# Shared sentence-embedding model, moved to the GPU when one is available.
model = SentenceTransformer('all-mpnet-base-v2')
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
def get_summary(text: str, num_words: int = 1000) -> str:
    """Return an extractive summary of ``text``.

    The text is split into sentences, each sentence is embedded with the
    module-level ``model``, and a graph weighted by the pairwise cosine
    similarity of those embeddings is scored with PageRank. Sentences are
    then taken in descending score order while the running word count stays
    strictly below ``num_words``; selection stops at the first sentence that
    would reach the budget. The selected sentences are emitted in their
    original order, joined by single spaces.

    Args:
        text: Document to summarise.
        num_words: Word budget. The returned summary always contains fewer
            than this many whitespace-separated words.

    Returns:
        The summary string, or ``""`` when ``text`` is empty or
        whitespace-only, yields no sentences, or the top-ranked sentence
        alone already reaches the budget.

    Raises:
        Exception: Any error raised while computing the similarity matrix
            is logged together with the embedding shape and re-raised.
    """
    # Nothing to summarise: bail out before touching the tokenizer or model.
    if not text or not text.strip():
        return ""

    sentences = nltk.sent_tokenize(text)
    if not sentences:
        return ""

    embeddings = model.encode(sentences, show_progress_bar=False)
    try:
        sim_matrix = cosine_similarity(embeddings)
    except Exception as e:
        # Keep the diagnostics, but propagate the real failure instead of
        # falling through to an UnboundLocalError on ``sim_matrix`` below.
        print(e, type(e))
        print(embeddings.shape)
        raise

    nx_graph = nx.from_numpy_array(sim_matrix)
    scores = nx.pagerank(nx_graph)

    # Highest PageRank score first; ties fall back to sentence text, then index.
    ranked_sentences = sorted(
        ((scores[i], s, i) for i, s in enumerate(sentences)),
        reverse=True,
    )

    selected = []
    total_length = 0
    for _score, sentence, position in ranked_sentences:
        total_length += len(sentence.split())
        if total_length >= num_words:
            # Budget reached: stop rather than look for shorter sentences.
            break
        selected.append((position, sentence))

    # Restore document order so the summary reads naturally.
    selected.sort(key=lambda item: item[0])
    return " ".join(sentence for _position, sentence in selected)