Mya-Mya committed on
Commit
8066662
·
1 Parent(s): fd4ac87

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend import Backend
2
+ import frontend
3
+ import numpy as np
4
+ from pandas import DataFrame
5
+ from transformers import BertJapaneseTokenizer, BertModel
6
+ import pickle
7
+
8
# Load the gadget database from the pickle file shipped next to this script.
# The code below reads its "feature_matrix_s", "name_s" and "description_s"
# entries.
# NOTE(review): pickle.load executes arbitrary code from the file; this is
# only safe as long as the .pkl is a trusted, repository-owned artifact.
with open("./himitsudogu_db.pkl", "rb") as db_file:
    himitsudogu_db: dict = pickle.load(db_file)
10
+
11
+
12
class HFBackend(Backend):
    """Backend that ranks gadgets against a free-text query.

    The query is embedded with a Hugging Face BERT model and compared, by
    inner product, against the feature matrix stored in ``himitsudogu_db``
    under the same model name.
    """

    # Single source of truth for the model id: it selects both the feature
    # matrix in the database and the tokenizer/model weights, so the three
    # can never drift apart.
    MODEL_NAME = "sonoisa/sentence-bert-base-ja-mean-tokens-v2"
    # Maximum number of ranked rows returned per query.
    TOP_K = 20

    def __init__(self):
        """Look up the feature matrix and load the tokenizer and model."""
        super().__init__()
        self.feature_matrix = himitsudogu_db["feature_matrix_s"][self.MODEL_NAME]
        self.tokenizer = BertJapaneseTokenizer.from_pretrained(self.MODEL_NAME)
        self.model = BertModel.from_pretrained(self.MODEL_NAME)

    def on_submit_button_press(self, query: str) -> DataFrame:
        """Return the gadgets most similar to ``query``.

        Args:
            query: Free text entered by the user.

        Returns:
            A DataFrame with the columns 類似度 (similarity), 名前 (name) and
            説明 (description), indexed by rank starting at 1 and holding at
            most ``TOP_K`` rows, best match first.
        """
        # Local import keeps this block self-contained; torch is already a
        # runtime requirement of BertModel.
        import torch

        # Tokenize the text into a sequence of token ids. Truncate to the
        # model's position-embedding limit so an over-long query cannot
        # crash the forward pass.
        tokenized = self.tokenizer(
            query,
            return_tensors="pt",
            truncation=True,
            max_length=self.model.config.max_position_embeddings,
        )
        # Feed the token ids to the language model. Inference only, so skip
        # autograd bookkeeping.
        with torch.no_grad():
            model_output = self.model(**tokenized)
        # Take the sentence feature vector of the (single) query.
        query_feature_vector = model_output["pooler_output"][0].detach().numpy()
        # Inner product with the feature vector of every gadget description.
        cs_s = self.feature_matrix @ query_feature_vector
        # Show gadgets in descending order of inner product, keeping the top
        # TOP_K only.
        ranked_index_s = np.argsort(cs_s)[::-1][: self.TOP_K]
        # Build the table in one shot instead of growing it row by row.
        return DataFrame(
            {
                "類似度": [cs_s[i] for i in ranked_index_s],
                "名前": [himitsudogu_db["name_s"][i] for i in ranked_index_s],
                "説明": [
                    himitsudogu_db["description_s"][i] for i in ranked_index_s
                ],
            },
            index=range(1, len(ranked_index_s) + 1),
            columns=["類似度", "名前", "説明"],
        )
46
+
47
+
48
# Build the Hugging Face backed backend and hand it to the frontend, which
# is launched as a side effect of running this script.
hf_backend = HFBackend()
frontend.launch_frontend(backend=hf_backend)