Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -94,12 +94,12 @@ class NEI:
|
|
| 94 |
# plt.colorbar()
|
| 95 |
# plt.savefig('Confusion_Matrix.png')
|
| 96 |
|
| 97 |
-
def vectorize(self, w, scaled_position, prev_tag=0, next_tag=0
|
| 98 |
is_titlecase = 1 if w[0].isupper() else 0
|
| 99 |
is_allcaps = 1 if w.isupper() else 0
|
| 100 |
is_sw = 1 if w.lower() in SW else 0
|
| 101 |
is_punct = 1 if w in PUNCT else 0
|
| 102 |
-
is_surrounded_by_entities = 1 if (prev_tag > 0 and next_tag > 0) else 0
|
| 103 |
is_connector = 1 if (w.lower() in connectors) and (prev_tag > 0 and next_tag > 0) else 0
|
| 104 |
# is_start_of_sentence = 1 if (scaled_position == 0 or prev_token in [".", "!", "?"]) and w.lower() not in start_words else 0
|
| 105 |
# is_start_of_sentence = 1 if scaled_position == 0 else 0
|
|
@@ -113,7 +113,7 @@ class NEI:
|
|
| 113 |
for i, token in enumerate(tokens):
|
| 114 |
prev_tag = tags[i - 1] if i > 0 else 0
|
| 115 |
next_tag = tags[i + 1] if i < len(tokens) - 1 else 0
|
| 116 |
-
x = self.vectorize(token, scaled_position=(i / len(tokens)), prev_tag=prev_tag, next_tag=next_tag
|
| 117 |
y = 1 if tags[i] > 0 else 0
|
| 118 |
features.append(x)
|
| 119 |
labels.append(y)
|
|
@@ -140,7 +140,7 @@ class NEI:
|
|
| 140 |
tokens = word_tokenize(sentence)
|
| 141 |
features = []
|
| 142 |
|
| 143 |
-
raw_features = [self.vectorize(token, i / len(tokens)
|
| 144 |
raw_features = np.array(raw_features, dtype=np.float32)
|
| 145 |
scaled_features = self.scaler.transform(raw_features)
|
| 146 |
y_pred = self.model.predict(scaled_features)
|
|
@@ -149,7 +149,7 @@ class NEI:
|
|
| 149 |
prev_tag = y_pred[i - 1] if i > 0 else 0
|
| 150 |
next_tag = y_pred[i + 1] if i < len(tokens) - 1 else 0
|
| 151 |
|
| 152 |
-
feature_with_context = self.vectorize(token, i / len(tokens), prev_tag, next_tag
|
| 153 |
features.append(feature_with_context)
|
| 154 |
|
| 155 |
features = np.array(features, dtype=np.float32)
|
|
|
|
| 94 |
# plt.colorbar()
|
| 95 |
# plt.savefig('Confusion_Matrix.png')
|
| 96 |
|
| 97 |
+
def vectorize(self, w, scaled_position, prev_tag=0, next_tag=0):
|
| 98 |
is_titlecase = 1 if w[0].isupper() else 0
|
| 99 |
is_allcaps = 1 if w.isupper() else 0
|
| 100 |
is_sw = 1 if w.lower() in SW else 0
|
| 101 |
is_punct = 1 if w in PUNCT else 0
|
| 102 |
+
# is_surrounded_by_entities = 1 if (prev_tag > 0 and next_tag > 0) else 0
|
| 103 |
is_connector = 1 if (w.lower() in connectors) and (prev_tag > 0 and next_tag > 0) else 0
|
| 104 |
# is_start_of_sentence = 1 if (scaled_position == 0 or prev_token in [".", "!", "?"]) and w.lower() not in start_words else 0
|
| 105 |
# is_start_of_sentence = 1 if scaled_position == 0 else 0
|
|
|
|
| 113 |
for i, token in enumerate(tokens):
|
| 114 |
prev_tag = tags[i - 1] if i > 0 else 0
|
| 115 |
next_tag = tags[i + 1] if i < len(tokens) - 1 else 0
|
| 116 |
+
x = self.vectorize(token, scaled_position=(i / len(tokens)), prev_tag=prev_tag, next_tag=next_tag)
|
| 117 |
y = 1 if tags[i] > 0 else 0
|
| 118 |
features.append(x)
|
| 119 |
labels.append(y)
|
|
|
|
| 140 |
tokens = word_tokenize(sentence)
|
| 141 |
features = []
|
| 142 |
|
| 143 |
+
raw_features = [self.vectorize(token, i / len(tokens)) for i, token in enumerate(tokens)]
|
| 144 |
raw_features = np.array(raw_features, dtype=np.float32)
|
| 145 |
scaled_features = self.scaler.transform(raw_features)
|
| 146 |
y_pred = self.model.predict(scaled_features)
|
|
|
|
| 149 |
prev_tag = y_pred[i - 1] if i > 0 else 0
|
| 150 |
next_tag = y_pred[i + 1] if i < len(tokens) - 1 else 0
|
| 151 |
|
| 152 |
+
feature_with_context = self.vectorize(token, i / len(tokens), prev_tag, next_tag)
|
| 153 |
features.append(feature_with_context)
|
| 154 |
|
| 155 |
features = np.array(features, dtype=np.float32)
|