Update app.py
Browse files
app.py
CHANGED
@@ -22,38 +22,15 @@ def transcribe_long_form(filepath):
|
|
22 |
return output["text"]
|
23 |
|
24 |
ner = pipeline("ner",
|
25 |
-
|
26 |
-
#tokenizer="mrm8488/bert-spanish-cased-finetuned-ner",
|
27 |
-
#aggregation_strategy="simple" # Esto combina las etiquetas en entidades completas
|
28 |
-
#max_length=512
|
29 |
)
|
30 |
|
31 |
-
def merge_tokens(tokens):
|
32 |
-
merged_tokens = []
|
33 |
-
for token in tokens:
|
34 |
-
if merged_tokens and token['entity'].startswith('I-') and merged_tokens[-1]['entity'].endswith(token['entity'][2:]):
|
35 |
-
# If current token continues the entity of the last one, merge them
|
36 |
-
last_token = merged_tokens[-1]
|
37 |
-
last_token['word'] += token['word'].replace('##', '')
|
38 |
-
last_token['end'] = token['end']
|
39 |
-
last_token['score'] = (last_token['score'] + token['score']) / 2
|
40 |
-
else:
|
41 |
-
# Otherwise, add the token to the list
|
42 |
-
merged_tokens.append(token)
|
43 |
-
|
44 |
-
return merged_tokens
|
45 |
-
|
46 |
def get_ner(input_text):
|
47 |
if input_text is None:
|
48 |
gr.Warning("No transcription found, please retry.")
|
49 |
return {"text": "", "entities": ""}
|
50 |
-
print(input_text)
|
51 |
-
#output = get_ner(input_text, max_length=128)
|
52 |
output = ner(input_text)
|
53 |
-
|
54 |
-
merged_tokens = merge_tokens(output)
|
55 |
-
print(merged_tokens)
|
56 |
-
return {"text": input_text, "entities": merged_tokens}
|
57 |
|
58 |
def main(filepath):
|
59 |
transcription = transcribe_long_form(filepath)
|
|
|
22 |
return output["text"]
|
23 |
|
24 |
ner = pipeline("ner",
|
25 |
+
model="mrm8488/bert-spanish-cased-finetuned-ner",
|
|
|
|
|
|
|
26 |
)
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def get_ner(input_text):
|
29 |
if input_text is None:
|
30 |
gr.Warning("No transcription found, please retry.")
|
31 |
return {"text": "", "entities": ""}
|
|
|
|
|
32 |
output = ner(input_text)
|
33 |
+
return {"text": input_text, "entities": output}
|
|
|
|
|
|
|
34 |
|
35 |
def main(filepath):
|
36 |
transcription = transcribe_long_form(filepath)
|