fcernafukuzaki committed on
Commit
9ef0fe3
·
verified ·
1 Parent(s): 6022473

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -25
app.py CHANGED
@@ -22,38 +22,15 @@ def transcribe_long_form(filepath):
22
  return output["text"]
23
 
24
  ner = pipeline("ner",
25
- model="mrm8488/bert-spanish-cased-finetuned-ner", # Modelo preentrenado para NER en español
26
- #tokenizer="mrm8488/bert-spanish-cased-finetuned-ner",
27
- #aggregation_strategy="simple" # Esto combina las etiquetas en entidades completas
28
- #max_length=512
29
  )
30
 
31
- def merge_tokens(tokens):
32
- merged_tokens = []
33
- for token in tokens:
34
- if merged_tokens and token['entity'].startswith('I-') and merged_tokens[-1]['entity'].endswith(token['entity'][2:]):
35
- # If current token continues the entity of the last one, merge them
36
- last_token = merged_tokens[-1]
37
- last_token['word'] += token['word'].replace('##', '')
38
- last_token['end'] = token['end']
39
- last_token['score'] = (last_token['score'] + token['score']) / 2
40
- else:
41
- # Otherwise, add the token to the list
42
- merged_tokens.append(token)
43
-
44
- return merged_tokens
45
-
46
  def get_ner(input_text):
47
  if input_text is None:
48
  gr.Warning("No transcription found, please retry.")
49
  return {"text": "", "entities": ""}
50
- print(input_text)
51
- #output = get_ner(input_text, max_length=128)
52
  output = ner(input_text)
53
- print(output)
54
- merged_tokens = merge_tokens(output)
55
- print(merged_tokens)
56
- return {"text": input_text, "entities": merged_tokens}
57
 
58
  def main(filepath):
59
  transcription = transcribe_long_form(filepath)
 
22
  return output["text"]
23
 
24
  ner = pipeline("ner",
25
+ model="mrm8488/bert-spanish-cased-finetuned-ner",
 
 
 
26
  )
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def get_ner(input_text):
29
  if input_text is None:
30
  gr.Warning("No transcription found, please retry.")
31
  return {"text": "", "entities": ""}
 
 
32
  output = ner(input_text)
33
+ return {"text": input_text, "entities": output}
 
 
 
34
 
35
  def main(filepath):
36
  transcription = transcribe_long_form(filepath)