Spaces:
Running
Running
Simplified highlighting
Browse files
app.py
CHANGED
|
@@ -1,70 +1,52 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from jiwer import wer,
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
# Function to highlight errors
|
| 5 |
def highlight_errors(ground_truth, hypothesis):
|
| 6 |
-
measures = compute_measures(ground_truth, hypothesis)
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
# Split the ground truth and hypothesis into words
|
| 11 |
-
gt_words = ground_truth.split()
|
| 12 |
-
hyp_words = hypothesis.split()
|
| 13 |
|
| 14 |
-
|
| 15 |
-
hyp_index = 0
|
| 16 |
|
| 17 |
# Process each alignment operation in measures
|
| 18 |
-
for alignment in
|
| 19 |
for chunk in alignment:
|
| 20 |
if chunk.type == 'equal':
|
| 21 |
# Add equal words without highlighting
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
hyp_index = chunk.hyp_end_idx
|
| 25 |
elif chunk.type == 'insert':
|
| 26 |
# Highlight inserted words in green
|
| 27 |
-
|
| 28 |
-
|
| 29 |
elif chunk.type == 'substitute':
|
| 30 |
-
# Highlight substitutions
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
hyp_index += 1
|
| 35 |
elif chunk.type == 'delete':
|
| 36 |
# Highlight deleted words in red with strikethrough
|
| 37 |
-
|
| 38 |
-
gt_index += 1
|
| 39 |
-
|
| 40 |
-
# Handle any remaining words in hypothesis as insertions
|
| 41 |
-
while hyp_index < len(hyp_words):
|
| 42 |
-
highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
|
| 43 |
-
hyp_index += 1
|
| 44 |
|
| 45 |
-
|
| 46 |
-
while gt_index < len(gt_words):
|
| 47 |
-
highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
|
| 48 |
-
gt_index += 1
|
| 49 |
-
|
| 50 |
-
highlighted_hyp_str = ' '.join(highlighted_hyp)
|
| 51 |
|
| 52 |
-
error_rate = wer(ground_truth, hypothesis)
|
| 53 |
-
|
| 54 |
# Color Legend HTML
|
| 55 |
legend_html = """
|
| 56 |
<div style="margin-top: 10px;">
|
| 57 |
-
<strong>Legend
|
| 58 |
-
<span style="color:green;">Insertion</span>:
|
| 59 |
-
<span style="color:purple;">Substitution</span>:
|
| 60 |
-
<span style="color:red; text-decoration:line-through;">Deletion</span>:
|
| 61 |
</div>
|
| 62 |
"""
|
| 63 |
|
| 64 |
# Combine highlighted output and legend
|
| 65 |
-
combined_output = f"{
|
| 66 |
|
| 67 |
-
return combined_output,
|
| 68 |
|
| 69 |
# Gradio Interface
|
| 70 |
interface = gr.Interface(
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from jiwer import wer, process_words
|
| 3 |
+
|
| 4 |
+
def make_string(words):
    """Join a sequence of words into one space-separated string.

    Args:
        words: Iterable of strings to join.

    Returns:
        The words separated by single spaces; an empty string when
        ``words`` is empty.
    """
    return " ".join(words)
|
| 6 |
|
| 7 |
# Function to highlight errors
|
| 8 |
def highlight_errors(ground_truth, hypothesis):
    """Render a word-level diff of ``hypothesis`` against ``ground_truth`` as HTML.

    The inputs are aligned with ``process_words``. Matching words are emitted
    as plain text, inserted hypothesis words in green, substitutions in
    purple (the hypothesis words followed by the struck-through reference
    words), and deleted reference words in red with a strikethrough. A colour
    legend is placed in front of the highlighted text.

    Words are HTML-escaped before being embedded, so characters such as
    ``<``, ``>`` and ``&`` in either transcript are displayed literally
    instead of being interpreted as markup.

    Args:
        ground_truth: Reference text, passed unchanged to ``process_words``.
        hypothesis: Text to compare, passed unchanged to ``process_words``.

    Returns:
        A 5-tuple ``(html, wer, substitutions, insertions, deletions)``:
        ``html`` is the legend followed by the highlighted text, and the
        remaining values are read from the ``process_words`` result.
    """
    # Imported locally so this function stays self-contained and does not
    # rely on the module-level import block being edited.
    from html import escape

    def span(style, words):
        # quote=False: the text lands in element content, not in an
        # attribute, so only <, > and & need escaping.
        text = escape(make_string(words), quote=False)
        return f'<span style="{style}">{text}</span>'

    highlighted_text = []

    processed = process_words(ground_truth, hypothesis)

    # Walk every alignment chunk of every reference/hypothesis pair.
    for alignment, ref, hyp in zip(processed.alignments, processed.references, processed.hypotheses):
        for chunk in alignment:
            if chunk.type == 'equal':
                # Add equal words without highlighting
                highlighted_text.extend(
                    escape(word, quote=False)
                    for word in ref[chunk.ref_start_idx:chunk.ref_end_idx]
                )
            elif chunk.type == 'insert':
                # Highlight inserted words in green
                highlighted_text.append(
                    span('color:green;', hyp[chunk.hyp_start_idx:chunk.hyp_end_idx])
                )
            elif chunk.type == 'substitute':
                # Highlight substitutions in purple: hypothesis words first,
                # then the ground-truth words struck through.
                highlighted_text.append(
                    span('color:purple;', hyp[chunk.hyp_start_idx:chunk.hyp_end_idx])
                )
                highlighted_text.append(
                    span('color:purple; text-decoration:line-through;',
                         ref[chunk.ref_start_idx:chunk.ref_end_idx])
                )
            elif chunk.type == 'delete':
                # Highlight deleted words in red with strikethrough
                highlighted_text.append(
                    span('color:red; text-decoration:line-through;',
                         ref[chunk.ref_start_idx:chunk.ref_end_idx])
                )

    highlighted_text_str = ' '.join(highlighted_text)

    # Color Legend HTML
    legend_html = (
        '\n'
        '<div style="margin-top: 10px;">\n'
        '<strong>Legend</strong><br>\n'
        '<span style="color:green;">Insertion</span>: Green<br>\n'
        '<span style="color:purple;">Substitution</span>: Purple<br>\n'
        '<span style="color:red; text-decoration:line-through;">Deletion</span>: Red<br>\n'
        '</div>\n'
    )

    # Combine highlighted output and legend
    combined_output = f"{legend_html}<br>{highlighted_text_str}"

    return combined_output, processed.wer, processed.substitutions, processed.insertions, processed.deletions
|
| 50 |
|
| 51 |
# Gradio Interface
|
| 52 |
interface = gr.Interface(
|