Spaces:
Running
Running
| import pandas as pd | |
| import gradio as gr | |
def _describe_diff(diff, max_num):
    """Return the per-row verdict text for one error-rate difference (1.5 minus 1.4)."""
    if pd.isna(diff):
        # A missing rate cannot be compared; do not report it as a tie.
        return "N/A (missing value)"
    if abs(diff) > max_num:
        return "1.4 is the same as 1.5 (Ignored due to large diff)"
    if diff < 0:
        return f"1.5 is stronger than 1.4 ({diff:.8f})"
    if diff > 0:
        return f"1.4 is stronger than 1.5 ({-diff:.8f})"
    return "1.4 is the same as 1.5 (0)"


def _describe_average(avg):
    """Return the summary verdict text for an averaged difference (1.5 minus 1.4)."""
    if pd.isna(avg):
        # Mean of an empty selection is NaN; say so instead of printing "(nan)".
        return "N/A (no comparable rows)"
    if avg < 0:
        return f"1.5 is stronger ({avg:.8f})"
    if avg > 0:
        return f"1.4 is stronger ({0 - avg:.8f})"
    return "1.4 is the same as 1.5 (0)"


def compare_csv_files(selected_languages):
    """Build an HTML report comparing the 1.5 and 1.4 result files.

    Reads ``result_1.5.csv`` and ``result_1.4.csv`` from the working
    directory, inner-joins them on ``SourceText`` and ``Language``, and
    compares the ``WordErrorRate`` and ``CharacterErrorRate`` columns.
    A negative difference (1.5 minus 1.4) is reported as "1.5 is stronger".

    Args:
        selected_languages: Languages to keep. A falsy value (``None`` or an
            empty list) keeps every language.

    Returns:
        An HTML string: an overall summary followed by a per-row table.
    """
    # Differences whose magnitude exceeds this are treated as outliers, both
    # in the per-row labels and in the averages.
    max_num = 10
    metrics = ("WordErrorRate", "CharacterErrorRate")

    # Load data
    df1 = pd.read_csv("result_1.5.csv")
    df2 = pd.read_csv("result_1.4.csv")

    # Merge with Language column
    merged_df = pd.merge(df1, df2, on=["SourceText", "Language"], suffixes=("_1.5", "_1.4"))

    # Filter by selected languages; copy so new columns are not written to a view.
    if selected_languages:
        merged_df = merged_df[merged_df["Language"].isin(selected_languages)].copy()

    averages = {}
    for metric in metrics:
        diff = merged_df[f"{metric}_1.5"] - merged_df[f"{metric}_1.4"]
        merged_df[f"{metric}_Diff"] = diff
        merged_df[f"{metric}_Comparison"] = [_describe_diff(value, max_num) for value in diff]
        # Use the same outlier threshold for both metrics so the averages
        # agree with the per-row "Ignored due to large diff" labels.
        averages[metric] = diff[diff.abs() <= max_num].mean()

    overall_summary = (
        "\n<h3>Overall Comparison:</h3>\n"
        "<p>Average WordErrorRate Difference (excluding large diffs): "
        f"{_describe_average(averages['WordErrorRate'])}</p>\n"
        "<p>Average CharacterErrorRate Difference (excluding large diffs): "
        f"{_describe_average(averages['CharacterErrorRate'])}</p>\n"
    )

    # Generate result HTML
    # NOTE(review): escape=False emits SourceText as raw HTML; confirm the CSV
    # contents are trusted before exposing this publicly.
    result_html = overall_summary + merged_df[[
        "Language",
        "SourceText",
        "WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
        "CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
    ]].to_html(escape=False, index=False)
    return result_html
# Collect every language that appears in either result file; these become
# the checkbox choices offered in the UI.
df1 = pd.read_csv("result_1.5.csv")
df2 = pd.read_csv("result_1.4.csv")
languages = sorted(set(df1["Language"]) | set(df2["Language"]))

# Build the web UI: one checkbox group in, one HTML report out.
language_selector = gr.CheckboxGroup(
    choices=languages,
    label="Select Languages to Compare",
)
demo = gr.Interface(
    fn=compare_csv_files,
    inputs=language_selector,
    outputs="html",
    title="Fish Speech Benchmark",
    description="Select specific languages to compare the results of WordErrorRate and CharacterErrorRate.",
)
demo.launch()