Upload 4 files
app.py
ADDED
@@ -0,0 +1,146 @@
import random

import gradio as gr
import pandas as pd

# Import the dataset listing utilities from PyABSA
from pyabsa import AspectTermExtraction as ATEPC

# Import the initialized ATEPC model and utility functions from the local modules
from models import aspect_extractor
from utils import load_atepc_examples


# Datasets offered in the UI for random examples
DESIRED_ATEPC_DATASETS = [
    "Laptop14",
    "Restaurant14",
    "SemEval",
    "Twitter",
    "TShirt",
]

# Pre-load dataset examples.
# This dictionary stores example sentences for each desired dataset,
# used when the user leaves the input text box blank.

print("Loading ATEPC dataset examples for Gradio interface...")
atepc_dataset_examples = {}
# Iterate over the desired datasets and load examples for each one
for dataset_name in DESIRED_ATEPC_DATASETS:
    try:
        # Check that the dataset name exists in ATEPC.ATEPCDatasetList
        if hasattr(ATEPC.ATEPCDatasetList(), dataset_name):
            atepc_dataset_examples[dataset_name] = load_atepc_examples(dataset_name)
        else:
            print(f"Warning: Dataset '{dataset_name}' not found in ATEPC.ATEPCDatasetList. Skipping.")
    except Exception as e:
        print(f"Error loading examples for ATEPC dataset '{dataset_name}': {e}")
print("ATEPC dataset examples loading complete.")


# Inference function for Gradio

def run_atepc_inference(input_text: str, selected_dataset: str) -> tuple[pd.DataFrame, str]:

    # Check that the aspect_extractor model was successfully initialized
    if aspect_extractor is None:
        return pd.DataFrame({"Error": ["Model not initialized. Please check logs."]},
                            columns=["Error"]), "Model Unavailable"

    analyzed_text = input_text.strip()  # Remove leading/trailing whitespace

    # If no text is provided, pick a random example from the pre-loaded data
    if not analyzed_text:
        examples = atepc_dataset_examples.get(selected_dataset)
        if examples:
            analyzed_text = random.choice(examples)
        else:
            return pd.DataFrame({"Message": ["No examples available for this dataset and no input text provided."]},
                                columns=["Message"]), "Please provide text or select a valid dataset."

    print(f"Performing ATEPC inference on: '{analyzed_text}' (Dataset: {selected_dataset})")

    try:
        # Predict aspects and their sentiments
        prediction_result = aspect_extractor.predict(analyzed_text, pred_sentiment=True)

        # Check whether any aspects were detected
        if not prediction_result or not prediction_result.get("aspect"):
            return pd.DataFrame({"Message": ["No aspects detected for the given text."]},
                                columns=["Message"]), analyzed_text

        # Build a DataFrame from the prediction results
        df_result = pd.DataFrame(
            {
                "Aspect": prediction_result["aspect"],
                "Sentiment": prediction_result["sentiment"],
                "Confidence": [round(c, 4) for c in prediction_result["confidence"]],
                "Position": prediction_result["position"],
            }
        )
        return df_result, analyzed_text
    except Exception as e:
        print(f"Error during ATEPC inference: {e}")
        return pd.DataFrame({"Error": [f"An error occurred: {e}"]},
                            columns=["Error"]), analyzed_text


# Gradio user interface definition

# Initialize the Gradio Blocks interface
with gr.Blocks(title="PyABSA Demonstration: Aspect-based Sentiment Analysis") as sentiment_analysis_app:
    # Main title for the application
    gr.Markdown("# <p align='center'>PyABSA: Multilingual Aspect-based Sentiment Analysis</p>")
    gr.Markdown("---")  # Visual separator

    with gr.Row():
        with gr.Column():
            gr.Markdown("## 📈 Analyze Aspects and Sentiments")
            gr.Markdown(
                "This tool identifies specific aspects (entities or attributes) in a sentence "
                "and determines the sentiment (positive, negative, neutral) associated with each. "
                "For example, in 'The laptop's battery life is excellent', 'battery life' would be "
                "identified with a 'positive' sentiment."
            )

            # Input area for ATEPC
            atepc_input_box = gr.Textbox(
                placeholder="Type a sentence here, or leave blank to load a random example from a dataset...",
                label="Input Sentence:",
                lines=3
            )

            # Dataset selection for ATEPC examples, restricted to the desired list
            atepc_dataset_selection = gr.Radio(
                choices=DESIRED_ATEPC_DATASETS,  # Use the predefined list
                value=DESIRED_ATEPC_DATASETS[0] if DESIRED_ATEPC_DATASETS else None,  # Default to the first entry, if any
                label="Select Dataset (for random examples):",
                interactive=True
            )

            # Button to trigger ATEPC inference
            atepc_run_button = gr.Button("Analyze Aspects!", variant="primary")

            # Output areas for ATEPC
            atepc_output_sentence = gr.TextArea(label="Analyzed Sentence:", interactive=False)
            atepc_prediction_results_df = gr.DataFrame(label="Aspect Prediction Results:", interactive=False)

            # Wire the button click to the inference function
            atepc_run_button.click(
                fn=run_atepc_inference,
                inputs=[atepc_input_box, atepc_dataset_selection],
                outputs=[atepc_prediction_results_df, atepc_output_sentence],
                api_name="run_atepc_inference"
            )

    gr.Markdown("---")  # Visual separator


# Launch the Gradio application
if __name__ == "__main__":
    if aspect_extractor is None:
        print("Warning: PyABSA ATEPC model failed to initialize. The application may not function correctly.")
    sentiment_analysis_app.launch(share=False, debug=True)
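As a quick sanity check, `run_atepc_inference` can be exercised outside the Gradio UI; the sketch below is illustrative only, and assumes `app.py` imports cleanly (i.e. the module-level model initialization succeeds) and uses a made-up sample sentence. It relies only on the function signature and the DataFrame columns defined above.

# Illustrative sketch: call the inference wrapper directly, bypassing the Gradio UI.
from app import run_atepc_inference

df, analyzed = run_atepc_inference("The battery life is great but the screen is dim.", "Laptop14")
print(analyzed)  # the sentence that was analyzed
print(df)        # one row per detected aspect: Aspect, Sentiment, Confidence, Position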
models.py
ADDED
@@ -0,0 +1,40 @@
import traceback

from pyabsa import download_all_available_datasets, available_checkpoints
from pyabsa import AspectTermExtraction as ATEPC

# Model initialization

# 1. Download all required datasets for PyABSA.
# This may take a while on the first run, as PyABSA fetches the datasets used by its models.
print("PyABSA: Starting dataset download (if not already present)...")
try:
    download_all_available_datasets()
    print("PyABSA: Datasets download complete.")
except Exception as e:
    print(f"PyABSA: Error during dataset download: {e}")
    print(traceback.format_exc())
    # Continue even if the download fails; the model may still work if data is cached.

# 2. Initialize the Aspect Term Extraction and Polarity Classification (ATEPC) model,
# using the 'multilingual' checkpoint for broad language support.
print("\nPyABSA: Initializing Aspect Term Extractor (ATEPC) with the 'multilingual' checkpoint...")
try:
    aspect_extractor = ATEPC.AspectExtractor(checkpoint="multilingual")
    print("PyABSA: Aspect Term Extractor initialized successfully.")
except Exception as e:
    print(f"PyABSA: FAILED to initialize Aspect Term Extractor: {e}")
    print(traceback.format_exc())
    aspect_extractor = None

# 3. No ASTE model is initialized; this demo covers ATEPC only.

# 4. List the available ATEPC checkpoints for verification.
print("\nPyABSA: Available ATEPC checkpoints:")
try:
    available_checkpoints("ATEPC", True)
except Exception as e:
    print(f"PyABSA: Could not retrieve ATEPC checkpoints: {e}")
    print(traceback.format_exc())
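Because `models.py` initializes the extractor eagerly at import time, importing it (directly or via `app.py`) triggers the dataset download and checkpoint load once per process. A minimal usage sketch, assuming initialization succeeded; the sample sentence is mine, and the printed keys are the ones `app.py` reads from the prediction result.

# Illustrative sketch: query the shared extractor directly.
from models import aspect_extractor

if aspect_extractor is not None:
    result = aspect_extractor.predict("The service was slow but the food was delicious.",
                                      pred_sentiment=True)
    # app.py relies on these keys when building its results DataFrame.
    print(result["aspect"], result["sentiment"], result["confidence"], result["position"])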
requirements.txt
ADDED
@@ -0,0 +1,3 @@
pyabsa>=2.0
gradio>=3.0
pandas>=1.0
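After the usual `pip install -r requirements.txt`, a quick import check confirms the environment matches these loose pins; this snippet is only an illustrative sanity check, not part of the app.

# Illustrative: the app expects these packages to be importable at runtime.
import gradio
import pandas
import pyabsa

print(pyabsa.__version__, gradio.__version__, pandas.__version__)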
utils.py
ADDED
@@ -0,0 +1,43 @@
from pyabsa import AspectTermExtraction as ATEPC
from pyabsa import TaskCodeOption
from pyabsa.utils.data_utils.dataset_manager import detect_infer_dataset


def load_atepc_examples(dataset_name: str) -> list[str]:
    """Load example sentences for the given ATEPC dataset, stripped of annotation markup."""
    task = TaskCodeOption.Aspect_Polarity_Classification

    atepc_dataset_item = getattr(ATEPC.ATEPCDatasetList(), dataset_name)

    dataset_files = detect_infer_dataset(atepc_dataset_item, task)

    all_lines = []

    if isinstance(dataset_files, str):
        dataset_files = [dataset_files]

    for fpath in dataset_files:
        print(f"Loading ATEPC examples from: {fpath}")
        try:
            with open(fpath, "r", encoding="utf-8") as fin:
                for line in fin:
                    # Drop the sentiment label and aspect boundary markers, keeping plain text.
                    cleaned_line = line.split("$LABEL$")[0] if "$LABEL$" in line else line
                    cleaned_line = cleaned_line.replace("[B-ASP]", "").replace("[E-ASP]", "").strip()
                    if cleaned_line:
                        all_lines.append(cleaned_line)
        except FileNotFoundError:
            print(f"Warning: Dataset file not found: {fpath}")
        except Exception as e:
            print(f"Error loading {fpath}: {e}")

    # De-duplicate while preserving the original order.
    seen = set()
    unique_ordered_lines = []
    for line in all_lines:
        if line not in seen:
            unique_ordered_lines.append(line)
            seen.add(line)
    return unique_ordered_lines
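A minimal usage sketch for `load_atepc_examples`, assuming PyABSA's datasets have already been downloaded (e.g. by importing `models` first); the dataset name comes from the list used in `app.py`, and the preview count is arbitrary.

# Illustrative sketch: preview a few cleaned example sentences for one dataset.
from utils import load_atepc_examples

examples = load_atepc_examples("Restaurant14")
print(f"{len(examples)} unique examples loaded")
for sentence in examples[:3]:
    print("-", sentence)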