aun09 committed on
Commit
bd9b5f1
·
verified ·
1 Parent(s): bd14639

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +146 -0
  2. models.py +40 -0
  3. requirements.txt +3 -0
  4. utils.py +43 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import gradio as gr
3
+ import pandas as pd
4
+
5
+ # Import necessary components from PyABSA for dataset listing
6
+ from pyabsa import AspectTermExtraction as ATEPC
7
+
8
+ # Import the initialized ATEPC model and utility functions from our local files
9
+ from src.models import aspect_extractor
10
+ from src.utils import load_atepc_examples
11
+
12
+
13
# Datasets offered in the UI as sources of random example sentences.
DESIRED_ATEPC_DATASETS = ["Laptop14", "Restaurant14", "SemEval", "Twitter", "TShirt"]

# Example sentences per dataset, loaded once when this module is imported.
# They are used when the user leaves the input text box blank.
print("Loading ATEPC dataset examples for Gradio interface...")
atepc_dataset_examples = {}
for dataset_name in DESIRED_ATEPC_DATASETS:
    try:
        # Guard clause: skip names that ATEPC.ATEPCDatasetList does not expose.
        if not hasattr(ATEPC.ATEPCDatasetList(), dataset_name):
            print(f"Warning: Dataset '{dataset_name}' not found in ATEPC.ATEPCDatasetList. Skipping.")
            continue
        atepc_dataset_examples[dataset_name] = load_atepc_examples(dataset_name)
    except Exception as exc:
        # Best effort: a failure for one dataset must not stop the others.
        print(f"Error loading examples for ATEPC dataset '{dataset_name}': {exc}")
print("ATEPC dataset examples loading complete.")
40
+
41
+
42
+ # Inference Function for Gradio
43
+
44
def _single_cell_frame(column: str, text: str) -> pd.DataFrame:
    """Return a one-row, one-column DataFrame carrying a status message."""
    return pd.DataFrame({column: [text]}, columns=[column])


def run_atepc_inference(input_text: str, selected_dataset: str) -> tuple[pd.DataFrame, str]:
    """Extract aspects and their sentiments from a sentence.

    When ``input_text`` is blank, a random pre-loaded example from
    ``selected_dataset`` is analyzed instead.

    Args:
        input_text: Sentence typed by the user; may be empty or ``None``.
        selected_dataset: Key into ``atepc_dataset_examples`` used to pick a
            random example when no text was supplied.

    Returns:
        A ``(results, sentence)`` tuple. ``results`` is a DataFrame with the
        columns ``Aspect``, ``Sentiment``, ``Confidence`` and ``Position``
        on success, or a single ``Error``/``Message`` column otherwise.
        ``sentence`` is the text that was analyzed, or a short status string
        when no analysis could be attempted.
    """
    # Without a model there is nothing to run; report it in both outputs.
    if aspect_extractor is None:
        return (
            _single_cell_frame("Error", "Model not initialized. Please check logs."),
            "Model Unavailable",
        )

    # Treat a missing (None) value like an empty box instead of failing
    # on ``None.strip()``.
    analyzed_text = (input_text or "").strip()

    # No text provided: fall back to a random example of the chosen dataset.
    if not analyzed_text:
        examples = atepc_dataset_examples.get(selected_dataset)
        if not examples:
            return (
                _single_cell_frame(
                    "Message",
                    "No examples available for this dataset or input text provided.",
                ),
                "Please provide text or select a valid dataset.",
            )
        analyzed_text = random.choice(examples)

    print(f"Performing ATEPC Inference on: '{analyzed_text}' (Dataset: {selected_dataset})")

    try:
        # Predict aspects together with their sentiments.
        prediction_result = aspect_extractor.predict(analyzed_text, pred_sentiment=True)

        # An empty result or an empty aspect list means nothing was detected.
        if not prediction_result or not prediction_result.get("aspect"):
            return (
                _single_cell_frame("Message", "No aspects detected for the given text."),
                analyzed_text,
            )

        df_result = pd.DataFrame(
            {
                "Aspect": prediction_result["aspect"],
                "Sentiment": prediction_result["sentiment"],
                "Confidence": [round(c, 4) for c in prediction_result["confidence"]],
                "Position": prediction_result["position"],
            }
        )
        return df_result, analyzed_text
    except Exception as e:
        # UI boundary: surface the failure in the results table rather than
        # letting the exception escape the handler.
        print(f"Error during ATEPC inference: {e}")
        return _single_cell_frame("Error", f"An error occurred: {e}"), analyzed_text
87
+
88
+
89
+ # Gradio User Interface Definition
90
+
91
+ # Initialize the Gradio Blocks interface
92
# Gradio user interface.
# NOTE(review): the nesting below was reconstructed from a flattened diff in
# which indentation was lost; confirm the Row/Column layout against the
# original app.py.
with gr.Blocks(title="PyABSA Demonstration: Aspect-based Sentiment Analysis") as sentiment_analysis_app:
    # Page title followed by a visual separator.
    gr.Markdown("# <p align='center'>PyABSA: Multilingual Aspect-based Sentiment Analysis</p>")
    gr.Markdown("---")

    with gr.Row():
        with gr.Column():
            gr.Markdown("## 📈 Analyze Aspects and Sentiments")
            gr.Markdown(
                "This tool identifies specific aspects (entities or attributes) in a sentence "
                "and determines the sentiment (positive, negative, neutral) associated with each. "
                "For example, in 'The laptop's battery life is excellent', 'battery life' would be "
                "identified with a 'positive' sentiment."
            )

            # Free-text input; leaving it blank triggers a random example.
            atepc_input_box = gr.Textbox(
                placeholder="Type a sentence here, or leave blank to load a random example from a dataset...",
                label="Input Sentence:",
                lines=3,
            )

            # Dataset picker limited to the predefined list; the first entry
            # is the default, or None when the list is empty.
            atepc_dataset_selection = gr.Radio(
                choices=DESIRED_ATEPC_DATASETS,
                value=next(iter(DESIRED_ATEPC_DATASETS), None),
                label="Select Dataset (for random examples):",
                interactive=True,
            )

            # Trigger for the analysis.
            atepc_run_button = gr.Button("Analyze Aspects!", variant="primary")

            # Read-only outputs: the analyzed sentence and the result table.
            atepc_output_sentence = gr.TextArea(label="Analyzed Sentence:", interactive=False)
            atepc_prediction_results_df = gr.DataFrame(label="Aspect Prediction Results:", interactive=False)

            # The output order matches the (DataFrame, str) tuple returned
            # by run_atepc_inference.
            atepc_run_button.click(
                fn=run_atepc_inference,
                inputs=[atepc_input_box, atepc_dataset_selection],
                outputs=[atepc_prediction_results_df, atepc_output_sentence],
                api_name="run_atepc_inference",
            )

    gr.Markdown("---")


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    if aspect_extractor is None:
        print("Warning: PyABSA ATEPC model failed to initialize. The application may not function correctly.")
    sentiment_analysis_app.launch(share=False, debug=True)
146
+
models.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import traceback
3
+ from pyabsa import download_all_available_datasets, available_checkpoints
4
+ from pyabsa import AspectTermExtraction as ATEPC
5
+
6
+ # Model Initialization
7
+
8
# 1. Fetch the datasets PyABSA can provide. A failure is reported but does
#    not stop execution, so the steps below are still attempted.
print("PyABSA: Starting dataset download (if not already present)...")
try:
    download_all_available_datasets()
except Exception as exc:
    print(f"PyABSA: Error during dataset download: {exc}")
    print(traceback.format_exc())
else:
    print("PyABSA: Datasets download complete.")

# 2. Build the Aspect Term Extraction and Polarity Classification (ATEPC)
#    model from the 'multilingual' checkpoint. ``aspect_extractor`` stays
#    None when construction fails, so importers can test for it.
print("\nPyABSA: Initializing Aspect Term Extractor (ATEPC) with 'multilingual' checkpoint...")
aspect_extractor = None
try:
    aspect_extractor = ATEPC.AspectExtractor(checkpoint="multilingual")
except Exception as exc:
    print(f"PyABSA: FAILED to initialize Aspect Term Extractor: {exc}")
    print(traceback.format_exc())
else:
    print("PyABSA: Aspect Term Extractor initialized successfully.")

# 3. No ASTE model is initialized in this module.

# 4. List the ATEPC checkpoints for verification purposes.
print("\nPyABSA: Available ATEPC checkpoints:")
try:
    available_checkpoints("ATEPC", True)
except Exception as exc:
    print(f"PyABSA: Could not retrieve ATEPC checkpoints: {exc}")
    print(traceback.format_exc())
40
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pyabsa>=2.0
2
+ gradio>=3.0
3
+ pandas>=1.0
utils.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pyabsa import AspectTermExtraction as ATEPC
2
+ from pyabsa import TaskCodeOption
3
+ from pyabsa.utils.data_utils.dataset_manager import detect_infer_dataset
4
+
5
+
6
def _strip_atepc_markup(line: str) -> str:
    """Return *line* without its ``$LABEL$`` suffix and aspect boundary tags."""
    # str.split returns the whole line when the separator is absent, so no
    # membership pre-check is needed.
    text = line.split("$LABEL$")[0]
    return text.replace("[B-ASP]", "").replace("[E-ASP]", "").strip()


def load_atepc_examples(dataset_name: str) -> list[str]:
    """Load de-duplicated example sentences for one ATEPC dataset.

    Args:
        dataset_name: Attribute name of the dataset on
            ``ATEPC.ATEPCDatasetList()``.

    Returns:
        The cleaned, non-empty example lines in first-seen order, without
        duplicates. Files that cannot be read are reported and skipped.

    Raises:
        AttributeError: If ``dataset_name`` is not an attribute of
            ``ATEPC.ATEPCDatasetList()``.
    """
    task = TaskCodeOption.Aspect_Polarity_Classification

    # getattr follows the same lookup rules as the hasattr check callers
    # use, unlike a direct __getattribute__ call.
    atepc_dataset_item = getattr(ATEPC.ATEPCDatasetList(), dataset_name)

    dataset_files = detect_infer_dataset(atepc_dataset_item, task)
    if not dataset_files:
        # Nothing detected (None or empty): no examples to offer.
        return []
    if isinstance(dataset_files, str):
        dataset_files = [dataset_files]

    all_lines = []
    for fpath in dataset_files:
        print(f"Loading ATEPC examples from: {fpath}")
        try:
            with open(fpath, "r", encoding="utf-8") as fin:
                # Collect per file first so that a read error part-way
                # through contributes no lines from that file.
                file_lines = [
                    cleaned for line in fin if (cleaned := _strip_atepc_markup(line))
                ]
        except FileNotFoundError:
            print(f"Warning: Dataset file not found: {fpath}")
        except Exception as e:
            print(f"Error loading {fpath}: {e}")
        else:
            all_lines.extend(file_lines)

    # dict preserves insertion order, giving an ordered de-duplication.
    return list(dict.fromkeys(all_lines))
43
+