Rogerjs committed on
Commit
fcfe145
·
verified ·
1 Parent(s): 7121e24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -63
app.py CHANGED
@@ -1,83 +1,119 @@
1
  import gradio as gr
2
  from transformers import pipeline
 
3
 
4
- """
5
- Psychotherapy Session Summarizer (Open-Source Implementation)
6
-
7
- This is a Hugging Face Spaces-compatible Gradio app that:
8
- - Takes psychotherapy session transcripts (as text input).
9
- - Summarizes key themes, emotional tones, and patterns.
10
- - Optionally allows custom instructions or focus areas (e.g., "Focus on client's progress since last session").
11
- - Utilizes open-source models only.
12
 
13
- Modular:
14
- - Summarization model can be swapped easily.
15
- - Sentiment analysis model can be changed if desired.
16
 
17
- Scalable:
18
- - Can be extended to handle multiple transcripts and clustering by topic.
19
 
20
- Note:
21
- All used models are open-source and freely available on Hugging Face:
22
- - Summarization model: "google/flan-t5-small" (FLAN-T5 is open-sourced by Google)
23
- - Sentiment model: "cardiffnlp/twitter-roberta-base-sentiment-latest" (open-source sentiment analysis)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- You can adapt these to different models if needed, as long as they are open-source.
 
 
26
 
27
- Instructions:
28
- 1. Paste or upload a session transcript (preferably a few paragraphs of conversation).
29
- 2. (Optional) Provide a custom instruction or focus question, e.g., "Highlight moments of cognitive reframing."
30
- 3. Click "Summarize" to generate a concise summary with themes and emotional insights.
31
 
32
- """
 
 
33
 
34
- # Initialize pipelines
35
- summarizer = pipeline("text2text-generation", model="google/flan-t5-small", tokenizer="google/flan-t5-small")
36
- # Note: "text2text-generation" pipeline for FLAN-T5 also works for summarization if we prompt it properly.
37
- sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
38
-
39
- def analyze_session(transcript, custom_instruction):
40
- # Basic input validation
41
- if not transcript.strip():
42
- return "Please provide a transcript."
43
 
44
- # Construct a prompt for the summarization model
45
- # FLAN-T5 is instruction tuned, so we can give it instructions directly.
46
- # For example:
47
- prompt = "Summarize the following psychotherapy session transcript, focusing on key themes, emotional shifts, and patterns."
48
- if custom_instruction.strip():
49
- prompt += " Additionally, " + custom_instruction.strip()
50
- prompt += "\n\nTranscript:\n" + transcript.strip()
51
 
52
- # Generate summary
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  summary_output = summarizer(prompt, max_length=200, do_sample=False)
54
  summary = summary_output[0]['generated_text'].strip()
55
 
56
- # Sentiment analysis: We'll run it on the entire transcript to gauge the overall emotional tone.
57
- sentiment_results = sentiment_analyzer(transcript)
58
- # The sentiment model returns something like: [{'label': 'positive', 'score': ...}]
59
- # We'll aggregate the results (though it's a single input) and just pick the top.
60
  main_sentiment = sentiment_results[0]['label']
61
 
62
- # Construct a more informative result
63
- # You could elaborate this logic: detect recurring concerns by simple keyword frequency analysis, etc.
64
- # For a simple first iteration, just provide summary and sentiment.
65
-
66
- # Optional: Identify recurring concerns (simple keyword extraction)
67
- # We'll do a naive keyword frequency approach just as a demonstration:
68
- words = transcript.lower().split()
69
- # Common therapy-related words (just a naive approach, could be replaced by a proper keyword extraction model)
70
- # This is a placeholder for demonstration
71
  keywords_of_interest = ["anxiety", "depression", "relationship", "stress", "fear", "goals", "progress", "cognitive", "behavior"]
72
  recurring_concerns = [word for word in words if word in keywords_of_interest]
73
- recurring_concerns = list(set(recurring_concerns)) # unique
74
  if not recurring_concerns:
75
  recurring_concerns_str = "No specific recurring concerns identified from the predefined list."
76
  else:
77
  recurring_concerns_str = "Recurring concerns include: " + ", ".join(recurring_concerns)
78
 
79
- # Recommended follow-up topics (just a heuristic based on summary)
80
- # If certain keywords appear in summary, we can suggest follow-up:
81
  follow_up_suggestions = []
82
  if "progress" in summary.lower():
83
  follow_up_suggestions.append("Explore client's perception of progress in more detail.")
@@ -85,10 +121,8 @@ def analyze_session(transcript, custom_instruction):
85
  follow_up_suggestions.append("Discuss client's relationship dynamics further.")
86
  if not follow_up_suggestions:
87
  follow_up_suggestions.append("Consider following up on the emotional themes identified in the summary.")
88
-
89
  follow_up_suggestions_str = " ".join(follow_up_suggestions)
90
 
91
- # Combine results into a final output
92
  final_output = f"**Summary of Session:**\n{summary}\n\n**Overall Sentiment:** {main_sentiment}\n\n**{recurring_concerns_str}**\n\n**Suggested Follow-Up Topics:** {follow_up_suggestions_str}"
93
 
94
  return final_output
@@ -96,19 +130,24 @@ def analyze_session(transcript, custom_instruction):
96
  # Build Gradio UI
97
  description = """# Psychotherapy Session Summarizer
98
 
99
- Upload or paste your psychotherapy session transcript and optionally provide a custom instruction (e.g., "Focus on anxiety and coping strategies.").
100
- Click 'Summarize' to generate a concise summary of key themes, emotional tones, recurring concerns, and suggested follow-up topics.
 
 
 
 
101
  """
102
 
103
  with gr.Blocks() as demo:
104
  gr.Markdown(description)
105
  with gr.Row():
106
- transcript_input = gr.Textbox(label="Session Transcript", lines=10, placeholder="Paste the session transcript here...")
107
- custom_instruction_input = gr.Textbox(label="Custom Instruction (Optional)", placeholder="e.g., Focus on how the client describes their feelings about progress.")
 
108
  summarize_button = gr.Button("Summarize")
109
  output_box = gr.Markdown()
110
 
111
- summarize_button.click(fn=analyze_session, inputs=[transcript_input, custom_instruction_input], outputs=output_box)
112
 
113
  if __name__ == "__main__":
114
  demo.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import re
4
 
5
# Model identifiers on the Hugging Face Hub, named once so that swapping a
# model is a one-line change.
SUMMARIZER_MODEL = "google/flan-t5-small"
SENTIMENT_MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
ASR_MODEL = "openai/whisper-small"

# Instruction-following text generator used to write the session summary.
summarizer = pipeline(
    "text2text-generation",
    model=SUMMARIZER_MODEL,
    tokenizer=SUMMARIZER_MODEL,
)

# Classifier used to label the overall tone of the transcript.
sentiment_analyzer = pipeline("sentiment-analysis", model=SENTIMENT_MODEL)

# Speech-to-text model used when the session is supplied as audio.
asr_pipeline = pipeline("automatic-speech-recognition", model=ASR_MODEL)
14
 
15
# (speaker label, prefix pattern) pairs; the first matching pattern wins.
# Compiled once at import time rather than on every call.
_SPEAKER_PATTERNS = (
    ("Therapist", re.compile(r"^\s*(Therapist|T):", re.IGNORECASE)),
    ("Client", re.compile(r"^\s*(Client|C):", re.IGNORECASE)),
)


def convert_to_json(transcript_text):
    """
    Convert a transcript into a JSON-serializable structure of speaker turns.

    A line starting with 'Therapist:' / 'T:' or 'Client:' / 'C:'
    (case-insensitive) opens a new turn for that speaker. Any other non-blank
    line is appended to the turn in progress; text that appears before the
    first speaker label is attributed to "Unknown".

    Blank lines are ignored, and a speaker label that is never followed by any
    text produces no turn, so turns never contain empty fragments or doubled
    spaces.

    Args:
        transcript_text: The raw transcript as a single string.

    Returns:
        A dict of the form ``{"session": [{"speaker": str, "text": str}, ...]}``.
        If the transcript contains no text at all, the list holds a single
        "Unknown" turn whose text is the stripped input (i.e. an empty string).
    """
    session_data = []
    current_speaker = None
    current_text = []

    def flush_turn():
        # Store the turn collected so far, if it has any text.
        if current_speaker and current_text:
            session_data.append(
                {"speaker": current_speaker, "text": " ".join(current_text)}
            )

    for raw_line in transcript_text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            # Blank separator lines carry no content.
            continue

        for speaker, pattern in _SPEAKER_PATTERNS:
            if pattern.match(line):
                # A speaker label always closes the previous turn, even when
                # the same speaker continues.
                flush_turn()
                current_speaker = speaker
                current_text = []
                # Drop the "Speaker:" prefix and keep only the spoken text.
                line = pattern.sub("", line).strip()
                break
        else:
            if current_speaker is None:
                # Text before any speaker label.
                current_speaker = "Unknown"

        if line:
            current_text.append(line)

    # Store the final turn.
    flush_turn()

    if not session_data:
        # Nothing usable was found; still return a well-formed structure.
        session_data = [{"speaker": "Unknown", "text": transcript_text.strip()}]

    return {"session": session_data}
71
 
72
+ def analyze_session(transcript, custom_instruction, audio):
73
+ # If audio is provided, we transcribe it and ignore the text transcript field
74
+ if audio is not None:
75
+ # Transcribe audio
76
+ asr_result = asr_pipeline(audio)
77
+ transcript_text = asr_result['text']
78
+ else:
79
+ # Use the provided transcript text
80
+ transcript_text = transcript
81
 
82
+ if not transcript_text.strip():
83
+ return "Please provide a transcript or an audio file."
 
 
 
 
 
84
 
85
+ # Convert transcript to JSON
86
+ json_data = convert_to_json(transcript_text)
87
+
88
+ # Prepare the prompt for summarization
89
+ prompt = (
90
+ "You are a helpful assistant that summarizes psychotherapy sessions. "
91
+ "The session is provided in JSON format with speaker turns. "
92
+ "Summarize the key themes, emotional shifts, and patterns from this session. "
93
+ )
94
+ if custom_instruction.strip():
95
+ prompt += f" Additionally, {custom_instruction.strip()}"
96
+ prompt += "\n\nJSON data:\n" + str(json_data)
97
+
98
+ # Summarize using the LLM
99
  summary_output = summarizer(prompt, max_length=200, do_sample=False)
100
  summary = summary_output[0]['generated_text'].strip()
101
 
102
+ # Sentiment analysis of the entire transcript
103
+ sentiment_results = sentiment_analyzer(transcript_text)
 
 
104
  main_sentiment = sentiment_results[0]['label']
105
 
106
+ # Simple keyword-based recurring concerns
107
+ words = transcript_text.lower().split()
 
 
 
 
 
 
 
108
  keywords_of_interest = ["anxiety", "depression", "relationship", "stress", "fear", "goals", "progress", "cognitive", "behavior"]
109
  recurring_concerns = [word for word in words if word in keywords_of_interest]
110
+ recurring_concerns = list(set(recurring_concerns))
111
  if not recurring_concerns:
112
  recurring_concerns_str = "No specific recurring concerns identified from the predefined list."
113
  else:
114
  recurring_concerns_str = "Recurring concerns include: " + ", ".join(recurring_concerns)
115
 
116
+ # Suggest follow-up topics based on summary
 
117
  follow_up_suggestions = []
118
  if "progress" in summary.lower():
119
  follow_up_suggestions.append("Explore client's perception of progress in more detail.")
 
121
  follow_up_suggestions.append("Discuss client's relationship dynamics further.")
122
  if not follow_up_suggestions:
123
  follow_up_suggestions.append("Consider following up on the emotional themes identified in the summary.")
 
124
  follow_up_suggestions_str = " ".join(follow_up_suggestions)
125
 
 
126
  final_output = f"**Summary of Session:**\n{summary}\n\n**Overall Sentiment:** {main_sentiment}\n\n**{recurring_concerns_str}**\n\n**Suggested Follow-Up Topics:** {follow_up_suggestions_str}"
127
 
128
  return final_output
 
130
# Build Gradio UI
# Markdown shown at the top of the page.
description = """# Psychotherapy Session Summarizer

This tool summarizes psychotherapy session transcripts (text or audio) into key themes, emotional shifts, and patterns.

**How to Use:**
- You may upload an audio file of the session or paste the text transcript.
- Optionally provide a custom focus or instruction (e.g., "Focus on how the client talks about their anxiety.").
- Click 'Summarize' to generate a summary along with identified concerns and suggested follow-ups.
"""

# NOTE(review): indentation was lost in the view this was reconstructed from.
# The three inputs are assumed to share one row, with the button and output
# below it - confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Row():
        transcript_input = gr.Textbox(label="Session Transcript (Text)", lines=10, placeholder="Paste the session transcript here...")
        # type="filepath" hands the handler a path string, which the speech
        # recognition pipeline accepts directly; the former type="file" is
        # deprecated and yields a value the pipeline does not accept.
        # NOTE(review): `source=` is the Gradio 3.x spelling; newer releases
        # use `sources=[...]` - confirm the pinned Gradio version.
        audio_input = gr.Audio(source="upload", type="filepath", label="Session Audio (Optional)")
        custom_instruction_input = gr.Textbox(label="Custom Instruction (Optional)", placeholder="e.g., Focus on anxiety and coping strategies.")
    summarize_button = gr.Button("Summarize")
    output_box = gr.Markdown()

    # The input order must match analyze_session(transcript, custom_instruction, audio).
    summarize_button.click(fn=analyze_session, inputs=[transcript_input, custom_instruction_input, audio_input], outputs=output_box)

if __name__ == "__main__":
    demo.launch()