Naphat Sornwichai committed
Commit 81889f9 · 1 Parent(s): b4c6511

update major files

Files changed (3):
1. .gitignore +2 -1
2. app.py +143 -190
3. test.ipynb +74 -0
.gitignore CHANGED
@@ -1,3 +1,4 @@
  .venv
  __pycache__
- downloaded_audio.mp3
+ *.mp3
+ *.wav
app.py CHANGED
@@ -1,52 +1,36 @@
  import gradio as gr
  import torch
- from transformers import pipeline
+ from faster_whisper import WhisperModel
  import yt_dlp
  from openai import OpenAI
  import os
  import json
- import torchaudio
  import time
+ import uuid

- # --- 1. Model & Pipeline Initialization ---
- # Setup device and data type for PyTorch
- print("Initializing transcription model...")
- # Updated device selection logic for CUDA, Apple MPS, and CPU
- device = "cuda:0" if torch.cuda.is_available() else "mps" if hasattr(torch.backends, "mps") and torch.backends.mps.is_available() else "cpu"
- torch_dtype = torch.float16 if torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else torch.float32
-
- # Switched to the medium model as requested
- model_id = "nectec/Pathumma-whisper-th-medium"
-
- print(f"Using device: {device} with dtype: {torch_dtype}")
-
- # Initialize the ASR pipeline, which is more robust for handling inputs
- pipe = pipeline(
-     task="automatic-speech-recognition",
-     model=model_id,
-     dtype=torch_dtype,
-     device=device,
- )
-
- # Set the language and task for the pipeline
- pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="th", task="transcribe")
-
+ # --- 1. Model Initialization (Efficient: Done Once at Startup) ---
+ print("Initializing transcription model (faster-whisper)...")
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ if device == "cuda":
+     compute_type = "float16"
+     print("CUDA detected. Using GPU with compute_type: 'float16'")
+ else:
+     compute_type = "int8"
+     print("No CUDA device found. Using CPU with compute_type: 'int8'")
+
+ model_size = "large-v3"
+ model = WhisperModel(model_size, device=device, compute_type=compute_type)
  print("Transcription model loaded successfully.")

  # --- 2. Helper Functions ---
  def download_youtube_audio(url: str) -> str:
-     """
-     Downloads audio from a YouTube URL and saves it as an mp3 file.
-     Returns the path to the downloaded file.
-     """
-     output_template = 'downloaded_audio.%(ext)s'
+     """Downloads audio from a YouTube URL and saves it as an MP3 file."""
+     unique_id = uuid.uuid4()
+     output_template = f'{unique_id}.%(ext)s'
+     final_filepath = f'{unique_id}.mp3'
      ydl_opts = {
          'format': 'bestaudio/best',
-         'postprocessors': [{
-             'key': 'FFmpegExtractAudio',
-             'preferredcodec': 'mp3',
-             'preferredquality': '192',
-         }],
+         'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', 'preferredquality': '192'}],
          'outtmpl': output_template,
          'quiet': True,
          'overwrite': True,
@@ -54,106 +38,92 @@ def download_youtube_audio(url: str) -> str:
      try:
          with yt_dlp.YoutubeDL(ydl_opts) as ydl:
              ydl.download([url])
-             return 'downloaded_audio.mp3'
+             return final_filepath
      except Exception as e:
-         raise gr.Error(f"Failed to download audio from YouTube. Please check the link. Error: {str(e)}")
+         raise gr.Error(f"Failed to download audio from YouTube. Error: {str(e)}")

- # --- 3. Core Logic ---
  def transcribe_and_summarize(audio_file: str, youtube_url: str):
-     """
-     Main function to process audio, transcribe, and summarize.
-     This is a generator function to yield status updates and logs to the UI.
-     No longer uses gr.Progress, shows loading state in the output component itself.
-     """
+     """The main processing pipeline: download, transcribe (with streaming), and summarize."""
      log_history = ""
      def log(message):
          nonlocal log_history
-         timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+         timestamp = time.strftime("%H:%M:%S")  # Use shorter timestamp
          log_history += f"[{timestamp}] {message}\n"
          return log_history

-     loading_message = "⏳ Please wait, your article is being generated..."
-     yield log("Process started."), "", loading_message
+     loading_message = "⏳ Generating summary..."
+     yield log("Process started."), "", ""

-     # Step 1: Get API Key and validate inputs
      api_key = os.getenv('TYPHOON_API')
      if not api_key:
-         raise gr.Error("TYPHOON_API environment variable not set. Please set it before running the app.")
+         error_msg = "TYPHOON_API environment variable not set. Cannot summarize."
+         yield log(error_msg), "", gr.Markdown(f"## Error\n{error_msg}")
+         return
+
      if audio_file is None and not youtube_url:
          raise gr.Error("Please upload an audio file or provide a YouTube link.")

-     # Step 2: Determine audio source and get file path
      filepath = ""
-     if youtube_url:
-         yield log("YouTube link detected. Starting download."), "", loading_message
-         try:
-             filepath = download_youtube_audio(youtube_url)
-             yield log(f"Audio downloaded successfully to '{filepath}'."), "", loading_message
-         except Exception as e:
-             yield log(f"Error downloading from YouTube: {e}"), "", ""
-             return
-     else:
-         filepath = audio_file
-         yield log(f"Processing uploaded file: '{filepath}'."), "", loading_message
-
-     # Step 3: Transcribe audio using the pipeline for robustness
-     yield log("Beginning audio transcription... This may take a while for long audio."), "", loading_message
+     is_downloaded = False
      try:
-         # The pipeline handles resampling, chunking, and batching automatically
-         result = pipe(filepath, chunk_length_s=30, batch_size=8, return_timestamps=False)
-         transcribed_text = result["text"]
-         yield log("Transcription complete."), transcribed_text, loading_message
-
-     except Exception as e:
-         raise gr.Error(f"An error occurred during transcription: {str(e)}")
-
-     # Step 4: Summarize with Typhoon LLM
-     yield log("Sending transcription to Typhoon LLM for summarization."), transcribed_text, loading_message
-     if not transcribed_text or not transcribed_text.strip():
-         yield log("Transcription is empty. Aborting summarization."), "", "Could not generate summary because the transcription is empty."
-         return
-
-     # Initialize OpenAI client for Typhoon
-     client = OpenAI(
-         api_key=api_key,
-         base_url="https://api.opentyphoon.ai/v1"
-     )
-
-     system_prompt = """You are a professional editor and content creator. Your task is to take a raw transcript and reformat it into a beautiful, easy-to-read blog post.
- You MUST reply ONLY with a valid JSON object. Do not add any text before or after the JSON.
+         if youtube_url:
+             yield log("Downloading YouTube audio..."), "", ""
+             filepath = download_youtube_audio(youtube_url)
+             is_downloaded = True
+             yield log(f"Downloaded to {filepath}"), "", ""
+         else:
+             filepath = audio_file
+
+         yield log("Transcription started..."), "", ""
+         segments, info = model.transcribe(filepath, beam_size=5)
+         detected_lang = info.language
+         yield log(f"Detected language '{detected_lang}' with probability {info.language_probability:.2f}"), "", ""
+
+         transcribed_text = ""
+         for segment in segments:
+             line = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text.strip()}"
+             transcribed_text += segment.text + " "
+             yield log(line), transcribed_text, ""
+
+         yield log("Transcription complete."), transcribed_text, ""
+         yield log("Sending to AI for summarization..."), transcribed_text, loading_message
+
+         client = OpenAI(api_key=api_key, base_url="https://api.opentyphoon.ai/v1")
+         system_prompt = f"""You are an automated system that converts transcripts into a blog post.
+ Your ONLY function is to output a valid JSON object.
+ Do NOT write any explanations, apologies, or introductory text.
+ The response MUST start with a `{{` and end with a `}}`.

  The JSON object must have the following structure:
- {
-     "title": "A catchy and relevant title for the blog post in Thai.",
-     "key_takeaway": "A single paragraph summarizing the most important point of the entire content in Thai.",
+ {{
+     "title": "A catchy and relevant title for the blog post in {detected_lang}.",
+     "key_takeaway": "A single paragraph summarizing the most important point of the entire content in {detected_lang}.",
      "main_ideas": [
-         "A key point or feature, written as a string in Thai.",
-         "Another key point or feature, written as a string in Thai.",
+         "A key point or feature, written as a string in {detected_lang}.",
+         "Another key point or feature...",
          "And so on..."
      ],
-     "conclusion": "A concluding paragraph that wraps up the main ideas in Thai."
- }"""
-
-     try:
+     "conclusion": "A concluding paragraph that wraps up the main ideas in {detected_lang}."
+ }}"""
+
          response = client.chat.completions.create(
              model="typhoon-v2.1-12b-instruct",
-             messages=[
-                 {"role": "system", "content": system_prompt},
-                 {"role": "user", "content": f"Please summarize and restructure the following transcript into the specified JSON format:\n\n---\n\n{transcribed_text}"}
-             ],
+             messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": transcribed_text}],
              max_tokens=2048,
              temperature=0.7
          )
+
          summary_json_string = response.choices[0].message.content
-         yield log("Received summary from Typhoon LLM. Parsing JSON."), transcribed_text, loading_message

-         # Parse the JSON and format it as Markdown
+         # --- THIS IS THE FIX ---
+         # Clean up the string to remove markdown fences if the AI included them
+         if summary_json_string.strip().startswith("```json"):
+             summary_json_string = summary_json_string.strip()[7:-4].strip()
+         # --- END OF FIX ---
+
          try:
-             # Clean potential markdown code blocks from the response
-             if summary_json_string.strip().startswith("```json"):
-                 summary_json_string = summary_json_string.strip()[7:-4]
+             if not summary_json_string or not summary_json_string.strip():
+                 raise json.JSONDecodeError("Empty response from API", summary_json_string, 0)

              data = json.loads(summary_json_string)
              title = data.get("title", "Title Not Found")
@@ -161,113 +131,96 @@ The JSON object must have the following structure:
              main_ideas = data.get("main_ideas", [])
              conclusion = data.get("conclusion", "")

-             # Build the blog post in Markdown format
-             summary_markdown = f"# {title}\n\n"
-             summary_markdown += f"<p>{key_takeaway}</p>\n\n"
-             if main_ideas:
-                 summary_markdown += "## Key Features & Main Ideas\n\n"
-                 summary_markdown += "<ul>\n"
-                 for idea in main_ideas:
-                     summary_markdown += f"    <li>{idea}</li>\n"
-                 summary_markdown += "</ul>\n\n"
-             summary_markdown += f"## Conclusion\n\n<p>{conclusion}</p>"
-             yield log("Successfully parsed and formatted summary."), transcribed_text, summary_markdown
-
-         except (json.JSONDecodeError, AttributeError) as e:
-             error_message = f"Failed to parse the summary from the AI. Raw response: {summary_json_string}"
-             raise gr.Error(error_message)
+             summary_markdown = f"# {title}\n\n<p>{key_takeaway}</p>\n\n## Key Ideas\n\n<ul>"
+             for idea in main_ideas:
+                 summary_markdown += f"<li>{idea}</li>"
+             summary_markdown += f"</ul>\n\n## Conclusion\n\n<p>{conclusion}</p>"
+
+             yield log("Summarization complete."), transcribed_text, summary_markdown
+
+         except json.JSONDecodeError:
+             error_log_message = "ERROR: Failed to decode JSON from AI response."
+             error_display_message = f"## Summarization Failed\n**The AI did not return a valid JSON article.**\n\n**Raw AI Response:**\n```\n{summary_json_string}\n```"
+             yield log(error_log_message), transcribed_text, gr.Markdown(error_display_message)

      except Exception as e:
-         raise gr.Error(f"Could not connect to the Typhoon API. Please check your API key. Error: {str(e)}")
-
-     # Step 5: Return final results
-     yield log("Process finished successfully."), transcribed_text, summary_markdown
-
- # --- 4. Gradio UI ---
- # Custom CSS for a beautiful, blog-like output.
+         yield log(f"An unexpected error occurred: {str(e)}"), "", f"## Error\nAn unexpected error occurred: {str(e)}"
+     finally:
+         if is_downloaded and filepath and os.path.exists(filepath):
+             print(f"Cleaning up temporary file: {filepath}")
+             os.remove(filepath)
+
+ def update_video_preview(url):
+     """Parses a YouTube URL to find the video ID, then returns an HTML iframe embed."""
+     if not url:
+         return gr.update(value=None, visible=False)
+
+     video_id = None
+     try:
+         if "youtube.com/shorts/" in url:
+             video_id = url.split("/shorts/")[1].split("?")[0]
+         elif "watch?v=" in url:
+             video_id = url.split("watch?v=")[1].split("&")[0]
+         elif "youtu.be/" in url:
+             video_id = url.split("youtu.be/")[1].split("?")[0]
+     except IndexError:
+         return gr.update(value=None, visible=False)
+
+     if video_id:
+         iframe_html = f'<iframe width="100%" height="315" src="https://www.youtube.com/embed/{video_id}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
+         return gr.update(value=iframe_html, visible=True)
+     else:
+         return gr.update(value=None, visible=False)

+ # --- 3. Gradio UI Layout ---
  css = """
- @import url('https://fonts.googleapis.com/css2?family=Sarabun:wght@400;700&display=swap');
- .blog-output {
-     font-family: 'Sarabun', sans-serif;
-     line-height: 1.8;
-     max-width: 800px;
-     margin: auto;
-     padding: 2rem;
-     border-radius: 12px;
-     background-color: #ffffff;
-     border: 1px solid #e5e7eb;
- }
- .blog-output h1 {
-     font-size: 2.2em;
-     font-weight: 700;
-     border-bottom: 2px solid #f3f4f6;
-     padding-bottom: 15px;
-     margin-bottom: 25px;
-     color: #111827;
- }
- .blog-output h2 {
-     font-size: 1.6em;
-     font-weight: 700;
-     margin-top: 40px;
-     margin-bottom: 20px;
-     color: #1f2937;
- }
- .blog-output p {
-     font-size: 1.1em;
-     margin-bottom: 20px;
-     color: #374151;
- }
- .blog-output ul {
-     padding-left: 25px;
-     list-style-type: disc;
- }
- .blog-output li {
-     margin-bottom: 12px;
-     padding-left: 5px;
- }
+ @import url('https://fonts.googleapis.com/css2?family=Sarabun:wght@400;700&display=swap');
+ .blog-output { font-family: 'Sarabun', sans-serif; line-height: 1.8; max-width: 800px; margin: auto; padding: 2rem; border-radius: 12px; background-color: #ffffff; border: 1px solid #e5e7eb; }
+ .blog-output h1 { font-size: 2.2em; font-weight: 700; border-bottom: 2px solid #f3f4f6; padding-bottom: 15px; margin-bottom: 25px; color: #111827; }
+ .blog-output h2 { font-size: 1.6em; font-weight: 700; margin-top: 40px; margin-bottom: 20px; color: #1f2937; }
+ .blog-output p { font-size: 1.1em; margin-bottom: 20px; color: #374151; }
+ .blog-output ul { padding-left: 25px; list-style-type: disc; }
+ .blog-output li { margin-bottom: 12px; padding-left: 5px; }
  """
-
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css=css) as demo:
-     gr.Markdown(
-         """
-         # 🎙️ Audio to Blog Summarizer ✒️
-         Upload an audio file (MP3, WAV) or paste a YouTube link to transcribe it to Thai text and summarize the content into a beautiful, blog-style article using AI from NECTEC and OpenTyphoon.
-         """
-     )
-
+     gr.Markdown("# 🎙️ Audio to Blog Summarizer ✒️")
      with gr.Row():
          with gr.Column(scale=1):
              with gr.Tabs():
-                 with gr.TabItem("⬆️ Upload Audio File"):
-                     audio_file_input = gr.Audio(
-                         label="Upload MP3 or WAV file",
-                         type="filepath",
-                         sources=["upload"]
-                     )
-                 with gr.TabItem("🔗 Paste YouTube Link"):
-                     youtube_url_input = gr.Textbox(
-                         label="Paste YouTube link here",
-                         placeholder="e.g., https://www.youtube.com/watch?v=..."
-                     )
-
+                 with gr.TabItem("⬆️ Upload Audio"):
+                     audio_file_input = gr.Audio(label="Upload Audio File", type="filepath")
+                 with gr.TabItem("🔗 YouTube Link"):
+                     youtube_url_input = gr.Textbox(label="YouTube URL", placeholder="Paste a YouTube link here...")
+
              submit_button = gr.Button("🚀 Generate Blog Post", variant="primary")
+             video_preview = gr.HTML(visible=False)
+
              with gr.Accordion("📝 View Process Log", open=True):
                  log_output = gr.Textbox(label="Log", interactive=False, lines=10)

          with gr.Column(scale=2):
              gr.Markdown("## ✨ Article Output")
              blog_summary_output = gr.Markdown(elem_classes=["blog-output"])
              with gr.Accordion("📜 View Full Transcription", open=False):
                  transcription_output = gr.Textbox(label="Full Text", interactive=False, lines=10)

-     # Link button to the main function
+     # --- 4. Event Listeners ---
      submit_button.click(
          fn=transcribe_and_summarize,
          inputs=[audio_file_input, youtube_url_input],
          outputs=[log_output, transcription_output, blog_summary_output]
      )
+     youtube_url_input.change(
+         fn=update_video_preview,
+         inputs=youtube_url_input,
+         outputs=video_preview
+     )
+     demo.load(
+         fn=update_video_preview,
+         inputs=youtube_url_input,
+         outputs=video_preview
+     )

+ # --- 5. App Launch ---
  if __name__ == "__main__":
-     demo.launch(debug=True)
+     demo.launch(debug=True)
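Editor's note on the "THIS IS THE FIX" block above: it only handles a response that opens with exactly ```json and blindly drops seven leading and four trailing characters, so a bare ``` fence, or extra whitespace around the fences, can still break json.loads. A more defensive helper is sketched below as a suggestion only; strip_json_fences is a hypothetical name and is not part of this commit:

import re

def strip_json_fences(raw: str) -> str:
    """Remove one pair of surrounding Markdown code fences, if present."""
    text = raw.strip()
    # Accepts ```json ... ``` as well as bare ``` ... ``` around the payload;
    # anything without surrounding fences is returned unchanged.
    match = re.match(r"```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL)
    return match.group(1) if match else text

# Hypothetical usage in place of the slicing fix:
# data = json.loads(strip_json_fences(summary_json_string))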
 
test.ipynb ADDED
@@ -0,0 +1,74 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": 6,
+    "id": "81d301b6",
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Detected language 'th' with probability 0.993038\n",
+       "[0.00s -> 6.72s] เช่น ลุงแดงบอกว่า การเล่นเนี่ย สมมุติเล่นคอร์ดสี่ คอร์ดสี่อย่างงี้\n",
+       "[6.72s -> 11.88s] คอร์ดสี่อย่างงี้ มันถูกทั้งหมด แต่เวลาเอาไปใช้งานจริงจริง\n",
+       "[11.88s -> 15.60s] มันจะทําอย่างนั้นไม่ได้ มันต้องเลือกเอาว่าเล่นอะไรที่มันดีที่สุด\n",
+       "[15.60s -> 19.50s] เออ ลูกหลานลองฟังเสียงคอร์ด เงื้อเสียงมันต่างกัน\n",
+       "[19.50s -> 23.10s] ฟังแบบนี้มันกําแก่งนะ เนี้ย\n",
+       "[24.78s -> 30.58s] แล้วแขมเล่นไปต้องคอยระวัง ระวังไอ้สายห้ากับหกด้วย\n",
+       "[30.58s -> 32.98s] เดี๋ยวมันจะวิ่งออกมากวนกัน เพราะปลิ๊กมันขบยาก\n",
+       "[32.98s -> 35.54s] เดี๋ยวมันปลายไปโดนนิดหนึ่ง มันก็ออกแล้ว\n",
+       "[35.54s -> 40.58s] เราจะดิดหกสายฟังให้ดีนะลูกหลาย ถ้าจับแบบนี้\n",
+       "[40.58s -> 45.98s] บอร์ด ดัง บอร์ด เห็นไหม เล่นแบบนี้ก็เล่นในทั่วไป\n",
+       "[45.98s -> 50.18s] เสียงแรงต่างมา ไม่ผิดนะ แต่ก็ดีแบบนั้น เอาดี ๆ เลย\n",
+       "[50.18s -> 54.50s] บอร์ด ชัดเจน บอร์ด เห็นไหม แล้วดีดกันเลย\n"
+      ]
+     }
+    ],
+    "source": [
+     "from faster_whisper import WhisperModel\n",
+     "\n",
+     "model_size = \"large-v3\"\n",
+     "\n",
+     "model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")\n",
+     "\n",
+     "segments, info = model.transcribe(\"bacfd788-dd5c-4ff3-851a-45bbf742acd5.mp3\", beam_size=5)\n",
+     "\n",
+     "print(\"Detected language '%s' with probability %f\" % (info.language, info.language_probability))\n",
+     "\n",
+     "for segment in segments:\n",
+     "    print(\"[%.2fs -> %.2fs] %s\" % (segment.start, segment.end, segment.text))"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "id": "0e94c566",
+    "metadata": {},
+    "outputs": [],
+    "source": []
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "Jumps",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.11"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+ }
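A final observation on this commit: update_video_preview in app.py extracts video IDs by raw string splitting, which quietly misses variants such as m.youtube.com links. A standard-library alternative covering the same three URL shapes is sketched below, as a suggestion under those assumptions; extract_video_id is a hypothetical name and not part of the commit:

from urllib.parse import urlparse, parse_qs

def extract_video_id(url: str) -> str | None:
    """Return the YouTube video ID from watch, shorts, or youtu.be URLs."""
    parsed = urlparse(url)
    host = parsed.netloc.lower().removeprefix("www.").removeprefix("m.")
    if host == "youtu.be":
        # youtu.be/<id>?t=42
        return parsed.path.strip("/").split("/")[0] or None
    if host == "youtube.com":
        if parsed.path == "/watch":
            # youtube.com/watch?v=<id>&list=...
            return parse_qs(parsed.query).get("v", [None])[0]
        if parsed.path.startswith("/shorts/"):
            # youtube.com/shorts/<id>
            return parsed.path.split("/")[2] or None
    return None

Like the committed version, it returns None for anything it cannot parse, so a caller can keep hiding the preview with gr.update(visible=False).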