Naphat Sornwichai committed
Commit 9910e37 · 1 Parent(s): 2705d4a

update major files

Browse files
- .gitignore        +3 -0
- app.py            +293 -4
- requirements.txt  +0 -341
.gitignore  ADDED
@@ -0,0 +1,3 @@
+.venv
+__pycache__
+downloaded_audio.mp3
app.py  CHANGED
@@ -1,7 +1,296 @@
 import gradio as gr
 
-def greet(name):
-    return "Hello " + name + "!!"
 
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
+import yt_dlp
+from openai import OpenAI
+import os
+import json
+import torchaudio
+import torchaudio.transforms as T
+import time
+
+# --- 1. Model & Pipeline Initialization ---
+# Setup device and data type for PyTorch
+print("Initializing transcription model...")
+# Updated device selection logic for CUDA, Apple MPS, and CPU
+device = "cuda:0" if torch.cuda.is_available() else "mps" if hasattr(torch.backends, "mps") and torch.backends.mps.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else torch.float32
+
+# Switched to the medium model as requested
+model_id = "nectec/Pathumma-whisper-th-medium"
+
+print(f"Using device: {device} with dtype: {torch_dtype}")
+
+# Load the model and processor directly
+# We will use the model's .generate() method for long-form transcription
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id, dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+)
+model.to(device)
+
+processor = AutoProcessor.from_pretrained(model_id)
+
+print("Transcription model loaded successfully.")
+
+# --- 2. Helper Functions ---
+def download_youtube_audio(url: str) -> str:
+    """
+    Downloads audio from a YouTube URL and saves it as an mp3 file.
+    Returns the path to the downloaded file.
+    """
+    output_template = 'downloaded_audio.%(ext)s'
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'mp3',
+            'preferredquality': '192',
+        }],
+        'outtmpl': output_template,
+        'quiet': True,
+        'overwrite': True,
+    }
+    try:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+        return 'downloaded_audio.mp3'
+    except Exception as e:
+        raise gr.Error(f"Failed to download audio from YouTube. Please check the link. Error: {str(e)}")
+
+
+# --- 3. Core Logic ---
+def transcribe_and_summarize(audio_file: str, youtube_url: str, progress=gr.Progress()):
+    """
+    Main function to process audio, transcribe, and summarize.
+    This is a generator function to yield status updates and logs to the UI.
+    """
+    log_history = ""
+    def log(message):
+        nonlocal log_history
+        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+        log_history += f"[{timestamp}] {message}\n"
+        return log_history
+
+    progress(0, desc="Starting...")
+    yield log("Process started."), "", "", "Starting..."
+
+    # Step 1: Get API Key and validate inputs
+    api_key = os.getenv('TYPHOON_API')
+    if not api_key:
+        raise gr.Error("TYPHOON_API environment variable not set. Please set it before running the app.")
+    if audio_file is None and not youtube_url:
+        raise gr.Error("Please upload an audio file or provide a YouTube link.")
+
+    # Step 2: Determine audio source and get file path
+    filepath = ""
+    if youtube_url:
+        progress(0.1, desc="Downloading Audio...")
+        yield log("YouTube link detected. Starting download."), "", "", "Downloading Audio..."
+        try:
+            filepath = download_youtube_audio(youtube_url)
+            yield log(f"Audio downloaded successfully to '{filepath}'."), "", "", "Download Complete"
+        except Exception as e:
+            yield log(f"Error downloading from YouTube: {e}"), "", "", f"Error: {e}"
+            return
+    else:
+        filepath = audio_file
+        yield log(f"Processing uploaded file: '{filepath}'."), "", "", "Processing File..."
+
+
+    # Step 3: Transcribe audio using the model's generate method for long-form audio
+    progress(0.3, desc="Transcribing Audio...")
+    yield log("Beginning audio transcription..."), "", "", "Transcribing Audio..."
+    try:
+        # Load audio file using torchaudio
+        waveform, sr = torchaudio.load(filepath)
+
+        # Resample to 16kHz if necessary, as Whisper expects this rate
+        if sr != 16000:
+            yield log(f"Original sample rate is {sr}Hz. Resampling to 16000Hz."), "", "", "Resampling..."
+            resampler = T.Resample(orig_freq=sr, new_freq=16000)
+            waveform = resampler(waveform)
+
+        # Process the audio waveform to get input features
+        input_features = processor(
+            waveform.squeeze().numpy(),
+            return_tensors="pt",
+            sampling_rate=16000
+        ).input_features.to(device, dtype=torch_dtype)
+
+        # Set the generation language and task for Thai transcription
+        decoder_prompt_ids = processor.get_decoder_prompt_ids(language="th", task="transcribe")
+
+        # Generate token IDs from the input features
+        predicted_ids = model.generate(input_features, forced_decoder_ids=decoder_prompt_ids)
+
+        # Decode the token IDs to text
+        transcribed_text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+        yield log("Transcription complete."), transcribed_text, "", "Transcription Complete"
+
+    except Exception as e:
+        raise gr.Error(f"An error occurred during transcription: {str(e)}")
+
+
+    # Step 4: Summarize with Typhoon LLM
+    progress(0.8, desc="Generating Summary...")
+    yield log("Sending transcription to Typhoon LLM for summarization."), transcribed_text, "", "Generating Summary..."
+    if not transcribed_text or not transcribed_text.strip():
+        yield log("Transcription is empty. Aborting summarization."), "", "Could not generate summary because the transcription is empty.", "Aborted"
+        return
+
+    # Initialize OpenAI client for Typhoon
+    client = OpenAI(
+        api_key=api_key,
+        base_url="https://api.opentyphoon.ai/v1"
+    )
+
+    system_prompt = """You are a professional editor and content creator. Your task is to take a raw transcript and reformat it into a beautiful, easy-to-read blog post.
+You MUST reply ONLY with a valid JSON object. Do not add any text before or after the JSON.
+The JSON object must have the following structure:
+{
+  "title": "A catchy and relevant title for the blog post in Thai.",
+  "key_takeaway": "A single paragraph summarizing the most important point of the entire content in Thai.",
+  "main_ideas": [
+    "A key point or feature, written as a string in Thai.",
+    "Another key point or feature, written as a string in Thai.",
+    "And so on..."
+  ],
+  "conclusion": "A concluding paragraph that wraps up the main ideas in Thai."
+}"""
+
+    try:
+        response = client.chat.completions.create(
+            model="typhoon-v2.1-12b-instruct",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": f"Please summarize and restructure the following transcript into the specified JSON format:\n\n---\n\n{transcribed_text}"}
+            ],
+            max_tokens=2048,
+            temperature=0.7
+        )
+        summary_json_string = response.choices[0].message.content
+        yield log("Received summary from Typhoon LLM. Parsing JSON."), transcribed_text, "", "Parsing Summary..."
+
+        # Parse the JSON and format it as Markdown
+        try:
+            # Clean potential markdown code blocks from the response
+            if summary_json_string.strip().startswith("```json"):
+                summary_json_string = summary_json_string.strip()[7:-4]
+
+            data = json.loads(summary_json_string)
+            title = data.get("title", "Title Not Found")
+            key_takeaway = data.get("key_takeaway", "")
+            main_ideas = data.get("main_ideas", [])
+            conclusion = data.get("conclusion", "")
+
+            # Build the blog post in Markdown format
+            summary_markdown = f"# {title}\n\n"
+            summary_markdown += f"{key_takeaway}\n\n"
+            if main_ideas:
+                summary_markdown += "## Key Features & Main Ideas\n\n"
+                for idea in main_ideas:
+                    summary_markdown += f"- {idea}\n"
+                summary_markdown += "\n"
+            summary_markdown += f"## Conclusion\n\n{conclusion}"
+            yield log("Successfully parsed and formatted summary."), transcribed_text, summary_markdown, "Formatting Complete"

+        except (json.JSONDecodeError, AttributeError) as e:
+            error_message = f"Failed to parse the summary from the AI. Raw response: {summary_json_string}"
+            raise gr.Error(error_message)
+
+    except Exception as e:
+        raise gr.Error(f"Could not connect to the Typhoon API. Please check your API key. Error: {str(e)}")
+
+    # Step 5: Return final results
+    progress(1.0, desc="Done!")
+    yield log("Process finished successfully."), transcribed_text, summary_markdown, "Done!"
+
+# --- 4. Gradio UI ---
+# Custom CSS for a beautiful, blog-like output.
+css = """
+@import url('https://fonts.googleapis.com/css2?family=Sarabun:wght@400;700&display=swap');
+.blog-output {
+    font-family: 'Sarabun', sans-serif;
+    line-height: 1.8;
+    max-width: 800px;
+    margin: auto;
+    padding: 2rem;
+    border-radius: 12px;
+    background-color: #ffffff;
+    border: 1px solid #e5e7eb;
+}
+.blog-output h1 {
+    font-size: 2.2em;
+    font-weight: 700;
+    border-bottom: 2px solid #f3f4f6;
+    padding-bottom: 15px;
+    margin-bottom: 25px;
+    color: #111827;
+}
+.blog-output h2 {
+    font-size: 1.6em;
+    font-weight: 700;
+    margin-top: 40px;
+    margin-bottom: 20px;
+    color: #1f2937;
+}
+.blog-output p {
+    font-size: 1.1em;
+    margin-bottom: 20px;
+    color: #374151;
+}
+.blog-output ul {
+    padding-left: 25px;
+    list-style-type: disc;
+}
+.blog-output li {
+    margin-bottom: 12px;
+    padding-left: 5px;
+}
+"""
+
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css=css) as demo:
+    gr.Markdown(
+        """
+        # 🎙️ Audio to Blog Summarizer ✒️
+        Upload an audio file (MP3, WAV) or paste a YouTube link to transcribe it to Thai text and summarize the content into a beautiful, blog-style article using AI from NECTEC and OpenTyphoon.
+        """
+    )
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            with gr.Tabs():
+                with gr.TabItem("⬆️ Upload Audio File"):
+                    audio_file_input = gr.Audio(
+                        label="Upload MP3 or WAV file",
+                        type="filepath",
+                        sources=["upload"]
+                    )
+                with gr.TabItem("🔗 Paste YouTube Link"):
+                    youtube_url_input = gr.Textbox(
+                        label="Paste YouTube link here",
+                        placeholder="e.g., https://www.youtube.com/watch?v=..."
+                    )
+
+            submit_button = gr.Button("🚀 Generate Blog Post", variant="primary")
+            status_output = gr.Textbox(label="Status", interactive=False, lines=1)
+            with gr.Accordion("📝 View Process Log", open=False):
+                log_output = gr.Textbox(label="Log", interactive=False, lines=10)
+
+        with gr.Column(scale=2):
+            gr.Markdown("## ✨ Article Output")
+            blog_summary_output = gr.Markdown(elem_classes=["blog-output"])
+            with gr.Accordion("📜 View Full Transcription", open=False):
+                transcription_output = gr.Textbox(label="Full Text", interactive=False, lines=10)
+
+
+    # Link button to the main function
+    submit_button.click(
+        fn=transcribe_and_summarize,
+        inputs=[audio_file_input, youtube_url_input],
+        outputs=[log_output, transcription_output, blog_summary_output, status_output]
+    )
+
+if __name__ == "__main__":
+    demo.launch(debug=True)
requirements.txt  DELETED
@@ -1,341 +0,0 @@
-accelerate==1.8.1
-aiocache==0.12.3
-aiofiles==24.1.0
-aiohappyeyeballs==2.6.1
-aiohttp==3.11.11
-aiosignal==1.3.2
-alembic==1.14.0
-annotated-types==0.7.0
-anthropic==0.56.0
-anyio==4.9.0
-appdirs==1.4.4
-appnope==0.1.4
-APScheduler==3.10.4
-argon2-cffi==23.1.0
-argon2-cffi-bindings==21.2.0
-asgiref==3.8.1
-asttokens==3.0.0
-async-timeout==5.0.1
-attrs==25.3.0
-Authlib==1.4.1
-av==14.4.0
-azure-ai-documentintelligence==1.0.2
-azure-core==1.35.0
-azure-identity==1.20.0
-azure-storage-blob==12.24.1
-backoff==2.2.1
-bcrypt==4.3.0
-beautifulsoup4==4.13.4
-bidict==0.23.1
-bitarray==3.4.3
-black==25.1.0
-blinker==1.9.0
-boto3==1.35.53
-botocore==1.35.99
-Brotli==1.1.0
-build==1.2.2.post1
-cachetools==5.5.2
-certifi==2025.6.15
-cffi==1.17.1
-chardet==5.2.0
-charset-normalizer==3.4.2
-chroma-hnswlib==0.7.6
-chromadb==0.6.3
-click==8.2.1
-colbert-ai==0.2.21
-colorama==0.4.6
-colorclass==2.2.2
-coloredlogs==15.0.1
-comm==0.2.2
-compressed-rtf==1.0.7
-cryptography==45.0.5
-ctranslate2==4.6.0
-dataclasses-json==0.6.7
-datasets==3.6.0
-debugpy==1.8.5
-decorator==5.2.1
-defusedxml==0.7.1
-dill==0.3.8
-distro==1.9.0
-dnspython==2.7.0
-docker==7.1.0
-docx2txt==0.8
-duckduckgo_search==8.0.2
-durationpy==0.10
-easygui==0.98.3
-ebcdic==1.1.1
-ecdsa==0.19.1
-einops==0.8.1
-elastic-transport==8.17.1
-elasticsearch==9.0.1
-emoji==2.14.1
-et_xmlfile==2.0.0
-eval_type_backport==0.2.2
-Events==0.5
-executing==2.2.0
-extract-msg==0.54.1
-fake-useragent==2.1.0
-fastapi==0.115.7
-faster-whisper==1.1.1
-filelock==3.18.0
-filetype==1.2.0
-firecrawl-py==1.12.0
-Flask==3.1.1
-flatbuffers==25.2.10
-fonttools==4.58.5
-fpdf2==2.8.2
-frozenlist==1.7.0
-fs==2.4.16
-fsspec==2025.3.0
-ftfy==6.2.3
-gcp-storage-emulator==2024.8.3
-git-python==1.0.3
-gitdb==4.0.12
-GitPython==3.1.44
-google-ai-generativelanguage==0.6.15
-google-api-core==2.25.1
-google-api-python-client==2.174.0
-google-auth==2.40.3
-google-auth-httplib2==0.2.0
-google-auth-oauthlib==1.2.2
-google-cloud-core==2.4.3
-google-cloud-storage==2.19.0
-google-crc32c==1.7.1
-google-genai==1.15.0
-google-generativeai==0.8.5
-google-resumable-media==2.7.2
-googleapis-common-protos==1.63.2
-greenlet==3.1.1
-grpcio==1.67.1
-grpcio-status==1.67.1
-grpcio-tools==1.67.1
-h11==0.16.0
-h2==4.2.0
-hf-xet==1.1.5
-hf_transfer==0.1.9
-hpack==4.1.0
-html5lib==1.1
-httpcore==1.0.9
-httplib2==0.22.0
-httptools==0.6.4
-httpx==0.28.1
-httpx-sse==0.4.1
-huggingface-hub==0.33.2
-humanfriendly==10.0
-hyperframe==6.1.0
-idna==3.10
-importlib_metadata==8.7.0
-importlib_resources==6.5.2
-iniconfig==2.1.0
-ipykernel==6.29.5
-ipython==9.4.0
-ipython_pygments_lexers==1.1.1
-isodate==0.7.2
-itsdangerous==2.2.0
-jedi==0.19.2
-Jinja2==3.1.6
-jiter==0.10.0
-jmespath==1.0.1
-joblib==1.5.1
-jsonpatch==1.33
-jsonpointer==3.0.0
-jupyter_client==8.6.3
-jupyter_core==5.7.2
-kubernetes==33.1.0
-langchain==0.3.24
-langchain-community==0.3.23
-langchain-core==0.3.67
-langchain-text-splitters==0.3.8
-langdetect==1.0.9
-langfuse==2.44.0
-langsmith==0.3.45
-lark==1.1.9
-ldap3==2.9.1
-loguru==0.7.3
-lxml==6.0.0
-Mako==1.3.10
-Markdown==3.7
-markdown-it-py==3.0.0
-MarkupSafe==3.0.2
-marshmallow==3.26.1
-matplotlib-inline==0.1.7
-mdurl==0.1.2
-milvus-lite==2.5.1
-mmh3==5.1.0
-moto==5.1.6
-mpmath==1.3.0
-msal==1.32.3
-msal-extensions==1.3.1
-msoffcrypto-tool==5.4.2
-multidict==6.6.3
-multiprocess==0.70.16
-mypy_extensions==1.1.0
-nest-asyncio==1.6.0
-networkx==3.5
-ninja==1.11.1.4
-nltk==3.9.1
-numpy==1.26.4
-oauthlib==3.3.1
-olefile==0.47
-oletools==0.60.2
-onnxruntime==1.20.1
-open-webui==0.6.15
-openai==1.93.0
-opencv-python==4.11.0.86
-opencv-python-headless==4.11.0.86
-openpyxl==3.1.5
-opensearch-py==2.8.0
-opentelemetry-api==1.34.1
-opentelemetry-exporter-otlp-proto-common==1.34.1
-opentelemetry-exporter-otlp-proto-grpc==1.34.1
-opentelemetry-instrumentation==0.55b1
-opentelemetry-instrumentation-asgi==0.55b1
-opentelemetry-instrumentation-fastapi==0.55b1
-opentelemetry-proto==1.34.1
-opentelemetry-sdk==1.34.1
-opentelemetry-semantic-conventions==0.55b1
-opentelemetry-util-http==0.55b1
-orjson==3.10.18
-overrides==7.7.0
-packaging==23.2
-pandas==2.2.3
-parso==0.8.4
-passlib==1.7.4
-pathspec==0.12.1
-pcodedmp==1.2.6
-peewee==3.18.1
-peewee-migrate==1.12.2
-pexpect==4.9.0
-pgvector==0.4.0
-pillow==11.2.1
-pinecone==6.0.2
-pinecone-plugin-interface==0.0.7
-platformdirs==4.3.6
-playwright==1.49.1
-pluggy==1.6.0
-portalocker==2.10.1
-posthog==6.0.2
-primp==0.15.0
-prompt_toolkit==3.0.51
-propcache==0.3.2
-proto-plus==1.26.1
-protobuf==5.29.5
-psutil==7.0.0
-psycopg2-binary==2.9.9
-ptyprocess==0.7.0
-pure_eval==0.2.3
-py-partiql-parser==0.6.1
-pyarrow==20.0.0
-pyasn1==0.4.8
-pyasn1_modules==0.4.1
-pyclipper==1.3.0.post6
-pycparser==2.22
-pydantic==2.10.6
-pydantic-settings==2.10.1
-pydantic_core==2.27.2
-pydub==0.25.1
-pyee==12.0.0
-Pygments==2.19.2
-PyJWT==2.10.1
-pymdown-extensions==10.14.2
-pymilvus==2.5.0
-pymongo==4.13.2
-PyMySQL==1.1.1
-pypandoc==1.15
-pyparsing==3.2.3
-pypdf==4.3.1
-PyPika==0.48.9
-pyproject_hooks==1.2.0
-pytest==8.3.5
-pytest-docker==3.1.2
-python-dateutil==2.9.0.post0
-python-dotenv==1.1.1
-python-engineio==4.12.2
-python-iso639==2025.2.18
-python-jose==3.4.0
-python-magic==0.4.27
-python-multipart==0.0.20
-python-oxmsg==0.0.2
-python-pptx==1.0.2
-python-socketio==5.13.0
-pytube==15.0.0
-pytz==2025.2
-pyxlsb==1.0.10
-PyYAML==6.0.2
-pyzmq==26.2.0
-qdrant-client==1.12.2
-rank-bm25==0.2.2
-RapidFuzz==3.13.0
-rapidocr-onnxruntime==1.4.4
-red-black-tree-mod==1.22
-redis==6.2.0
-regex==2024.11.6
-requests==2.32.4
-requests-oauthlib==2.0.0
-requests-toolbelt==1.0.0
-responses==0.25.7
-RestrictedPython==8.0
-rich==14.0.0
-rsa==4.9.1
-RTFDE==0.1.2.1
-s3transfer==0.10.4
-safetensors==0.5.3
-scikit-learn==1.7.0
-scipy==1.16.0
-sentence-transformers==4.1.0
-sentencepiece==0.2.0
-shapely==2.1.1
-shellingham==1.5.4
-simple-websocket==1.1.0
-six==1.17.0
-smmap==5.0.2
-sniffio==1.3.1
-soundfile==0.13.1
-soupsieve==2.7
-SQLAlchemy==2.0.38
-stack-data==0.6.3
-starlette==0.45.3
-starlette-compress==1.6.0
-sympy==1.14.0
-tabulate==0.9.0
-tenacity==9.1.2
-tencentcloud-sdk-python==3.0.1336
-threadpoolctl==3.6.0
-tiktoken==0.9.0
-tokenizers==0.21.2
-torch==2.7.1
-tornado==6.4.1
-tqdm==4.67.1
-traitlets==5.14.3
-transformers==4.53.0
-typer==0.16.0
-typing-inspect==0.9.0
-typing-inspection==0.4.1
-typing_extensions==4.14.0
-tzdata==2025.2
-tzlocal==5.3.1
-ujson==5.10.0
-unstructured==0.16.17
-unstructured-client==0.32.3
-uritemplate==4.2.0
-urllib3==2.5.0
-uv==0.8.6
-uvicorn==0.34.2
-uvloop==0.21.0
-validators==0.35.0
-watchfiles==1.1.0
-wcwidth==0.2.13
-webencodings==0.5.1
-websocket-client==1.8.0
-websockets==15.0.1
-Werkzeug==3.1.3
-wrapt==1.17.2
-wsproto==1.2.0
-xlrd==2.0.1
-xlsxwriter==3.2.5
-xmltodict==0.14.2
-xxhash==3.5.0
-yarl==1.20.1
-youtube-transcript-api==1.1.0
-zipp==3.23.0
-zstandard==0.23.0