Spaces:
Sleeping
Sleeping
add gradio client
Browse files- app.py +2 -1
- libs/rename_file.py +25 -0
- libs/transformer/get_transcript_gradio_api.py +13 -0
- libs/transformer/open_ai_whisper.py +28 -0
- libs/transformer/youtube_download.py +27 -0
- requirements.txt +23 -2
- routers/get_transcript.py +2 -1
- routers/get_transcript_gradio.py +44 -0
- routers/get_transcript_transformer.py +8 -3
app.py
CHANGED
@@ -3,7 +3,7 @@ import os
|
|
3 |
from fastapi import FastAPI
|
4 |
from fastapi.middleware.cors import CORSMiddleware
|
5 |
|
6 |
-
from routers import get_transcript, get_chatrespone, get_transcript_transformer
|
7 |
|
8 |
os.environ['HF_HOME'] = "./cached/"
|
9 |
|
@@ -14,6 +14,7 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
|
|
14 |
app.include_router(get_transcript.router)
|
15 |
app.include_router(get_chatrespone.router)
|
16 |
app.include_router(get_transcript_transformer.router)
|
|
|
17 |
|
18 |
@app.get("/")
|
19 |
def read_root():
|
|
|
3 |
from fastapi import FastAPI
|
4 |
from fastapi.middleware.cors import CORSMiddleware
|
5 |
|
6 |
+
from routers import get_transcript, get_chatrespone, get_transcript_transformer, get_transcript_gradio
|
7 |
|
8 |
os.environ['HF_HOME'] = "./cached/"
|
9 |
|
|
|
14 |
app.include_router(get_transcript.router)
|
15 |
app.include_router(get_chatrespone.router)
|
16 |
app.include_router(get_transcript_transformer.router)
|
17 |
+
app.include_router(get_transcript_gradio.router)
|
18 |
|
19 |
@app.get("/")
|
20 |
def read_root():
|
libs/rename_file.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
# Vietnamese accented letters (s1) and, index for index, the plain ASCII
# letter each one is replaced with (s0).
s1 = u'ÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝàáâãèéêìíòóôõùúýĂăĐđĨĩŨũƠơƯưẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ'
s0 = u'AAAAEEEIIOOOOUUYaaaaeeeiioooouuyAaDdIiUuOoUuAaAaAaAaAaAaAaAaAaAaAaAaEeEeEeEeEeEeEeEeIiIiOoOoOoOoOoOoOoOoOoOoOoOoUuUuUuUuUuUuUuYyYyYyYy'

# Translation table built once at import time, so each call is a single
# pass over the input instead of a linear scan of s1 per character.
_ACCENT_TABLE = str.maketrans(dict(zip(s1, s0)))


def remove_accents(input_str):
    """Return ``input_str`` with accented letters replaced by ASCII letters.

    Every character listed in ``s1`` is replaced by the character at the
    same position in ``s0``; all other characters are returned unchanged.

    Args:
        input_str: The text to convert.

    Returns:
        A new string of the same length with the replacements applied.
    """
    return input_str.translate(_ACCENT_TABLE)
|
14 |
+
|
15 |
+
|
16 |
+
# Normalise every entry name in the audio cache: strip accents, lower-case
# the name and replace spaces with hyphens. The extension is kept as-is.
# NOTE: this runs at import time and changes the process working directory.
os.chdir('./cached/audio')

for f in os.listdir():
    f_name, f_ext = os.path.splitext(f)

    f_name = remove_accents(f_name).lower().replace(" ", "-")

    new_name = f'{f_name}{f_ext}'

    # Nothing to do when the name is already in normalised form.
    if new_name != f:
        os.rename(f, new_name)
|
libs/transformer/get_transcript_gradio_api.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from gradio_client import Client, handle_file
|
2 |
+
|
3 |
+
|
4 |
+
def api_gradio_transcribe(url: str):
    """Transcribe an audio file through the hosted Whisper Gradio Space.

    Opens a client for the ``hf-audio/whisper-large-v3-turbo`` Space and
    calls its ``/predict`` endpoint with the "transcribe" task.

    Args:
        url: Location of the audio; it is wrapped with ``handle_file``
            before being sent.

    Returns:
        Whatever the ``/predict`` endpoint returns.
    """
    whisper_space = Client("hf-audio/whisper-large-v3-turbo")
    return whisper_space.predict(
        inputs=handle_file(url),
        task="transcribe",
        api_name="/predict",
    )
|
13 |
+
|
libs/transformer/open_ai_whisper.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
3 |
+
|
4 |
+
|
5 |
+
# Demo script: load Whisper large-v3-turbo and transcribe one sample file.

# First GPU with half precision when CUDA is present, otherwise CPU/float32.
if torch.cuda.is_available():
    device = "cuda:0"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

model_id = "openai/whisper-large-v3-turbo"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor supplies both the tokenizer and the feature extractor
# that the pipeline below is given.
processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True,
)

result = pipe("https://static.langkingdom.com/user_playlist_practice_videos/bdfd406cb3c62603f653fa02d93fcae8.mov")
print(result["text"])
|
libs/transformer/youtube_download.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from yt_dlp import YoutubeDL
|
2 |
+
import uuid
|
3 |
+
|
4 |
+
def download_youtube(url:str):
    """Download the audio track of ``url`` as an MP3 under ``./cached/audio``.

    Args:
        url: Address of the video to download.

    Returns:
        Path of the MP3 file: a random UUID basename inside
        ``./cached/audio`` with the ``.mp3`` extension.
    """
    output_audio_folder = "./cached/audio"
    # Random basename so separate downloads get distinct file names.
    file_path = f"{output_audio_folder}/{uuid.uuid4()}"

    ydl_opts = {
        # Prefer an audio-only stream; without this the default format
        # selection also downloads the video before audio is extracted.
        'format': 'bestaudio/best',
        'postprocessors': [{  # Post-process to convert to MP3
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',  # Convert to mp3
            'preferredquality': '0',  # '0' means best quality, auto-determined by source
        }],
        # %(ext)s is filled in by yt-dlp; after the post-processor runs
        # the resulting file carries the .mp3 extension returned below.
        'outtmpl': f"{file_path}.%(ext)s",
    }

    with YoutubeDL(params=ydl_opts) as ydl:
        # download() is documented as taking a list of URLs.
        ydl.download([url])

    return f"{file_path}.mp3"
|
24 |
+
|
25 |
+
|
26 |
+
|
27 |
+
|
requirements.txt
CHANGED
@@ -1,20 +1,28 @@
|
|
1 |
accelerate==1.1.1
|
|
|
|
|
|
|
2 |
annotated-types==0.7.0
|
3 |
anyio==4.5.0
|
|
|
4 |
av==12.3.0
|
5 |
certifi==2024.8.30
|
6 |
charset-normalizer==3.3.2
|
7 |
click==8.1.7
|
8 |
coloredlogs==15.0.1
|
9 |
ctranslate2==4.4.0
|
|
|
10 |
fastapi==0.115.0
|
11 |
faster-whisper==1.0.3
|
12 |
filelock==3.16.1
|
13 |
flatbuffers==24.3.25
|
|
|
14 |
fsspec==2024.9.0
|
|
|
15 |
h11==0.14.0
|
16 |
httpcore==1.0.7
|
17 |
httpx==0.27.2
|
|
|
18 |
huggingface-hub==0.25.1
|
19 |
humanfriendly==10.0
|
20 |
idna==3.10
|
@@ -22,22 +30,30 @@ Jinja2==3.1.4
|
|
22 |
joblib==1.4.2
|
23 |
jsonpatch==1.33
|
24 |
jsonpointer==3.0.0
|
25 |
-
langchain
|
|
|
|
|
26 |
langchain-huggingface==0.1.2
|
27 |
langchain-ollama==0.2.0
|
|
|
28 |
langsmith==0.1.144
|
29 |
MarkupSafe==3.0.2
|
|
|
30 |
mpmath==1.3.0
|
|
|
|
|
31 |
networkx==3.4.2
|
32 |
-
numpy==
|
33 |
ollama==0.4.1
|
34 |
onnxruntime==1.19.2
|
35 |
orjson==3.10.11
|
36 |
packaging==24.1
|
37 |
pillow==11.0.0
|
|
|
38 |
protobuf==5.28.2
|
39 |
psutil==6.1.0
|
40 |
pydantic==2.9.2
|
|
|
41 |
pydantic_core==2.23.4
|
42 |
python-dotenv==1.0.1
|
43 |
PyYAML==6.0.2
|
@@ -50,6 +66,7 @@ scipy==1.14.1
|
|
50 |
sentence-transformers==3.3.1
|
51 |
setuptools==75.1.0
|
52 |
sniffio==1.3.1
|
|
|
53 |
starlette==0.38.5
|
54 |
sympy==1.13.1
|
55 |
tenacity==9.0.0
|
@@ -58,6 +75,10 @@ tokenizers==0.20.0
|
|
58 |
torch==2.5.1
|
59 |
tqdm==4.66.5
|
60 |
transformers==4.46.3
|
|
|
61 |
typing_extensions==4.12.2
|
62 |
urllib3==2.2.3
|
63 |
uvicorn==0.30.6
|
|
|
|
|
|
|
|
1 |
accelerate==1.1.1
|
2 |
+
aiohappyeyeballs==2.4.3
|
3 |
+
aiohttp==3.11.7
|
4 |
+
aiosignal==1.3.1
|
5 |
annotated-types==0.7.0
|
6 |
anyio==4.5.0
|
7 |
+
attrs==24.2.0
|
8 |
av==12.3.0
|
9 |
certifi==2024.8.30
|
10 |
charset-normalizer==3.3.2
|
11 |
click==8.1.7
|
12 |
coloredlogs==15.0.1
|
13 |
ctranslate2==4.4.0
|
14 |
+
dataclasses-json==0.6.7
|
15 |
fastapi==0.115.0
|
16 |
faster-whisper==1.0.3
|
17 |
filelock==3.16.1
|
18 |
flatbuffers==24.3.25
|
19 |
+
frozenlist==1.5.0
|
20 |
fsspec==2024.9.0
|
21 |
+
gradio_client==1.5.0
|
22 |
h11==0.14.0
|
23 |
httpcore==1.0.7
|
24 |
httpx==0.27.2
|
25 |
+
httpx-sse==0.4.0
|
26 |
huggingface-hub==0.25.1
|
27 |
humanfriendly==10.0
|
28 |
idna==3.10
|
|
|
30 |
joblib==1.4.2
|
31 |
jsonpatch==1.33
|
32 |
jsonpointer==3.0.0
|
33 |
+
langchain==0.3.8
|
34 |
+
langchain-community==0.3.8
|
35 |
+
langchain-core==0.3.21
|
36 |
langchain-huggingface==0.1.2
|
37 |
langchain-ollama==0.2.0
|
38 |
+
langchain-text-splitters==0.3.2
|
39 |
langsmith==0.1.144
|
40 |
MarkupSafe==3.0.2
|
41 |
+
marshmallow==3.23.1
|
42 |
mpmath==1.3.0
|
43 |
+
multidict==6.1.0
|
44 |
+
mypy-extensions==1.0.0
|
45 |
networkx==3.4.2
|
46 |
+
numpy==1.26.4
|
47 |
ollama==0.4.1
|
48 |
onnxruntime==1.19.2
|
49 |
orjson==3.10.11
|
50 |
packaging==24.1
|
51 |
pillow==11.0.0
|
52 |
+
propcache==0.2.0
|
53 |
protobuf==5.28.2
|
54 |
psutil==6.1.0
|
55 |
pydantic==2.9.2
|
56 |
+
pydantic-settings==2.6.1
|
57 |
pydantic_core==2.23.4
|
58 |
python-dotenv==1.0.1
|
59 |
PyYAML==6.0.2
|
|
|
66 |
sentence-transformers==3.3.1
|
67 |
setuptools==75.1.0
|
68 |
sniffio==1.3.1
|
69 |
+
SQLAlchemy==2.0.35
|
70 |
starlette==0.38.5
|
71 |
sympy==1.13.1
|
72 |
tenacity==9.0.0
|
|
|
75 |
torch==2.5.1
|
76 |
tqdm==4.66.5
|
77 |
transformers==4.46.3
|
78 |
+
typing-inspect==0.9.0
|
79 |
typing_extensions==4.12.2
|
80 |
urllib3==2.2.3
|
81 |
uvicorn==0.30.6
|
82 |
+
websockets==12.0
|
83 |
+
yarl==1.18.0
|
84 |
+
yt-dlp==2024.11.18
|
routers/get_transcript.py
CHANGED
@@ -22,7 +22,7 @@ def get_transcript(audio_path: str, model_size: str = "distil-large-v3", api_key
|
|
22 |
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
|
23 |
# or run on CPU with INT8
|
24 |
# model_run = WhisperModel(model_size, device="cpu", compute_type="int8")
|
25 |
-
|
26 |
|
27 |
print(f"model>>>: {model_size}")
|
28 |
|
@@ -59,6 +59,7 @@ def get_transcript(audio_path: str, model_size: str = "distil-large-v3", api_key
|
|
59 |
|
60 |
for segment in segments:
|
61 |
text += segment.text
|
|
|
62 |
listSentences.append({
|
63 |
"start_time": segment.start,
|
64 |
"end_time": segment.end,
|
|
|
22 |
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
|
23 |
# or run on CPU with INT8
|
24 |
# model_run = WhisperModel(model_size, device="cpu", compute_type="int8")
|
25 |
+
model_size: str = "distil-large-v3"
|
26 |
|
27 |
print(f"model>>>: {model_size}")
|
28 |
|
|
|
59 |
|
60 |
for segment in segments:
|
61 |
text += segment.text
|
62 |
+
print(segment)
|
63 |
listSentences.append({
|
64 |
"start_time": segment.start,
|
65 |
"end_time": segment.end,
|
routers/get_transcript_gradio.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os
|
3 |
+
import time
|
4 |
+
from fastapi import APIRouter, Depends, HTTPException, status
|
5 |
+
|
6 |
+
|
7 |
+
from libs.convert_to_audio import convert_to_audio
|
8 |
+
from libs.header_api_auth import get_api_key
|
9 |
+
from libs.transformer.get_transcript_gradio_api import api_gradio_transcribe
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
router = APIRouter(prefix="/get-transcript-gradio", tags=["transcript"])
|
14 |
+
|
15 |
+
@router.get("/")
def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-small.en", api_key: str = Depends(get_api_key)):
    """Convert ``audio_path`` to MP3 and transcribe it via the Gradio API.

    Args:
        audio_path: Location of the source media. Its last path segment,
            up to the first dot, names the temporary MP3 file.
        model_size: Accepted for parity with the other transcript routes;
            not used by this endpoint.
        api_key: Resolved by the ``get_api_key`` dependency.

    Returns:
        A dict with ``text`` (the transcription result), an empty
        ``list_sentence`` list and ``elapsed_time`` in seconds, rounded to
        two decimals.

    Raises:
        HTTPException: With status 403 when the transcription call fails.
    """
    st = time.perf_counter()

    output_audio_folder = "./cached/audio"
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(output_audio_folder, exist_ok=True)

    source = audio_path.strip()
    # Computed outside the f-string: reusing the same quote character inside
    # an f-string is a SyntaxError before Python 3.12.
    stem = source.split('/')[-1].split('.')[0]
    output_file = f"{output_audio_folder}/{stem}.mp3"

    try:
        # Conversion errors propagate unchanged; the outer finally still
        # removes any partially written file.
        convert_to_audio(source, output_file)

        try:
            text = api_gradio_transcribe(output_file)
        except Exception as error:
            # NOTE(review): 403 is kept for backward compatibility, although
            # a 5xx status would describe an upstream failure better.
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}") from error
    finally:
        if os.path.exists(output_file):
            os.remove(output_file)

    elapsed_time = time.perf_counter() - st

    return {
        "text": text,
        'list_sentence': [],
        'elapsed_time': round(elapsed_time, 2),
    }
|
routers/get_transcript_transformer.py
CHANGED
@@ -6,7 +6,10 @@ from libs.convert_to_audio import convert_to_audio
|
|
6 |
from libs.transformer.get_transcript import get_transcript_gpu
|
7 |
from libs.transformer.get_transcript_2 import get_transcribe_transformers
|
8 |
|
|
|
|
|
9 |
from libs.header_api_auth import get_api_key
|
|
|
10 |
|
11 |
|
12 |
router = APIRouter(prefix="/get-transcript-transformer", tags=["transcript"])
|
@@ -20,9 +23,11 @@ def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-sma
|
|
20 |
if not os.path.exists(output_audio_folder):
|
21 |
os.makedirs(output_audio_folder)
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
26 |
|
27 |
try:
|
28 |
text, chunks = get_transcribe_transformers(output_file, model_size)
|
|
|
6 |
from libs.transformer.get_transcript import get_transcript_gpu
|
7 |
from libs.transformer.get_transcript_2 import get_transcribe_transformers
|
8 |
|
9 |
+
from langchain_community.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
|
10 |
+
|
11 |
from libs.header_api_auth import get_api_key
|
12 |
+
from libs.transformer.youtube_download import download_youtube
|
13 |
|
14 |
|
15 |
router = APIRouter(prefix="/get-transcript-transformer", tags=["transcript"])
|
|
|
23 |
if not os.path.exists(output_audio_folder):
|
24 |
os.makedirs(output_audio_folder)
|
25 |
|
26 |
+
if("https://www.youtube.com" in audio_path):
|
27 |
+
output_file = download_youtube(audio_path)
|
28 |
+
else:
|
29 |
+
output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split(".")[0]}.mp3"
|
30 |
+
convert_to_audio(audio_path.strip(), output_file)
|
31 |
|
32 |
try:
|
33 |
text, chunks = get_transcribe_transformers(output_file, model_size)
|