minhpng committed on
Commit
3c36fb5
·
1 Parent(s): 746bf8f

add gradio client

Browse files
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  from fastapi import FastAPI
4
  from fastapi.middleware.cors import CORSMiddleware
5
 
6
- from routers import get_transcript, get_chatrespone, get_transcript_transformer
7
 
8
  os.environ['HF_HOME'] = "./cached/"
9
 
@@ -14,6 +14,7 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
14
  app.include_router(get_transcript.router)
15
  app.include_router(get_chatrespone.router)
16
  app.include_router(get_transcript_transformer.router)
 
17
 
18
  @app.get("/")
19
  def read_root():
 
3
  from fastapi import FastAPI
4
  from fastapi.middleware.cors import CORSMiddleware
5
 
6
+ from routers import get_transcript, get_chatrespone, get_transcript_transformer, get_transcript_gradio
7
 
8
  os.environ['HF_HOME'] = "./cached/"
9
 
 
14
  app.include_router(get_transcript.router)
15
  app.include_router(get_chatrespone.router)
16
  app.include_router(get_transcript_transformer.router)
17
+ app.include_router(get_transcript_gradio.router)
18
 
19
  @app.get("/")
20
  def read_root():
libs/rename_file.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
# Accented characters (Latin-1 and Vietnamese) and, at the same index in
# ``s0``, the unaccented ASCII letter each one is replaced with.
s1 = u'ÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚÝàáâãèéêìíòóôõùúýĂăĐđĨĩŨũƠơƯưẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ'
s0 = u'AAAAEEEIIOOOOUUYaaaaeeeiioooouuyAaDdIiUuOoUuAaAaAaAaAaAaAaAaAaAaAaAaEeEeEeEeEeEeEeEeIiIiOoOoOoOoOoOoOoOoOoOoOoOoUuUuUuUuUuUuUuYyYyYyYy'

# Built once at import time so each call is a single pass over the input
# instead of a linear scan of ``s1`` per character.
_ACCENT_TABLE = str.maketrans(s1, s0)


def remove_accents(input_str):
    """Return *input_str* with every character found in ``s1`` replaced.

    Each character that appears in ``s1`` is swapped for the character at the
    same position in ``s0``; every other character is kept unchanged.

    Args:
        input_str: The text to convert.

    Returns:
        A new string of the same length with the mapped characters replaced.
    """
    return input_str.translate(_ACCENT_TABLE)
14
+
15
+
16
# Normalise every entry name in the audio cache: strip accents, lowercase,
# and replace spaces with hyphens. The extension is kept as-is.
os.chdir('./cached/audio')

for f in os.listdir():
    f_name, f_ext = os.path.splitext(f)

    f_name = remove_accents(f_name).lower().replace(" ", "-")
    new_name = f'{f_name}{f_ext}'

    if new_name == f:
        # Already normalised; nothing to do.
        continue

    # On POSIX os.rename silently replaces an existing destination, so two
    # names that normalise to the same target would destroy one of the files.
    # samefile() keeps case-only renames working on case-insensitive
    # filesystems, where the "existing" target is the source itself.
    if os.path.exists(new_name) and not os.path.samefile(f, new_name):
        print(f"skip {f!r}: {new_name!r} already exists")
        continue

    os.rename(f, new_name)
libs/transformer/get_transcript_gradio_api.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio_client import Client, handle_file
2
+
3
+
4
def api_gradio_transcribe(url: str):
    """Transcribe an audio file via the ``hf-audio/whisper-large-v3-turbo`` Space.

    Args:
        url: Location of the audio; it is wrapped with ``handle_file`` before
            being sent as the ``inputs`` argument.

    Returns:
        Whatever ``Client.predict`` returns for the ``/predict`` endpoint with
        ``task="transcribe"``.
    """
    whisper_space = Client("hf-audio/whisper-large-v3-turbo")
    return whisper_space.predict(
        inputs=handle_file(url),
        task="transcribe",
        api_name="/predict",
    )
13
+
libs/transformer/open_ai_whisper.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
+
4
+
5
# Demo script: build a Whisper speech-recognition pipeline and print the
# transcript of one hard-coded remote media file.
model_id = "openai/whisper-large-v3-turbo"

# Half precision on the first CUDA device when one is available, otherwise
# full precision on the CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

# The processor supplies both the tokenizer and the feature extractor.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True,
)

result = pipe("https://static.langkingdom.com/user_playlist_practice_videos/bdfd406cb3c62603f653fa02d93fcae8.mov")
print(result["text"])
libs/transformer/youtube_download.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from yt_dlp import YoutubeDL
2
+ import uuid
3
+
4
def download_youtube(url: str):
    """Download the audio of *url* with yt-dlp and return the mp3 path.

    The file is written under ``./cached/audio`` with a random UUID as its
    base name, and the ``FFmpegExtractAudio`` post-processor converts it to
    mp3.

    Args:
        url: Address of the video to download.

    Returns:
        The path ``./cached/audio/<uuid>.mp3`` that the post-processor is
        configured to produce.
    """
    output_audio_folder = "./cached/audio"
    # A random name avoids collisions and unsafe characters from video titles.
    file_path = f"{output_audio_folder}/{uuid.uuid4()}"

    ydl_opts = {
        # Post-process the download into an mp3 file.
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '0',  # '0' means best quality, auto-determined by source
        }],
        'outtmpl': f"{file_path}.%(ext)s",
    }

    with YoutubeDL(params=ydl_opts) as ydl:
        # download() is documented to take a list of URLs.
        ydl.download([url])

    return f"{file_path}.mp3"
24
+
25
+
26
+
27
+
requirements.txt CHANGED
@@ -1,20 +1,28 @@
1
  accelerate==1.1.1
 
 
 
2
  annotated-types==0.7.0
3
  anyio==4.5.0
 
4
  av==12.3.0
5
  certifi==2024.8.30
6
  charset-normalizer==3.3.2
7
  click==8.1.7
8
  coloredlogs==15.0.1
9
  ctranslate2==4.4.0
 
10
  fastapi==0.115.0
11
  faster-whisper==1.0.3
12
  filelock==3.16.1
13
  flatbuffers==24.3.25
 
14
  fsspec==2024.9.0
 
15
  h11==0.14.0
16
  httpcore==1.0.7
17
  httpx==0.27.2
 
18
  huggingface-hub==0.25.1
19
  humanfriendly==10.0
20
  idna==3.10
@@ -22,22 +30,30 @@ Jinja2==3.1.4
22
  joblib==1.4.2
23
  jsonpatch==1.33
24
  jsonpointer==3.0.0
25
- langchain-core==0.3.19
 
 
26
  langchain-huggingface==0.1.2
27
  langchain-ollama==0.2.0
 
28
  langsmith==0.1.144
29
  MarkupSafe==3.0.2
 
30
  mpmath==1.3.0
 
 
31
  networkx==3.4.2
32
- numpy==2.1.2
33
  ollama==0.4.1
34
  onnxruntime==1.19.2
35
  orjson==3.10.11
36
  packaging==24.1
37
  pillow==11.0.0
 
38
  protobuf==5.28.2
39
  psutil==6.1.0
40
  pydantic==2.9.2
 
41
  pydantic_core==2.23.4
42
  python-dotenv==1.0.1
43
  PyYAML==6.0.2
@@ -50,6 +66,7 @@ scipy==1.14.1
50
  sentence-transformers==3.3.1
51
  setuptools==75.1.0
52
  sniffio==1.3.1
 
53
  starlette==0.38.5
54
  sympy==1.13.1
55
  tenacity==9.0.0
@@ -58,6 +75,10 @@ tokenizers==0.20.0
58
  torch==2.5.1
59
  tqdm==4.66.5
60
  transformers==4.46.3
 
61
  typing_extensions==4.12.2
62
  urllib3==2.2.3
63
  uvicorn==0.30.6
 
 
 
 
1
  accelerate==1.1.1
2
+ aiohappyeyeballs==2.4.3
3
+ aiohttp==3.11.7
4
+ aiosignal==1.3.1
5
  annotated-types==0.7.0
6
  anyio==4.5.0
7
+ attrs==24.2.0
8
  av==12.3.0
9
  certifi==2024.8.30
10
  charset-normalizer==3.3.2
11
  click==8.1.7
12
  coloredlogs==15.0.1
13
  ctranslate2==4.4.0
14
+ dataclasses-json==0.6.7
15
  fastapi==0.115.0
16
  faster-whisper==1.0.3
17
  filelock==3.16.1
18
  flatbuffers==24.3.25
19
+ frozenlist==1.5.0
20
  fsspec==2024.9.0
21
+ gradio_client==1.5.0
22
  h11==0.14.0
23
  httpcore==1.0.7
24
  httpx==0.27.2
25
+ httpx-sse==0.4.0
26
  huggingface-hub==0.25.1
27
  humanfriendly==10.0
28
  idna==3.10
 
30
  joblib==1.4.2
31
  jsonpatch==1.33
32
  jsonpointer==3.0.0
33
+ langchain==0.3.8
34
+ langchain-community==0.3.8
35
+ langchain-core==0.3.21
36
  langchain-huggingface==0.1.2
37
  langchain-ollama==0.2.0
38
+ langchain-text-splitters==0.3.2
39
  langsmith==0.1.144
40
  MarkupSafe==3.0.2
41
+ marshmallow==3.23.1
42
  mpmath==1.3.0
43
+ multidict==6.1.0
44
+ mypy-extensions==1.0.0
45
  networkx==3.4.2
46
+ numpy==1.26.4
47
  ollama==0.4.1
48
  onnxruntime==1.19.2
49
  orjson==3.10.11
50
  packaging==24.1
51
  pillow==11.0.0
52
+ propcache==0.2.0
53
  protobuf==5.28.2
54
  psutil==6.1.0
55
  pydantic==2.9.2
56
+ pydantic-settings==2.6.1
57
  pydantic_core==2.23.4
58
  python-dotenv==1.0.1
59
  PyYAML==6.0.2
 
66
  sentence-transformers==3.3.1
67
  setuptools==75.1.0
68
  sniffio==1.3.1
69
+ SQLAlchemy==2.0.35
70
  starlette==0.38.5
71
  sympy==1.13.1
72
  tenacity==9.0.0
 
75
  torch==2.5.1
76
  tqdm==4.66.5
77
  transformers==4.46.3
78
+ typing-inspect==0.9.0
79
  typing_extensions==4.12.2
80
  urllib3==2.2.3
81
  uvicorn==0.30.6
82
+ websockets==12.0
83
+ yarl==1.18.0
84
+ yt-dlp==2024.11.18
routers/get_transcript.py CHANGED
@@ -22,7 +22,7 @@ def get_transcript(audio_path: str, model_size: str = "distil-large-v3", api_key
22
  # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
23
  # or run on CPU with INT8
24
  # model_run = WhisperModel(model_size, device="cpu", compute_type="int8")
25
-
26
 
27
  print(f"model>>>: {model_size}")
28
 
@@ -59,6 +59,7 @@ def get_transcript(audio_path: str, model_size: str = "distil-large-v3", api_key
59
 
60
  for segment in segments:
61
  text += segment.text
 
62
  listSentences.append({
63
  "start_time": segment.start,
64
  "end_time": segment.end,
 
22
  # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
23
  # or run on CPU with INT8
24
  # model_run = WhisperModel(model_size, device="cpu", compute_type="int8")
25
+ model_size: str = "distil-large-v3"
26
 
27
  print(f"model>>>: {model_size}")
28
 
 
59
 
60
  for segment in segments:
61
  text += segment.text
62
+ print(segment)
63
  listSentences.append({
64
  "start_time": segment.start,
65
  "end_time": segment.end,
routers/get_transcript_gradio.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import time
4
+ from fastapi import APIRouter, Depends, HTTPException, status
5
+
6
+
7
+ from libs.convert_to_audio import convert_to_audio
8
+ from libs.header_api_auth import get_api_key
9
+ from libs.transformer.get_transcript_gradio_api import api_gradio_transcribe
10
+
11
+
12
+
13
+ router = APIRouter(prefix="/get-transcript-gradio", tags=["transcript"])
14
+
15
@router.get("/")
def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-small.en", api_key: str = Depends(get_api_key)):
    """Convert *audio_path* to mp3 and transcribe it through the Gradio API.

    Args:
        audio_path: Location of the source media. Its last ``/``-separated
            component, cut at the first dot, names the temporary mp3.
        model_size: Accepted for parity with the other transcript routes; it
            is not used by this endpoint.
        api_key: Resolved by the ``get_api_key`` dependency.

    Returns:
        A dict with ``text`` (the result of ``api_gradio_transcribe``), an
        empty ``list_sentence`` list, and ``elapsed_time`` in seconds rounded
        to two decimals.

    Raises:
        HTTPException: With status 403 when the transcription call fails.
    """
    st = time.time()

    output_audio_folder = "./cached/audio"
    # exist_ok avoids the race between an exists() check and makedirs().
    os.makedirs(output_audio_folder, exist_ok=True)

    # Computed outside the f-string: reusing the enclosing quote character
    # inside an f-string expression is a SyntaxError before Python 3.12.
    base_name = audio_path.split("/")[-1].split(".")[0]
    output_file = f"{output_audio_folder}/{base_name}.mp3"

    try:
        # Conversion sits inside the cleanup scope so a partially written mp3
        # is removed when it fails; its exception still propagates unchanged.
        convert_to_audio(audio_path.strip(), output_file)
        try:
            text = api_gradio_transcribe(output_file)
        except Exception as error:
            # NOTE(review): 403 is kept for backward compatibility; a 5xx
            # status would describe an upstream failure more accurately.
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}") from error
    finally:
        if os.path.exists(output_file):
            os.remove(output_file)

    elapsed_time = time.time() - st

    return {
        "text": text,
        'list_sentence': [],
        'elapsed_time': round(elapsed_time, 2),
    }
routers/get_transcript_transformer.py CHANGED
@@ -6,7 +6,10 @@ from libs.convert_to_audio import convert_to_audio
6
  from libs.transformer.get_transcript import get_transcript_gpu
7
  from libs.transformer.get_transcript_2 import get_transcribe_transformers
8
 
 
 
9
  from libs.header_api_auth import get_api_key
 
10
 
11
 
12
  router = APIRouter(prefix="/get-transcript-transformer", tags=["transcript"])
@@ -20,9 +23,11 @@ def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-sma
20
  if not os.path.exists(output_audio_folder):
21
  os.makedirs(output_audio_folder)
22
 
23
- output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split(".")[0]}.mp3"
24
-
25
- convert_to_audio(audio_path.strip(), output_file)
 
 
26
 
27
  try:
28
  text, chunks = get_transcribe_transformers(output_file, model_size)
 
6
  from libs.transformer.get_transcript import get_transcript_gpu
7
  from libs.transformer.get_transcript_2 import get_transcribe_transformers
8
 
9
+ from langchain_community.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
10
+
11
  from libs.header_api_auth import get_api_key
12
+ from libs.transformer.youtube_download import download_youtube
13
 
14
 
15
  router = APIRouter(prefix="/get-transcript-transformer", tags=["transcript"])
 
23
  if not os.path.exists(output_audio_folder):
24
  os.makedirs(output_audio_folder)
25
 
26
+ if("https://www.youtube.com" in audio_path):
27
+ output_file = download_youtube(audio_path)
28
+ else:
29
+ output_file = f"{output_audio_folder}/{audio_path.split('/')[-1].split(".")[0]}.mp3"
30
+ convert_to_audio(audio_path.strip(), output_file)
31
 
32
  try:
33
  text, chunks = get_transcribe_transformers(output_file, model_size)