Upload 5 files
Browse files- .gitattributes +1 -0
- README.md +55 -0
- main.py +13 -0
- requirements.txt +12 -0
- sample.mp3 +0 -0
- sample.wav +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
sample.wav filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Speech-to-Text (CPU/GPU)
|
2 |
+
|
3 |
+
- **Model:** `openai/whisper-tiny` (MIT)
|
4 |
+
- **Task:** Transcribe short audio clips. Requires **ffmpeg** installed.
|
5 |
+
- **Note:** This repository only provides the resources needed to run the model on a laptop. We did not develop the model ourselves; it is an open-source model developed by OpenAI, and we use it here for experimentation.
|
6 |
+
|
7 |
+
## Quick start (any project)
|
8 |
+
|
9 |
+
```bash
|
10 |
+
# 1) Create env
|
11 |
+
python -m venv venv && source venv/bin/activate # Windows: .\venv\Scripts\activate
|
12 |
+
|
13 |
+
# 2) Install deps
|
14 |
+
pip install -r requirements.txt
|
15 |
+
|
16 |
+
# 3) Run
|
17 |
+
python main.py --help
|
18 |
+
```
|
19 |
+
|
20 |
+
> Tip: If you have a GPU + CUDA, PyTorch will auto-use it. If not, everything runs on CPU (slower but works).
|
21 |
+
|
22 |
+
---
|
23 |
+
|
24 |
+
The transcription is printed only when you run `main.py` from the command line with an audio file:
|
25 |
+
**Use:** python main.py --audio sample.wav
|
26 |
+
|
27 |
+
## FFmpeg Installation
|
28 |
+
|
29 |
+
1. Download FFmpeg:
|
30 |
+
- Visit https://www.gyan.dev/ffmpeg/builds/ and download `ffmpeg-git-essentials.zip`.
|
31 |
+
- Extract to `C:\ffmpeg` (or another folder, e.g., `C:\Users\jhaishna\Documents\ffmpeg`).
|
32 |
+
2. Add FFmpeg to System PATH:
|
33 |
+
- Right-click 'This PC' > Properties > Advanced system settings > Environment Variables.
|
34 |
+
- Under 'System Variables', find `Path`, click 'Edit', and add `C:\ffmpeg\bin` (adjust if extracted elsewhere).
|
35 |
+
- Save changes.
|
36 |
+
3. Verify Installation:
|
37 |
+
- Open CMD (or VS Code terminal) and run:
|
38 |
+
|
39 |
+
```
|
40 |
+
ffmpeg -version
|
41 |
+
```
|
42 |
+
- Expected output: `ffmpeg version ...`.
|
43 |
+
4. For VS Code PowerShell Terminal:
|
44 |
+
- If `ffmpeg -version` fails in VS Code, add FFmpeg to the PowerShell PATH:
|
45 |
+
|
46 |
+
```
|
47 |
+
$env:PATH += ";C:\ffmpeg\bin"
|
48 |
+
```
|
49 |
+
- To persist, edit PowerShell profile:
|
50 |
+
|
51 |
+
```
|
52 |
+
notepad $PROFILE
|
53 |
+
```
|
54 |
+
|
55 |
+
Add: `$env:PATH += ";C:\ffmpeg\bin"`. Save and restart the terminal.
|
main.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse, whisper
|
2 |
+
|
3 |
+
def main(argv=None):
    """Transcribe an audio file with OpenAI Whisper and print the text.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, so invoking the
            script from a shell behaves exactly as before.

    The model name and spoken language were previously hard-coded; they are
    now the ``--model`` and ``--language`` options, whose defaults
    (``"tiny"`` and ``"en"``) reproduce the earlier behaviour.
    """
    parser = argparse.ArgumentParser(
        description="Transcribe a short audio clip with OpenAI Whisper.",
    )
    parser.add_argument(
        "--audio",
        type=str,
        required=True,
        help="Path to WAV/MP3/M4A file",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="tiny",
        help="Whisper model name to load (default: %(default)s)",
    )
    parser.add_argument(
        "--language",
        type=str,
        default="en",
        help="Language code of the speech in the clip (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    model = whisper.load_model(args.model)  # auto-select device
    result = model.transcribe(args.audio, language=args.language)
    print(result["text"])


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch==2.1.0
|
2 |
+
torchvision==0.16.0
|
3 |
+
torchaudio==2.1.0
|
4 |
+
transformers==4.38.2
|
5 |
+
datasets==2.18.0
|
6 |
+
Pillow==10.2.0
|
7 |
+
numpy==1.26.4
|
8 |
+
tqdm==4.66.2
|
9 |
+
sentencepiece==0.1.99
|
10 |
+
sentence-transformers==2.6.1
|
11 |
+
easyocr==1.7.1
|
12 |
+
openai-whisper
|
sample.mp3
ADDED
Binary file (17.2 kB). View file
|
|
sample.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c38478da3500c9d88f981eed088dd1f06e2128cf9afa8d8aade14e271704b98
|
3 |
+
size 137166
|