Commit ea54579 · Parent(s): 954cfbc
Reduce peak GPU memory use during model init
Signed-off-by: Piotr Żelasko <[email protected]>
app.py CHANGED
@@ -16,10 +16,8 @@ MAX_AUDIO_MINUTES = 10 # wont try to transcribe if longer than this
 CHUNK_SECONDS = 40.0  # max audio length seen by the model
 BATCH_SIZE = 8  # for parallel transcription of audio longer than CHUNK_SECONDS
 
-
-
-model = SALM.from_pretrained("nvidia/canary-qwen-2.5b").bfloat16().eval().to(device)
-torch.set_default_dtype(torch.float32)
+
+model = SALM.from_pretrained("nvidia/canary-qwen-2.5b").bfloat16().eval().to(device)
 
 
 def timestamp(idx: int):
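For context on the change: casting weights to bfloat16 while they still live in host RAM, and only then moving them to the GPU, keeps the full-precision fp32 copy out of device memory, roughly halving the peak GPU footprint during init. A minimal sketch of that pattern with a plain torch module (the SALM model itself is not reproduced; the stand-in module and layer sizes below are hypothetical):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hypothetical stand-in for SALM.from_pretrained(...): a module
# materialized in fp32 on the CPU.
model = torch.nn.Sequential(
    torch.nn.Linear(4096, 4096),
    torch.nn.Linear(4096, 4096),
)

# Cast on the host first, then move: the GPU only ever holds the
# 2-bytes/param bf16 copy, never the 4-bytes/param fp32 original.
model = model.bfloat16().eval().to(device)

# By contrast, model.to(device).bfloat16() would briefly hold the fp32
# weights on the GPU before the cast, raising the init-time peak.

On CUDA, comparing torch.cuda.max_memory_allocated() after each variant makes the difference concrete.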