Add Darija transcription and topic extraction app6
Browse files
app.py
CHANGED
@@ -13,8 +13,9 @@ transcription_model = Wav2Vec2ForCTC.from_pretrained("boumehdi/wav2vec2-large-xl
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 # Topic Classification Model (BERT for example)
-topic_model = BertForSequenceClassification.from_pretrained("
-topic_tokenizer = BertTokenizer.from_pretrained("
+topic_model = BertForSequenceClassification.from_pretrained("bert-base-uncased") # Example model
+topic_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+
 
 # Function to resample audio to 16kHz if necessary
 def resample_audio(audio_path, target_sr=16000):