tdd + diarization
- app.py +16 -9
- audio/audioanalyser_anglais.py +15 -0
- audio/audioanalyser_diarization.py +14 -0
- audio/audioanalyser_francais.py +18 -0
- index.html +6 -0
- tests/audio/test_audio.py +19 -0
- tests/testsdata/audio/1.wav +0 -0
app.py CHANGED
@@ -4,18 +4,14 @@
 from fastapi import FastAPI, UploadFile
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
-from
-
-from transformers import pipeline  # the huggingface framework
+from audio.audioanalyser_anglais import AudioAnalyserAnglais
+from audio.audioanalyser_diarization import AudioAnalyserDiarization
 #from datasets import load_dataset, Audio  # that one is for training my model


 app = FastAPI()

-
-#deepneurones = pipeline("automatic-speech-recognition")  # the list of huggingface pipelines is available here: https://huggingface.co/docs/transformers/quicktour. pipeline() downloads the deep-learning model into a local cache
-#deepneurones = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")  # a model has to be chosen
-deepneurones = Pipeline.from_pretrained("pyannote/speaker-diarization")
+
 @app.get("/healthcheck")
 def healthcheck():

@@ -27,11 +23,22 @@ def healthcheck():
 async def stt(file: str = UploadFile(...)):
     #file_content = base64.b64decode(file)
     file_content = await file.read()
+    results = AudioAnalyserAnglais.stt(file_content)
     #dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")
-
+
     return {"output": results}
 #app.mount("/", StaticFiles(directory="static", html=True), name="static")
-
+
+@app.post("/diarization")
+async def diarization(file: str = UploadFile(...)):
+    #file_content = base64.b64decode(file)
+    file_content = await file.read()
+    results = AudioAnalyserDiarization.diarization(file_content)
+    #dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")
+
+    return {"output": results}
+#app.mount("/", StaticFiles(directory="static", html=True), name="static")
+

 @app.get("/")
 def index() -> FileResponse:
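To smoke-test the two new endpoints once the app is up, something like the following should work (a minimal sketch: the host/port and the use of `requests` are assumptions on my part, not part of this commit):

import requests

with open("tests/testsdata/audio/1.wav", "rb") as f:
    audio = f.read()

for route in ("/stt", "/diarization"):
    resp = requests.post(
        f"http://127.0.0.1:8000{route}",  # default uvicorn address, assumed
        files={"file": ("1.wav", audio, "audio/wav")},
    )
    print(route, resp.status_code, resp.text[:200])

One caveat: /diarization puts a pyannote Annotation straight into the JSON body, which FastAPI's encoder will most likely refuse to serialize; flattening the segments first (see the sketch after audioanalyser_diarization.py below) is probably needed.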
audio/audioanalyser_anglais.py ADDED
@@ -0,0 +1,15 @@
+#deepneurones = pipeline("text2text-generation", model="google/flan-t5-small")
+#deepneurones = pipeline("automatic-speech-recognition")  # the list of huggingface pipelines is available here: https://huggingface.co/docs/transformers/quicktour. pipeline() downloads the deep-learning model into a local cache
+from transformers import pipeline  # the huggingface framework
+deepneurones = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")  # a model has to be chosen
+
+#from pyannote.audio import Pipeline
+#,use_auth_token="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")
+#deepneuronesdiarizatin = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token="test")
+
+
+class AudioAnalyserAnglais:
+
+    @classmethod
+    def stt(cls, file_content):
+        return deepneurones(file_content)
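The transformers ASR pipeline accepts raw file bytes (it decodes them through ffmpeg), so the class can be exercised without the HTTP layer. A minimal sketch, assuming the wav2vec2 model downloaded successfully and the committed sample contains speech:

from audio.audioanalyser_anglais import AudioAnalyserAnglais

with open("tests/testsdata/audio/1.wav", "rb") as f:
    result = AudioAnalyserAnglais.stt(f.read())

print(result)  # ASR pipelines return a dict shaped like {"text": "..."}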
audio/audioanalyser_diarization.py ADDED
@@ -0,0 +1,14 @@
+#deepneurones = pipeline("text2text-generation", model="google/flan-t5-small")
+from pyannote.audio import Pipeline
+use_auth_token = "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"
+deepneuronesdiarization = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=use_auth_token)
+
+class AudioAnalyserDiarization:
+
+    @classmethod
+    def diarization(cls, file_content):
+        # results = deepneuronesdiarizatin(file_content)
+        # return results
+        return deepneuronesdiarization(file_content)
+
+
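Two caveats here, to the best of my knowledge: pyannote's Pipeline expects a path or file-like object rather than raw bytes, and it returns an Annotation object that is not JSON-serializable. A sketch working around both, under those assumptions:

import io
from audio.audioanalyser_diarization import deepneuronesdiarization

with open("tests/testsdata/audio/1.wav", "rb") as f:
    # Wrap the raw bytes in a file-like object for the pipeline.
    annotation = deepneuronesdiarization(io.BytesIO(f.read()))

# Flatten speaker turns into plain dicts that FastAPI can return as JSON.
segments = [
    {"start": turn.start, "end": turn.end, "speaker": speaker}
    for turn, _, speaker in annotation.itertracks(yield_label=True)
]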
audio/audioanalyser_francais.py ADDED
@@ -0,0 +1,18 @@
+#from transformers import WhisperProcessor, WhisperForConditionalGeneration
+#from datasets import Audio, load_dataset
+#
+## load model and processor
+#processor = WhisperProcessor.from_pretrained("openai/whisper-base")
+#model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
+#forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="transcribe")
+#
+## load streaming dataset and read first audio sample
+#input_speech = next(iter(ds))["audio"]
+#input_features = processor(input_speech["array"], sampling_rate=input_speech["sampling_rate"], return_tensors="pt").input_features
+#
+## generate token ids
+#predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
+## decode token ids to text
+#transcription = processor.batch_decode(predicted_ids)
+#
+#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
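Everything in this file is commented out, and the sketch reads from a `ds` dataset that is never defined, so nothing runs yet. A hypothetical completion in the style of the other analysers (the model choice and generate_kwargs are my assumptions, not part of the commit):

from transformers import pipeline

# Whisper handles French transcription via its language/task decoder prompts.
whisper_francais = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    generate_kwargs={"language": "french", "task": "transcribe"},
)

class AudioAnalyserFrancais:

    @classmethod
    def stt(cls, file_content):
        return whisper_francais(file_content)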
index.html CHANGED
@@ -1,6 +1,12 @@
 <html>
+stt
 <form action="/stt" method="post" enctype="multipart/form-data">
 <input type="file" name="file">
 <input type="submit" value="ok">
 </form>
+diarization
+<form action="/diarization" method="post" enctype="multipart/form-data">
+<input type="file" name="file">
+<input type="submit" value="ok">
+</form>
 </html>
tests/audio/test_audio.py ADDED
@@ -0,0 +1,19 @@
+import unittest
+
+from audio.audioanalyser_diarization import AudioAnalyserDiarization
+from audio.audioanalyser_anglais import AudioAnalyserAnglais
+
+
+class MyTestCase(unittest.TestCase):
+    def test_stt_en(self):
+        with open("../testsdata/audio/en.wav", "rb") as filecontent:
+            #print(filecontent.read())
+            res = AudioAnalyserAnglais.stt(filecontent.read())
+            self.assertEqual(res, True)  # add assertion here
+
+    def test_diarization(self):
+        with open("../testsdata/audio/en.wav", "rb") as filecontent:
+            #print(filecontent.read())
+            res = AudioAnalyserDiarization.diarization(filecontent.read())
+            self.assertEqual(res, True)  # add assertion here
+
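As committed, both tests can only fail: the pipelines return a dict or an Annotation, never True, and they open en.wav while the sample added in this commit is 1.wav. A sketch of a structural assertion instead (the expected dict shape is an assumption):

import unittest

from audio.audioanalyser_anglais import AudioAnalyserAnglais

class SttShapeTest(unittest.TestCase):
    def test_stt_returns_text(self):
        with open("../testsdata/audio/1.wav", "rb") as f:
            res = AudioAnalyserAnglais.stt(f.read())
        self.assertIn("text", res)               # ASR pipelines return {"text": ...}
        self.assertIsInstance(res["text"], str)

The diarization test could similarly assert that annotation.labels() is non-empty rather than comparing the Annotation to True.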
tests/testsdata/audio/1.wav ADDED
Binary file (287 kB)