Ld75 committed · Commit 3ecd153 · Parent(s): 6042711

tdd + diarization

app.py CHANGED
@@ -4,18 +4,14 @@
 from fastapi import FastAPI, UploadFile
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
-from pyannote.audio import Pipeline
-
-from transformers import pipeline  # the huggingface framework
+from audio.audioanalyser_anglais import AudioAnalyserAnglais
+from audio.audioanalyser_diarization import AudioAnalyserDiarization
 #from datasets import load_dataset, Audio  # this is for training my model
 
 
 app = FastAPI()
 
-#deepneurones = pipeline("text2text-generation", model="google/flan-t5-small")
-#deepneurones = pipeline("automatic-speech-recognition")  # the list of huggingface pipelines is available here: https://huggingface.co/docs/transformers/quicktour. pipeline() downloads the deep learning model into a local cache
-#deepneurones = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")  # you have to choose a model
-deepneurones = Pipeline.from_pretrained("pyannote/speaker-diarization")
+
 @app.get("/healthcheck")
 def healthcheck():
 
@@ -27,11 +23,22 @@ def healthcheck():
 async def stt(file: str = UploadFile(...)):
     #file_content = base64.b64decode(file)
     file_content = await file.read()
+    results = AudioAnalyserAnglais.stt(file_content)
     #dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")
-    results = deepneurones(file_content)
+
     return {"output":results}
 #app.mount("/", StaticFiles(directory="static", html=True), name="static")
-
+
+@app.post("/diarization")
+async def diarization(file: str = UploadFile(...)):
+    #file_content = base64.b64decode(file)
+    file_content = await file.read()
+    results = AudioAnalyserDiarization.diarization(file_content)
+    #dataset = load_dataset("PolyAI/minds14", name="en-US", split="train")
+
+    return {"output":results}
+    #app.mount("/", StaticFiles(directory="static", html=True), name="static")
+
 
 @app.get("/")
 def index() -> FileResponse:
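
A note on the route signatures above: async def stt(file: str = UploadFile(...)) annotates the parameter as str while defaulting it to an UploadFile, which is contradictory; the idiomatic FastAPI form annotates the parameter as UploadFile directly. A minimal sketch of the diarization route with that fixed (illustrative only, not the committed code):

from fastapi import FastAPI, UploadFile

from audio.audioanalyser_diarization import AudioAnalyserDiarization

app = FastAPI()

@app.post("/diarization")
async def diarization(file: UploadFile):
    # UploadFile exposes an async file interface over the multipart upload
    file_content = await file.read()
    results = AudioAnalyserDiarization.diarization(file_content)
    return {"output": results}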
audio/audioanalyser_anglais.py ADDED
@@ -0,0 +1,15 @@
+#deepneurones = pipeline("text2text-generation", model="google/flan-t5-small")
+#deepneurones = pipeline("automatic-speech-recognition")  # the list of huggingface pipelines is available here: https://huggingface.co/docs/transformers/quicktour. pipeline() downloads the deep learning model into a local cache
+from transformers import pipeline  # the huggingface framework
+deepneurones = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")  # you have to choose a model
+
+#from pyannote.audio import Pipeline
+#,use_auth_token="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")
+#deepneuronesdiarization = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token="test")
+
+
+class AudioAnalyserAnglais:
+
+    @classmethod
+    def stt(cls, file_content):
+        return deepneurones(file_content)
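
For reference, the automatic-speech-recognition pipeline accepts raw audio bytes (decoded internally via ffmpeg) as well as file paths, and returns a dict whose "text" key holds the transcription. A minimal usage sketch, assuming an English sample under tests/testsdata/audio/ (the en.wav name comes from the tests below):

from audio.audioanalyser_anglais import AudioAnalyserAnglais

with open("tests/testsdata/audio/en.wav", "rb") as f:
    result = AudioAnalyserAnglais.stt(f.read())

print(result["text"])  # the transcription produced by wav2vec2-base-960h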
audio/audioanalyser_diarization.py ADDED
@@ -0,0 +1,14 @@
+#deepneurones = pipeline("text2text-generation", model="google/flan-t5-small")
+from pyannote.audio import Pipeline
+use_auth_token = "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"
+deepneuronesdiarization = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=use_auth_token)
+
+class AudioAnalyserDiarization:
+
+    @classmethod
+    def diarization(cls, file_content):
+        # results = deepneuronesdiarization(file_content)
+        # return results
+        return deepneuronesdiarization(file_content)
+
+
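
One caveat: a pyannote Pipeline expects an audio file path, a file-like object, or a waveform/sample_rate mapping, not raw bytes, so calling diarization(file_content) with the bytes read from an upload will likely fail. A hedged workaround sketch, wrapping the bytes in io.BytesIO first (written against pyannote.audio 2.x behavior):

import io

from audio.audioanalyser_diarization import AudioAnalyserDiarization

with open("tests/testsdata/audio/en.wav", "rb") as f:
    file_content = f.read()

# Hand the pipeline a file-like object instead of raw bytes
annotation = AudioAnalyserDiarization.diarization(io.BytesIO(file_content))

# The result is a pyannote.core.Annotation; iterate over speaker turns
for turn, _, speaker in annotation.itertracks(yield_label=True):
    print(f"{speaker}: {turn.start:.1f}s -> {turn.end:.1f}s")

Note also that an Annotation is not JSON-serializable, so the /diarization route would need to convert it (for example to a list of (start, end, speaker) tuples) before returning it.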
audio/audioanalyser_francais.py ADDED
@@ -0,0 +1,18 @@
+#from transformers import WhisperProcessor, WhisperForConditionalGeneration
+#from datasets import Audio, load_dataset
+#
+## load model and processor
+#processor = WhisperProcessor.from_pretrained("openai/whisper-base")
+#model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
+#forced_decoder_ids = processor.get_decoder_prompt_ids(language="french", task="transcribe")
+#
+## load streaming dataset and read first audio sample
+#input_speech = next(iter(ds))["audio"]
+#input_features = processor(input_speech["array"], sampling_rate=input_speech["sampling_rate"], return_tensors="pt").input_features
+#
+## generate token ids
+#predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
+## decode token ids to text
+#transcription = processor.batch_decode(predicted_ids)
+#
+#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
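
As committed, this file is entirely commented out and incomplete: ds is referenced but no dataset is ever loaded. If the intent is a French counterpart to AudioAnalyserAnglais, a minimal sketch might look like the following (the class name, model choice, and generate_kwargs wiring are assumptions, not part of this commit):

from transformers import pipeline

# Assumption: whisper-base with French transcription forced via generate_kwargs
deepneurones_fr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    generate_kwargs={"language": "french", "task": "transcribe"},
)


class AudioAnalyserFrancais:

    @classmethod
    def stt(cls, file_content):
        return deepneurones_fr(file_content)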
index.html CHANGED
@@ -1,6 +1,12 @@
 <html>
+stt
 <form action="/stt" method="post" enctype="multipart/form-data">
 <input type="file" name="file">
 <input type="submit" value="ok">
 </form>
+diarization
+<form action="/diarization" method="post" enctype="multipart/form-data">
+<input type="file" name="file">
+<input type="submit" value="ok">
+</form>
 </html>
tests/audio/test_audio.py ADDED
@@ -0,0 +1,19 @@
+import unittest
+
+from audio.audioanalyser_diarization import AudioAnalyserDiarization
+from audio.audioanalyser_anglais import AudioAnalyserAnglais
+
+
+class MyTestCase(unittest.TestCase):
+    def test_stt_en(self):
+        with open("../testsdata/audio/en.wav", "rb") as filecontent:
+            #print(filecontent.read())
+            res = AudioAnalyserAnglais.stt(filecontent.read())
+            self.assertIsNotNone(res)  # add a real assertion here
+
+    def test_diarization(self):
+        with open("../testsdata/audio/en.wav", "rb") as filecontent:
+            #print(filecontent.read())
+            res = AudioAnalyserDiarization.diarization(filecontent.read())
+            self.assertIsNotNone(res)  # add a real assertion here
+
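
Two fragilities worth flagging in these tests: the relative path ../testsdata/audio/en.wav only resolves when the suite is launched from tests/audio/ (and the sample actually committed below is 1.wav, not en.wav), and both models are downloaded into the local Hugging Face cache on first run. A small sketch of a working-directory-independent path (the DATA name is illustrative):

import os

# Resolve the sample relative to this test file instead of the working directory
DATA = os.path.join(os.path.dirname(__file__), "..", "testsdata", "audio", "1.wav")

With that, the suite can run from the repository root, e.g. python -m unittest discover -s tests/audio -t .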
tests/testsdata/audio/1.wav ADDED
Binary file (287 kB)