ISOM5240_Group25_testing

Sleeping

App Files Files Community

kkngan commited on Mar 23, 2024

Commit

f597603

verified ·

1 Parent(s): 9854ac0

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -81

app.py CHANGED Viewed

@@ -3,120 +3,103 @@ from transformers import WhisperForConditionalGeneration, WhisperProcessor
 from transformers import pipeline
 import librosa
 import torch
-from spleeter.separator import Separator
 from pydub import AudioSegment
 from IPython.display import Audio
 import os
 import accelerate
-# steamlit setup
-st.set_page_config(page_title="Sentiment Analysis on Your Cantonese Song",)
-st.header("Cantonese Song Sentiment Analyzer")
-input_file = st.file_uploader("upload a song in mp3 format", type="mp3") # upload song
-if input_file is not None:
-   st.write("File uploaded successfully!")
-   st.write(input_file)
-else:
-   st.write("No file uploaded.")
-button_click = st.button("Run Analysis", type="primary")
-# load song
-#input_file = os.path.isfile("test1.mp3")
-output_file = os.path.isdir("")
 # preprocess and crop audio file
 def audio_preprocess(file_name = '/test1/vocals.wav'):
-   # separate music and vocal
-   separator = Separator('spleeter:2stems')
-   separator.separate_to_file(input_file, output_file)
-   # Crop the audio
-   start_time = 60000  # e.g. 30 seconds, 30000
-   end_time = 110000  # e.g. 40 seconds, 40000
-   audio = AudioSegment.from_file(file_name)
-   cropped_audio = audio[start_time:end_time]
-   processed_audio = cropped_audio
-   # .export('cropped_vocals.wav', format='wav') # save vocal audio file
-   return processed_audio
 # ASR transcription
 def asr_model(processed_audio):
-   # load audio file
-   y, sr = librosa.load(processed_audio, sr=16000)
-   # ASR model
-   MODEL_NAME = "RexChan/ISOM5240-whisper-small-zhhk_1"
-   processor = WhisperProcessor.from_pretrained(MODEL_NAME)
-   model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)
-   model.config.forced_decoder_ids = None
-   model.config.suppress_tokens = []
-   model.config.use_cache = False
-   processed_in = processor(y, sampling_rate=sr, return_tensors="pt")
-   gout = model.generate(
-       input_features=processed_in.input_features,
-       output_scores=True, return_dict_in_generate=True
-   )
-   transcription = processor.batch_decode(gout.sequences, skip_special_tokens=True)[0]
-   # print result
-   print(f"Song lyrics = {transcription}")
-   return transcription
 # sentiment analysis
 def senti_model(transcription):
-   pipe = pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
-   final_result = pipe(transcription)
-   display = f"Sentiment Analysis shows that this song is {final_result[0]['label']}. Confident level of this analysis is {final_result[0]['score']*100:.1f}%."
-   print(display)
-   return display
-   # return final_result
 # main
 def main():
-   processed_audio = audio_preprocess(input_file)
-   transcription = asr_model(processed_audio)
-   final_result = senti_model(transcription)
-   st.write(final_result)
-   if st.button("Play Audio"):
-       st.audio(audio_data['audio'],
-                   format="audio/wav",
-                   start_time=0,
-                   sample_rate = audio_data['sampling_rate'])
 if __name__ == '__main__':
-   if button_click:
-       main()

 from transformers import pipeline
 import librosa
 import torch
+# from spleeter.separator import Separator
 from pydub import AudioSegment
 from IPython.display import Audio
 import os
 import accelerate
 # preprocess and crop audio file
 def audio_preprocess(file_name = '/test1/vocals.wav'):
+    # separate music and vocal
+    separator = Separator('spleeter:2stems')
+    separator.separate_to_file(input_file, output_file)
+    # Crop the audio
+    start_time = 60000  # e.g. 30 seconds, 30000
+    end_time = 110000  # e.g. 40 seconds, 40000
+    audio = AudioSegment.from_file(file_name)
+    cropped_audio = audio[start_time:end_time]
+    processed_audio = cropped_audio
+    # .export('cropped_vocals.wav', format='wav') # save vocal audio file
+    return processed_audio
 # ASR transcription
 def asr_model(processed_audio):
+    # load audio file
+    y, sr = librosa.load(processed_audio, sr=16000)
+    # ASR model
+    MODEL_NAME = "RexChan/ISOM5240-whisper-small-zhhk_1"
+    processor = WhisperProcessor.from_pretrained(MODEL_NAME)
+    model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)
+    model.config.forced_decoder_ids = None
+    model.config.suppress_tokens = []
+    model.config.use_cache = False
+    processed_in = processor(y, sampling_rate=sr, return_tensors="pt")
+    gout = model.generate(
+        input_features=processed_in.input_features,
+        output_scores=True, return_dict_in_generate=True
+    )
+    transcription = processor.batch_decode(gout.sequences, skip_special_tokens=True)[0]
+    # print result
+    print(f"Song lyrics = {transcription}")
+    return transcription
 # sentiment analysis
 def senti_model(transcription):
+    pipe = pipeline("text-classification", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
+    final_result = pipe(transcription)
+    display = f"Sentiment Analysis shows that this song is {final_result[0]['label']}. Confident level of this analysis is {final_result[0]['score']*100:.1f}%."
+    print(display)
+    return display
+    # return final_result
 # main
 def main():
+    # processed_audio = audio_preprocess(input_file)
+    processed_audio = input_file
+    transcription = asr_model(processed_audio)
+    final_result = senti_model(transcription)
+    st.write(final_result)
+    if st.button("Play Audio"):
+        st.audio(audio_data['audio'],
+                    format="audio/wav",
+                    start_time=0,
+                    sample_rate = audio_data['sampling_rate'])
 if __name__ == '__main__':
+    # steamlit setup
+    st.set_page_config(page_title="Sentiment Analysis on Your Cantonese Song",)
+    st.header("Cantonese Song Sentiment Analyzer")
+    input_file = st.file_uploader("upload a song in mp3 format", type="mp3") # upload song
+    if input_file is not None:
+        st.write("File uploaded successfully!")
+        st.write(input_file)
+    else:
+        st.write("No file uploaded.")
+    button_click = st.button("Run Analysis", type="primary")
+    # load song
+    #input_file = os.path.isfile("test1.mp3")
+    output_file = os.path.isdir("")
+    if button_click:
+        main()