# NOTE: removed page-scrape artifact ("Spaces: Sleeping" status banner)
# that preceded the code and would have broken parsing.
# --- Imports ---------------------------------------------------------------
# Standard library
import os
import re
import json
import shutil

# Third-party
import pyttsx3
from pydub import AudioSegment
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from dotenv import load_dotenv

# Load environment variables from a local .env file (keys, config).
load_dotenv()

# Initialize the model and tokenizer used to draft the podcast dialogue.
# distilgpt2 is a small, CPU-friendly GPT-2 checkpoint.
model_name = "distilgpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
# Prompt steering the conversation generator toward an interview format.
system_prompt = """Generate a conversation between Sascha and Marina based on the article content provided.
Sascha is the article writer, and Marina is the interviewer. Make it engaging and emotional, with natural pauses (like "uh")
to make it sound conversational. This is for a podcast called "The Machine Learning Engineer"."""

# Map each podcast speaker to a TTS backend. Both currently use the
# offline pyttsx3 engine, but the mapping leaves room for per-speaker
# engines later.
speaker_voice_map = {
    "Sascha": "pyttsx3",  # Sascha will use pyttsx3 for offline TTS
    "Marina": "pyttsx3"   # Marina uses pyttsx3 for offline TTS
}

# Shared pyttsx3 engine used for every synthesized turn.
engine = pyttsx3.init()
engine.setProperty('rate', 150)    # speech rate (words per minute)
engine.setProperty('volume', 0.9)  # output volume, range 0.0 to 1.0
# Pyttsx3 TTS function for offline TTS | |
def synthesize_speech_pyttsx3(text, speaker, index):
    """Render one conversation turn to disk with the offline pyttsx3 engine.

    The clip is written to audio-files/{index}_{speaker}.mp3 so that
    natural_sort_key() can later reassemble the clips in turn order.

    NOTE(review): pyttsx3's save_to_file writes the platform's native
    audio format (often WAV/AIFF) regardless of the .mp3 extension —
    confirm pydub can decode the result on the target OS.

    Args:
        text: the sentence(s) to speak.
        speaker: speaker name, embedded in the output filename.
        index: turn index, used as the natural-sort prefix.
    """
    filename = f"audio-files/{index}_{speaker}.mp3"
    engine.save_to_file(text, filename)
    engine.runAndWait()
    # Bug fix: the original printed a literal "(unknown)" placeholder
    # instead of the actual output path.
    print(f'Audio content written to file "{filename}"')
# Function to synthesize speech based on the speaker | |
def synthesize_speech(text, speaker, index):
    """Dispatch TTS synthesis for a single conversation turn.

    All speakers currently route to the offline pyttsx3 backend; this
    indirection exists so other engines can be plugged in later.
    """
    return synthesize_speech_pyttsx3(text, speaker, index)
# Function to sort filenames naturally | |
def natural_sort_key(filename): | |
return [int(text) if text.isdigit() else text for text in re.split(r'(\d+)', filename)] | |
# Function to merge audio files | |
def merge_audios(audio_folder, output_file):
    """Concatenate every .mp3/.wav clip in *audio_folder* into one mp3.

    Clips are joined in natural sort order (1, 2, ... 10) so files named
    by turn index come out in conversation order.

    Args:
        audio_folder: directory containing the per-turn audio clips.
        output_file: path of the merged mp3 to write.
    """
    combined = AudioSegment.empty()
    # Idiom: str.endswith accepts a tuple — one call instead of an `or`
    # chain of two endswith() tests.
    audio_files = sorted(
        (f for f in os.listdir(audio_folder) if f.endswith((".mp3", ".wav"))),
        key=natural_sort_key,
    )
    for filename in audio_files:
        audio_path = os.path.join(audio_folder, filename)
        print(f"Processing: {audio_path}")
        combined += AudioSegment.from_file(audio_path)
    combined.export(output_file, format="mp3")
    print(f"Merged audio saved as {output_file}")
# Function to generate conversation using distilgpt2 | |
def generate_conversation(article):
    """Draft a Sascha/Marina dialogue about *article* using distilgpt2.

    Prepends the system prompt, generates a continuation, then parses
    the text into turns split on the "Sascha:" / "Marina:" tags.

    NOTE(review): temperature has no effect unless do_sample=True is
    passed to generate(); as written decoding is greedy.

    Args:
        article: plain-text article to base the conversation on.

    Returns:
        List of {"speaker": ..., "text": ...} dicts, one per turn.
    """
    input_text = f"{system_prompt}\n\n{article}\n\nSascha: "
    inputs = tokenizer.encode(input_text, return_tensors="pt")
    outputs = model.generate(inputs, max_length=1024, num_return_sequences=1, temperature=1.0)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Split on the speaker tags; element 0 is the pre-dialogue preamble,
    # so drop it.
    lines = re.split(r'(Sascha:|Marina:)', generated_text)[1:]
    # Bug fix: the original indexed lines[i + 1] and raised IndexError
    # whenever the split had odd length (model output ending on a bare
    # speaker tag). zip() pairs tags with their text and drops a
    # trailing unmatched tag.
    conversation = [
        {"speaker": speaker.strip(), "text": text.strip()}
        for speaker, text in zip(lines[::2], lines[1::2])
    ]
    print(json.dumps(conversation, indent=4))
    return conversation
# Function to generate the podcast audio | |
def generate_audio(conversation):
    """Synthesize every conversation turn and merge them into podcast.mp3.

    Recreates the audio-files/ scratch directory on each run, renders
    one clip per turn, then concatenates the clips in turn order.

    Args:
        conversation: list of {"speaker": ..., "text": ...} turn dicts.

    Returns:
        Path of the merged podcast file.
    """
    scratch_dir = 'audio-files'
    # Start from a clean scratch directory so stale clips never leak in.
    if os.path.exists(scratch_dir):
        shutil.rmtree(scratch_dir)
    os.makedirs(scratch_dir, exist_ok=True)

    for turn_index, turn in enumerate(conversation):
        synthesize_speech(turn['text'], turn['speaker'], turn_index)

    output_file = "podcast.mp3"
    merge_audios(scratch_dir, output_file)
    return output_file
# Read the article from the file | |
# --- Script entry ----------------------------------------------------------
# Read the source article, draft the dialogue, and render the podcast.
# Fix: specify the encoding explicitly — the default text encoding is
# platform-dependent and can mangle non-ASCII article content.
with open('function-calling.txt', 'r', encoding='utf-8') as file:
    article = file.read()

conversation = generate_conversation(article)
generate_audio(conversation)