Upload 6 files

- utility/audio_generator.py +5 -0
- utility/image_generator.py +39 -0
- utility/logging.py +20 -0
- utility/render_engine.py +31 -0
- utility/script_generator.py +50 -0
- utility/timed_captions_generator.py +69 -0
utility/audio_generator.py
ADDED
@@ -0,0 +1,5 @@
import edge_tts

# Synthesize narration with Microsoft Edge's neural TTS voices
async def generate_audio(text, outputFilename):
    communicate = edge_tts.Communicate(text, "en-AU-WilliamNeural")
    await communicate.save(outputFilename)
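
Usage sketch (not part of the upload): generate_audio is a coroutine, so a caller drives it with asyncio. The text and file name below are illustrative; edge-tts writes MP3 data regardless of the extension.

    import asyncio
    from utility.audio_generator import generate_audio

    # Synthesizes the text and saves the narration to disk
    asyncio.run(generate_audio("Octopuses have three hearts.", "audio_tts.mp3"))
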
utility/image_generator.py
ADDED
@@ -0,0 +1,39 @@
from diffusers import DiffusionPipeline
import torch
import re
from dotenv import load_dotenv
import os

load_dotenv()

# Ensure GPU is used if available
device = "cuda" if torch.cuda.is_available() else "cpu"

pipeline = DiffusionPipeline.from_pretrained("Shakker-Labs/AWPortrait-FL")
pipeline = pipeline.to(device)

def generate_image_prompts(script):
    # Split the script into sentences
    sentences = re.split(r'(?<=[.!?]) +', script)

    # Use each non-empty sentence as an image prompt
    prompts = []
    for sentence in sentences:
        if sentence.strip():
            prompts.append(sentence.strip())

    return prompts

def generate_images(prompts):
    image_files = []
    for idx, prompt in enumerate(prompts):
        print(f"Generating image for prompt: {prompt}")
        # The pipeline runs on the device selected above
        image = pipeline(prompt).images[0]
        filename = f"generated_image_{idx}.png"
        image.save(filename)
        image_files.append(filename)

    return image_files
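
A sketch of how the two helpers chain together (the script text is illustrative):

    from utility.image_generator import generate_image_prompts, generate_images

    script = "Bananas are berries. A single cloud can weigh over a million pounds."
    prompts = generate_image_prompts(script)   # one prompt per sentence
    files = generate_images(prompts)           # ["generated_image_0.png", "generated_image_1.png"]
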
utility/logging.py
ADDED
@@ -0,0 +1,20 @@
import os
import json
from datetime import datetime

LOG_TYPE_GPT = "GPT"
DIRECTORY_LOG_GPT = ".logs/gpt_logs"

def log_response(log_type, query, response):
    log_entry = {
        "query": query,
        "response": response,
        "timestamp": datetime.now().isoformat()
    }
    directory = DIRECTORY_LOG_GPT
    os.makedirs(directory, exist_ok=True)
    filename = f'{datetime.now().strftime("%Y%m%d_%H%M%S")}_{log_type.lower()}.txt'
    filepath = os.path.join(directory, filename)
    with open(filepath, "w") as outfile:
        json.dump(log_entry, outfile)
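
A sketch of the intended call (the payload below is illustrative):

    from utility.logging import log_response, LOG_TYPE_GPT

    # Writes .logs/gpt_logs/<timestamp>_gpt.txt containing the query/response pair
    log_response(LOG_TYPE_GPT, "Weird facts", '{"script": "..."}')
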
utility/render_engine.py
ADDED
@@ -0,0 +1,31 @@
from moviepy.editor import (AudioFileClip, CompositeVideoClip, CompositeAudioClip,
                            TextClip, ImageClip)

def get_output_media(audio_file_path, timed_captions, image_files):
    OUTPUT_FILE_NAME = "rendered_video.mp4"

    visual_clips = []
    audio_clips = []
    audio_file_clip = AudioFileClip(audio_file_path)
    audio_clips.append(audio_file_clip)

    for idx, ((t1, t2), text) in enumerate(timed_captions):
        # Show one generated image per caption, reusing the last image
        # if there are more captions than images
        image_filename = image_files[idx] if idx < len(image_files) else image_files[-1]
        image_clip = ImageClip(image_filename).set_duration(t2 - t1).set_start(t1)
        image_clip = image_clip.resize(height=720)  # Normalize frame height
        visual_clips.append(image_clip)

        # Overlay the caption text, timed to match the narration
        text_clip = TextClip(txt=text, fontsize=50, color="white", stroke_width=2,
                             stroke_color="black", method="caption", size=(image_clip.w, None))
        text_clip = text_clip.set_start(t1).set_end(t2)
        text_clip = text_clip.set_position(("center", "bottom"))
        visual_clips.append(text_clip)

    final_clip = CompositeVideoClip(visual_clips)
    final_clip = final_clip.set_audio(CompositeAudioClip(audio_clips))
    final_clip.write_videofile(OUTPUT_FILE_NAME, codec="libx264", fps=24, audio_codec="aac")

    return OUTPUT_FILE_NAME
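
A sketch of the expected call shape, assuming captions from utility/timed_captions_generator.py and images from utility/image_generator.py (timings and file names are illustrative):

    from utility.render_engine import get_output_media

    captions = [((0.0, 2.0), "Bananas are berries,"), ((2.0, 3.8), "but strawberries aren't.")]
    images = ["generated_image_0.png", "generated_image_1.png"]
    print(get_output_media("audio_tts.mp3", captions, images))  # rendered_video.mp4
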
utility/script_generator.py
ADDED
@@ -0,0 +1,50 @@
from g4f.client import Client
import json

def generate_script(topic):
    prompt = (
        """You are a seasoned content writer for a YouTube Shorts channel, specializing in facts videos.
        Your facts shorts are concise, each lasting less than 50 seconds (approximately 140 words).
        They are incredibly engaging and original. When a user requests a specific type of facts short, you will create it.

        For instance, if the user asks for:
        Weird facts
        You would produce content like this:

        Weird facts you don't know:
        - Bananas are berries, but strawberries aren't.
        - A single cloud can weigh over a million pounds.
        - There's a species of jellyfish that is biologically immortal.
        - Honey never spoils; archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still edible.
        - The shortest war in history was between Britain and Zanzibar on August 27, 1896. Zanzibar surrendered after 38 minutes.
        - Octopuses have three hearts and blue blood.

        You are now tasked with creating the best short script based on the user's requested type of 'facts'.

        Keep it brief, highly interesting, and unique.

        Strictly output the script in a JSON format like below, and only provide a parsable JSON object with the key 'script'.

        # Output
        {"script": "Here is the script ..."}
        """
    )

    client = Client()
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{'role': 'user', 'content': prompt + "\n\n" + topic}]
    )

    content = response.choices[0].message.content
    try:
        script = json.loads(content)["script"]
    except json.JSONDecodeError:
        # Fall back to extracting the first {...} block if the model added extra text
        print("JSONDecodeError. Attempting to extract JSON from the response.")
        json_start = content.find('{')
        json_end = content.rfind('}') + 1
        script = json.loads(content[json_start:json_end])["script"]

    return script
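
A minimal usage sketch (g4f routes the request through free providers, so availability and output quality can vary):

    from utility.script_generator import generate_script

    script = generate_script("Space facts")  # narration text extracted from the model's JSON reply
    print(script)
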
utility/timed_captions_generator.py
ADDED
@@ -0,0 +1,69 @@
import os
from whisper_timestamped import load_model, transcribe_timestamped
import re

# Ensure ffmpeg is in the PATH
os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"

def generate_timed_captions(audio_filename, model_size="base"):
    WHISPER_MODEL = load_model(model_size)
    gen = transcribe_timestamped(WHISPER_MODEL, audio_filename, verbose=False, fp16=False)
    return getCaptionsWithTime(gen)

def splitWordsBySize(words, maxCaptionSize):
    halfCaptionSize = maxCaptionSize / 2
    captions = []
    while words:
        caption = words[0]
        words = words[1:]
        while words and len(caption + ' ' + words[0]) <= maxCaptionSize:
            caption += ' ' + words[0]
            words = words[1:]
            # Stop early once the caption is at least half full so captions stay balanced
            if len(caption) >= halfCaptionSize and words:
                break
        captions.append(caption)
    return captions

def getTimestampMapping(whisper_analysis):
    # Map each word's character span in the transcript to its end timestamp
    index = 0
    locationToTimestamp = {}
    for segment in whisper_analysis['segments']:
        for word in segment['words']:
            newIndex = index + len(word['text']) + 1
            locationToTimestamp[(index, newIndex)] = word['end']
            index = newIndex
    return locationToTimestamp

def cleanWord(word):
    return re.sub(r'[^\w\s\-_"\']', '', word)

def interpolateTimeFromDict(word_position, d):
    for key, value in d.items():
        if key[0] <= word_position <= key[1]:
            return value
    return None

def getCaptionsWithTime(whisper_analysis, maxCaptionSize=15, considerPunctuation=False):
    wordLocationToTime = getTimestampMapping(whisper_analysis)
    position = 0
    start_time = 0
    CaptionsPairs = []
    text = whisper_analysis['text']

    if considerPunctuation:
        sentences = re.split(r'(?<=[.!?]) +', text)
        words = [word for sentence in sentences for word in splitWordsBySize(sentence.split(), maxCaptionSize)]
    else:
        words = text.split()
        words = splitWordsBySize(words, maxCaptionSize)

    for word in words:
        cleaned_word = cleanWord(word)
        position += len(word) + 1
        end_time = interpolateTimeFromDict(position, wordLocationToTime)
        if end_time and cleaned_word:
            CaptionsPairs.append(((start_time, end_time), cleaned_word))
            start_time = end_time

    return CaptionsPairs
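
An end-to-end sketch of the captioning step (the Whisper model downloads on first use; the timings shown are illustrative):

    from utility.timed_captions_generator import generate_timed_captions

    captions = generate_timed_captions("audio_tts.mp3")
    # e.g. [((0, 1.74), 'Bananas are berries'), ((1.74, 3.06), "but strawberries aren't"), ...]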