codewithdark committed
Commit c72bd11 · verified · 1 Parent(s): 4ead59e

Upload 6 files
utility/audio_generator.py ADDED
@@ -0,0 +1,5 @@
+ import edge_tts
+
+ async def generate_audio(text, outputFilename):
+     communicate = edge_tts.Communicate(text, "en-AU-WilliamNeural")
+     await communicate.save(outputFilename)
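A minimal usage sketch, assuming the utility folder is importable as a package; the output filename is illustrative. edge_tts.Communicate.save is a coroutine, so it has to run inside an event loop:

import asyncio
from utility.audio_generator import generate_audio

# Synthesize a short line of narration (edge-tts streams MP3 audio)
asyncio.run(generate_audio("Octopuses have three hearts and blue blood.", "audio_tts.mp3"))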
utility/image_generator.py ADDED
@@ -0,0 +1,38 @@
+ from diffusers import DiffusionPipeline
+ import torch
+ import re
+ from PIL import Image
+ import io
+ from dotenv import load_dotenv
+ import os
+
+ load_dotenv()
+
+ # Ensure GPU is used if available
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ pipeline = DiffusionPipeline.from_pretrained("Shakker-Labs/AWPortrait-FL").to(device)
+
+ def generate_image_prompts(script):
+     # Split the script into sentences
+     sentences = re.split(r'(?<=[.!?]) +', script)
+
+     # Generate prompts for each sentence
+     prompts = []
+     for sentence in sentences:
+         if sentence.strip():  # Ensure the sentence is not empty
+             prompts.append(sentence.strip())
+
+     return prompts
+
+ def generate_images(prompts):
+     image_files = []
+     for idx, prompt in enumerate(prompts):
+         print(f"Generating image for prompt: {prompt}")
+         # The pipeline was moved to `device` above, so generation runs on GPU when available
+         image = pipeline(prompt).images[0]
+         filename = f"generated_image_{idx}.png"
+         image.save(filename)
+         image_files.append(filename)
+
+     return image_files
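A short usage sketch chaining the two helpers, assuming the AWPortrait-FL pipeline above loaded successfully and enough GPU memory is available; the script text is illustrative:

from utility.image_generator import generate_image_prompts, generate_images

script = "Bananas are berries, but strawberries aren't. Honey never spoils."
prompts = generate_image_prompts(script)   # one prompt per sentence
files = generate_images(prompts)           # writes generated_image_0.png, generated_image_1.png, ...
print(files)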
utility/logging.py ADDED
@@ -0,0 +1,20 @@
+ import os
+ import json
+ from datetime import datetime
+
+ LOG_TYPE_GPT = "GPT"
+ DIRECTORY_LOG_GPT = ".logs/gpt_logs"
+
+ def log_response(log_type, query, response):
+     log_entry = {
+         "query": query,
+         "response": response,
+         "timestamp": datetime.now().isoformat()
+     }
+     directory = DIRECTORY_LOG_GPT
+     if not os.path.exists(directory):
+         os.makedirs(directory)
+     filename = f'{datetime.now().strftime("%Y%m%d_%H%M%S")}_{log_type.lower()}.txt'
+     filepath = os.path.join(directory, filename)
+     with open(filepath, "w") as outfile:
+         json.dump(log_entry, outfile)
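A quick sketch of how log_response is meant to be called, for example right after a chat completion; the query and response values are placeholders:

from utility.logging import log_response, LOG_TYPE_GPT

log_response(LOG_TYPE_GPT, "Weird facts", '{"script": "..."}')
# Writes .logs/gpt_logs/<YYYYMMDD_HHMMSS>_gpt.txt containing the JSON log entry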
utility/render_engine.py ADDED
@@ -0,0 +1,31 @@
+ import os
+ import tempfile
+ from moviepy.editor import (AudioFileClip, CompositeVideoClip, CompositeAudioClip,
+                             TextClip, ImageClip, concatenate_videoclips)
+
+ def get_output_media(audio_file_path, timed_captions, image_files):
+     OUTPUT_FILE_NAME = "rendered_video.mp4"
+
+     visual_clips = []
+     audio_clips = []
+     audio_file_clip = AudioFileClip(audio_file_path)
+     audio_clips.append(audio_file_clip)
+
+     for idx, ((t1, t2), text) in enumerate(timed_captions):
+         # Create an ImageClip for each generated image
+         image_filename = image_files[idx] if idx < len(image_files) else image_files[-1]
+         image_clip = ImageClip(image_filename).set_duration(t2 - t1).set_start(t1)
+         image_clip = image_clip.resize(height=720)  # Resize if necessary
+         visual_clips.append(image_clip)
+
+         # Add text overlay
+         text_clip = TextClip(txt=text, fontsize=50, color="white", stroke_width=2, stroke_color="black", method="caption", size=(image_clip.w, None))
+         text_clip = text_clip.set_start(t1).set_end(t2)
+         text_clip = text_clip.set_position(("center", "bottom"))
+         visual_clips.append(text_clip)
+
+     final_clip = CompositeVideoClip(visual_clips)
+     final_clip = final_clip.set_audio(CompositeAudioClip(audio_clips))
+     final_clip.write_videofile(OUTPUT_FILE_NAME, codec="libx264", fps=24, audio_codec="aac")
+
+     return OUTPUT_FILE_NAME
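A usage sketch for get_output_media, assuming the audio, captions, and images come from the other utilities; the paths and timings are placeholders, and MoviePy's TextClip additionally needs ImageMagick installed:

from utility.render_engine import get_output_media

captions = [((0.0, 2.5), "Bananas are berries"), ((2.5, 5.0), "but strawberries aren't")]
images = ["generated_image_0.png", "generated_image_1.png"]
video = get_output_media("audio_tts.mp3", captions, images)
print(video)  # rendered_video.mp4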
utility/script_generator.py ADDED
@@ -0,0 +1,50 @@
+ import g4f
+ from g4f.client import Client
+ import json
+
+ def generate_script(topic):
+     prompt = (
+         """You are a seasoned content writer for a YouTube Shorts channel, specializing in facts videos.
+ Your facts shorts are concise, each lasting less than 50 seconds (approximately 140 words).
+ They are incredibly engaging and original. When a user requests a specific type of facts short, you will create it.
+
+ For instance, if the user asks for:
+ Weird facts
+ You would produce content like this:
+
+ Weird facts you don't know:
+ - Bananas are berries, but strawberries aren't.
+ - A single cloud can weigh over a million pounds.
+ - There's a species of jellyfish that is biologically immortal.
+ - Honey never spoils; archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still edible.
+ - The shortest war in history was between Britain and Zanzibar on August 27, 1896. Zanzibar surrendered after 38 minutes.
+ - Octopuses have three hearts and blue blood.
+
+ You are now tasked with creating the best short script based on the user's requested type of 'facts'.
+
+ Keep it brief, highly interesting, and unique.
+
+ Strictly output the script in a JSON format like below, and only provide a parsable JSON object with the key 'script'.
+
+ # Output
+ {"script": "Here is the script ..."}
+ """
+     )
+
+     client = Client()
+     response = client.chat.completions.create(
+         model='gpt-4o',
+         messages=[{'role': 'user', 'content': prompt + "\n\n" + topic}]
+
+     )
+
+     content = response.choices[0].message.content
+     try:
+         script = json.loads(content)["script"]
+     except json.JSONDecodeError:
+         print("JSONDecodeError. Attempting to extract JSON from the response.")
+         json_start = content.find('{')
+         json_end = content.rfind('}') + 1
+         script = json.loads(content[json_start:json_end])["script"]
+
+     return script
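A minimal call sketch; g4f routes the request to free providers, so availability of the gpt-4o model is not guaranteed, and the topic below is just an example:

from utility.script_generator import generate_script

script = generate_script("Weird facts about the ocean")
print(script)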
utility/timed_captions_generator.py ADDED
@@ -0,0 +1,69 @@
+ import os
+ import whisper_timestamped as whisper
+ from whisper_timestamped import load_model, transcribe_timestamped
+ import re
+
+ # Ensure ffmpeg is in the PATH
+ os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"
+
+ def generate_timed_captions(audio_filename, model_size="base"):
+     WHISPER_MODEL = load_model(model_size)
+     gen = transcribe_timestamped(WHISPER_MODEL, audio_filename, verbose=False, fp16=False)
+     return getCaptionsWithTime(gen)
+
+ def splitWordsBySize(words, maxCaptionSize):
+     halfCaptionSize = maxCaptionSize / 2
+     captions = []
+     while words:
+         caption = words[0]
+         words = words[1:]
+         while words and len(caption + ' ' + words[0]) <= maxCaptionSize:
+             caption += ' ' + words[0]
+             words = words[1:]
+             if len(caption) >= halfCaptionSize and words:
+                 break
+         captions.append(caption)
+     return captions
+
+ def getTimestampMapping(whisper_analysis):
+     index = 0
+     locationToTimestamp = {}
+     for segment in whisper_analysis['segments']:
+         for word in segment['words']:
+             newIndex = index + len(word['text']) + 1
+             locationToTimestamp[(index, newIndex)] = word['end']
+             index = newIndex
+     return locationToTimestamp
+
+ def cleanWord(word):
+     return re.sub(r'[^\w\s\-_"\'\']', '', word)
+
+ def interpolateTimeFromDict(word_position, d):
+     for key, value in d.items():
+         if key[0] <= word_position <= key[1]:
+             return value
+     return None
+
+ def getCaptionsWithTime(whisper_analysis, maxCaptionSize=15, considerPunctuation=False):
+     wordLocationToTime = getTimestampMapping(whisper_analysis)
+     position = 0
+     start_time = 0
+     CaptionsPairs = []
+     text = whisper_analysis['text']
+
+     if considerPunctuation:
+         sentences = re.split(r'(?<=[.!?]) +', text)
+         words = [word for sentence in sentences for word in splitWordsBySize(sentence.split(), maxCaptionSize)]
+     else:
+         words = text.split()
+         words = splitWordsBySize(words, maxCaptionSize)
+
+     for word in words:
+         cleaned_word = cleanWord(word)
+         position += len(word) + 1
+         end_time = interpolateTimeFromDict(position, wordLocationToTime)
+         if end_time and cleaned_word:
+             CaptionsPairs.append(((start_time, end_time), cleaned_word))
+             start_time = end_time
+
+     return CaptionsPairs
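A usage sketch tying the captioning step to the generated narration; the audio path is a placeholder, and the first run downloads the Whisper "base" model:

from utility.timed_captions_generator import generate_timed_captions

captions = generate_timed_captions("audio_tts.mp3")
for (start, end), text in captions:
    print(f"{start:.2f}-{end:.2f}: {text}")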