Upload 6 files

- utility/audio_generator.py +5 -0
- utility/image_generator.py +39 -0
- utility/logging.py +20 -0
- utility/render_engine.py +31 -0
- utility/script_generator.py +50 -0
- utility/timed_captions_generator.py +69 -0
utility/audio_generator.py
ADDED
@@ -0,0 +1,5 @@
import edge_tts

# Synthesize narration with Microsoft Edge's neural TTS voices
async def generate_audio(text, outputFilename):
    communicate = edge_tts.Communicate(text, "en-AU-WilliamNeural")
    await communicate.save(outputFilename)
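
Usage sketch (not part of the upload): generate_audio is a coroutine, so a caller drives it with asyncio. The text and file name below are illustrative; edge-tts writes MP3 data regardless of the extension.

    import asyncio
    from utility.audio_generator import generate_audio

    # Synthesizes the text and saves the narration to disk
    asyncio.run(generate_audio("Octopuses have three hearts.", "audio_tts.mp3"))
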
utility/image_generator.py
ADDED
@@ -0,0 +1,39 @@
from diffusers import DiffusionPipeline
import torch
import re
from dotenv import load_dotenv
import os

load_dotenv()

# Ensure GPU is used if available
device = "cuda" if torch.cuda.is_available() else "cpu"

pipeline = DiffusionPipeline.from_pretrained("Shakker-Labs/AWPortrait-FL")
pipeline = pipeline.to(device)

def generate_image_prompts(script):
    # Split the script into sentences
    sentences = re.split(r'(?<=[.!?]) +', script)

    # Use each non-empty sentence as an image prompt
    prompts = []
    for sentence in sentences:
        if sentence.strip():
            prompts.append(sentence.strip())

    return prompts

def generate_images(prompts):
    image_files = []
    for idx, prompt in enumerate(prompts):
        print(f"Generating image for prompt: {prompt}")
        # The pipeline runs on the device selected above
        image = pipeline(prompt).images[0]
        filename = f"generated_image_{idx}.png"
        image.save(filename)
        image_files.append(filename)

    return image_files
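
A sketch of how the two helpers chain together (the script text is illustrative):

    from utility.image_generator import generate_image_prompts, generate_images

    script = "Bananas are berries. A single cloud can weigh over a million pounds."
    prompts = generate_image_prompts(script)   # one prompt per sentence
    files = generate_images(prompts)           # ["generated_image_0.png", "generated_image_1.png"]
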
utility/logging.py
ADDED
@@ -0,0 +1,20 @@
import os
import json
from datetime import datetime

LOG_TYPE_GPT = "GPT"
DIRECTORY_LOG_GPT = ".logs/gpt_logs"

def log_response(log_type, query, response):
    log_entry = {
        "query": query,
        "response": response,
        "timestamp": datetime.now().isoformat()
    }
    directory = DIRECTORY_LOG_GPT
    os.makedirs(directory, exist_ok=True)
    filename = f'{datetime.now().strftime("%Y%m%d_%H%M%S")}_{log_type.lower()}.txt'
    filepath = os.path.join(directory, filename)
    with open(filepath, "w") as outfile:
        json.dump(log_entry, outfile)
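
A sketch of the intended call (the payload below is illustrative):

    from utility.logging import log_response, LOG_TYPE_GPT

    # Writes .logs/gpt_logs/<timestamp>_gpt.txt containing the query/response pair
    log_response(LOG_TYPE_GPT, "Weird facts", '{"script": "..."}')
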
utility/render_engine.py
ADDED
@@ -0,0 +1,31 @@
from moviepy.editor import (AudioFileClip, CompositeVideoClip, CompositeAudioClip,
                            TextClip, ImageClip)

def get_output_media(audio_file_path, timed_captions, image_files):
    OUTPUT_FILE_NAME = "rendered_video.mp4"

    visual_clips = []
    audio_clips = []
    audio_file_clip = AudioFileClip(audio_file_path)
    audio_clips.append(audio_file_clip)

    for idx, ((t1, t2), text) in enumerate(timed_captions):
        # Show one generated image per caption, reusing the last image
        # if there are more captions than images
        image_filename = image_files[idx] if idx < len(image_files) else image_files[-1]
        image_clip = ImageClip(image_filename).set_duration(t2 - t1).set_start(t1)
        image_clip = image_clip.resize(height=720)  # Normalize frame height
        visual_clips.append(image_clip)

        # Overlay the caption text, timed to match the narration
        text_clip = TextClip(txt=text, fontsize=50, color="white", stroke_width=2,
                             stroke_color="black", method="caption", size=(image_clip.w, None))
        text_clip = text_clip.set_start(t1).set_end(t2)
        text_clip = text_clip.set_position(("center", "bottom"))
        visual_clips.append(text_clip)

    final_clip = CompositeVideoClip(visual_clips)
    final_clip = final_clip.set_audio(CompositeAudioClip(audio_clips))
    final_clip.write_videofile(OUTPUT_FILE_NAME, codec="libx264", fps=24, audio_codec="aac")

    return OUTPUT_FILE_NAME
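
A sketch of the expected call shape, assuming captions from utility/timed_captions_generator.py and images from utility/image_generator.py (timings and file names are illustrative):

    from utility.render_engine import get_output_media

    captions = [((0.0, 2.0), "Bananas are berries,"), ((2.0, 3.8), "but strawberries aren't.")]
    images = ["generated_image_0.png", "generated_image_1.png"]
    print(get_output_media("audio_tts.mp3", captions, images))  # rendered_video.mp4
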
utility/script_generator.py
ADDED
@@ -0,0 +1,50 @@
from g4f.client import Client
import json

def generate_script(topic):
    prompt = (
        """You are a seasoned content writer for a YouTube Shorts channel, specializing in facts videos.
        Your facts shorts are concise, each lasting less than 50 seconds (approximately 140 words).
        They are incredibly engaging and original. When a user requests a specific type of facts short, you will create it.

        For instance, if the user asks for:
        Weird facts
        You would produce content like this:

        Weird facts you don't know:
        - Bananas are berries, but strawberries aren't.
        - A single cloud can weigh over a million pounds.
        - There's a species of jellyfish that is biologically immortal.
        - Honey never spoils; archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still edible.
        - The shortest war in history was between Britain and Zanzibar on August 27, 1896. Zanzibar surrendered after 38 minutes.
        - Octopuses have three hearts and blue blood.

        You are now tasked with creating the best short script based on the user's requested type of 'facts'.

        Keep it brief, highly interesting, and unique.

        Strictly output the script in a JSON format like below, and only provide a parsable JSON object with the key 'script'.

        # Output
        {"script": "Here is the script ..."}
        """
    )

    client = Client()
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{'role': 'user', 'content': prompt + "\n\n" + topic}]
    )

    content = response.choices[0].message.content
    try:
        script = json.loads(content)["script"]
    except json.JSONDecodeError:
        # Fall back to extracting the first {...} block if the model added extra text
        print("JSONDecodeError. Attempting to extract JSON from the response.")
        json_start = content.find('{')
        json_end = content.rfind('}') + 1
        script = json.loads(content[json_start:json_end])["script"]

    return script
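
A minimal usage sketch (g4f routes the request through free providers, so availability and output quality can vary):

    from utility.script_generator import generate_script

    script = generate_script("Space facts")  # narration text extracted from the model's JSON reply
    print(script)
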
utility/timed_captions_generator.py
ADDED
@@ -0,0 +1,69 @@
import os
from whisper_timestamped import load_model, transcribe_timestamped
import re

# Ensure ffmpeg is in the PATH
os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"

def generate_timed_captions(audio_filename, model_size="base"):
    WHISPER_MODEL = load_model(model_size)
    gen = transcribe_timestamped(WHISPER_MODEL, audio_filename, verbose=False, fp16=False)
    return getCaptionsWithTime(gen)

def splitWordsBySize(words, maxCaptionSize):
    halfCaptionSize = maxCaptionSize / 2
    captions = []
    while words:
        caption = words[0]
        words = words[1:]
        while words and len(caption + ' ' + words[0]) <= maxCaptionSize:
            caption += ' ' + words[0]
            words = words[1:]
            # Stop early once the caption is at least half full so captions stay balanced
            if len(caption) >= halfCaptionSize and words:
                break
        captions.append(caption)
    return captions

def getTimestampMapping(whisper_analysis):
    # Map each word's character span in the transcript to its end timestamp
    index = 0
    locationToTimestamp = {}
    for segment in whisper_analysis['segments']:
        for word in segment['words']:
            newIndex = index + len(word['text']) + 1
            locationToTimestamp[(index, newIndex)] = word['end']
            index = newIndex
    return locationToTimestamp

def cleanWord(word):
    return re.sub(r'[^\w\s\-_"\']', '', word)

def interpolateTimeFromDict(word_position, d):
    for key, value in d.items():
        if key[0] <= word_position <= key[1]:
            return value
    return None

def getCaptionsWithTime(whisper_analysis, maxCaptionSize=15, considerPunctuation=False):
    wordLocationToTime = getTimestampMapping(whisper_analysis)
    position = 0
    start_time = 0
    CaptionsPairs = []
    text = whisper_analysis['text']

    if considerPunctuation:
        sentences = re.split(r'(?<=[.!?]) +', text)
        words = [word for sentence in sentences for word in splitWordsBySize(sentence.split(), maxCaptionSize)]
    else:
        words = text.split()
        words = splitWordsBySize(words, maxCaptionSize)

    for word in words:
        cleaned_word = cleanWord(word)
        position += len(word) + 1
        end_time = interpolateTimeFromDict(position, wordLocationToTime)
        if end_time and cleaned_word:
            CaptionsPairs.append(((start_time, end_time), cleaned_word))
            start_time = end_time

    return CaptionsPairs
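
An end-to-end sketch of the captioning step (the Whisper model downloads on first use; the timings shown are illustrative):

    from utility.timed_captions_generator import generate_timed_captions

    captions = generate_timed_captions("audio_tts.mp3")
    # e.g. [((0, 1.74), 'Bananas are berries'), ((1.74, 3.06), "but strawberries aren't"), ...]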