diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000000000000000000000000000000000000..8491816397fe82d64f64475d98865defbb361005 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,33 @@ +{ + "name": "Python 3", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", + "customizations": { + "codespaces": { + "openFiles": [ + "README.md", + "app.py" + ] + }, + "vscode": { + "settings": {}, + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance" + ] + } + }, + "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y in the Authorize button or headers. + +--- +### 📡 API Endpoints Summary +| Endpoint | Method | Description | +|--------------------------|--------|---------------------------| +| /api/v1/audio/generate | POST | Generate audio from text | +| /api/v1/image/generate | POST | Generate image from text | +| /api/v1/video/generate | POST | Generate video from text | +| /api/v1/download | GET | Download generated file | + +--- +###📦 Deployment (Streamlit/Optional UI) +Option 1: Run with Streamlit (for demo) +streamlit run streamlit_ui.py + +Option 2: Docker (Production-ready) +docker build -t media-gen-api . +docker run -p 8000:8000 media-gen-api + +--- +### 📊 Metrics Logging (Optional) +- BLEU score and CLIPScore (WIP) +- Latency, GPU/CPU tracking +- Log file: logs/generation.log + +--- +#### 📋 Submission Checklist +- ✅ RESTful modular architecture +- ✅ Multi-format (MP4, PNG, WAV) +- ✅ Token Auth + Swagger UI +- ✅ Compatible with DD/PIB via API +- ✅ Streamlit demo app (optional) + + diff --git a/Spacefile b/Spacefile new file mode 100644 index 0000000000000000000000000000000000000000..6efb1315b4dc60ad47986f2f8d82d1318fb94405 --- /dev/null +++ b/Spacefile @@ -0,0 +1,5 @@ +[build] +builder = "heroku/buildpacks:20" + +[run] +command = "uvicorn main:app --host 0.0.0.0 --port $PORT" diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/api/v1/__init__.py b/app/api/v1/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/api/v1/audio.py b/app/api/v1/audio.py new file mode 100644 index 0000000000000000000000000000000000000000..3eca9cd1e58f652335845ab0d5367512502122f5 --- /dev/null +++ b/app/api/v1/audio.py @@ -0,0 +1,34 @@ +from fastapi import APIRouter, HTTPException, Body +from fastapi.responses import Response # ✅ add this +from pydantic import BaseModel +from gtts import gTTS +import uuid +import os +router = APIRouter() + +class AudioRequest(BaseModel): + text: str + voice: str = "default" + language: str = "en" + +@router.post("/generate") +def generate_audio_endpoint(payload: AudioRequest): + try: + # ✅ Save inside generated/audio for consistency + filename = f"audio_{uuid.uuid4().hex}.mp3" + folder = "generated/audio" + os.makedirs("generated_audio", exist_ok=True) + file_path = f"generated_audio/{filename}" # ✅ match your video & image folders + + # ✅ Generate TTS audio + tts = gTTS(text=payload.text, lang=payload.language) + tts.save(file_path) + + # ✅ Return audio 
bytes for inline Streamlit playback + with open(file_path, "rb") as f: + audio_bytes = f.read() + + return Response(content=audio_bytes, media_type="audio/mpeg") + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) diff --git a/app/api/v1/download.py b/app/api/v1/download.py new file mode 100644 index 0000000000000000000000000000000000000000..5c357b521d23cbce2f0ab2a53ae5195434fdca6a --- /dev/null +++ b/app/api/v1/download.py @@ -0,0 +1,32 @@ +from fastapi import APIRouter, HTTPException, Query +from fastapi.responses import FileResponse +import os + +router = APIRouter() + +@router.get("/") +def download_file(file_path: str = Query(..., description="Relative path from project root")): + print(f"🔍 Requested file path: {file_path}") + + # Sanitize and resolve absolute path + full_path = os.path.abspath(file_path) + + # Ensure file is inside your allowed folder (to prevent directory traversal) + allowed_root = os.path.abspath("generated") + if not full_path.startswith(allowed_root): + raise HTTPException(status_code=400, detail="Invalid file path") + + print(f"📂 Resolved full path: {full_path}") + + if not os.path.isfile(full_path): + print("❌ File not found.") + raise HTTPException(status_code=404, detail="File not found") + + # Set correct media type dynamically (you can refine this later) + media_type = "audio/mpeg" if full_path.endswith(".mp3") else "image/png" + + return FileResponse( + full_path, + media_type=media_type, + filename=os.path.basename(full_path) + ) diff --git a/app/api/v1/image.py b/app/api/v1/image.py new file mode 100644 index 0000000000000000000000000000000000000000..37ec2a45db22073f81636470896a558db009a90b --- /dev/null +++ b/app/api/v1/image.py @@ -0,0 +1,41 @@ +from fastapi import APIRouter, HTTPException, Depends, Body +from fastapi.responses import Response +from pydantic import BaseModel +from app.auth.auth import verify_token +import requests +import os +from pydantic import BaseModel +from dotenv import load_dotenv +load_dotenv() + + +# ✅ Define router +router = APIRouter() + +# ✅ Define Request schema +class ImageRequest(BaseModel): + prompt: str + style: str = "default" + +UNSPLASH_ACCESS_KEY = os.getenv("UNSPLASH_ACCESS_KEY") # store this in .env +print(f"unsplash key is: {UNSPLASH_ACCESS_KEY}") + +# ✅ Endpoint +@router.post("/generate") +def generate_image_file_endpoint( + data: ImageRequest = Body(...), + token: str = Depends(verify_token) +): + query = f"{data.prompt} {data.style}" + url = f"https://api.unsplash.com/photos/random?query={query}&client_id={UNSPLASH_ACCESS_KEY}&orientation=landscape" + + try: + r = requests.get(url) + r.raise_for_status() + image_url = r.json()["urls"]["regular"] + img_data = requests.get(image_url).content + return Response(content=img_data, media_type="image/jpeg") + + except Exception as e: + print(f"❌ Image fetch failed: {str(e)}") + raise HTTPException(status_code=500, detail="Image generation failed.") diff --git a/app/api/v1/metrics.py b/app/api/v1/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..dab081cf2831aed30ad2f9ea8c2f37c38c10c819 --- /dev/null +++ b/app/api/v1/metrics.py @@ -0,0 +1,24 @@ +# app/api/v1/metrics.py +from fastapi import APIRouter, HTTPException, Query, Depends, Request +from typing import List +from sklearn.metrics import accuracy_score +from nltk.translate.bleu_score import sentence_bleu +from sentence_transformers import SentenceTransformer, util +from app.auth.auth import verify_token + +router = APIRouter() +model = 
SentenceTransformer("clip-ViT-B-32") # for CLIP-like semantic score + +@router.post("/evaluate/bleu", dependencies=[Depends(verify_token)]) +def compute_bleu(reference: str, candidate: str): + ref_tokens = [reference.split()] + cand_tokens = candidate.split() + score = sentence_bleu(ref_tokens, cand_tokens) + return {"metric": "BLEU", "score": score} + +@router.post("/evaluate/clipscore") +def compute_clip_score(reference: str, candidate: str): + ref_emb = model.encode(reference, convert_to_tensor=True) + cand_emb = model.encode(candidate, convert_to_tensor=True) + score = util.cos_sim(ref_emb, cand_emb).item() + return {"metric": "CLIPScore", "score": score} diff --git a/app/api/v1/ppt.py b/app/api/v1/ppt.py new file mode 100644 index 0000000000000000000000000000000000000000..664fec6ed6336645e1aa08a09831d5da6abb4d77 --- /dev/null +++ b/app/api/v1/ppt.py @@ -0,0 +1,24 @@ +# app/api/v1/ppt.py +from fastapi import APIRouter, HTTPException, Query, Depends, Request +from pydantic import BaseModel +from typing import List +from app.services.ppt_service import generate_ppt_file +from app.auth.auth import verify_token + +router = APIRouter() + +class Slide(BaseModel): + title: str + content: str + +class PPTInput(BaseModel): + slides: List[Slide] + +@router.post("/generate") +def generate_ppt(payload: PPTInput): + filename = generate_ppt_file([slide.dict() for slide in payload.slides]) + return { + "message": "PPT generated successfully", + "filename": filename, + "download_url": f"/api/v1/download?file_path=generated/ppt/{filename}" + } diff --git a/app/api/v1/utils.py b/app/api/v1/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..431a49d2c6550c36587d3c4931e8a2ed57d3df52 --- /dev/null +++ b/app/api/v1/utils.py @@ -0,0 +1,9 @@ +# app/api/v1/utils.py +from fastapi.responses import FileResponse + +def download_file(file_path: str): + return FileResponse( + path=file_path, + filename=file_path.split("/")[-1], + media_type="application/octet-stream" + ) diff --git a/app/api/v1/video.py b/app/api/v1/video.py new file mode 100644 index 0000000000000000000000000000000000000000..4d22f6a2433a80202ba1be445d90c2458e4f7a8a --- /dev/null +++ b/app/api/v1/video.py @@ -0,0 +1,45 @@ +# app/api/v1/video.py +from fastapi import APIRouter, HTTPException, Depends, Body +from fastapi.responses import FileResponse +from pydantic import BaseModel +from app.services.video_service import generate_video_file +from app.auth.auth import verify_token +import os +from typing import Optional + +# ✅ Define router FIRST +router = APIRouter() + +class VideoInput(BaseModel): + prompt: str + tone: str + domain: str + environment: str + transcript: Optional[str] = None + +@router.post("/generate") +def generate_video_endpoint( + payload: VideoInput = Body(...), + token: str = Depends(verify_token) +): + try: + # Generate video file + filename = generate_video_file( + script=payload.prompt, + duration=10 # Optional: could be dynamic + ) + video_path = os.path.join("generated/video", filename) + + if not os.path.exists(video_path): + raise HTTPException(status_code=500, detail="Video not found") + + # ✅ Return the actual file for Streamlit to play + return FileResponse( + video_path, + media_type="video/mp4", + filename=filename + ) + + except Exception as e: + print("❌ Video generation error:", str(e)) + raise HTTPException(status_code=500, detail=str(e)) diff --git a/app/auth/auth.py b/app/auth/auth.py new file mode 100644 index 
0000000000000000000000000000000000000000..4e5cfb92ab7ea707292409d05836f264d37b4e86 --- /dev/null +++ b/app/auth/auth.py @@ -0,0 +1,25 @@ +#from fastapi import Depends, HTTPException, status +from fastapi import HTTPException, Security +from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from starlette.status import HTTP_403_FORBIDDEN + +from fastapi import Security +from fastapi.security import HTTPBearer + +bearer_scheme = HTTPBearer() + +#security = HTTPBearer() + +from fastapi import Header, HTTPException, Depends + +VALID_TOKENS = ["my_secure_token_123"] # or load from file/db/env + +def verify_token(credentials: HTTPAuthorizationCredentials = Security(bearer_scheme)): + token = credentials.credentials + # Replace with your actual logic (static check shown here) + if token != "my_secure_token_123": + raise HTTPException( + status_code=HTTP_403_FORBIDDEN, + detail="Invalid or expired token" + ) + diff --git a/app/core/config.py b/app/core/config.py new file mode 100644 index 0000000000000000000000000000000000000000..089d6178dac39cfe00b49bb990e2d644a388c1bf --- /dev/null +++ b/app/core/config.py @@ -0,0 +1,6 @@ +import os + +class Settings: + API_KEY: str = os.getenv("API_KEY", "dummy-api-key") + +settings = Settings() diff --git a/app/db.py b/app/db.py new file mode 100644 index 0000000000000000000000000000000000000000..7f8b9e2177da6eae003c90a75360e2b728c18c17 --- /dev/null +++ b/app/db.py @@ -0,0 +1,7 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +DATABASE_URL = "sqlite:///./media_gen.db" + +engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False}) +SessionLocal = sessionmaker(bind=engine, autoflush=False) diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000000000000000000000000000000000000..3350ee62bbda94f251c4a420b15058709271277f --- /dev/null +++ b/app/main.py @@ -0,0 +1,39 @@ +# app/main.py +from fastapi import FastAPI, Depends +from fastapi.security import HTTPBearer +from fastapi import Security + +from fastapi import FastAPI + +from app.api.v1.audio import router as audio_router +from app.api.v1.video import router as video_router +from app.api.v1.image import router as image_router +from app.api.v1.ppt import router as ppt_router +from app.api.v1.metrics import router as metrics_router +from app.api.v1.download import router as download_router +from fastapi import Security + +from app.auth.auth import verify_token + +bearer_scheme = HTTPBearer() + + +app = FastAPI( + title="Media Generation API", + description="Generate audio, video, image, and PPT content via secure endpoints.", + version="1.0.0" +) + +# Root for health check +@app.get("/") +def root(): + return {"message": "FastAPI running successfully!"} + +# Registering route modules +app.include_router(audio_router, prefix="/api/v1/audio", tags=["Audio"], dependencies=[Depends(verify_token)]) +app.include_router(video_router, prefix="/api/v1/video", tags=["Video"], dependencies=[Depends(verify_token)]) +app.include_router(image_router, prefix="/api/v1/image", tags=["Image"], dependencies=[Depends(verify_token)]) +app.include_router(ppt_router, prefix="/api/v1/ppt", tags=["PPT"], dependencies=[Depends(verify_token)]) +app.include_router(metrics_router, prefix="/api/v1/metrics", tags=["Metrics"], dependencies=[Depends(verify_token)]) +app.include_router(download_router, prefix="/api/v1/download", tags=["Download"]) + diff --git a/app/models.py b/app/models.py new file mode 100644 index 
0000000000000000000000000000000000000000..02ec785077c1d966d67f19044a5cd1945eb1b0fc --- /dev/null +++ b/app/models.py @@ -0,0 +1,14 @@ +from sqlalchemy import Column, Integer, String, DateTime +from sqlalchemy.ext.declarative import declarative_base +from datetime import datetime + +Base = declarative_base() + +class MediaGeneration(Base): + __tablename__ = "media_generations" + + id = Column(Integer, primary_key=True, index=True) + media_type = Column(String) + prompt = Column(String) + file_path = Column(String) + timestamp = Column(DateTime, default=datetime.utcnow) diff --git a/app/services/audio_service.py b/app/services/audio_service.py new file mode 100644 index 0000000000000000000000000000000000000000..b80bf6f0f579212fabc751a4b11f116b4df0b3d0 --- /dev/null +++ b/app/services/audio_service.py @@ -0,0 +1,39 @@ +# app/services/audio_service.py +from gtts import gTTS +import os +from datetime import datetime +from app.db import SessionLocal +from app.models import MediaGeneration +import logging +logger = logging.getLogger(__name__) +import uuid + +def generate_audio_file(text: str, voice: str = "default", language: str = "en") -> str: + try: + tts = gTTS(text=text, lang=language, slow=False) + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + filename = f"audio_{timestamp}.mp3" + output_dir = "generated/audio" + os.makedirs(output_dir, exist_ok=True) + file_path = os.path.join(output_dir, filename) + tts.save(file_path) + logger.info(f"Generated Audio: {filename}") + return file_path + except: + logger.error(f"Audio Generation Failed: {str(e)}") + raise + + +from app.db import SessionLocal +from app.models import MediaGeneration + +def save_metadata(media_type, prompt, file_path): + db = SessionLocal() + record = MediaGeneration( + media_type=media_type, + prompt=prompt, + file_path=file_path, + ) + db.add(record) + db.commit() + db.close() diff --git a/app/services/image_service.py b/app/services/image_service.py new file mode 100644 index 0000000000000000000000000000000000000000..6e7762e4a245d5f904c4a44163b0b1f309efef4b --- /dev/null +++ b/app/services/image_service.py @@ -0,0 +1,42 @@ +# app/services/image_service.py +import os +from datetime import datetime +from app.db import SessionLocal +from app.models import MediaGeneration +import logging +logger = logging.getLogger(__name__) + + +def generate_image_file(prompt: str, style: str = "default") -> str: + try: + # Simulate saving a generated image file + filename = f"image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png" + folder = "generated/image" + os.makedirs(folder, exist_ok=True) + + # Placeholder: Simulate image generation by writing prompt text to a file + with open(os.path.join(folder, filename), "w") as f: + f.write(f"Prompt: {prompt}\nStyle: {style}") + logger.info(f"Generated Image: {filename}") + if os.path.isfile(output_path): + print(f"✅ Image created: {output_path}, size = {os.path.getsize(output_path)} bytes") + else: + print(f"❌ Image file not found at: {output_path}") + return filename + except: + logger.error(f"Image Geneartion failed: {str(e)}") + raise + +from app.db import SessionLocal +from app.models import MediaGeneration + +def save_metadata(media_type, prompt, file_path): + db = SessionLocal() + record = MediaGeneration( + media_type=media_type, + prompt=prompt, + file_path=file_path, + ) + db.add(record) + db.commit() + db.close() diff --git a/app/services/ppt_service.py b/app/services/ppt_service.py new file mode 100644 index 
0000000000000000000000000000000000000000..093b17caedb3356fcb0e179dd762d9fa69154ff9 --- /dev/null +++ b/app/services/ppt_service.py @@ -0,0 +1,34 @@ +# app/services/ppt_service.py +import os +from datetime import datetime +from app.db import SessionLocal +from app.models import MediaGeneration +import logging +logger = logging.getLogger(__name__) + +def generate_ppt_file(slides: list[dict]) -> str: + try: + filename = f"ppt_{datetime.now().strftime('%Y%m%d_%H%M%S')}.ppt" + folder = "generated/ppt" + os.makedirs(folder, exist_ok=True) + + with open(os.path.join(folder, filename), "w") as f: + for i, slide in enumerate(slides, 1): + f.write(f"Slide {i}:\nTitle: {slide['title']}\nContent: {slide['content']}\n\n") + logger.info(f"Generated PPT: {filename}") + return filename + except: + logger.error(f"PPT Generation failed: {str(e)}") + raise + + +def save_metadata(media_type, prompt, file_path): + db = SessionLocal() + record = MediaGeneration( + media_type=media_type, + prompt=prompt, + file_path=file_path, + ) + db.add(record) + db.commit() + db.close() \ No newline at end of file diff --git a/app/services/video_service - Copy.py b/app/services/video_service - Copy.py new file mode 100644 index 0000000000000000000000000000000000000000..5f514f0ea3319f52ab71e51cf396d216d134dd6b --- /dev/null +++ b/app/services/video_service - Copy.py @@ -0,0 +1,35 @@ +# app/services/video_service.py +import os +from datetime import datetime +from app.db import SessionLocal +from app.models import MediaGeneration +import logging +logger = logging.getLogger(__name__) + +def generate_video_file(script: str, duration: int = 10) -> str: + try: + # Simulate saving a generated video file + filename = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4" + folder = "generated/video" + os.makedirs(folder, exist_ok=True) + + # Placeholder: Simulate video generation by writing script info to a file + with open(os.path.join(folder, filename), "w") as f: + f.write(f"Script: {script}\nDuration: {duration} seconds") + logger.info(f"Generated Video: {filename}") + return filename + except: + logger.error(f"Video generation failed: {str(e)}") + raise + + +def save_metadata(media_type, prompt, file_path): + db = SessionLocal() + record = MediaGeneration( + media_type=media_type, + prompt=prompt, + file_path=file_path, + ) + db.add(record) + db.commit() + db.close() \ No newline at end of file diff --git a/app/services/video_service.py b/app/services/video_service.py new file mode 100644 index 0000000000000000000000000000000000000000..e6de69370c0b1d957751b486e6201d27037e9127 --- /dev/null +++ b/app/services/video_service.py @@ -0,0 +1,94 @@ +# app/services/video_service.py + +import os +import uuid +import requests +from gtts import gTTS +from mutagen.mp3 import MP3 +from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip +from dotenv import load_dotenv + +load_dotenv() + +UNSPLASH_KEY = os.getenv("UNSPLASH_ACCESS_KEY") +UNSPLASH_API = "https://api.unsplash.com/photos/random" + +def fetch_unsplash_images(query, count=3): + headers = {"Accept-Version": "v1", "Authorization": f"Client-ID {UNSPLASH_KEY}"} + urls = [] + + for _ in range(count): + r = requests.get(UNSPLASH_API, params={"query": query}, headers=headers) + if r.status_code == 200: + data = r.json() + if isinstance(data, dict): + urls.append(data["urls"]["regular"]) + elif isinstance(data, list) and len(data) > 0: + urls.append(data[0]["urls"]["regular"]) + return urls + +def generate_video_file(script: str, duration: int = None) -> str: + 
os.makedirs("generated/video", exist_ok=True) + os.makedirs("generated/audio", exist_ok=True) + os.makedirs("generated/tmp", exist_ok=True) + + video_filename = f"video_{uuid.uuid4().hex}.mp4" + video_path = os.path.join("generated/video", video_filename) + audio_path = f"generated/audio/audio_{uuid.uuid4().hex}.mp3" + + # Step 1: Generate audio + tts = gTTS(text=script, lang='en') + tts.save(audio_path) + + # Get audio duration (fallback if 0) + audio = MP3(audio_path) + audio_duration = max(audio.info.length, 3.0) # ensure at least 3s + + # Step 2: Fetch Unsplash images + images = fetch_unsplash_images(script, count=3) + if not images: + raise Exception("No images found from Unsplash for the prompt") + + # Step 3: Create slideshow clips + clips = [] + per_image_duration = audio_duration / len(images) + tmp_files = [] + + for url in images: + img_data = requests.get(url).content + tmp_file = f"generated/tmp/tmp_{uuid.uuid4().hex}.jpg" + tmp_files.append(tmp_file) + + with open(tmp_file, "wb") as f: + f.write(img_data) + + clip = ImageClip(tmp_file).resize(height=720).set_duration(per_image_duration) + clips.append(clip) + + # Step 4: Concatenate without negative padding + final_clip = concatenate_videoclips(clips, method="compose") + + # Step 5: Force duration to match audio + final_clip = final_clip.set_duration(audio_duration) + + # Step 6: Add audio + final_clip = final_clip.set_audio(AudioFileClip(audio_path)) + + # Step 7: Export video + final_clip.write_videofile( + video_path, + fps=24, + codec="libx264", + audio_codec="aac", + threads=4, + preset="ultrafast" + ) + + # Cleanup + for file in tmp_files: + try: + os.remove(file) + except: + pass + + return video_filename diff --git a/assets/default.jpg b/assets/default.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f73a81ae651cbd5af14c4c47bb4d3bb9bae64779 Binary files /dev/null and b/assets/default.jpg differ diff --git a/assets/logo_watermark.png b/assets/logo_watermark.png new file mode 100644 index 0000000000000000000000000000000000000000..8e815d9fa66d6d7b086fa6daf740e22d6d401026 --- /dev/null +++ b/assets/logo_watermark.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7a35246c3c7c6df8cf325d6a1ea7f45e4ce0899303cc68a3771f8193cff0c35 +size 5238 diff --git a/backend/media_gen.py b/backend/media_gen.py new file mode 100644 index 0000000000000000000000000000000000000000..bbd36bbc09271628e6b56a156bc30d903276ecc3 --- /dev/null +++ b/backend/media_gen.py @@ -0,0 +1,187 @@ +# ✅ Updated media_gen.py with file logging + UI debug toggle +import os +import re +import logging +import streamlit as st +import requests +from PIL import Image, UnidentifiedImageError +from io import BytesIO +from dotenv import load_dotenv +from moviepy.editor import ImageClip, AudioFileClip +from elevenlabs import generate, save, set_api_key +from googletrans import Translator +from PIL import ImageEnhance, Image +import tempfile + +# Load env vars +load_dotenv() + +# Logging setup +logging.basicConfig( + filename="app.log", + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) + +# Constants +OUTPUT_DIR = "outputs" +DEFAULT_IMAGE = "assets/fallback.jpg" +WATERMARK_PATH = "assets/logo_watermark.png" +UNSPLASH_ACCESS_KEY = os.getenv("UNSPLASH_ACCESS_KEY") + +os.makedirs("outputs/audio", exist_ok=True) +os.makedirs("outputs/images", exist_ok=True) +os.makedirs("outputs/videos", exist_ok=True) + +def translate_text(text, target_lang): + return Translator().translate(text, dest=target_lang).text 
+ +def sanitize_filename(text): + return re.sub(r'\W+', '_', text).lower()[:50] + +def apply_watermark(image_path, watermark_path=WATERMARK_PATH): + try: + base = Image.open(image_path).convert("RGBA") + watermark = Image.open(watermark_path).convert("RGBA").resize((100, 100)) + base.paste(watermark, (base.width - 110, base.height - 110), watermark) + base.convert("RGB").save(image_path) + except Exception as e: + logging.error(f"Watermarking failed: {e}") + st.write(f"❌ Watermarking failed: {e}") + +def use_fallback_image(prompt, add_watermark=False): + try: + fallback_path = DEFAULT_IMAGE + output_path = f"outputs/images/{sanitize_filename(prompt)}.jpg" + with Image.open(fallback_path) as img: + img.save(output_path) + if add_watermark: + apply_watermark(output_path) + return output_path + except UnidentifiedImageError: + logging.error("Could not open fallback image.") + st.write("❌ Could not open fallback image.") + return None + +def generate_gtts_fallback(prompt, output_path, lang="en", debug_mode=False): + try: + from gtts import gTTS + tts = gTTS(text=prompt, lang=lang) + tts.save(output_path) + logging.info(f"gTTS fallback audio saved to {output_path}") + if debug_mode: + st.write(f"✅ Fallback audio (gTTS) saved to {output_path}") + return output_path + except Exception as e: + logging.error(f"gTTS fallback failed: {e}") + st.write(f"❌ gTTS fallback failed: {str(e)}") + return None + + +def generate_image(prompt, file_tag, add_watermark=False, dark_mode=False, debug_mode=False): + try: + # Enhance prompt if dark mode is enabled + if dark_mode: + prompt += " at night, dark theme, low light, moody lighting" + + url = f"https://api.unsplash.com/photos/random?query={requests.utils.quote(prompt)}&client_id={UNSPLASH_ACCESS_KEY}" + response = requests.get(url, timeout=10) + response.raise_for_status() + image_url = response.json()["urls"]["regular"] + image_response = requests.get(image_url, timeout=10) + image_response.raise_for_status() + + output_path = f"outputs/images/{sanitize_filename(prompt)}.jpg" + img = Image.open(BytesIO(image_response.content)) + img.convert("RGB").save(output_path) + + if add_watermark: + apply_watermark(output_path) + + return output_path + + except Exception as e: + logging.error(f"Image generation failed: {e}") + st.write("🔁 Unsplash failed. Using fallback.") + st.write(f"❌ Image generation failed: {e}") + return use_fallback_image(prompt, add_watermark=add_watermark) + + +# ✅ Updated generate_audio with proper language handling + +def generate_audio(prompt, output_path, debug_mode=False, lang="en"): + try: + api_key = os.getenv("ELEVEN_API_KEY") or st.secrets.get("ELEVEN_API_KEY", None) + + # Use gTTS for non-English languages + if lang != "en": + if debug_mode: + st.write(f"🌐 Non-English language selected: {lang}. Using gTTS.") + return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode) + + if api_key: + if debug_mode: + st.write(f"✅ ELEVEN_API_KEY loaded: {api_key[:4]}...****") + + set_api_key(api_key) + if debug_mode: + st.write(f"🎧 Generating audio for prompt: {prompt}") + + try: + audio = generate(text=prompt, voice="Aria", model="eleven_monolingual_v1") + save(audio, output_path) + logging.info(f"Audio saved successfully to {output_path}") + + if debug_mode: + st.write(f"🔍 File exists after save? 
{os.path.exists(output_path)}") + st.write(f"✅ Audio saved successfully to {output_path}") + return output_path + + except Exception as e: + logging.warning(f"ElevenLabs failed: {e}") + if debug_mode: + st.write(f"⚠️ ElevenLabs failed: {str(e)}") + st.write("🔁 Falling back to gTTS...") + return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode) + + else: + logging.warning("ELEVEN_API_KEY not found") + if debug_mode: + st.write("❌ ELEVEN_API_KEY not found. Falling back to gTTS.") + return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode) + + except Exception as e: + logging.error(f"Exception during audio generation setup: {e}") + if debug_mode: + st.write(f"❌ Exception during audio generation setup: {str(e)}") + st.write("🔁 Falling back to gTTS...") + return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode) + + +def generate_video(prompt, image_path, audio_path, output_path, add_watermark=False, dark_mode=False): + try: + # If dark_mode, darken the image temporarily + if dark_mode: + with Image.open(image_path) as img: + enhancer = ImageEnhance.Brightness(img) + darker_img = enhancer.enhance(0.5) # Reduce brightness to 50% + + # Save to a temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp: + temp_image_path = tmp.name + darker_img.save(temp_image_path) + image_path = temp_image_path + + audio_clip = AudioFileClip(audio_path) + image_clip = ImageClip(image_path).set_duration(audio_clip.duration).resize(height=720) + video = image_clip.set_audio(audio_clip) + + output_path = f"outputs/videos/{sanitize_filename(prompt)}.mp4" + video.write_videofile(output_path, fps=24, codec="libx264", audio_codec="aac", verbose=False, logger=None) + return output_path + + except Exception as e: + logging.error(f"Video generation failed: {e}") + st.write(f"❌ Video generation failed: {e}") + return None + diff --git a/backend/subtitle_utils.py b/backend/subtitle_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cbfe9359c9f7a07f3837d86ea6aead937d291368 --- /dev/null +++ b/backend/subtitle_utils.py @@ -0,0 +1,84 @@ +from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip +from moviepy.video.tools.subtitles import SubtitlesClip +import srt +from datetime import timedelta +import os + + +def export_srt(text, duration=10, words_per_caption=6, output_path="output.srt"): + """ + Converts text into SRT subtitles and saves to output_path. + """ + lines = [] + words = text.split() + start = 0 + index = 1 + while start < len(words): + end = start + words_per_caption + chunk = words[start:end] + content = " ".join(chunk) + start_time = timedelta(seconds=(index - 1) * duration) + end_time = timedelta(seconds=index * duration) + sub = srt.Subtitle(index=index, start=start_time, end=end_time, content=content) + lines.append(sub) + start += words_per_caption + index += 1 + + srt_data = srt.compose(lines) + with open(output_path, "w", encoding="utf-8") as f: + f.write(srt_data) + return output_path + + +def add_subtitles_and_bgm( + video_path, + srt_path, + bgm_path, + output_path="enhanced_output.mp4", + font="Arial-Bold", + font_size=36, + font_color="white", + subtitle_position=("center", "bottom") +): + """ + Adds subtitles from .srt and background music to the given video. 
+ """ + # Load video + video = VideoFileClip(video_path) + + # Parse .srt file + with open(srt_path, "r", encoding="utf-8") as f: + subtitles = list(srt.parse(f.read())) + + # Create subtitle clips + def make_textclip(txt): + return TextClip(txt, font=font, fontsize=font_size, color=font_color, stroke_color='black', stroke_width=2) + + subtitle_clips = [] + for sub in subtitles: + txt_clip = (make_textclip(sub.content) + .set_position(subtitle_position) + .set_start(sub.start.total_seconds()) + .set_duration((sub.end - sub.start).total_seconds())) + subtitle_clips.append(txt_clip) + + # Background music + if os.path.exists(bgm_path): + bgm = AudioFileClip(bgm_path).volumex(0.2) # reduce volume + bgm = bgm.set_duration(video.duration) + final_audio = video.audio.volumex(0.8).audio_fadein(1).audio_fadeout(1).set_duration(video.duration) + final_audio = final_audio.set_audio(bgm) + else: + final_audio = video.audio + + final = CompositeVideoClip([video, *subtitle_clips]) + final = final.set_audio(final_audio) + + # Export final video + final.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=video.fps) + return output_path + + +# Aliases for compatibility with streamlit_ui.py +generate_srt_from_text = export_srt +enhance_video_with_subtitles_and_bgm = add_subtitles_and_bgm diff --git a/default_bgm.mp3 b/default_bgm.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f0d628ecf7a04df2e1e02d2d947f711b83b73776 --- /dev/null +++ b/default_bgm.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04217a47ea351a5332f063822e233536c99b5c99fbbac8f806e9ea2470249327 +size 160514 diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000000000000000000000000000000000000..9544d6eac8128a7f8b3bfbeb18fbdad1a2d01312 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,62 @@ + + + + + + + OSN Media Generator + + + +
+ 🎮 OSN Media Generator
+ Welcome to the official homepage of OSN Media Generator, created by O.S.Narayana.
+ This app helps you generate audio, images, and videos using AI tools like ElevenLabs, Unsplash, and Streamlit.
+ 👉 Try the app now: Launch OSN Media Generator
+ 📄 View our Privacy Policy
+ + diff --git a/docs/privacy.html b/docs/privacy.html new file mode 100644 index 0000000000000000000000000000000000000000..fa096b59631bc2a73d92fa6a9b298d3d4d8b9d74 --- /dev/null +++ b/docs/privacy.html @@ -0,0 +1,68 @@ + + + + + + Privacy Policy - OSN Media Generator + + + + + + +

+ 🔐 Privacy Policy for OSN Media Generator
+ Effective Date: July 19, 2025
+ Your privacy is important to us. This app is built to prioritize user data security and transparency.
+ 1. What We Collect
+ 2. API Usage
+ 3. Data Storage
+ 4. Analytics & Ads
+ 5. Changes to Policy
+ Any updates to this policy will be reflected in this file on the GitHub repository.
+ 6. Contact
+ For questions or issues, contact: osnarayana@gmail.com

+ + + diff --git a/fly.toml b/fly.toml new file mode 100644 index 0000000000000000000000000000000000000000..1dae7d65e2e8284db6f8fedd22e620e85dff9984 --- /dev/null +++ b/fly.toml @@ -0,0 +1,25 @@ +app = "media-gen-api" + +[build] + image = "tiangolo/uvicorn-gunicorn-fastapi:python3.10" + +[env] + PYTHONUNBUFFERED = "1" + +[[services]] + internal_port = 80 + protocol = "tcp" + + [[services.ports]] + handlers = ["http"] + port = 80 + + [[services.ports]] + handlers = ["tls", "http"] + port = 443 + + [[services.tcp_checks]] + interval = "15s" + timeout = "2s" + grace_period = "5s" + restart_limit = 0 diff --git a/generated_audio/audio_12aa4a7f8c6b4e459e88fd7bfa8559fe.mp3 b/generated_audio/audio_12aa4a7f8c6b4e459e88fd7bfa8559fe.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ce0ef75d2127d291ceff714436b43608f568f87d --- /dev/null +++ b/generated_audio/audio_12aa4a7f8c6b4e459e88fd7bfa8559fe.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb +size 9024 diff --git a/generated_audio/audio_2084def7399c47fc81f1ea75a2bb38df.mp3 b/generated_audio/audio_2084def7399c47fc81f1ea75a2bb38df.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ce0ef75d2127d291ceff714436b43608f568f87d --- /dev/null +++ b/generated_audio/audio_2084def7399c47fc81f1ea75a2bb38df.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb +size 9024 diff --git a/generated_audio/audio_46204c7f1cdd4d21b632318c1533798f.mp3 b/generated_audio/audio_46204c7f1cdd4d21b632318c1533798f.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..48f05723639334ef7c14e31fe9cbace778e110d0 --- /dev/null +++ b/generated_audio/audio_46204c7f1cdd4d21b632318c1533798f.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a752e374c3cd105ec751eb30df12b3a0939e0500e7e2487b41e4269050e44906 +size 57216 diff --git a/generated_audio/audio_4d6439239f3b4099b1e6fde8a9331613.mp3 b/generated_audio/audio_4d6439239f3b4099b1e6fde8a9331613.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..da89f033971a907fb58469aca7fe1f34c10fb8d6 --- /dev/null +++ b/generated_audio/audio_4d6439239f3b4099b1e6fde8a9331613.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6924a2184a3a139ef4d6d9bc5e1e08d3b1b69d7e6904f8acb458d40cbc90fc73 +size 9024 diff --git a/generated_audio/audio_4e8cfaddd1e143a4a7a11a8baeafa178.mp3 b/generated_audio/audio_4e8cfaddd1e143a4a7a11a8baeafa178.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..95f40d275c0af8ca52cc60724af1c9e91eb2f521 --- /dev/null +++ b/generated_audio/audio_4e8cfaddd1e143a4a7a11a8baeafa178.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f2d93711bae321f7bfe7f698a0a5be8b0b6462c862cf196a95a621931089ba6 +size 20160 diff --git a/generated_audio/audio_5d3d0ac54e0e4b7ab91a88a5e6cb5a7a.mp3 b/generated_audio/audio_5d3d0ac54e0e4b7ab91a88a5e6cb5a7a.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..da89f033971a907fb58469aca7fe1f34c10fb8d6 --- /dev/null +++ b/generated_audio/audio_5d3d0ac54e0e4b7ab91a88a5e6cb5a7a.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6924a2184a3a139ef4d6d9bc5e1e08d3b1b69d7e6904f8acb458d40cbc90fc73 +size 9024 diff --git a/generated_audio/audio_6b4a3eaef6c745d4932063aba95541e5.mp3 
b/generated_audio/audio_6b4a3eaef6c745d4932063aba95541e5.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ce0ef75d2127d291ceff714436b43608f568f87d --- /dev/null +++ b/generated_audio/audio_6b4a3eaef6c745d4932063aba95541e5.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb +size 9024 diff --git a/generated_audio/audio_772adc2bcf6a49e28757294a6157a71c.mp3 b/generated_audio/audio_772adc2bcf6a49e28757294a6157a71c.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ad37d18f633c777abfbef9beb5dce017c43e07ac --- /dev/null +++ b/generated_audio/audio_772adc2bcf6a49e28757294a6157a71c.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7168fbdac599d9f1f20f13a88b11188cfca4bf03c6c5e83b10f0446f501a698 +size 35328 diff --git a/generated_audio/audio_785571d23e8c401886fbe9b176071973.mp3 b/generated_audio/audio_785571d23e8c401886fbe9b176071973.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..0ae0efec77ce2f5dfaecce26f19bbc45f649f700 --- /dev/null +++ b/generated_audio/audio_785571d23e8c401886fbe9b176071973.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b9c225a86625e285d88be0b465bdb82177e21876fb7bedce63c313e018f343 +size 47808 diff --git a/generated_audio/audio_8a8be69edd134341afc83be9d200f5c2.mp3 b/generated_audio/audio_8a8be69edd134341afc83be9d200f5c2.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ce0ef75d2127d291ceff714436b43608f568f87d --- /dev/null +++ b/generated_audio/audio_8a8be69edd134341afc83be9d200f5c2.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb +size 9024 diff --git a/generated_audio/audio_8d99b8f248d549f3806c1fe705c8a24c.mp3 b/generated_audio/audio_8d99b8f248d549f3806c1fe705c8a24c.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..6aadba9d6f30fa83c00ccde092a2fbea5d06fc59 --- /dev/null +++ b/generated_audio/audio_8d99b8f248d549f3806c1fe705c8a24c.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bbd01d120150bc2de7f4f6882d9194d8cf67a267ae3505dda8ddcc9b109701c +size 20160 diff --git a/generated_audio/audio_8e0e1a375aa8415eb3be3689fdfc015e.mp3 b/generated_audio/audio_8e0e1a375aa8415eb3be3689fdfc015e.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..6703df267275aa2a7f8eac030ec49d195389d9bb --- /dev/null +++ b/generated_audio/audio_8e0e1a375aa8415eb3be3689fdfc015e.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c348b34bef828c56c5157933592943b8e23c02b00eb1f81cedde7e2b3a4dac7 +size 72384 diff --git a/generated_audio/audio_b3d3a162a45941579686e8bee223032e.mp3 b/generated_audio/audio_b3d3a162a45941579686e8bee223032e.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..da89f033971a907fb58469aca7fe1f34c10fb8d6 --- /dev/null +++ b/generated_audio/audio_b3d3a162a45941579686e8bee223032e.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6924a2184a3a139ef4d6d9bc5e1e08d3b1b69d7e6904f8acb458d40cbc90fc73 +size 9024 diff --git a/generated_audio/audio_d9f41d5ef87c4d088bd7cd230a75cd62.mp3 b/generated_audio/audio_d9f41d5ef87c4d088bd7cd230a75cd62.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d922b694b11fe02500197db118defca46682343d --- /dev/null +++ b/generated_audio/audio_d9f41d5ef87c4d088bd7cd230a75cd62.mp3 @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ba21c2e66aa0ee7d2fb7f5f7e7cc18948526f35cdcc458dc043e2ab16afef8 +size 20160 diff --git a/generated_audio/audio_db2b96e4b94e4bcf8ab796d4fe483e41.mp3 b/generated_audio/audio_db2b96e4b94e4bcf8ab796d4fe483e41.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ad37d18f633c777abfbef9beb5dce017c43e07ac --- /dev/null +++ b/generated_audio/audio_db2b96e4b94e4bcf8ab796d4fe483e41.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7168fbdac599d9f1f20f13a88b11188cfca4bf03c6c5e83b10f0446f501a698 +size 35328 diff --git a/generated_audio/audio_ddccc5c34f7840bdb838254e343c7b14.mp3 b/generated_audio/audio_ddccc5c34f7840bdb838254e343c7b14.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ce0ef75d2127d291ceff714436b43608f568f87d --- /dev/null +++ b/generated_audio/audio_ddccc5c34f7840bdb838254e343c7b14.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb +size 9024 diff --git a/nixpacks.toml b/nixpacks.toml new file mode 100644 index 0000000000000000000000000000000000000000..19f47baac080fac238c40882a6ed5c06945fdd0a --- /dev/null +++ b/nixpacks.toml @@ -0,0 +1,19 @@ +[phases.setup] +nixpkgs = [ + "python311", + "zlib", + "libjpeg", + "gcc", + "pkg-config" +] + +[phases.build] +cmds = [ + "python -m venv /opt/venv", + ". /opt/venv/bin/activate", + "pip install --upgrade pip", + "pip install -r requirements.txt" +] + +[start] +cmd = "streamlit run app.py --server.port 8000 --server.address 0.0.0.0" diff --git a/openapi.json b/openapi.json new file mode 100644 index 0000000000000000000000000000000000000000..1a3dde5767332809c9c6894e18f24048d61f179d --- /dev/null +++ b/openapi.json @@ -0,0 +1 @@ +{"openapi":"3.1.0","info":{"title":"Media Generator API","description":"Generate audio, video, image, and PPT from text","version":"1.0.0"},"paths":{"/api/v1/audio/generate":{"post":{"tags":["Audio"],"summary":"Generate Audio Endpoint","operationId":"generate_audio_endpoint_api_v1_audio_generate_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/api/v1/video/generate":{"post":{"tags":["Video"],"summary":"Generate Video","operationId":"generate_video_api_v1_video_generate_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/VideoInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/api/v1/image/generate":{"post":{"tags":["Image"],"summary":"Generate Image","operationId":"generate_image_api_v1_image_generate_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/api/v1/ppt/generate":{"post":{"tags":["PPT"],"summary":"Generate Ppt","operationId":"generate_ppt_api_v1_ppt_generate_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PPTInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/api/v1/metrics/evaluate/bleu":{"post":{"tags":["Metrics"],"summary":"Compute Bleu","operationId":"compute_bleu_api_v1_metrics_evaluate_bleu_post","parameters":[{"name":"reference","in":"query","required":true,"schema":{"type":"string","title":"Reference"}},{"name":"candidate","in":"query","required":true,"schema":{"type":"string","title":"Candidate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/api/v1/metrics/evaluate/clipscore":{"post":{"tags":["Metrics"],"summary":"Compute Clip Score","operationId":"compute_clip_score_api_v1_metrics_evaluate_clipscore_post","parameters":[{"name":"reference","in":"query","required":true,"schema":{"type":"string","title":"Reference"}},{"name":"candidate","in":"query","required":true,"schema":{"type":"string","title":"Candidate"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/api/v1/download/":{"get":{"tags":["Download"],"summary":"Download File","operationId":"download_file_api_v1_download__get","parameters":[{"name":"file_path","in":"query","required":true,"schema":{"type":"string","description":"Relative path from project root","title":"File Path"},"description":"Relative path from project root"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}}},"components":{"schemas":{"AudioInput":{"properties":{"text":{"type":"string","title":"Text"},"voice":{"type":"string","title":"Voice","default":"default"},"language":{"type":"string","title":"Language","default":"en"}},"type":"object","required":["text"],"title":"AudioInput"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageInput":{"properties":{"prompt":{"type":"string","title":"Prompt"},"style":{"type":"string","title":"Style","default":"default"}},"type":"object","required":["prompt"],"title":"ImageInput"},"PPTInput":{"properties":{"slides":{"items":{"$ref":"#/components/schemas/Slide"},"type":"array","title":"Slides"}},"type":"object","required":["slides"],"title":"PPTInput"},"Slide":{"properties":{"title":{"type":"string","title":"Title"},"content":{"type":"string","title":"Content"}},"type":"object","required":["title","content"],"title":"Slide"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VideoInput":{"properties":{"script":{"type":"string","title":"Script"},"duration":{"type":"integer","title":"Duration","default":10}},"type":"object","required":["script"],"title":"VideoInput"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"}}}} \ No newline at end of file diff --git a/packages.txt b/packages.txt new file mode 100644 index 0000000000000000000000000000000000000000..90360c3dd60d6888645b5007f045b17c2243ff8b --- /dev/null +++ b/packages.txt @@ -0,0 +1,3 @@ +ffmpeg +libjpeg-dev +zlib1g-dev diff --git a/render.yaml b/render.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae415af14b4c896d0850ada23b62d078efaae8cd --- /dev/null +++ b/render.yaml @@ -0,0 +1,9 @@ +services: + - type: web + name: media-gen-api + runtime: python + buildCommand: "pip install -r requirements.txt" + startCommand: "uvicorn backend.main:app --host=0.0.0.0 --port=10000" + envVars: + - key: PORT + value: 10000 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..75999c0b3a81d094750b52c62937b126aa3ea525 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,22 @@ +streamlit +requests +pillow==9.5.0 +sentence_transformers +scikit-learn +nltk +sentry-sdk +moviepy==1.0.3 +srt +fastapi +uvicorn +gtts +pydantic +mutagen +requests +# Required moviepy dependencies +imageio +imageio-ffmpeg +decorator +tqdm +python-multipart +opencv-python-headless \ No newline at end of file diff --git a/screenshots/generated_audio_sample.png b/screenshots/generated_audio_sample.png new file mode 100644 index 0000000000000000000000000000000000000000..353e1616eb49105aa72c04693e11fb0d10b51607 --- /dev/null +++ b/screenshots/generated_audio_sample.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19179978c90042892ae84c7048cd7231fe176ac01e990a4ef2e33c2fcb687f57 +size 19265 diff --git a/screenshots/home_page.png b/screenshots/home_page.png new file mode 100644 index 0000000000000000000000000000000000000000..3b92644d37e957986967c39b52e23ed203f8e61d --- /dev/null +++ b/screenshots/home_page.png @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0606196db2cfd6bed82decaf7aa40e24c52add62e2863265d6f858eb295cda74 +size 75899 diff --git a/scripts/preload_content.py b/scripts/preload_content.py new file mode 100644 index 0000000000000000000000000000000000000000..a9479a75431798d1cbac02d6d9c5f263b00e909b --- /dev/null +++ b/scripts/preload_content.py @@ -0,0 +1,17 @@ +import os +import shutil +from datetime import datetime + +SOURCE_DIR = "generated" +ARCHIVE_DIR = f"archive_{datetime.now().strftime('%Y%m%d_%H%M')}" + +def archive_all(): + if not os.path.exists(SOURCE_DIR): + print("No content to archive.") + return + + shutil.copytree(SOURCE_DIR, ARCHIVE_DIR) + print(f"Archived to {ARCHIVE_DIR}") + +if __name__ == "__main__": + archive_all() diff --git a/streamlit_ui - Copy.py b/streamlit_ui - Copy.py new file mode 100644 index 0000000000000000000000000000000000000000..15a938942ba234d5704d7665c4e7765c972400dc --- /dev/null +++ b/streamlit_ui - Copy.py @@ -0,0 +1,213 @@ +# streamlit_ui.py + +import streamlit as st +import requests +import base64 +from PIL import Image +import io +import os +import tempfile + +st.set_page_config( + page_title="Prompta - Text to Media Generator", + page_icon="🎙️", + layout="wide", + initial_sidebar_state="expanded" +) +st.title("🎙️🖼️🎞️ Prompta - Text to Media Generator") + +# 🛠️ Get Token FIRST +TOKEN = st.sidebar.text_input("🔑 API Token", type="password") +HEADERS = {"Authorization": f"Bearer {TOKEN}"} if TOKEN else {} + +# ✅ Display AFTER token is typed +if TOKEN: + st.sidebar.write("Using token:", TOKEN) + st.sidebar.write("Sending headers:", HEADERS) +else: + st.sidebar.warning("⚠️ Please enter a valid API token to use the app.") + +API_BASE = "http://localhost:8000" + +#API_BASE = "https://2255d6a4793d.ngrok-free.app" + +def render_media(file_bytes, media_type, caption): + b64 = base64.b64encode(file_bytes).decode() + if media_type == "audio": + st.audio(f"data:audio/wav;base64,{b64}", format="audio/wav") + elif media_type == "video": + st.video(f"data:video/mp4;base64,{b64}") + elif media_type == "image": + try: + # Validate if it's a valid image + img = Image.open(io.BytesIO(file_bytes)) + st.image(img, caption=caption) + except Exception as e: + st.warning("⚠️ Cannot render image. 
It may be corrupt or empty.") + st.code(str(e)) + +# Sidebar inputs +st.sidebar.header("🛠️ Settings") +#TOKEN = st.sidebar.text_input("🔑 API Token", type="password") +#HEADERS = {"Authorization": f"Bearer {TOKEN}"} if TOKEN else {} + +voice = st.selectbox("Choose voice", ["en", "hi", "te", "ta"]) +voice_map = { + "en": "en-US", + "hi": "hi-IN", + "te": "te-IN", + "ta": "ta-IN" +} + +tab = st.sidebar.radio("Select Task", ["Text to Audio", "Text to Image", "Text to Video"]) + +if tab == "Text to Audio": + st.subheader("🎤 Text to Audio") + text = st.text_area("Enter text") + voice = st.selectbox("Choose language", ["English", "Hindi", "Telugu", "Tamil"]) + voice_map = { + "English": ("en-US", "en"), + "Hindi": ("hi-IN", "hi"), + "Telugu": ("te-IN", "te"), + "Tamil": ("ta-IN", "ta") + } + voice_code, lang_code = voice_map[voice] + + if st.button("🔊 Generate Audio"): + with st.spinner("Generating audio..."): + r = requests.post( + f"{API_BASE}/api/v1/audio/generate", + json={ + "text": text, + "voice": voice_code, + "language": lang_code + }, + headers=HEADERS + ) + if r.status_code == 200: + try: + data = r.json() + st.code(data, language="json") # Debug: show full JSON response in UI + + if "download_url" in data: + download_url = f"{API_BASE}{data['download_url']}" + audio_resp = requests.get(download_url, headers=HEADERS) + if audio_resp.status_code == 200: + render_media(audio_resp.content, "audio", "Generated Audio") + else: + st.error("❌ Failed to download audio file.") + else: + st.error("❌ `download_url` not found in API response.") + st.code(data) + except Exception as e: + st.error("❌ Failed to parse API response.") + st.code(r.text) + st.exception(e) + else: + st.error(f"❌ Failed: {r.json().get('detail')}") + + +elif tab == "Text to Image": + st.subheader("🖼️ Text to Image") + prompt = st.text_area("Enter image prompt") + style = st.selectbox("Choose Style", ["sdxl", "deepfloyd", "kandinsky"]) + + if st.button("🧠 Generate Image"): + with st.spinner("Generating image..."): + r = requests.post( + f"{API_BASE}/api/v1/image/generate", + json={"prompt": prompt, "style": style}, # ✅ correct key + headers=HEADERS + ) + if r.status_code == 200: + try: + res_json = r.json() + download_url = res_json.get("download_url") + if not download_url: + st.error("No download URL returned.") + else: + download_full_url = f"{API_BASE}{download_url}" + image_response = requests.get(download_full_url, headers={"accept": "image/png"}, allow_redirects=True) + if image_response.status_code != 200: + st.error("❌ Failed to download image.") + st.code(image_response.text) + st.write("Status:", image_response.status_code) + st.write("Headers:", image_response.headers) + st.write(image_response.status_code, image_response.headers) + render_media(image_response.content, "image", "Generated Image") + except Exception as e: + st.error(f"⚠️ Failed to fetch/display image: {str(e)}") + st.code(r.text) + else: + try: + detail = r.json().get("detail") + except Exception: + detail = r.text # fallback to raw response text (may be empty or HTML) + + st.error(f"❌ Failed: {detail}") + +elif tab == "Text to Video": + st.subheader("🎞️ Text to Video") + prompt = st.text_area("Enter video prompt") + tone = st.selectbox("Tone", ["formal", "casual", "emotional", "documentary"]) + domain = st.selectbox("Domain", ["health", "education", "governance", "entertainment"]) + environment = st.selectbox("Environment", ["urban", "rural", "nature", "futuristic"]) + + transcript = st.text_area("Transcript (optional - for subtitles)", 
height=100) + enhance = st.checkbox("✨ Add Subtitles and Background Music") + + if st.button("🎬 Generate Video"): + with st.spinner("Generating video..."): + r = requests.post( + f"{API_BASE}/api/v1/video/generate", + json={"prompt": prompt, "tone": tone, "domain": domain, "environment": environment}, + headers=HEADERS + ) + if r.status_code == 200: + try: + data = r.json() + st.code(data, language="json") + + download_url = data.get("download_url") + if not download_url: + st.error("⚠️ No download URL received.") + else: + full_video_url = f"{API_BASE}{download_url}" + video_response = requests.get(full_video_url, headers=HEADERS) + if video_response.status_code == 200: + video_bytes = video_response.content + st.write("📦 Video size (bytes):", len(video_bytes)) + + if enhance and transcript: + with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_vid: + tmp_vid.write(video_bytes) + tmp_vid_path = tmp_vid.name + + srt_path = generate_srt_from_text(transcript, output_path="streamlit_subs.srt") + enhanced_path = "streamlit_final_video.mp4" + enhance_video_with_subtitles_and_bgm( + video_path=tmp_vid_path, + srt_path=srt_path, + bgm_path="default_bgm.mp3", + output_path=enhanced_path + ) + + with open(enhanced_path, "rb") as f: + render_media(f.read(), "video", "Enhanced Video") + else: + st.video(video_bytes) + else: + st.error("❌ Failed to download video.") + except Exception as e: + st.error("❌ Error parsing response or rendering video.") + st.code(r.text) + st.exception(e) + else: + try: + st.error(f"❌ Failed: {r.json().get('detail')}") + except: + st.error(f"❌ Failed: {r.text}") + + +st.sidebar.markdown("---") +st.sidebar.info("Built with ❤️ for AI GovTech Challenge 2025") diff --git a/streamlit_ui.py b/streamlit_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..53e9bc06fcbc4eea48f85b412d6e0b62c7460bf5 --- /dev/null +++ b/streamlit_ui.py @@ -0,0 +1,132 @@ +# streamlit_ui.py +import streamlit as st +import requests +import base64 +import io + +st.set_page_config( + page_title="Prompta - Text to Media Generator", + page_icon="🎙️", + layout="wide", + initial_sidebar_state="expanded" +) +st.title("🎙️🖼️🎞️ Prompta - Text to Media Generator") + +# 🛠️ Get Token FIRST +TOKEN = st.sidebar.text_input("🔑 API Token", type="password") +HEADERS = {"Authorization": f"Bearer {TOKEN}"} if TOKEN else {} + +# ✅ Display AFTER token is typed +if TOKEN: + st.sidebar.write("Using token:", TOKEN) +else: + st.sidebar.warning("⚠️ Please enter a valid API token to use the app.") + +API_BASE = "http://localhost:8000" + +# ================================================== +# Unified media rendering +# ================================================== +def render_media(response, label): + content_type = response.headers.get("Content-Type", "") + file_bytes = response.content + + if "audio" in content_type: + st.audio(file_bytes, format=content_type) + elif "video" in content_type: + st.video(file_bytes) + elif "image" in content_type: + st.image(file_bytes, caption=label, use_container_width=True) + else: + try: + # JSON fallback (video download_url case) + data = response.json() + if "download_url" in data: + video_url = f"{API_BASE}{data['download_url']}" + st.info("📥 Downloading video from URL...") + video_resp = requests.get(video_url, headers=HEADERS) + if video_resp.status_code == 200: + st.video(video_resp.content) + else: + st.error(f"❌ Failed to download video from {video_url}") + else: + st.warning("⚠️ Unsupported media format or empty response.") + except Exception: 
+            st.warning("⚠️ Unsupported media format or empty response.")
+
+# ==================================================
+# Sidebar Inputs
+# ==================================================
+st.sidebar.header("🛠️ Settings")
+
+tab = st.sidebar.radio("Select Task", ["Text to Audio", "Text to Image", "Text to Video"])
+
+# ==================================================
+# Text to Audio
+# ==================================================
+if tab == "Text to Audio":
+    st.subheader("🎤 Text to Audio")
+    text = st.text_area("Enter text")
+    voice = st.selectbox("Choose voice/language", ["en-US", "hi-IN", "te-IN", "ta-IN"])
+
+    if st.button("🔊 Generate Audio"):
+        with st.spinner("Generating audio..."):
+            r = requests.post(
+                f"{API_BASE}/api/v1/audio/generate",
+                json={"text": text, "voice": voice, "language": voice.split("-")[0]},  # map e.g. "hi-IN" -> "hi" for the TTS backend
+                headers=HEADERS
+            )
+            if r.status_code == 200:
+                render_media(r, "Generated Audio")
+            else:
+                st.error(f"❌ Failed ({r.status_code}): {r.text}")
+
+# ==================================================
+# Text to Image
+# ==================================================
+elif tab == "Text to Image":
+    st.subheader("🖼️ Text to Image")
+    prompt = st.text_area("Enter image prompt")
+    style = st.selectbox("Choose style", ["nature", "technology", "urban", "abstract"])
+
+    if st.button("🧠 Generate Image"):
+        with st.spinner("Generating image from Unsplash..."):
+            r = requests.post(
+                f"{API_BASE}/api/v1/image/generate",
+                json={"prompt": prompt, "style": style},
+                headers=HEADERS
+            )
+            if r.status_code == 200:
+                render_media(r, "Generated Image")
+            else:
+                try:
+                    err = r.json().get('detail', 'Unknown error')
+                except Exception:
+                    err = r.text
+                st.error(f"❌ Failed to fetch/display image: {err}")
+
+# ==================================================
+# Text to Video
+# ==================================================
+elif tab == "Text to Video":
+    st.subheader("🎞️ Text to Video")
+    prompt = st.text_area("Enter video prompt")
+    tone = st.selectbox("Tone", ["formal", "casual", "emotional", "documentary"])
+    domain = st.selectbox("Domain", ["health", "education", "governance", "entertainment"])
+    environment = st.selectbox("Environment", ["urban", "rural", "nature", "futuristic"])
+
+    if st.button("🎬 Generate Video"):
+        with st.spinner("Generating video..."):
+            r = requests.post(
+                f"{API_BASE}/api/v1/video/generate",
+                json={"prompt": prompt, "tone": tone, "domain": domain, "environment": environment},
+                headers=HEADERS
+            )
+            if r.status_code == 200:
+                render_media(r, "Generated Video")
+            else:
+                st.error(f"❌ Failed ({r.status_code}): {r.text}")
+
+
+st.sidebar.markdown("---")
+st.sidebar.info("Built with ❤️ for AI GovTech Challenge 2025")
diff --git a/video_enhancer.py b/video_enhancer.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d25399a33e825ec5b6e9423c5ea4678803c4f5f
--- /dev/null
+++ b/video_enhancer.py
@@ -0,0 +1,77 @@
+from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip, CompositeAudioClip, AudioFileClip
+import os
+from typing import List, Tuple
+
+
+def export_srt(transcript: List[str], duration: float, output_path: str):
+    """
+    Exports transcript as a .srt subtitle file assuming equal spacing.
+    """
+    lines = []
+    segment_duration = duration / len(transcript)
+
+    def format_time(t):
+        h = int(t // 3600)
+        m = int((t % 3600) // 60)
+        s = int(t % 60)
+        ms = int((t % 1) * 1000)
+        return f"{h:02}:{m:02}:{s:02},{ms:03}"
+
+    for idx, line in enumerate(transcript):
+        start_time = segment_duration * idx
+        end_time = segment_duration * (idx + 1)
+
+        lines.append(f"{idx+1}")
+        lines.append(f"{format_time(start_time)} --> {format_time(end_time)}")
+        lines.append(line.strip())
+        lines.append("")  # Empty line for spacing
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write("\n".join(lines))
+
+
+def add_subtitles_and_bgm(
+    video_path: str,
+    transcript: List[str],
+    output_path: str = "final_output.mp4",
+    bgm_path: str = None,
+    subtitle_font: str = "Arial",
+    subtitle_size: int = 24,
+    subtitle_color: str = "white",
+    subtitle_position: Tuple[str, str] = ("center", "bottom")
+):
+    """
+    Adds subtitles and optional background music to a video.
+    """
+
+    clip = VideoFileClip(video_path)
+    duration = clip.duration
+    segment_duration = duration / len(transcript)
+    subtitle_clips = []
+
+    for i, line in enumerate(transcript):
+        txt = TextClip(
+            line,
+            fontsize=subtitle_size,
+            font=subtitle_font,
+            color=subtitle_color,
+            method='caption',
+            size=(int(clip.w * 0.8), None)  # 80% of video width
+        ).set_position(subtitle_position).set_duration(segment_duration).set_start(i * segment_duration)
+
+        subtitle_clips.append(txt)
+
+    final_video = CompositeVideoClip([clip, *subtitle_clips])
+
+    if bgm_path and os.path.exists(bgm_path):
+        bgm = AudioFileClip(bgm_path).volumex(0.2).set_duration(duration)
+        original_audio = clip.audio
+        if original_audio:
+            mixed_audio = original_audio.volumex(1.0).audio_fadein(0.5).audio_fadeout(0.5)
+            final_audio = CompositeAudioClip([mixed_audio.set_start(0), bgm.set_start(0)])  # mix narration with low-volume BGM
+        else:
+            final_audio = bgm
+        final_video = final_video.set_audio(final_audio)
+
+    final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
+
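For local testing of `video_enhancer.py` outside the API, the sketch below shows one way the two helpers could be called. It is only an illustrative example under stated assumptions: `sample_input.mp4` and `default_bgm.mp3` are placeholder files assumed to exist in the working directory, and rendering subtitles with MoviePy's `TextClip` requires ImageMagick to be installed.

```python
# Illustrative usage sketch (not part of the diff above).
# Assumes sample_input.mp4 and default_bgm.mp3 exist locally (hypothetical paths).
from moviepy.editor import VideoFileClip
from video_enhancer import export_srt, add_subtitles_and_bgm

transcript = [
    "Welcome to the public health update.",
    "Vaccination drives continue across all districts.",
]

# Write evenly spaced subtitles covering the clip's full duration.
with VideoFileClip("sample_input.mp4") as clip:
    duration = clip.duration
export_srt(transcript, duration, "sample_subs.srt")

# Burn the same transcript onto the video and mix in low-volume background music.
add_subtitles_and_bgm(
    video_path="sample_input.mp4",
    transcript=transcript,
    output_path="sample_with_subs.mp4",
    bgm_path="default_bgm.mp3",
)
```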