Spaces:

osnarayana
/

media-gen-api

Sleeping

App Files Files Community

osnarayana commited on Jul 31

Commit

8234bbd

2 Parent(s): b7f91f4 9bf9f75

Resolved merge conflict in README.md

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.devcontainer/devcontainer.json +33 -0
.gitignore +44 -0
.streamlit/config.toml +6 -0
.vs/VSWorkspaceState.json +6 -0
.vs/ai_gov_comm/FileContentIndex/09e5affd-6ff5-45b8-b6bb-c44b5b94954f.vsidx +0 -0
.vs/ai_gov_comm/v17/.wsuo +0 -0
.vs/ai_gov_comm/v17/DocumentLayout.json +12 -0
ArchitectureDiagram.png +3 -0
Demo/02_Tech_Deck.pdf +3 -0
Demo/03_Cost_Sheet.xlsx +0 -0
Demo/04_Team_Profile.pdf +3 -0
Demo/05-Roadmap.pdf +3 -0
Demo/06-End Slide.pdf +3 -0
Demo/old-02_Tech_Deck.pdf +3 -0
Dockerfile +20 -0
PRIVACY_POLICY.md +38 -0
Procfile +1 -0
Project_structure.docx +0 -0
README.md +96 -0
Spacefile +5 -0
app/__init__.py +0 -0
app/api/__init__.py +0 -0
app/api/v1/__init__.py +0 -0
app/api/v1/audio.py +34 -0
app/api/v1/download.py +32 -0
app/api/v1/image.py +41 -0
app/api/v1/metrics.py +24 -0
app/api/v1/ppt.py +24 -0
app/api/v1/utils.py +9 -0
app/api/v1/video.py +45 -0
app/auth/auth.py +25 -0
app/core/config.py +6 -0
app/db.py +7 -0
app/main.py +39 -0
app/models.py +14 -0
app/services/audio_service.py +39 -0
app/services/image_service.py +42 -0
app/services/ppt_service.py +34 -0
app/services/video_service - Copy.py +35 -0
app/services/video_service.py +94 -0
assets/default.jpg +0 -0
assets/logo_watermark.png +3 -0
backend/media_gen.py +187 -0
backend/subtitle_utils.py +84 -0
default_bgm.mp3 +3 -0
docs/index.html +62 -0
docs/privacy.html +68 -0
fly.toml +25 -0
generated_audio/audio_12aa4a7f8c6b4e459e88fd7bfa8559fe.mp3 +3 -0
generated_audio/audio_2084def7399c47fc81f1ea75a2bb38df.mp3 +3 -0

.devcontainer/devcontainer.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "name": "Python 3",
+  // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
+  "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
+  "customizations": {
+    "codespaces": {
+      "openFiles": [
+        "README.md",
+        "app.py"
+      ]
+    },
+    "vscode": {
+      "settings": {},
+      "extensions": [
+        "ms-python.python",
+        "ms-python.vscode-pylance"
+      ]
+    }
+  },
+  "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
+  "postAttachCommand": {
+    "server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
+  },
+  "portsAttributes": {
+    "8501": {
+      "label": "Application",
+      "onAutoForward": "openPreview"
+    }
+  },
+  "forwardPorts": [
+    8501
+  ]
+}

.gitignore ADDED Viewed

	@@ -0,0 +1,44 @@

+# Python
+*.pyc
+__pycache__/
+*.pyo
+*.pyd
+*.log
+# Virtual env
+.venv/
+venv/
+ENV/
+env/
+# Environment secrets
+.env
+# Streamlit cache
+.streamlit/
+.metadata/
+# Pytest cache
+.pytest_cache/
+# VS Code / PyCharm / IDE files
+.vscode/
+.idea/
+.project
+.pydevproject
+# RStudio / Mac / Windows artifacts
+.Rhistory
+.DS_Store
+Thumbs.db
+# Output folders
+outputs/
+logs/
+__pycache__/
+outputs/
+generated/
+*.pyc
+*.log

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,6 @@

+[theme]
+primaryColor = "#4CAF50"
+backgroundColor = "#F5F5F5"
+secondaryBackgroundColor = "#E0E0E0"
+textColor = "#262730"
+font = "sans serif"

.vs/VSWorkspaceState.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "ExpandedNodes": [
+    ""
+  ],
+  "PreviewInSolutionExplorer": false
+}

.vs/ai_gov_comm/FileContentIndex/09e5affd-6ff5-45b8-b6bb-c44b5b94954f.vsidx ADDED Viewed

Binary file (84.3 kB). View file

.vs/ai_gov_comm/v17/.wsuo ADDED Viewed

Binary file (8.19 kB). View file

.vs/ai_gov_comm/v17/DocumentLayout.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "Version": 1,
+  "WorkspaceRootPath": "C:\\Users\\fimba\\OneDrive\\Desktop\\PythonProjects\\ai_gov_comm\\",
+  "Documents": [],
+  "DocumentGroupContainers": [
+    {
+      "Orientation": 0,
+      "VerticalTabListWidth": 256,
+      "DocumentGroups": []
+    }
+  ]
+}

ArchitectureDiagram.png ADDED Viewed

Git LFS Details

SHA256: 939a8dca28847fc7ed65376c5f54f1badac80b13fe3be7685afafb507587f7d9
Pointer size: 132 Bytes
Size of remote file: 1.65 MB

Demo/02_Tech_Deck.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b45c266eb01b16301621ee2e31ea8282cd1d72fff4bf959522b0b302f5b7d0e
+size 10621

Demo/03_Cost_Sheet.xlsx ADDED Viewed

Binary file (5.63 kB). View file

Demo/04_Team_Profile.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e71ceecc9d5d85324fd31ed8b268e222977f8dfc1455cb44e260b2aa74ad5de0
+size 73529

Demo/05-Roadmap.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea63aed5200371c1b3bc42cf06f9e4f1d3cded3afe53f27f5789de4cbccc352d
+size 83787

Demo/06-End Slide.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eaab3d85667480068276f4c7c45db0ce1a31c7b26227ac904eb5b8ad4e6cf0ca
+size 3922333

Demo/old-02_Tech_Deck.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8887e7d3825f880a83d1d4c94d1d9dbc3dbe3db290b6d4ee03c14a918b07bd45
+size 1730

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+# Use lightweight Python
+FROM python:3.10-slim
+# Set working directory
+WORKDIR /app
+# Copy requirements first for caching
+COPY requirements.txt .
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy project files
+COPY . .
+# Expose FastAPI port
+EXPOSE 8000
+# Run FastAPI with uvicorn
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

PRIVACY_POLICY.md ADDED Viewed

	@@ -0,0 +1,38 @@

+# 🔐 Privacy Policy for OSNarayana Media Generator
+Effective Date: July 19, 2025
+Your privacy is important to us. This app is built to prioritize user data security and transparency.
+## 1. What We Collect
+- **No personal data** is collected, stored, or shared by this application.
+- All prompts, audio, and media are **processed locally** or via **user-provided API keys** to external services (e.g., ElevenLabs, Unsplash).
+## 2. API Usage
+- If you use external services (like ElevenLabs or Unsplash), you are subject to their respective [Terms of Service](https://www.elevenlabs.io/terms) and [Privacy Policies](https://www.elevenlabs.io/privacy).
+- Your API keys are stored **locally** in your environment file (`.env`) and never uploaded.
+## 3. Data Storage
+- Generated images, audio, and videos are stored **only on your local machine** under the `outputs/` directory.
+- You can delete any generated content at your discretion.
+## 4. Analytics & Ads
+- This app contains **no ads**, **no tracking**, and **no analytics**.
+## 5. Changes to Policy
+Any updates to this policy will be reflected in this file on the [GitHub repository](https://github.com/your-username/osnarayana-media-generator).
+## 6. Contact
+For questions or issues, contact: **[email protected]**

Procfile ADDED Viewed

	@@ -0,0 +1 @@


1	+ web: uvicorn app.main:app --host 0.0.0.0 --port 10000

Project_structure.docx ADDED Viewed

Binary file (14 kB). View file

README.md CHANGED Viewed

@@ -9,3 +9,99 @@ short_description: FastAPI backend for Text-to-Audio, Image, and Video generato
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# 🎙️ Media Generation API
+A FastAPI-based backend to generate audio, images, video, and PPT from user inputs.
+Supports BLEU/CLIP metrics, token-based authentication, and stores metadata in SQLite/Postgres.
+A modular, RESTful FastAPI solution that converts text input into:
+- 🎥 Video
+- 🖼️ Image/Graphics
+- 🔊 Audio
+---
+## 🚀 Features
+- Text → Video: Tone, domain, and environment-aware video generation.
+- Text → Audio: Context-aware voice synthesis with emotional tone and language support.
+- Text → Graphics: Visual generation using parameter-based prompts.
+- BLEU/CLIP metrics for prompt-output fidelity.
+- Token-based authentication for secure API use.
+- Dockerized for easy deployment
+- Optional Streamlit/React UI
+- Swagger UI: `http://localhost:8000/docs`
+---
+### 📁 Project Structure
+media-gen-api/
+├── app/
+│   ├── api/v1/               # Versioned API endpoints
+│   ├── auth/                 # Token-based auth
+│   ├── services/             # Core media generation logic
+│   └── main.py               # FastAPI entry point
+├── tests/                    # Unit/integration tests
+├── requirements.txt
+└── README.md
+---
+## 📦 Installation
+🚀 Run Locally
+1. Clone repo & create virtual environment
+git clone https://github.com/yourorg/media-gen-api.git
+cd media-gen-api
+python -m venv .venv
+source .venv/bin/activate  # On Windows: .venv\Scripts\activate
+2. Install dependencies
+pip install -r requirements.txt
+3. Run the API
+uvicorn app.main:app --reload
+Access docs: http://127.0.0.1:8000/docs
+---
+### 🔐 Authentication
+Use Bearer <your_token> in the Authorize button or headers.
+---
+### 📡 API Endpoints Summary
+| Endpoint                  | Method | Description               |
+|--------------------------|--------|---------------------------|
+| /api/v1/audio/generate   | POST   | Generate audio from text |
+| /api/v1/image/generate   | POST   | Generate image from text |
+| /api/v1/video/generate   | POST   | Generate video from text |
+| /api/v1/download         | GET    | Download generated file  |
+---
+### 📦 Deployment (Streamlit/Optional UI)
+Option 1: Run with Streamlit (for demo)
+streamlit run streamlit_ui.py
+Option 2: Docker (Production-ready)
+docker build -t media-gen-api .
+docker run -p 8000:8000 media-gen-api
+---
+### 📊 Metrics Logging (Optional)
+- BLEU score and CLIPScore (WIP)
+- Latency, GPU/CPU tracking
+- Log file: logs/generation.log
+---
+#### 📋 Submission Checklist
+- ✅ RESTful modular architecture
+- ✅ Multi-format (MP4, PNG, WAV)
+- ✅ Token Auth + Swagger UI
+- ✅ Compatible with DD/PIB via API
+- ✅ Streamlit demo app (optional)

Spacefile ADDED Viewed

	@@ -0,0 +1,5 @@

+[build]
+builder = "heroku/buildpacks:20"
+[run]
+command = "uvicorn main:app --host 0.0.0.0 --port $PORT"

app/__init__.py ADDED Viewed

File without changes

app/api/__init__.py ADDED Viewed

File without changes

app/api/v1/__init__.py ADDED Viewed

File without changes

app/api/v1/audio.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from fastapi import APIRouter, HTTPException, Body
+from fastapi.responses import Response  # ✅ add this
+from pydantic import BaseModel
+from gtts import gTTS
+import uuid
+import os
+router = APIRouter()
+# Request body for the audio "/generate" endpoint.
+class AudioRequest(BaseModel):
+    text: str  # text handed to gTTS for synthesis
+    voice: str = "default"  # accepted, but not read by generate_audio_endpoint
+    language: str = "en"  # forwarded to gTTS as its ``lang`` argument
+@router.post("/generate")
+def generate_audio_endpoint(payload: AudioRequest):
+    try:
+        # ✅ Save inside generated/audio for consistency
+        filename = f"audio_{uuid.uuid4().hex}.mp3"
+        folder = "generated/audio"
+        os.makedirs("generated_audio", exist_ok=True)
+        file_path = f"generated_audio/{filename}" # ✅ match your video & image folders
+        # ✅ Generate TTS audio
+        tts = gTTS(text=payload.text, lang=payload.language)
+        tts.save(file_path)
+        # ✅ Return audio bytes for inline Streamlit playback
+        with open(file_path, "rb") as f:
+            audio_bytes = f.read()
+        return Response(content=audio_bytes, media_type="audio/mpeg")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))

app/api/v1/download.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import FileResponse
+import os
+router = APIRouter()
+@router.get("/")
+def download_file(file_path: str = Query(..., description="Relative path from project root")):
+    print(f"🔍 Requested file path: {file_path}")
+    # Sanitize and resolve absolute path
+    full_path = os.path.abspath(file_path)
+    # Ensure file is inside your allowed folder (to prevent directory traversal)
+    allowed_root = os.path.abspath("generated")
+    if not full_path.startswith(allowed_root):
+        raise HTTPException(status_code=400, detail="Invalid file path")
+    print(f"📂 Resolved full path: {full_path}")
+    if not os.path.isfile(full_path):
+        print("❌ File not found.")
+        raise HTTPException(status_code=404, detail="File not found")
+    # Set correct media type dynamically (you can refine this later)
+    media_type = "audio/mpeg" if full_path.endswith(".mp3") else "image/png"
+    return FileResponse(
+        full_path,
+        media_type=media_type,
+        filename=os.path.basename(full_path)
+    )

app/api/v1/image.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from fastapi import APIRouter, HTTPException, Depends, Body
+from fastapi.responses import Response
+from pydantic import BaseModel
+from app.auth.auth import verify_token
+import requests
+import os
+from pydantic import BaseModel
+from dotenv import load_dotenv
+load_dotenv()
+# ✅ Define router
+router = APIRouter()
+# ✅ Define Request schema
+class ImageRequest(BaseModel):
+    # Search text; joined with ``style`` to form the Unsplash query.
+    prompt: str
+    # Extra keyword appended after the prompt in the query.
+    style: str = "default"
+UNSPLASH_ACCESS_KEY = os.getenv("UNSPLASH_ACCESS_KEY")  # store this in .env
+print(f"unsplash key is: {UNSPLASH_ACCESS_KEY}")
+# ✅ Endpoint
+@router.post("/generate")
+def generate_image_file_endpoint(
+    data: ImageRequest = Body(...),
+    token: str = Depends(verify_token)
+):
+    query = f"{data.prompt} {data.style}"
+    url = f"https://api.unsplash.com/photos/random?query={query}&client_id={UNSPLASH_ACCESS_KEY}&orientation=landscape"
+    try:
+        r = requests.get(url)
+        r.raise_for_status()
+        image_url = r.json()["urls"]["regular"]
+        img_data = requests.get(image_url).content
+        return Response(content=img_data, media_type="image/jpeg")
+    except Exception as e:
+        print(f"❌ Image fetch failed: {str(e)}")
+        raise HTTPException(status_code=500, detail="Image generation failed.")

app/api/v1/metrics.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# app/api/v1/metrics.py
+from fastapi import APIRouter, HTTPException, Query, Depends, Request
+from typing import List
+from sklearn.metrics import accuracy_score
+from nltk.translate.bleu_score import sentence_bleu
+from sentence_transformers import SentenceTransformer, util
+from app.auth.auth import verify_token
+router = APIRouter()
+model = SentenceTransformer("clip-ViT-B-32")  # for CLIP-like semantic score
+@router.post("/evaluate/bleu", dependencies=[Depends(verify_token)])
+def compute_bleu(reference: str, candidate: str):
+    ref_tokens = [reference.split()]
+    cand_tokens = candidate.split()
+    score = sentence_bleu(ref_tokens, cand_tokens)
+    return {"metric": "BLEU", "score": score}
+@router.post("/evaluate/clipscore")
+def compute_clip_score(reference: str, candidate: str):
+    ref_emb = model.encode(reference, convert_to_tensor=True)
+    cand_emb = model.encode(candidate, convert_to_tensor=True)
+    score = util.cos_sim(ref_emb, cand_emb).item()
+    return {"metric": "CLIPScore", "score": score}

app/api/v1/ppt.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# app/api/v1/ppt.py
+from fastapi import APIRouter, HTTPException, Query, Depends, Request
+from pydantic import BaseModel
+from typing import List
+from app.services.ppt_service import generate_ppt_file
+from app.auth.auth import verify_token
+router = APIRouter()
+# One slide of the requested deck.
+class Slide(BaseModel):
+    title: str  # written out after "Title:" by generate_ppt_file
+    content: str  # written out after "Content:" by generate_ppt_file
+# Request body for the PPT "/generate" endpoint: the slides, in order.
+class PPTInput(BaseModel):
+    slides: List[Slide]
+@router.post("/generate")
+def generate_ppt(payload: PPTInput):
+    filename = generate_ppt_file([slide.dict() for slide in payload.slides])
+    return {
+        "message": "PPT generated successfully",
+        "filename": filename,
+        "download_url": f"/api/v1/download?file_path=generated/ppt/{filename}"
+    }

app/api/v1/utils.py ADDED Viewed

	@@ -0,0 +1,9 @@

+# app/api/v1/utils.py
+from fastapi.responses import FileResponse
+def download_file(file_path: str):
+    return FileResponse(
+        path=file_path,
+        filename=file_path.split("/")[-1],
+        media_type="application/octet-stream"
+    )

app/api/v1/video.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# app/api/v1/video.py
+from fastapi import APIRouter, HTTPException, Depends, Body
+from fastapi.responses import FileResponse
+from pydantic import BaseModel
+from app.services.video_service import generate_video_file
+from app.auth.auth import verify_token
+import os
+from typing import Optional
+# ✅ Define router FIRST
+router = APIRouter()
+# Request body for the video "/generate" endpoint. Only ``prompt`` is read by
+# generate_video_endpoint; the remaining fields are validated but unused there.
+class VideoInput(BaseModel):
+    prompt: str  # passed to generate_video_file as ``script``
+    tone: str
+    domain: str
+    environment: str
+    transcript: Optional[str] = None
+@router.post("/generate")
+def generate_video_endpoint(
+    payload: VideoInput = Body(...),
+    token: str = Depends(verify_token)
+):
+    try:
+        # Generate video file
+        filename = generate_video_file(
+            script=payload.prompt,
+            duration=10  # Optional: could be dynamic
+        )
+        video_path = os.path.join("generated/video", filename)
+        if not os.path.exists(video_path):
+            raise HTTPException(status_code=500, detail="Video not found")
+        # ✅ Return the actual file for Streamlit to play
+        return FileResponse(
+            video_path,
+            media_type="video/mp4",
+            filename=filename
+        )
+    except Exception as e:
+        print("❌ Video generation error:", str(e))
+        raise HTTPException(status_code=500, detail=str(e))

app/auth/auth.py ADDED Viewed

	@@ -0,0 +1,25 @@

+#from fastapi import Depends, HTTPException, status
+from fastapi import HTTPException, Security
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from starlette.status import HTTP_403_FORBIDDEN
+from fastapi import Security
+from fastapi.security import HTTPBearer
+bearer_scheme = HTTPBearer()
+#security = HTTPBearer()
+from fastapi import Header, HTTPException, Depends
+VALID_TOKENS = ["my_secure_token_123"]  # or load from file/db/env
+def verify_token(credentials: HTTPAuthorizationCredentials = Security(bearer_scheme)):
+    token = credentials.credentials
+    # Replace with your actual logic (static check shown here)
+    if token != "my_secure_token_123":
+        raise HTTPException(
+            status_code=HTTP_403_FORBIDDEN,
+            detail="Invalid or expired token"
+        )

app/core/config.py ADDED Viewed

	@@ -0,0 +1,6 @@

+import os
+class Settings:
+    # Read once, when the class body executes, from the API_KEY environment
+    # variable; falls back to the placeholder "dummy-api-key" when unset.
+    API_KEY: str = os.getenv("API_KEY", "dummy-api-key")
+settings = Settings()

app/db.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+# SQLite database file "media_gen.db", relative to the working directory.
+DATABASE_URL = "sqlite:///./media_gen.db"
+# check_same_thread=False is passed through to the sqlite3 driver.
+# presumably so sessions can be used from server worker threads — confirm
+engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
+# Session factory bound to the engine, with autoflush turned off.
+SessionLocal = sessionmaker(bind=engine, autoflush=False)

app/main.py ADDED Viewed

	@@ -0,0 +1,39 @@

+# app/main.py
+from fastapi import FastAPI, Depends
+from fastapi.security import HTTPBearer
+from fastapi import Security
+from fastapi import FastAPI
+from app.api.v1.audio import router as audio_router
+from app.api.v1.video import router as video_router
+from app.api.v1.image import router as image_router
+from app.api.v1.ppt import router as ppt_router
+from app.api.v1.metrics import router as metrics_router
+from app.api.v1.download import router as download_router
+from fastapi import Security
+from app.auth.auth import verify_token
+bearer_scheme = HTTPBearer()
+app = FastAPI(
+    title="Media Generation API",
+    description="Generate audio, video, image, and PPT content via secure endpoints.",
+    version="1.0.0"
+)
+# Root for health check
+@app.get("/")
+def root():
+    # Static payload; registered directly on the app with no dependencies.
+    return {"message": "FastAPI running successfully!"}
+# Registering route modules
+# Each media router is mounted under /api/v1/<kind> with verify_token as a
+# router-wide dependency.
+app.include_router(audio_router, prefix="/api/v1/audio", tags=["Audio"], dependencies=[Depends(verify_token)])
+app.include_router(video_router, prefix="/api/v1/video", tags=["Video"], dependencies=[Depends(verify_token)])
+app.include_router(image_router, prefix="/api/v1/image", tags=["Image"], dependencies=[Depends(verify_token)])
+app.include_router(ppt_router, prefix="/api/v1/ppt", tags=["PPT"], dependencies=[Depends(verify_token)])
+app.include_router(metrics_router, prefix="/api/v1/metrics", tags=["Metrics"], dependencies=[Depends(verify_token)])
+# NOTE(review): the download router is mounted without verify_token, so
+# downloads need no bearer token — confirm this is intentional.
+app.include_router(download_router, prefix="/api/v1/download", tags=["Download"])

app/models.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from sqlalchemy import Column, Integer, String, DateTime
+from sqlalchemy.ext.declarative import declarative_base
+from datetime import datetime
+Base = declarative_base()
+# ORM model for the "media_generations" table. The columns mirror the
+# arguments of the save_metadata helpers in app/services.
+class MediaGeneration(Base):
+    __tablename__ = "media_generations"
+    id = Column(Integer, primary_key=True, index=True)
+    media_type = Column(String)
+    prompt = Column(String)
+    file_path = Column(String)
+    # Filled from datetime.utcnow (a naive UTC timestamp) when no value is given.
+    timestamp = Column(DateTime, default=datetime.utcnow)

app/services/audio_service.py ADDED Viewed

	@@ -0,0 +1,39 @@

+# app/services/audio_service.py
+from gtts import gTTS
+import os
+from datetime import datetime
+from app.db import SessionLocal
+from app.models import MediaGeneration
+import logging
+logger = logging.getLogger(__name__)
+import uuid
+def generate_audio_file(text: str, voice: str = "default", language: str = "en") -> str:
+    try:
+        tts = gTTS(text=text, lang=language, slow=False)
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        filename = f"audio_{timestamp}.mp3"
+        output_dir = "generated/audio"
+        os.makedirs(output_dir, exist_ok=True)
+        file_path = os.path.join(output_dir, filename)
+        tts.save(file_path)
+        logger.info(f"Generated Audio: {filename}")
+        return file_path
+    except:
+        logger.error(f"Audio Generation Failed: {str(e)}")
+        raise
+from app.db import SessionLocal
+from app.models import MediaGeneration
+def save_metadata(media_type, prompt, file_path):
+    db = SessionLocal()
+    record = MediaGeneration(
+        media_type=media_type,
+        prompt=prompt,
+        file_path=file_path,
+    )
+    db.add(record)
+    db.commit()
+    db.close()

app/services/image_service.py ADDED Viewed

	@@ -0,0 +1,42 @@

+# app/services/image_service.py
+import os
+from datetime import datetime
+from app.db import SessionLocal
+from app.models import MediaGeneration
+import logging
+logger = logging.getLogger(__name__)
+def generate_image_file(prompt: str, style: str = "default") -> str:
+    try:
+        # Simulate saving a generated image file
+        filename = f"image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
+        folder = "generated/image"
+        os.makedirs(folder, exist_ok=True)
+        # Placeholder: Simulate image generation by writing prompt text to a file
+        with open(os.path.join(folder, filename), "w") as f:
+            f.write(f"Prompt: {prompt}\nStyle: {style}")
+        logger.info(f"Generated Image: {filename}")
+        if os.path.isfile(output_path):
+            print(f"✅ Image created: {output_path}, size = {os.path.getsize(output_path)} bytes")
+        else:
+            print(f"❌ Image file not found at: {output_path}")
+        return filename
+    except:
+        logger.error(f"Image Geneartion failed: {str(e)}")
+        raise
+from app.db import SessionLocal
+from app.models import MediaGeneration
+def save_metadata(media_type, prompt, file_path):
+    db = SessionLocal()
+    record = MediaGeneration(
+        media_type=media_type,
+        prompt=prompt,
+        file_path=file_path,
+    )
+    db.add(record)
+    db.commit()
+    db.close()

app/services/ppt_service.py ADDED Viewed

	@@ -0,0 +1,34 @@

+# app/services/ppt_service.py
+import os
+from datetime import datetime
+from app.db import SessionLocal
+from app.models import MediaGeneration
+import logging
+logger = logging.getLogger(__name__)
+def generate_ppt_file(slides: list[dict]) -> str:
+    try:
+        filename = f"ppt_{datetime.now().strftime('%Y%m%d_%H%M%S')}.ppt"
+        folder = "generated/ppt"
+        os.makedirs(folder, exist_ok=True)
+        with open(os.path.join(folder, filename), "w") as f:
+            for i, slide in enumerate(slides, 1):
+                f.write(f"Slide {i}:\nTitle: {slide['title']}\nContent: {slide['content']}\n\n")
+        logger.info(f"Generated PPT: {filename}")
+        return filename
+    except:
+        logger.error(f"PPT Generation failed: {str(e)}")
+        raise
+def save_metadata(media_type, prompt, file_path):
+    db = SessionLocal()
+    record = MediaGeneration(
+        media_type=media_type,
+        prompt=prompt,
+        file_path=file_path,
+    )
+    db.add(record)
+    db.commit()
+    db.close()

app/services/video_service - Copy.py ADDED Viewed

	@@ -0,0 +1,35 @@

+# app/services/video_service.py
+import os
+from datetime import datetime
+from app.db import SessionLocal
+from app.models import MediaGeneration
+import logging
+logger = logging.getLogger(__name__)
+def generate_video_file(script: str, duration: int = 10) -> str:
+    try:
+        # Simulate saving a generated video file
+        filename = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
+        folder = "generated/video"
+        os.makedirs(folder, exist_ok=True)
+        # Placeholder: Simulate video generation by writing script info to a file
+        with open(os.path.join(folder, filename), "w") as f:
+            f.write(f"Script: {script}\nDuration: {duration} seconds")
+        logger.info(f"Generated Video: {filename}")
+        return filename
+    except:
+        logger.error(f"Video generation failed: {str(e)}")
+        raise
+def save_metadata(media_type, prompt, file_path):
+    db = SessionLocal()
+    record = MediaGeneration(
+        media_type=media_type,
+        prompt=prompt,
+        file_path=file_path,
+    )
+    db.add(record)
+    db.commit()
+    db.close()

app/services/video_service.py ADDED Viewed

	@@ -0,0 +1,94 @@

+# app/services/video_service.py
+import os
+import uuid
+import requests
+from gtts import gTTS
+from mutagen.mp3 import MP3
+from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip
+from dotenv import load_dotenv
+load_dotenv()
+UNSPLASH_KEY = os.getenv("UNSPLASH_ACCESS_KEY")
+UNSPLASH_API = "https://api.unsplash.com/photos/random"
+def fetch_unsplash_images(query, count=3):
+    headers = {"Accept-Version": "v1", "Authorization": f"Client-ID {UNSPLASH_KEY}"}
+    urls = []
+    for _ in range(count):
+        r = requests.get(UNSPLASH_API, params={"query": query}, headers=headers)
+        if r.status_code == 200:
+            data = r.json()
+            if isinstance(data, dict):
+                urls.append(data["urls"]["regular"])
+            elif isinstance(data, list) and len(data) > 0:
+                urls.append(data[0]["urls"]["regular"])
+    return urls
+def generate_video_file(script: str, duration: int = None) -> str:
+    os.makedirs("generated/video", exist_ok=True)
+    os.makedirs("generated/audio", exist_ok=True)
+    os.makedirs("generated/tmp", exist_ok=True)
+    video_filename = f"video_{uuid.uuid4().hex}.mp4"
+    video_path = os.path.join("generated/video", video_filename)
+    audio_path = f"generated/audio/audio_{uuid.uuid4().hex}.mp3"
+    # Step 1: Generate audio
+    tts = gTTS(text=script, lang='en')
+    tts.save(audio_path)
+    # Get audio duration (fallback if 0)
+    audio = MP3(audio_path)
+    audio_duration = max(audio.info.length, 3.0)  # ensure at least 3s
+    # Step 2: Fetch Unsplash images
+    images = fetch_unsplash_images(script, count=3)
+    if not images:
+        raise Exception("No images found from Unsplash for the prompt")
+    # Step 3: Create slideshow clips
+    clips = []
+    per_image_duration = audio_duration / len(images)
+    tmp_files = []
+    for url in images:
+        img_data = requests.get(url).content
+        tmp_file = f"generated/tmp/tmp_{uuid.uuid4().hex}.jpg"
+        tmp_files.append(tmp_file)
+        with open(tmp_file, "wb") as f:
+            f.write(img_data)
+        clip = ImageClip(tmp_file).resize(height=720).set_duration(per_image_duration)
+        clips.append(clip)
+    # Step 4: Concatenate without negative padding
+    final_clip = concatenate_videoclips(clips, method="compose")
+    # Step 5: Force duration to match audio
+    final_clip = final_clip.set_duration(audio_duration)
+    # Step 6: Add audio
+    final_clip = final_clip.set_audio(AudioFileClip(audio_path))
+    # Step 7: Export video
+    final_clip.write_videofile(
+        video_path,
+        fps=24,
+        codec="libx264",
+        audio_codec="aac",
+        threads=4,
+        preset="ultrafast"
+    )
+    # Cleanup
+    for file in tmp_files:
+        try:
+            os.remove(file)
+        except:
+            pass
+    return video_filename

assets/default.jpg ADDED Viewed

assets/logo_watermark.png ADDED Viewed

Git LFS Details

SHA256: a7a35246c3c7c6df8cf325d6a1ea7f45e4ce0899303cc68a3771f8193cff0c35
Pointer size: 129 Bytes
Size of remote file: 5.24 kB

backend/media_gen.py ADDED Viewed

	@@ -0,0 +1,187 @@

+# ✅ Updated media_gen.py with file logging + UI debug toggle
+import os
+import re
+import logging
+import streamlit as st
+import requests
+from PIL import Image, UnidentifiedImageError
+from io import BytesIO
+from dotenv import load_dotenv
+from moviepy.editor import ImageClip, AudioFileClip
+from elevenlabs import generate, save, set_api_key
+from googletrans import Translator
+from PIL import ImageEnhance, Image
+import tempfile
+# Load env vars
+load_dotenv()
+# Logging setup
+logging.basicConfig(
+    filename="app.log",
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+# Constants
+OUTPUT_DIR = "outputs"
+# NOTE(review): this commit's file list shows assets/default.jpg but no
+# assets/fallback.jpg — confirm the fallback image exists at this path.
+DEFAULT_IMAGE = "assets/fallback.jpg"
+WATERMARK_PATH = "assets/logo_watermark.png"
+# None when the environment variable is not set.
+UNSPLASH_ACCESS_KEY = os.getenv("UNSPLASH_ACCESS_KEY")
+os.makedirs("outputs/audio", exist_ok=True)
+os.makedirs("outputs/images", exist_ok=True)
+os.makedirs("outputs/videos", exist_ok=True)
+def translate_text(text, target_lang):
+    """Translate ``text`` into ``target_lang`` with googletrans and return the translated string."""
+    # A new Translator is created on every call.
+    return Translator().translate(text, dest=target_lang).text
+def sanitize_filename(text):
+    """Turn ``text`` into a lowercase file-name stem of at most 50 characters."""
+    # Each run of non-word characters collapses to a single "_".
+    return re.sub(r'\W+', '_', text).lower()[:50]
+def apply_watermark(image_path, watermark_path=WATERMARK_PATH):
+    try:
+        base = Image.open(image_path).convert("RGBA")
+        watermark = Image.open(watermark_path).convert("RGBA").resize((100, 100))
+        base.paste(watermark, (base.width - 110, base.height - 110), watermark)
+        base.convert("RGB").save(image_path)
+    except Exception as e:
+        logging.error(f"Watermarking failed: {e}")
+        st.write(f"❌ Watermarking failed: {e}")
+def use_fallback_image(prompt, add_watermark=False):
+    try:
+        fallback_path = DEFAULT_IMAGE
+        output_path = f"outputs/images/{sanitize_filename(prompt)}.jpg"
+        with Image.open(fallback_path) as img:
+            img.save(output_path)
+        if add_watermark:
+            apply_watermark(output_path)
+        return output_path
+    except UnidentifiedImageError:
+        logging.error("Could not open fallback image.")
+        st.write("❌ Could not open fallback image.")
+        return None
+def generate_gtts_fallback(prompt, output_path, lang="en", debug_mode=False):
+    try:
+        from gtts import gTTS
+        tts = gTTS(text=prompt, lang=lang)
+        tts.save(output_path)
+        logging.info(f"gTTS fallback audio saved to {output_path}")
+        if debug_mode:
+            st.write(f"✅ Fallback audio (gTTS) saved to {output_path}")
+        return output_path
+    except Exception as e:
+        logging.error(f"gTTS fallback failed: {e}")
+        st.write(f"❌ gTTS fallback failed: {str(e)}")
+        return None
+def generate_image(prompt, file_tag, add_watermark=False, dark_mode=False, debug_mode=False):
+    try:
+        # Enhance prompt if dark mode is enabled
+        if dark_mode:
+            prompt += " at night, dark theme, low light, moody lighting"
+        url = f"https://api.unsplash.com/photos/random?query={requests.utils.quote(prompt)}&client_id={UNSPLASH_ACCESS_KEY}"
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        image_url = response.json()["urls"]["regular"]
+        image_response = requests.get(image_url, timeout=10)
+        image_response.raise_for_status()
+        output_path = f"outputs/images/{sanitize_filename(prompt)}.jpg"
+        img = Image.open(BytesIO(image_response.content))
+        img.convert("RGB").save(output_path)
+        if add_watermark:
+            apply_watermark(output_path)
+        return output_path
+    except Exception as e:
+        logging.error(f"Image generation failed: {e}")
+        st.write("🔁 Unsplash failed. Using fallback.")
+        st.write(f"❌ Image generation failed: {e}")
+        return use_fallback_image(prompt, add_watermark=add_watermark)
+# ✅ Updated generate_audio with proper language handling
+def generate_audio(prompt, output_path, debug_mode=False, lang="en"):
+    try:
+        api_key = os.getenv("ELEVEN_API_KEY") or st.secrets.get("ELEVEN_API_KEY", None)
+        # Use gTTS for non-English languages
+        if lang != "en":
+            if debug_mode:
+                st.write(f"🌐 Non-English language selected: {lang}. Using gTTS.")
+            return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode)
+        if api_key:
+            if debug_mode:
+                st.write(f"✅ ELEVEN_API_KEY loaded: {api_key[:4]}...****")
+            set_api_key(api_key)
+            if debug_mode:
+                st.write(f"🎧 Generating audio for prompt: {prompt}")
+            try:
+                audio = generate(text=prompt, voice="Aria", model="eleven_monolingual_v1")
+                save(audio, output_path)
+                logging.info(f"Audio saved successfully to {output_path}")
+                if debug_mode:
+                    st.write(f"🔍 File exists after save? {os.path.exists(output_path)}")
+                    st.write(f"✅ Audio saved successfully to {output_path}")
+                return output_path
+            except Exception as e:
+                logging.warning(f"ElevenLabs failed: {e}")
+                if debug_mode:
+                    st.write(f"⚠️ ElevenLabs failed: {str(e)}")
+                    st.write("🔁 Falling back to gTTS...")
+                return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode)
+        else:
+            logging.warning("ELEVEN_API_KEY not found")
+            if debug_mode:
+                st.write("❌ ELEVEN_API_KEY not found. Falling back to gTTS.")
+            return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode)
+    except Exception as e:
+        logging.error(f"Exception during audio generation setup: {e}")
+        if debug_mode:
+            st.write(f"❌ Exception during audio generation setup: {str(e)}")
+            st.write("🔁 Falling back to gTTS...")
+        return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode)
+def generate_video(prompt, image_path, audio_path, output_path, add_watermark=False, dark_mode=False):
+    try:
+        # If dark_mode, darken the image temporarily
+        if dark_mode:
+            with Image.open(image_path) as img:
+                enhancer = ImageEnhance.Brightness(img)
+                darker_img = enhancer.enhance(0.5)  # Reduce brightness to 50%
+                # Save to a temporary file
+                with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
+                    temp_image_path = tmp.name
+                    darker_img.save(temp_image_path)
+                    image_path = temp_image_path
+        audio_clip = AudioFileClip(audio_path)
+        image_clip = ImageClip(image_path).set_duration(audio_clip.duration).resize(height=720)
+        video = image_clip.set_audio(audio_clip)
+        output_path = f"outputs/videos/{sanitize_filename(prompt)}.mp4"
+        video.write_videofile(output_path, fps=24, codec="libx264", audio_codec="aac", verbose=False, logger=None)
+        return output_path
+    except Exception as e:
+        logging.error(f"Video generation failed: {e}")
+        st.write(f"❌ Video generation failed: {e}")
+        return None

backend/subtitle_utils.py ADDED Viewed

	@@ -0,0 +1,84 @@

+from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip
+from moviepy.video.tools.subtitles import SubtitlesClip
+import srt
+from datetime import timedelta
+import os
+def export_srt(text, duration=10, words_per_caption=6, output_path="output.srt"):
+    """
+    Converts text into SRT subtitles and saves to output_path.
+    """
+    lines = []
+    words = text.split()
+    start = 0
+    index = 1
+    while start < len(words):
+        end = start + words_per_caption
+        chunk = words[start:end]
+        content = " ".join(chunk)
+        start_time = timedelta(seconds=(index - 1) * duration)
+        end_time = timedelta(seconds=index * duration)
+        sub = srt.Subtitle(index=index, start=start_time, end=end_time, content=content)
+        lines.append(sub)
+        start += words_per_caption
+        index += 1
+    srt_data = srt.compose(lines)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(srt_data)
+    return output_path
+def add_subtitles_and_bgm(
+    video_path,
+    srt_path,
+    bgm_path,
+    output_path="enhanced_output.mp4",
+    font="Arial-Bold",
+    font_size=36,
+    font_color="white",
+    subtitle_position=("center", "bottom")
+):
+    """
+    Adds subtitles from .srt and background music to the given video.
+    """
+    # Load video
+    video = VideoFileClip(video_path)
+    # Parse .srt file
+    with open(srt_path, "r", encoding="utf-8") as f:
+        subtitles = list(srt.parse(f.read()))
+    # Create subtitle clips
+    def make_textclip(txt):
+        return TextClip(txt, font=font, fontsize=font_size, color=font_color, stroke_color='black', stroke_width=2)
+    subtitle_clips = []
+    for sub in subtitles:
+        txt_clip = (make_textclip(sub.content)
+                    .set_position(subtitle_position)
+                    .set_start(sub.start.total_seconds())
+                    .set_duration((sub.end - sub.start).total_seconds()))
+        subtitle_clips.append(txt_clip)
+    # Background music
+    if os.path.exists(bgm_path):
+        bgm = AudioFileClip(bgm_path).volumex(0.2)  # reduce volume
+        bgm = bgm.set_duration(video.duration)
+        final_audio = video.audio.volumex(0.8).audio_fadein(1).audio_fadeout(1).set_duration(video.duration)
+        final_audio = final_audio.set_audio(bgm)
+    else:
+        final_audio = video.audio
+    final = CompositeVideoClip([video, *subtitle_clips])
+    final = final.set_audio(final_audio)
+    # Export final video
+    final.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=video.fps)
+    return output_path
+# Aliases for compatibility with streamlit_ui.py (presumably it imports these
+# functions under the names below; verify before removing either alias).
+generate_srt_from_text = export_srt
+enhance_video_with_subtitles_and_bgm = add_subtitles_and_bgm

default_bgm.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04217a47ea351a5332f063822e233536c99b5c99fbbac8f806e9ea2470249327
+size 160514

docs/index.html ADDED Viewed

	@@ -0,0 +1,62 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
+  <meta name="description" content="OSN Media Generator - Built by O.S.Narayana. Create media content using AI with no ads or in-app purchases."/>
+  <title>OSN Media Generator</title>
+  <style>
+    body {
+      font-family: Arial, sans-serif;
+      background-color: #f3f4f6;
+      color: #111827;
+      text-align: center;
+      padding: 50px 20px;
+    }
+    .container {
+      max-width: 600px;
+      margin: auto;
+      background: #ffffff;
+      padding: 30px;
+      border-radius: 12px;
+      box-shadow: 0 4px 12px rgba(0,0,0,0.1);
+    }
+    h1 {
+      color: #1f2937;
+    }
+    a {
+      color: #2563eb;
+      text-decoration: none;
+    }
+    a:hover {
+      text-decoration: underline;
+    }
+    .footer {
+      margin-top: 40px;
+      font-size: 0.9em;
+      color: #6b7280;
+    }
+  </style>
+</head>
+<body>
+  <div class="container">
+    <h1>🎮 OSN Media Generator</h1>
+    <p>
+      Welcome to the official homepage of <strong>OSN Media Generator</strong>, created by <strong>O.S.Narayana</strong>.
+    </p>
+    <p>
+      This app helps you generate audio, images, and videos using services such as ElevenLabs (text-to-speech) and Unsplash (photos), through a Streamlit interface.
+    </p>
+    <p>
+      👉 Try the app now: <br>
+      <a href="https://osnarayana-media-generator.streamlit.app/" target="_blank">Launch OSN Media Generator</a>
+    </p>
+    <p>
+      📄 View our <a href="privacy.html" target="_blank">Privacy Policy</a>
+    </p>
+    <div class="footer">
+      &copy; 2025 OSN Media. Built with 💗 by O.S.Narayana.
+    </div>
+  </div>
+</body>
+</html>

docs/privacy.html ADDED Viewed

	@@ -0,0 +1,68 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Privacy Policy - OSN Media Generator</title>
+    <meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
+    <meta http-equiv="Pragma" content="no-cache" />
+    <meta http-equiv="Expires" content="0" />
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            margin: 2rem;
+            padding: 0;
+            background-color: #f9f9f9;
+            color: #333;
+        }
+        h1, h2, h3 { color: #222; }
+        a { color: #1a73e8; text-decoration: none; }
+        a:hover { text-decoration: underline; }
+        code { background-color: #eee; padding: 2px 4px; border-radius: 4px; }
+    </style>
+</head>
+<body>
+    <h1>🔐 Privacy Policy for OSN Media Generator</h1>
+<p>Effective Date: July 19, 2025</p>
+<p>Your privacy is important to us. This app is built to prioritize user data security and transparency.</p>
+<h2>1. What We Collect</h2>
+<ul>
+<li><strong>No personal data</strong> is collected, stored, or shared by this application.</li>
+<li>All prompts, audio, and media are <strong>processed locally</strong> or sent to external services (e.g., ElevenLabs, Unsplash) using <strong>user-provided API keys</strong>.</li>
+</ul>
+<h2>2. API Usage</h2>
+<ul>
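+<li>If you use external services (like ElevenLabs or Unsplash), you are subject to their respective terms and privacy policies: ElevenLabs <a href="https://www.elevenlabs.io/terms">Terms of Service</a> and <a href="https://www.elevenlabs.io/privacy">Privacy Policy</a>; Unsplash <a href="https://unsplash.com/terms">Terms</a> and <a href="https://unsplash.com/privacy">Privacy Policy</a>.</li>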
+<li>Your API keys are stored <strong>locally</strong> in your environment file (<code>.env</code>) and never uploaded.</li>
+</ul>
+<h2>3. Data Storage</h2>
+<ul>
+<li>Generated images, audio, and videos are stored <strong>only on your local machine</strong> under the <code>outputs/</code> directory.</li>
+<li>You can delete any generated content at your discretion.</li>
+</ul>
+<h2>4. Analytics &amp; Ads</h2>
+<ul>
+<li>This app contains <strong>no ads</strong>, <strong>no tracking</strong>, and <strong>no analytics</strong>.</li>
+</ul>
+<h2>5. Changes to Policy</h2>
+<p>Any updates to this policy will be reflected in this file on the <a href="https://github.com/sivaogeti/osnarayana-media-generator">GitHub repository</a>.</p>
+<h2>6. Contact</h2>
+<p>For questions or issues, contact: <strong>[email protected]</strong></p>
+</body>
+</html>

fly.toml ADDED Viewed

	@@ -0,0 +1,25 @@

+app = "media-gen-api"
+[build]
+  image = "tiangolo/uvicorn-gunicorn-fastapi:python3.10"
+[env]
+  PYTHONUNBUFFERED = "1"
+[[services]]
+  internal_port = 80
+  protocol = "tcp"
+  [[services.ports]]
+    handlers = ["http"]
+    port = 80
+  [[services.ports]]
+    handlers = ["tls", "http"]
+    port = 443
+  [[services.tcp_checks]]
+    interval = "15s"
+    timeout = "2s"
+    grace_period = "5s"
+    restart_limit = 0

generated_audio/audio_12aa4a7f8c6b4e459e88fd7bfa8559fe.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb
+size 9024

generated_audio/audio_2084def7399c47fc81f1ea75a2bb38df.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb
+size 9024