osnarayana commited on
Commit
8234bbd
·
2 Parent(s): b7f91f4 9bf9f75

Resolved merge conflict in README.md

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .devcontainer/devcontainer.json +33 -0
  2. .gitignore +44 -0
  3. .streamlit/config.toml +6 -0
  4. .vs/VSWorkspaceState.json +6 -0
  5. .vs/ai_gov_comm/FileContentIndex/09e5affd-6ff5-45b8-b6bb-c44b5b94954f.vsidx +0 -0
  6. .vs/ai_gov_comm/v17/.wsuo +0 -0
  7. .vs/ai_gov_comm/v17/DocumentLayout.json +12 -0
  8. ArchitectureDiagram.png +3 -0
  9. Demo/02_Tech_Deck.pdf +3 -0
  10. Demo/03_Cost_Sheet.xlsx +0 -0
  11. Demo/04_Team_Profile.pdf +3 -0
  12. Demo/05-Roadmap.pdf +3 -0
  13. Demo/06-End Slide.pdf +3 -0
  14. Demo/old-02_Tech_Deck.pdf +3 -0
  15. Dockerfile +20 -0
  16. PRIVACY_POLICY.md +38 -0
  17. Procfile +1 -0
  18. Project_structure.docx +0 -0
  19. README.md +96 -0
  20. Spacefile +5 -0
  21. app/__init__.py +0 -0
  22. app/api/__init__.py +0 -0
  23. app/api/v1/__init__.py +0 -0
  24. app/api/v1/audio.py +34 -0
  25. app/api/v1/download.py +32 -0
  26. app/api/v1/image.py +41 -0
  27. app/api/v1/metrics.py +24 -0
  28. app/api/v1/ppt.py +24 -0
  29. app/api/v1/utils.py +9 -0
  30. app/api/v1/video.py +45 -0
  31. app/auth/auth.py +25 -0
  32. app/core/config.py +6 -0
  33. app/db.py +7 -0
  34. app/main.py +39 -0
  35. app/models.py +14 -0
  36. app/services/audio_service.py +39 -0
  37. app/services/image_service.py +42 -0
  38. app/services/ppt_service.py +34 -0
  39. app/services/video_service - Copy.py +35 -0
  40. app/services/video_service.py +94 -0
  41. assets/default.jpg +0 -0
  42. assets/logo_watermark.png +3 -0
  43. backend/media_gen.py +187 -0
  44. backend/subtitle_utils.py +84 -0
  45. default_bgm.mp3 +3 -0
  46. docs/index.html +62 -0
  47. docs/privacy.html +68 -0
  48. fly.toml +25 -0
  49. generated_audio/audio_12aa4a7f8c6b4e459e88fd7bfa8559fe.mp3 +3 -0
  50. generated_audio/audio_2084def7399c47fc81f1ea75a2bb38df.mp3 +3 -0
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Python 3",
3
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
4
+ "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
5
+ "customizations": {
6
+ "codespaces": {
7
+ "openFiles": [
8
+ "README.md",
9
+ "app.py"
10
+ ]
11
+ },
12
+ "vscode": {
13
+ "settings": {},
14
+ "extensions": [
15
+ "ms-python.python",
16
+ "ms-python.vscode-pylance"
17
+ ]
18
+ }
19
+ },
20
+ "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21
+ "postAttachCommand": {
22
+ "server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
23
+ },
24
+ "portsAttributes": {
25
+ "8501": {
26
+ "label": "Application",
27
+ "onAutoForward": "openPreview"
28
+ }
29
+ },
30
+ "forwardPorts": [
31
+ 8501
32
+ ]
33
+ }
.gitignore ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ *.pyc
3
+ __pycache__/
4
+ *.pyo
5
+ *.pyd
6
+ *.log
7
+
8
+ # Virtual env
9
+ .venv/
10
+ venv/
11
+ ENV/
12
+ env/
13
+
14
+ # Environment secrets
15
+ .env
16
+
17
+ # Streamlit cache
18
+ .streamlit/
19
+ .metadata/
20
+
21
+ # Pytest cache
22
+ .pytest_cache/
23
+
24
+ # VS Code / PyCharm / IDE files
25
+ .vscode/
26
+ .idea/
27
+ .project
28
+ .pydevproject
29
+
30
+ # RStudio / Mac / Windows artifacts
31
+ .Rhistory
32
+ .DS_Store
33
+ Thumbs.db
34
+
35
+ # Output folders
36
+ outputs/
37
+
38
+
39
+ logs/
40
+ __pycache__/
41
+ outputs/
42
+ generated/
43
+ *.pyc
44
+ *.log
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [theme]
2
+ primaryColor = "#4CAF50"
3
+ backgroundColor = "#F5F5F5"
4
+ secondaryBackgroundColor = "#E0E0E0"
5
+ textColor = "#262730"
6
+ font = "sans serif"
.vs/VSWorkspaceState.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "ExpandedNodes": [
3
+ ""
4
+ ],
5
+ "PreviewInSolutionExplorer": false
6
+ }
.vs/ai_gov_comm/FileContentIndex/09e5affd-6ff5-45b8-b6bb-c44b5b94954f.vsidx ADDED
Binary file (84.3 kB). View file
 
.vs/ai_gov_comm/v17/.wsuo ADDED
Binary file (8.19 kB). View file
 
.vs/ai_gov_comm/v17/DocumentLayout.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Version": 1,
3
+ "WorkspaceRootPath": "C:\\Users\\fimba\\OneDrive\\Desktop\\PythonProjects\\ai_gov_comm\\",
4
+ "Documents": [],
5
+ "DocumentGroupContainers": [
6
+ {
7
+ "Orientation": 0,
8
+ "VerticalTabListWidth": 256,
9
+ "DocumentGroups": []
10
+ }
11
+ ]
12
+ }
ArchitectureDiagram.png ADDED

Git LFS Details

  • SHA256: 939a8dca28847fc7ed65376c5f54f1badac80b13fe3be7685afafb507587f7d9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.65 MB
Demo/02_Tech_Deck.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b45c266eb01b16301621ee2e31ea8282cd1d72fff4bf959522b0b302f5b7d0e
3
+ size 10621
Demo/03_Cost_Sheet.xlsx ADDED
Binary file (5.63 kB). View file
 
Demo/04_Team_Profile.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e71ceecc9d5d85324fd31ed8b268e222977f8dfc1455cb44e260b2aa74ad5de0
3
+ size 73529
Demo/05-Roadmap.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea63aed5200371c1b3bc42cf06f9e4f1d3cded3afe53f27f5789de4cbccc352d
3
+ size 83787
Demo/06-End Slide.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaab3d85667480068276f4c7c45db0ce1a31c7b26227ac904eb5b8ad4e6cf0ca
3
+ size 3922333
Demo/old-02_Tech_Deck.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8887e7d3825f880a83d1d4c94d1d9dbc3dbe3db290b6d4ee03c14a918b07bd45
3
+ size 1730
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use lightweight Python
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Copy requirements first for caching
8
+ COPY requirements.txt .
9
+
10
+ # Install dependencies
11
+ RUN pip install --no-cache-dir -r requirements.txt
12
+
13
+ # Copy project files
14
+ COPY . .
15
+
16
+ # Expose FastAPI port
17
+ EXPOSE 8000
18
+
19
+ # Run FastAPI with uvicorn
20
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
PRIVACY_POLICY.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+
4
+ ### ✅ `PRIVACY_POLICY.md`
5
+
6
+
7
+ # 🔐 Privacy Policy for OSNarayana Media Generator
8
+
9
+ Effective Date: July 19, 2025
10
+
11
+ Your privacy is important to us. This app is built to prioritize user data security and transparency.
12
+
13
+ ## 1. What We Collect
14
+
15
+ - **No personal data** is collected, stored, or shared by this application.
16
+ - All prompts, audio, and media are **processed locally** or via **user-provided API keys** to external services (e.g., ElevenLabs, Unsplash).
17
+
18
+ ## 2. API Usage
19
+
20
+ - If you use external services (like ElevenLabs or Unsplash), you are subject to their respective [Terms of Service](https://www.elevenlabs.io/terms) and [Privacy Policies](https://www.elevenlabs.io/privacy).
21
+ - Your API keys are stored **locally** in your environment file (`.env`) and never uploaded.
22
+
23
+ ## 3. Data Storage
24
+
25
+ - Generated images, audio, and videos are stored **only on your local machine** under the `outputs/` directory.
26
+ - You can delete any generated content at your discretion.
27
+
28
+ ## 4. Analytics & Ads
29
+
30
+ - This app contains **no ads**, **no tracking**, and **no analytics**.
31
+
32
+ ## 5. Changes to Policy
33
+
34
+ Any updates to this policy will be reflected in this file on the [GitHub repository](https://github.com/your-username/osnarayana-media-generator).
35
+
36
+ ## 6. Contact
37
+
38
+ For questions or issues, contact: **[email protected]**
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: uvicorn app.main:app --host 0.0.0.0 --port 10000
Project_structure.docx ADDED
Binary file (14 kB). View file
 
README.md CHANGED
@@ -9,3 +9,99 @@ short_description: FastAPI backend for Text-to-Audio, Image, and Video generato
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+
13
+ # 🎙️ Media Generation API
14
+
15
+ A FastAPI-based backend to generate audio, images, video, and PPT from user inputs.
16
+ Supports BLEU/CLIP metrics, token-based authentication, and stores metadata in SQLite/Postgres.
17
+
18
+ A modular, RESTful FastAPI solution that converts text input into:
19
+ - 🎥 Video
20
+ - 🖼️ Image/Graphics
21
+ - 🔊 Audio
22
+
23
+
24
+ ---
25
+
26
+ ## 🚀 Features
27
+
28
+ - Text → Video: Tone, domain, and environment-aware video generation.
29
+ - Text → Audio: Context-aware voice synthesis with emotional tone and language support.
30
+ - Text → Graphics: Visual generation using parameter-based prompts.
31
+ - BLEU/CLIP metrics for prompt-output fidelity.
32
+ - Token-based authentication for secure API use.
33
+ - Dockerized for easy deployment
34
+ - Optional Streamlit/React UI
35
+ - Swagger UI: `http://localhost:8000/docs`
36
+
37
+ ---
38
+
39
+ ### 📁 Project Structure
40
+ media-gen-api/
41
+ ├── app/
42
+ │ ├── api/v1/ # Versioned API endpoints
43
+ │ ├── auth/ # Token-based auth
44
+ │ ├── services/ # Core media generation logic
45
+ │ └── main.py # FastAPI entry point
46
+ ├── tests/ # Unit/integration tests
47
+ ├── requirements.txt
48
+ └── README.md
49
+
50
+ ---
51
+
52
+ ## 📦 Installation
53
+ 🚀 Run Locally
54
+ 1. Clone repo & create virtual environment
55
+
56
+ git clone https://github.com/yourorg/media-gen-api.git
57
+ cd media-gen-api
58
+ python -m venv .venv
59
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
60
+
61
+ 2. Install dependencies
62
+
63
+ pip install -r requirements.txt
64
+
65
+ 3. Run the API
66
+
67
+ uvicorn app.main:app --reload
68
+
69
+ Access docs: http://127.0.0.1:8000/docs
70
+
71
+ ---
72
+ ### 🔐 Authentication
73
+ Use Bearer <your_token> in the Authorize button or headers.
74
+
75
+ ---
76
+ ### 📡 API Endpoints Summary
77
+ | Endpoint | Method | Description |
78
+ |--------------------------|--------|---------------------------|
79
+ | /api/v1/audio/generate | POST | Generate audio from text |
80
+ | /api/v1/image/generate | POST | Generate image from text |
81
+ | /api/v1/video/generate | POST | Generate video from text |
82
+ | /api/v1/download | GET | Download generated file |
83
+
84
+ ---
85
+ ### 📦 Deployment (Streamlit/Optional UI)
86
+ Option 1: Run with Streamlit (for demo)
87
+ streamlit run streamlit_ui.py
88
+
89
+ Option 2: Docker (Production-ready)
90
+ docker build -t media-gen-api .
91
+ docker run -p 8000:8000 media-gen-api
92
+
93
+ ---
94
+ ### 📊 Metrics Logging (Optional)
95
+ - BLEU score and CLIPScore (WIP)
96
+ - Latency, GPU/CPU tracking
97
+ - Log file: logs/generation.log
98
+
99
+ ---
100
+ #### 📋 Submission Checklist
101
+ - ✅ RESTful modular architecture
102
+ - ✅ Multi-format (MP4, PNG, WAV)
103
+ - ✅ Token Auth + Swagger UI
104
+ - ✅ Compatible with DD/PIB via API
105
+ - ✅ Streamlit demo app (optional)
106
+
107
+
Spacefile ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [build]
2
+ builder = "heroku/buildpacks:20"
3
+
4
+ [run]
5
+ command = "uvicorn main:app --host 0.0.0.0 --port $PORT"
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/v1/__init__.py ADDED
File without changes
app/api/v1/audio.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Body
2
+ from fastapi.responses import Response # ✅ add this
3
+ from pydantic import BaseModel
4
+ from gtts import gTTS
5
+ import uuid
6
+ import os
7
+ router = APIRouter()
8
+
9
class AudioRequest(BaseModel):
    """Request body accepted by the audio generation endpoint."""

    # Text to be converted to speech.
    text: str
    # Voice preset name; defaults to "default".
    voice: str = "default"
    # Language code for synthesis; defaults to English.
    language: str = "en"
13
+
14
@router.post("/generate")
def generate_audio_endpoint(payload: AudioRequest):
    """Synthesise ``payload.text`` with gTTS and return the MP3 bytes inline.

    The clip is first written to ``generated/audio/audio_<uuid>.mp3`` and then
    read back, so a copy of every generated clip stays on disk.

    Args:
        payload: Request body; ``text`` and ``language`` are used, ``voice``
            is accepted but not read here.

    Returns:
        A ``Response`` carrying the MP3 bytes with media type ``audio/mpeg``.

    Raises:
        HTTPException: 500 with the underlying error text when synthesis or
            file I/O fails.
    """
    try:
        # One definition of the output folder, used for both the mkdir and
        # the file path, so the two can no longer drift apart.
        folder = "generated/audio"
        os.makedirs(folder, exist_ok=True)
        file_path = os.path.join(folder, f"audio_{uuid.uuid4().hex}.mp3")

        # Generate TTS audio
        tts = gTTS(text=payload.text, lang=payload.language)
        tts.save(file_path)

        # Return audio bytes for inline playback by the client
        with open(file_path, "rb") as f:
            audio_bytes = f.read()

        return Response(content=audio_bytes, media_type="audio/mpeg")

    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
app/api/v1/download.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Query
2
+ from fastapi.responses import FileResponse
3
+ import os
4
+
5
+ router = APIRouter()
6
+
7
@router.get("/")
def download_file(file_path: str = Query(..., description="Relative path from project root")):
    """Serve a previously generated file from the ``generated`` directory.

    Args:
        file_path: Path of the requested file, resolved against the current
            working directory.

    Returns:
        A ``FileResponse`` for the file, with a media type guessed from its
        extension.

    Raises:
        HTTPException: 400 when the resolved path is outside ``generated``;
            404 when it is not an existing regular file.
    """
    import mimetypes

    print(f"🔍 Requested file path: {file_path}")

    # Sanitize and resolve absolute path
    full_path = os.path.abspath(file_path)
    allowed_root = os.path.abspath("generated")

    # Compare whole path components. A plain startswith() prefix test also
    # accepts sibling directories such as "generated_audio" or
    # "generated-secrets", which defeats the traversal guard.
    try:
        inside_root = os.path.commonpath([full_path, allowed_root]) == allowed_root
    except ValueError:
        # Raised e.g. for paths on different drives on Windows.
        inside_root = False
    if not inside_root:
        raise HTTPException(status_code=400, detail="Invalid file path")

    print(f"📂 Resolved full path: {full_path}")

    if not os.path.isfile(full_path):
        print("❌ File not found.")
        raise HTTPException(status_code=404, detail="File not found")

    # Guess the media type from the extension so videos and presentations
    # are not mislabelled as PNG images; fall back to a generic binary type.
    media_type = mimetypes.guess_type(full_path)[0] or "application/octet-stream"

    return FileResponse(
        full_path,
        media_type=media_type,
        filename=os.path.basename(full_path)
    )
app/api/v1/image.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Depends, Body
2
+ from fastapi.responses import Response
3
+ from pydantic import BaseModel
4
+ from app.auth.auth import verify_token
5
+ import requests
6
+ import os
7
+ from pydantic import BaseModel
8
+ from dotenv import load_dotenv
9
+ load_dotenv()
10
+
11
+
12
+ # ✅ Define router
13
+ router = APIRouter()
14
+
15
+ # ✅ Define Request schema
16
class ImageRequest(BaseModel):
    """Request body accepted by the image generation endpoint."""

    # Free-text description of the wanted image.
    prompt: str
    # Style keyword; defaults to "default".
    style: str = "default"
19
+
20
UNSPLASH_ACCESS_KEY = os.getenv("UNSPLASH_ACCESS_KEY")  # store this in .env
# Report only whether the key is configured; never write the secret itself
# to stdout, where it would end up in container and hosting logs.
print(f"unsplash key configured: {bool(UNSPLASH_ACCESS_KEY)}")
22
+
23
+ # ✅ Endpoint
24
@router.post("/generate")
def generate_image_file_endpoint(
    data: ImageRequest = Body(...),
    token: str = Depends(verify_token)
):
    """Fetch a random Unsplash photo matching the prompt and return its bytes.

    Args:
        data: Request body; ``prompt`` and ``style`` are joined with a space
            to form the search query.
        token: Result of the ``verify_token`` dependency (not used further).

    Returns:
        A ``Response`` with the downloaded image bytes, media type
        ``image/jpeg``.

    Raises:
        HTTPException: 500 with a generic message when the Unsplash lookup
            or the image download fails.
    """
    query = f"{data.prompt} {data.style}"

    try:
        # Send the query through `params` so requests URL-encodes it; prompts
        # containing "&", "#" or "=" would otherwise corrupt the query string.
        # A timeout keeps a stalled upstream from blocking the worker forever.
        r = requests.get(
            "https://api.unsplash.com/photos/random",
            params={
                "query": query,
                "client_id": UNSPLASH_ACCESS_KEY,
                "orientation": "landscape",
            },
            timeout=15,
        )
        r.raise_for_status()
        image_url = r.json()["urls"]["regular"]

        # Check the download status too, so an upstream error page is never
        # returned to the client labelled as a JPEG.
        img_response = requests.get(image_url, timeout=15)
        img_response.raise_for_status()
        return Response(content=img_response.content, media_type="image/jpeg")

    except Exception as e:
        print(f"❌ Image fetch failed: {str(e)}")
        raise HTTPException(status_code=500, detail="Image generation failed.")
app/api/v1/metrics.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/v1/metrics.py
2
+ from fastapi import APIRouter, HTTPException, Query, Depends, Request
3
+ from typing import List
4
+ from sklearn.metrics import accuracy_score
5
+ from nltk.translate.bleu_score import sentence_bleu
6
+ from sentence_transformers import SentenceTransformer, util
7
+ from app.auth.auth import verify_token
8
+
9
+ router = APIRouter()
10
+ model = SentenceTransformer("clip-ViT-B-32") # for CLIP-like semantic score
11
+
12
@router.post("/evaluate/bleu", dependencies=[Depends(verify_token)])
def compute_bleu(reference: str, candidate: str):
    """Return the sentence-level BLEU score of ``candidate`` against ``reference``.

    Both strings are tokenised by whitespace; the reference is passed as a
    single-element list of references.
    """
    references = [reference.split()]
    hypothesis = candidate.split()
    bleu = sentence_bleu(references, hypothesis)
    return {"metric": "BLEU", "score": bleu}
18
+
19
@router.post("/evaluate/clipscore")
def compute_clip_score(reference: str, candidate: str):
    """Return the cosine similarity between the embeddings of two texts.

    Both strings are encoded with the module-level ``model`` and compared
    with ``util.cos_sim``; the result is reported under the "CLIPScore" label.
    """
    reference_embedding = model.encode(reference, convert_to_tensor=True)
    candidate_embedding = model.encode(candidate, convert_to_tensor=True)
    similarity = util.cos_sim(reference_embedding, candidate_embedding)
    return {"metric": "CLIPScore", "score": similarity.item()}
app/api/v1/ppt.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/v1/ppt.py
2
+ from fastapi import APIRouter, HTTPException, Query, Depends, Request
3
+ from pydantic import BaseModel
4
+ from typing import List
5
+ from app.services.ppt_service import generate_ppt_file
6
+ from app.auth.auth import verify_token
7
+
8
+ router = APIRouter()
9
+
10
class Slide(BaseModel):
    """One slide of a presentation request."""

    # Slide heading.
    title: str
    # Slide body text.
    content: str
13
+
14
class PPTInput(BaseModel):
    """Request body accepted by the PPT generation endpoint."""

    # Slides in presentation order.
    slides: List[Slide]
16
+
17
@router.post("/generate")
def generate_ppt(payload: PPTInput):
    """Generate a presentation file from the submitted slides.

    Each slide model is converted to a plain dict and handed to
    ``generate_ppt_file``; the response carries the resulting file name and
    a download URL pointing at ``generated/ppt/<filename>``.
    """
    slide_dicts = []
    for slide in payload.slides:
        slide_dicts.append(slide.dict())

    filename = generate_ppt_file(slide_dicts)
    download_url = f"/api/v1/download?file_path=generated/ppt/{filename}"
    return {
        "message": "PPT generated successfully",
        "filename": filename,
        "download_url": download_url,
    }
app/api/v1/utils.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/v1/utils.py
2
+ from fastapi.responses import FileResponse
3
+
4
def download_file(file_path: str):
    """Wrap ``file_path`` in a ``FileResponse`` served as a generic binary download.

    The download name is whatever follows the last "/" in ``file_path``.
    """
    download_name = file_path.rsplit("/", 1)[-1]
    return FileResponse(
        path=file_path,
        filename=download_name,
        media_type="application/octet-stream",
    )
app/api/v1/video.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/v1/video.py
2
+ from fastapi import APIRouter, HTTPException, Depends, Body
3
+ from fastapi.responses import FileResponse
4
+ from pydantic import BaseModel
5
+ from app.services.video_service import generate_video_file
6
+ from app.auth.auth import verify_token
7
+ import os
8
+ from typing import Optional
9
+
10
+ # ✅ Define router FIRST
11
+ router = APIRouter()
12
+
13
class VideoInput(BaseModel):
    """Request body accepted by the video generation endpoint."""

    # Text the video is generated from.
    prompt: str
    # Required descriptive fields supplied by the client.
    tone: str
    domain: str
    environment: str
    # Optional transcript; omitted means None.
    transcript: Optional[str] = None
19
+
20
@router.post("/generate")
def generate_video_endpoint(
    payload: VideoInput = Body(...),
    token: str = Depends(verify_token)
):
    """Generate a video for ``payload.prompt`` and return the MP4 file.

    Only ``prompt`` is forwarded to ``generate_video_file`` (with a fixed
    ``duration`` of 10); the other request fields are accepted but not used
    here.

    Args:
        payload: Request body describing the video.
        token: Result of the ``verify_token`` dependency (not used further).

    Returns:
        A ``FileResponse`` for ``generated/video/<filename>`` with media type
        ``video/mp4``.

    Raises:
        HTTPException: 500 when generation fails or the produced file is
            missing.
    """
    try:
        # Generate video file
        filename = generate_video_file(
            script=payload.prompt,
            duration=10  # Optional: could be dynamic
        )
        video_path = os.path.join("generated/video", filename)

        if not os.path.exists(video_path):
            raise HTTPException(status_code=500, detail="Video not found")

        # Return the actual file so the client can play it
        return FileResponse(
            video_path,
            media_type="video/mp4",
            filename=filename
        )

    except HTTPException:
        # Deliberate HTTP errors raised above must pass through untouched;
        # the generic handler below would otherwise re-wrap them and replace
        # their detail with str(exception).
        raise
    except Exception as e:
        print("❌ Video generation error:", str(e))
        raise HTTPException(status_code=500, detail=str(e)) from e
app/auth/auth.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #from fastapi import Depends, HTTPException, status
2
+ from fastapi import HTTPException, Security
3
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
4
+ from starlette.status import HTTP_403_FORBIDDEN
5
+
6
+ from fastapi import Security
7
+ from fastapi.security import HTTPBearer
8
+
9
+ bearer_scheme = HTTPBearer()
10
+
11
+ #security = HTTPBearer()
12
+
13
+ from fastapi import Header, HTTPException, Depends
14
+
15
+ VALID_TOKENS = ["my_secure_token_123"] # or load from file/db/env
16
+
17
def verify_token(credentials: HTTPAuthorizationCredentials = Security(bearer_scheme)):
    """Validate the bearer token of the current request.

    Args:
        credentials: Bearer credentials extracted by ``bearer_scheme``.

    Returns:
        The accepted token string, so endpoints declaring
        ``token: str = Depends(verify_token)`` receive the token rather than
        ``None``.

    Raises:
        HTTPException: 403 when the token is not one of ``VALID_TOKENS``.
    """
    import secrets

    token = credentials.credentials
    supplied = token.encode("utf-8")

    # Check against VALID_TOKENS (the single configured list) instead of a
    # duplicated literal. compare_digest avoids leaking how many leading
    # characters matched through response timing; bytes are used because
    # it rejects non-ASCII str arguments.
    is_valid = any(
        secrets.compare_digest(supplied, valid.encode("utf-8"))
        for valid in VALID_TOKENS
    )
    if not is_valid:
        raise HTTPException(
            status_code=HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return token
25
+
app/core/config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import os
2
+
3
class Settings:
    """Application settings read from environment variables at import time."""

    # Value of the API_KEY environment variable, or a dummy placeholder
    # when the variable is not set.
    API_KEY: str = os.environ.get("API_KEY", "dummy-api-key")
5
+
6
+ settings = Settings()
app/db.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import create_engine
2
+ from sqlalchemy.orm import sessionmaker
3
+
4
+ DATABASE_URL = "sqlite:///./media_gen.db"
5
+
6
+ engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
7
+ SessionLocal = sessionmaker(bind=engine, autoflush=False)
app/main.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/main.py
2
+ from fastapi import FastAPI, Depends
3
+ from fastapi.security import HTTPBearer
4
+ from fastapi import Security
5
+
6
+ from fastapi import FastAPI
7
+
8
+ from app.api.v1.audio import router as audio_router
9
+ from app.api.v1.video import router as video_router
10
+ from app.api.v1.image import router as image_router
11
+ from app.api.v1.ppt import router as ppt_router
12
+ from app.api.v1.metrics import router as metrics_router
13
+ from app.api.v1.download import router as download_router
14
+ from fastapi import Security
15
+
16
+ from app.auth.auth import verify_token
17
+
18
+ bearer_scheme = HTTPBearer()
19
+
20
+
21
+ app = FastAPI(
22
+ title="Media Generation API",
23
+ description="Generate audio, video, image, and PPT content via secure endpoints.",
24
+ version="1.0.0"
25
+ )
26
+
27
+ # Root for health check
28
@app.get("/")
def root():
    """Health-check endpoint: returns a fixed confirmation message."""
    status = {"message": "FastAPI running successfully!"}
    return status
31
+
32
+ # Registering route modules
33
+ app.include_router(audio_router, prefix="/api/v1/audio", tags=["Audio"], dependencies=[Depends(verify_token)])
34
+ app.include_router(video_router, prefix="/api/v1/video", tags=["Video"], dependencies=[Depends(verify_token)])
35
+ app.include_router(image_router, prefix="/api/v1/image", tags=["Image"], dependencies=[Depends(verify_token)])
36
+ app.include_router(ppt_router, prefix="/api/v1/ppt", tags=["PPT"], dependencies=[Depends(verify_token)])
37
+ app.include_router(metrics_router, prefix="/api/v1/metrics", tags=["Metrics"], dependencies=[Depends(verify_token)])
38
+ app.include_router(download_router, prefix="/api/v1/download", tags=["Download"])
39
+
app/models.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, DateTime
2
+ from sqlalchemy.ext.declarative import declarative_base
3
+ from datetime import datetime
4
+
5
+ Base = declarative_base()
6
+
7
class MediaGeneration(Base):
    """ORM model for one row of the ``media_generations`` table."""

    __tablename__ = "media_generations"

    # Indexed integer primary key.
    id = Column(Integer, primary_key=True, index=True)
    # Kind of media the row describes.
    media_type = Column(String)
    # Prompt text associated with the generation.
    prompt = Column(String)
    # Location of the generated file.
    file_path = Column(String)
    # Filled by calling datetime.utcnow when no value is supplied.
    timestamp = Column(DateTime, default=datetime.utcnow)
app/services/audio_service.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/services/audio_service.py
2
+ from gtts import gTTS
3
+ import os
4
+ from datetime import datetime
5
+ from app.db import SessionLocal
6
+ from app.models import MediaGeneration
7
+ import logging
8
+ logger = logging.getLogger(__name__)
9
+ import uuid
10
+
11
def generate_audio_file(text: str, voice: str = "default", language: str = "en") -> str:
    """Synthesise ``text`` with gTTS and save it as an MP3 under ``generated/audio``.

    Args:
        text: Text to convert to speech.
        voice: Accepted for interface compatibility; not used by this function.
        language: Language code passed to gTTS.

    Returns:
        Path of the written file, ``generated/audio/audio_<timestamp>.mp3``.

    Raises:
        Exception: Any error from gTTS or the file system is logged and
            re-raised unchanged.
    """
    try:
        tts = gTTS(text=text, lang=language, slow=False)
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        filename = f"audio_{timestamp}.mp3"
        output_dir = "generated/audio"
        os.makedirs(output_dir, exist_ok=True)
        file_path = os.path.join(output_dir, filename)
        tts.save(file_path)
        logger.info(f"Generated Audio: {filename}")
        return file_path
    except Exception as e:
        # The exception must be bound here: the previous bare `except:` used
        # an undefined `e`, so the handler itself failed with NameError and
        # hid the real error.
        logger.error(f"Audio Generation Failed: {str(e)}")
        raise
25
+
26
+
27
+ from app.db import SessionLocal
28
+ from app.models import MediaGeneration
29
+
30
def save_metadata(media_type, prompt, file_path):
    """Insert one ``MediaGeneration`` row describing a generated file.

    Args:
        media_type: Kind of media that was generated.
        prompt: Prompt the media was generated from.
        file_path: Location of the generated file.

    Raises:
        Exception: Any database error is re-raised after the transaction is
            rolled back.
    """
    db = SessionLocal()
    try:
        record = MediaGeneration(
            media_type=media_type,
            prompt=prompt,
            file_path=file_path,
        )
        db.add(record)
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        # Always release the session, even when add/commit fails; it was
        # previously leaked on any error.
        db.close()
app/services/image_service.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/services/image_service.py
2
+ import os
3
+ from datetime import datetime
4
+ from app.db import SessionLocal
5
+ from app.models import MediaGeneration
6
+ import logging
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
def generate_image_file(prompt: str, style: str = "default") -> str:
    """Write a placeholder "image" file for ``prompt`` under ``generated/image``.

    No real image is produced: the file has a ``.png`` name but contains the
    prompt and style as plain text.

    Args:
        prompt: Prompt text written into the file.
        style: Style label written into the file.

    Returns:
        The file name (not the full path), ``image_<timestamp>.png``.

    Raises:
        Exception: Any file-system error is logged and re-raised unchanged.
    """
    try:
        # Simulate saving a generated image file
        filename = f"image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
        folder = "generated/image"
        os.makedirs(folder, exist_ok=True)

        # Define the path once; the checks below used `output_path` without
        # it ever being assigned, so every call failed with NameError.
        output_path = os.path.join(folder, filename)

        # Placeholder: simulate image generation by writing prompt text.
        # Explicit UTF-8 so non-ASCII prompts do not fail on platforms whose
        # default encoding is not UTF-8.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(f"Prompt: {prompt}\nStyle: {style}")
        logger.info(f"Generated Image: {filename}")

        if os.path.isfile(output_path):
            print(f"✅ Image created: {output_path}, size = {os.path.getsize(output_path)} bytes")
        else:
            print(f"❌ Image file not found at: {output_path}")
        return filename
    except Exception as e:
        # Bind the exception; the former bare `except:` referenced an
        # undefined `e`.
        logger.error(f"Image Geneartion failed: {str(e)}")
        raise
29
+
30
+ from app.db import SessionLocal
31
+ from app.models import MediaGeneration
32
+
33
def save_metadata(media_type, prompt, file_path):
    """Insert one ``MediaGeneration`` row describing a generated file.

    Args:
        media_type: Kind of media that was generated.
        prompt: Prompt the media was generated from.
        file_path: Location of the generated file.

    Raises:
        Exception: Any database error is re-raised after the transaction is
            rolled back.
    """
    db = SessionLocal()
    try:
        record = MediaGeneration(
            media_type=media_type,
            prompt=prompt,
            file_path=file_path,
        )
        db.add(record)
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        # Always release the session, even when add/commit fails; it was
        # previously leaked on any error.
        db.close()
app/services/ppt_service.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/services/ppt_service.py
2
+ import os
3
+ from datetime import datetime
4
+ from app.db import SessionLocal
5
+ from app.models import MediaGeneration
6
+ import logging
7
+ logger = logging.getLogger(__name__)
8
+
9
def generate_ppt_file(slides: list[dict]) -> str:
    """Write a placeholder presentation file under ``generated/ppt``.

    No real PowerPoint document is produced: the file has a ``.ppt`` name
    but contains each slide's title and content as plain text.

    Args:
        slides: Slide dicts, each providing ``"title"`` and ``"content"``.

    Returns:
        The file name (not the full path), ``ppt_<timestamp>.ppt``.

    Raises:
        Exception: Any error (missing key, file-system failure) is logged
            and re-raised unchanged.
    """
    try:
        filename = f"ppt_{datetime.now().strftime('%Y%m%d_%H%M%S')}.ppt"
        folder = "generated/ppt"
        os.makedirs(folder, exist_ok=True)

        # Explicit UTF-8 so non-ASCII slide text does not fail on platforms
        # whose default encoding is not UTF-8.
        with open(os.path.join(folder, filename), "w", encoding="utf-8") as f:
            for i, slide in enumerate(slides, 1):
                f.write(f"Slide {i}:\nTitle: {slide['title']}\nContent: {slide['content']}\n\n")
        logger.info(f"Generated PPT: {filename}")
        return filename
    except Exception as e:
        # Bind the exception; the former bare `except:` referenced an
        # undefined `e` and failed with NameError instead of logging.
        logger.error(f"PPT Generation failed: {str(e)}")
        raise
23
+
24
+
25
def save_metadata(media_type, prompt, file_path):
    """Insert one ``MediaGeneration`` row describing a generated file.

    Args:
        media_type: Kind of media that was generated.
        prompt: Prompt the media was generated from.
        file_path: Location of the generated file.

    Raises:
        Exception: Any database error is re-raised after the transaction is
            rolled back.
    """
    db = SessionLocal()
    try:
        record = MediaGeneration(
            media_type=media_type,
            prompt=prompt,
            file_path=file_path,
        )
        db.add(record)
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        # Always release the session, even when add/commit fails; it was
        # previously leaked on any error.
        db.close()
app/services/video_service - Copy.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/services/video_service.py
2
+ import os
3
+ from datetime import datetime
4
+ from app.db import SessionLocal
5
+ from app.models import MediaGeneration
6
+ import logging
7
+ logger = logging.getLogger(__name__)
8
+
9
def generate_video_file(script: str, duration: int = 10) -> str:
    """Write a placeholder "video" file under ``generated/video``.

    No real video is produced: the file has an ``.mp4`` name but contains
    the script and duration as plain text.

    Args:
        script: Script text written into the file.
        duration: Duration in seconds written into the file.

    Returns:
        The file name (not the full path), ``video_<timestamp>.mp4``.

    Raises:
        Exception: Any file-system error is logged and re-raised unchanged.
    """
    try:
        # Simulate saving a generated video file
        filename = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
        folder = "generated/video"
        os.makedirs(folder, exist_ok=True)

        # Placeholder: simulate video generation by writing script info.
        # Explicit UTF-8 so non-ASCII scripts do not fail on platforms whose
        # default encoding is not UTF-8.
        with open(os.path.join(folder, filename), "w", encoding="utf-8") as f:
            f.write(f"Script: {script}\nDuration: {duration} seconds")
        logger.info(f"Generated Video: {filename}")
        return filename
    except Exception as e:
        # Bind the exception; the former bare `except:` referenced an
        # undefined `e` and failed with NameError instead of logging.
        logger.error(f"Video generation failed: {str(e)}")
        raise
24
+
25
+
26
def save_metadata(media_type, prompt, file_path):
    """Insert one ``MediaGeneration`` row describing a generated file.

    Args:
        media_type: Kind of media that was generated.
        prompt: Prompt the media was generated from.
        file_path: Location of the generated file.

    Raises:
        Exception: Any database error is re-raised after the transaction is
            rolled back.
    """
    db = SessionLocal()
    try:
        record = MediaGeneration(
            media_type=media_type,
            prompt=prompt,
            file_path=file_path,
        )
        db.add(record)
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        # Always release the session, even when add/commit fails; it was
        # previously leaked on any error.
        db.close()
app/services/video_service.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/services/video_service.py
2
+
3
+ import os
4
+ import uuid
5
+ import requests
6
+ from gtts import gTTS
7
+ from mutagen.mp3 import MP3
8
+ from moviepy.editor import ImageClip, concatenate_videoclips, AudioFileClip
9
+ from dotenv import load_dotenv
10
+
11
+ load_dotenv()
12
+
13
+ UNSPLASH_KEY = os.getenv("UNSPLASH_ACCESS_KEY")
14
+ UNSPLASH_API = "https://api.unsplash.com/photos/random"
15
+
16
def fetch_unsplash_images(query, count=3):
    """Collect up to ``count`` image URLs from Unsplash's random-photo endpoint.

    One request is made per wanted image. Requests that fail, time out or
    return a non-200 status are skipped, so the result may hold fewer than
    ``count`` URLs (possibly none).

    Args:
        query: Search text sent as the ``query`` parameter.
        count: Number of requests to make.

    Returns:
        List of "regular"-size image URLs.
    """
    headers = {"Accept-Version": "v1", "Authorization": f"Client-ID {UNSPLASH_KEY}"}
    urls = []

    for _ in range(count):
        try:
            # A timeout keeps a stalled connection from hanging the whole
            # video generation indefinitely.
            r = requests.get(
                UNSPLASH_API,
                params={"query": query},
                headers=headers,
                timeout=15,
            )
        except requests.RequestException:
            # Best effort, same as a non-200 response: skip this attempt.
            continue
        if r.status_code == 200:
            data = r.json()
            if isinstance(data, dict):
                urls.append(data["urls"]["regular"])
            elif isinstance(data, list) and len(data) > 0:
                urls.append(data[0]["urls"]["regular"])
    return urls
29
+
30
def generate_video_file(script: str, duration: int = None) -> str:
    """Build a narrated slideshow video for ``script`` and return its file name.

    Steps: synthesise the narration with gTTS, fetch up to three Unsplash
    images for the script, show each image for an equal share of the audio
    length, attach the narration and encode to ``generated/video``.

    Args:
        script: Text that is narrated and also used as the image query.
        duration: Accepted for interface compatibility; not used. The video
            length follows the narration (at least 3 seconds).

    Returns:
        The file name (not the full path), ``video_<uuid>.mp4``.

    Raises:
        Exception: When no images are found, or when any download, clip or
            encoding step fails.
    """
    os.makedirs("generated/video", exist_ok=True)
    os.makedirs("generated/audio", exist_ok=True)
    os.makedirs("generated/tmp", exist_ok=True)

    video_filename = f"video_{uuid.uuid4().hex}.mp4"
    video_path = os.path.join("generated/video", video_filename)
    audio_path = f"generated/audio/audio_{uuid.uuid4().hex}.mp3"

    # Step 1: Generate audio
    tts = gTTS(text=script, lang='en')
    tts.save(audio_path)

    # Get audio duration (fallback if 0)
    audio = MP3(audio_path)
    audio_duration = max(audio.info.length, 3.0)  # ensure at least 3s

    # Step 2: Fetch Unsplash images
    images = fetch_unsplash_images(script, count=3)
    if not images:
        raise Exception("No images found from Unsplash for the prompt")

    # Step 3: Create slideshow clips
    clips = []
    per_image_duration = audio_duration / len(images)
    tmp_files = []
    audio_clip = None
    final_clip = None

    try:
        for url in images:
            # Time out and check the status so a stalled or failed download
            # is reported instead of being written to disk as an "image".
            img_response = requests.get(url, timeout=30)
            img_response.raise_for_status()

            tmp_file = f"generated/tmp/tmp_{uuid.uuid4().hex}.jpg"
            tmp_files.append(tmp_file)

            with open(tmp_file, "wb") as f:
                f.write(img_response.content)

            clip = ImageClip(tmp_file).resize(height=720).set_duration(per_image_duration)
            clips.append(clip)

        # Step 4: Concatenate without negative padding
        final_clip = concatenate_videoclips(clips, method="compose")

        # Step 5: Force duration to match audio
        final_clip = final_clip.set_duration(audio_duration)

        # Step 6: Add audio (keep a handle so it can be closed afterwards)
        audio_clip = AudioFileClip(audio_path)
        final_clip = final_clip.set_audio(audio_clip)

        # Step 7: Export video
        final_clip.write_videofile(
            video_path,
            fps=24,
            codec="libx264",
            audio_codec="aac",
            threads=4,
            preset="ultrafast"
        )
    finally:
        # Cleanup runs on failure too, so aborted runs do not pile up temp
        # images. Only OSError is ignored here (file already gone or locked).
        for file in tmp_files:
            try:
                os.remove(file)
            except OSError:
                pass
        # Release the readers held by the clips.
        if audio_clip is not None:
            audio_clip.close()
        if final_clip is not None:
            final_clip.close()

    return video_filename
assets/default.jpg ADDED
assets/logo_watermark.png ADDED

Git LFS Details

  • SHA256: a7a35246c3c7c6df8cf325d6a1ea7f45e4ce0899303cc68a3771f8193cff0c35
  • Pointer size: 129 Bytes
  • Size of remote file: 5.24 kB
backend/media_gen.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ✅ Updated media_gen.py with file logging + UI debug toggle
2
+ import os
3
+ import re
4
+ import logging
5
+ import streamlit as st
6
+ import requests
7
+ from PIL import Image, UnidentifiedImageError
8
+ from io import BytesIO
9
+ from dotenv import load_dotenv
10
+ from moviepy.editor import ImageClip, AudioFileClip
11
+ from elevenlabs import generate, save, set_api_key
12
+ from googletrans import Translator
13
+ from PIL import ImageEnhance, Image
14
+ import tempfile
15
+
16
+ # Load env vars
17
+ load_dotenv()
18
+
19
+ # Logging setup
20
+ logging.basicConfig(
21
+ filename="app.log",
22
+ level=logging.INFO,
23
+ format="%(asctime)s [%(levelname)s] %(message)s",
24
+ )
25
+
26
# Constants
OUTPUT_DIR = "outputs"
# NOTE(review): the commit's file listing shows assets/default.jpg, not
# assets/fallback.jpg -- confirm this path exists in the repository.
DEFAULT_IMAGE = "assets/fallback.jpg"
WATERMARK_PATH = "assets/logo_watermark.png"
UNSPLASH_ACCESS_KEY = os.getenv("UNSPLASH_ACCESS_KEY")

# Make sure every output sub-directory exists before anything is generated.
for _subdir in ("audio", "images", "videos"):
    os.makedirs(f"{OUTPUT_DIR}/{_subdir}", exist_ok=True)
35
+
36
def translate_text(text, target_lang):
    """Translate *text* into *target_lang* using googletrans.

    A new ``Translator`` is created per call; the ``.text`` attribute of
    the translation result is returned.
    """
    translator = Translator()
    translation = translator.translate(text, dest=target_lang)
    return translation.text
38
+
39
def sanitize_filename(text):
    """Turn arbitrary text into a short slug usable as a file name.

    Each run of non-word characters collapses into a single underscore,
    the result is lower-cased, and only the first 50 characters are kept.
    """
    slug = re.sub(r'\W+', '_', text)
    slug = slug.lower()
    return slug[:50]
41
+
42
def apply_watermark(image_path, watermark_path=WATERMARK_PATH):
    """Stamp a 100x100 watermark near the bottom-right corner of an image.

    The image at *image_path* is overwritten in place (saved as RGB).
    Any failure is logged and shown in the Streamlit UI; nothing is raised
    and nothing is returned.
    """
    try:
        canvas = Image.open(image_path).convert("RGBA")
        logo = Image.open(watermark_path).convert("RGBA")
        logo = logo.resize((100, 100))
        # 110 = logo size (100) plus a 10 px margin from the right/bottom edge.
        corner = (canvas.width - 110, canvas.height - 110)
        # The logo is passed again as the mask so its alpha channel is honoured.
        canvas.paste(logo, corner, logo)
        flattened = canvas.convert("RGB")
        flattened.save(image_path)
    except Exception as err:
        logging.error(f"Watermarking failed: {err}")
        st.write(f"❌ Watermarking failed: {err}")
51
+
52
def use_fallback_image(prompt, add_watermark=False):
    """Copy the bundled fallback image into the outputs folder for *prompt*.

    Args:
        prompt: Text used to derive the output file name.
        add_watermark: When true, stamp the watermark on the copied image.

    Returns:
        The path of the written image, or ``None`` when the fallback image
        is missing, unreadable, or cannot be written.
    """
    output_path = f"outputs/images/{sanitize_filename(prompt)}.jpg"
    try:
        with Image.open(DEFAULT_IMAGE) as img:
            # JPEG cannot store alpha/palette modes, so normalise to RGB first.
            img.convert("RGB").save(output_path)
    except (OSError, UnidentifiedImageError):
        # OSError also covers a missing file. This function is itself the
        # last-resort path, so it must report failure instead of raising.
        logging.error("Could not open fallback image.", exc_info=True)
        st.write("❌ Could not open fallback image.")
        return None

    if add_watermark:
        apply_watermark(output_path)
    return output_path
65
+
66
def generate_gtts_fallback(prompt, output_path, lang="en", debug_mode=False):
    """Synthesise *prompt* to speech with gTTS and save it at *output_path*.

    gTTS is imported lazily inside the function. Returns *output_path* on
    success; on any failure the error is logged and shown in the UI and
    ``None`` is returned.
    """
    try:
        from gtts import gTTS

        gTTS(text=prompt, lang=lang).save(output_path)
        logging.info(f"gTTS fallback audio saved to {output_path}")
        if debug_mode:
            st.write(f"✅ Fallback audio (gTTS) saved to {output_path}")
        return output_path
    except Exception as err:
        logging.error(f"gTTS fallback failed: {err}")
        st.write(f"❌ gTTS fallback failed: {str(err)}")
        return None
79
+
80
+
81
def generate_image(prompt, file_tag, add_watermark=False, dark_mode=False, debug_mode=False):
    """Fetch a random Unsplash photo for *prompt* and save it as a JPEG.

    Args:
        prompt: Search text; also used to derive the output file name.
        file_tag: Accepted for interface compatibility; not used here.
        add_watermark: When true, stamp the watermark on the saved image.
        dark_mode: When true, night/low-light keywords are appended to the
            prompt before searching (this also affects the file name).
        debug_mode: Accepted for interface compatibility; not used here.

    Returns:
        Path of the saved image. On any failure the bundled fallback image
        is used instead and whatever ``use_fallback_image`` returns is
        passed through.
    """
    try:
        # Enhance prompt if dark mode is enabled
        if dark_mode:
            prompt += " at night, dark theme, low light, moody lighting"

        # The access key travels in the Authorization header rather than the
        # query string: HTTP errors embed the request URL in their message,
        # and that message is logged and shown in the UI below.
        url = f"https://api.unsplash.com/photos/random?query={requests.utils.quote(prompt)}"
        headers = {"Authorization": f"Client-ID {UNSPLASH_ACCESS_KEY}"}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        image_url = response.json()["urls"]["regular"]
        image_response = requests.get(image_url, timeout=10)
        image_response.raise_for_status()

        output_path = f"outputs/images/{sanitize_filename(prompt)}.jpg"
        img = Image.open(BytesIO(image_response.content))
        img.convert("RGB").save(output_path)

        if add_watermark:
            apply_watermark(output_path)

        return output_path

    except Exception as e:
        logging.error(f"Image generation failed: {e}")
        st.write("🔁 Unsplash failed. Using fallback.")
        st.write(f"❌ Image generation failed: {e}")
        return use_fallback_image(prompt, add_watermark=add_watermark)
108
+
109
+
110
+ # ✅ Updated generate_audio with proper language handling
111
+
112
def generate_audio(prompt, output_path, debug_mode=False, lang="en"):
    """Create speech audio for *prompt* and save it at *output_path*.

    English prompts go to ElevenLabs when an ``ELEVEN_API_KEY`` is found in
    the environment or Streamlit secrets. Non-English prompts, a missing
    key, or any error fall back to gTTS. Returns the saved path, or
    whatever the gTTS fallback returns.
    """
    def _fall_back():
        # Single place for the gTTS fallback call used by every exit path.
        return generate_gtts_fallback(prompt, output_path, lang=lang, debug_mode=debug_mode)

    try:
        api_key = os.getenv("ELEVEN_API_KEY") or st.secrets.get("ELEVEN_API_KEY", None)

        # Use gTTS for non-English languages
        if lang != "en":
            if debug_mode:
                st.write(f"🌐 Non-English language selected: {lang}. Using gTTS.")
            return _fall_back()

        if not api_key:
            logging.warning("ELEVEN_API_KEY not found")
            if debug_mode:
                st.write("❌ ELEVEN_API_KEY not found. Falling back to gTTS.")
            return _fall_back()

        if debug_mode:
            st.write(f"✅ ELEVEN_API_KEY loaded: {api_key[:4]}...****")

        set_api_key(api_key)
        if debug_mode:
            st.write(f"🎧 Generating audio for prompt: {prompt}")

        try:
            audio = generate(text=prompt, voice="Aria", model="eleven_monolingual_v1")
            save(audio, output_path)
            logging.info(f"Audio saved successfully to {output_path}")

            if debug_mode:
                st.write(f"🔍 File exists after save? {os.path.exists(output_path)}")
                st.write(f"✅ Audio saved successfully to {output_path}")
            return output_path

        except Exception as eleven_err:
            logging.warning(f"ElevenLabs failed: {eleven_err}")
            if debug_mode:
                st.write(f"⚠️ ElevenLabs failed: {str(eleven_err)}")
                st.write("🔁 Falling back to gTTS...")
            return _fall_back()

    except Exception as setup_err:
        logging.error(f"Exception during audio generation setup: {setup_err}")
        if debug_mode:
            st.write(f"❌ Exception during audio generation setup: {str(setup_err)}")
            st.write("🔁 Falling back to gTTS...")
        return _fall_back()
159
+
160
+
161
def generate_video(prompt, image_path, audio_path, output_path, add_watermark=False, dark_mode=False):
    """Render a still-image video whose length matches the audio track.

    Args:
        prompt: Text used to derive the output file name.
        image_path: Image shown for the whole video.
        audio_path: Audio file used as the soundtrack.
        output_path: Accepted for interface compatibility. The video is
            always written to ``outputs/videos/<sanitized prompt>.mp4`` and
            that path is what is returned.
        add_watermark: Accepted for interface compatibility; not used here.
        dark_mode: When true, a copy of the image at 50% brightness is used.

    Returns:
        Path of the written video, or ``None`` if anything failed (the
        error is logged and shown in the UI).
    """
    temp_image_path = None
    audio_clip = None
    image_clip = None
    try:
        # If dark_mode, darken a temporary copy of the image
        if dark_mode:
            with Image.open(image_path) as img:
                # Convert first: JPEG cannot store alpha/palette modes.
                darker_img = ImageEnhance.Brightness(img.convert("RGB")).enhance(0.5)

            # Reserve a temp name, then write after the handle is closed so
            # the save also works on platforms that lock open files.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
                temp_image_path = tmp.name
            darker_img.save(temp_image_path)
            image_path = temp_image_path

        audio_clip = AudioFileClip(audio_path)
        image_clip = ImageClip(image_path).set_duration(audio_clip.duration).resize(height=720)
        video = image_clip.set_audio(audio_clip)

        output_path = f"outputs/videos/{sanitize_filename(prompt)}.mp4"
        video.write_videofile(output_path, fps=24, codec="libx264", audio_codec="aac", verbose=False, logger=None)
        return output_path

    except Exception as e:
        logging.error(f"Video generation failed: {e}")
        st.write(f"❌ Video generation failed: {e}")
        return None

    finally:
        # Best-effort cleanup: release clip resources and the temp image.
        for clip in (image_clip, audio_clip):
            if clip is not None:
                try:
                    clip.close()
                except Exception as close_err:
                    logging.warning(f"Could not close clip: {close_err}")
        if temp_image_path is not None:
            try:
                os.remove(temp_image_path)
            except OSError as rm_err:
                logging.warning(f"Could not remove temp image {temp_image_path}: {rm_err}")
187
+
backend/subtitle_utils.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip
2
+ from moviepy.video.tools.subtitles import SubtitlesClip
3
+ import srt
4
+ from datetime import timedelta
5
+ import os
6
+
7
+
8
def export_srt(text, duration=10, words_per_caption=6, output_path="output.srt"):
    """Split *text* into fixed-size captions and write them as an SRT file.

    Args:
        text: Text to caption; split on whitespace.
        duration: Seconds each caption stays on screen (per caption, not
            the total length). Captions are laid out back to back.
        words_per_caption: Maximum number of words per caption; must be
            at least 1.
        output_path: Where the ``.srt`` file is written (UTF-8).

    Returns:
        *output_path*.

    Raises:
        ValueError: If *words_per_caption* is smaller than 1 (which would
            otherwise never advance through the text).
    """
    if words_per_caption < 1:
        raise ValueError("words_per_caption must be at least 1")

    words = text.split()
    lines = []
    chunk_starts = range(0, len(words), words_per_caption)
    for index, start in enumerate(chunk_starts, start=1):
        content = " ".join(words[start:start + words_per_caption])
        start_time = timedelta(seconds=(index - 1) * duration)
        end_time = timedelta(seconds=index * duration)
        lines.append(
            srt.Subtitle(index=index, start=start_time, end=end_time, content=content)
        )

    srt_data = srt.compose(lines)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(srt_data)
    return output_path
31
+
32
+
33
def add_subtitles_and_bgm(
    video_path,
    srt_path,
    bgm_path,
    output_path="enhanced_output.mp4",
    font="Arial-Bold",
    font_size=36,
    font_color="white",
    subtitle_position=("center", "bottom")
):
    """Burn SRT subtitles into a video and mix in background music.

    Args:
        video_path: Source video file.
        srt_path: UTF-8 ``.srt`` file whose cues are rendered as text.
        bgm_path: Background-music file. When it is empty/``None`` or does
            not exist, the video's own audio is kept unchanged.
        output_path: Where the resulting video is written.
        font, font_size, font_color: Text styling for the subtitles.
        subtitle_position: Position passed to each subtitle clip.

    Returns:
        *output_path*.
    """
    # Local imports: the module-level moviepy import does not bring these in.
    from moviepy.editor import CompositeAudioClip
    from moviepy.audio.fx.all import audio_loop

    # Load video
    video = VideoFileClip(video_path)

    # Parse .srt file
    with open(srt_path, "r", encoding="utf-8") as f:
        subtitles = list(srt.parse(f.read()))

    # Create subtitle clips
    def make_textclip(txt):
        return TextClip(txt, font=font, fontsize=font_size, color=font_color, stroke_color='black', stroke_width=2)

    subtitle_clips = [
        make_textclip(sub.content)
        .set_position(subtitle_position)
        .set_start(sub.start.total_seconds())
        .set_duration((sub.end - sub.start).total_seconds())
        for sub in subtitles
    ]

    # Background music
    narration = video.audio
    if bgm_path and os.path.exists(bgm_path):
        bgm = AudioFileClip(bgm_path).volumex(0.2)  # reduce volume
        # Loop/trim the music so it covers exactly the video length, even
        # when the music file is shorter than the video.
        bgm = audio_loop(bgm, duration=video.duration)
        if narration is not None:
            narration = (
                narration.volumex(0.8)
                .audio_fadein(1)
                .audio_fadeout(1)
                .set_duration(video.duration)
            )
            # Audio clips have no set_audio(); mixing two tracks needs a
            # composite audio clip.
            final_audio = CompositeAudioClip([narration, bgm])
        else:
            # Silent source video: the music is the only track.
            final_audio = bgm
    else:
        final_audio = narration

    final = CompositeVideoClip([video, *subtitle_clips])
    final = final.set_audio(final_audio)

    # Export final video
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=video.fps)
    return output_path
80
+
81
+
82
+ # Aliases for compatibility with streamlit_ui.py
83
+ generate_srt_from_text = export_srt
84
+ enhance_video_with_subtitles_and_bgm = add_subtitles_and_bgm
default_bgm.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04217a47ea351a5332f063822e233536c99b5c99fbbac8f806e9ea2470249327
3
+ size 160514
docs/index.html ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
6
+ <meta name="description" content="OSN Media Generator - Built by O.S.Narayana. Create media content using AI with no ads or in-app purchases."/>
7
+ <title>OSN Media Generator</title>
8
+ <style>
9
+ body {
10
+ font-family: Arial, sans-serif;
11
+ background-color: #f3f4f6;
12
+ color: #111827;
13
+ text-align: center;
14
+ padding: 50px 20px;
15
+ }
16
+ .container {
17
+ max-width: 600px;
18
+ margin: auto;
19
+ background: #ffffff;
20
+ padding: 30px;
21
+ border-radius: 12px;
22
+ box-shadow: 0 4px 12px rgba(0,0,0,0.1);
23
+ }
24
+ h1 {
25
+ color: #1f2937;
26
+ }
27
+ a {
28
+ color: #2563eb;
29
+ text-decoration: none;
30
+ }
31
+ a:hover {
32
+ text-decoration: underline;
33
+ }
34
+ .footer {
35
+ margin-top: 40px;
36
+ font-size: 0.9em;
37
+ color: #6b7280;
38
+ }
39
+ </style>
40
+ </head>
41
+ <body>
42
+ <div class="container">
43
+ <h1>🎮 OSN Media Generator</h1>
44
+ <p>
45
+ Welcome to the official homepage of <strong>OSN Media Generator</strong>, created by <strong>O.S.Narayana</strong>.
46
+ </p>
47
+ <p>
48
+ This app helps you generate audio, images, and videos using AI tools like ElevenLabs, Unsplash, and Streamlit.
49
+ </p>
50
+ <p>
51
+ 👉 Try the app now: <br>
52
+ <a href="https://osnarayana-media-generator.streamlit.app/" target="_blank">Launch OSN Media Generator</a>
53
+ </p>
54
+ <p>
55
+ 📄 View our <a href="privacy.html" target="_blank">Privacy Policy</a>
56
+ </p>
57
+ <div class="footer">
58
+ &copy; 2025 OSN Media. Built with 💗 by O.S.Narayana.
59
+ </div>
60
+ </div>
61
+ </body>
62
+ </html>
docs/privacy.html ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Privacy Policy - OSN Media Generator</title>
7
+ <meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
8
+ <meta http-equiv="Pragma" content="no-cache" />
9
+ <meta http-equiv="Expires" content="0" />
10
+ <style>
11
+ body {
12
+ font-family: Arial, sans-serif;
13
+ margin: 2rem;
14
+ padding: 0;
15
+ background-color: #f9f9f9;
16
+ color: #333;
17
+ }
18
+ h1, h2, h3 { color: #222; }
19
+ a { color: #1a73e8; text-decoration: none; }
20
+ a:hover { text-decoration: underline; }
21
+ code { background-color: #eee; padding: 2px 4px; border-radius: 4px; }
22
+
23
+ </style>
24
+ </head>
25
+ <body>
26
+ <h1>🔐 Privacy Policy for OSN Media Generator</h1>
27
+
28
+ <p>Effective Date: July 19, 2025</p>
29
+
30
+ <p>Your privacy is important to us. This app is built to prioritize user data security and transparency.</p>
31
+
32
+ <h2>1. What We Collect</h2>
33
+
34
+ <ul>
35
+ <li><strong>No personal data</strong> is collected, stored, or shared by this application.</li>
36
+ <li>All prompts, audio, and media are <strong>processed locally</strong> or via <strong>user-provided API keys</strong> to external services (e.g., ElevenLabs, Unsplash).</li>
37
+ </ul>
38
+
39
+ <h2>2. API Usage</h2>
40
+
41
+ <ul>
42
+ <li>If you use external services (like ElevenLabs or Unsplash), you are subject to their respective <a href="https://www.elevenlabs.io/terms">Terms of Service</a> and <a href="https://www.elevenlabs.io/privacy">Privacy Policies</a>.</li>
43
+ <li>Your API keys are stored <strong>locally</strong> in your environment file (<code>.env</code>) and never uploaded.</li>
44
+ </ul>
45
+
46
+ <h2>3. Data Storage</h2>
47
+
48
+ <ul>
49
+ <li>Generated images, audio, and videos are stored <strong>only on your local machine</strong> under the <code>outputs/</code> directory.</li>
50
+ <li>You can delete any generated content at your discretion.</li>
51
+ </ul>
52
+
53
+ <h2>4. Analytics &amp; Ads</h2>
54
+
55
+ <ul>
56
+ <li>This app contains <strong>no ads</strong>, <strong>no tracking</strong>, and <strong>no analytics</strong>.</li>
57
+ </ul>
58
+
59
+ <h2>5. Changes to Policy</h2>
60
+
61
+ <p>Any updates to this policy will be reflected in this file on the <a href="https://github.com/sivaogeti/osnarayana-media-generator">GitHub repository</a>.</p>
62
+
63
+ <h2>6. Contact</h2>
64
+
65
+ <p>For questions or issues, contact: <strong>[email protected]</strong></p>
66
+
67
+ </body>
68
+ </html>
fly.toml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ app = "media-gen-api"
2
+
3
+ [build]
4
+ image = "tiangolo/uvicorn-gunicorn-fastapi:python3.10"
5
+
6
+ [env]
7
+ PYTHONUNBUFFERED = "1"
8
+
9
+ [[services]]
10
+ internal_port = 80
11
+ protocol = "tcp"
12
+
13
+ [[services.ports]]
14
+ handlers = ["http"]
15
+ port = 80
16
+
17
+ [[services.ports]]
18
+ handlers = ["tls", "http"]
19
+ port = 443
20
+
21
+ [[services.tcp_checks]]
22
+ interval = "15s"
23
+ timeout = "2s"
24
+ grace_period = "5s"
25
+ restart_limit = 0
generated_audio/audio_12aa4a7f8c6b4e459e88fd7bfa8559fe.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb
3
+ size 9024
generated_audio/audio_2084def7399c47fc81f1ea75a2bb38df.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:343053419eba6ee94bd9d7fd54631336e97b693dea9775f2d57c6f342953cefb
3
+ size 9024