import av
import numpy as np
import torch
import streamlit as st
from PIL import Image
from torchvision.transforms import Resize, ToTensor, Compose
from transformers import AutoImageProcessor, AutoTokenizer, VisionEncoderDecoderModel
from diffusers import StableAudioPipeline
import soundfile as sf
import stripe
import time  # For simulating processing time
import firebase_admin
from firebase_admin import credentials, auth
from huggingface_hub import login
import os
import json

# Read the Firebase service-account JSON from the Hugging Face Secrets
# (read here through the "firebase" environment variable).
firebase_json_string = os.getenv("firebase")
if not firebase_json_string:
    # Fail with a message that names the missing secret instead of the opaque
    # TypeError json.loads(None) would raise.
    raise RuntimeError(
        'Missing secret "firebase": expected the Firebase service-account JSON.'
    )
firebase_credentials = json.loads(firebase_json_string)

# Name of the Hugging Face token as defined in the Secret Manager.
hf_token = os.getenv("token1")
if not hf_token:
    # Without a token, login() would not receive the credentials this app
    # relies on; stop early with an explicit message.
    raise RuntimeError('Missing secret "token1": expected a Hugging Face access token.')
login(hf_token)

# Initialise the Firebase Admin SDK only if no app has been registered yet, so
# executing this script again in the same process does not initialise it twice.
if not firebase_admin._apps:
    cred = credentials.Certificate(firebase_credentials)
    firebase_admin.initialize_app(cred)

# -----------------------------
# 1. Device Configuration
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"

# -----------------------------
# 2. Streamlit Page Configuration and Styling
# -----------------------------
st.set_page_config(page_title="SoundScene.ai", page_icon=":musical_note:", layout="wide")

# Dark theme and custom styles
st.markdown(
    """ """,
    unsafe_allow_html=True,
)

# -----------------------------
# 4. Stripe Configuration
# -----------------------------
# The secret key is taken from the STRIPE_SECRET_KEY environment variable when
# it is set; otherwise the original placeholder is kept so behaviour is
# unchanged for deployments that have not configured Stripe.
stripe.api_key = os.getenv("STRIPE_SECRET_KEY", "YOUR_STRIPE_SECRET_KEY")
# -----------------------------
# 5. Session State Initialization
# -----------------------------
if "user" not in st.session_state:
    st.session_state.user = None
if "auth_mode" not in st.session_state:
    # Default mode must match a radio option exactly.
    st.session_state.auth_mode = "Login"
if "conversion_count" not in st.session_state:
    st.session_state.conversion_count = 0
if "subscribed" not in st.session_state:
    st.session_state.subscribed = False

# register_user() cannot assign st.session_state.auth_mode itself: by the time
# it runs, the radio widget that owns that key has already been created and
# Streamlit rejects the write. It leaves a request here instead, which is
# applied on the next run *before* the radio is instantiated.
if "pending_auth_mode" in st.session_state:
    st.session_state.auth_mode = st.session_state["pending_auth_mode"]
    del st.session_state["pending_auth_mode"]

# Upper bound for the free-text audio duration input.
# NOTE(review): Stable Audio Open 1.0 is documented as generating up to ~47 s;
# the previous limit of 60 s let users request a length the pipeline is
# expected to reject — confirm against the installed diffusers version.
MAX_AUDIO_SECONDS = 47


# Authentication logic
def login_user(email, password):
    """Mark the session as logged in for the account registered under ``email``.

    SECURITY NOTE(review): ``password`` is accepted but never checked. The
    Firebase Admin SDK lookup below only finds the account by e-mail, so any
    registered address is accepted. Verifying the password requires a
    client-side sign-in flow — TODO before using this for access control.

    Args:
        email (str): E-mail address to look up.
        password (str): Currently unused (see the note above).
    """
    try:
        user = auth.get_user_by_email(email)
        st.session_state.user = {"uid": user.uid, "email": user.email}
        st.success("Successfully logged in!")
    except Exception as e:
        st.error(f"Login failed: {str(e)}")


def register_user(email, password):
    """Create a Firebase account and request a switch to the login form.

    Args:
        email (str): E-mail address for the new account.
        password (str): Password for the new account.
    """
    try:
        auth.create_user(email=email, password=password)
    except Exception as e:
        st.error(f"Registration failed: {str(e)}")
        return
    st.success("Registration successful! You can now log in.")
    # Deferred switch to "Login"; applied at the top of the next run because
    # the radio keyed "auth_mode" already exists in this run.
    st.session_state["pending_auth_mode"] = "Login"


# Authentication interface
if st.session_state.user is None:
    st.sidebar.subheader("Please authenticate to use the application.")

    # Toggle between Login and Register
    auth_mode = st.sidebar.radio(
        "Select an option",
        ("Login", "Register"),
        key="auth_mode",
    )

    email = st.sidebar.text_input("Email", key="sidebar_email")
    password = st.sidebar.text_input("Password", type="password", key="sidebar_password")

    if auth_mode == "Login":
        if st.sidebar.button("Log In"):
            login_user(email, password)
    elif auth_mode == "Register":
        if st.sidebar.button("Register"):
            register_user(email, password)

# Main app (accessible regardless of login status)
if st.session_state.user:
    st.sidebar.success(f"Logged in as: {st.session_state.user['email']}")
    if st.sidebar.button("Log Out"):
        st.session_state.user = None


@st.cache_resource
def load_models():
    """Load the captioning and audio-generation models onto ``device``.

    Returns:
        tuple: ``(image_processor, tokenizer, video_model, audio_pipe)``.
    """
    # Half precision only on GPU; on CPU fall back to float32.
    audio_dtype = torch.float16 if device == "cuda" else torch.float32
    audio_pipe = StableAudioPipeline.from_pretrained(
        "stabilityai/stable-audio-open-1.0",
        token=hf_token,  # replaces the deprecated ``use_auth_token`` keyword
        torch_dtype=audio_dtype,
    )
    audio_pipe = audio_pipe.to(device)
    image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    video_model = VisionEncoderDecoderModel.from_pretrained(
        "Neleac/timesformer-gpt2-video-captioning"
    ).to(device)
    return image_processor, tokenizer, video_model, audio_pipe


image_processor, tokenizer, video_model, audio_pipe = load_models()


# -----------------------------
# 7. Preprocessing Functions
# -----------------------------
def preprocess_frame(frame, target_size=(224, 224)):
    """
    Preprocess a single video frame for model input.

    Args:
        frame (np.ndarray): Video frame.
        target_size (tuple): Desired size (height, width).

    Returns:
        PIL.Image: Preprocessed (resized) frame image.

    Raises:
        ValueError: If the frame is not a 3-dimensional array with 3 channels.
    """
    # Ensure the frame is in uint8 format
    if frame.dtype != np.uint8:
        frame = np.clip(frame, 0, 255).astype(np.uint8)

    # Check frame dimensions
    if frame.ndim != 3 or frame.shape[2] != 3:
        raise ValueError(f"Expected frame with 3 channels, got shape {frame.shape}")

    transform = Resize(target_size)
    frame_image = Image.fromarray(frame)
    return transform(frame_image)


def preprocess_frames(frames, target_size=(224, 224), num_frames=8):
    """
    Preprocess multiple video frames for model input.

    Frames that fail preprocessing are skipped with a warning instead of
    aborting the whole batch.

    Args:
        frames (list of np.ndarray): Video frames.
        target_size (tuple): Desired size (height, width).
        num_frames (int): Maximum number of frames to process.

    Returns:
        list of PIL.Image: List of preprocessed frame images.
    """
    processed_frames = []
    for idx, frame in enumerate(frames[:num_frames]):
        try:
            img = preprocess_frame(frame, target_size)
            # Debugging: Display frame properties
            st.write(f"Frame {idx + 1}: size={img.size}, mode={img.mode}")
            processed_frames.append(img)
        except Exception as e:
            st.warning(f"Skipping frame {idx + 1} due to error: {e}")
    return processed_frames


# -----------------------------
# 8. Audio / Video Helpers
# -----------------------------
def _run_audio_pipeline(prompt, duration_s):
    """Generate one waveform for ``prompt`` and return it as a NumPy array."""
    generator = torch.Generator(device).manual_seed(0)
    audio = audio_pipe(
        prompt=prompt,
        negative_prompt="Low quality.",
        num_inference_steps=50,
        audio_end_in_s=duration_s,
        num_waveforms_per_prompt=1,
        generator=generator,
    ).audios
    return audio[0].T.float().cpu().numpy()


def _encode_wav(samples, sampling_rate):
    """Encode ``samples`` as WAV and return the file content as bytes.

    Encoding in memory avoids a fixed on-disk file name that concurrent
    sessions would overwrite.
    """
    import io  # local import: only this helper needs it

    buffer = io.BytesIO()
    sf.write(buffer, samples, sampling_rate, format="WAV")
    return buffer.getvalue()


def _render_audio_result(wav_bytes):
    """Show the audio player and, when permitted, the download button."""
    st.audio(wav_bytes, format="audio/wav")

    if not st.session_state.user:
        st.warning("Please log in to download the generated sound.")
    elif st.session_state.subscribed or st.session_state.conversion_count <= 10:
        # Pass the audio bytes: a plain string here would be downloaded
        # verbatim as the file content.
        st.download_button(
            "Download Sound",
            wav_bytes,
            file_name="soundscene_output.wav",
            mime="audio/wav",
        )
    else:
        st.warning(
            "You have reached the download limit. Please subscribe for unlimited downloads.")


def _video_duration_seconds(container, video_stream):
    """Return the video duration in seconds, or 0.0 when it is unknown."""
    # Stream duration is expressed in the stream's own time base.
    if video_stream.duration is not None and video_stream.time_base is not None:
        return float(video_stream.duration * video_stream.time_base)
    # Container duration is expressed in av.time_base units.
    if container.duration is not None:
        return container.duration / av.time_base
    return 0.0


def _count_video_frames(container, video_stream):
    """Return the number of video frames, decoding to count if not recorded."""
    if video_stream.frames > 0:
        return video_stream.frames
    container.seek(0)
    return sum(1 for _ in container.decode(video=0))


# -----------------------------
# 9. Navigation Menu
# -----------------------------
with st.sidebar:
    st.markdown("---")
    page = st.selectbox("Menu", ["Home", "About", "Pricing", "Contact"])


# -----------------------------
# 10. Membership Control and Payment Integration
# -----------------------------
def handle_subscription():
    """Inform the user that subscriptions cannot be purchased yet."""
    st.warning("Subscription service is currently not available.")


# -----------------------------
# 11. Main Content Rendering Based on Navigation
# -----------------------------
if page == "Home":
    # -----------------------------
    # 12. Home Page Content
    # -----------------------------
    st.title("Welcome to SoundScene.ai")
    st.markdown(
        """

Transform your videos into stunning soundscapes with advanced AI models. Learn more about our features and pricing options.

""",
        unsafe_allow_html=True
    )

    st.header("Create Audio")
    creation_method = st.radio(
        "Choose how you want to create audio:",
        ("Upload Video", "Input Comment"),
        horizontal=True
    )

    # -----------------------------
    # 13. Audio Creation via Comment
    # -----------------------------
    if creation_method == "Input Comment":
        st.subheader("Create Audio via Comment")
        user_comment = st.text_input("Enter your comment here:")

        # Sound Duration Input
        default_duration = 10  # Default to 10 seconds
        user_duration = st.number_input(
            "Select sound duration (seconds):",
            min_value=1,
            max_value=MAX_AUDIO_SECONDS,
            value=default_duration,
            step=1
        )

        if st.button("Generate Sound"):
            if not user_comment:
                st.error("Please enter a comment to generate sound.")
            else:
                try:
                    # Initialize progress bar
                    progress_bar = st.progress(0)
                    progress_text = st.empty()

                    with st.spinner("Generating sound from your comment..."):
                        # Step 1: Generating audio
                        progress_text.text("Generating audio...")
                        time.sleep(1)  # Simulate processing time
                        progress_bar.progress(20)

                        samples = _run_audio_pipeline(user_comment, user_duration)
                        progress_bar.progress(60)
                        progress_text.text("Finalizing audio...")

                        wav_bytes = _encode_wav(samples, audio_pipe.vae.sampling_rate)
                        progress_bar.progress(80)
                        progress_text.text("Almost done...")

                        # Update conversion count
                        st.session_state.conversion_count += 1

                        # Finalize progress bar
                        progress_bar.progress(100)
                        progress_text.text("Audio generation complete!")

                        _render_audio_result(wav_bytes)

                        # Clear progress text after a short delay
                        time.sleep(1)
                        progress_text.empty()
                        progress_bar.empty()
                except Exception as e:
                    st.error(f"An error occurred while generating audio: {e}")

    # -----------------------------
    # 14. Audio Creation via Video Upload
    # -----------------------------
    elif creation_method == "Upload Video":
        st.subheader("Create Audio via Video Upload")
        uploaded_video = st.file_uploader("Upload a video", type=["mp4", "avi", "mov"])

        if uploaded_video:
            try:
                # Open the video using PyAV; the context manager closes the
                # container when this run finishes with it.
                with av.open(uploaded_video) as container:
                    video_stream = container.streams.video[0]
                    video_duration = _video_duration_seconds(container, video_stream)
                    st.write(f"Video Duration: {video_duration:.2f} seconds")

                    # Sound Duration Selection. The default is clamped into the
                    # slider range; an out-of-range value makes st.slider raise.
                    sound_duration = st.slider(
                        "Select sound duration (seconds):",
                        min_value=1,
                        max_value=10,
                        value=min(max(int(video_duration), 1), 10),
                        step=1
                    )

                    if st.button("Process Video"):
                        try:
                            # Initialize progress bar
                            progress_bar = st.progress(0)
                            progress_text = st.empty()

                            with st.spinner("Processing your video..."):
                                # Step 1: Extracting frames
                                progress_text.text("Extracting frames from video...")
                                seg_len = _count_video_frames(container, video_stream)
                                clip_len = video_model.config.encoder.num_frames

                                if clip_len > seg_len:
                                    st.warning(f"Video has only {seg_len} frames, but the model expects {clip_len} frames.")
                                    clip_len = seg_len

                                time.sleep(1)  # Simulate processing time
                                progress_bar.progress(10)

                                # Select evenly spaced frame indices
                                indices = {
                                    int(i)
                                    for i in np.linspace(0, seg_len - 1, num=clip_len, endpoint=True)
                                }

                                frames = []
                                if indices:
                                    last_index = max(indices)
                                    container.seek(0)
                                    for i, frame in enumerate(container.decode(video=0)):
                                        if i in indices:
                                            frames.append(frame.to_ndarray(format="rgb24"))
                                        if i >= last_index:
                                            # Nothing after the last selected
                                            # index is needed; stop decoding.
                                            break

                                if not frames:
                                    st.error("No frames were extracted from the video.")
                                    st.stop()

                                progress_bar.progress(30)
                                progress_text.text("Preprocessing frames...")

                                # Preprocess frames
                                processed_frames = preprocess_frames(
                                    frames, target_size=(224, 224), num_frames=clip_len
                                )

                                if not processed_frames:
                                    st.error("No valid frames to process after preprocessing.")
                                    st.stop()

                                time.sleep(1)  # Simulate processing time
                                progress_bar.progress(50)

                                # Step 2: Generating caption
                                progress_text.text("Preprocessing the video...")
                                pixel_values = image_processor(
                                    images=processed_frames, return_tensors="pt"
                                ).pixel_values.to(device)

                                gen_kwargs = {"min_length": 10, "max_length": 20, "num_beams": 8}
                                tokens = video_model.generate(pixel_values, **gen_kwargs)
                                caption = tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
                                st.write(f"Generated Caption: {caption}")

                                time.sleep(1)  # Simulate processing time
                                progress_bar.progress(70)

                                # Step 3: Generating audio based on caption
                                progress_text.text("Generating sound based on the video...")
                                samples = _run_audio_pipeline(caption, sound_duration)
                                progress_bar.progress(90)
                                progress_text.text("Finalizing audio...")

                                wav_bytes = _encode_wav(samples, audio_pipe.vae.sampling_rate)

                                # Update conversion count
                                st.session_state.conversion_count += 1

                                # Finalize progress bar
                                progress_bar.progress(100)
                                progress_text.text("Audio generation complete!")

                                _render_audio_result(wav_bytes)

                                # Clear progress text and bar after a short delay
                                time.sleep(1)
                                progress_text.empty()
                                progress_bar.empty()
                        except Exception as e:
                            st.error(f"An error occurred while processing the video: {e}")
            except Exception as e:
                st.error(f"Failed to open the video file: {e}")

    # -----------------------------
    # 15. Conversion Count and Subscription Prompt
    # -----------------------------
    if st.session_state.conversion_count >= 10 and not st.session_state.subscribed:
        st.warning("You have used your 10 free conversions. Please subscribe to process more content.")
        if st.button("Subscribe for $5/month (Currently not available)"):
            handle_subscription()

elif page == "About":
    # -----------------------------
    # 16. About Page Content
    # -----------------------------
    st.title("About SoundScene.ai")
    st.markdown(
        """

SoundScene.ai leverages cutting-edge AI technologies to transform your visual content into immersive soundscapes. Whether you're a content creator, filmmaker, or enthusiast, our platform empowers you to add rich audio dimensions to your videos effortlessly.


Features:


Base Models From Huggingface:

  • Image Processing: MCG-NJU/videomae-base
  • Tokenizer: gpt2
  • Video Model: Neleac/timesformer-gpt2-video-captioning
  • Audio Model = stabilityai/stable-audio-open-1.0
  • stable-audio-open-1.0 SpaceTimeGPT """,
        unsafe_allow_html=True
    )

elif page == "Pricing":
    # -----------------------------
    # 17. Pricing Page Content
    # -----------------------------
    st.title("Pricing")
    st.markdown(
        """

    Choose a plan that fits your needs and start transforming your content today!

    """,
        unsafe_allow_html=True
    )

    # Pricing Cards
    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown(
            """

    Free

    $0/month

    """,
            unsafe_allow_html=True
        )

    with col2:
        st.markdown(
            """

    Pro

    $5/month (Currently not available)

    """,
            unsafe_allow_html=True
        )

    with col3:
        st.markdown(
            """

    Enterprise

    Contact Us

    """,
            unsafe_allow_html=True
        )

elif page == "Contact":
    # -----------------------------
    # 18. Contact Page Content
    # -----------------------------
    st.title("Contact Us")
    st.markdown(
        """

    We'd love to hear from you! Whether you have a question about features, trials, pricing, or anything else, our team is ready to answer all your questions.

    """,
        unsafe_allow_html=True
    )

    # Contact Form
    with st.form("contact_form"):
        name = st.text_input("Your Name")
        email = st.text_input("Your Email")
        message = st.text_area("Your Message")
        submitted = st.form_submit_button("Send")

        if submitted:
            if not name or not email or not message:
                st.error("Please fill out all fields.")
            else:
                # Here you can integrate with an email service or database to store the messages
                st.success("Thank you for reaching out! We'll get back to you shortly.")

    # Contact Details
    st.markdown("---")
    st.markdown(
        """

    Social Media Accounts

    Linkedin Medium """,
        unsafe_allow_html=True
    )

# -----------------------------
# 19. Conversion Count and Subscription Prompt (Moved to Home Page)
# -----------------------------
# Note: This section is already handled within the "Home" page based on conversion count.

# -----------------------------
# 20. Security and Best Practices Notes
# -----------------------------
# Note:
# - Replace placeholder API keys with secure methods (e.g., environment variables or Streamlit secrets).
# - Ensure proper configuration of Firebase and Stripe if enabling authentication and payment features.
# - Validate and sanitize all user inputs to enhance security.
# - Customize the success_url and cancel_url in the Stripe checkout session to match your deployment URLs.

# -----------------------------
# 21. Footer (Hidden)
# -----------------------------
# Optional: Add a custom footer if desired
st.markdown(
    """ """,
    unsafe_allow_html=True
)