Spaces:

AItool
/

ServerlessInferenceAPI

Runtime error

AItool committed on Nov 28, 2024

Commit

5b7582d

verified ·

1 Parent(s): 5ac81fe

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,18 +13,6 @@ api_key = os.getenv("HF_TOKEN")
 # Instantiate the InferenceClient
 client = InferenceClient(api_key=api_key)
-# Function to simulate some process and return the elapsed time
-def process_with_timing():
-    start_time = time.time()
-    # Simulate a process with sleep
-    #time.sleep(2.345)
-    # Change this value to simulate different processing times
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-    minutes, seconds = divmod(elapsed_time, 60)
-    milliseconds = (seconds - int(seconds)) * 1000
-    return minutes, int(seconds), milliseconds
 # Streamlit app title
 st.title("Text-generation model using Streamlit from Inference API (serverless) feature.")
@@ -51,10 +39,10 @@ if submitted:
     messages = [
         {"role": "user", "content": text}
     ]
-    # Call the function to get the elapsed time
-    minutes, seconds, milliseconds = process_with_timing()
     # Create a new stream for each submission
     stream = client.chat.completions.create(
         model=selected_model,
@@ -70,7 +58,15 @@ if submitted:
     # Concatenate chunks to form the full response
     for chunk in stream:
         full_text += chunk.choices[0].delta.content
     # Update session state with the full response
     st.session_state["full_text"] = full_text

 # Instantiate the InferenceClient
 client = InferenceClient(api_key=api_key)
 # Streamlit app title
 st.title("Text-generation model using Streamlit from Inference API (serverless) feature.")
     messages = [
         {"role": "user", "content": text}
     ]
+    # Start timing
+    start_time = time.time()
     # Create a new stream for each submission
     stream = client.chat.completions.create(
         model=selected_model,
     # Concatenate chunks to form the full response
     for chunk in stream:
         full_text += chunk.choices[0].delta.content
+    # End timing
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    # Calculate minutes, seconds, and milliseconds
+    minutes, seconds = divmod(elapsed_time, 60)
+    milliseconds = (seconds - int(seconds)) * 1000
     # Update session state with the full response
     st.session_state["full_text"] = full_text