Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -356,7 +356,6 @@ if __name__ == "__main__":
|
|
356 |
|
357 |
'''
|
358 |
|
359 |
-
|
360 |
import os
|
361 |
import gradio as gr
|
362 |
import requests
|
@@ -503,6 +502,21 @@ def get_retrieved_context(query: str, top_k=3):
|
|
503 |
return "
|
504 |
".join(context_parts)
|
505 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
506 |
# ----------- 5. LLM Chain Setup (Lightning AI generator) -----------
|
507 |
model = LitServeLLM(endpoint_url=LITSERVE_ENDPOINT)
|
508 |
|
@@ -764,4 +778,3 @@ if __name__ == "__main__":
|
|
764 |
})
|
765 |
|
766 |
demo.launch()
|
767 |
-
|
|
|
356 |
|
357 |
'''
|
358 |
|
|
|
359 |
import os
|
360 |
import gradio as gr
|
361 |
import requests
|
|
|
502 |
return "
|
503 |
".join(context_parts)
|
504 |
|
505 |
+
with mlflow.start_span("pinecone_query"):
|
506 |
+
start_time = time.time()
|
507 |
+
results = index.query(
|
508 |
+
namespace="rag-ns",
|
509 |
+
vector=query_embedding,
|
510 |
+
top_k=top_k,
|
511 |
+
include_metadata=True
|
512 |
+
)
|
513 |
+
mlflow.log_metric("pinecone_latency", time.time() - start_time)
|
514 |
+
mlflow.log_metric("retrieved_chunks", len(results['matches']))
|
515 |
+
|
516 |
+
context_parts = [match['metadata']['text'] for match in results['matches']]
|
517 |
+
return "
|
518 |
+
".join(context_parts)
|
519 |
+
|
520 |
# ----------- 5. LLM Chain Setup (Lightning AI generator) -----------
|
521 |
model = LitServeLLM(endpoint_url=LITSERVE_ENDPOINT)
|
522 |
|
|
|
778 |
})
|
779 |
|
780 |
demo.launch()
|
|