# Source: Hugging Face file view (uploaded by sagarkariya)
# Commit 1f17a03 (verified): "changed model codellama to code t5"
# File size: 2.69 kB
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import faiss
import os
import numpy as np
from huggingface_hub import login
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
# Authenticate against the Hugging Face Hub with the token taken from the
# PROJECT_TEST_TOKEN environment variable. When the variable is missing or
# empty, show an error in the UI and ask Streamlit to stop the script.
hf_token = os.getenv("PROJECT_TEST_TOKEN")
if not hf_token:
    st.error("Token not found")
    st.stop()
else:
    login(hf_token)
# Load the CodeT5 tokenizer and model.
#
# Streamlit re-executes this script from the top on every widget interaction,
# so loading at module level would re-instantiate the model on each rerun.
# `st.cache_resource` keeps a single shared instance for the server process.
@st.cache_resource
def _load_codet5():
    """Return the ``(tokenizer, model)`` pair for ``Salesforce/codet5-base``."""
    loaded_tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base")
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained("Salesforce/codet5-base")
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_codet5()
# πŸ” Function to Generate Responses
def generate_response(prompt):
    """Generate text for ``prompt`` with the module-level model.

    Args:
        prompt: Text that is tokenized with the module-level ``tokenizer``.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens stripped.
    """
    # Place the input tensors on the device the model actually lives on.
    # Hard-coding "cuda" fails on CPU-only hosts, and on GPU hosts it
    # mismatches a model that was never moved off the CPU.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_length=300)
    return tokenizer.decode(output[0], skip_special_tokens=True)
# πŸ” FAISS Index Creation
def create_faiss_index(texts):
    """Build a flat L2 FAISS index over sentence embeddings of ``texts``.

    Args:
        texts: Non-empty list of strings. Row ``i`` of the returned index is
            the embedding of ``texts[i]``.

    Returns:
        A ``faiss.IndexFlatL2`` containing one vector per input text.

    Raises:
        ValueError: If ``texts`` is empty; the index dimension is taken from
            the embeddings, so there is nothing to size the index with.
    """
    # Fail early with a clear message instead of an IndexError further down.
    if not texts:
        raise ValueError("Cannot build a FAISS index from an empty list of texts.")

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # FAISS works on contiguous float32 matrices; a plain list of Python
    # floats would otherwise become a float64 array.
    vectors = np.ascontiguousarray(embeddings.embed_documents(texts), dtype=np.float32)

    faiss_index = faiss.IndexFlatL2(vectors.shape[1])
    faiss_index.add(vectors)
    return faiss_index
# 📂 Function to Parse Logs
def parse_test_log(file):
    """Read an uploaded log and return its non-blank lines.

    Args:
        file: Binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The log's lines without line terminators, skipping empty and
        whitespace-only lines. Returns ``[]`` (after reporting the problem
        with ``st.error``) if the file cannot be read.
    """
    try:
        # Logs often contain stray non-UTF-8 bytes; substitute them with
        # U+FFFD rather than rejecting the whole file.
        log_content = file.read().decode("utf-8", errors="replace")
    except Exception as e:
        # UI boundary: surface the failure to the user and degrade to "no lines".
        st.error(f"Error reading file: {e}")
        return []

    # Blank lines carry no information and would only add noise when the
    # lines are embedded and indexed.
    return [line for line in log_content.splitlines() if line.strip()]
# 🌟 Streamlit UI
# Page title. The app generates answers with CodeT5 (Salesforce/codet5-base).
st.title("🔍 Test Log Failure Analysis with CodeT5")

# File upload
uploaded_file = st.file_uploader(
    "Upload test log (txt, json, xml, html)",
    type=["txt", "json", "xml", "html"],
)

if uploaded_file is not None:
    st.info("Processing the test log file...")
    test_log_lines = parse_test_log(uploaded_file)

    if not test_log_lines:
        # Nothing to index: an empty or unreadable file would otherwise crash
        # index creation.
        st.warning("The uploaded file contains no log lines to analyze.")
    else:
        # Build the FAISS index over the individual log lines.
        st.info("Indexing log data...")
        faiss_index = create_faiss_index(test_log_lines)

        # User query
        question = st.text_input("Ask a question about the test failures:")

        if question:
            # Embed the question with the same sentence-transformer model
            # that was used for the log lines. FAISS expects float32 input.
            query_embedding = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2"
            ).embed_query(question)
            query_vector = np.asarray([query_embedding], dtype=np.float32)

            # Never request more neighbours than there are lines: FAISS pads
            # missing results with -1, which as a Python index would silently
            # select the last log line.
            top_k = min(5, len(test_log_lines))
            _, neighbor_ids = faiss_index.search(query_vector, k=top_k)

            # Combine the most relevant log lines, ignoring any -1 padding.
            context = "\n".join(
                test_log_lines[i] for i in neighbor_ids[0] if i >= 0
            )

            # Generate the answer with CodeT5.
            prompt = f"Given the following test logs:\n{context}\n\nAnswer: {question}"
            answer = generate_response(prompt)

            st.subheader("Analysis Result:")
            st.write(answer)
else:
    st.info("Upload a test log file to begin analysis.")