Spaces:

pratikshahp
/

openai-github-chat

Running

App Files Files Community

openai-github-chat / app.py

pratikshahp

Update app.py

bad09fd verified 2 months ago

raw

history blame contribute delete

4.53 kB

	import streamlit as st
	import os
	from github import Github
	from langchain_community.vectorstores import Chroma
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from openai import OpenAI
	from dotenv import load_dotenv

	# Merge any variables defined in a local .env file into the process
	# environment, then read the key that generate_response() passes to OpenAI.
	# The value is None when OPENAI_API_KEY is not set.
	load_dotenv()
	openai_api_key = os.environ.get("OPENAI_API_KEY")

	# Function to fetch repository data from GitHub
	def fetch_github_repo_data(repo_name, github_token):
	    """Fetch all UTF-8 text content from a GitHub repository.

	    Walks the repository tree breadth-first starting at the root and
	    concatenates every file that decodes as UTF-8. Each file is preceded by
	    a ``File: <path>`` header line so chunks can be traced back to a path.

	    Args:
	        repo_name: Repository identifier passed to ``Github.get_repo``
	            (the UI asks for ``owner/repo``).
	        github_token: Token used to construct the ``Github`` client.

	    Returns:
	        The concatenated text (an empty string when nothing could be
	        decoded), or ``None`` if the crawl failed; in that case the error
	        is shown through ``st.error``.
	    """
	    from collections import deque

	    try:
	        repo = Github(github_token).get_repo(repo_name)

	        # A deque gives O(1) pops from the front; list.pop(0) is O(n).
	        pending = deque(repo.get_contents(""))
	        # Collect the pieces and join once instead of repeated `+=`.
	        sections = []

	        while pending:
	            entry = pending.popleft()
	            if entry.type == "dir":
	                pending.extend(repo.get_contents(entry.path))
	                continue

	            try:
	                raw = repo.get_contents(entry.path).decoded_content
	                text = raw.decode("utf-8")
	            except (UnicodeDecodeError, AssertionError):
	                # Skip non-text files.
	                # NOTE(review): PyGithub's decoded_content appears to assert
	                # a base64 encoding, so entries served without inline content
	                # (e.g. very large files, submodules) raise AssertionError.
	                # Skipping them keeps one odd entry from aborting the whole
	                # crawl - confirm against the installed PyGithub version.
	                continue

	            sections.append(f"\n\nFile: {entry.path}\n{text}")

	        return "".join(sections)
	    except Exception as e:
	        # UI boundary: report the failure to the user instead of crashing.
	        st.error(f"Error fetching GitHub repository data: {e}")
	        return None

	# Function to generate a response using OpenAI
	def generate_response(context, question, *, model="gpt-4o-mini", max_tokens=150):
	    """Generate an answer to *question* from *context* using OpenAI chat completions.

	    Args:
	        context: Repository text placed in the prompt as grounding material.
	        question: The user's question.
	        model: Chat model name sent to the API (keyword-only).
	        max_tokens: Upper bound on the length of the generated answer
	            (keyword-only).

	    Returns:
	        The stripped answer text, or ``None`` if the request failed or the
	        model returned no text. Failures are reported via ``st.error`` /
	        ``st.warning`` rather than raised.
	    """
	    try:
	        # Uses the module-level OpenAI import and the key read at startup.
	        client = OpenAI(api_key=openai_api_key)
	        messages = [
	            {"role": "system", "content": "You are an assistant that answers questions based on repository content."},
	            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"},
	        ]
	        response = client.chat.completions.create(
	            model=model,
	            messages=messages,
	            max_tokens=max_tokens,
	        )
	        content = response.choices[0].message.content
	        if content is None:
	            # Avoid an opaque "'NoneType' object has no attribute 'strip'".
	            st.warning("The model returned an empty response.")
	            return None
	        return content.strip()
	    except Exception as e:
	        # UI boundary: surface the failure to the user instead of crashing.
	        st.error(f"Error generating response: {e}")
	        return None

	# Function to perform RAG using OpenAI and Chroma
	def perform_rag(repo_data, question):
	    """Perform retrieval-augmented generation using ChromaDB and OpenAI.

	    Splits *repo_data* into chunks, embeds them into a temporary Chroma
	    collection, retrieves the chunk most similar to *question*, and asks
	    ``generate_response`` to answer from that chunk.

	    Args:
	        repo_data: Concatenated repository text.
	        question: The user's question.

	    Returns:
	        The generated answer, or ``None`` when *repo_data* is empty, nothing
	        relevant is retrieved, or any step fails (reported via
	        ``st.warning`` / ``st.error``).
	    """
	    import uuid

	    try:
	        if not repo_data:
	            st.warning("Repository data is empty.")
	            return None

	        # Create embeddings
	        embeddings = HuggingFaceEmbeddings()

	        # Split text into chunks
	        text_splitter = RecursiveCharacterTextSplitter(
	            chunk_size=1000, chunk_overlap=20, length_function=len
	        )
	        chunks = text_splitter.create_documents([repo_data])

	        # Index the chunks in a uniquely named, throw-away collection.
	        # Writing every call into one shared persisted store kept appending
	        # duplicate chunks and let chunks from previously queried
	        # repositories be returned as context for a different repository.
	        vectordb = Chroma.from_documents(
	            documents=chunks,
	            embedding=embeddings,
	            collection_name=f"repo-{uuid.uuid4().hex}",
	        )
	        try:
	            # Perform retrieval using Chroma
	            docs = vectordb.similarity_search(question)
	        finally:
	            # Drop the temporary collection so nothing accumulates.
	            vectordb.delete_collection()

	        if not docs:
	            st.warning("No relevant documents found.")
	            return None

	        # Only the best-matching chunk is used as context.
	        context = docs[0].page_content
	        return generate_response(context, question)

	    except Exception as e:
	        # UI boundary: surface the failure to the user instead of crashing.
	        st.error(f"Error performing RAG: {e}")
	        return None

	# Streamlit application
	def main():
	    """Render the Streamlit UI: collect credentials, load the repo, answer questions.

	    The fetched repository text is cached in ``st.session_state`` keyed by
	    the (repo, token) pair, because Streamlit reruns this function on every
	    widget interaction and re-crawling the repository for each question is
	    slow and burns GitHub API quota. Changing either input triggers a fresh
	    fetch; failed fetches are not cached.
	    """
	    st.title("Chat with GitHub Repository")
	    st.caption("This app allows you to interact with a GitHub repository using OpenAI and ChromaDB.")

	    # Get user inputs
	    github_token = st.text_input("Enter your GitHub Token", type="password")
	    git_repo = st.text_input("Enter the GitHub Repo (owner/repo)")

	    if not (github_token and git_repo):
	        return

	    cache_key = (git_repo, github_token)
	    cached = st.session_state.get("_repo_cache")
	    if cached is not None and cached[0] == cache_key:
	        repo_data = cached[1]
	    else:
	        repo_data = fetch_github_repo_data(git_repo, github_token)
	        if repo_data:
	            st.session_state["_repo_cache"] = (cache_key, repo_data)

	    if not repo_data:
	        st.error("Failed to fetch repository data. Ensure the repository name and token are correct.")
	        return

	    st.success(f"Successfully added {git_repo} to the knowledge base!")

	    question = st.text_input("Ask any question about the repository")
	    if not question:
	        return

	    answer = perform_rag(repo_data, question)
	    if answer:
	        st.subheader("Generated Answer:")
	        st.write(answer)

	# Start the app only when this file is executed as the entry script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()