Spaces:

GotoUsuke
/

GraphRag

Sleeping

App Files Files Community

GraphRag / index_app.py

GotoUsuke

Upload folder using huggingface_hub

a65108d verified 4 months ago

raw

history blame contribute delete

35.7 kB

	import gradio as gr
	import requests
	import logging
	import os
	import json
	import shutil
	import glob
	import queue
	import lancedb
	from datetime import datetime
	from dotenv import load_dotenv, set_key
	import yaml
	import pandas as pd
	from typing import List, Optional
	from pydantic import BaseModel

	# Set up logging
	# Queue that buffers formatted log lines until update_logs() drains them.
	log_queue = queue.Queue()
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
	logger = logging.getLogger(__name__)

	# Load the .env file first so the os.getenv() lookups below can see its values.
	load_dotenv('indexing/.env')

	# Service endpoints and working directory; each falls back to the literal default shown.
	API_BASE_URL = os.getenv('API_BASE_URL', 'http://localhost:8012')
	LLM_API_BASE = os.getenv('LLM_API_BASE', 'http://localhost:11434')
	EMBEDDINGS_API_BASE = os.getenv('EMBEDDINGS_API_BASE', 'http://localhost:11434')
	ROOT_DIR = os.getenv('ROOT_DIR', 'indexing')

	# Data models
	class IndexingRequest(BaseModel):
	    """Request body that start_indexing() posts to the ``/v1/index`` endpoint."""

	    # Model names and the API base URLs they are requested from.
	    llm_model: str
	    embed_model: str
	    llm_api_base: str
	    embed_api_base: str

	    # Root directory handed to the indexing run.
	    root: str

	    # Optional switches; the defaults apply when the caller omits them.
	    verbose: bool = False
	    nocache: bool = False
	    resume: Optional[str] = None
	    reporter: str = "rich"
	    emit: List[str] = ["parquet"]
	    custom_args: Optional[str] = None

	class PromptTuneRequest(BaseModel):
	    """Request body that start_prompt_tuning() posts to ``/v1/prompt_tune``.

	    The ``root`` and ``output`` defaults are derived from the module-level
	    ``ROOT_DIR``. They are f-strings evaluated once, when the class is defined;
	    previously the ``f`` prefix was missing, so the literal text ``{ROOT_DIR}``
	    was sent as part of the path.
	    """

	    root: str = f"./{ROOT_DIR}"
	    domain: Optional[str] = None
	    method: str = "random"
	    limit: int = 15
	    language: Optional[str] = None
	    max_tokens: int = 2000
	    chunk_size: int = 200
	    no_entity_types: bool = False
	    output: str = f"./{ROOT_DIR}/prompts"

	class QueueHandler(logging.Handler):
	    """Logging handler that pushes each formatted record onto a queue."""

	    def __init__(self, log_queue):
	        """Remember the queue that formatted records are written to."""
	        logging.Handler.__init__(self)
	        self.log_queue = log_queue

	    def emit(self, record):
	        """Format ``record`` and enqueue the resulting text."""
	        formatted = self.format(record)
	        self.log_queue.put(formatted)
	# Attach a QueueHandler to the root logger so records reaching it are also
	# pushed onto log_queue.
	queue_handler = QueueHandler(log_queue)
	logging.getLogger().addHandler(queue_handler)


	def update_logs():
	    """Drain all pending lines from ``log_queue`` and return them newline-joined.

	    Returns:
	        The queued log lines joined with ``"\\n"``; an empty string when
	        nothing is queued.
	    """
	    logs = []
	    while True:
	        try:
	            # get_nowait() never blocks. Checking empty() and then calling a
	            # blocking get() could hang forever if another caller drained the
	            # queue between the two calls.
	            logs.append(log_queue.get_nowait())
	        except queue.Empty:
	            break
	    return "\n".join(logs)

	##########SETTINGS################
	def load_settings():
	    """Collect runtime settings from environment variables and an optional YAML file.

	    The YAML path comes from ``GRAPHRAG_CONFIG`` (default ``config.yaml``).
	    For every setting, an environment variable wins over the YAML value.

	    Returns:
	        dict with keys ``llm_model``, ``embedding_model``, ``community_level``,
	        ``token_limit``, ``api_key``, ``api_base``, ``embeddings_api_base`` and
	        ``api_type``. Entries without a default are ``None`` when unset.
	    """
	    config_path = os.getenv('GRAPHRAG_CONFIG', 'config.yaml')
	    config = {}
	    if os.path.exists(config_path):
	        with open(config_path, 'r', encoding='utf-8') as config_file:
	            loaded = yaml.safe_load(config_file)
	        # safe_load() returns None for an empty document and may return a
	        # non-mapping (e.g. a list); only a dict supports the .get() calls below.
	        if isinstance(loaded, dict):
	            config = loaded
	        elif loaded is not None:
	            logging.warning(f"Ignoring {config_path}: top-level YAML value is not a mapping")

	    settings = {
	        'llm_model': os.getenv('LLM_MODEL', config.get('llm_model')),
	        'embedding_model': os.getenv('EMBEDDINGS_MODEL', config.get('embedding_model')),
	        'community_level': int(os.getenv('COMMUNITY_LEVEL', config.get('community_level', 2))),
	        'token_limit': int(os.getenv('TOKEN_LIMIT', config.get('token_limit', 4096))),
	        'api_key': os.getenv('GRAPHRAG_API_KEY', config.get('api_key')),
	        'api_base': os.getenv('LLM_API_BASE', config.get('api_base')),
	        'embeddings_api_base': os.getenv('EMBEDDINGS_API_BASE', config.get('embeddings_api_base')),
	        'api_type': os.getenv('API_TYPE', config.get('api_type', 'openai')),
	    }

	    return settings


	#######FILE_MANAGEMENT##############
	def list_output_files(root_dir):
	    """Return the path of every file beneath ``<root_dir>/output``, searched recursively."""
	    search_root = os.path.join(root_dir, "output")
	    return [
	        os.path.join(parent, name)
	        for parent, _subdirs, names in os.walk(search_root)
	        for name in names
	    ]

	def update_file_list():
	    """Return a Gradio update whose choices are the paths of the current input files."""
	    paths = [entry["path"] for entry in list_input_files()]
	    return gr.update(choices=paths)

	def update_file_content(file_path):
	    """Read ``file_path`` as UTF-8 text.

	    Returns an empty string for a falsy path, the file text on success, or an
	    ``"Error reading file: ..."`` message (also logged) when reading fails.
	    """
	    if file_path:
	        try:
	            with open(file_path, 'r', encoding='utf-8') as handle:
	                return handle.read()
	        except Exception as exc:
	            message = f"Error reading file: {str(exc)}"
	            logging.error(message)
	            return message
	    return ""

	def list_output_folders():
	    """Return the sub-directory names of ``<ROOT_DIR>/output`` in descending name order.

	    Returns an empty list when the output directory does not exist yet (for
	    example before the first indexing run) instead of raising from
	    ``os.listdir``.
	    """
	    output_dir = os.path.join(ROOT_DIR, "output")
	    if not os.path.isdir(output_dir):
	        return []
	    folders = [f for f in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, f))]
	    return sorted(folders, reverse=True)

	def update_output_folder_list():
	    """Return a Gradio update listing the output folders, selecting the first one if any."""
	    folders = list_output_folders()
	    if folders:
	        selected = folders[0]
	    else:
	        selected = None
	    return gr.update(choices=folders, value=selected)

	def list_folder_contents(folder_name):
	    """List the entries of ``<ROOT_DIR>/output/<folder_name>/artifacts`` as labels.

	    Directories are labelled ``"[DIR] name"``; files are labelled with their
	    upper-cased extension, e.g. ``"[PARQUET] name.parquet"``. Returns an empty
	    list when the artifacts path does not exist.
	    """
	    artifacts_dir = os.path.join(ROOT_DIR, "output", folder_name, "artifacts")
	    if not os.path.exists(artifacts_dir):
	        return []

	    labels = []
	    for entry in os.listdir(artifacts_dir):
	        if os.path.isdir(os.path.join(artifacts_dir, entry)):
	            tag = "DIR"
	        else:
	            # splitext keeps the leading dot; drop it before upper-casing.
	            tag = os.path.splitext(entry)[1][1:].upper()
	        labels.append(f"[{tag}] {entry}")
	    return labels

	def update_folder_content_list(folder_name):
	    """Return a Gradio update listing the artifacts of the selected output folder.

	    ``folder_name`` may be a string or a list (the first element is used); a
	    falsy value yields an update with no choices.
	    """
	    if not folder_name:
	        return gr.update(choices=[])
	    if isinstance(folder_name, list):
	        folder_name = folder_name[0]
	    return gr.update(choices=list_folder_contents(folder_name))

	def handle_content_selection(folder_name, selected_item):
	    """React to a selection in the folder-contents dropdown.

	    Args:
	        folder_name: Name of the run folder under ``<ROOT_DIR>/output``.
	        selected_item: A label such as ``"[DIR] sub"`` or
	            ``"[PARQUET] file.parquet"``, or a list whose first element is
	            such a label.

	    Returns:
	        A 3-tuple ``(choices update, file info text, file content text)``.
	        Selecting a directory replaces the choices with that directory's
	        entries; selecting a file fills the info and content fields.
	    """
	    if isinstance(selected_item, list) and selected_item:
	        selected_item = selected_item[0]  # Take the first item if it's a list

	    # Nothing usable selected, or no run folder to resolve paths against.
	    if not isinstance(selected_item, str) or not folder_name:
	        return gr.update(), "", ""

	    artifacts_dir = os.path.join(ROOT_DIR, "output", folder_name, "artifacts")

	    if selected_item.startswith("[DIR]"):
	        dir_name = selected_item[6:]  # Remove "[DIR] " prefix
	        dir_path = os.path.join(artifacts_dir, dir_name)
	        sub_contents = []
	        if os.path.isdir(dir_path):
	            for item in sorted(os.listdir(dir_path)):
	                # Keep the path relative to the artifacts folder in the label
	                # so a later selection of this entry resolves to the right
	                # file or nested directory.
	                relative = os.path.join(dir_name, item)
	                if os.path.isdir(os.path.join(dir_path, item)):
	                    sub_contents.append(f"[DIR] {relative}")
	                else:
	                    ext = os.path.splitext(item)[1]
	                    sub_contents.append(f"[{ext[1:].upper()}] {relative}")
	        return gr.update(choices=sub_contents), "", ""

	    # Strip a leading "[TYPE] " prefix if present. partition() splits on the
	    # first "] " only, so file names that themselves contain "] " stay intact.
	    _, separator, remainder = selected_item.partition("] ")
	    if selected_item.startswith("[") and separator:
	        file_name = remainder
	    else:
	        file_name = selected_item

	    file_path = os.path.join(artifacts_dir, file_name)
	    try:
	        file_size = os.path.getsize(file_path)
	    except OSError as e:
	        logging.error(f"Error reading file: {str(e)}")
	        return gr.update(), f"Error reading file: {str(e)}", ""

	    file_type = os.path.splitext(file_name)[1]
	    file_info = f"File: {file_name}\nSize: {file_size} bytes\nType: {file_type}"
	    content = read_file_content(file_path)
	    return gr.update(), file_info, content

	def initialize_selected_folder(folder_name):
	    """Report on, and list, the artifacts of the chosen output folder.

	    Returns:
	        A 2-tuple ``(status message, Gradio choices update)``.
	    """
	    if not folder_name:
	        return "Please select a folder first.", gr.update(choices=[])
	    folder_path = os.path.join(ROOT_DIR, "output", folder_name, "artifacts")
	    if not os.path.exists(folder_path):
	        return f"Artifacts folder not found in '{folder_name}'.", gr.update(choices=[])
	    # list_folder_contents() builds the artifacts path itself, so it must be
	    # given the bare folder name. Passing folder_path made it look in
	    # <ROOT_DIR>/output/<ROOT_DIR>/output/... and always return nothing.
	    contents = list_folder_contents(folder_name)
	    return f"Folder '{folder_name}/artifacts' initialized with {len(contents)} items.", gr.update(choices=contents)

	def upload_file(file):
	    """Move an uploaded file into ``<ROOT_DIR>/input``.

	    Args:
	        file: Upload object exposing the temporary path as ``.name``, or
	            ``None`` when nothing was uploaded.

	    Returns:
	        A 3-tuple ``(status text, file-list choices update, drained log text)``.
	    """
	    if file is None:
	        status = "No file uploaded"
	    else:
	        input_dir = os.path.join(ROOT_DIR, 'input')
	        os.makedirs(input_dir, exist_ok=True)

	        # Keep only the base name of the temporary upload path.
	        source_path = file.name
	        base_name = os.path.basename(source_path)
	        destination_path = os.path.join(input_dir, base_name)

	        shutil.move(source_path, destination_path)

	        logging.info(f"File uploaded and moved to: {destination_path}")
	        status = f"File uploaded: {base_name}"

	    # Refresh the dropdown so it reflects the directory after the move.
	    refreshed_paths = [entry["path"] for entry in list_input_files()]

	    return status, gr.update(choices=refreshed_paths), update_logs()

	def list_input_files():
	    """Return ``{"name", "path"}`` dicts for each regular file in ``<ROOT_DIR>/input``.

	    Returns an empty list when the input directory does not exist.
	    """
	    input_dir = os.path.join(ROOT_DIR, 'input')
	    if not os.path.exists(input_dir):
	        return []

	    entries = []
	    for name in os.listdir(input_dir):
	        full_path = os.path.join(input_dir, name)
	        if os.path.isfile(full_path):
	            entries.append({"name": name, "path": full_path})
	    return entries

	def delete_file(file_path):
	    """Delete ``file_path`` and report the outcome.

	    Returns:
	        A 3-tuple ``(status text, file-list choices update, drained log text)``.
	        Failures are logged and reported in the status text rather than raised.
	    """
	    try:
	        os.remove(file_path)
	    except Exception as exc:
	        logging.error(f"Error deleting file: {str(exc)}")
	        status = f"Error deleting file: {str(exc)}"
	    else:
	        logging.info(f"File deleted: {file_path}")
	        status = f"File deleted: {os.path.basename(file_path)}"

	    # Refresh the dropdown so the removed file no longer appears.
	    remaining_paths = [entry["path"] for entry in list_input_files()]

	    return status, gr.update(choices=remaining_paths), update_logs()

	def read_file_content(file_path):
	    """Return a text rendering of ``file_path`` for display.

	    ``.parquet`` files are summarised (row/column counts, column names, the
	    first five rows and ``describe()`` statistics). Any other file is read as
	    UTF-8 text with undecodable bytes replaced. On failure an
	    ``"Error reading file: ..."`` message is logged and returned.
	    """
	    try:
	        if not file_path.endswith('.parquet'):
	            with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
	                return handle.read()

	        frame = pd.read_parquet(file_path)
	        sections = [
	            # Basic information about the DataFrame
	            f"Parquet File: {os.path.basename(file_path)}\n",
	            f"Rows: {len(frame)}, Columns: {len(frame.columns)}\n\n",
	            "Column Names:\n" + "\n".join(frame.columns) + "\n\n",
	            # First few rows
	            "First 5 rows:\n",
	            frame.head().to_string() + "\n\n",
	            # Basic statistics
	            "Basic Statistics:\n",
	            frame.describe().to_string(),
	        ]
	        return "".join(sections)
	    except Exception as exc:
	        logging.error(f"Error reading file: {str(exc)}")
	        return f"Error reading file: {str(exc)}"

	def save_file_content(file_path, content):
	    """Overwrite ``file_path`` with ``content`` and report the outcome.

	    Returns:
	        A 2-tuple ``(status text, drained log text)``. Failures are logged and
	        reported in the status text rather than raised.
	    """
	    try:
	        # Write UTF-8 explicitly: the readers in this module decode as UTF-8,
	        # while the platform default encoding may differ (e.g. on Windows).
	        with open(file_path, 'w', encoding='utf-8') as file:
	            file.write(content)
	        logging.info(f"File saved: {file_path}")
	        status = f"File saved: {os.path.basename(file_path)}"
	    except Exception as e:
	        logging.error(f"Error saving file: {str(e)}")
	        status = f"Error saving file: {str(e)}"
	    return status, update_logs()

	def manage_data():
	    """Summarise the LanceDB database under ``<ROOT_DIR>/lancedb`` and the input files.

	    Returns:
	        dict with ``"database_info"`` (table names plus the first table's
	        schema, if any table exists) and ``"input_files"`` (the result of
	        ``list_input_files()``).
	    """
	    connection = lancedb.connect(f"{ROOT_DIR}/lancedb")
	    tables = connection.table_names()

	    if tables:
	        first_name = tables[0]
	        first_table = connection[first_name]
	        table_info = f"Table: {first_name}\nSchema: {first_table.schema}"
	    else:
	        table_info = ""

	    input_files = list_input_files()
	    database_info = f"Tables: {', '.join(tables)}\n\n{table_info}"

	    return {
	        "database_info": database_info,
	        "input_files": input_files
	    }


	def find_latest_graph_file(root_dir):
	    """Return the most recently modified ``.graphml`` artifact under ``root_dir``.

	    Searches ``<root_dir>/output/<run>/artifacts/*.graphml`` across every run
	    folder. The previous pattern had lost its ``*`` wildcards, so it only
	    matched a literal ``output/artifacts/.graphml`` path, and the fallback
	    raised when the output directory was missing.

	    Returns:
	        Path of the newest matching file by modification time, or ``None``
	        when there is none (including when the output directory is absent).
	    """
	    pattern = os.path.join(root_dir, "output", "*", "artifacts", "*.graphml")
	    # glob() returns an empty list for a missing directory. "*" does not
	    # match dot-entries such as .DS_Store.
	    graph_files = glob.glob(pattern)

	    if not graph_files:
	        return None

	    # Pick the file with the latest modification time.
	    return max(graph_files, key=os.path.getmtime)

	def find_latest_output_folder():
	    """Locate the newest timestamp-named run folder under ``<ROOT_DIR>/output``.

	    Folders are ordered by ``os.path.getctime`` (newest first) and the first
	    one whose name parses as ``%Y%m%d-%H%M%S`` is chosen.

	    Returns:
	        A 2-tuple ``(path of the chosen folder, folder name)``.

	    Raises:
	        ValueError: If there are no folders, none has a timestamp name, or the
	            chosen folder lacks an ``artifacts`` sub-folder.
	    """
	    root_dir = f"{ROOT_DIR}/output"

	    def _created_at(name):
	        return os.path.getctime(os.path.join(root_dir, name))

	    def _is_timestamp(name):
	        try:
	            datetime.strptime(name, "%Y%m%d-%H%M%S")
	        except ValueError:
	            return False
	        return True

	    candidates = [
	        name for name in os.listdir(root_dir)
	        if os.path.isdir(os.path.join(root_dir, name))
	    ]
	    if not candidates:
	        raise ValueError("No output folders found")

	    candidates.sort(key=_created_at, reverse=True)

	    # First folder, newest first, whose name is a valid timestamp.
	    latest_folder = next((name for name in candidates if _is_timestamp(name)), None)
	    if latest_folder is None:
	        raise ValueError("No valid timestamp folders found")

	    latest_path = os.path.join(root_dir, latest_folder)
	    if not os.path.exists(os.path.join(latest_path, "artifacts")):
	        raise ValueError(f"Artifacts folder not found in {latest_path}")

	    return latest_path, latest_folder

	def initialize_data():
	    """Load the latest run's parquet artifacts into module-level DataFrames.

	    For each entry of the name-to-prefix table below, the newest file (by
	    ``getctime``) matching ``<prefix>*.parquet`` in the latest run's artifacts
	    folder is read and stored as a module global of that name. A name with no
	    matching file is set to an empty DataFrame. On any error, every name is
	    reset to an empty DataFrame.

	    Returns:
	        The folder name returned by ``find_latest_output_folder()``, or
	        ``None`` if that lookup failed.
	    """
	    global entity_df, relationship_df, text_unit_df, report_df, covariate_df

	    prefix_by_name = {
	        "entity_df": "create_final_nodes",
	        "relationship_df": "create_final_edges",
	        "text_unit_df": "create_final_text_units",
	        "report_df": "create_final_reports",
	        "covariate_df": "create_final_covariates"
	    }
	    module_namespace = globals()

	    timestamp = None  # Stays None if the latest folder cannot be determined

	    try:
	        latest_output_folder, timestamp = find_latest_output_folder()
	        artifacts_folder = os.path.join(latest_output_folder, "artifacts")

	        for df_name, file_prefix in prefix_by_name.items():
	            candidates = glob.glob(os.path.join(artifacts_folder, f"{file_prefix}*.parquet"))

	            if not candidates:
	                logging.warning(f"No matching file found for {df_name} in {artifacts_folder}. Initializing as an empty DataFrame.")
	                module_namespace[df_name] = pd.DataFrame()
	                continue

	            newest_file = max(candidates, key=os.path.getctime)
	            module_namespace[df_name] = pd.read_parquet(newest_file)
	            logging.info(f"Successfully loaded {df_name} from {newest_file}")

	    except Exception as exc:
	        logging.error(f"Error initializing data: {str(exc)}")
	        for df_name in prefix_by_name:
	            module_namespace[df_name] = pd.DataFrame()

	    return timestamp

	# Call initialize_data and store the timestamp
	# (runs at import time; the value is None when initialize_data() could not
	# locate a latest output folder).
	current_timestamp = initialize_data()


	###########MODELS##################
	def normalize_api_base(api_base: str) -> str:
	    """Normalize the API base URL by removing trailing slashes and /v1 or /api suffixes.

	    Args:
	        api_base: Base URL, e.g. ``"http://localhost:11434/api/"``.

	    Returns:
	        The URL without trailing slashes and without one trailing ``/v1`` or
	        ``/api`` segment.
	    """
	    api_base = api_base.rstrip('/')
	    for suffix in ('/v1', '/api'):
	        if api_base.endswith(suffix):
	            # Slice by the suffix's own length: a fixed [:-3] left a stray
	            # "/" behind for the four-character "/api".
	            return api_base[:-len(suffix)]
	    return api_base

	def is_ollama_api(base_url: str) -> bool:
	    """Check if the given base URL is for Ollama API.

	    Probes ``<base>/api/tags`` and returns True only for an HTTP 200 answer.
	    Connection errors and timeouts count as "not Ollama".
	    """
	    try:
	        # A timeout stops an unreachable host from blocking the caller
	        # indefinitely; requests applies none by default.
	        response = requests.get(f"{normalize_api_base(base_url)}/api/tags", timeout=10)
	    except requests.RequestException:
	        return False
	    return response.status_code == 200

	def get_ollama_models(base_url: str) -> List[str]:
	    """Fetch available models from Ollama API.

	    Returns:
	        The ``name`` of every entry under ``models`` in the ``/api/tags``
	        response, or an empty list when the request fails or the body is not
	        valid JSON.
	    """
	    try:
	        # Bounded wait: requests applies no timeout by default.
	        response = requests.get(f"{normalize_api_base(base_url)}/api/tags", timeout=10)
	        response.raise_for_status()
	        models = response.json().get('models', [])
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers JSON decoding failures on requests versions whose
	        # decode error is not a RequestException.
	        logger.error(f"Error fetching Ollama models: {str(e)}")
	        return []
	    # Skip malformed entries instead of raising KeyError.
	    return [model['name'] for model in models if 'name' in model]

	def get_openai_compatible_models(base_url: str) -> List[str]:
	    """Fetch available models from OpenAI-compatible API.

	    Returns:
	        The ``id`` of every entry under ``data`` in the ``/v1/models``
	        response, or an empty list when the request fails or the body is not
	        valid JSON.
	    """
	    try:
	        # Bounded wait: requests applies no timeout by default.
	        response = requests.get(f"{normalize_api_base(base_url)}/v1/models", timeout=10)
	        response.raise_for_status()
	        models = response.json().get('data', [])
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers JSON decoding failures on requests versions whose
	        # decode error is not a RequestException.
	        logger.error(f"Error fetching OpenAI-compatible models: {str(e)}")
	        return []
	    # Skip malformed entries instead of raising KeyError.
	    return [model['id'] for model in models if 'id' in model]

	def get_local_models(base_url: str) -> List[str]:
	    """Get available models based on the API type.

	    Uses the Ollama listing when ``is_ollama_api(base_url)`` is true and the
	    OpenAI-compatible listing otherwise.
	    """
	    fetch_models = get_ollama_models if is_ollama_api(base_url) else get_openai_compatible_models
	    return fetch_models(base_url)

	def get_model_params(base_url: str, model_name: str) -> dict:
	    """Get model parameters for Ollama models.

	    Returns:
	        The ``parameters`` entry of the ``/api/show`` response. An empty dict
	        is returned when the back end is not Ollama (previously this case fell
	        through and returned ``None``), when the request fails, or when the
	        entry is absent.
	    """
	    if not is_ollama_api(base_url):
	        return {}
	    try:
	        # Bounded wait: requests applies no timeout by default.
	        response = requests.post(
	            f"{normalize_api_base(base_url)}/api/show",
	            json={"name": model_name},
	            timeout=10,
	        )
	        response.raise_for_status()
	        model_info = response.json()
	        return model_info.get('parameters', {})
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers JSON decoding failures on requests versions whose
	        # decode error is not a RequestException.
	        logger.error(f"Error fetching Ollama model parameters: {str(e)}")
	        return {}








	#########API###########
	def start_indexing(request: IndexingRequest):
	    """POST ``request`` to ``<API_BASE_URL>/v1/index``.

	    Returns:
	        A 3-tuple ``(message, update, update)``. On success the message is the
	        response's ``message`` field and the updates are ``interactive=False``
	        then ``interactive=True``; on a request error the message is
	        ``"Error: ..."`` and the two flags are swapped.
	    """
	    try:
	        reply = requests.post(f"{API_BASE_URL}/v1/index", json=request.dict())
	        reply.raise_for_status()
	        payload = reply.json()
	    except requests.RequestException as exc:
	        logger.error(f"Error starting indexing: {str(exc)}")
	        return f"Error: {str(exc)}", gr.update(interactive=True), gr.update(interactive=False)
	    return payload['message'], gr.update(interactive=False), gr.update(interactive=True)

	def check_indexing_status():
	    """GET ``<API_BASE_URL>/v1/index_status``.

	    Returns:
	        ``(status, newline-joined logs)`` from the response, or
	        ``("Error", failure message)`` when the request fails.
	    """
	    try:
	        reply = requests.get(f"{API_BASE_URL}/v1/index_status")
	        reply.raise_for_status()
	        payload = reply.json()
	    except requests.RequestException as exc:
	        logger.error(f"Error checking indexing status: {str(exc)}")
	        return "Error", f"Failed to check indexing status: {str(exc)}"
	    return payload['status'], "\n".join(payload['logs'])

	def start_prompt_tuning(request: PromptTuneRequest):
	    """POST ``request`` to ``<API_BASE_URL>/v1/prompt_tune``.

	    Returns:
	        ``(message, update)``: the response's ``message`` with an
	        ``interactive=False`` update on success, or ``"Error: ..."`` with an
	        ``interactive=True`` update when the request fails.
	    """
	    try:
	        reply = requests.post(f"{API_BASE_URL}/v1/prompt_tune", json=request.dict())
	        reply.raise_for_status()
	        payload = reply.json()
	    except requests.RequestException as exc:
	        logger.error(f"Error starting prompt tuning: {str(exc)}")
	        return f"Error: {str(exc)}", gr.update(interactive=True)
	    return payload['message'], gr.update(interactive=False)

	def check_prompt_tuning_status():
	    """GET ``<API_BASE_URL>/v1/prompt_tune_status``.

	    Returns:
	        ``(status, newline-joined logs)`` from the response, or
	        ``("Error", failure message)`` when the request fails.
	    """
	    try:
	        reply = requests.get(f"{API_BASE_URL}/v1/prompt_tune_status")
	        reply.raise_for_status()
	        payload = reply.json()
	    except requests.RequestException as exc:
	        logger.error(f"Error checking prompt tuning status: {str(exc)}")
	        return "Error", f"Failed to check prompt tuning status: {str(exc)}"
	    return payload['status'], "\n".join(payload['logs'])

	def update_model_params(model_name):
	    """Return a Gradio update showing ``model_name``'s parameters as indented JSON.

	    get_model_params() requires the API base URL as its first argument; the
	    previous one-argument call raised TypeError.
	    NOTE(review): the module-level LLM_API_BASE is assumed to be the right
	    base for this lookup — confirm against the intended caller.
	    """
	    params = get_model_params(LLM_API_BASE, model_name)
	    return gr.update(value=json.dumps(params, indent=2))









	###########################
	# Stylesheet passed to gr.Blocks(css=...) in create_interface().
	# NOTE(review): the rules for #chat-input-row, #visualization-plot,
	# #vis-controls-row and #vis-status each appear twice in this string.
	css = """
	html, body {
	margin: 0;
	padding: 0;
	height: 100vh;
	overflow: hidden;
	}

	.gradio-container {
	margin: 0 !important;
	padding: 0 !important;
	width: 100vw !important;
	max-width: 100vw !important;
	height: 100vh !important;
	max-height: 100vh !important;
	overflow: auto;
	display: flex;
	flex-direction: column;
	}

	#main-container {
	flex: 1;
	display: flex;
	overflow: hidden;
	}

	#left-column, #right-column {
	height: 100%;
	overflow-y: auto;
	padding: 10px;
	}

	#left-column {
	flex: 1;
	}

	#right-column {
	flex: 2;
	display: flex;
	flex-direction: column;
	}

	#chat-container {
	flex: 0 0 auto; /* Don't allow this to grow */
	height: 100%;
	display: flex;
	flex-direction: column;
	overflow: hidden;
	border: 1px solid var(--color-accent);
	border-radius: 8px;
	padding: 10px;
	overflow-y: auto;
	}

	#chatbot {
	overflow-y: hidden;
	height: 100%;
	}

	#chat-input-row {
	margin-top: 10px;
	}

	#visualization-plot {
	width: 100%;
	aspect-ratio: 1 / 1;
	max-height: 600px; /* Adjust this value as needed */
	}

	#vis-controls-row {
	display: flex;
	justify-content: space-between;
	align-items: center;
	margin-top: 10px;
	}

	#vis-controls-row > * {
	flex: 1;
	margin: 0 5px;
	}

	#vis-status {
	margin-top: 10px;
	}

	/* Chat input styling */
	#chat-input-row {
	display: flex;
	flex-direction: column;
	}

	#chat-input-row > div {
	width: 100% !important;
	}

	#chat-input-row input[type="text"] {
	width: 100% !important;
	}

	/* Adjust padding for all containers */
	.gr-box, .gr-form, .gr-panel {
	padding: 10px !important;
	}

	/* Ensure all textboxes and textareas have full height */
	.gr-textbox, .gr-textarea {
	height: auto !important;
	min-height: 100px !important;
	}

	/* Ensure all dropdowns have full width */
	.gr-dropdown {
	width: 100% !important;
	}

	:root {
	--color-background: #2C3639;
	--color-foreground: #3F4E4F;
	--color-accent: #A27B5C;
	--color-text: #DCD7C9;
	}

	body, .gradio-container {
	background-color: var(--color-background);
	color: var(--color-text);
	}

	.gr-button {
	background-color: var(--color-accent);
	color: var(--color-text);
	}

	.gr-input, .gr-textarea, .gr-dropdown {
	background-color: var(--color-foreground);
	color: var(--color-text);
	border: 1px solid var(--color-accent);
	}

	.gr-panel {
	background-color: var(--color-foreground);
	border: 1px solid var(--color-accent);
	}

	.gr-box {
	border-radius: 8px;
	margin-bottom: 10px;
	background-color: var(--color-foreground);
	}

	.gr-padded {
	padding: 10px;
	}

	.gr-form {
	background-color: var(--color-foreground);
	}

	.gr-input-label, .gr-radio-label {
	color: var(--color-text);
	}

	.gr-checkbox-label {
	color: var(--color-text);
	}

	.gr-markdown {
	color: var(--color-text);
	}

	.gr-accordion {
	background-color: var(--color-foreground);
	border: 1px solid var(--color-accent);
	}

	.gr-accordion-header {
	background-color: var(--color-accent);
	color: var(--color-text);
	}

	#visualization-container {
	display: flex;
	flex-direction: column;
	border: 2px solid var(--color-accent);
	border-radius: 8px;
	margin-top: 20px;
	padding: 10px;
	background-color: var(--color-foreground);
	height: calc(100vh - 300px); /* Adjust this value as needed */
	}

	#visualization-plot {
	width: 100%;
	height: 100%;
	}

	#vis-controls-row {
	display: flex;
	justify-content: space-between;
	align-items: center;
	margin-top: 10px;
	}

	#vis-controls-row > * {
	flex: 1;
	margin: 0 5px;
	}

	#vis-status {
	margin-top: 10px;
	}

	#log-container {
	background-color: var(--color-foreground);
	border: 1px solid var(--color-accent);
	border-radius: 8px;
	padding: 10px;
	margin-top: 20px;
	max-height: auto;
	overflow-y: auto;
	}

	.setting-accordion .label-wrap {
	cursor: pointer;
	}

	.setting-accordion .icon {
	transition: transform 0.3s ease;
	}

	.setting-accordion[open] .icon {
	transform: rotate(90deg);
	}

	.gr-form.gr-box {
	border: none !important;
	background: none !important;
	}

	.model-params {
	border-top: 1px solid var(--color-accent);
	margin-top: 10px;
	padding-top: 10px;
	}
	"""


	def create_interface():
	    """Build the Gradio Blocks UI (Indexing, Prompt Tuning, Data Management tabs).

	    Changes from the previous version:
	      * The "Run Indexing" and "Run Prompt Tuning" handlers now receive the
	        live component values through ``inputs=``. Reading ``component.value``
	        inside a handler only yields the value the component was constructed
	        with, so edits made in the browser were ignored.
	      * A missing API base in the settings falls back to the module-level
	        ``LLM_API_BASE`` / ``EMBEDDINGS_API_BASE`` instead of passing ``None``
	        to ``normalize_api_base``.

	    NOTE(review): the nesting of the layout containers was reconstructed from
	    statement order (the original indentation was not available) — confirm
	    the rendered layout.

	    Returns:
	        The assembled ``gr.Blocks`` app (not launched).
	    """
	    settings = load_settings()
	    llm_api_base = normalize_api_base(settings['api_base'] or LLM_API_BASE)
	    embeddings_api_base = normalize_api_base(settings['embeddings_api_base'] or EMBEDDINGS_API_BASE)

	    with gr.Blocks(theme=gr.themes.Base(), css=css) as demo:
	        gr.Markdown("# GraphRAG Indexer")

	        with gr.Tabs():
	            with gr.TabItem("Indexing"):
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        gr.Markdown("## Indexing Configuration")

	                        with gr.Row():
	                            llm_name = gr.Dropdown(label="LLM Model", choices=[], value=settings['llm_model'], allow_custom_value=True)
	                            refresh_llm_btn = gr.Button("🔄", size='sm', scale=0)

	                        with gr.Row():
	                            embed_name = gr.Dropdown(label="Embedding Model", choices=[], value=settings['embedding_model'], allow_custom_value=True)
	                            refresh_embed_btn = gr.Button("🔄", size='sm', scale=0)

	                        save_config_button = gr.Button("Save Configuration", variant="primary")
	                        config_status = gr.Textbox(label="Configuration Status", lines=2)

	                        with gr.Row():
	                            with gr.Column(scale=1):
	                                root_dir = gr.Textbox(label="Root Directory (Edit in .env file)", value=f"{ROOT_DIR}")
	                                with gr.Group():
	                                    verbose = gr.Checkbox(label="Verbose", interactive=True, value=True)
	                                    nocache = gr.Checkbox(label="No Cache", interactive=True, value=True)

	                                with gr.Accordion("Advanced Options", open=True):
	                                    resume = gr.Textbox(label="Resume Timestamp (optional)")
	                                    reporter = gr.Dropdown(
	                                        label="Reporter",
	                                        choices=["rich", "print", "none"],
	                                        value="rich",
	                                        interactive=True
	                                    )
	                                    emit_formats = gr.CheckboxGroup(
	                                        label="Emit Formats",
	                                        choices=["json", "csv", "parquet"],
	                                        value=["parquet"],
	                                        interactive=True
	                                    )
	                                    custom_args = gr.Textbox(label="Custom CLI Arguments", placeholder="--arg1 value1 --arg2 value2")

	                    with gr.Column(scale=1):
	                        gr.Markdown("## Indexing Output")
	                        index_output = gr.Textbox(label="Output", lines=10)
	                        index_status = gr.Textbox(label="Status", lines=2)

	                        run_index_button = gr.Button("Run Indexing", variant="primary")
	                        check_status_button = gr.Button("Check Indexing Status")

	            with gr.TabItem("Prompt Tuning"):
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        gr.Markdown("## Prompt Tuning Configuration")

	                        pt_root = gr.Textbox(label="Root Directory", value=f"{ROOT_DIR}", interactive=True)
	                        pt_domain = gr.Textbox(label="Domain (optional)")
	                        pt_method = gr.Dropdown(
	                            label="Method",
	                            choices=["random", "top", "all"],
	                            value="random",
	                            interactive=True
	                        )
	                        pt_limit = gr.Number(label="Limit", value=15, precision=0, interactive=True)
	                        pt_language = gr.Textbox(label="Language (optional)")
	                        pt_max_tokens = gr.Number(label="Max Tokens", value=2000, precision=0, interactive=True)
	                        pt_chunk_size = gr.Number(label="Chunk Size", value=200, precision=0, interactive=True)
	                        pt_no_entity_types = gr.Checkbox(label="No Entity Types", value=False)
	                        pt_output_dir = gr.Textbox(label="Output Directory", value=f"{ROOT_DIR}/prompts", interactive=True)
	                        save_pt_config_button = gr.Button("Save Prompt Tuning Configuration", variant="primary")

	                    with gr.Column(scale=1):
	                        gr.Markdown("## Prompt Tuning Output")
	                        pt_output = gr.Textbox(label="Output", lines=10)
	                        pt_status = gr.Textbox(label="Status", lines=10)

	                        run_pt_button = gr.Button("Run Prompt Tuning", variant="primary")
	                        check_pt_status_button = gr.Button("Check Prompt Tuning Status")

	            with gr.TabItem("Data Management"):
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        with gr.Accordion("File Upload", open=True):
	                            file_upload = gr.File(label="Upload File", file_types=[".txt", ".csv", ".parquet"])
	                            upload_btn = gr.Button("Upload File", variant="primary")
	                            upload_output = gr.Textbox(label="Upload Status", visible=True)

	                        with gr.Accordion("File Management", open=True):
	                            file_list = gr.Dropdown(label="Select File", choices=[], interactive=True)
	                            refresh_btn = gr.Button("Refresh File List", variant="secondary")

	                            file_content = gr.TextArea(label="File Content", lines=10)

	                            with gr.Row():
	                                delete_btn = gr.Button("Delete Selected File", variant="stop")
	                                save_btn = gr.Button("Save Changes", variant="primary")

	                            operation_status = gr.Textbox(label="Operation Status", visible=True)

	                    with gr.Column(scale=1):
	                        with gr.Accordion("Output Folders", open=True):
	                            output_folder_list = gr.Dropdown(label="Select Output Folder", choices=[], interactive=True)
	                            refresh_output_btn = gr.Button("Refresh Output Folders", variant="secondary")
	                            folder_content_list = gr.Dropdown(label="Folder Contents", choices=[], interactive=True, multiselect=False)

	                            file_info = gr.Textbox(label="File Info", lines=3)
	                            output_content = gr.TextArea(label="File Content", lines=10)

	        # Event handlers
	        def refresh_llm_models():
	            models = get_local_models(llm_api_base)
	            return gr.update(choices=models)

	        def refresh_embed_models():
	            models = get_local_models(embeddings_api_base)
	            return gr.update(choices=models)

	        refresh_llm_btn.click(
	            refresh_llm_models,
	            outputs=[llm_name]
	        )

	        refresh_embed_btn.click(
	            refresh_embed_models,
	            outputs=[embed_name]
	        )

	        # Initialize model lists on page load
	        demo.load(refresh_llm_models, outputs=[llm_name])
	        demo.load(refresh_embed_models, outputs=[embed_name])

	        def run_indexing(llm_model, embed_model, root, verbose_on, nocache_on,
	                         resume_ts, reporter_name, emit_list, extra_args):
	            # Arguments arrive in the order of indexing_inputs below and hold
	            # the values currently shown in the browser.
	            request = IndexingRequest(
	                llm_model=llm_model,
	                embed_model=embed_model,
	                llm_api_base=llm_api_base,
	                embed_api_base=embeddings_api_base,
	                root=root,
	                verbose=verbose_on,
	                nocache=nocache_on,
	                resume=resume_ts if resume_ts else None,
	                reporter=reporter_name,
	                emit=list(emit_list or []),
	                custom_args=extra_args if extra_args else None
	            )
	            return start_indexing(request)

	        indexing_inputs = [
	            llm_name, embed_name, root_dir, verbose, nocache,
	            resume, reporter, emit_formats, custom_args,
	        ]

	        run_index_button.click(
	            run_indexing,
	            inputs=indexing_inputs,
	            outputs=[index_output, run_index_button, check_status_button]
	        )

	        check_status_button.click(
	            check_indexing_status,
	            outputs=[index_status, index_output]
	        )

	        def run_prompt_tuning(root, domain, method, limit, language,
	                              max_tokens, chunk_size, no_entity_types, output_dir):
	            # Arguments arrive in the order of prompt_tune_inputs below.
	            request = PromptTuneRequest(
	                root=root,
	                domain=domain if domain else None,
	                method=method,
	                limit=int(limit),
	                language=language if language else None,
	                max_tokens=int(max_tokens),
	                chunk_size=int(chunk_size),
	                no_entity_types=no_entity_types,
	                output=output_dir
	            )
	            result, button_update = start_prompt_tuning(request)
	            return result, button_update, gr.update(value=f"Request: {request.dict()}")

	        prompt_tune_inputs = [
	            pt_root, pt_domain, pt_method, pt_limit, pt_language,
	            pt_max_tokens, pt_chunk_size, pt_no_entity_types, pt_output_dir,
	        ]

	        run_pt_button.click(
	            run_prompt_tuning,
	            inputs=prompt_tune_inputs,
	            outputs=[pt_output, run_pt_button, pt_status]
	        )

	        check_pt_status_button.click(
	            check_prompt_tuning_status,
	            outputs=[pt_status, pt_output]
	        )

	        # Add event handlers for real-time updates
	        pt_root.change(lambda x: gr.update(value=f"Root Directory changed to: {x}"), inputs=[pt_root], outputs=[pt_status])
	        pt_limit.change(lambda x: gr.update(value=f"Limit changed to: {x}"), inputs=[pt_limit], outputs=[pt_status])
	        pt_max_tokens.change(lambda x: gr.update(value=f"Max Tokens changed to: {x}"), inputs=[pt_max_tokens], outputs=[pt_status])
	        pt_chunk_size.change(lambda x: gr.update(value=f"Chunk Size changed to: {x}"), inputs=[pt_chunk_size], outputs=[pt_status])
	        pt_output_dir.change(lambda x: gr.update(value=f"Output Directory changed to: {x}"), inputs=[pt_output_dir], outputs=[pt_status])

	        # Event handlers for Data Management
	        upload_btn.click(
	            upload_file,
	            inputs=[file_upload],
	            outputs=[upload_output, file_list, operation_status]
	        )

	        refresh_btn.click(
	            update_file_list,
	            outputs=[file_list]
	        )

	        refresh_output_btn.click(
	            update_output_folder_list,
	            outputs=[output_folder_list]
	        )

	        file_list.change(
	            update_file_content,
	            inputs=[file_list],
	            outputs=[file_content]
	        )

	        # NOTE(review): operation_status is listed twice here and for save_btn,
	        # so it is the target of both the status string and the drained log
	        # text returned by the handler — confirm which one should be shown.
	        delete_btn.click(
	            delete_file,
	            inputs=[file_list],
	            outputs=[operation_status, file_list, operation_status]
	        )

	        save_btn.click(
	            save_file_content,
	            inputs=[file_list, file_content],
	            outputs=[operation_status, operation_status]
	        )

	        output_folder_list.change(
	            update_folder_content_list,
	            inputs=[output_folder_list],
	            outputs=[folder_content_list]
	        )

	        folder_content_list.change(
	            handle_content_selection,
	            inputs=[output_folder_list, folder_content_list],
	            outputs=[folder_content_list, file_info, output_content]
	        )

	        # Event handler for saving configuration
	        save_config_button.click(
	            update_env_file,
	            inputs=[llm_name, embed_name],
	            outputs=[config_status]
	        )

	        # Event handler for saving prompt tuning configuration
	        save_pt_config_button.click(
	            save_prompt_tuning_config,
	            inputs=prompt_tune_inputs,
	            outputs=[pt_status]
	        )

	        # Initialize file list and output folder list
	        demo.load(update_file_list, outputs=[file_list])
	        demo.load(update_output_folder_list, outputs=[output_folder_list])

	    return demo

	def update_env_file(llm_model, embed_model):
	    """Write the chosen model names to ``<ROOT_DIR>/.env`` and reload that file.

	    Returns:
	        A confirmation message naming both values.
	    """
	    env_path = os.path.join(ROOT_DIR, '.env')

	    for key, value in (('LLM_MODEL', llm_model), ('EMBEDDINGS_MODEL', embed_model)):
	        set_key(env_path, key, value)

	    # override=True so the values just written replace any already in os.environ.
	    load_dotenv(env_path, override=True)

	    return f"Environment updated: LLM_MODEL={llm_model}, EMBEDDINGS_MODEL={embed_model}"

	def save_prompt_tuning_config(root, domain, method, limit, language, max_tokens, chunk_size, no_entity_types, output_dir):
	    """Dump the prompt-tuning form values to ``<ROOT_DIR>/prompt_tuning_config.yaml``.

	    The values are stored under a single top-level ``prompt_tuning`` key.

	    Returns:
	        A confirmation message containing the path written.
	    """
	    prompt_tuning_section = {
	        'root': root,
	        'domain': domain,
	        'method': method,
	        'limit': limit,
	        'language': language,
	        'max_tokens': max_tokens,
	        'chunk_size': chunk_size,
	        'no_entity_types': no_entity_types,
	        'output': output_dir
	    }
	    config = {'prompt_tuning': prompt_tuning_section}

	    config_path = os.path.join(ROOT_DIR, 'prompt_tuning_config.yaml')
	    with open(config_path, 'w') as stream:
	        yaml.dump(config, stream)

	    return f"Prompt Tuning configuration saved to {config_path}"

	# Build the UI at import time so ``demo`` exists as a module-level attribute.
	demo = create_interface()

	if __name__ == "__main__":
	    # Only start the server when this file is executed as a script.
	    demo.launch(server_port=7860)