Spaces:

macota1
/

axa

Runtime error

Mayara Ayat

Upload folder using huggingface_hub

f7ab812 verified 4 months ago

2.35 kB

	import json
	from openai import OpenAI
	from transformers import GPT2Tokenizer


	def openai_complete_if_cache(
	    model="gpt-4o", prompt=None, system_prompt=None, history_messages=None, **kwargs
	) -> str:
	    """Send one chat-completion request and return the reply text.

	    The message list is assembled in this order: an optional system
	    message, any prior conversation history, then the user prompt.
	    Despite the function's name, no caching is performed here; every call
	    creates a new client and issues a new request.

	    Args:
	        model: Model name forwarded to the chat-completions endpoint.
	        prompt: Content of the final ``user`` message.
	        system_prompt: Content of a leading ``system`` message; omitted
	            when falsy.
	        history_messages: Optional sequence of earlier message dicts,
	            inserted between the system message and the user prompt.
	            ``None`` (the default) means no history.
	        **kwargs: Extra keyword arguments passed through unchanged to
	            ``chat.completions.create``.

	    Returns:
	        The ``content`` of the first choice's message.
	    """
	    # The client is built with no arguments, so credentials come from the
	    # library's default configuration (presumably the OPENAI_API_KEY
	    # environment variable -- confirm against the openai package docs).
	    openai_client = OpenAI()

	    messages = []
	    if system_prompt:
	        messages.append({"role": "system", "content": system_prompt})
	    # A None default avoids sharing one mutable list object across calls.
	    if history_messages:
	        messages.extend(history_messages)
	    messages.append({"role": "user", "content": prompt})

	    response = openai_client.chat.completions.create(
	        model=model, messages=messages, **kwargs
	    )
	    return response.choices[0].message.content


	# Module-level GPT-2 tokenizer, created once when this file is executed and
	# used by get_summary() to split contexts into tokens and join them back.
	tokenizer = GPT2Tokenizer.from_pretrained("gpt2")


	def get_summary(context, tot_tokens=2000, skip_tokens=1000):
	    """Build a short excerpt of ``context`` from its head and tail regions.

	    The text is tokenized with the module-level ``tokenizer``. The first and
	    last ``skip_tokens`` tokens are discarded, and from what remains the
	    first ``tot_tokens // 2`` and the last ``tot_tokens // 2`` tokens are
	    concatenated and converted back to a string.

	    Args:
	        context: The text to summarize.
	        tot_tokens: Approximate total number of tokens to keep (half from
	            the front, half from the back of the trimmed text).
	        skip_tokens: Number of tokens dropped from each end before
	            sampling. Defaults to 1000.

	    Returns:
	        The excerpt as a string. If the trimmed text has at most
	        ``2 * (tot_tokens // 2)`` tokens it is returned whole, so no token
	        is ever duplicated. If the context is no longer than the two
	        margins combined, the result is an empty string.
	    """
	    tokens = tokenizer.tokenize(context)
	    half_tokens = max(tot_tokens, 0) // 2
	    margin = max(skip_tokens, 0)

	    # Trim the same margin from both ends. The stop index is computed from
	    # the end of the list: a positive literal stop (e.g. 1000) would be
	    # measured from the front and leave the tail slice empty or misplaced.
	    core = tokens[margin : len(tokens) - margin]

	    if len(core) <= 2 * half_tokens:
	        # Head and tail windows would touch or overlap; keep everything once.
	        summary_tokens = core
	    else:
	        # Explicit start index (not -half_tokens) so half_tokens == 0 yields
	        # an empty tail instead of the whole list.
	        summary_tokens = core[:half_tokens] + core[len(core) - half_tokens :]

	    return tokenizer.convert_tokens_to_string(summary_tokens)


	# Script body: for each dataset class, read its unique contexts, condense
	# each one with get_summary(), ask the model for users/tasks/questions about
	# the combined description, and write the reply to a text file.
	# Both paths are relative to the current working directory.
	clses = ["agriculture"]
	for cls in clses:
	    # Explicit UTF-8 so decoding does not depend on the platform locale.
	    # Assumes the JSON file holds a list of context strings -- TODO confirm.
	    with open(
	        f"../datasets/unique_contexts/{cls}_unique_contexts.json",
	        mode="r",
	        encoding="utf-8",
	    ) as f:
	        unique_contexts = json.load(f)

	    summaries = [get_summary(context) for context in unique_contexts]

	    total_description = "\n\n".join(summaries)

	    prompt = f"""
	Given the following description of a dataset:

	{total_description}

	Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.

	Output the results in the following structure:
	- User 1: [user description]
	- Task 1: [task description]
	- Question 1:
	- Question 2:
	- Question 3:
	- Question 4:
	- Question 5:
	- Task 2: [task description]
	...
	- Task 5: [task description]
	- User 2: [user description]
	...
	- User 5: [user description]
	...
	"""

	    result = openai_complete_if_cache(model="gpt-4o", prompt=prompt)

	    file_path = f"../datasets/questions/{cls}_questions.txt"
	    # Explicit UTF-8 so non-ASCII characters in the model's reply can always
	    # be written, whatever the locale default encoding is.
	    with open(file_path, "w", encoding="utf-8") as file:
	        file.write(result)

	    print(f"{cls}_questions written to {file_path}")