Spaces:
Sleeping
Sleeping
| import re | |
def _parse_clarification_block(clarification_text):
    """Parse the body of one ``<clarification>`` block into question dicts.

    Each question starts at a ``- text:`` marker and is followed by an
    ``options:`` section whose entries are one ``-`` bullet per line.

    Args:
        clarification_text: Text found between the clarification tags.

    Returns:
        A list of ``{'question': str, 'options': list[str]}`` dicts, in the
        order the questions appear. Question blocks without an ``options:``
        section are skipped.
    """
    questions = []
    # Anything before the first "- text:" marker (e.g. a "questions:"
    # header) is not a question, hence the [1:].
    for block in clarification_text.split("- text:")[1:]:
        question_part, separator, options_part = block.partition("options:")
        if not separator:
            continue

        options = []
        for line in options_part.splitlines():
            line = line.strip()
            # Only a leading "-" is a bullet; hyphens inside the option text
            # (e.g. "Follow-up") must be preserved.
            if line.startswith("-"):
                option = line[1:].strip()
                if option:
                    options.append(option)

        questions.append({'question': question_part.strip(), 'options': options})
    return questions


def parse_text(input_text):
    """Split tagged text into a combined response and parsed clarifications.

    The input may contain ``<response>...</response>`` and
    ``<clarification>...</clarification>`` blocks mixed with free text.
    Free text and response contents are concatenated, in document order,
    into a single newline-separated response string. Clarification blocks
    are removed from the response and parsed into question/option dicts.

    Args:
        input_text: The raw text to parse.

    Returns:
        A ``(combined_response, parsed_clarifications)`` tuple, where
        ``combined_response`` is a string and ``parsed_clarifications`` is a
        list of ``{'question': str, 'options': list[str]}`` dicts.
    """
    # One pattern for both tags so blocks are visited in document order;
    # scanning each tag type in a separate pass lets clarification markup
    # leak into the response whenever it precedes a response block.
    tag_pattern = re.compile(r'<(response|clarification)>(.*?)</\1>', re.DOTALL)

    response_parts = []
    parsed_clarifications = []
    last_end = 0

    for tag_match in tag_pattern.finditer(input_text):
        # Free text between the previous tag and this one.
        between = input_text[last_end:tag_match.start()].strip()
        if between:
            response_parts.append(between)
        last_end = tag_match.end()

        content = tag_match.group(2).strip()
        if tag_match.group(1) == 'response':
            if content:
                response_parts.append(content)
        else:
            parsed_clarifications.extend(_parse_clarification_block(content))

    # Free text after the last tag.
    trailing = input_text[last_end:].strip()
    if trailing:
        response_parts.append(trailing)

    return "\n".join(response_parts), parsed_clarifications
# Example usage: parse a sample message containing free text, one
# <response> block and one <clarification> block, then print the result.
input_text = """
Some introductory text that should be included in the response.
<response>response to previous question is provided here</response>
Some more text that should also be included in the response.
<clarification>
questions:
- text: What topic should the article cover?
options:
- Technology
- Health and Wellness
- Travel
- Other
- text: What is the target audience for the article?
options:
- General public
- Professionals in a specific field
- Students
- Other
</clarification>
Final notes that should be part of the response.
"""
parsed_data = parse_text(input_text)
# parse_text returns a (response, clarifications) tuple, so it must be
# unpacked; indexing it with string keys raises TypeError.
response_text, clarifications = parsed_data
print(f"Response: {response_text}")
print("Clarifications:")
for item in clarifications:
    print(f" Question: {item['question']}")
    print(" Options:", ", ".join(item['options']))