import math import json import regex import inspect import gspread import guidance from ast import literal_eval from transformers import Tool from oauth2client.service_account import ServiceAccountCredentials from googleapiclient.discovery import build from google.oauth2.credentials import Credentials from google.auth.transport.requests import Request from google.oauth2.credentials import Credentials from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build from googleapiclient.errors import HttpError from ortools.linear_solver import pywraplp guidance.llm = guidance.llms.OpenAI("gpt-4") structure_program = guidance( ''' {{#user~}} {{description}} Help me extract args from the data blob to apply the following algorithm: {{code}} ---- {{~#each examples}} Data Blob: {{this.input}} Result: {{this.output}} --- {{~/each}} Please help me extract the input values from a given data blob into a JSON. Data Blob: {{data_blob}} Result: {{~/user}} {{#assistant~}} {{gen 'output'}} {{~/assistant}} ''') class DataExtractorTool(Tool): def __init__(self): super().__init__() def fetch_data(self, source_link: str) -> str: """ Abstract method to fetch raw data from the source. """ raise NotImplementedError("Subclasses must implement this method.") def __call__(self, source_link: str) -> dict: raw_data = self.fetch_data(source_link) return raw_data class GoogleSheetExtractorTool(DataExtractorTool): name = "google_sheet_extractor_tool" description = """ Tool to extract data from Google Sheets. Input: source_link - str - a url or google sheets id e.x. source_link='https://docs.google.com/spreadsheets/d/SHEETS_ID/'. Output is a string. """ def fetch_data(self, source_link: str) -> str: # Set up the credentials scope = ["https://www.googleapis.com/auth/spreadsheets.readonly"] creds = Credentials.from_authorized_user_file('token.json', scope) service = build('sheets', 'v4', credentials=creds) # Open the spreadsheet and get all values source_link = source_link.rstrip("/") spreadsheet_id = source_link.split('/')[-1] range_name = 'Sheet1!A1:Z1000' sheet = service.spreadsheets() result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range_name).execute() data = result.get('values', []) # Convert the data to a string representation raw_data = '\n'.join([','.join(row) for row in data]) print(raw_data) return raw_data