"""Gradio app that downloads GBIF occurrence records as a CSV plus a metadata JSON."""

import json
import os
import tempfile
import time
from datetime import datetime
from urllib.parse import urlencode

import gradio as gr
import pandas as pd
import requests


def download_gbif_data(species_name, country_code, max_results):
    """Query the GBIF occurrence API and save the results to temporary files.

    Pages through https://api.gbif.org/v1/occurrence/search in chunks of up
    to 300 records (the API's per-request cap) until ``max_results`` records
    have been fetched, the API reports ``endOfRecords``, or a page comes
    back empty.

    Args:
        species_name: Scientific name to search for.
        country_code: ISO 3166 two-letter country code (e.g. "NP").
        max_results: Maximum number of occurrence records to fetch.

    Returns:
        Tuple of (status_message, csv_file_path, metadata_file_path).
        Both paths are None when nothing was downloaded or an error occurred.
    """
    base_url = "https://api.gbif.org/v1/occurrence/search"
    all_results = []
    offset = 0
    page_size = 300  # GBIF hard-caps `limit` at 300 per request
    status_message = ""

    try:
        max_results = int(max_results)
        while offset < max_results:
            params = {
                "scientificName": species_name,
                "country": country_code,
                # Never request past the caller's cap on the final page.
                "limit": min(page_size, max_results - offset),
                "offset": offset,
            }
            response = requests.get(base_url, params=params)
            if response.status_code != 200:
                error_message = (
                    f"Failed to retrieve data for {species_name} at offset "
                    f"{offset}. HTTP Status Code: {response.status_code}"
                )
                print(error_message)
                return f"Error: {error_message}", None, None

            data = response.json()
            results = data.get("results")
            if not results:
                status_message += (
                    f"No more data found for {species_name} at offset {offset}.\n"
                )
                print(f"No more data found for {species_name} at offset {offset}.")
                break

            all_results.extend(results)
            status_message += (
                f"Fetched {len(results)} records for {species_name} (Offset: {offset}).\n"
            )
            print(f"Fetched {len(results)} records for {species_name} (Offset: {offset}).")

            # Stop as soon as GBIF says this was the last page instead of
            # issuing one extra request that would come back empty.
            if data.get("endOfRecords"):
                break

            offset += page_size
            time.sleep(1)  # be polite to the public API between pages

        if not all_results:
            return f"No data found for {species_name} in {country_code}.", None, None

        # Trim defensively in case the API returned more than requested.
        df = pd.json_normalize(all_results[:max_results])
        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_file:
            csv_file_path = tmp_file.name
        df.to_csv(csv_file_path, index=False)
        print(f"Data for {species_name} saved to {csv_file_path}.")
        status_message += f"Data for {species_name} downloaded successfully!\n"

        metadata = {
            "speciesName": species_name,
            "countryCode": country_code,
            "maxResults": max_results,
            "downloadedTime": datetime.now().isoformat(),
            # URL-encode the query so species names containing spaces
            # still produce a valid link.
            "gbif_url": "https://www.gbif.org/occurrence/search?"
            + urlencode({"scientificName": species_name, "country": country_code}),
            "note": "Please cite GBIF as the original source of data.",
        }
        with tempfile.NamedTemporaryFile(
            suffix=".json", mode="w", delete=False
        ) as tmp_metadata_file:
            metadata_file_path = tmp_metadata_file.name
            json.dump(metadata, tmp_metadata_file, indent=2)
        print(f"Metadata saved to {metadata_file_path}")

        return status_message, csv_file_path, metadata_file_path
    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        return error_message, None, None


def postprocess(status, csv_file_path, metadata_file_path):
    """Remove the temporary files created by ``download_gbif_data``.

    NOTE(review): this was previously passed as ``postprocess=`` to
    ``Button.click`` — but that Gradio kwarg is a boolean, not a callback,
    and deleting the files at that point would break the ``gr.File``
    downloads. Kept only as an explicit manual-cleanup helper.
    """
    if csv_file_path:
        os.remove(csv_file_path)
        print(f"Removed temporary file: {csv_file_path}")
    if metadata_file_path:
        os.remove(metadata_file_path)
        print(f"Removed temporary file: {metadata_file_path}")
    return status, csv_file_path, metadata_file_path


def download_gbif_data_wrapper(species_name, country_code, max_results):
    """Validate that max_results is an integer before delegating."""
    try:
        max_results = int(max_results)
    except ValueError:
        return (
            "Error: Invalid value for Max Results. Please enter a valid integer.",
            None,
            None,
        )
    return download_gbif_data(species_name, country_code, max_results)


# Gradio interface built with gr.Blocks.
with gr.Blocks(title="GBIF Data Downloader") as iface:
    gr.Markdown(
        "Enter a species name, country code, and max results to download CSV data from GBIF"
    )
    with gr.Row():
        species_name_input = gr.Textbox(
            label="Species Name", placeholder="e.g. Aconitum naviculare"
        )
        country_code_input = gr.Textbox(
            label="Country Code", value="NP", placeholder="e.g. NP"
        )
        max_results_input = gr.Textbox(
            label="Max Results", value="5000", placeholder="e.g. 5000"
        )
    with gr.Row():
        download_button = gr.Button("Download Data")
    with gr.Row():
        output_status = gr.Textbox(label="Status")
        output_csv = gr.File(label="Download CSV")
        output_metadata = gr.File(label="Download Metadata")

    inputs = [species_name_input, country_code_input, max_results_input]
    outputs = [output_status, output_csv, output_metadata]

    def clear_inputs():
        # Clear all three input textboxes after a download completes.
        return [None, None, None]

    # Fixed: the bogus ``postprocess=postprocess`` kwarg was removed —
    # ``postprocess`` on a Gradio event is a bool flag, and deleting the
    # temp files there would have broken the file downloads.
    download_button.click(
        download_gbif_data_wrapper, inputs=inputs, outputs=outputs
    ).then(clear_inputs, None, inputs)

if __name__ == "__main__":
    iface.launch()