File size: 5,248 Bytes
d66e041 eb25620 d66e041 eb25620 d66e041 eb25620 d66e041 eb25620 d66e041 f26bcda eb25620 b421d8c b5905f1 9545a51 b5905f1 b421d8c b5905f1 b421d8c b5905f1 b421d8c d66e041 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import gradio as gr
import requests
import pandas as pd
import time
import os
import tempfile
import json
from datetime import datetime
# Function to query the GBIF API and download data
def download_gbif_data(species_name, country_code, max_results):
    """Fetch GBIF occurrence records and save them as temporary CSV/JSON files.

    Pages through the GBIF occurrence search endpoint for ``species_name``
    within ``country_code``, flattens the collected records into a CSV file
    and writes a small JSON metadata file describing the download. Failures
    are reported through the returned status string instead of being raised,
    because this function is used as a UI callback.

    Args:
        species_name: Value sent as the ``scientificName`` query parameter.
        country_code: Value sent as the ``country`` query parameter.
        max_results: Upper bound on the number of records to fetch; any value
            accepted by ``int()``.

    Returns:
        A ``(status_message, csv_file_path, metadata_file_path)`` tuple. Both
        paths are ``None`` when nothing was downloaded or an error occurred.
        The files are created with ``delete=False``, so the caller is
        responsible for removing them.
    """
    from urllib.parse import urlencode  # local import; only used for the metadata link

    base_url = "https://api.gbif.org/v1/occurrence/search"
    page_size = 300  # records requested per full page
    request_timeout = 30  # seconds; keeps a stalled connection from hanging the UI
    all_results = []
    status_lines = []

    try:
        max_count = int(max_results)
        offset = 0
        while offset < max_count:
            # Shrink the final page so no more than max_count records are fetched.
            page_limit = min(page_size, max_count - offset)
            params = {
                "scientificName": species_name,
                "country": country_code,
                "limit": page_limit,
                "offset": offset,
            }
            response = requests.get(base_url, params=params, timeout=request_timeout)
            if response.status_code != 200:
                error_message = (
                    f"Failed to retrieve data for {species_name} at offset {offset}. "
                    f"HTTP Status Code: {response.status_code}"
                )
                print(error_message)
                return f"Error: {error_message}", None, None

            data = response.json()
            page = data.get("results")
            if not page:
                message = f"No more data found for {species_name} at offset {offset}."
                status_lines.append(message)
                print(message)
                break

            all_results.extend(page)
            message = f"Fetched {len(page)} records for {species_name} (Offset: {offset})."
            status_lines.append(message)
            print(message)

            offset += page_limit
            # Stop as soon as the API reports the end, and skip the politeness
            # delay when no further request will be made.
            if data.get("endOfRecords") or offset >= max_count:
                break
            time.sleep(1)

        if not all_results:
            return f"No data found for {species_name} in {country_code}.", None, None

        df = pd.json_normalize(all_results)
        # Reserve a unique path, then write only after the handle is closed:
        # a still-open NamedTemporaryFile cannot be reopened by name on Windows.
        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_file:
            csv_file_path = tmp_file.name
        df.to_csv(csv_file_path, index=False)
        print(f"Data for {species_name} saved to {csv_file_path}.")
        status_lines.append(f"Data for {species_name} downloaded successfully!")

        # Generate metadata; the query string is URL-encoded so names that
        # contain spaces still yield a valid link.
        query = urlencode({"scientificName": species_name, "country": country_code})
        metadata = {
            "speciesName": species_name,
            "countryCode": country_code,
            "maxResults": max_results,
            "downloadedTime": datetime.now().isoformat(),
            "gbif_url": f"https://www.gbif.org/occurrence/search?{query}",
            "note": "Please cite GBIF as the original source of data.",
        }
        with tempfile.NamedTemporaryFile(
            suffix=".json", mode="w", encoding="utf-8", delete=False
        ) as tmp_metadata_file:
            metadata_file_path = tmp_metadata_file.name
            json.dump(metadata, tmp_metadata_file, indent=2)
        print(f"Metadata saved to {metadata_file_path}")

        status_message = "".join(f"{line}\n" for line in status_lines)
        return status_message, csv_file_path, metadata_file_path
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so every failure
        # (network, JSON decoding, disk) is surfaced as a status message.
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        return error_message, None, None
# Clean Up Temporary File
def postprocess(status, csv_file_path, metadata_file_path):
    """Delete the temporary CSV and metadata files and echo the arguments back.

    Args:
        status: Status message; returned unchanged.
        csv_file_path: Path of the temporary CSV file, or a falsy value to skip.
        metadata_file_path: Path of the temporary metadata file, or a falsy
            value to skip.

    Returns:
        The ``(status, csv_file_path, metadata_file_path)`` tuple exactly as
        received. Note that any returned path refers to a file that no longer
        exists once this function has run.
    """
    for path in (csv_file_path, metadata_file_path):
        if not path:
            continue
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already cleaned up elsewhere; keep going so the other file is
            # still removed instead of aborting on the first missing one.
            print(f"Temporary file already removed: {path}")
        else:
            print(f"Removed temporary file: {path}")
    return status, csv_file_path, metadata_file_path
# Wrap the download_gbif_data function to convert max_result to int
def download_gbif_data_wrapper(species_name, country_code, max_results):
    """Validate ``max_results`` as an integer, then run ``download_gbif_data``.

    Args:
        species_name: Passed through to ``download_gbif_data`` unchanged.
        country_code: Passed through to ``download_gbif_data`` unchanged.
        max_results: Value to convert with ``int()``.

    Returns:
        The ``(status, csv_file_path, metadata_file_path)`` tuple produced by
        ``download_gbif_data``, or an error status with two ``None`` paths
        when ``max_results`` cannot be converted to an integer.
    """
    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        # ValueError: non-numeric or empty text; TypeError: None or another
        # non-string, non-number value. Both mean "not a usable integer".
        return "Error: Invalid value for Max Results. Please enter a valid integer.", None, None
    return download_gbif_data(species_name, country_code, max_results)
# Gradio Interface using gr.Blocks
with gr.Blocks(title="GBIF Data Downloader") as iface:
    gr.Markdown("Enter a species name, country code, and max results to download CSV data from GBIF")

    # Query inputs.
    with gr.Row():
        species_name_input = gr.Textbox(label="Species Name", placeholder="e.g. Aconitum naviculare")
        country_code_input = gr.Textbox(label="Country Code", value="NP", placeholder="e.g. NP")
        max_results_input = gr.Textbox(label="Max Results", value="5000", placeholder="e.g. 5000")

    with gr.Row():
        download_button = gr.Button("Download Data")

    # Results: status text plus the two downloadable files.
    with gr.Row():
        output_status = gr.Textbox(label="Status")
        output_csv = gr.File(label="Download CSV")
        output_metadata = gr.File(label="Download Metadata")

    inputs = [species_name_input, country_code_input, max_results_input]
    outputs = [output_status, output_csv, output_metadata]

    def clear_inputs():
        """Return one ``None`` per input component to reset the form."""
        return [None, None, None]

    # NOTE: the event listener's ``postprocess`` argument is a boolean flag,
    # not a callback hook. Passing the cleanup function there only acted as a
    # truthy value (the default) and the function was never invoked, so the
    # argument is omitted.
    download_button.click(
        download_gbif_data_wrapper,
        inputs=inputs,
        outputs=outputs,
    ).then(clear_inputs, None, inputs)  # reset the form once the download has finished
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()