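"""Gradio app for downloading species occurrence records from the GBIF API.

Given a scientific name, an ISO country code, and a maximum record count, it
pages through the occurrence search endpoint, saves the results as CSV, and
writes a small JSON metadata file describing the query.
"""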
import gradio as gr
import requests
import pandas as pd
import time
import tempfile
import json
from datetime import datetime
from urllib.parse import quote_plus

# Query the GBIF occurrence search API page by page and collect the results
def download_gbif_data(species_name, country_code, max_results):
    base_url = "https://api.gbif.org/v1/occurrence/search"
    all_results = []
    offset = 0
    limit = 300  # GBIF caps occurrence searches at 300 records per page
    max_results = int(max_results)
    status_message = ""
    metadata_file_path = None
    csv_file_path = None

    try:
        while offset < max_results:
            params = {
                "scientificName": species_name,
                "country": country_code,
                "limit": min(limit, max_results - offset),
                "offset": offset,
            }
            response = requests.get(base_url, params=params, timeout=30)

            if response.status_code == 200:
                data = response.json()
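                # A successful response looks roughly like:
                # {"offset": 0, "limit": 300, "endOfRecords": false, "count": ..., "results": [...]}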
                if "results" in data and data["results"]:
                    all_results.extend(data["results"])
                    status_message += f"Fetched {len(data['results'])} records for {species_name} (Offset: {offset}).\n"
                    print(f"Fetched {len(data['results'])} records for {species_name} (Offset: {offset}).")
                else:
                    status_message += f"No more data found for {species_name} at offset {offset}.\n"
                    print(f"No more data found for {species_name} at offset {offset}.")
                    break
            else:
                error_message = f"Failed to retrieve data for {species_name} at offset {offset}. HTTP Status Code: {response.status_code}"
                print(error_message)
                return f"Error: {error_message}", None, None

            offset += limit
            time.sleep(1)  # brief pause between requests to avoid hammering the API

        if all_results:
            # Flatten the nested JSON records into a table and write it to a
            # temp file (write after the handle closes, for portability)
            df = pd.json_normalize(all_results)
            with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_file:
                csv_file_path = tmp_file.name
            df.to_csv(csv_file_path, index=False)
            print(f"Data for {species_name} saved to {csv_file_path}.")

            status_message += f"Data for {species_name} downloaded successfully!\n"
            # Generate Metadata
            metadata = {
                "speciesName": species_name,
                "countryCode": country_code,
                "maxResults": max_results,
                "downloadedTime": datetime.now().isoformat(),
                "gbif_url": f"https://www.gbif.org/occurrence/search?scientificName={species_name}&country={country_code}",
                "note": "Please cite GBIF as the original source of data."
            }

            with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as tmp_metadata_file:
                metadata_file_path = tmp_metadata_file.name
                json.dump(metadata, tmp_metadata_file, indent=2)
                print(f"Metadata saved to {metadata_file_path}")

            return status_message, csv_file_path, metadata_file_path

        else:
            return f"No data found for {species_name} in {country_code}.", None, None

    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        return error_message, None, None
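
# For very large result sets, GBIF's asynchronous occurrence download API is
# the better route; the search endpoint paged above is intended for modest
# numbers of records.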

# Note: the temporary CSV and JSON files are intentionally left on disk so
# that Gradio can serve them through the File components; deleting them
# immediately after the handler returns would break the download links.


# Wrap download_gbif_data to validate that max_results is an integer
def download_gbif_data_wrapper(species_name, country_code, max_results):
    try:
        max_results = int(max_results)
    except ValueError:
        return "Error: Invalid value for Max Results. Please enter a valid integer.", None, None

    return download_gbif_data(species_name, country_code, max_results)



# Gradio interface using gr.Blocks
with gr.Blocks(title="GBIF Data Downloader") as iface:
    gr.Markdown("Enter a species name, country code, and max results to download occurrence data from GBIF as CSV.")
    with gr.Row():
        species_name_input = gr.Textbox(label="Species Name", placeholder="e.g. Aconitum naviculare")
        country_code_input = gr.Textbox(label="Country Code", value="NP", placeholder="e.g. NP")
        max_results_input = gr.Textbox(label="Max Results", value="5000", placeholder="e.g. 5000")
    with gr.Row():
        download_button = gr.Button("Download Data")
    with gr.Row():
        output_status = gr.Textbox(label="Status")
        output_csv = gr.File(label="Download CSV")
        output_metadata = gr.File(label="Download Metadata")

    inputs = [species_name_input, country_code_input, max_results_input]
    outputs = [output_status, output_csv, output_metadata]

    def clear_inputs():
        # Reset all three input fields
        return [None, None, None]

    download_button.click(
        download_gbif_data_wrapper,
        inputs=inputs,
        outputs=outputs,
    ).then(clear_inputs, None, inputs)  # clear the inputs once the download finishes

if __name__ == "__main__":
    iface.launch()
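
# To exercise the download logic without launching the UI, call the function
# directly (values are illustrative):
#
#     status, csv_path, meta_path = download_gbif_data("Aconitum naviculare", "NP", 100)
#     print(status)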