"""Gradio UI and MCP server for querying the Official Statistics Portal of Latvia (CSP)."""

import ast
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr  # pip install "gradio[mcp]"
import requests
from pycspwrapper import LVStat  # pip install pycspwrapper

# Seconds to wait for the CSP API before giving up; without a timeout a
# stalled server would block the calling worker indefinitely.
_REQUEST_TIMEOUT = 30


def get_topics(name: str = '') -> dict:
    """Available topics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        name (str): name of the topic. If not defined (or not found among the
            available topics), function will return all available topics.

    Returns:
        dict: The dictionary of topics, where key is topic name and value is topic code.

    Raises:
        requests.exceptions.RequestException: if the request fails, times out
            or the server answers with an HTTP error status.

    Examples:
        >>> get_topics('vide')
        {'Vide': 'ENV'}
        >>> print(get_topics())
        {'Iedzīvotāji': 'POP', 'Darbs': 'EMP', 'Sociālā aizsardzība un veselība': 'VES',...
    """
    name_capit = name.capitalize()
    base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/'
    response = requests.get(base_url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    content_short = {i['text']: i['id'] for i in response.json()}
    if name_capit in content_short:
        return {name_capit: content_short[name_capit]}
    return content_short


def get_topic_content(topic: str) -> dict:
    """Available contents of the topic from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        topic (str): topic code (upper-cased before use). Use get_topics to get topic code.

    Returns:
        dict: The dictionary of the contents of the topic, where key is the topic content
            and value is the topic content code.

    Raises:
        requests.exceptions.RequestException: if the request fails, times out
            or the server answers with an HTTP error status.

    Examples:
        >>> # First get topic code
        ... get_topics('vide')
        {'Vide': 'ENV'}
        >>> # Then use this code to get content
        ... print(get_topic_content('ENV'))
        {'Vides konti': 'VI', 'Atkritumu apsaimniekošana': 'AK', 'Agro-vides rādītāji': 'AV',...
        >>> get_topics('Iedzīvotāji')
        {'Iedzīvotāji': 'POP'}
        >>> print(get_topic_content('POP'))
        {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC',...
    """
    base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/'
    response = requests.get(base_url + topic.upper(), timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return {i['text']: i['id'] for i in response.json()}


def get_titles(topic_content_code: str = '',
               url: str = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*') -> dict:
    """Available data (titles) from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        topic_content_code (str): topic content code. Use get_topic_content to get topic content code.
            Only titles whose path contains this code as a whole path segment are returned.
            If not defined, function will return all available titles.
        url (str): URL from where to get list of available titles.
            Default value: 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*'.

    Returns:
        dict: The dictionary of the titles available from Official Statistics Portal, where key
            is the title name and value is the list of the path codes followed by the report ID
            (typically 4 elements: topic code, topic content code, topic sub-content code and report ID).

    Raises:
        requests.exceptions.RequestException: if the request fails, times out
            or the server answers with an HTTP error status.

    Examples:
        >>> # First get topic code
        ... get_topics('Iedzīvotāji')
        {'Iedzīvotāji': 'POP'}
        >>> # Then use this code to get content
        ... print(get_topic_content('POP'))
        {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC',...
        >>> # Then use this content code to extract report titles
        ... print(get_titles('ID'))
        {'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],...
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    dict_result = {}
    for entry in response.json():
        path_codes = [part for part in entry['path'].split('/') if part]
        # Compare against whole path segments: a plain substring test would
        # let e.g. 'DS' match the unrelated path '/POP/ID/IDS'.
        if not topic_content_code or topic_content_code in path_codes:
            dict_result[entry['title']] = path_codes + [entry['id']]
    return dict_result


def get_query_values(topic_params: Optional[list[str]] = None) -> List[Dict]:
    """Get query code and values for particular report.

    Args:
        topic_params (list[str]): arguments as a list that are needed for data extraction.
            Arguments in the list should be in the following order:
            - topic code,
            - topic content code,
            - topic sub-content code
            - report ID.
            These codes you can get from the function get_titles.

    Returns:
        list: The list of the dictionaries, where dictionary's 'code' value you should use
            as an argument name and 'values' value(s) as an argument values.

    Raises:
        RuntimeError: if the request fails (connection error, timeout, HTTP error status)
            or the response is not JSON containing a 'variables' key.

    Examples:
        >>> # First get report topic parameters from get_titles
        ... print(get_titles('ID'))
        {'Dzīvi un nedzīvi dzimušo skaits pēc dzimuma 1920 - 2020': ['POP', 'ID', 'IDS', 'IDS010'],...
        >>> # Then use these values to get possible query values
        ... print(get_query_values(['POP', 'ID', 'IDS', 'IDS010']))
        [{'code': 'SEX_NEWBORN', 'values': ['T', 'M', 'F'], 'valueTexts': ['Pavisam', 'Vīrieši', 'Sievietes']}, {'code': 'TIME', 'values': ['1920', '1921', '1922', '1923',...
    """
    base_url = 'https://data.stat.gov.lv/api/v1/lv/OSP_PUB/START/'
    url = base_url + '/'.join(topic_params or [])
    try:
        # The request itself is inside the try so that connection errors and
        # timeouts are reported as RuntimeError like every other failure here.
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()  # Raises HTTPError for bad responses (4xx, 5xx)
        data = response.json()
        if 'variables' not in data:
            raise ValueError("Unexpected JSON structure: 'variables' key missing")
        return [
            {
                'code': i.get('code', ''),
                'values': i.get('values', ''),
                'valueTexts': i.get('valueTexts', ''),
            }
            for i in data['variables']
        ]
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Request failed: {e}") from e
    except ValueError as ve:
        raise RuntimeError(f"Parsing failed: {ve}") from ve


def get_csp_data(lang: str = 'en', topic_params: Optional[list[str]] = None, **kwargs) -> List[Dict]:
    """Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

    Args:
        lang (str): Language. Default value 'en'.
        topic_params (list[str]): arguments as a list that are needed for data extraction.
            Arguments in the list should be in the following order:
            - topic code,
            - topic content code,
            - topic sub-content code
            - report ID.
            These codes you can get from the function get_titles.
        kwargs: Keyword arguments for query configuration. Possible query argument names ('code')
            and their possible values ('values') can be obtained using the function get_query_values.
            'valueTexts' from get_query_values could help if 'values' is unclear.

    Returns:
        list: The list of the dictionaries, where dictionary's key 'key' contains query parameters
            and key 'values' contains values.

    Examples:
        >>> topics = ['POP', 'IR', 'IRE', 'IRE010']
        >>> query_args = get_query_values(topics)
        >>> print(query_args)
        [{'code': 'ETHNICITY', 'values': ['TOTAL', 'E_LAT',...], 'valueTexts': ['Pavisam', 'Latvieši',...]}, {'code': 'TIME', 'values': ['1935', '1959',..., '2025'],...}]
        >>> data = get_csp_data(
        ...     lang='en',
        ...     topic_params=topics,
        ...     ETHNICITY=['E_LAT'],
        ...     TIME=['2024', '2025']
        ... )
        >>> print(data[0])
        {'key': ['E_LAT', '2024'], 'values': ['1186337']}
    """
    csp = LVStat(lang, *(topic_params or []))
    csp.set_query(**kwargs)
    return csp.get_data()['data']


def construct_csp_link(params: list[str]) -> str:
    """Build the URL of the CSP web page for a report.

    Args:
        params (list[str]): folder codes followed by the report ID, as returned
            by get_titles (e.g. ['POP', 'ID', 'IDS', 'IDS010']).

    Returns:
        str: Link of the form '.../OSP_PUB/START__POP__ID__IDS/IDS010/'.

    Raises:
        ValueError: if params holds fewer than two elements.
    """
    if len(params) < 2:
        raise ValueError("params must contain at least one folder code followed by the report ID")
    # All elements but the last are folder codes; the last one is the report
    # ID. This also covers reports that sit fewer (or more) than three
    # levels deep instead of assuming exactly four elements.
    *folders, report_id = params
    base_url = 'https://data.stat.gov.lv/pxweb/lv/OSP_PUB/START__'
    mid_path = '__'.join(folders)
    return f"{base_url}{mid_path}/{report_id}/"


with gr.Blocks() as demo:
    gr.Markdown("### Latvian CSP Data Query Interface")

    lang = gr.Dropdown(["en", "lv"], value="en", label="Language")

    # Step 1: Topic Selection
    topic_dict = get_topics()
    topic_dropdown = gr.Dropdown(choices=list(topic_dict), label="Select Topic")

    # Step 2: Topic Content (dynamically populated)
    topic_content_dropdown = gr.Dropdown(label="Select Topic Content", visible=False)

    # Step 3: Report Titles (dynamically populated)
    report_dropdown = gr.Dropdown(label="Select Report", visible=False)

    # Dynamic Link & Topic Params Output
    link_output = gr.Markdown(visible=False)
    # Holds the string form of a Python list, e.g. ['POP', 'IR', 'IRE', 'IRE010'].
    topic_params_box = gr.Textbox(label="Topic Params", lines=1, interactive=True)
    kwargs_box = gr.Textbox(
        label="Query Parameters (Python dict, e.g., {'ETHNICITY': ['E_LAT'], 'TIME': ['2024', '2025']})",
        lines=4,
    )
    output = gr.JSON(label="Result")
    run_button = gr.Button("Run Query")

    def update_topic_content(topic_name: str) -> Tuple[Any, Dict[str, str], str]:
        """Given a topic name, updates the UI dropdown choices with the corresponding content,
        and returns the content dictionary (what content is available under given topic_name)
        and internal topic code (ID of the topic_name).

        Args:
            topic_name (str): The name of the selected topic. Possible topic names: 'Darbs', 'Iedzīvotāji',\
                'Informācijas tehnoloģijas', 'Izglītība, kultūra un zinātne', 'Nozares',\
                'Sociālā aizsardzība un veselība', 'Tirdzniecība un pakalpojumi', 'Uzņēmējdarbība',\
                'Valsts un ekonomika', 'Vide'.

        Returns:
            tuple:
                - gr.update: Gradio UI update object with new dropdown choices and visibility set to True.
                - content_dict (dict): Dictionary containing content entries for the selected topic.
                - topic_code (str): Internal code corresponding to the topic name.
        """
        topic_code = topic_dict[topic_name]
        content_dict = get_topic_content(topic_code)
        return gr.update(choices=list(content_dict), visible=True), content_dict, topic_code

    def update_reports(topic_content_name: str, content_dict: dict = None,
                       topic_name: str = '') -> Tuple[Dict[str, List[str]], str, Any]:
        """Updates the UI dropdown menu with available report titles for a selected topic content,
        and returns the titles dictionary and the corresponding content code.

        Args:
            topic_content_name (str): The name of the selected topic content (e.g., "Darba samaksa (algas)").\
                Topic content names you can get from `update_topic_content` (`content_dict` object).
            content_dict (dict): A dictionary mapping topic content names to their corresponding content codes.\
                Obtained from `update_topic_content` (2nd returned object: `content_dict`).\
                Ignored (looked up again) when `topic_name` is given.
            topic_name (str): The name of the selected topic. Possible topic names: 'Darbs', 'Iedzīvotāji',\
                'Informācijas tehnoloģijas', 'Izglītība, kultūra un zinātne', 'Nozares',\
                'Sociālā aizsardzība un veselība', 'Tirdzniecība un pakalpojumi', 'Uzņēmējdarbība',\
                'Valsts un ekonomika', 'Vide'.

        Returns:
            tuple:
                - titles_dict (dict): A dictionary of available report titles from the Official Statistics
                  Portal of Latvia (CSP). Each key is a human-readable report title, and each value is
                  a list of metadata: `[topic_code, topic_content_code, sub_content_code, report_id]`.
                  This list (as a string) of metadata later can be used in the `run_get_csp_data`
                  function (into topic_params_str parameter)
                - topic_content_code (str): The internal code associated with the selected topic content,
                  used to retrieve titles via the `get_titles` function.
                - gr.update: A Gradio UI update object to populate a dropdown with the list of report
                  titles and make it visible.

        Raises:
            ValueError: if neither `content_dict` nor `topic_name` is provided.

        Dependencies:
            - Relies on `get_titles(topic_content_code)` to fetch metadata from the CSP's API at:
              https://data.stat.gov.lv/api/v1/lv/OSP_PUB?query=*&filter=*
        """
        if topic_name:
            content_dict = get_topic_content(topic_dict[topic_name])
        if content_dict is None:
            # Fail with an explicit message instead of an opaque
            # "'NoneType' object is not subscriptable".
            raise ValueError("Either content_dict or topic_name must be provided")
        topic_content_code = content_dict[topic_content_name]
        titles_dict = get_titles(topic_content_code)
        return titles_dict, topic_content_code, gr.update(choices=list(titles_dict), visible=True)

    def update_topic_params_and_link(report_title: str, titles_dict: dict) -> Tuple[str, Any, Any]:
        """Prepares and returns metadata, a hyperlink, and query parameter preview for a selected report
        from the Official Statistics Portal of Latvia (CSP).

        Args:
            report_title (str): The title of the selected report, as shown in the dropdown.
            titles_dict (dict): Dictionary mapping report titles to their metadata list:\
                [topic_code, content_code, sub_content_code, report_id],\
                typically retrieved using `get_titles(topic_content_code)`.

        Returns:
            tuple:
                - topic_params_str (str): String representation of the internal report metadata (code list),
                  useful for debugging or internal reference. This string can be used in the
                  `run_get_csp_data` (parameter `topic_params_str`).
                - gr.update: Gradio component update with a Markdown-style hyperlink pointing to the CSP page
                  for the selected report.
                - gr.update: Gradio component update showing a sample query parameter, particularly for the
                  `TIME` dimension if present, using the most recent 3 values.

        Details:
            - The function extracts the internal metadata for the selected report.
            - It generates a URL using `construct_csp_link(...)` that links directly to the CSP report page.
            - It attempts to fetch available query parameters using `get_query_values(...)`, then isolates
              the `TIME` filter and selects the last 3 available values (e.g., most recent years).
            - If fetching query parameters fails, an empty dictionary (`'{}'`) is returned as the fallback.

        Example Output:
            - topic_params_str: "['POP', 'ID', 'IDS', 'IDS010']"
            - link (Markdown): "[Dzimušo skaits pēc dzimuma](https://data.stat.gov.lv/.../IDS010)"
            - query_str: "{'TIME': ['2020', '2021', '2022']}"
        """
        title_value = titles_dict[report_title]
        topic_params_str = str(title_value)
        link = construct_csp_link(title_value)
        try:
            query = {}
            for variable in get_query_values(title_value):
                if variable.get('code', '') == 'TIME':
                    query = {'TIME': variable['values'][-3:]}
                    break
            query_str = str(query)
        except Exception:
            # Deliberate best effort: the preview is only a convenience, so any
            # failure while fetching it falls back to an empty query.
            query_str = '{}'
        return (
            topic_params_str,
            gr.update(value=f"[{report_title}]({link})", visible=True),
            gr.update(value=query_str),
        )

    topic_content_state = gr.State()
    titles_state = gr.State()
    topic_code_state = gr.State()
    topic_content_code_state = gr.State()

    topic_dropdown.change(fn=update_topic_content,
                          inputs=topic_dropdown,
                          outputs=[topic_content_dropdown, topic_content_state, topic_code_state])
    topic_content_dropdown.change(fn=update_reports,
                                  inputs=[topic_content_dropdown, topic_content_state],
                                  outputs=[titles_state, topic_content_code_state, report_dropdown])
    report_dropdown.change(fn=update_topic_params_and_link,
                           inputs=[report_dropdown, titles_state],
                           outputs=[topic_params_box, link_output, kwargs_box])

    def run_get_csp_data(lang: str = 'en', topic_params_str: str = '[]',
                         query_kwargs_str: str = '{}') -> List[Dict]:
        """Get statistics from Official Statistics Portal of Latvia (CSP or Centrālā statistikas pārvalde).

        Args:
            lang (str): Language. Default value 'en'.
            topic_params_str (str): string representation of a list that is needed for data extraction. Arguments in the list\
                should be in the following order: topic code, topic content code, topic sub-content code, report ID.\
                These codes you can get from the function get_titles.
            query_kwargs_str (str): string representation of a dictionary - keyword arguments for query configuration.\
                Possible query argument names ('code') and their possible values ('values') can be obtained using the function get_query_values.\
                'valueTexts' from get_query_values could help if 'values' is unclear.

        Returns:
            list: The list of the dictionaries, where dictionary's key 'key' contains query parameters
                and key 'values' contains values. If anything fails (malformed input, request error),
                a dictionary {'error': <message>} is returned instead.

        Examples:
            >>> # First get topic code
            ... get_topics('Iedzīvotāji')
            {'Iedzīvotāji': 'POP'}
            >>> # Then use this code to get topic contents
            ... print(get_topic_content('POP'))
            {'Iedzīvotāju skaits un raksturojošie rādītāji': 'IR', 'Dzimstība': 'ID', 'Mirstība': 'IM', 'Nāves cēloņi': 'NC'...
            >>> # Then use this content code to extract report titles
            ... print(get_titles('IR'))
            {'Iedzīvotāju skaits gada sākumā, tā izmaiņas un dabiskās kustības galvenie rādītāji 1920 - 2021': ['POP', 'IR', 'IRS010'],...
            >>> # Use all these previous codes to select data for corresponding report
            ... report = ['POP', 'IR', 'IRE', 'IRE010']
            >>> # Get 'code' and 'values' for filtering data
            ... query_args = get_query_values(report)
            >>> print(query_args)
            [{'code': 'ETHNICITY', 'values': ['TOTAL', 'E_LAT',...], 'valueTexts': ['Pavisam', 'Latvieši',...]}, {'code': 'TIME', 'values': ['1935', '1959',..., '2025'],...}]
            >>> # Get final result
            ... data = run_get_csp_data(
            ...     lang='en',
            ...     topic_params_str=str(report),
            ...     query_kwargs_str="{'ETHNICITY': ['E_LAT'], 'TIME': ['2024', '2025']}"
            ... )
            >>> print(data[0])
            {'key': ['E_LAT', '2024'], 'values': ['1186337']}
        """
        try:
            # A blank textbox means "use the declared default" rather than a
            # syntax error from literal_eval. literal_eval only accepts Python
            # literals, so it is safe on user-typed text.
            topic_params = ast.literal_eval((topic_params_str or '').strip() or '[]')
            query_kwargs = ast.literal_eval((query_kwargs_str or '').strip() or '{}')
            if not isinstance(topic_params, list) or not isinstance(query_kwargs, dict):
                raise ValueError("Input format error")
            return get_csp_data(lang=lang, topic_params=topic_params, **query_kwargs)
        except Exception as e:
            # UI/MCP boundary: report the problem in the JSON output instead
            # of crashing the event handler.
            return {"error": str(e)}

    run_button.click(fn=run_get_csp_data, inputs=[lang, topic_params_box, kwargs_box], outputs=output)


if __name__ == "__main__":
    demo.launch(mcp_server=True)