Spaces:

giswqs
/

scholar

Running

App Files Files Community

giswqs committed on Jul 5, 2023

Commit

1da1c98

1 Parent(s): 615838e

Initial commit

Browse files

Files changed (22) hide show

.gitignore +132 -0
LICENSE +21 -0
Procfile +1 -0
app.py +23 -0
apps/google.py +95 -0
apps/grant.py +108 -0
apps/h_index.py +67 -0
apps/home.py +16 -0
apps/journal.py +310 -0
apps/orcid.py +146 -0
apps/organization.py +153 -0
apps/publication.py +122 -0
apps/researcher.py +240 -0
data/journals.json +280 -0
data/journals.xlsx +0 -0
multiapp.py +71 -0
packages.txt +0 -0
postBuild +6 -0
requirements.txt +14 -0
setup.sh +8 -0
streamlit_app.py +61 -0
streamlit_call.py +14 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,132 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+data/*.csv
+.vscode/
+private/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2021 Qiusheng Wu
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

Procfile ADDED Viewed

	@@ -0,0 +1 @@


1	+ web: sh setup.sh && streamlit run streamlit_app.py

app.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import scholarpy
+import streamlit as st
+from multiapp import MultiApp
+from apps import grant, home, journal, orcid, publication, researcher
+st.set_page_config(layout="wide")
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+apps = MultiApp()
+# Add all your application here
+apps.add_app("Home", home.app)
+apps.add_app("Grant", grant.app)
+apps.add_app("Journal", journal.app)
+apps.add_app("Publication", publication.app)
+apps.add_app("Researcher", researcher.app)
+apps.add_app("ORCID", orcid.app)
+# The main app
+apps.run()

apps/google.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import os
+import scholarpy
+import tempfile
+import pandas as pd
+import streamlit as st
+import leafmap.foliumap as leafmap
+import plotly.express as px
+from scholarly import scholarly
+# if "dsl" not in st.session_state:
+#     st.session_state["dsl"] = scholarpy.Dsl()
+def app():
+    """Render the "Search Google Scholar" page.
+    The user types a researcher name, picks one of the matching authors, and
+    the page then shows the author's basic information and publications in
+    the left column and the per-year plots and co-authors in the right column.
+    Author lists and author records are kept in ``st.session_state`` (keyed by
+    the typed name and by the author id) so reruns reuse them.
+    """
+    st.title("Search Google Scholar")
+    row1_col1, row1_col2 = st.columns([1, 1])
+    # Single slot reused for the transient "Searching..." / "Retrieving data..." messages.
+    placeholder = st.empty()
+    with row1_col1:
+        name = st.text_input("Enter a researcher name:", "")
+    if name:
+        placeholder.text("Searching...")
+        # Look the name up only once per session; later reruns read the stored list.
+        if name not in st.session_state:
+            authors = scholarpy.get_author_list(name)
+            st.session_state[name] = authors
+        else:
+            authors = st.session_state[name]
+        placeholder.empty()
+        if len(authors) == 0:
+            with row1_col1:
+                st.write("No results found")
+        else:
+            with row1_col1:
+                st.write("Found {} results:".format(len(authors)))
+                author = st.selectbox("Select a researcher:", authors)
+            if author:
+                placeholder.text("Retrieving data...")
+                # The author id is the text after the first "|" of the selected entry
+                # (presumably entries look like "<name> | <id>" -- confirm in scholarpy).
+                id = author.split("|")[1].strip()
+                # Same session-state reuse as above, this time keyed by author id.
+                if id not in st.session_state:
+                    record = scholarpy.get_author_record(id=id)
+                    st.session_state[id] = record
+                else:
+                    record = st.session_state[id]
+                basics = scholarpy.get_author_basics(
+                    record=record, return_df=True)
+                # The basics table is written to a tab-separated temp file and read back
+                # before display. NOTE(review): the reason for the round trip is not
+                # visible here -- confirm before simplifying.
+                out_csv = os.path.join(tempfile.gettempdir(), "basics.csv")
+                basics.to_csv(out_csv, sep="\t", index=False)
+                df = pd.read_csv(out_csv, sep="\t")
+                with row1_col1:
+                    st.header("Basic information")
+                    markdown = f"""Google Scholar Profile: <https://scholar.google.com/citations?user={id}>"""
+                    st.markdown(markdown)
+                    # Show the profile picture only when the record has a non-empty URL.
+                    if "url_picture" in record and len(record["url_picture"]) > 0:
+                        st.image(record["url_picture"])
+                    st.dataframe(df)
+                    leafmap.st_download_button(
+                        "Download data", df, csv_sep="\t")
+                pubs = scholarpy.get_author_pubs(record=record, return_df=True)
+                with row1_col1:
+                    st.header("Publications")
+                    st.text(f"Total number of publications: {len(pubs)}")
+                    st.dataframe(pubs)
+                    leafmap.st_download_button(
+                        "Download data", pubs, csv_sep="\t")
+                # Each helper returns a (stats, figure) pair because return_plot=True.
+                pubs_stats, pubs_fig = scholarpy.author_pubs_by_year(
+                    record=record, return_plot=True)
+                citations_stats, citations_fig = scholarpy.author_citations_by_year(
+                    record=record, return_plot=True)
+                with row1_col2:
+                    st.header("Plots")
+                    st.plotly_chart(pubs_fig)
+                    leafmap.st_download_button("Download data", pubs_stats,
+                                               file_name="data.csv", csv_sep="\t")
+                    st.plotly_chart(citations_fig)
+                    leafmap.st_download_button(
+                        "Download data", citations_stats, file_name="data.csv", csv_sep="\t")
+                    # NOTE(review): indexes record["coauthors"] directly, so a record
+                    # without that key would raise KeyError here.
+                    if len(record["coauthors"]) > 0:
+                        st.header("Co-authors")
+                        st.text(
+                            "Co-authors listed on Google Scholar profile only.")
+                        coauthors = scholarpy.get_author_coauthors(
+                            record=record, return_df=True)
+                        st.dataframe(coauthors)
+                        leafmap.st_download_button(
+                            "Download data", coauthors, file_name="data.csv", csv_sep="\t")
+                placeholder.empty()

apps/grant.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import os
+import scholarpy
+import pandas as pd
+import streamlit as st
+import leafmap.foliumap as leafmap
+import plotly.express as px
+# Runs at import time: make sure the session holds a scholarpy DSL client under
+# "dsl", which app() below reads back.
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+def app():
+    st.title("Search Grants")
+    dsl = st.session_state["dsl"]
+    (
+        row1_col1,
+        row1_col2,
+        row1_col3,
+        row1_col4,
+        row1_col5,
+    ) = st.columns([1, 0.5, 1, 1, 1])
+    (
+        row2_col1,
+        row2_col2,
+        row2_col3,
+        row2_col4,
+        row2_col5,
+    ) = st.columns([1, 0.5, 1, 1, 1])
+    with row1_col1:
+        keywords = st.text_input("Enter a keyword to search for")
+    with row1_col2:
+        exact_match = st.checkbox("Exact match", True)
+    with row1_col3:
+        scope = st.selectbox(
+            "Select a search scope",
+            [
+                "concepts",
+                "full_data",
+                "investigators",
+                "title_abstract_only",
+                "title_only",
+            ],
+            index=4,
+        )
+    with row1_col4:
+        years = st.slider("Select the start and end year:", 1950, 2030, (2010, 2025))
+    with row1_col5:
+        limit = st.slider("Select the number of grants to return", 1, 1000, 100)
+    if keywords:
+        result = dsl.search_grants_by_keyword(
+            keywords,
+            exact_match,
+            scope,
+            start_year=years[0],
+            end_year=years[1],
+            limit=limit,
+        )
+        df = scholarpy.json_to_df(result)
+        if limit > result.count_total:
+            limit = result.count_total
+        markdown = f"""
+        Returned grants: {limit} (total = {result.count_total})
+        """
+        with row2_col1:
+            st.markdown(markdown)
+        with row2_col2:
+            filter = st.checkbox("Apply a filter")
+        if filter:
+            countries = []
+            for row in df.itertuples():
+                countries.append(eval(row.funder_countries)[0]["name"])
+            df["funder_country"] = countries
+            with row2_col3:
+                filter_by = st.selectbox(
+                    "Select a filter",
+                    [
+                        "funder_country",
+                        "funding_org_name",
+                        "funding_org_acronym",
+                        "research_org_name",
+                    ],
+                )
+                df["funding_org_acronym"] = df["funding_org_acronym"].astype(str)
+                df["research_org_name"] = df["research_org_name"].astype(str)
+                options = df[filter_by].unique()
+                options.sort()
+            with row2_col4:
+                selected = st.selectbox("Select a filter value", options)
+                df = df[df[filter_by] == selected]
+            with row2_col5:
+                st.write("")
+        if df is not None:
+            st.dataframe(df)
+            leafmap.st_download_button("Download data", df, csv_sep="\t")

apps/h_index.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import dimcli
+import pandas as pd
+import sys
+import os
+import streamlit as st
+import scholarpy
+# Runs at import time: make sure the session holds a scholarpy DSL client under
+# "dsl", which app() below reads back.
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+@st.cache
+def the_H_function(sorted_citations_list, n=1):
+    """from a list of integers [n1, n2 ..] representing publications citations,
+    return the max list-position which is >= integer
+    eg
+    >>> the_H_function([10, 8, 5, 4, 3]) => 4
+    >>> the_H_function([25, 8, 5, 3, 3]) => 3
+    >>> the_H_function([1000, 20]) => 2
+    """
+    if sorted_citations_list and sorted_citations_list[0] >= n:
+        return the_H_function(sorted_citations_list[1:], n + 1)
+    else:
+        return n - 1
+def dim_login(key=None, endpoint=None):
+    if key is None:
+        KEY = os.environ.get("DIM_TOKEN")
+    if endpoint is None:
+        ENDPOINT = "https://app.dimensions.ai"
+    try:
+        dimcli.login(key=KEY, endpoint=ENDPOINT)
+        dsl = dimcli.Dsl()
+        return dsl
+    except:
+        raise Exception("Failed to login to Dimensions")
+@st.cache
+def get_pubs_df(dsl, researcher_id):
+    q = """search publications where researchers.id = "{}" return publications[id+title+doi+times_cited] sort by times_cited limit 1000"""
+    pubs = dsl.query(q.format(researcher_id))
+    return pubs.as_dataframe()
+@st.cache
+def get_citations(df):
+    return list(df.fillna(0)["times_cited"])
+def app():
+    dsl = st.session_state["dsl"]
+    researchER_id = st.text_input("Enter researcher ID:", "ur.013632443777.66")
+    df = get_pubs_df(dsl, researchER_id)
+    st.dataframe(df)
+    citations = get_citations(df)
+    h_index = the_H_function(citations)
+    st.write(f"H-index: {h_index}")

apps/home.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import streamlit as st
+def app():
+    st.title("Home")
+    st.text(
+        "Welcome to the Scholar Web App. Click on the left sidebar menu to explore."
+    )
+    markdown = """
+    Disclaimer: The data records are pulled from the [Dimensions.ai database](https://app.dimensions.ai), which might not be the most complete bibliometric database.
+    We plan to incorporate [Scopus](https://www.scopus.com) and [Google Scholar](https://scholar.google.com) in the near future. Don't be surprised if you see that
+    your publication records are not the same as your Google Scholar profile. This is a very preliminary version. A lot more features will be added in the future.
+    We would welcome any feedback. Please send feedback to Qiusheng Wu ([email protected]).
+    """
+    st.info(markdown)
+    st.image("https://i.imgur.com/ZNUJ9fF.gif")

apps/journal.py ADDED Viewed

	@@ -0,0 +1,310 @@

+import os
+import json
+import dimcli
+import pandas as pd
+import plotly.express as px
+import streamlit as st
+import scholarpy
+import leafmap.foliumap as leafmap
+# Runs at import time: make sure the session holds a scholarpy DSL client under
+# "dsl", which app() below reads back.
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+# Folder (relative to the working directory) where save()/read() keep their
+# CSV files; it is created at import time if it does not exist yet.
+FOLDER_NAME = "data"
+if not (os.path.exists(FOLDER_NAME)):
+    os.mkdir(FOLDER_NAME)
+def save(df, filename_dot_csv):
+    df.to_csv(FOLDER_NAME + "/" + filename_dot_csv, index=False)
+def read(filename_dot_csv):
+    df = pd.read_csv(FOLDER_NAME + "/" + filename_dot_csv)
+    return df
+@st.cache
+def get_token():
+    return os.environ.get("DIM_TOKEN")
+@st.cache
+def get_journals():
+    with open("data/journals.json") as f:
+        journals = json.load(f)
+    return journals
+@st.cache
+def read_excel(sheet_name):
+    df = pd.read_excel(
+        "data/journals.xlsx", sheet_name=sheet_name, index_col=False, engine="openpyxl"
+    )
+    df.set_index("Rank", inplace=True)
+    return df
+def app():
+    """Render the "Search Journals" page.
+    Two modes are offered through a radio button:
+    * "Search by journal title": look journals up by title, pick one, and list
+      its publications, optionally narrowed by keyword, scope and year range.
+    * "List Google Scholar journal categories": pick a category and journal
+      from ``data/journals.json``, query its publications, and show the
+      publications, authors, affiliations and two bar charts about researcher
+      ids and ORCIDs. The ORCIDs found are stored in
+      ``st.session_state["orcids"]``.
+    """
+    st.title("Search Journals")
+    dsl = st.session_state["dsl"]
+    search_type = st.radio(
+        "Select a search type",
+        ["Search by journal title", "List Google Scholar journal categories"],
+    )
+    if search_type == "Search by journal title":
+        row1_col1, row1_col2, row1_col3, _ = st.columns([1, 1, 2, 1])
+        with row1_col1:
+            name = st.text_input("Enter a journal title")
+        with row1_col2:
+            exact_match = st.checkbox("Exact match")
+        with row1_col3:
+            options = [
+                "book",
+                "book_series",
+                "proceeding",
+                "journal",
+                "preprint_platform",
+            ]
+            types = st.multiselect(
+                "Select journal types", options, ["journal", "book_series"]
+            )
+        if name:
+            result = dsl.search_journal_by_title(name, exact_match=exact_match)
+            if result is not None:
+                # Keep only the selected source types and list them alphabetically.
+                titles = result.as_dataframe()
+                titles = titles[titles["type"].isin(types)]
+                titles.sort_values("title", inplace=True)
+            else:
+                titles = pd.DataFrame()
+            # titles = titles.astype({"start_year": int})
+            if not titles.empty:
+                markdown = f"""
+                Returned Journals: {len(titles)}
+                """
+                st.markdown(markdown)
+                st.dataframe(titles)
+                # Selectbox label "<id> | <type> | <title>"; the id is split off again below.
+                titles["uid"] = (
+                    titles["id"] + " | " + titles["type"] + " | " + titles["title"]
+                )
+                row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
+                    [2.4, 1, 0.6, 1, 1]
+                )
+                with row2_col1:
+                    title = st.selectbox(
+                        "Select a journal title", titles["uid"].values.tolist()
+                    )
+                with row2_col2:
+                    keyword = st.text_input("Enter a keyword to search for")
+                with row2_col3:
+                    # Rebinds exact_match: from here on it refers to the keyword search,
+                    # not to the title search above.
+                    exact_match = st.checkbox("Exact match", True)
+                with row2_col4:
+                    scope = st.selectbox(
+                        "Select a search scope",
+                        [
+                            "authors",
+                            "concepts",
+                            "full_data",
+                            "full_data_exact",
+                            "title_abstract_only",
+                            "title_only",
+                        ],
+                        index=5,
+                    )
+                with row2_col5:
+                    years = st.slider(
+                        "Select the start and end year:", 1950, 2021, (1980, 2021)
+                    )
+                if title:
+                    journal_id = title.split(" | ")[0]
+                    # With a keyword, search inside the journal; otherwise list the journal's publications.
+                    if keyword:
+                        pubs = dsl.search_pubs_by_keyword(
+                            keyword, exact_match, scope, years[0], years[1], journal_id
+                        )
+                    else:
+                        pubs = dsl.search_pubs_by_journal_id(
+                            journal_id, years[0], years[1]
+                        )
+                    pubs_df = pubs.as_dataframe()
+                    if pubs_df is not None and (not pubs_df.empty):
+                        st.write(
+                            f"Total number of pulications: {pubs.count_total:,}. Display {min(pubs.count_total, 1000)} publications below."
+                        )
+                        # If the frame cannot be displayed as-is, fall back to the
+                        # frame produced by scholarpy.json_to_df.
+                        try:
+                            st.dataframe(pubs_df)
+                        except Exception as e:
+                            st.dataframe(scholarpy.json_to_df(pubs))
+                            # st.error("An error occurred: " + str(e))
+                        leafmap.st_download_button(
+                            "Download data", pubs_df, csv_sep="\t"
+                        )
+            else:
+                st.text("No results found")
+    elif search_type == "List Google Scholar journal categories":
+        st.markdown(
+            """
+        The journal categories are adopted from [Google Scholar](https://scholar.google.com/citations?view_op=top_venues&hl=en&inst=9897619243961157265).
+        See the list of journals [here](https://docs.google.com/spreadsheets/d/1uCEi3TsJCWl9QEZimvjlM8wjt7hNq3QvMqHGeT44HXQ/edit?usp=sharing).
+        """
+        )
+        # Reset the shared ORCID list; it is filled again below when publications are found.
+        st.session_state["orcids"] = None
+        # dsl = st.session_state["dsl"]
+        # token = get_token()
+        # dimcli.login(key=token, endpoint="https://app.dimensions.ai")
+        # dsl = dimcli.Dsl()
+        categories = get_journals()
+        row1_col1, row1_col2, _, row1_col3 = st.columns([1, 1, 0.05, 1])
+        with row1_col1:
+            category = st.selectbox("Select a category:", categories.keys())
+        # NOTE(review): `journal` is only bound inside this branch; if `category`
+        # were ever falsy, the `if journal:` below would raise NameError.
+        if category:
+            with row1_col2:
+                journal = st.selectbox("Select a journal:", categories[category].keys())
+        with row1_col3:
+            years = st.slider(
+                "Select the start and end year:", 1950, 2021, (1980, 2021)
+            )
+        if journal:
+            # The workbook sheet named after the category holds the journal metrics table.
+            pubs = read_excel(sheet_name=category)
+            with st.expander("Show journal metrics"):
+                st.dataframe(pubs)
+        journal_id = categories[category][journal]
+        # Query by journal id when the stored value starts with "jour";
+        # otherwise fall back to matching on the journal title.
+        if journal_id is not None and str(journal_id).startswith("jour"):
+            q_template = """search publications where
+                journal.id="{}" and
+                year>={} and
+                year<={}
+                return publications[id+title+doi+year+authors+type+pages+journal+issue+volume+altmetric+times_cited]
+                limit 1000"""
+            q = q_template.format(journal_id, years[0], years[1])
+        else:
+            q_template = """search publications where
+                journal.title="{}" and
+                year>={} and
+                year<={}
+                return publications[id+title+doi+year+authors+type+pages+journal+issue+volume+altmetric+times_cited]
+                limit 1000"""
+            q = q_template.format(journal, years[0], years[1])
+        pubs = dsl.query(q)
+        if pubs.count_total > 0:
+            st.header("Publications")
+            # NOTE(review): the message always says 1,000 even when fewer
+            # publications were returned.
+            st.write(
+                f"Total number of pulications: {pubs.count_total:,}. Display 1,000 publications below."
+            )
+            # Each table below is written to CSV and read back before display.
+            # NOTE(review): the reason for the round trip is not visible here.
+            df_pubs = pubs.as_dataframe()
+            save(df_pubs, "publications.csv")
+            df_pubs = read("publications.csv")
+            st.dataframe(df_pubs)
+            st.header("Authors")
+            authors = pubs.as_dataframe_authors()
+            st.write(
+                f"Total number of authors of the 1,000 pubs shown above: {authors.shape[0]:,}"
+            )
+            save(authors, "authors.csv")
+            df_authors = read("authors.csv")
+            st.dataframe(df_authors)
+            df_authors_orcid = df_authors[~df_authors["orcid"].isna()]
+            # st.dataframe(df_authors_orcid)
+            orcids = list(set(df_authors_orcid["orcid"].values.tolist()))
+            # Keeps characters 2..20 of each value -- presumably strips the "['" / "']"
+            # wrapping left by the CSV round trip around a 19-character ORCID; confirm.
+            orcids = [i[2:21] for i in orcids]
+            orcids.sort()
+            # st.write(orcids)
+            st.session_state["orcids"] = orcids
+            st.header("Affiliations")
+            affiliations = pubs.as_dataframe_authors_affiliations()
+            st.write(
+                f"Total number of affiliations of the 1,000 pubs shown above: {affiliations.shape[0]:,}"
+            )
+            save(affiliations, "affiliations.csv")
+            df_affiliations = read("affiliations.csv")
+            st.dataframe(df_affiliations)
+            # Authors whose researcher_id is not the empty string.
+            researchers = authors.query("researcher_id!=''")
+            #
+            df_researchers = pd.DataFrame(
+                {
+                    "measure": [
+                        "Authors in total (non unique)",
+                        "Authors with a researcher ID",
+                        "Authors with a researcher ID (unique)",
+                    ],
+                    "count": [
+                        len(authors),
+                        len(researchers),
+                        researchers["researcher_id"].nunique(),
+                    ],
+                }
+            )
+            fig_researchers = px.bar(
+                df_researchers,
+                x="measure",
+                y="count",
+                title=f"Author Research ID stats for {journal} ({years[0]}-{years[1]})",
+            )
+            # Rebinds `orcids` from the list stored in session state above to a
+            # data frame of authors whose orcid is not the empty string.
+            orcids = authors.query("orcid!=''")
+            #
+            df_orcids = pd.DataFrame(
+                {
+                    "measure": [
+                        "Authors in total (non unique)",
+                        "Authors with a ORCID",
+                        "Authors with a ORCID (unique)",
+                    ],
+                    "count": [
+                        len(authors),
+                        len(orcids),
+                        orcids["orcid"].nunique(),
+                    ],
+                }
+            )
+            fig_orcids = px.bar(
+                df_orcids,
+                x="measure",
+                y="count",
+                title=f"Author ORCID stats for {journal} ({years[0]}-{years[1]})",
+            )
+            st.header("Stats")
+            # NOTE(review): the second name reuses `row1_col2` from the widget row
+            # above; it is simply the right-hand column of this two-column row.
+            row2_col1, row1_col2 = st.columns(2)
+            with row2_col1:
+                st.plotly_chart(fig_researchers)
+            with row1_col2:
+                st.plotly_chart(fig_orcids)
+        else:
+            st.warning("No publications found")

apps/orcid.py ADDED Viewed

	@@ -0,0 +1,146 @@

+from geemap.common import ee_initialize
+import requests
+import folium
+import scholarpy
+import streamlit as st
+import geemap.foliumap as geemap
+# Runs at import time: make sure the session holds a scholarpy DSL client under
+# "dsl", which app() below reads back.
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+def get_orcid_data(orcid, info_type=None):
+    """Retrieve ORCID data based on an ORCID and information type.
+    Args:
+        orcid (str): The ORCID to retrieve data for, e.g., 0000-0001-5437-4073
+        info_type (str): The type of information to retrieve, e.g., educations, employments, works
+    Returns:
+        dict: The ORCID data as a dictionary.
+    """
+    headers = {
+        "Accept": "application/vnd.orcid+json",
+    }
+    if info_type is not None:
+        url = f"https://pub.orcid.org/v3.0/{orcid}/{info_type}"
+    else:
+        url = f"https://pub.orcid.org/v3.0/{orcid}"
+    response = requests.get(url, headers=headers)
+    return response.json()
+def get_education_data(orcid):
+    result = get_orcid_data(orcid, "educations")
+    affiliations = result["affiliation-group"]
+    info_dict = {}
+    try:
+        for affiliation in affiliations:
+            summary = affiliation["summaries"][0]["education-summary"]
+            name = summary["source"]["source-name"]["value"]
+            role = summary["role-title"]
+            organization = summary["organization"]["name"]
+            start_year = summary["start-date"]["year"]["value"]
+            end_year = summary["end-date"]["year"]["value"]
+            # start_date = (
+            #     summary["start-date"]["year"]["value"]
+            #     + "-"
+            #     + summary["start-date"]["month"]["value"]
+            #     + "-"
+            #     + summary["start-date"]["day"]["value"]
+            # )
+            # end_date = (
+            #     summary["end-date"]["year"]["value"]
+            #     + "-"
+            #     + summary["end-date"]["month"]["value"]
+            #     + "-"
+            #     + summary["end-date"]["day"]["value"]
+            # )
+            city = summary["organization"]["address"]["city"]
+            region = summary["organization"]["address"]["region"]
+            country = summary["organization"]["address"]["country"]
+            address_list = [city, region, country]
+            address = ", ".join([i for i in address_list if i])
+            # address = city + ", " + region + ", " + country
+            coords = geemap.geocode(address)[0]
+            lat = coords.lat
+            lng = coords.lng
+            info_dict[role] = {
+                "name": name,
+                "organization": organization,
+                "start_year": start_year,
+                "end_year": end_year,
+                "city": city,
+                "region": region,
+                "country": country,
+                "address": address,
+                "lat": lat,
+                "lng": lng,
+            }
+    except:
+        pass
+    return info_dict
+def app():
+    """Render the "Retrieve ORCID Data" page.
+    The user types a researcher name and picks one of the ORCIDs found for it.
+    The education records of that ORCID are fetched with get_education_data(),
+    listed in the left column, and added as markers to a map shown in the
+    right column.
+    """
+    dsl = st.session_state["dsl"]
+    st.title("Retrieve ORCID Data")
+    # World-level map; ee_initialize=False is passed so creating the map does
+    # not initialize Earth Engine.
+    m = geemap.Map(center=(20, 0), zoom=2, ee_initialize=False)
+    row1_col1, row1_col2 = st.columns(2)
+    with row1_col1:
+        name = st.text_input("Enter a researcher name", "")
+    if name:
+        orcids = dsl.search_orcid_by_name(name, return_list=True)
+        with row1_col2:
+            if orcids is not None:
+                selected = st.selectbox("Select an ORCID", orcids)
+            else:
+                selected = None
+                st.write("No ORCID found.")
+        #     orcids = ["0000-0001-5437-4073", "0000-0001-6157-5519"]
+        #     if st.session_state.get("orcids", []) is not None:
+        #         orcids = orcids + st.session_state.get("orcids", [])
+        #     selected_orcid = st.selectbox("Select an ORCID:", orcids)
+        # with row1_col2:
+        #     orcid = st.text_input("Enter an ORCID:", selected_orcid)
+        row2_col1, row2_col2 = st.columns([1, 1])
+        if selected is not None:
+            # The ORCID is the text after the first "|" of the selected entry
+            # (presumably entries look like "<name> | <orcid>" -- confirm in scholarpy).
+            orcid = selected.split("|")[1].strip()
+            education_data = get_education_data(orcid)
+            roles = list(education_data.keys())
+            # One map marker per education record, with an HTML popup.
+            for role in roles:
+                popup = f"<b>Name: </b>{education_data[role]['name']}<br><b>Organization: </b>{education_data[role]['organization']}<br><b>Degree: </b>{role}"
+                marker = folium.Marker(
+                    [education_data[role]["lat"], education_data[role]["lng"]],
+                    popup=popup,
+                )
+                marker.add_to(m)
+            with row2_col1:
+                markdown = f"""ORCID URL: <https://orcid.org/{orcid}>"""
+                st.markdown(markdown)
+                if len(education_data) > 0:
+                    st.write("Education:")
+                    st.write(education_data)
+                else:
+                    st.write("No education data found.")
+            with row2_col2:
+                m.to_streamlit()

apps/organization.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import os
+import scholarpy
+import pandas as pd
+import streamlit as st
+import leafmap.foliumap as leafmap
+import plotly.express as px
+# Runs at import time: make sure the session holds a scholarpy DSL client under
+# "dsl", which app() below reads back.
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+def app():
+    """Render the "Search Organizations" page.
+    The user enters an organization name and picks one of the matches returned
+    by ``dsl.search_org_by_name``. For the selected organization the page shows,
+    spread over two columns: its details, publications per year, top funders,
+    grants per year, the list of grants, the most cited publications (recent and
+    all time) and the research areas / journals of the most cited publications.
+    The ``dsl`` object is read from ``st.session_state["dsl"]``.
+    """
+    st.title("Search Organizations")
+    dsl = st.session_state["dsl"]
+    row1_col1, row1_col2 = st.columns([1, 1])
+    with row1_col1:
+        name = st.text_input("Enter an organization name:", "")
+    if not name:
+        return
+    orgs = dsl.search_org_by_name(
+        name, exact_match=False, return_list=True)
+    # An empty result would make the selectbox return None and break the "|"
+    # split below, so it is handled the same way as "no result".
+    if orgs is None or len(orgs) == 0:
+        st.text("No organizations found")
+        return
+    with row1_col1:
+        selected_org = st.selectbox("Select a organization id:", orgs)
+    # The text before the first "|" of the selected option is used as the id.
+    org_id = selected_org.split("|")[0].strip()
+    id_info = dsl.search_org_by_id(org_id)
+    info_df = scholarpy.json_to_df(id_info, transpose=True)
+    # NOTE(review): json_to_df is assumed to possibly return None for an empty
+    # result - confirm against scholarpy. Guarding costs nothing either way.
+    has_info = info_df is not None and not info_df.empty
+    if has_info:
+        info_df.rename(
+            columns={info_df.columns[0]: "Type",
+                     info_df.columns[1]: "Value"},
+            inplace=True,
+        )
+    with row1_col1:
+        st.header("Organization Information")
+        if has_info:
+            st.dataframe(info_df)
+            leafmap.st_download_button(
+                "Download data", info_df, csv_sep="\t"
+            )
+        else:
+            st.text("No information found")
+    with row1_col2:
+        years = st.slider(
+            "Select the start and end year:", 1950, 2030, (1980, 2021))
+        st.header("Publications by year")
+        pubs, pubs_fig = dsl.org_pubs_annual_stats(
+            org_id, start_year=years[0], end_year=years[1], return_plot=True)
+        # Only read the counts when there is data; the figure is checked for
+        # None below, so the table may be missing as well.
+        if pubs is not None:
+            st.text(
+                f'Total number of publications: {pubs["count"].sum():,}')
+        if pubs_fig is not None:
+            st.plotly_chart(pubs_fig)
+            leafmap.st_download_button(
+                "Download data",
+                pubs,
+                file_name="data.csv",
+                csv_sep="\t",
+            )
+        else:
+            st.text("No publications found")
+    with row1_col1:
+        st.header("Top funders")
+        funder_count = st.slider(
+            "Select the number of funders:", 1, 100, 20)
+        funders, funders_fig = dsl.org_grant_funders(
+            org_id, limit=funder_count, return_plot=True)
+        if funders is not None:
+            st.text(
+                f'Total funding amount: ${funders["funding"].sum():,}')
+        if funders_fig is not None:
+            st.plotly_chart(funders_fig)
+            leafmap.st_download_button(
+                "Download data",
+                funders,
+                file_name="data.csv",
+                csv_sep="\t",
+            )
+        else:
+            st.text("No funders found")
+    with row1_col2:
+        st.header("The number of grants by year")
+        grants, fig_count, fig_amount = dsl.org_grants_annual_stats(
+            org_id, start_year=years[0], end_year=years[1], return_plot=True)
+        # Guarded like the publication and funder sections above.
+        if fig_count is not None:
+            st.plotly_chart(fig_count)
+        if fig_amount is not None:
+            st.plotly_chart(fig_amount)
+        if grants is not None:
+            leafmap.st_download_button(
+                "Download data",
+                grants,
+                file_name="data.csv",
+                csv_sep="\t",
+            )
+        else:
+            st.text("No grants found")
+    with row1_col1:
+        st.header("List of grants")
+        st.text("Only the first 1000 grants are shown")
+        grants_result = dsl.search_grants_by_org(
+            org_id, start_year=years[0], end_year=years[1])
+        grants_df = grants_result.as_dataframe()
+        if not grants_df.empty:
+            st.dataframe(grants_df)
+            leafmap.st_download_button(
+                "Download data", grants_df, file_name="data.csv", csv_sep="\t"
+            )
+    with row1_col1:
+        st.header("Publications most cited in last 2 years")
+        recent_result = dsl.org_pubs_most_cited(org_id, recent=True, limit=100)
+        recent_df = scholarpy.json_to_df(recent_result, transpose=False)
+        if recent_df is not None and not recent_df.empty:
+            st.dataframe(recent_df)
+            leafmap.st_download_button(
+                "Download data", recent_df, file_name="data.csv", csv_sep="\t"
+            )
+    with row1_col2:
+        st.header("Publications most cited - all time")
+        alltime_result = dsl.org_pubs_most_cited(org_id, recent=False, limit=100)
+        alltime_df = scholarpy.json_to_df(alltime_result, transpose=False)
+        if alltime_df is not None and not alltime_df.empty:
+            st.dataframe(alltime_df)
+            leafmap.st_download_button(
+                "Download data", alltime_df, file_name="data.csv", csv_sep="\t"
+            )
+    areas_df, area_fig, journal_fig = dsl.org_pubs_top_areas(org_id, return_plot=True)
+    if areas_df is not None and not areas_df.empty:
+        with row1_col1:
+            st.header("Research areas of most cited publications")
+            st.plotly_chart(area_fig)
+        with row1_col2:
+            st.header("Journals of most cited publications")
+            st.plotly_chart(journal_fig)
+            leafmap.st_download_button(
+                "Download data", areas_df, file_name="data.csv", csv_sep="\t"
+            )

apps/publication.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import os
+import scholarpy
+import pandas as pd
+import streamlit as st
+import leafmap.foliumap as leafmap
+import plotly.express as px
+# Create the scholarpy Dsl object once and keep it in Streamlit session state
+# under "dsl"; app() reads it back from there.
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+def app():
+    """Render the "Search Publications" page.
+    The first row of widgets collects the keyword, the exact-match flag, the
+    search scope, the year range and the result limit. When a keyword is given
+    the matches from ``dsl.search_pubs_by_keyword`` are shown as a table
+    (optionally filtered by journal), followed by a per-journal publication
+    count and a box plot of the number of affiliation countries per year.
+    The ``dsl`` object is read from ``st.session_state["dsl"]``.
+    """
+    st.title("Search Publications")
+    dsl = st.session_state["dsl"]
+    (
+        row1_col1,
+        row1_col2,
+        row1_col3,
+        row1_col4,
+        row1_col5,
+    ) = st.columns([1, 0.7, 1, 1, 1])
+    row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
+        [1, 0.7, 1, 1, 1]
+    )
+    with row1_col1:
+        keywords = st.text_input("Enter a keyword to search for")
+    with row1_col2:
+        exact_match = st.checkbox("Exact match", True)
+    with row1_col3:
+        scope = st.selectbox(
+            "Select a search scope",
+            [
+                "authors",
+                "concepts",
+                "full_data",
+                "full_data_exact",
+                "title_abstract_only",
+                "title_only",
+            ],
+            index=5,
+        )
+    with row1_col4:
+        years = st.slider("Select the start and end year:", 1950, 2030, (1980, 2022))
+    with row1_col5:
+        limit = st.slider("Select the number of publications to return", 1, 1000, 100)
+    if keywords:
+        result = dsl.search_pubs_by_keyword(
+            keywords,
+            exact_match,
+            scope,
+            start_year=years[0],
+            end_year=years[1],
+            limit=limit,
+        )
+        df = scholarpy.json_to_df(result)
+        # NOTE(review): json_to_df is assumed to possibly return None for an
+        # empty result - confirm against scholarpy. Stop here instead of
+        # failing in the merge below.
+        if df is None or df.empty:
+            st.text("No publications found")
+            return
+        affiliations = result.as_dataframe_authors_affiliations()
+        country_df = affiliations.groupby(['pub_id'])['aff_country'].unique()
+        df = df.merge(country_df, left_on='id', right_on='pub_id')
+        # Count the non-empty country entries of every publication.
+        df['country_count'] = [c[c.astype(bool)].size for c in df['aff_country']]
+        # The journal column is optional, so everything journal related is
+        # skipped when it is absent.
+        has_journal = "journal.title" in df.columns
+        # Counted before the optional journal filter, so the summary below
+        # always covers the full result set.
+        journal_counts = df["journal.title"].value_counts() if has_journal else None
+        if limit > result.count_total:
+            limit = result.count_total
+        markdown = f"""
+        Returned Publications: {limit} (total = {result.count_total})
+        """
+        with row2_col1:
+            st.markdown(markdown)
+        with row2_col2:
+            filter_by_journal = st.checkbox("Filter by journal")
+        if filter_by_journal and has_journal:
+            df["journal.title"] = df["journal.title"].astype(str)
+            journals = df["journal.title"].unique()
+            journals.sort()
+            with row2_col3:
+                journal = st.selectbox("Select a journal", journals)
+            df = df[df["journal.title"] == journal]
+        with row2_col4:
+            st.write("")
+        with row2_col5:
+            st.write("")
+        st.dataframe(df)
+        leafmap.st_download_button("Download data", df, csv_sep="\t")
+        summary = None
+        if journal_counts is not None:
+            summary = pd.DataFrame(
+                {"Journal": journal_counts.index, "Count": journal_counts}
+            ).reset_index(drop=True)
+            markdown = f"""
+            - Total number of journals: **{len(summary)}**
+            """
+        row3_col1, row3_col2 = st.columns([1, 1])
+        with row3_col1:
+            if summary is not None:
+                st.markdown(markdown)
+                st.dataframe(summary)
+                leafmap.st_download_button("Download data", summary, csv_sep="\t")
+            else:
+                st.text("No journal publications")
+        with row3_col2:
+            fig = px.box(df, x='year', y='country_count', title='Country Counts')
+            st.plotly_chart(fig)

apps/researcher.py ADDED Viewed

	@@ -0,0 +1,240 @@

+import os
+import scholarpy
+import pandas as pd
+import streamlit as st
+import leafmap.foliumap as leafmap
+import plotly.express as px
+# Create the scholarpy Dsl object once and keep it in Streamlit session state
+# under "dsl"; app() reads it back from there.
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+# NOTE(review): st.cache is deprecated in newer Streamlit releases in favour of
+# st.cache_data / st.cache_resource - confirm the deployed version before switching.
+@st.cache(allow_output_mutation=True)
+def get_geonames():
+    """Return ``scholarpy.get_geonames()``; the call is wrapped in ``st.cache``."""
+    return scholarpy.get_geonames()
+def json_to_df(json_data, transpose=False):
+    """Convert a query result into a DataFrame that went through a CSV round trip.
+    Parameters
+    ----------
+    json_data:
+        object exposing ``as_dataframe()``.
+    transpose:
+        when True the frame is transposed first and its index is written out,
+        so the former column names come back as the first column.
+    Returns
+    -------
+    The re-parsed DataFrame, or None when ``as_dataframe()`` is empty.
+    """
+    import io
+    df = json_data.as_dataframe()
+    if df.empty:
+        return None
+    if transpose:
+        df = df.transpose()
+    # The CSV round trip is kept (presumably it flattens nested values into
+    # plain text columns), but it is done in memory: no temp file is created,
+    # so none can be left behind if parsing fails.
+    buffer = io.StringIO()
+    df.to_csv(buffer, index=transpose)
+    buffer.seek(0)
+    return pd.read_csv(buffer)
+def annual_pubs(pubs, col="year"):
+    """Count publications per distinct value of ``col``.
+    Returns a DataFrame with the columns "year" and "publications", ordered by
+    ascending ``col`` value, or None when ``pubs`` is None.
+    """
+    if pubs is None:
+        return None
+    counts = pubs[col].value_counts().sort_index()
+    return pd.DataFrame({"year": counts.index, "publications": counts.values})
+def annual_collaborators(pubs, col="year"):
+    """Build a bar chart of the summed ``authors_count`` per ``col`` value.
+    Returns the plotly figure, or None when ``pubs`` is None.
+    """
+    if pubs is None:
+        return None
+    # Sum only the plotted column. Summing the whole frame also aggregates the
+    # text columns, which is wasted work and, with pandas >= 2.0 (numeric_only
+    # defaults to False), can fail on values that cannot be added.
+    totals = pubs.groupby([col])["authors_count"].sum()
+    df2 = pd.DataFrame(
+        {"year": totals.index, "collaborators": totals.values}
+    )
+    return px.bar(
+        df2,
+        x="year",
+        y="collaborators",
+    )
+def annual_citations(pubs, col="year"):
+    """Build a bar chart of the summed ``times_cited`` per ``col`` value.
+    Returns the plotly figure, or None when ``pubs`` is None.
+    """
+    if pubs is None:
+        return None
+    # Sum only the plotted column. Summing the whole frame also aggregates the
+    # text columns, which is wasted work and, with pandas >= 2.0 (numeric_only
+    # defaults to False), can fail on values that cannot be added.
+    totals = pubs.groupby([col])["times_cited"].sum()
+    df2 = pd.DataFrame(
+        {"year": totals.index, "citations": totals.values})
+    return px.bar(
+        df2,
+        x="year",
+        y="citations",
+    )
+def the_H_function(sorted_citations_list, n=1):
+    """Return the h-index of a list of citation counts sorted in descending order.
+    The result is the largest position ``p`` (counted from ``n``) such that
+    every entry up to and including that position has at least as many
+    citations as its position; ``n - 1`` is returned when even the first entry
+    fails, or when the list is empty.
+    eg
+    >>> the_H_function([10, 8, 5, 4, 3])
+    4
+    >>> the_H_function([25, 8, 5, 3, 3])
+    3
+    >>> the_H_function([1000, 20])
+    2
+    """
+    # Iterative on purpose: the recursive form copied the remaining list on
+    # every step and hit the recursion limit for very long qualifying runs.
+    h = n - 1
+    for position, citations in enumerate(sorted_citations_list or (), start=n):
+        # "not >=" (rather than "<") also stops on values that do not compare,
+        # such as NaN, exactly like the original condition did.
+        if not citations >= position:
+            break
+        h = position
+    return h
+def app():
+    """Render the "Search Researchers" page.
+    The user enters a name and picks one of the matches returned by
+    ``dsl.search_researcher_by_name``. The left column then shows the
+    researcher details, the publication list with citation metrics, the
+    per-journal counts and the grants; the right column shows yearly
+    statistics, a map of collaborator institutions and the collaborator table.
+    The ``dsl`` object is read from ``st.session_state["dsl"]``.
+    """
+    st.title("Search Researchers")
+    dsl = st.session_state["dsl"]
+    row1_col1, row1_col2 = st.columns([1, 1])
+    with row1_col1:
+        name = st.text_input("Enter a researcher name:", "")
+    if name:
+        ids, names = dsl.search_researcher_by_name(name, return_list=True)
+        if ids.count_total > 0:
+            with row1_col1:
+                selected = st.selectbox("Select a researcher id:", names)
+            if selected:
+                # The text after the first "|" of the option is used as the id.
+                researcher_id = selected.split("|")[1].strip()
+                id_info = dsl.search_researcher_by_id(researcher_id, return_df=False)
+                # json_to_df returns None for an empty result, so the frame
+                # must be checked before it is renamed or displayed.
+                info_df = json_to_df(id_info, transpose=True)
+                has_info = info_df is not None and not info_df.empty
+                if has_info:
+                    info_df.rename(
+                        columns={info_df.columns[0]: "Type",
+                                 info_df.columns[1]: "Value"},
+                        inplace=True,
+                    )
+                with row1_col1:
+                    st.header("Researcher Information")
+                    if has_info:
+                        st.dataframe(info_df)
+                        leafmap.st_download_button(
+                            "Download data", info_df, csv_sep="\t"
+                        )
+                    else:
+                        st.text("No information found")
+                pubs = dsl.search_pubs_by_researcher_id(researcher_id)
+                df = json_to_df(pubs)
+                if df is not None:
+                    df1, df2 = dsl.researcher_annual_stats(
+                        pubs, geonames_df=get_geonames()
+                    )
+                    df3 = scholarpy.collaborator_locations(df2)
+                    with row1_col2:
+                        st.header("Researcher statistics")
+                        columns = ["pubs", "collaborators",
+                                   "institutions", "cities"]
+                        selected_columns = st.multiselect(
+                            "Select attributes to display:", columns, columns
+                        )
+                        if selected_columns:
+                            fig = scholarpy.annual_stats_barplot(
+                                df1, selected_columns)
+                            st.plotly_chart(fig)
+                        leafmap.st_download_button(
+                            "Download data",
+                            df1,
+                            file_name="data.csv",
+                            csv_sep="\t",
+                        )
+                        st.header("Map of collaborator institutions")
+                        markdown = f"""
+                        - Total number of collaborator institutions: **{len(df3)}**
+                        """
+                        st.markdown(markdown)
+                        m = leafmap.Map(
+                            center=[0, 0],
+                            zoom_start=1,
+                            latlon_control=False,
+                            draw_control=False,
+                            measure_control=False,
+                            locate_control=True,
+                        )
+                        m.add_points_from_xy(df3)
+                        m.to_streamlit(height=420)
+                        leafmap.st_download_button(
+                            "Download data",
+                            df3,
+                            file_name="data.csv",
+                            csv_sep="\t",
+                        )
+                        st.header("Publication counts with collaborators")
+                        collaborators = dsl.search_researcher_collaborators(
+                            researcher_id, pubs)
+                        markdown = f"""
+                        - Total number of collaborators: **{len(collaborators)}**
+                        """
+                        st.markdown(markdown)
+                        st.dataframe(collaborators)
+                        leafmap.st_download_button(
+                            "Download data",
+                            collaborators,
+                            file_name="data.csv",
+                            csv_sep="\t",
+                        )
+                else:
+                    st.text("No publications found")
+                with row1_col1:
+                    st.header("Publications")
+                    if df is not None:
+                        # the_H_function expects the counts in descending order.
+                        citations = df["times_cited"].values.tolist()
+                        citations.sort(reverse=True)
+                        h_index = the_H_function(citations)
+                        markdown = f"""
+                        - Total number of publications: **{len(df)}**
+                        - Total number of citations: **{df["times_cited"].sum()}**
+                        - i10-index: **{len(df[df["times_cited"]>=10])}**
+                        - h-index: **{h_index}**
+                        """
+                        st.markdown(markdown)
+                        st.dataframe(df)
+                        leafmap.st_download_button(
+                            "Download data", df, file_name="data.csv", csv_sep="\t"
+                        )
+                        if "journal.title" in df.columns:
+                            st.header("Publication counts by journal")
+                            journals = df["journal.title"].value_counts()
+                            summary = pd.DataFrame(
+                                {"Journal": journals.index, "Count": journals}
+                            ).reset_index(drop=True)
+                            markdown = f"""
+                            - Total number of journals: **{len(summary)}**
+                            """
+                            st.markdown(markdown)
+                            st.dataframe(summary)
+                            leafmap.st_download_button(
+                                "Download data",
+                                summary,
+                                file_name="data.csv",
+                                csv_sep="\t",
+                            )
+                        else:
+                            st.text("No journal publications")
+                    else:
+                        st.text("No publications found")
+                    grants = dsl.search_grants_by_researcher(researcher_id)
+                    grants_df = grants.as_dataframe()
+                    if not grants_df.empty:
+                        st.header("Grants")
+                        st.dataframe(grants_df)
+                        leafmap.st_download_button(
+                            "Download data", grants_df, file_name="data.csv", csv_sep="\t"
+                        )
+        else:
+            st.text("No results found.")

data/journals.json ADDED Viewed

	@@ -0,0 +1,280 @@

+{
+    "All Categories": {
+        "Nature": "jour.1018957",
+        "The New England Journal of Medicine": "jour.1014075",
+        "Science": "jour.1346339",
+        "IEEE/CVF Conference on Computer Vision and Pattern Recognition": null,
+        "The Lancet": "jour.1077219",
+        "Advanced Materials": "jour.1129018",
+        "Cell": "jour.1019114",
+        "Nature Communications": null,
+        "Chemical Reviews": "jour.1077147",
+        "International Conference on Learning Representations": null,
+        "JAMA": "jour.1081531",
+        "Neural Information Processing Systems": null,
+        "Proceedings of the National Academy of Sciences": null,
+        "Journal of the American Chemical Society": null,
+        "Angewandte Chemie": null,
+        "Chemical Society Reviews": null,
+        "Nucleic Acids Research": null,
+        "Renewable and Sustainable Energy Reviews": null,
+        "Journal of Clinical Oncology": null,
+        "Physical Review Letters": null,
+        "Advanced Energy Materials": null,
+        "Nature Medicine": null,
+        "International Conference on Machine Learning": null,
+        "Energy & Environmental Science": null,
+        "ACS Nano": null,
+        "Scientific Reports": null,
+        "European Conference on Computer Vision": null,
+        "The Lancet Oncology": null,
+        "Advanced Functional Materials": null,
+        "PLoS ONE": null,
+        "IEEE/CVF International Conference on Computer Vision": null,
+        "Nature Genetics": null,
+        "Journal of Cleaner Production": null,
+        "Nature Materials": null,
+        "Science of The Total Environment": null,
+        "Circulation": "jour.1009570",
+        "BMJ": "jour.1017377",
+        "Journal of the American College of Cardiology": null,
+        "Applied Catalysis B: Environmental": null,
+        "Science Advances": null,
+        "Nano Letters": null,
+        "Nature Energy": null,
+        "ACS Applied Materials & Interfaces": null,
+        "Journal of Materials Chemistry A": null,
+        "IEEE Access": null,
+        "Nature Biotechnology": null,
+        "Nano Energy": null,
+        "Nature Methods": null,
+        "Nature Nanotechnology": null,
+        "Cochrane Database of Systematic Reviews": null,
+        "The Astrophysical Journal": null,
+        "The Lancet Infectious Diseases": null,
+        "Applied Energy": null,
+        "European Heart Journal": null,
+        "Blood": "jour.1085025",
+        "American Economic Review": null,
+        "Immunity": "jour.1112054",
+        "Meeting of the Association for Computational Linguistics (ACL)": null,
+        "AAAI Conference on Artificial Intelligence": null,
+        "Gastroenterology": "jour.1017616",
+        "Neuron": "jour.1098485",
+        "Journal of High Energy Physics": null,
+        "IEEE Communications Surveys & Tutorials": null,
+        "Nature Neuroscience": null,
+        "Computers in Human Behavior": null,
+        "Chemical engineering journal": null,
+        "ACS Catalysis": null,
+        "Nature Reviews. Molecular Cell Biology": null,
+        "International Journal of Molecular Sciences": null,
+        "IEEE Transactions on Pattern Analysis and Machine Intelligence": null,
+        "Environmental Science & Technology": null,
+        "Monthly Notices of the Royal Astronomical Society": null,
+        "Cell Metabolism": null,
+        "Nature Physics": null,
+        "Physical Review D": null,
+        "Accounts of Chemical Research": null,
+        "Nature Photonics": null,
+        "Nature Climate Change": null,
+        "Chemistry of Materials": null,
+        "Molecular Cell": null,
+        "Clinical Infectious Diseases": null,
+        "Morbidity and Mortality Weekly Report": null,
+        "Nature Reviews Immunology": null,
+        "Gut": "jour.1077125",
+        "Annals of Oncology": null,
+        "Cell Reports": null,
+        "Journal of Business Research": null,
+        "Clinical Cancer Research": null,
+        "Frontiers in Microbiology": null,
+        "Journal of Hepatology": null,
+        "eLife": "jour.1046517",
+        "Bioinformatics": "jour.1345383",
+        "The Journal of Clinical Investigation": null,
+        "Science Translational Medicine": null,
+        "Water Research": null,
+        "Frontiers in Immunology": null,
+        "Small": "jour.1034711",
+        "Nature Immunology": null,
+        "JAMA Oncology": null,
+        "The Lancet Neurology": null
+    },
+    "Business, Economics & Management": {
+        "American Economic Review": "jour.1056580",
+        "Journal of Business Research": "jour.1028262",
+        "Tourism Management": null,
+        "Journal of Business Ethics": null,
+        "Journal of Financial Economics": null,
+        "The Quarterly Journal of Economics": null,
+        "The Review of Financial Studies": null,
+        "Technological Forecasting and Social Change": null,
+        "International Journal of Information Management": null,
+        "Management Science": null,
+        "Journal of Political Economy": null,
+        "International Journal of Production Economics": null,
+        "The Journal of Finance": null,
+        "Journal of Management": null,
+        "Strategic Management Journal": null,
+        "World Development": null,
+        "Journal of Retailing and Consumer Services": null,
+        "Academy of Management Journal": null,
+        "International Journal of Project Management": null,
+        "Energy Economics": null
+    },
+    "Chemical & Material Sciences": {
+        "Advanced Materials": "jour.1129018",
+        "Chemical Reviews": null,
+        "Journal of the American Chemical Society": null,
+        "Angewandte Chemie": null,
+        "Chemical Society Reviews": null,
+        "Advanced Energy Materials": null,
+        "Energy & Environmental Science": null,
+        "ACS Nano": null,
+        "Advanced Functional Materials": null,
+        "Nature Materials": null,
+        "Applied Catalysis B: Environmental": null,
+        "Nano Letters": null,
+        "Nature Energy": null,
+        "ACS Applied Materials & Interfaces": null,
+        "Journal of Materials Chemistry A": null,
+        "Nano Energy": null,
+        "Nature Nanotechnology": null,
+        "Chemical engineering journal": null,
+        "ACS Catalysis": null,
+        "Accounts of Chemical Research": null
+    },
+    "Engineering & Computer Science": {
+        "IEEE/CVF Conference on Computer Vision and Pattern Recognition": null,
+        "Advanced Materials": "jour.1129018",
+        "International Conference on Learning Representations": null,
+        "Neural Information Processing Systems": null,
+        "Renewable and Sustainable Energy Reviews": null,
+        "Advanced Energy Materials": null,
+        "International Conference on Machine Learning": null,
+        "Energy & Environmental Science": null,
+        "ACS Nano": null,
+        "European Conference on Computer Vision": null,
+        "Advanced Functional Materials": null,
+        "IEEE/CVF International Conference on Computer Vision": null,
+        "Journal of Cleaner Production": null,
+        "Nature Materials": null,
+        "Applied Catalysis B: Environmental": null,
+        "Nano Letters": null,
+        "Nature Energy": null,
+        "ACS Applied Materials & Interfaces": null,
+        "Journal of Materials Chemistry A": null,
+        "IEEE Access": null
+    },
+    "Health & Medical Sciences": {
+        "The New England Journal of Medicine": "jour.1014075",
+        "The Lancet": "jour.1077219",
+        "Cell": "jour.1019114",
+        "JAMA": "jour.1081531",
+        "Proceedings of the National Academy of Sciences": null,
+        "Journal of Clinical Oncology": null,
+        "Nature Medicine": null,
+        "The Lancet Oncology": null,
+        "PLoS ONE": null,
+        "Nature Genetics": null,
+        "Circulation": "jour.1009570",
+        "BMJ": "jour.1017377",
+        "Journal of the American College of Cardiology": null,
+        "Cochrane Database of Systematic Reviews": null,
+        "The Lancet Infectious Diseases": null,
+        "European Heart Journal": null,
+        "Blood": "jour.1085025",
+        "Immunity": "jour.1112054",
+        "Gastroenterology": "jour.1017616",
+        "Neuron": "jour.1098485"
+    },
+    "Humanities, Literature & Arts": {
+        "Digital Journalism": null,
+        "Journal of Communication": null,
+        "Journalism Studies": null,
+        "International Journal of Communication": null,
+        "Journalism": "jour.1138763",
+        "System": "jour.1137434",
+        "The Modern Language Journal": null,
+        "Media, Culture & Society": null,
+        "Synthese": "jour.1284232",
+        "Political Communication": null,
+        "Applied Linguistics": null,
+        "Language Learning": null,
+        "Public Opinion Quarterly": null,
+        "TESOL Quarterly": null,
+        "Journalism Practice": null,
+        "Feminist Media Studies": null,
+        "Studies in Second Language Acquisition": null,
+        "English Language Teaching": null,
+        "Language Teaching": null,
+        "Race Ethnicity and Education": null
+    },
+    "Life Sciences & Earth Sciences": {
+        "Nature": "jour.1018957",
+        "Science": "jour.1346339",
+        "Cell": "jour.1019114",
+        "Nature Communications": null,
+        "Proceedings of the National Academy of Sciences": null,
+        "Nucleic Acids Research": null,
+        "Scientific Reports": null,
+        "PLoS ONE": null,
+        "Nature Genetics": null,
+        "Science of The Total Environment": null,
+        "Science Advances": null,
+        "Nature Biotechnology": null,
+        "Nature Methods": null,
+        "Neuron": "jour.1098485",
+        "Nature Reviews. Molecular Cell Biology": null,
+        "International Journal of Molecular Sciences": null,
+        "Environmental Science & Technology": null,
+        "Cell Metabolism": null,
+        "Nature Climate Change": null,
+        "Molecular Cell": null
+    },
+    "Physics & Mathematics": {
+        "Physical Review Letters": null,
+        "The Astrophysical Journal": null,
+        "Journal of High Energy Physics": null,
+        "Monthly Notices of the Royal Astronomical Society": null,
+        "Nature Physics": null,
+        "Physical Review D": null,
+        "Nature Photonics": null,
+        "Physical Review B": null,
+        "Physical Review X": null,
+        "Astronomy & Astrophysics": null,
+        "The European Physical Journal C": null,
+        "Journal of Molecular Liquids": null,
+        "IEEE Transactions on Automatic Control": null,
+        "International Journal of Heat and Mass Transfer": null,
+        "Physics Letters B": null,
+        "IEEE Transactions on Geoscience and Remote Sensing": null,
+        "Reviews of Modern Physics": null,
+        "IEEE Transactions on Signal Processing": null,
+        "Geophysical Research Letters": null,
+        "Optica": "jour.1050828"
+    },
+    "Social Sciences": {
+        "Journal of Business Ethics": null,
+        "Computers & Education": null,
+        "Research Policy": null,
+        "New Media & Society": null,
+        "American Journal of Public Health": null,
+        "Global Environmental Change": null,
+        "Nature Human Behaviour": null,
+        "Health Affairs": null,
+        "Social Science & Medicine": null,
+        "Teaching and Teacher Education": null,
+        "Energy Research & Social Science": null,
+        "Information, Communication & Society": null,
+        "Land Use Policy": null,
+        "Academic Medicine": null,
+        "Studies in Higher Education": null,
+        "American Journal of Political Science": null,
+        "Review of Educational Research": null,
+        "Annals of Tourism Research": null,
+        "Cities": "jour.1027483",
+        "Business Strategy and the Environment": null
+    }
+}

data/journals.xlsx ADDED Viewed

Binary file (25.6 kB). View file

multiapp.py ADDED Viewed

	@@ -0,0 +1,71 @@

+"""Frameworks for running multiple Streamlit applications as a single app.
+"""
+import streamlit as st
+# app_state = st.experimental_get_query_params()
+# app_state = {k: v[0] if isinstance(v, list) else v for k, v in app_state.items()} # fetch the first item in each query string as we don't have multiple values for each query string key in this example
+class MultiApp:
+    """Framework for combining multiple streamlit applications.
+    Usage:
+        def foo():
+            st.title("Hello Foo")
+        def bar():
+            st.title("Hello Bar")
+        app = MultiApp()
+        app.add_app("Foo", foo)
+        app.add_app("Bar", bar)
+        app.run()
+    It is also possible to keep each application in a separate file.
+        import foo
+        import bar
+        app = MultiApp()
+        app.add_app("Foo", foo.app)
+        app.add_app("Bar", bar.app)
+        app.run()
+    """
+    def __init__(self):
+        """Start with an empty list of registered apps."""
+        # Each entry is a dict: {"title": <str>, "function": <callable>}.
+        self.apps = []
+    def add_app(self, title, func):
+        """Adds a new application.
+        Parameters
+        ----------
+        func:
+            the python function to render this app.
+        title:
+            title of the app. Appears in the radio list in the sidebar.
+        """
+        self.apps.append({"title": title, "function": func})
+    def run(self):
+        """Render the sidebar navigation and run the selected app.
+        The "page" query parameter preselects an app; the current selection is
+        written back to the query string so the URL reflects the open page.
+        """
+        app_state = st.experimental_get_query_params()
+        # Query parameters arrive as lists; only the first value of each key is
+        # used because no key is expected to carry several values.
+        app_state = {
+            k: v[0] if isinstance(v, list) else v for k, v in app_state.items()
+        }
+        titles = [a["title"] for a in self.apps]
+        functions = [a["function"] for a in self.apps]
+        # Fall back to the first app when "page" is missing or does not match a
+        # registered title, instead of raising ValueError on a hand-edited URL.
+        requested_page = app_state.get("page")
+        default_radio = titles.index(requested_page) if requested_page in titles else 0
+        st.sidebar.title("Navigation")
+        title = st.sidebar.radio("Go To", titles, index=default_radio, key="radio")
+        app_state["page"] = st.session_state.radio
+        st.experimental_set_query_params(**app_state)
+        functions[titles.index(title)]()
+        st.sidebar.title("About")
+        st.sidebar.info(
+            """
+            The web app URL: <https://scholar.gishub.org>. If you have any questions regarding this web app, please contact [Qiusheng Wu](https://wetlands.io) ([email protected]).
+        """
+        )

packages.txt ADDED Viewed

File without changes

postBuild ADDED Viewed

	@@ -0,0 +1,6 @@

+# enable nbserverproxy
+jupyter serverextension enable --sys-prefix nbserverproxy
+# streamlit launches at startup
+# The prefix is quoted so a path containing spaces is not word-split; the
+# python* glob stays outside the quotes so it still expands.
+mv streamlit_call.py "${NB_PYTHON_PREFIX}"/lib/python*/site-packages/
+# enable streamlit extension
+jupyter serverextension enable --sys-prefix streamlit_call

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+dimcli
+geopandas
+# jupyter-server-proxy
+# keplergl
+# nbserverproxy
+openpyxl
+streamlit
+streamlit-option-menu
+leafmap
+scholarpy
+geemap
+# git+https://github.com/giswqs/leafmap
+# git+https://github.com/giswqs/scholarpy
+# git+https://github.com/giswqs/geemap

setup.sh ADDED Viewed

	@@ -0,0 +1,8 @@

+# Write the Streamlit server configuration; $PORT is read from the environment.
+mkdir -p ~/.streamlit/
+# A here-document is used instead of echo "...\n...": whether echo expands "\n"
+# depends on the shell, and a literal backslash-n would yield an invalid TOML file.
+cat > ~/.streamlit/config.toml <<EOF
+[server]
+headless = true
+port = $PORT
+enableCORS = false
+EOF

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import scholarpy
+import streamlit as st
+from streamlit_option_menu import option_menu
+from apps import grant, home, google, journal, orcid, organization, publication, researcher
+st.set_page_config(page_title="Scholar Web App",
+                   page_icon="chart_with_upwards_trend",
+                   layout="wide")
+# A dictionary of apps in the format of {"App title": "App icon"}
+# More icons can be found here: https://icons.getbootstrap.com
+apps = {"home": {"title": "Home", "icon": "house"},
+        "grant": {"title": "Grant", "icon": "coin"},
+        "journal": {"title": "Journal", "icon": "journals"},
+        "publication": {"title": "Publication", "icon": "journal"},
+        "researcher": {"title": "Researcher", "icon": "person-circle"},
+        "orcid": {"title": "ORCID", "icon": "person-square"},
+        "organization": {"title": "Organization", "icon": "building"},
+        "google": {"title": "Google Scholar", "icon": "google"},
+        }
+titles = [app["title"] for app in apps.values()]
+icons = [app["icon"] for app in apps.values()]
+params = st.experimental_get_query_params()
+if "page" in params:
+    default_index = int(titles.index(params["page"][0].lower()))
+else:
+    default_index = 0
+with st.sidebar:
+    selected = option_menu(
+        "Main Menu",
+        options=titles,
+        icons=icons,
+        menu_icon="cast",
+        default_index=default_index,
+    )
+    # st.sidebar.title("About")
+    st.sidebar.info(
+        """
+        **Web App URL:**
+        <https://scholar.streamlit.app>
+        **Contact:**
+        - [Qiusheng Wu](https://geography.utk.edu/about-us/faculty/dr-qiusheng-wu)
+    """
+    )
+    st.image("https://i.imgur.com/2WhANKg.png")
+if "dsl" not in st.session_state:
+    st.session_state["dsl"] = scholarpy.Dsl()
+for app in apps:
+    if apps[app]["title"] == selected:
+        eval(f"{app}.app()")
+        break

streamlit_call.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from subprocess import Popen
+def load_jupyter_server_extension(nbapp):
+    """serve the streamlit app"""
+    Popen(
+        [
+            "streamlit",
+            "run",
+            "streamlit_app.py",
+            "--browser.serverAddress=0.0.0.0",
+            "--server.enableCORS=False",
+        ]
+    )