# ToxoCEN network viewer (Streamlit app).
#
# Reads seed gene IDs from the ``gene_ids`` URL query argument and/or a text
# area, looks up every gene whose co-expression score with a seed exceeds a
# fixed threshold in the ``top_coexp_hits`` table of the ``maomlab/ToxoCEN``
# dataset, and renders seeds + neighbors as a Cytoscape.js graph. The selected
# edges can also be downloaded as a TSV file.

import numpy as np
import pandas as pd
import datasets
import streamlit as st
from streamlit_cytoscapejs import st_cytoscapejs

st.set_page_config(layout='wide')


@st.cache_data
def _load_toxocen_table(table_name: str) -> pd.DataFrame:
    """Load ``<table_name>.tsv`` from the maomlab/ToxoCEN dataset as a DataFrame.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached to avoid re-reading the dataset on each rerun.
    """
    dataset = datasets.load_dataset(
        path="maomlab/ToxoCEN",
        data_files={table_name: f"{table_name}.tsv"},
    )
    return dataset[table_name].to_pandas()


def _node_label(gene_id, gene_name):
    """Return ``gene_name`` when it is a non-blank string, otherwise ``gene_id``.

    A failed annotation lookup stores ``None``; a missing value in the table
    may come back as NaN. Neither is a usable label, so fall back to the ID.
    """
    if isinstance(gene_name, str) and gene_name.strip():
        return gene_name
    return gene_id


# Parse out gene_ids from the URL query args so it's possible to link to this page.
query_params = st.query_params
if "gene_ids" in query_params:
    input_gene_ids = query_params["gene_ids"]
else:
    input_gene_ids = "TGME49_231630,TGME49_230210"

# Use "\n" as the separator so it shows correctly in the text area.
input_gene_ids = input_gene_ids.replace(",", "\n")

st.markdown("""
# ToxoCEN Network
**ToxoCEN** is a co-expression network for *Toxoplasma gondii* built on 719 RNA-seq runs across 39 studies.
A pair of genes are said to be co-expressed when their expression is correlated across different conditions and
is often a marker for genes to be involved in similar processes.

To Cite:
   CS Arnold, Y Wang, VB Carruthers, MJ O'Meara
   ToxoCEN: A Co-Expression Network for Toxoplasma gondii

* Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/ToxoCEN
* Full network and dataset: https://huggingface.co/datasets/maomlab/ToxoCEN

## Plot a network for a set of genes
Put a ``TGME49_######`` gene_id, one each row to seed the network
""")

TGME49_transcript_annotations = _load_toxocen_table("TGME49_transcript_annotations")
top_coexp_hits = _load_toxocen_table("top_coexp_hits")

col1, col3, padding = st.columns(spec=[0.2, 0.2, 0.6])
with col1:
    input_gene_ids = st.text_area(
        label="Gene IDs",
        value=input_gene_ids,
        help="TGME49 Gene IDs e.g. TGME49_231630")

# Only pairs scoring strictly above this value are drawn as edges.
coexp_score_threshold = 0.85

##################################
# Parse and check the user input #
##################################

# Accept newline- or comma-separated IDs, ignore blank rows, and drop repeated
# IDs while keeping the order the user typed them in.
seed_gene_ids = list(dict.fromkeys(
    gene_id.strip()
    for gene_id in input_gene_ids.replace(",", "\n").splitlines()
    if gene_id.strip()))

if not seed_gene_ids:
    st.warning("Enter at least one TGME49 Gene ID to seed the network.")

# One vectorized filter instead of one filter per seed; this also yields an
# empty frame (rather than raising) when no seed was given.
neighbors = top_coexp_hits[
    top_coexp_hits["gene_id_1"].isin(seed_gene_ids)
    & (top_coexp_hits["coexp_score"] > coexp_score_threshold)]

# A seed that is co-expressed with another seed must not be added a second
# time as a neighbor node (node ids have to be unique). Order of first
# appearance keeps the node layout stable between reruns.
seed_gene_id_set = set(seed_gene_ids)
neighbor_gene_ids = [
    gene_id
    for gene_id in dict.fromkeys(neighbors["gene_id_2"])
    if gene_id not in seed_gene_id_set]

gene_ids = seed_gene_ids + neighbor_gene_ids
gene_types = ['seed'] * len(seed_gene_ids) + ['neighbor'] * len(neighbor_gene_ids)

# Index the annotations once by gene_id (first row wins on duplicates, matching
# a ``.values[0]`` lookup) instead of scanning the table three times per gene.
annotations_by_gene_id = (
    TGME49_transcript_annotations
    .drop_duplicates(subset="gene_id")
    .set_index("gene_id"))

TGME49_ids = []
gene_names = []
descriptions = []
for gene_id in gene_ids:
    try:
        annotation = annotations_by_gene_id.loc[gene_id]
        TGME49_id = annotation["TGME49_id"]
        gene_name = annotation["gene_name"]
        description = annotation["description"]
    except KeyError:
        st.error(f"Unable to locate TGME49_id for Gene ID: {gene_id}, it should be of the form 'TGME49_######'")
        TGME49_id = None
        gene_name = None
        description = None
    TGME49_ids.append(TGME49_id)
    gene_names.append(gene_name)
    descriptions.append(description)

node_info = pd.DataFrame({
    "gene_id": gene_ids,
    "gene_type": gene_types,
    "TGME49_id": TGME49_ids,
    "gene_name": gene_names,
    "description": descriptions})

# Cytoscape elements: one node per gene, then one edge per selected pair.
elements = []
for i, (gene_id, gene_name) in enumerate(zip(gene_ids, gene_names)):
    elements.append({
        "data": {
            "id": gene_id,
            "label": _node_label(gene_id, gene_name)},
        "position": {
            "x": i * 10,
            "y": i * 10}})

for source, target, score in zip(
        neighbors["gene_id_1"],
        neighbors["gene_id_2"],
        neighbors["coexp_score"]):
    elements.append({
        "data": {
            "source": source,
            "target": target,
            # plain float so the element is JSON-serializable
            "label": float(score)}})

with col3:
    st.text('')  # help alignment with input box
    st.download_button(
        label="Download as TSV",
        data=neighbors.to_csv(sep='\t').encode('utf-8'),
        file_name="ToxoCEN_network.tsv",
        mime="text/tab-separated-values")

##########################################################

# NOTE(review): "layout" is kept as in the original; it does not look like a
# node/edge selector, so it may have no effect on rendering -- confirm.
stylesheet = [
    {"selector": "node", "style": {"width": 50, "height": 20, "shape": "rectangle"}},
    {"selector": "edge", "style": {"width": 10}},
    {"selector": "layout", "style": {"name": "random"}}
]

st.title("ToxoCEN Network")
clicked_elements = st_cytoscapejs(
    elements=elements,
    stylesheet=stylesheet,
    width=1500,
    height=1500)