Spaces:
Sleeping
Sleeping
import numpy as np | |
import pandas as pd | |
import datasets | |
import streamlit as st | |
from streamlit_cytoscapejs import st_cytoscapejs | |
# Configure the page with Streamlit's "wide" layout.
st.set_page_config(layout="wide")

# Seed gene IDs can be passed as a comma-separated `gene_ids` URL query
# argument, so that it is possible to link to this page with a pre-filled
# network. Without the argument, two example genes are used.
query_params = st.query_params
input_gene_ids = (
    query_params["gene_ids"]
    if "gene_ids" in query_params
    else "TGME49_231630,TGME49_230210"
)

# The text area lists one gene per row, so turn the comma separators into
# newlines before the value is displayed.
input_gene_ids = "\n".join(input_gene_ids.split(","))
# Introductory text rendered at the top of the page. Paragraphs, the citation
# and the link list are separated by blank lines so that Markdown does not
# fold them into a single run-on paragraph.
st.markdown("""
# ToxoCEN Network
**ToxoCEN** is a co-expression network for *Toxoplasma gondii* built on 719 RNA-seq runs across 39 studies.
A pair of genes is said to be co-expressed when their expression is correlated across different conditions;
co-expression is often a marker for genes being involved in similar processes.

To Cite:

CS Arnold, Y Wang, VB Carruthers, MJ O'Meara
ToxoCEN: A Co-Expression Network for Toxoplasma gondii

* Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/ToxoCEN
* Full network and dataset: https://huggingface.co/datasets/maomlab/ToxoCEN

## Plot a network for a set of genes
Put a ``TGME49_######`` gene_id, one per row, to seed the network
""")
@st.cache_data
def _load_toxocen_table(table_name: str) -> pd.DataFrame:
    """Load one TSV table of the ``maomlab/ToxoCEN`` dataset as a DataFrame.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached with ``st.cache_data`` instead of being reloaded and
    converted to pandas on each rerun.

    Args:
        table_name: Base name of the table; the file ``<table_name>.tsv`` is
            requested from the dataset repository.

    Returns:
        The table converted to a pandas DataFrame.
    """
    dataset = datasets.load_dataset(
        path="maomlab/ToxoCEN",
        data_files={table_name: f"{table_name}.tsv"})
    return dataset[table_name].to_pandas()


# Per-gene annotations; the columns gene_id, TGME49_id, gene_name and
# description are read further down.
TGME49_transcript_annotations = _load_toxocen_table("TGME49_transcript_annotations")

# Co-expression hits; the columns gene_id_1, gene_id_2 and coexp_score are
# read further down.
top_coexp_hits = _load_toxocen_table("top_coexp_hits")
# Page layout: a narrow column for the gene ID input, a second narrow column
# (used further down for the download button) and padding filling the rest
# of the row.
col1, col3, padding = st.columns(spec=[0.2, 0.2, 0.6])

# Hits are kept only when their co-expression score is strictly greater than
# this value.
coexp_score_threshold = 0.85

# Editable list of seed genes, pre-filled with the value derived from the URL
# query arguments (or the built-in example genes).
with col1:
    input_gene_ids = st.text_area(
        label="Gene IDs",
        value=str(input_gene_ids),
        help="TGME49 Gene IDs e.g. TGME49_231630",
    )
##################################
# Parse and check the user input #
##################################
# One gene ID per line. Blank lines (e.g. a trailing newline in the text
# area) are dropped and repeated IDs are collapsed, keeping first-seen order,
# so that no empty or duplicate node ends up in the network.
seed_gene_ids = list(dict.fromkeys(
    gene_id.strip()
    for gene_id in input_gene_ids.splitlines()
    if gene_id.strip()))

# Edges of the network: every hit that starts at a seed gene and scores
# strictly above the threshold. Rows keep the order of the hits table. A
# single vectorised filter also copes with an empty seed list, for which
# pd.concat([]) would raise a ValueError.
neighbors = top_coexp_hits[
    top_coexp_hits.gene_id_1.isin(seed_gene_ids)
    & (top_coexp_hits.coexp_score > coexp_score_threshold)]

# Nodes of the network: the seeds first, then their co-expressed partners.
# A partner that is itself a seed is not listed a second time (node ids must
# be unique), and dict.fromkeys keeps a stable first-seen order where
# list(set(...)) could reorder the nodes between reruns.
seed_gene_id_set = set(seed_gene_ids)
neighbor_gene_ids = [
    gene_id
    for gene_id in dict.fromkeys(neighbors.gene_id_2)
    if gene_id not in seed_gene_id_set]
gene_ids = seed_gene_ids + neighbor_gene_ids
gene_types = ['seed'] * len(seed_gene_ids) + ['neighbor'] * len(neighbor_gene_ids)
# Look up the annotation of every node (seeds and neighbors alike).
# The annotation table is indexed by gene_id once instead of being scanned
# three times per gene; drop_duplicates keeps the first row per gene_id,
# which matches taking the first match of a boolean filter.
annotations_by_gene_id = (
    TGME49_transcript_annotations
    .drop_duplicates(subset="gene_id")
    .set_index("gene_id")[["TGME49_id", "gene_name", "description"]])

TGME49_ids = []
gene_names = []
descriptions = []
for gene_id in gene_ids:
    try:
        annotation = annotations_by_gene_id.loc[gene_id]
    except KeyError:
        # Unknown gene ID: report it to the user and keep the node with
        # empty annotation fields rather than aborting the whole page.
        st.error(f"Unable to locate TGME49_id for Gene ID: {gene_id}, it should be of the form 'TGME49_######'")
        TGME49_id = None
        gene_name = None
        description = None
    else:
        TGME49_id = annotation["TGME49_id"]
        gene_name = annotation["gene_name"]
        description = annotation["description"]
    TGME49_ids.append(TGME49_id)
    gene_names.append(gene_name)
    descriptions.append(description)

# One row per node. "description" must be the per-node list; passing the
# loop variable `description` would broadcast the last gene's description to
# every row.
node_info = pd.DataFrame({
    "gene_id": gene_ids,
    "gene_type": gene_types,
    "TGME49_id": TGME49_ids,
    "gene_name": gene_names,
    "description": descriptions})
# Build the Cytoscape.js element list: one entry per node followed by one
# entry per edge.
elements = []

# Nodes are labelled with their own gene name, falling back to the gene ID
# when no name is available (None for an unknown gene; pd.notna also covers
# a NaN name, should the annotation table contain empty cells).
# Initial positions are spread along a diagonal, 10 units apart.
for i, (gene_id, node_gene_name) in enumerate(zip(gene_ids, gene_names)):
    elements.append({
        "data": {
            "id": gene_id,
            "label": node_gene_name if pd.notna(node_gene_name) else gene_id},
        "position": {
            "x": i * 10,
            "y": i * 10}})

# Edges run from the seed gene to its co-expressed partner and carry the
# co-expression score as their label.
for source_gene_id, target_gene_id, coexp_score in zip(
        neighbors["gene_id_1"],
        neighbors["gene_id_2"],
        neighbors["coexp_score"]):
    elements.append({
        "data": {
            "source": source_gene_id,
            "target": target_gene_id,
            "label": coexp_score}})
# Offer the selected edges (the rows of `neighbors`) as a tab-separated file.
with col3:
    st.text('')  # help alignment with input box
    st.download_button(
        label="Download as TSV",
        data=neighbors.to_csv(sep='\t').encode('utf-8'),
        file_name="ToxoCEN_network.tsv",
        # The payload is tab-separated, so advertise the TSV media type
        # rather than text/csv.
        mime="text/tab-separated-values")
##########################################################
# Visual styling handed to the Cytoscape.js component: small rectangular
# nodes and thick edges.
node_style = {"width": 50, "height": 20, "shape": "rectangle"}
edge_style = {"width": 10}
# NOTE(review): "layout" is passed here as a stylesheet selector; confirm
# that st_cytoscapejs actually applies a layout given this way.
layout_style = {"name": "random"}
stylesheet = [
    {"selector": "node", "style": node_style},
    {"selector": "edge", "style": edge_style},
    {"selector": "layout", "style": layout_style},
]

st.title("ToxoCEN Network")

# Render the network; the component's return value is kept in
# `clicked_elements`.
clicked_elements = st_cytoscapejs(
    elements=elements,
    stylesheet=stylesheet,
    width=1500,
    height=1500,
)