Spaces:

maomlab
/

ToxoCEN-Network

Sleeping

App Files Files Community

ToxoCEN-Network / app.py

maom

try setting a random layout

965ee30 verified about 1 year ago

raw

history blame

4.74 kB


	import numpy as np
	import pandas as pd
	import datasets
	import streamlit as st
	from streamlit_cytoscapejs import st_cytoscapejs

	st.set_page_config(layout='wide')

	# Seed gene IDs can be passed in the URL query string (key "gene_ids",
	# comma separated) so that it is possible to link directly to this page.
	# Without that key, a default pair of genes is used.
	query_params = st.query_params
	input_gene_ids = (
	    query_params["gene_ids"]
	    if "gene_ids" in query_params
	    else "TGME49_231630,TGME49_230210"
	)

	# The text area shows one gene ID per row, so swap the commas for newlines.
	input_gene_ids = input_gene_ids.replace(",", "\n")



	# Introductory text rendered at the top of the page: what ToxoCEN is, how
	# to cite it, where the code and data live, and how to seed the network.
	intro_markdown = """
	# ToxoCEN Network
	ToxoCEN is a co-expression network for Toxoplasma gondii built on 719 RNA-seq runs across 39 studies.
	A pair of genes are said to be co-expressed when their expression is correlated across different conditions and
	is often a marker for genes to be involved in similar processes.
	To Cite:
	CS Arnold, Y Wang, VB Carruthers, MJ O'Meara
	ToxoCEN: A Co-Expression Network for Toxoplasma gondii
	* Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/ToxoCEN
	* Full network and dataset: https://huggingface.co/datasets/maomlab/ToxoCEN
	## Plot a network for a set of genes
	Put a ``TGME49_######`` gene_id, one each row to seed the network
	"""
	st.markdown(intro_markdown)

	@st.cache_data
	def _load_toxocen_table(table_name):
	    """Load one TSV table of the ``maomlab/ToxoCEN`` dataset as a DataFrame.

	    Streamlit re-executes the whole script on every widget interaction, so
	    the result is cached with ``st.cache_data``; the dataset is then only
	    loaded and converted to pandas once per distinct ``table_name``.

	    Args:
	        table_name: Base name of the table; the file ``<table_name>.tsv``
	            is requested from the dataset repository.

	    Returns:
	        The table as a ``pandas.DataFrame``.
	    """
	    dataset = datasets.load_dataset(
	        path="maomlab/ToxoCEN",
	        data_files={table_name: f"{table_name}.tsv"})
	    return dataset[table_name].to_pandas()


	# Per-gene annotations; the script reads the gene_id, TGME49_id, gene_name
	# and description columns.
	TGME49_transcript_annotations = _load_toxocen_table("TGME49_transcript_annotations")

	# Co-expression hits; the script reads the gene_id_1, gene_id_2 and
	# coexp_score columns.
	top_coexp_hits = _load_toxocen_table("top_coexp_hits")


	# Three columns: the gene ID input (col1), a second narrow column (col3),
	# and a wide third column that the visible code never writes to.
	col1, col3, padding = st.columns(spec=[0.2, 0.2, 0.6])
	with col1:
	    # Pre-filled with the IDs taken from the URL (or the defaults); the
	    # widget's current content replaces that initial value.
	    input_gene_ids = st.text_area(
	        label="Gene IDs",
	        value=str(input_gene_ids),
	        help="TGME49 Gene IDs e.g. TGME49_231630",
	    )

	# Minimum co-expression score used when selecting network neighbors.
	coexp_score_threshold = 0.85

	##################################
	# Parse and check the user input #
	##################################

	# One gene ID per row. Surrounding whitespace is stripped, blank rows are
	# dropped and repeated IDs are collapsed (dict.fromkeys keeps the order in
	# which the user entered them), so every seed yields exactly one node.
	seed_gene_ids = list(dict.fromkeys(
	    gene_id.strip()
	    for gene_id in input_gene_ids.splitlines()
	    if gene_id.strip()))

	if not seed_gene_ids:
	    st.warning("Enter at least one TGME49 gene ID to plot a network.")

	# Apply the score threshold once, then collect the hits of each seed in
	# the order the seeds were given.
	strong_hits = top_coexp_hits[top_coexp_hits.coexp_score > coexp_score_threshold]
	neighbors = [
	    strong_hits[strong_hits.gene_id_1 == seed_gene_id]
	    for seed_gene_id in seed_gene_ids]

	# pd.concat raises on an empty list, so fall back to an empty frame that
	# still carries the expected columns.
	neighbors = pd.concat(neighbors) if neighbors else top_coexp_hits.iloc[0:0]

	# A seed can also be a hit of another seed; keep it as a seed only so that
	# gene_ids contains no duplicates. First-appearance order keeps the node
	# order stable between reruns (plain set ordering is not).
	seed_gene_id_set = set(seed_gene_ids)
	neighbor_gene_ids = [
	    gene_id
	    for gene_id in dict.fromkeys(neighbors.gene_id_2)
	    if gene_id not in seed_gene_id_set]
	gene_ids = seed_gene_ids + neighbor_gene_ids
	gene_types = ['seed'] * len(seed_gene_ids) + ['neighbor'] * len(neighbor_gene_ids)

	# Look up the annotation of every node (seeds first, then neighbors).
	# The three lists stay aligned with gene_ids; genes without an annotation
	# row get None in each list and an error is shown to the user.
	TGME49_ids = []
	gene_names = []
	descriptions = []

	for gene_id in gene_ids:
	    # Filter once and reuse the result for all three columns.
	    matches = TGME49_transcript_annotations.loc[
	        TGME49_transcript_annotations["gene_id"] == gene_id]

	    if matches.empty:
	        st.error(f"Unable to locate TGME49_id for Gene ID: {gene_id}, it should be of the form 'TGME49_######'")
	        TGME49_id = None
	        gene_name = None
	        description = None
	    else:
	        # If several rows match, the first one is used.
	        TGME49_id = matches["TGME49_id"].iloc[0]
	        gene_name = matches["gene_name"].iloc[0]
	        description = matches["description"].iloc[0]

	    TGME49_ids.append(TGME49_id)
	    gene_names.append(gene_name)
	    descriptions.append(description)

	# One row per node. "description" must be the per-gene list; passing the
	# loop variable would broadcast the last gene's description to every row.
	node_info = pd.DataFrame({
	    "gene_id": gene_ids,
	    "gene_type": gene_types,
	    "TGME49_id": TGME49_ids,
	    "gene_name": gene_names,
	    "description": descriptions})

	# Build the Cytoscape element list: one node per gene, then one edge per
	# co-expression hit.
	elements = []
	for node_index, (node_id, node_name) in enumerate(zip(gene_ids, gene_names)):
	    # Label each node with its own gene name, falling back to the gene ID
	    # when the name is missing (None, or NaN coming from the table).
	    node_label = node_id if pd.isna(node_name) else node_name
	    elements.append({
	        "data": {
	            "id": node_id,
	            "label": node_label},
	        # Initial positions spread the nodes along a diagonal.
	        "position": {
	            "x": node_index * 10,
	            "y": node_index * 10}})

	edge_columns = zip(
	    neighbors["gene_id_1"],
	    neighbors["gene_id_2"],
	    neighbors["coexp_score"])
	for source_id, target_id, coexp_score in edge_columns:
	    elements.append({
	        "data": {
	            "source": source_id,
	            "target": target_id,
	            "label": coexp_score}})


	# Offer the selected co-expression hits (the network's edges) as a
	# tab-separated download next to the gene ID input.
	with col3:
	    st.text('')  # help alignment with input box
	    st.download_button(
	        label="Download as TSV",
	        data=neighbors.to_csv(sep='\t').encode('utf-8'),
	        file_name="ToxoCEN_network.tsv",
	        # The payload is tab separated, so advertise it as TSV, not CSV.
	        mime="text/tab-separated-values")

	##########################################################

	# Style rules handed to the Cytoscape component, one per selector.
	node_style = {
	    "selector": "node",
	    "style": {"width": 50, "height": 20, "shape": "rectangle"},
	}
	edge_style = {"selector": "edge", "style": {"width": 10}}
	# NOTE(review): "layout" is passed as a stylesheet selector here, presumably
	# to request a random layout -- confirm the component actually honours it.
	layout_style = {"selector": "layout", "style": {"name": "random"}}
	stylesheet = [node_style, edge_style, layout_style]

	# Render the network below its own title; the component's return value is
	# kept in clicked_elements but not used by the visible code.
	st.title("ToxoCEN Network")
	clicked_elements = st_cytoscapejs(
	    elements=elements,
	    stylesheet=stylesheet,
	    width=1500,
	    height=1500,
	)