avelezarce committed on
Commit
5a73eed
·
verified ·
1 Parent(s): 01df268

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -3
README.md CHANGED
@@ -18,13 +18,27 @@ from tdc import tdc_hf_interface
18
  from tdc.model_server.tokenizers.scgpt import scGPTTokenizer
19
  import torch
20
 
 
 
 
 
 
 
 
21
  scgpt = tdc_hf_interface("scGPT")
22
- model = scgpt.load() # this line can cause segmentation fault on inappropriate setups
23
  tokenizer = scGPTTokenizer()
24
- gene_ids = adata.var["feature_name"].to_numpy() # Convert to numpy array
 
25
  tokenized_data = tokenizer.tokenize_cell_vectors(
26
  adata.X.toarray(), gene_ids)
27
- embeds = model(torch.tensor([x[1] for x in tokenized_data])).last_hidden_state
 
 
 
 
 
 
28
  ```
29
 
30
  # TDC Citation
 
18
  from tdc.model_server.tokenizers.scgpt import scGPTTokenizer
19
  import torch
20
 
21
+ # an example dataset
22
+ adata = DataLoader("cellxgene_sample_small",
23
+ "./data",
24
+ dataset_names=["cellxgene_sample_small"],
25
+ no_convert=True).adata
26
+
27
+ # code for loading the model and performing inference
28
  scgpt = tdc_hf_interface("scGPT")
29
+ model = scgpt.load() # This line can cause a segmentation fault on inappropriate setups
30
  tokenizer = scGPTTokenizer()
31
+ gene_ids = adata.var["feature_name"].to_numpy(
32
+ ) # Convert to numpy array
33
  tokenized_data = tokenizer.tokenize_cell_vectors(
34
  adata.X.toarray(), gene_ids)
35
+ mask = torch.tensor([x != 0 for x in tokenized_data[0][1]],
36
+ dtype=torch.bool)
37
+
38
+ # Extract first embedding
39
+ first_embed = model(tokenized_data[0][0],
40
+ tokenized_data[0][1],
41
+ attention_mask=mask)
42
  ```
43
 
44
  # TDC Citation