import pandas as pd
import numpy as np
import os
import sys
import time
import scanpy as sc
import matplotlib.pyplot as plt
# Make the local 'scAND-code/' directory importable. The path is relative, so
# it is resolved against the current working directory.
sys.path.append('scAND-code/')
import scAND
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(2019)
The input to scAND consists of three parts: the count table (Count_df), the cell names (cells), and the peak names (peaks):
def _read_first_column(path):
    """Read a header-less, tab-separated file and return its first column."""
    return pd.read_csv(path, sep='\t', header=None)[0]


# Load the three pre-built scAND inputs from the tab-separated example files.
Count_df = pd.read_csv('Example/Count_df.txt', sep='\t')
cells = _read_first_column('Example/Count_Cells.txt')
peaks = _read_first_column('Example/Count_Peaks.txt')

# Preview the first rows of each input (only rendered in an interactive session).
cells.head()
peaks.head()
Count_df.head()
The get_scAND_inputs() function can be used to construct the inputs from a scATAC-seq matrix. Note that each row and each column of the matrix must contain at least one element.
# Build the scAND inputs from a raw scATAC-seq matrix instead of the pre-built files.
MM = pd.read_csv('Example/GSM1647122_GM12878vsHEK.dhsmatrix.txt.gz', sep='\t')
MM.iloc[:3, :10]

# The first four columns hold the peak annotation (the code below reads 'chr',
# 'start' and 'end' from them); every column from position 4 onward is kept as
# the matrix handed to get_scAND_inputs().
peak_info = MM.iloc[:, :4]
scATAC = MM.iloc[:, 4:]

# Label every row as "chr:start-end". The label is assembled with vectorized
# Series operations rather than a row-wise apply(), which would invoke a Python
# lambda once per peak. `.values` drops the Series name so the resulting index
# is unnamed, exactly as it was with the apply()-based version.
peak_labels = (
    peak_info['chr'] + ':'
    + peak_info['start'].astype(str) + '-'
    + peak_info['end'].astype(str)
)
scATAC.index = peak_labels.values
scATAC.iloc[:3, :5]

# Derive the count table, cell names and peak names from the labelled matrix.
Count_df, cells, peaks = scAND.get_scAND_inputs(scATAC)

# Preview the first rows of each input (only rendered in an interactive session).
cells.head()
peaks.head()
Count_df.head()
Run_scAND(Count_df, d, weights, cells, peaks, random_seed=2019, L2_norm=True, Binary=True, Graph_norm=True, return_peaks=False, verbose=True)
The parameters of the Run_scAND() function:
# Parameters
# Candidate weights passed to Run_scAND() through its `weights` argument.
beta_list = [0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]

# Time the run with perf_counter(): it is monotonic, so the measured duration
# cannot be distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
Rep_cells = scAND.Run_scAND(
    Count_df=Count_df,
    d=50,
    weights=beta_list,
    cells=cells,
    peaks=peaks,
    random_seed=2019,
    L2_norm=False,
    Binary=True,
    Graph_norm=True,
    return_peaks=False,
    verbose=True,
)
end_time = time.perf_counter()
print('Time Costing: %f s' % (end_time - start_time))
# Pick one representation out of the Run_scAND() output: the weight (beta) and
# the number of dimensions to keep.
used_dim, used_beta = 10, 0.8
used_rep = scAND.Get_Result(
    Rep_cells,
    beta=used_beta,
    dim=used_dim,
    L2_norm=True,
)
used_rep.iloc[:5, :5]

# Load the cell labels: a header-less, tab-separated file whose first column
# becomes the index and whose single remaining column is named 'label'.
metadata = pd.read_csv(
    'Example/metadata.txt',
    sep='\t',
    header=None,
    index_col=0,
)
metadata.columns = ['label']
metadata.head()
# Wrap the selected representation in an AnnData object and attach the labels.
adata = sc.AnnData(used_rep)
adata.var_names_make_unique()
# NOTE(review): presumably the rows of `metadata` are in the same order as the
# rows of `used_rep` -- confirm, since no explicit alignment is done here.
adata.obs = metadata.copy()

# Every step below receives the same fixed random_state.
seed_kwargs = {'random_state': 2019}
sc.pp.neighbors(adata, **seed_kwargs)
sc.tl.tsne(adata, **seed_kwargs)
sc.tl.umap(adata, **seed_kwargs)

# Draw the t-SNE and then the UMAP embedding, each on its own 3x3 figure,
# coloured by the 'label' column.
for draw_embedding in (sc.pl.tsne, sc.pl.umap):
    fig, ax = plt.subplots(figsize=(3, 3))
    draw_embedding(adata, color='label', title='scAND', s=15, ax=ax)