# -*- coding: utf-8 -*-
"""TabulaSapiens_Heart

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1x1YAuTu1IpROnthyRovb_bTaeknoctVs

Exploratory look at the Tabula Sapiens heart-cell dataset. The script:

1. installs the packages it needs (anndata, PyDrive, scanpy),
2. authenticates against Google Drive and downloads the ``.h5ad`` file,
3. prints basic information about the loaded AnnData object,
4. plots a density of per-gene mean expression,
5. runs PCA and draws PCA / UMAP scatter plots.

It is meant to run inside Google Colab: ``google.colab.auth`` is only
available there.
"""

# Trying with smaller dataset
# Around 400 MB

import subprocess
import sys


def _pip_install(*args):
    """Run ``pip install`` with *args* in the current interpreter's environment.

    Stands in for the notebook-only ``!pip install ...`` shell escapes, which
    are a syntax error when this file is executed as plain Python.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", *args])


_pip_install("anndata")

import numpy as np  # linear algebra
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Code to read file into colaboratory:
_pip_install("-U", "-q", "PyDrive")

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Tabula Sapiens Heart Cells ------>
# https://drive.google.com/file/d/1oMOjGyf14G4dcyqrQChlD4f7TK0YV2Vf/view?usp=sharing - link to dataset

# 2. Get the file
# Make sure you upload all your data files to your Google Drive and change
# share -> Advanced -> change -> anyone with the link can view.
downloaded = drive.CreateFile({'id': '1oMOjGyf14G4dcyqrQChlD4f7TK0YV2Vf'})  # replace the id with id of file you want to access
downloaded.GetContentFile('TabulaSapiens_Heart_Dataset.h5ad')
print(downloaded)

_pip_install("scanpy")

import scanpy as sc
import anndata
import importlib
from sklearn.decomposition import PCA
import matplotlib as mpl

adata = sc.read_h5ad('TabulaSapiens_Heart_Dataset.h5ad')
print(adata)

# View basic statistics of the data
print(adata.var_names[:10])  # View the first 10 gene names
print(adata.obs.head())  # View the first few rows of the observation (cell) data

# Plot the distribution of the gene expression levels.
# AnnData has no ``.plot`` accessor (the previous ``adata.plot.density`` call
# raised AttributeError), so the density is drawn with seaborn instead.
# ``np.asarray(...).ravel()`` flattens the result whether ``adata.X`` is a
# dense array or a sparse matrix (whose ``mean`` returns a 2-D matrix).
mean_expression_per_gene = np.asarray(adata.X.mean(axis=0)).ravel()
sns.kdeplot(mean_expression_per_gene)
plt.xlabel("Mean expression per gene")
plt.ylabel("Density")
plt.show()

# Get the dimensions of the data
print("Number of Cells:", adata.n_obs)
print("Number of Genes:", adata.n_vars)

# Dimensionality Reduction (e.g., PCA):
sc.tl.pca(adata)
sc.pl.pca(
    adata,
    color=[
        'ENSG00000223972',
        'ENSG00000227232',
        'ENSG00000278267',
        'ENSG00000243485',
        'ENSG00000284332',
        'ENSG00000237613',
        'ENSG00000268020',
        'ENSG00000240361',
        'ENSG00000186092',
        'ENSG00000238009',
    ],
)

# UMAP embedding
# ``sc.pl.umap`` needs coordinates in ``adata.obsm['X_umap']``. Compute them
# only when the file does not already provide an embedding, so a precomputed
# one is plotted unchanged.
if 'X_umap' not in adata.obsm:
    sc.pp.neighbors(adata)
    sc.tl.umap(adata)
sc.pl.umap(adata, color='cell_type')

# PCA plot
sc.pl.pca(adata, color='cell_type')