#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Notebook-only setup: hand the pip command to the IPython shell escape.
run_shell = get_ipython().system
run_shell('pip install anndata')


# In[2]:


# Re-fetch the shell hook so this cell also works when run on its own.
run_shell = get_ipython().system
run_shell('pip install scanpy')


# In[1]:


import scanpy as sc
import anndata
import importlib
from sklearn.decomposition import PCA

import matplotlib as mpl


# In[2]:


import h5py
import anndata

# Location of the Tabula Sapiens heart dataset on the local machine.
dataset_path = 'C:/Users/smattaparthi/CS-297/TabulaSapiens_Heart_Dataset.h5ad'

# Load the .h5ad file into an AnnData object and show its summary.
adata = anndata.read_h5ad(dataset_path)
print(adata)


# In[5]:


# Overall shape of the data matrix.
print(adata.shape)


# In[6]:


# The two dimensions reported separately: observations are cells,
# variables are genes.
n_cells = adata.n_obs
n_genes = adata.n_vars
print("Number of Cells:", n_cells)
print("Number of Genes:", n_genes)


# In[7]:


# Index of variable (gene) names.
gene_names = adata.var_names
print(gene_names)


# In[8]:


# Index of observation (cell) names.
cell_names = adata.obs_names
print(cell_names)


# In[3]:


# Preprocessing: basic quality-control filtering of cells and genes.
# The return values of the scanpy calls are not used; adata itself is
# printed afterwards to show the result.

# Thresholds used by the two filters below.
MIN_GENES_PER_CELL = 200
MIN_CELLS_PER_GENE = 3

# Drop cells that have fewer than MIN_GENES_PER_CELL genes.
sc.pp.filter_cells(adata, min_genes=MIN_GENES_PER_CELL)

# Drop genes that appear in fewer than MIN_CELLS_PER_GENE cells.
sc.pp.filter_genes(adata, min_cells=MIN_CELLS_PER_GENE)

print(adata)


# In[4]:


# Look at the cell-type annotation stored for each cell in the heart data.
obs_table = adata.obs
cell_type = obs_table['cell_type']

# Blank lines before and after keep the output readable in the notebook.
print("\n\n", cell_type, "\n\n")


# In[ ]:


# # Neural Network for Cell Type Classification

# In[15]:


# Notebook-only setup: install TensorFlow through the IPython shell escape.
# A bare `pip install ...` line is not valid Python and prevents the exported
# script from being parsed at all; this matches the other install cells.
get_ipython().system('pip install tensorflow')


# In[10]:


import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Name of the adata.obs column that the network will learn to predict.
target_var = 'cell_type'

# Features are the expression matrix; the target is the chosen obs column.
X = adata.X  # Features (gene expressions)
y = adata.obs[target_var]  # Target variable

print("Features:\n", X)
print("\n Target:\n", y)

# Hold out 20% of the cells for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Cast the features to float32 before handing them to the network.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

print("X_train\n\n")
print(X_train,"\n \n")

# Convert the string labels to integer class ids.
# The encoder is fitted on the full label column rather than on y_train only:
# a cell type that happens to fall exclusively into the test split would
# otherwise make encoder.transform(y_test) raise ValueError (unseen label).
encoder = LabelEncoder()
encoder.fit(y)
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)

print("y_train\n\n")
print(y_train,"\n \n")


# In[14]:


# Diagnostic output for the training split. The labels now describe the value
# actually printed (the previous headings called these "Shape of X" and
# "Shape of X[0]" although a single dimension was shown each time).
print("Number of features (X_train.shape[1])")
print(X_train.shape[1])
# Second training row, shown as an example.
print("\n\n",X_train[1])

print("\n\nNumber of training samples (X_train.shape[0])")
print(X_train.shape[0])
# First training row, shown as an example.
print("\n\n",X_train[0],"\n \n")

print("Shape of y_train")
print(y_train.shape)


# In[15]:


# Build, train and evaluate a small fully connected classifier.

# Size the output layer from the fitted label encoder instead of a fixed 6:
# sparse_categorical_crossentropy needs one output unit per integer class id,
# so a hard-coded count breaks as soon as the dataset has a different number
# of cell types.
num_classes = len(encoder.classes_)

model = tf.keras.Sequential([
    # The input width is the number of feature columns in X_train.
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    # Softmax turns the final layer into one probability per class.
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# The "sparse" loss variant is used because the labels are integer class ids
# from the LabelEncoder, not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Report loss and accuracy on the held-out test split.
model.evaluate(X_test, y_test)

# Predict test data
predictions = model.predict(X_test)

model.summary()


# In[18]:


# Show the raw output of model.predict on the test split: one row per test
# sample, holding the softmax output of the network's final layer.
print(predictions)


# In[ ]:


# In[ ]: