### Load and explore the dataset

In [None]:
# load_iris returns the copy of the Iris dataset bundled with scikit-learn
from sklearn.datasets import load_iris

iris = load_iris()
# DESCR is the dataset's text description; printing it documents the data
# before any analysis is done
print(iris.DESCR)

In [None]:
# Report the sample/target array shapes and the label metadata,
# one "name = value" line per attribute
iris_summary = (
    ('iris.data.shape', iris.data.shape),
    ('iris.target.shape', iris.target.shape),
    ('iris.target_names', iris.target_names),
    ('iris.feature_names', iris.feature_names),
)
for label, value in iris_summary:
    print(f'{label} = {value}')

### Create a Pandas dataframe

In [None]:
import pandas as pd

# Use the fully qualified option name: from pandas 1.4 on, the short pattern
# 'max_columns' also matches 'styler.render.max_columns', so set_option
# raises OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', 5)
pd.set_option('display.width', None)  # None lets pandas auto-detect the width

# One row per sample, one column per feature measurement
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

# iris.target holds integer class codes; indexing target_names with the whole
# array converts every code to its species name in a single vectorized step
iris_df['species'] = iris.target_names[iris.target]

# Last expression of the cell: renders the (truncated) DataFrame
iris_df

In [None]:
# Use the fully qualified option name: from pandas 1.4 on, the bare pattern
# 'precision' also matches 'styler.format.precision', so set_option raises
# OptionError ("Pattern matched multiple keys").
pd.set_option('display.precision', 2)

# Descriptive statistics for the numeric columns, shown with two decimals
iris_df.describe()

In [None]:
# Summarize the species-name column separately from the numeric columns
species_column = iris_df['species']
species_column.describe()

### Visualize the dataset

In [None]:
%matplotlib inline
import seaborn as sns

sns.set(font_scale=1.1)
sns.set_style('whitegrid')

grid = sns.pairplot(data=iris_df, vars=iris_df.columns[0:4], hue='species')

In [None]:
# Same scatterplot matrix with no hue: every sample is drawn in one color,
# which is how the data looks when the species labels are not used
unlabeled_columns = list(iris_df.columns[:4])
grid = sns.pairplot(data=iris_df, vars=unlabeled_columns)

### Create a k-means estimator and fit the model

In [None]:
from sklearn.cluster import KMeans

# n_init is pinned explicitly. Its default changed from 10 to 'auto' in
# scikit-learn 1.4 (versions 1.2-1.3 emit a FutureWarning when it is omitted),
# so relying on the default makes the clustering depend on the installed
# version. random_state fixes the centroid seeding for repeatable runs.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=11)  # find three clusters

# Cluster on the raw measurements only; the species labels are not used
kmeans.fit(iris.data)

### Compare the k-means labels to the Iris dataset’s target values

In [None]:
# Cluster assignments for samples 0-49 (the setosa rows)
setosa_labels = kmeans.labels_[:50]
print(setosa_labels)

In [None]:
# Cluster assignments for samples 50-99 (the versicolor rows)
versicolor_labels = kmeans.labels_[50:100]
print(versicolor_labels)

In [None]:
# Cluster assignments for samples 100-149 (the virginica rows)
virginica_labels = kmeans.labels_[100:150]
print(virginica_labels)

### Dimensionality reduction with Principal Component Analysis (PCA)

In [None]:
from sklearn.decomposition import PCA

# Configure a projection of the feature matrix onto two principal components
pca = PCA(n_components=2, random_state=11) # reduce to two components
# Learn the components from the measurements; as the cell's last expression,
# the fitted estimator is also displayed
pca.fit(iris.data)

In [None]:
# Apply the fitted projection to the same samples it was fitted on
reduced_iris = pca.transform(iris.data)
# Displayed as the cell output to confirm the reduced dimensionality
reduced_iris.shape

### Visualize the reduced data

In [None]:
# Wrap the projected coordinates in a DataFrame and attach each sample's
# species name (aligned on the row index) so it can be used as a plot hue
component_names = ['Component 1', 'Component 2']
reduced_iris_df = pd.DataFrame(reduced_iris, columns=component_names).assign(
    species=iris_df['species']
)

In [None]:
import matplotlib.pyplot as plt

# Scatter the two PCA components, colored by species
axes = sns.scatterplot(
    x='Component 1',
    y='Component 2',
    hue='species',
    legend='brief',
    data=reduced_iris_df,
)

# The k-means centroids live in the original feature space; project them with
# the same PCA so they can be overlaid on the 2D plot as large black dots
iris_centers = pca.transform(kmeans.cluster_centers_)
centers_x, centers_y = iris_centers[:, 0], iris_centers[:, 1]
dots = plt.scatter(centers_x, centers_y, s=100, c='k')

In [None]:
##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and #
# Pearson Education, Inc. All Rights Reserved. #
# #
# DISCLAIMER: The authors and publisher of this book have used their #
# best efforts in preparing the book. These efforts include the #
# development, research, and testing of the theories and programs #
# to determine their effectiveness. The authors and publisher make #
# no warranty of any kind, expressed or implied, with regard to these #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or #
# consequential damages in connection with, or arising out of, the #
# furnishing, performance, or use of these programs. #
##########################################################################
