{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_digits\n",
    "\n",
    "digits = load_digits()  # Bunch object containing digits data and metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Explore the data and target values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f'digits.data.shape    = {digits.data.shape}')\n",
    "print(f'digits.target.shape  = {digits.target.shape}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print()\n",
    "print('Sample digit image as a two-dimensional array:')\n",
    "print('digits.images[13] =')\n",
    "print(digits.images[13])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print()\n",
    "print('One-dimensional array representation for Scikit-Learn:')\n",
    "print(f'digits.data[13] = ')\n",
    "print(digits.data[13])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print()\n",
    "print('Some target values:')\n",
    "print(f'digits.target[::100] = {digits.target[::100]}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Visualize the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Arrange the images in a 4 by 6 grid.\n",
    "figure, axes = plt.subplots(nrows=4, ncols=6, figsize=(6, 4))\n",
    "\n",
    "# Display each image and its target, without the axes labels.\n",
    "for item in zip(axes.ravel(), digits.images, digits.target):\n",
    "    axes, image, target = item\n",
    "    axes.imshow(image, cmap=plt.cm.gray_r)\n",
    "    axes.set_xticks([])  # remove x-axis tick marks\n",
    "    axes.set_yticks([])  # remove y-axis tick marks\n",
    "    axes.set_title(target)\n",
    "    \n",
    "plt.tight_layout()     "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Split the data for training and testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "     digits.data, digits.target, random_state=11)\n",
    "\n",
    "print(f'X_train.shape = {X_train.shape}')\n",
    "print(f'X_test.shape  = {X_test.shape}')\n",
    "\n",
    "print()\n",
    "print(f'y_train.shape = {y_train.shape}')\n",
    "print(f'y_test.shape  = {y_test.shape}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create and train the k-nearest neighbors model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "\n",
    "knn = KNeighborsClassifier(n_neighbors=5)\n",
    "knn.fit(X=X_train, y=y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test the model by predicting digit classes "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted = knn.predict(X=X_test)\n",
    "expected = y_test\n",
    "\n",
    "print('First twenty predictions:')\n",
    "print(f'predicted[:20] = {predicted[:20]}')\n",
    "print(f' expected[:20] = {expected[:20]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wrong = [ (pred, exp) \n",
    "          for (pred, exp) in zip(predicted, expected) \n",
    "          if pred != exp\n",
    "        ]\n",
    "\n",
    "print('Wrong predictions:')\n",
    "print(wrong)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Wrong predictions:')\n",
    "print()\n",
    "\n",
    "for data, pred, exp in zip(X_test, predicted, expected):\n",
    "    if pred != exp:\n",
    "        figure = plt.figure(figsize=(1, 1))\n",
    "        image = data.reshape(8, 8)\n",
    "        plt.imshow(image, cmap=plt.cm.gray_r)\n",
    "        plt.xticks([])  # remove x-axis tick marks\n",
    "        plt.yticks([])  # remove y-axis tick marks\n",
    "        plt.title(f'pred = {pred}, exp = {exp}')   "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Model prediction accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f'Prediction accuracy score = {knn.score(X_test, y_test):.2%}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Confusion matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "confusion = confusion_matrix(y_true=expected, y_pred=predicted)\n",
    "\n",
    "print('Confusion matrix:')\n",
    "print(confusion)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "confusion_df = pd.DataFrame(confusion, index=range(10), columns=range(10))\n",
    "axes = sns.heatmap(confusion_df, annot=True, cmap='nipy_spectral_r')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classification report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "\n",
    "names = [str(digit) for digit in digits.target_names]\n",
    "print(classification_report(expected, predicted, target_names=names))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "##########################################################################\n",
    "# (C) Copyright 2019 by Deitel & Associates, Inc. and                    #\n",
    "# Pearson Education, Inc. All Rights Reserved.                           #\n",
    "#                                                                        #\n",
    "# DISCLAIMER: The authors and publisher of this book have used their     #\n",
    "# best efforts in preparing the book. These efforts include the          #\n",
    "# development, research, and testing of the theories and programs        #\n",
    "# to determine their effectiveness. The authors and publisher make       #\n",
    "# no warranty of any kind, expressed or implied, with regard to these    #\n",
    "# programs or to the documentation contained in these books. The authors #\n",
    "# and publisher shall not be liable in any event for incidental or       #\n",
    "# consequential damages in connection with, or arising out of, the       #\n",
    "# furnishing, performance, or use of these programs.                     #\n",
    "##########################################################################"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}