{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_digits\n",
    "\n",
    "digits = load_digits()  # Bunch object containing digits data and metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Explore the data and target values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f'digits.data.shape = {digits.data.shape}')\n",
    "print(f'digits.target.shape = {digits.target.shape}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print()\n",
    "print('Sample digit image as a two-dimensional array:')\n",
    "print('digits.images[13] =')\n",
    "print(digits.images[13])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print()\n",
    "print('One-dimensional array representation for Scikit-Learn:')\n",
    "print('digits.data[13] = ')\n",
    "print(digits.data[13])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print()\n",
    "print('Some target values:')\n",
    "print(f'digits.target[::100] = {digits.target[::100]}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Visualize the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Arrange the images in a 4 by 6 grid.\n",
    "figure, axes = plt.subplots(nrows=4, ncols=6, figsize=(6, 4))\n",
    "\n",
    "# Display each image and its target, without the axes labels.\n",
    "# The loop uses its own name (ax) so that `axes` stays bound to the\n",
    "# whole grid of subplots instead of being overwritten by a single one.\n",
    "for ax, image, target in zip(axes.ravel(), digits.images, digits.target):\n",
    "    ax.imshow(image, cmap=plt.cm.gray_r)\n",
    "    ax.set_xticks([])  # remove x-axis tick marks\n",
    "    ax.set_yticks([])  # remove y-axis tick marks\n",
    "    ax.set_title(target)\n",
    "\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Split the data for training and testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# A fixed random_state makes the split reproducible from run to run.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    digits.data, digits.target, random_state=11)\n",
    "\n",
    "print(f'X_train.shape = {X_train.shape}')\n",
    "print(f'X_test.shape = {X_test.shape}')\n",
    "\n",
    "print()\n",
    "print(f'y_train.shape = {y_train.shape}')\n",
    "print(f'y_test.shape = {y_test.shape}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create and train the k-nearest neighbors model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "\n",
    "knn = KNeighborsClassifier(n_neighbors=5)\n",
    "knn.fit(X=X_train, y=y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test the model by predicting digit classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted = knn.predict(X=X_test)\n",
    "expected = y_test\n",
    "\n",
    "print('First twenty predictions:')\n",
    "print(f'predicted[:20] = {predicted[:20]}')\n",
    "print(f' expected[:20] = {expected[:20]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect (predicted, expected) pairs for every misclassified test sample.\n",
    "wrong = [(pred, exp)\n",
    "         for pred, exp in zip(predicted, expected)\n",
    "         if pred != exp]\n",
    "\n",
    "print('Wrong predictions:')\n",
    "print(wrong)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Wrong predictions:')\n",
    "print()\n",
    "\n",
    "# Boolean mask that selects only the misclassified test samples.\n",
    "mismatch = predicted != expected\n",
    "\n",
    "for data, pred, exp in zip(\n",
    "        X_test[mismatch], predicted[mismatch], expected[mismatch]):\n",
    "    # One thumbnail-sized figure per misclassified sample.\n",
    "    wrong_figure, wrong_ax = plt.subplots(figsize=(1, 1))\n",
    "    # Each sample is a flat row of pixels; reshape it back to 8 by 8 to draw it.\n",
    "    wrong_ax.imshow(data.reshape(8, 8), cmap=plt.cm.gray_r)\n",
    "    wrong_ax.set_xticks([])  # remove x-axis tick marks\n",
    "    wrong_ax.set_yticks([])  # remove y-axis tick marks\n",
    "    wrong_ax.set_title(f'pred = {pred}, exp = {exp}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Model prediction accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f'Prediction accuracy score = {knn.score(X_test, y_test):.2%}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Confusion matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "# Passing labels explicitly fixes the row/column order and guarantees one\n",
    "# row and one column per class, even if a class were absent from the test set.\n",
    "confusion = confusion_matrix(\n",
    "    y_true=expected, y_pred=predicted, labels=digits.target_names)\n",
    "\n",
    "print('Confusion matrix:')\n",
    "print(confusion)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "# Label rows and columns with the dataset's class names rather than a\n",
    "# hard-coded range(10).\n",
    "confusion_df = pd.DataFrame(\n",
    "    confusion, index=digits.target_names, columns=digits.target_names)\n",
    "axes = sns.heatmap(confusion_df, annot=True, cmap='nipy_spectral_r')\n",
    "\n",
    "# confusion_matrix puts true classes on the rows and predictions on the columns.\n",
    "axes.set(xlabel='Predicted digit', ylabel='Expected digit');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classification report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "\n",
    "# classification_report expects the class names as strings.\n",
    "names = [str(digit) for digit in digits.target_names]\n",
    "print(classification_report(expected, predicted, target_names=names))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "##########################################################################\n",
    "# (C) Copyright 2019 by Deitel & Associates, Inc. and                    #\n",
    "# Pearson Education, Inc. All Rights Reserved.                           #\n",
    "#                                                                        #\n",
    "# DISCLAIMER: The authors and publisher of this book have used their     #\n",
    "# best efforts in preparing the book. These efforts include the          #\n",
    "# development, research, and testing of the theories and programs        #\n",
    "# to determine their effectiveness. The authors and publisher make       #\n",
    "# no warranty of any kind, expressed or implied, with regard to these    #\n",
    "# programs or to the documentation contained in these books. The authors #\n",
    "# and publisher shall not be liable in any event for incidental or       #\n",
    "# consequential damages in connection with, or arising out of, the       #\n",
    "# furnishing, performance, or use of these programs.                     #\n",
    "##########################################################################"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}