{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "pd.set_option('precision', 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "titanic = pd.read_csv('titanic.csv')\n",
    "titanic.columns = ['name', 'survived', 'sex', 'age', 'klass']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>survived</th>\n",
       "      <th>sex</th>\n",
       "      <th>age</th>\n",
       "      <th>klass</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Allen, Miss. Elisabeth Walton</td>\n",
       "      <td>yes</td>\n",
       "      <td>female</td>\n",
       "      <td>29.00</td>\n",
       "      <td>1st</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Allison, Master. Hudson Trevor</td>\n",
       "      <td>yes</td>\n",
       "      <td>male</td>\n",
       "      <td>0.92</td>\n",
       "      <td>1st</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Allison, Miss. Helen Loraine</td>\n",
       "      <td>no</td>\n",
       "      <td>female</td>\n",
       "      <td>2.00</td>\n",
       "      <td>1st</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Allison, Mr. Hudson Joshua Crei</td>\n",
       "      <td>no</td>\n",
       "      <td>male</td>\n",
       "      <td>30.00</td>\n",
       "      <td>1st</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Allison, Mrs. Hudson J C (Bessi</td>\n",
       "      <td>no</td>\n",
       "      <td>female</td>\n",
       "      <td>25.00</td>\n",
       "      <td>1st</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                              name survived     sex    age klass\n",
       "0    Allen, Miss. Elisabeth Walton      yes  female  29.00   1st\n",
       "1   Allison, Master. Hudson Trevor      yes    male   0.92   1st\n",
       "2     Allison, Miss. Helen Loraine       no  female   2.00   1st\n",
       "3  Allison, Mr. Hudson Joshua Crei       no    male  30.00   1st\n",
       "4  Allison, Mrs. Hudson J C (Bessi       no  female  25.00   1st"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "klass    = []  # independent variable\n",
    "survived = []  # dependent variable\n",
    "\n",
    "for s, k in zip(titanic.survived, titanic.klass):\n",
    "    \n",
    "    if   k == '1st': klass += [1]\n",
    "    elif k == '2nd': klass += [2]\n",
    "    else:            klass += [3]  \n",
    "        \n",
    "    if s == 'yes': survived += [1]\n",
    "    else:          survived += [0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "X = np.array(klass)\n",
    "y = np.array(survived)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of training set = (981, 1)\n",
      "Shape of testing  set = (328, 1)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X.reshape(-1, 1), y, random_state=0)\n",
    "\n",
    "print(f'Shape of training set = {X_train.shape}')\n",
    "print(f'Shape of testing  set = {X_test.shape}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     X_train  y_train  X_test  y_test\n",
      "  0     3        0        3       0\n",
      "  1     2        0        2       1\n",
      "  2     1        1        2       0\n",
      "  3     2        0        3       0\n",
      "  4     3        0        2       0\n",
      "  5     3        1        3       1\n",
      "  6     2        0        1       1\n",
      "  7     3        1        1       1\n",
      "  8     1        0        2       0\n",
      "  9     3        1        3       1\n",
      " 10     1        1        2       0\n"
     ]
    }
   ],
   "source": [
    "print('     X_train  y_train  X_test  y_test')\n",
    "\n",
    "for i in range(11):\n",
    "    print(f'{i:3d}{X_train[i][0]:6d}{y_train[i]:9d}{X_test[i][0]:9d}{y_test[i]:8d}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "logistic_regression_model = LogisticRegression()\n",
    "logistic_regression_model.fit(X=X_train, y=y_train)\n",
    "\n",
    "predicted = logistic_regression_model.predict(X_test)\n",
    "expected  = y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "right predictions = 226\n",
      "wrong predictions = 102\n",
      "\n",
      "69% right\n"
     ]
    }
   ],
   "source": [
    "right = 0\n",
    "wrong = 0\n",
    "\n",
    "for p, e in zip(predicted, expected):\n",
    "    if p == e: right += 1\n",
    "    else:      wrong += 1\n",
    "        \n",
    "print(f'right predictions = {right}')\n",
    "print(f'wrong predictions = {wrong}')\n",
    "print()\n",
    "print(f'{100*right/(right + wrong):.0f}% right')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
