{ "cells": [ { "cell_type": "markdown", "id": "648a4aec-5892-43f7-80b7-ecaf85342aa7", "metadata": {}, "source": [ "# Bar Charts of Titanic Passenger Ages\n", "## Compute age frequencies with server-side SQL" ] }, { "cell_type": "code", "execution_count": null, "id": "00164d37-90b1-429c-915e-4c255fdf6c8b", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from DATA225utils import make_connection, dataframe_query" ] }, { "cell_type": "code", "execution_count": null, "id": "08651bf5-d04f-4400-a6d2-86d10222bd85", "metadata": { "tags": [] }, "outputs": [], "source": [ "conn = make_connection(config_file = 'titanic.ini')\n", "cursor = conn.cursor()" ] }, { "cell_type": "markdown", "id": "345e8c74-7260-409a-82b5-dcc643a75e56", "metadata": {}, "source": [ "#### Do the calculations in the database and only download the frequency numbers. The `count()` function only counts non-null values. String `NA` for a missing age evaluates to 0." ] }, { "cell_type": "code", "execution_count": null, "id": "77b0a030-7ff2-4e8e-ab9c-236a6cbd9a80", "metadata": {}, "outputs": [], "source": [ "sql = ( \"\"\"\n", " SELECT\n", " COUNT(IF (age BETWEEN 0.001 AND 1.999, 1, NULL)) AS baby,\n", " COUNT(IF (age BETWEEN 2 AND 12.999, 1, NULL)) AS child,\n", " COUNT(IF (age BETWEEN 13 AND 19.999, 1, NULL)) AS teen,\n", " COUNT(IF (age BETWEEN 20 AND 24.999, 1, NULL)) AS youth,\n", " COUNT(IF (age BETWEEN 25 AND 64.999, 1, NULL)) AS adult,\n", " COUNT(IF (age >= 65, 1, NULL)) AS senior\n", " FROM passengers\n", " \"\"\"\n", " )\n", "\n", "_, df = dataframe_query(conn, sql)\n", "df" ] }, { "cell_type": "markdown", "id": "06597564-8ded-4004-a90d-3421ef452bc7", "metadata": {}, "source": [ "#### Extract the row from the dataframe." ] }, { "cell_type": "code", "execution_count": null, "id": "e0f6e9f8-fa26-4f45-804d-ce41bc8b3175", "metadata": {}, "outputs": [], "source": [ "counts = df.iloc[0].tolist()\n", "counts" ] }, { "cell_type": "markdown", "id": "5746f7fd-c97c-4579-b254-11e88f1d7961", "metadata": {}, "source": [ "#### Get the column headers." ] }, { "cell_type": "code", "execution_count": null, "id": "92d5a183-d9d0-4932-a44f-afd59d99ca5a", "metadata": {}, "outputs": [], "source": [ "labels = df.columns.tolist()\n", "labels" ] }, { "cell_type": "markdown", "id": "7af7ef82-4743-49d5-88bb-c5ec06826d6a", "metadata": { "tags": [] }, "source": [ "#### Create the bar chart." ] }, { "cell_type": "code", "execution_count": null, "id": "877066ae-1d00-4219-b8fb-d017dfcf9650", "metadata": {}, "outputs": [], "source": [ "fig = plt.figure(figsize = (6, 5))\n", "\n", "plt.bar(labels, counts, color = 'blue', width = 0.5)\n", "\n", "plt.xlabel('Age categories')\n", "plt.ylabel('Counts')\n", "plt.title('Titanic Age Counts')" ] }, { "cell_type": "code", "execution_count": null, "id": "e790b87e-f5bc-4955-95d6-b9a6763c26e6", "metadata": {}, "outputs": [], "source": [ "cursor.close()\n", "conn.close()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }