{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "72919405-0d48-4527-baa6-fa0c1288c97a",
   "metadata": {},
   "source": [
    "# Timing comparisons for random sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b03155b6-489e-46a1-8183-cccf5691c07c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import numpy as np\n",
    "\n",
    "ALL_SIZE    = 1_000_000  # number of values in the full data set\n",
    "SAMPLE_SIZE = 100        # number of values drawn for each random sample\n",
    "MAX_VALUE   = 100        # generated values are integers from 1 to MAX_VALUE"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0d52c07-56ea-4392-9eb1-9ad3d9f91dcf",
   "metadata": {},
   "source": [
    "## Generate random values for the list and the `numpy` array."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff2116f0-bab0-43af-bd41-a023ea4b639f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the full data set once as a plain Python list of random integers\n",
    "# from 1 to MAX_VALUE, then copy the same values into a numpy array so the\n",
    "# list-based and array-based functions sample from identical data.\n",
    "list_values  = [ random.randint(1, MAX_VALUE) for _ in range(ALL_SIZE) ]\n",
    "array_values = np.array(list_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e19631b9-6959-4776-81ec-1d181cb9eafd",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_stats(stats):\n",
    "    \"\"\"Print the sum, mean, and median held in a 3-item sequence.\n",
    "\n",
    "    `stats` must unpack into exactly (sum, mean, median), in that order.\n",
    "    The sum is printed as an integer with thousands separators; the mean\n",
    "    and median are printed with two decimal places.\n",
    "    \"\"\"\n",
    "    sample_sum, sample_mean, sample_median = stats\n",
    "\n",
    "    # The local names are part of the output: the f-string `=` specifier\n",
    "    # echoes each expression's text (including its padding) as the label.\n",
    "    report = (\n",
    "        f'{sample_sum    = :,d}',\n",
    "        f'{sample_mean   = :5.2f}',\n",
    "        f'{sample_median = :5.2f}',\n",
    "    )\n",
    "    for line in report:\n",
    "        print(line)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7e54d19-b80d-4487-b524-abdfcf3c3cf0",
   "metadata": {},
   "source": [
    "## Use lists and explicit calculations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a24d895-5abc-4919-8003-43a7d8bc077d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "def list_explicit(values):\n",
    "    \"\"\"Draw a random sample from `values` and compute its statistics by hand.\n",
    "\n",
    "    SAMPLE_SIZE items are picked, each at an independently chosen random\n",
    "    index of `values`, so the same position can be picked more than once.\n",
    "    The total, mean, and median are computed with explicit loops and\n",
    "    arithmetic rather than library helpers.\n",
    "\n",
    "    Returns the tuple (total, mean, median).\n",
    "    \"\"\"\n",
    "    # Generate a random sample.  Index over the actual length of `values`\n",
    "    # so the function works for a non-empty sequence of any length.\n",
    "    last_index = len(values) - 1\n",
    "    sample = [0]*SAMPLE_SIZE\n",
    "    for i in range(SAMPLE_SIZE):\n",
    "        index = random.randint(0, last_index)\n",
    "        sample[i] = values[index]\n",
    "\n",
    "    # Total\n",
    "    total = 0\n",
    "    for v in sample:\n",
    "        total += v\n",
    "\n",
    "    # Mean\n",
    "    mean = total/SAMPLE_SIZE\n",
    "\n",
    "    # Median.  Indexes into the sorted sample are 0-based, so for a count n\n",
    "    # the middle element is at n//2 (odd n) and the two middle elements\n",
    "    # are at n//2 - 1 and n//2 (even n).\n",
    "    sorted_list = sorted(sample)\n",
    "    high_mid_index = SAMPLE_SIZE//2\n",
    "    if SAMPLE_SIZE%2 == 1:\n",
    "        median = sorted_list[high_mid_index]\n",
    "    else:\n",
    "        median = (  sorted_list[high_mid_index-1]\n",
    "                  + sorted_list[high_mid_index])/2\n",
    "\n",
    "    return total, mean, median"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a88f755d-7332-4e6e-98e6-93890fc4b41c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the function once on the list and show the resulting statistics.\n",
    "stats = list_explicit(list_values)\n",
    "print_stats(stats)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f4ee9af9-9816-4dc1-ad5b-beb8fbe1eb34",
   "metadata": {},
   "source": [
    "## Use lists and the `statistics` module."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6a065a7-cdcd-4a2e-9346-625d306ebc6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import statistics\n",
    "\n",
    "def list_stats(values):\n",
    "    \"\"\"Draw a random sample from `values` and summarize it with library calls.\n",
    "\n",
    "    SAMPLE_SIZE items are picked, each at an independently chosen random\n",
    "    index of `values`.  The total comes from the built-in sum() and the\n",
    "    median from statistics.median().\n",
    "\n",
    "    Returns the tuple (total, mean, median).\n",
    "    \"\"\"\n",
    "    # Generate a random sample.  Index over the actual length of `values`\n",
    "    # so the function works for a non-empty sequence of any length.\n",
    "    last_index = len(values) - 1\n",
    "    sample = [0]*SAMPLE_SIZE\n",
    "    for i in range(SAMPLE_SIZE):\n",
    "        index = random.randint(0, last_index)\n",
    "        sample[i] = values[index]\n",
    "\n",
    "    # Total, mean, and median\n",
    "    total  = sum(sample)\n",
    "    mean   = total/SAMPLE_SIZE\n",
    "    median = statistics.median(sample)\n",
    "\n",
    "    return total, mean, median"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9c51fe7-c443-4b92-af32-9a5604f24261",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the function once on the list and show the resulting statistics.\n",
    "stats = list_stats(list_values)\n",
    "print_stats(stats)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "64d652b0-0d9e-4e10-a15d-e73c789c6b91",
   "metadata": {},
   "source": [
    "## Use `numpy` arrays and functions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6dcc99fc-d2f9-4f63-8c68-49f2e829e0fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def array_stats(values):\n",
    "    \"\"\"Draw a random sample from `values` with numpy and summarize it.\n",
    "\n",
    "    np.random.choice (with its default settings) picks SAMPLE_SIZE items;\n",
    "    the total and median are then computed by numpy as well.\n",
    "\n",
    "    Returns the tuple (total, mean, median).\n",
    "    \"\"\"\n",
    "    drawn = np.random.choice(values, size=SAMPLE_SIZE)\n",
    "\n",
    "    # The mean reuses the total rather than making a second pass.\n",
    "    total = drawn.sum()\n",
    "    return total, total/SAMPLE_SIZE, np.median(drawn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "545cc98c-c7bc-4903-8795-b96bceccffe8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the function once on the array and show the resulting statistics.\n",
    "stats = array_stats(array_values)\n",
    "print_stats(stats)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8979fa9e-6b0b-47e0-8297-ae69ceb2d4fc",
   "metadata": {},
   "source": [
    "## Timing comparisons."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa0fa91f-ba33-49b0-a6fc-546e0df2befc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "COUNT = 100_000  # number of times each sampling function is called per timing loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "906a8612-a9e8-424b-a374-5b69cc74b398",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time COUNT calls of list_explicit using the process-time clock (ns).\n",
    "start_time_explicit = time.process_time_ns()\n",
    "for _ in range(COUNT):\n",
    "    stats = list_explicit(list_values)\n",
    "end_time_explicit = time.process_time_ns()\n",
    "\n",
    "# Convert the nanosecond difference to seconds.\n",
    "elapsed_time_explicit = (end_time_explicit - start_time_explicit) / 1_000_000_000\n",
    "\n",
    "print(f\"Elapsed time for list explicit: {elapsed_time_explicit:3.1f} seconds\")\n",
    "\n",
    "# Only the statistics of the last sample drawn are shown.\n",
    "print_stats(stats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae20a093-5f78-485f-bfaa-816239d5bad7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time COUNT calls of list_stats using the process-time clock (ns).\n",
    "start_time_stats = time.process_time_ns()\n",
    "\n",
    "for _ in range(COUNT):\n",
    "    stats = list_stats(list_values)\n",
    "\n",
    "end_time_stats = time.process_time_ns()\n",
    "elapsed_time_stats = (end_time_stats - start_time_stats)/10**9  # ns -> s\n",
    "\n",
    "# The label names the function that was actually timed in this cell.\n",
    "print(f\"Elapsed time for list stats: {elapsed_time_stats:3.1f} seconds\")\n",
    "print_stats(stats)  # statistics of the last sample drawn only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c79a588-965c-4467-9bd1-918676043060",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time COUNT calls of array_stats using the process-time clock (ns).\n",
    "start_time_array = time.process_time_ns()\n",
    "\n",
    "for _ in range(COUNT):\n",
    "    stats = array_stats(array_values)\n",
    "\n",
    "end_time_array = time.process_time_ns()\n",
    "elapsed_time_array = (end_time_array - start_time_array)/10**9  # ns -> s\n",
    "\n",
    "# The label names the function that was actually timed in this cell.\n",
    "print(f\"Elapsed time for array stats: {elapsed_time_array:3.1f} seconds\")\n",
    "print_stats(stats)  # statistics of the last sample drawn only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4bfaa138-22d1-4983-bec0-bbaa0290f4ff",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}