{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "72919405-0d48-4527-baa6-fa0c1288c97a",
   "metadata": {},
   "source": [
    "# Timing comparisons for random sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b03155b6-489e-46a1-8183-cccf5691c07c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import numpy as np\n",
    "\n",
    "ALL_SIZE = 1_000_000\n",
    "SAMPLE_SIZE = 100\n",
    "MAX_VALUE = 100\n",
    "\n",
    "# Seed both random number generators so that Restart & Run All\n",
    "# reproduces the same generated values and the same samples.\n",
    "SEED = 42\n",
    "random.seed(SEED)\n",
    "np.random.seed(SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0d52c07-56ea-4392-9eb1-9ad3d9f91dcf",
   "metadata": {},
   "source": [
    "## Generate random values for the list and the `numpy` array."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff2116f0-bab0-43af-bd41-a023ea4b639f",
   "metadata": {},
   "outputs": [],
   "source": [
    "list_values = [ random.randint(1, MAX_VALUE) for _ in range(ALL_SIZE) ]\n",
    "array_values = np.array(list_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e19631b9-6959-4776-81ec-1d181cb9eafd",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_stats(stats):\n",
    "    \"\"\"Print a sample's sum, mean, and median, one per line.\n",
    "\n",
    "    stats must unpack into exactly three values, in the order\n",
    "    (sum, mean, median). The sum is formatted as an integer with\n",
    "    thousands separators; the mean and median with two decimals.\n",
    "    \"\"\"\n",
    "    sample_sum, sample_mean, sample_median = stats\n",
    "\n",
    "    print(f'{sample_sum = :,d}')\n",
    "    print(f'{sample_mean = :5.2f}')\n",
    "    print(f'{sample_median = :5.2f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7e54d19-b80d-4487-b524-abdfcf3c3cf0",
   "metadata": {},
   "source": [
    "## Use lists and explicit calculations."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a24d895-5abc-4919-8003-43a7d8bc077d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "def list_explicit(values):\n",
    "    \"\"\"Sample SAMPLE_SIZE values and compute sum, mean, and median by hand.\n",
    "\n",
    "    Explicit loops are used on purpose: this function is the pure-Python\n",
    "    baseline of the timing comparison.\n",
    "\n",
    "    values must be indexable at every position in 0..ALL_SIZE-1, because\n",
    "    the sample indices are drawn from that range (repeats are possible).\n",
    "\n",
    "    Returns the tuple (total, mean, median).\n",
    "    \"\"\"\n",
    "    # Generate a random sample.\n",
    "    sample = [0]*SAMPLE_SIZE\n",
    "    for i in range(SAMPLE_SIZE):\n",
    "        index = random.randint(0, ALL_SIZE-1)\n",
    "        sample[i] = values[index]\n",
    "\n",
    "    # Total\n",
    "    total = 0\n",
    "    for v in sample:\n",
    "        total += v\n",
    "\n",
    "    # Mean\n",
    "    mean = total/SAMPLE_SIZE\n",
    "\n",
    "    # Median: with 0-based indexing the middle of an odd-sized sorted\n",
    "    # sample is at SAMPLE_SIZE//2; for an even size the two middle\n",
    "    # elements are at SAMPLE_SIZE//2 - 1 and SAMPLE_SIZE//2.\n",
    "    sorted_list = sorted(sample)\n",
    "    mid_index = SAMPLE_SIZE//2\n",
    "    if SAMPLE_SIZE%2 == 1:\n",
    "        median = sorted_list[mid_index]\n",
    "    else:\n",
    "        median = (  sorted_list[mid_index-1]\n",
    "                  + sorted_list[mid_index])/2\n",
    "\n",
    "    return total, mean, median"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a88f755d-7332-4e6e-98e6-93890fc4b41c",
   "metadata": {},
   "outputs": [],
   "source": [
    "stats = list_explicit(list_values)\n",
    "print_stats(stats)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f4ee9af9-9816-4dc1-ad5b-beb8fbe1eb34",
   "metadata": {},
   "source": [
    "## Use lists and the statistics module."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6a065a7-cdcd-4a2e-9346-625d306ebc6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import statistics\n",
    "\n",
    "def list_stats(values):\n",
    "    \"\"\"Sample a list at random and summarize it with library helpers.\n",
    "\n",
    "    SAMPLE_SIZE elements are picked at random indices in 0..ALL_SIZE-1\n",
    "    (repeats are possible). The total comes from sum() and the median\n",
    "    from statistics.median().\n",
    "\n",
    "    Returns the tuple (total, mean, median).\n",
    "    \"\"\"\n",
    "    # Fill the sample one random index at a time.\n",
    "    picked = [0] * SAMPLE_SIZE\n",
    "    for slot in range(SAMPLE_SIZE):\n",
    "        picked[slot] = values[random.randint(0, ALL_SIZE - 1)]\n",
    "\n",
    "    sample_total = sum(picked)\n",
    "    return sample_total, sample_total / SAMPLE_SIZE, statistics.median(picked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9c51fe7-c443-4b92-af32-9a5604f24261",
   "metadata": {},
   "outputs": [],
   "source": [
    "stats = list_stats(list_values)\n",
    "print_stats(stats)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "64d652b0-0d9e-4e10-a15d-e73c789c6b91",
   "metadata": {},
   "source": [
    "## Use `numpy` arrays and functions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6dcc99fc-d2f9-4f63-8c68-49f2e829e0fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def array_stats(values):\n",
    "    \"\"\"Sample with numpy and summarize the sample with numpy functions.\n",
    "\n",
    "    SAMPLE_SIZE elements are chosen by np.random.choice(); the total\n",
    "    comes from np.sum() and the median from np.median().\n",
    "\n",
    "    Returns the tuple (total, mean, median).\n",
    "    \"\"\"\n",
    "    drawn = np.random.choice(values, size=SAMPLE_SIZE)\n",
    "\n",
    "    sample_total = np.sum(drawn)\n",
    "    return sample_total, sample_total / SAMPLE_SIZE, np.median(drawn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "545cc98c-c7bc-4903-8795-b96bceccffe8",
   "metadata": {},
   "outputs": [],
   "source": [
    "stats = array_stats(array_values)\n",
    "print_stats(stats)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8979fa9e-6b0b-47e0-8297-ae69ceb2d4fc",
   "metadata": {},
   "source": [
    "## Timing comparisons."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa0fa91f-ba33-49b0-a6fc-546e0df2befc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "COUNT = 100_000  # count of random samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "906a8612-a9e8-424b-a374-5b69cc74b398",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time COUNT runs of the explicit-loop version.\n",
    "# process_time_ns() reports CPU time of this process, not wall-clock time.\n",
    "start_time_explicit = time.process_time_ns()\n",
    "\n",
    "for _ in range(COUNT):\n",
    "    stats = list_explicit(list_values)\n",
    "\n",
    "end_time_explicit = time.process_time_ns()\n",
    "elapsed_time_explicit = (end_time_explicit - start_time_explicit)/10**9  # ns -> s\n",
    "\n",
    "print(f\"Elapsed time for list explicit: {elapsed_time_explicit:3.1f} seconds\")\n",
    "print_stats(stats)  # stats of the last sample only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae20a093-5f78-485f-bfaa-816239d5bad7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time COUNT runs of the list + statistics-module version.\n",
    "start_time_stats = time.process_time_ns()\n",
    "\n",
    "for _ in range(COUNT):\n",
    "    stats = list_stats(list_values)\n",
    "\n",
    "end_time_stats = time.process_time_ns()\n",
    "elapsed_time_stats = (end_time_stats - start_time_stats)/10**9  # ns -> s\n",
    "\n",
    "print(f\"Elapsed time for list stats: {elapsed_time_stats:3.1f} seconds\")\n",
    "print_stats(stats)  # stats of the last sample only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c79a588-965c-4467-9bd1-918676043060",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time COUNT runs of the numpy version.\n",
    "start_time_array = time.process_time_ns()\n",
    "\n",
    "for _ in range(COUNT):\n",
    "    stats = array_stats(array_values)\n",
    "\n",
    "end_time_array = time.process_time_ns()\n",
    "elapsed_time_array = (end_time_array - start_time_array)/10**9  # ns -> s\n",
    "\n",
    "print(f\"Elapsed time for array stats: {elapsed_time_array:3.1f} seconds\")\n",
    "print_stats(stats)  # stats of the last sample only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4bfaa138-22d1-4983-bec0-bbaa0290f4ff",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
"codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }