{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Compute the variance and standard deviation from their definitions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import statistics\n", "import math\n", "\n", "def stats_of_list(values):\n", " \"\"\"\n", " Compute and return a tuple of the mean, population variance, \n", " and population standarard deviation of a list of values.\n", " @param values the list of values.\n", " @return a tuple of the mean, variance, and standard deviation.\n", " \"\"\"\n", " \n", " sum_of_squares = 0\n", " mean = statistics.mean(values)\n", " \n", " for x in values:\n", " diff = x - mean\n", " sum_of_squares += diff*diff\n", " \n", " variance = sum_of_squares/len(values)\n", " stdev = math.sqrt(variance)\n", " \n", " return (mean, variance, stdev)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Test with a sequence of normally distributed random values." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import random\n", "\n", "mu = 50\n", "sigma = mu/2\n", "count = 1_000_000\n", "\n", "x_list = [random.gauss(mu, sigma) for _ in range(0, count)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x_mean, x_variance, x_stdev = stats_of_list(x_list)\n", "\n", "print(f' mean = {x_mean:.2f}')\n", "print(f' variance = {x_variance:.2f}')\n", "print(f'standard deviation = {x_stdev:.2f}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## statistics.pvariance and statistics.pstdev functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(f'statistics.pvariance(x_list) = {statistics.pvariance(x_list):.2f}')\n", "print(f'statistics.pstdev(x_list) = {statistics.pstdev(x_list):.2f}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Test the standard deviation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def test_stdev(values, mean, stdev):\n", " \"\"\"\n", " Test the standard deviation by computing the percentage\n", " of values within one standard deviation of the mean.\n", " @param values the list of values.\n", " @param mean the mean of the values.\n", " @param stdev the standard deviation of the values.\n", " @return the percentage of values.\n", " \"\"\"\n", " \n", " total_count = len(values)\n", " range_count = 0;\n", " \n", " lo_range = mean - stdev\n", " hi_range = mean + stdev\n", " \n", " for x in values:\n", " if (x > lo_range) and (x < hi_range):\n", " range_count += 1\n", " \n", " return 100*range_count/total_count" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pct = test_stdev(x_list, x_mean, x_stdev)\n", "print(f'Percentage of values within one standard deviation of the mean: {pct:.2f}%')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 }