{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Analysis of California County Expenditures"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "counties = pd.read_csv('https://bythenumbers.sco.ca.gov/api/views/uctr-c2j8/rows.csv?accessType=DOWNLOAD')  \n",
    "counties.columns = ['Entity Name', 'Fiscal Year', 'Type', 'Form/Table', 'Category', 'Subcategory 1', 'Subcategory 2', 'Line Description', 'Values', 'Zip Code', 'Estimated Population', 'Area in Sq. Miles', 'Row Number']\n",
    "\n",
    "pd.set_option('precision', 2)  # format for floating-point values\n",
    "counties.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#counties.tail()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Expenditure Value Counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "transaction_count = len(counties.Values)\n",
    "\n",
    "# check for intergers in Value column vs null, then filter out '0' values\n",
    "good_values = [Values for Values in counties.Values if not math.isnan(Values)]\n",
    "good_values = [Values for Values in counties.Values if Values != 0]\n",
    "good_values_count = len(good_values)\n",
    "\n",
    "mean_value = sum(good_values)/good_values_count\n",
    "\n",
    "print(f'count of all Expense Values = {transaction_count}')\n",
    "print(f'count of good Expense Values = {good_values_count}')\n",
    "print(f'mean of good Expense Values = {mean_value:.2f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sorted_good_values = sorted(good_values)\n",
    "print(f'len(sorted(good_values)) = {len(sorted_good_values)}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Descriptive Expenditure Value statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "counties.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# convert Fiscal Year to Date-Time and Row Number to Varchar???"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "counties.hist()  # All numeric values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "min_exp = sorted_good_values[0]\n",
    "max_exp = sorted_good_values[-1]\n",
    "\n",
    "print(f'min of Expenditure Values = {min_exp:.2f}')\n",
    "print(f'max of Expenditure Values = {max_exp:.2f}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
