{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "56dece3d-175e-4137-be82-1c2aded4a8e9",
   "metadata": {},
   "source": [
    "### <center>San Jose State University<br>Department of Applied Data Science<br><br>**DATA 200<br>Computational Programming for Data Analytics**<br><br>Spring 2024<br>Instructor: Ron Mak</center>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b922f63e-10c7-424d-b0cc-790d89f36ffc",
   "metadata": {},
   "source": [
    "# Quicksort with a Trace"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0208d73c-4ac5-475d-9115-fb9c4d73beee",
   "metadata": {},
   "source": [
    "#### Perform quicksorts with a trace."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e9b0b0a-11ef-4363-95a1-3d3caa304a91",
   "metadata": {},
   "source": [
    "## Printing functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89c0f8c6-b1ab-4616-8288-2058711b702b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_data(data, label=True):\n",
    "    \"\"\"\n",
    "    Print the values of the data list.\n",
    "    \"\"\"\n",
    "    for value in data:\n",
    "        print(f' {value:2d} ', end='')\n",
    "\n",
    "    if label:\n",
    "        print('   sublist to sort')\n",
    "    else:\n",
    "        print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77ee1d72-89a4-4e9b-8a32-ba0c7ada918c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_markers(left, right):\n",
    "    \"\"\"\n",
    "    Print markers under the sublist to be sorted.\n",
    "    @param left the leftmost index of the sublist.\n",
    "    @param right the rightmost index of the sublist.\n",
    "    \"\"\"\n",
    "    if left > 0:\n",
    "        for _ in range(left):\n",
    "            print('    ', end='')\n",
    "\n",
    "    for _ in range(left, right+1):\n",
    "        print('^^^^', end='')\n",
    "\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af327414-cfde-49e0-aeb5-6178f63649b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_sublists(data, pivot_index):\n",
    "    \"\"\"\n",
    "    Print the sublist after partitioning.\n",
    "    @param left the leftmost index of the sublist.\n",
    "    @param right the rightmost index of the sublist.\n",
    "    \"\"\"\n",
    "    for k in range(len(data)):\n",
    "        value = data[k]\n",
    "        \n",
    "        if k == pivot_index:\n",
    "            print(f'({value:2d})', end='')\n",
    "        else:\n",
    "            print(f' {value:2d} ', end='')\n",
    "\n",
    "    print('   partitioned')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca54c235-a6bf-46fc-b773-e9e21ad12c5f",
   "metadata": {},
   "source": [
    "## Partitioning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6e5951f4-481a-4f8c-b4a0-1aca60a0436f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def partition(data, left, right):\n",
    "    \"\"\"\n",
    "    Partition the part of list data bounded by indexes \n",
    "    left and right.\n",
    "    @param left the leftmost index of the sublist.\n",
    "    @param right the rightmost index of the sublist.\n",
    "    @return the final index of the pivot value.\n",
    "    \"\"\"\n",
    "    # Pick the pivot value.\n",
    "    middle = (left + right)//2\n",
    "    pivot_value = data[middle]\n",
    "\n",
    "    # Swap the pivot value with the rightmost value.\n",
    "    data[middle], data[right] = data[right], data[middle]\n",
    "\n",
    "    i = left - 1\n",
    "    j = right\n",
    "\n",
    "    # As long as i and j don't cross ...\n",
    "    while i < j:\n",
    "\n",
    "        # Move i to the right.\n",
    "        i += 1\n",
    "        while (i < right) and (data[i] <= pivot_value):\n",
    "            i += 1\n",
    "\n",
    "        # Move j to the left.\n",
    "        j -= 1\n",
    "        while (j > left) and (data[j] > pivot_value):\n",
    "            j -= 1\n",
    "\n",
    "        # Swap after i and j have stopped moving.\n",
    "        if (i < j) and (data[i] != data[j]):\n",
    "            data[i], data[j] = data[j], data[i]\n",
    "\n",
    "    # Swap the pivot value from the right end \n",
    "    # to its correct position.\n",
    "    if data[i] != data[right]:\n",
    "        data[i], data[right] = data[right], data[i]\n",
    "\n",
    "    print_sublists(data, i)\n",
    "    print_markers(left, right)\n",
    "    \n",
    "    return i"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aaead8e9-c54d-44c7-aaf6-ba4031a13803",
   "metadata": {},
   "source": [
    "## Recursive quicksort"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f9b386f-dac9-4a5b-baa5-d74745058dd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def recursive_quicksort(data, left, right):\n",
    "    \"\"\"\n",
    "    Recursively quicksort a sublist.\n",
    "    @param left the leftmost index of the sublist.\n",
    "    @param right the rightmost index of the sublist.\n",
    "    \"\"\"\n",
    "    print_data(data)\n",
    "    print_markers(left, right)\n",
    "\n",
    "    size = right - left + 1\n",
    "\n",
    "    # Size > 2: Partition and recursively\n",
    "    #           quicksort the sublists.\n",
    "    if size > 2:\n",
    "        pivot_index = partition(data, left, right)\n",
    "\n",
    "        # Recursively sort the left sublist.\n",
    "        if left != pivot_index:\n",
    "            recursive_quicksort(data, left, pivot_index - 1)\n",
    "\n",
    "        # Recursively sort the right sublist.\n",
    "        if right != pivot_index:\n",
    "            recursive_quicksort(data, pivot_index + 1, right)\n",
    "\n",
    "    # Size == 2: Swap if necessary.\n",
    "    elif (size == 2) and (data[left] > data[right]):\n",
    "        data[left], data[right] =  data[right], data[left]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e59e8d7f-a619-40cc-9ab0-3e58a995a389",
   "metadata": {},
   "outputs": [],
   "source": [
    "def verify_sorted(data):\n",
    "    \"\"\"\n",
    "    Verify that the data list \n",
    "    has been successfully sorted.\n",
    "    \"\"\"\n",
    "    for k in range(1, len(data)):\n",
    "        value = data[k]\n",
    "        prev  = data[k - 1]\n",
    "        \n",
    "        if value < prev:\n",
    "            raise Exception('Sort error: '\n",
    "                            f'{value} < {prev} at {k}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec88ac8d-5670-4505-91a1-734ca0aa9e25",
   "metadata": {},
   "outputs": [],
   "source": [
    "def quicksort(data):\n",
    "    \"\"\"\n",
    "    Quicksort a list of data.\n",
    "    \"\"\"\n",
    "    recursive_quicksort(data, 0, len(data) - 1)\n",
    "    verify_sorted(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "141fb53d-6295-4754-8288-cffe2a5ec97a",
   "metadata": {},
   "source": [
    "## Initial tests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a6c057f-f1a2-474f-be42-41910daf9735",
   "metadata": {},
   "outputs": [],
   "source": [
    "def do_sorting(data):\n",
    "    quicksort(data)\n",
    "    \n",
    "    print('Sorted:')\n",
    "    for value in data:\n",
    "        print(f' {value:2d} ', end='')\n",
    "\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1cca2731-1ed8-4d26-884a-f62271fd3cfc",
   "metadata": {},
   "outputs": [],
   "source": [
    "a = [99, 8, 55, 39, 82, 63, 57, 51, 6, 5, 5]\n",
    "do_sorting(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10fd9712-85d0-4fb5-a359-93ab125f5a4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "a = [12, 73, 87, 43, 23, 85, 19, 70, 80, 30, 87]\n",
    "do_sorting(a)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "252b786a-a6d0-4db1-9428-1f024e693e92",
   "metadata": {},
   "source": [
    "## Pathological cases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e7abf94-009b-4cf1-af9e-9c8661b58009",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Already sorted\n",
    "do_sorting(list(range(10)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfba0f2a-7408-41f2-bae5-0796c5050ccb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Already reverse sorted\n",
    "do_sorting(list(range(10, 0, -1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39d22e30-629f-47f1-89f5-24fbdebe8ab1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All the same\n",
    "do_sorting([5]*11)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6f01bba9-f0a5-49c2-8286-3e542ae78d34",
   "metadata": {},
   "source": [
    "## Random tests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e757b2d-44a8-4e88-aa21-4e59afbaad5f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "SIZE = 11\n",
    "COUNT = 100\n",
    "\n",
    "for n in range(1, COUNT+1):\n",
    "    print()\n",
    "    print()\n",
    "    print(f'SORT TEST #{n}')\n",
    "    print()\n",
    "    \n",
    "    data = [random.randint(10, 99) for _ in range(SIZE)];\n",
    "    do_sorting(data)\n",
    "\n",
    "print()\n",
    "print('DONE!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a78f74bc-e137-43ea-a975-f033aa9625b3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
