# Timing comparisons for random sampling

In [None]:
import random
import numpy as np

# Number of values in the full population that the samples are drawn from.
ALL_SIZE = 1_000_000
# Number of values drawn for each random sample.
SAMPLE_SIZE = 100
# Upper bound of the generated values: random integers from 1 to MAX_VALUE.
MAX_VALUE = 100

## Generate random values for the list and the `numpy` array.

In [None]:
# Population as a plain Python list: ALL_SIZE random integers, each drawn
# from 1 to MAX_VALUE inclusive.
list_values = [ random.randint(1, MAX_VALUE) for _ in range(ALL_SIZE) ]
# The same values as a numpy array, so the list-based and array-based
# implementations sample from identical data.
array_values = np.array(list_values)

In [None]:
def print_stats(stats):
    """Print the sum, mean, and median of a sample, one per line.

    Args:
        stats: A sequence of exactly three items, in the order
            (sum, mean, median). The sum is formatted as an integer with
            thousands separators; the mean and median are formatted with
            two decimal places.
    """
    sample_sum, sample_mean, sample_median = stats

    # The `=` specifier echoes the variable name in front of each value,
    # so the local names above are part of the printed output.
    print(
        f'{sample_sum = :,d}',
        f'{sample_mean = :5.2f}',
        f'{sample_median = :5.2f}',
        sep='\n',
    )

## Use lists and explicit calculations.

In [None]:
import math

def list_explicit(values, sample_size=None):
    """Sample from a list and compute sum, mean, and median by hand.

    Every step is written out explicitly (no `sum`, no `statistics`
    module) so that its cost can be compared with the library-based
    implementations.

    Args:
        values: A non-empty indexable sequence of numbers to sample from.
        sample_size: How many values to draw, with replacement. Must be
            positive. Defaults to the module-level SAMPLE_SIZE.

    Returns:
        A tuple (total, mean, median) of the drawn sample. For an
        even-sized sample the median is the average of the two middle
        values.
    """
    if sample_size is None:
        sample_size = SAMPLE_SIZE

    # Index against the actual length of `values` rather than a global
    # size, so the function is correct for any input sequence.
    last_index = len(values) - 1

    # Generate a random sample.
    sample = [0]*sample_size
    for i in range(sample_size):
        index = random.randint(0, last_index)
        sample[i] = values[index]

    # Total
    total = 0
    for v in sample:
        total += v

    # Mean
    mean = total/sample_size

    # Median: with 0-based indexing the middle element of an odd-sized
    # sorted list is at n//2, and the two middle elements of an
    # even-sized one are at n//2 - 1 and n//2.
    sorted_list = sorted(sample)
    mid_index = sample_size//2
    if sample_size%2 == 1:
        median = sorted_list[mid_index]
    else:
        median = (sorted_list[mid_index - 1] + sorted_list[mid_index])/2

    return total, mean, median

In [None]:
stats = list_explicit(list_values)
print_stats(stats)

## Use lists and the statistics module.

In [None]:
import statistics

def list_stats(values, sample_size=None):
    """Sample from a list and compute statistics with library helpers.

    The sample is drawn with the same explicit loop as `list_explicit`
    so that a timing comparison between the two only reflects the cost
    of the statistics calculations.

    Args:
        values: A non-empty indexable sequence of numbers to sample from.
        sample_size: How many values to draw, with replacement. Must be
            positive. Defaults to the module-level SAMPLE_SIZE.

    Returns:
        A tuple (total, mean, median) of the drawn sample, where the
        total comes from `sum` and the median from `statistics.median`.
    """
    if sample_size is None:
        sample_size = SAMPLE_SIZE

    # Index against the actual length of `values` rather than a global
    # size, so the function is correct for any input sequence.
    last_index = len(values) - 1

    # Generate a random sample.
    sample = [0]*sample_size
    for i in range(sample_size):
        index = random.randint(0, last_index)
        sample[i] = values[index]

    # Total, mean, and median
    total = sum(sample)
    mean = total/sample_size
    median = statistics.median(sample)

    return total, mean, median

In [None]:
stats = list_stats(list_values)
print_stats(stats)

## Use `numpy` arrays and functions.

In [None]:
def array_stats(values, sample_size=None):
    """Sample from a numpy array and compute statistics with numpy.

    Args:
        values: A non-empty one-dimensional array (or array-like) of
            numbers to sample from.
        sample_size: How many values to draw. `np.random.choice` samples
            with replacement by default. Must be positive. Defaults to
            the module-level SAMPLE_SIZE.

    Returns:
        A tuple (total, mean, median) of the drawn sample, as numpy
        scalars.
    """
    if sample_size is None:
        sample_size = SAMPLE_SIZE

    # Generate a random sample.
    sample = np.random.choice(values, size=sample_size)

    # Total, mean, and median
    total = np.sum(sample)
    mean = total/sample_size
    median = np.median(sample)

    return total, mean, median

In [None]:
stats = array_stats(array_values)
print_stats(stats)

## Timing comparisons.

In [None]:
import time

# Number of times each implementation is called in the timing loops;
# every call draws one new random sample.
COUNT = 100_000 # count of random samples

In [None]:
# Time COUNT calls of list_explicit on the list population.
# process_time_ns() reports CPU time in nanoseconds, so the difference
# is divided by 10**9 to convert it to seconds.
start_time_explicit = time.process_time_ns()

for _ in range(COUNT):
 stats = list_explicit(list_values)

end_time_explicit = time.process_time_ns()
elapsed_time_explicit = (end_time_explicit - start_time_explicit)/10**9

print(f"Elapsed time for list explicit: {elapsed_time_explicit:3.1f} seconds")
# Only the statistics of the last sample are shown; each loop iteration
# overwrites `stats`.
print_stats(stats)

In [None]:
# Time COUNT calls of list_stats on the list population.
# process_time_ns() reports CPU time in nanoseconds, so the difference
# is divided by 10**9 to convert it to seconds.
start_time_stats = time.process_time_ns()

for _ in range(COUNT):
    stats = list_stats(list_values)

end_time_stats = time.process_time_ns()
elapsed_time_stats = (end_time_stats - start_time_stats)/10**9

# Label this result as the statistics-module run so it is not confused
# with the explicit-calculation timing.
print(f"Elapsed time for list stats: {elapsed_time_stats:3.1f} seconds")
# Only the statistics of the last sample are shown; each loop iteration
# overwrites `stats`.
print_stats(stats)

In [None]:
# Time COUNT calls of array_stats on the numpy population.
# process_time_ns() reports CPU time in nanoseconds, so the difference
# is divided by 10**9 to convert it to seconds.
start_time_array = time.process_time_ns()

for _ in range(COUNT):
    stats = array_stats(array_values)

end_time_array = time.process_time_ns()
elapsed_time_array = (end_time_array - start_time_array)/10**9

# Label this result as the numpy run so it is not confused with the
# list-based timings.
print(f"Elapsed time for array stats: {elapsed_time_array:3.1f} seconds")
# Only the statistics of the last sample are shown; each loop iteration
# overwrites `stats`.
print_stats(stats)