### <center>San Jose State University<br>Department of Applied Data Science<br><br>**DATA 200<br>Computational Programming for Data Analytics**<br><br>Spring 2024<br>Instructor: Ron Mak</center>

# A quick review of `numpy` arrays

#### As a data analyst, you will be a very frequent user of `numpy` arrays. `numpy` arrays (called `ndarrays`) are multidimensional arrays of ***numeric*** values (integers and floats) that are highly optimized for numerical calculations.

In [247]:
import numpy as np

## Fill an array

In [248]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [249]:
np.ones((2, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [250]:
np.ones((2, 4), dtype=int)

array([[1, 1, 1, 1],
       [1, 1, 1, 1]])

In [251]:
np.full((3, 5), 13)

array([[13, 13, 13, 13, 13],
       [13, 13, 13, 13, 13],
       [13, 13, 13, 13, 13]])

In [252]:
np.full((3, 5), 13, dtype=float)

array([[13., 13., 13., 13., 13.],
       [13., 13., 13., 13., 13.],
       [13., 13., 13., 13., 13.]])

## Reshape an array

In [253]:
# np.arange(start, stop) creates the integers start..stop-1 directly as an
# ndarray, so no intermediate Python list is needed.
one_dimensional = np.arange(1, 16)
one_dimensional

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [254]:
two_dimensional = one_dimensional.reshape(3, 5)
two_dimensional

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [255]:
two_dimensional.shape

(3, 5)

In [256]:
back_to_one_dimensional = two_dimensional.flatten()
back_to_one_dimensional

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

#### -1 means "as many rows/columns as necessary".

In [257]:
# Sixteen consecutive integers (1..16), created directly as an ndarray.
an_array = np.arange(1, 17)
# -1 lets numpy work out the row count: 16 elements / 8 columns = 2 rows.
an_array.reshape(-1, 8)

array([[ 1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16]])

In [258]:
an_array.reshape(4, -1)

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16]])

In [259]:
an_array.reshape(-1, 2)

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10],
       [11, 12],
       [13, 14],
       [15, 16]])

## Array arithmetic with a scalar

In [260]:
two_dimensional

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [261]:
two_dimensional_times_ten = 10*two_dimensional
two_dimensional_times_ten

array([[ 10,  20,  30,  40,  50],
       [ 60,  70,  80,  90, 100],
       [110, 120, 130, 140, 150]])

#### Array `two_dimensional` did ***not*** change.

In [262]:
two_dimensional

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

#### Note the important difference between a list and an array!

In [265]:
a_list = [1, 2, 3]
an_array = np.array(a_list)

3*an_array

array([3, 6, 9])

In [266]:
3*a_list

[1, 2, 3, 1, 2, 3, 1, 2, 3]

#### ***Change*** `two_dimensional` by adding 100 to each element.

In [267]:
two_dimensional += 100
two_dimensional

array([[101, 102, 103, 104, 105],
       [106, 107, 108, 109, 110],
       [111, 112, 113, 114, 115]])

## Broadcasting
#### If there is an operation between two arrays of the same shape, the result is a new array created by applying the operation to corresponding elements of the two arrays.

In [268]:
array_one = np.array([1, 2, 3, 4])
array_two = np.array([2, 4, 5, 6])

array_one*array_two

array([ 2,  8, 15, 24])

In [270]:
array_one @ array_two  # inner product

49

## Slicing

In [271]:
# The integers 1..48 laid out as 12 rows of 4 columns; np.arange creates
# the sequence directly as an ndarray before it is reshaped.
an_array = np.arange(1, 49).reshape(12, 4)
an_array

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20],
       [21, 22, 23, 24],
       [25, 26, 27, 28],
       [29, 30, 31, 32],
       [33, 34, 35, 36],
       [37, 38, 39, 40],
       [41, 42, 43, 44],
       [45, 46, 47, 48]])

#### Rows 2 through 4 (the stop index 5 is excluded):

In [272]:
an_array[2:5]

array([[ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20]])

#### Every other row, 2 through 9:

In [273]:
an_array[2:10:2]

array([[ 9, 10, 11, 12],
       [17, 18, 19, 20],
       [25, 26, 27, 28],
       [33, 34, 35, 36]])

#### Rows 2, 5, and 9:

In [274]:
an_array[[2, 5, 9]]

array([[ 9, 10, 11, 12],
       [21, 22, 23, 24],
       [37, 38, 39, 40]])

#### Column 2:

In [275]:
an_array[:, 2]

array([ 3,  7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47])

#### Last column only:

In [276]:
an_array[:, -1]

array([ 4,  8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48])

#### Columns 1 through 3:

In [277]:
an_array[:, 1:4]

array([[ 2,  3,  4],
       [ 6,  7,  8],
       [10, 11, 12],
       [14, 15, 16],
       [18, 19, 20],
       [22, 23, 24],
       [26, 27, 28],
       [30, 31, 32],
       [34, 35, 36],
       [38, 39, 40],
       [42, 43, 44],
       [46, 47, 48]])

#### Columns 0 and 3:

In [278]:
an_array[:, [0, 3]]

array([[ 1,  4],
       [ 5,  8],
       [ 9, 12],
       [13, 16],
       [17, 20],
       [21, 24],
       [25, 28],
       [29, 32],
       [33, 36],
       [37, 40],
       [41, 44],
       [45, 48]])

#### Data rectangle consisting of rows 2 through 5 and columns  1 through 3:

In [279]:
an_array

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20],
       [21, 22, 23, 24],
       [25, 26, 27, 28],
       [29, 30, 31, 32],
       [33, 34, 35, 36],
       [37, 38, 39, 40],
       [41, 42, 43, 44],
       [45, 46, 47, 48]])

In [280]:
an_array[2:6, 1:4]

array([[10, 11, 12],
       [14, 15, 16],
       [18, 19, 20],
       [22, 23, 24]])

## View of an array
#### Method `view()` produces a view that shares the array's elements.

In [281]:
an_array

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20],
       [21, 22, 23, 24],
       [25, 26, 27, 28],
       [29, 30, 31, 32],
       [33, 34, 35, 36],
       [37, 38, 39, 40],
       [41, 42, 43, 44],
       [45, 46, 47, 48]])

In [282]:
a_view = an_array.view()

print('a_view')
a_view

a_view


array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20],
       [21, 22, 23, 24],
       [25, 26, 27, 28],
       [29, 30, 31, 32],
       [33, 34, 35, 36],
       [37, 38, 39, 40],
       [41, 42, 43, 44],
       [45, 46, 47, 48]])

In [283]:
a_view[2][1] = -1

print('an_array')
an_array

an_array


array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, -1, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20],
       [21, 22, 23, 24],
       [25, 26, 27, 28],
       [29, 30, 31, 32],
       [33, 34, 35, 36],
       [37, 38, 39, 40],
       [41, 42, 43, 44],
       [45, 46, 47, 48]])

## `numpy` universal functions

In [284]:
array_one = np.array([1, 2, 3, 4])
array_two = np.array([2, 4, 5, 6])

np.sqrt(array_one)

array([1.        , 1.41421356, 1.73205081, 2.        ])

In [285]:
np.add(array_one, array_two)

array([ 3,  6,  8, 10])

In [286]:
array_one + array_two

array([ 3,  6,  8, 10])

#### In the following examples, function `multiply()` broadcasts row by row.

In [287]:
another_array = np.array([[1, 2, 3],
                          [4, 5, 6]])
another_array

array([[1, 2, 3],
       [4, 5, 6]])

In [288]:
np.multiply(another_array, [10, 100, 1000])

array([[  10,  200, 3000],
       [  40,  500, 6000]])

#### `multiply()` is commutative.

In [289]:
np.multiply([10, 100, 1000], another_array)

array([[  10,  200, 3000],
       [  40,  500, 6000]])

## `numpy` methods

In [290]:
another_array

array([[1, 2, 3],
       [4, 5, 6]])

In [291]:
print(f'{another_array.sum()  = }')
print(f'{another_array.mean() = }')  # average

another_array.sum()  = 21
another_array.mean() = 3.5


In [None]:
# (C) Copyright 2024 by Ronald Mak