import numpy as np
import pandas as pd

Extra: Dataset generator (and NumPy basics)

Uses NumPy's random number functions to generate a synthetic student dataset.

# Pool of given names; first, middle-initial and last names are all drawn from it.
names = ["Fred", "Willam", "Joe", "Robert", "Colin", "Ethan", "James", "Connor", "Thomas", "Hunter", "Jaden", "Leonard", "Fredrick", "Billy"]


def generateStudent(id):
    """Build one synthetic student record.

    Args:
        id: Identifier stored as the first field of the record.

    Returns:
        A list ``[id, name, hoursOnHw, score]`` where ``name`` has the form
        "First M. Last" (all parts drawn from ``names``), ``hoursOnHw`` is the
        log-scaled weekly homework time, and ``score`` is an integer grade
        clamped to the range 1..5.
    """
    def pick_name():
        # randint's upper bound is exclusive, so pass len(names) (not
        # len(names) - 1) to keep the last entry reachable.
        return names[np.random.randint(0, len(names))]

    # Name: first name, middle initial, last name (drawn in that order)
    name = f"{pick_name()} {pick_name()[0]}. {pick_name()}"

    # Hours spent on hw (per week)
    rawHours = np.random.random() * 10 + 0.1  # Float in [0.1, 10.1)
    # Log scaling simulates a plateau of hours on hw; it can go negative
    # for raw values below 0.5.
    hoursOnHw = float(np.log2(rawHours) + 1)

    # Grade (GPA format)
    skill = np.random.random() * 1.5 + 0.5  # Random skill multiplier in [0.5, 2)
    score = int(round(hoursOnHw * skill))
    # Clamp to the valid grade range.
    score = min(5, max(1, score))

    return [id, name, hoursOnHw, score]

# Create students: one generated record per id 0..99
students = [generateStudent(i) for i in range(100)]

# Print in csv format: every field of a record on one comma-separated line
# NOTE(review): no header line is emitted here, but files/students.csv as read
# later has an "id,name,hoursOnHw,score" header row; confirm how it is added.
for student in students:
    print(",".join(str(field) for field in student))
Analyze data

# Load the CSV as raw strings via NumPy and wrap it in a DataFrame.
# With dtype=str and no header handling, the header line is loaded as an
# ordinary data row (row 0) and the columns get integer labels 0..3.
students = pd.DataFrame(np.genfromtxt('files/students.csv', delimiter=',', dtype=str, encoding='utf-8'))
print(students)

      0                   1                     2      3
0    id                name             hoursOnHw  score
1     0   Leonard W. Hunter     4.249093837849809      3
2     1  Fredrick R. Hunter    1.4185357714076718      2
3     2    Willam T. Willam    -0.952639561629149      1
4     3    Leonard J. Ethan   0.45393251649677235      1
..   ..                 ...                   ...    ...
96   95     Fred J. Leonard     3.415124645123977      4
97   96       Joe H. Thomas    2.6327753622042156      1
98   97    James F. Leonard    3.0453511539690554      2
99   98    Thomas L. Thomas  0.031164846240109734      1
100  99  Willam J. Fredrick    2.5751866038250726      2

[101 rows x 4 columns]

Find max, min, mean, and median

import pandas as pd

# Read the dataset with pandas so the header row supplies the column names.
df = pd.read_csv('files/students.csv')

# Pull each column of interest out once, then derive its statistics.
hours_col = df['hoursOnHw']
score_col = df['score']

hoursOnHw_max, hoursOnHw_min = hours_col.max(), hours_col.min()
hoursOnHw_mean, hoursOnHw_median = hours_col.mean(), hours_col.median()

score_max, score_min = score_col.max(), score_col.min()
score_mean, score_median = score_col.mean(), score_col.median()

# Report the homework-hours statistics first, then the score statistics.
for report_line in (
    f'Max hoursOnHw: {hoursOnHw_max}',
    f'Min hoursOnHw: {hoursOnHw_min}',
    f'Average hoursOnHw: {hoursOnHw_mean}',
    f'Median hoursOnHw: {hoursOnHw_median}',
    f'Max score: {score_max}',
    f'Min score: {score_min}',
    f'Average score: {score_mean}',
    f'Median score: {score_median}',
):
    print(report_line)
Max hoursOnHw: 4.322522513071912
Min hoursOnHw: -1.2196529681089996
Average hoursOnHw: 2.9180861885936635
Median hoursOnHw: 3.2371786874994823
Max score: 5
Min score: 1
Average score: 3.29
Median score: 3.0

Sort data

# Order the rows by ascending homework hours; the original index labels are
# kept, so they appear out of order in the printed frame.
df = df.sort_values(by=['hoursOnHw'])
print("Sorted by time on hw:")
print(df)
Sorted by time on hw:
    id                name  hoursOnHw  score
70  70     Jaden W. Robert  -1.219653      1
23  23    Colin T. Leonard  -1.068941      1
2    2    Willam T. Willam  -0.952640      1
98  98    Thomas L. Thomas   0.031165      1
10  10      James H. James   0.039333      1
..  ..                 ...        ...    ...
12  12    Fredrick E. Fred   4.275583      5
64  64  Connor E. Fredrick   4.276815      5
63  63   Fredrick J. James   4.285514      5
19  19    Hunter J. Hunter   4.301619      5
60  60   Leonard H. Hunter   4.322523      5

[100 rows x 4 columns]
Merge with another dataframe

# Build a one-row frame with the same columns and append it to df.
# ignore_index=True renumbers the combined rows 0..n-1.
yeungdf = pd.DataFrame({'id': [1000], 'name': ['Sean Yeung'], 'hoursOnHw': [10], 'score': [5]})
df = pd.concat([df, yeungdf], ignore_index=True)
print(df)

       id                name  hoursOnHw  score
0      70     Jaden W. Robert  -1.219653      1
1      23    Colin T. Leonard  -1.068941      1
2       2    Willam T. Willam  -0.952640      1
3      98    Thomas L. Thomas   0.031165      1
4      10      James H. James   0.039333      1
..    ...                 ...        ...    ...
96     64  Connor E. Fredrick   4.276815      5
97     63   Fredrick J. James   4.285514      5
98     19    Hunter J. Hunter   4.301619      5
99     60   Leonard H. Hunter   4.322523      5
100  1000          Sean Yeung  10.000000      5

[101 rows x 4 columns]

2D and 3D arrays

import numpy as np

# 2D array: 3 rows x 4 columns of uniform random floats in [0, 1)
array2d = np.random.rand(3, 4)
print("2D array:")
print(array2d)

# 3D array: 2 blocks, each 3 rows x 4 columns
array3d = np.random.rand(2, 3, 4)
print("3D array:")
print(array3d)
2D array:
[[0.18292427 0.39108696 0.20552718 0.6509084 ]
 [0.17755348 0.90863221 0.41077616 0.5872449 ]
 [0.08287332 0.93492656 0.07489242 0.01677898]]
3D array:
[[[0.10186556 0.57711445 0.59355299 0.62382982]
  [0.98785013 0.29421096 0.23023936 0.6372783 ]
  [0.27195128 0.78486246 0.09230268 0.7529436 ]]

 [[0.55221864 0.59038316 0.24381217 0.5388756 ]
  [0.94727647 0.09412329 0.51256773 0.93349574]
  [0.47361539 0.67561061 0.99036649 0.40945497]]]