import numpy as np
import pandas as pd

Extra: Dataset generator (and NumPy basics)

Uses NumPy to generate a synthetic student dataset

names = ["Fred", "Willam", "Joe", "Robert", "Colin", "Ethan", "James", "Connor", "Thomas", "Hunter", "Jaden", "Leonard", "Fredrick", "Billy"]

def generateStudent(id):
    """Build one synthetic student record.

    Args:
        id: Identifier stored unchanged as the first element of the record.

    Returns:
        A list ``[id, name, hoursOnHw, score]`` where
        * ``name`` is ``"First M. Last"``; each part is drawn independently
          from the module-level ``names`` list,
        * ``hoursOnHw`` is a non-negative float (weekly homework hours after
          log compression),
        * ``score`` is an int clamped to the range 1..5.
    """
    student = [id]

    # Name
    # np.random.randint excludes its upper bound, so the bound has to be
    # len(names) (not len(names) - 1) for the last entry to be selectable.
    first = names[np.random.randint(0, len(names))]
    middle = names[np.random.randint(0, len(names))]
    last = names[np.random.randint(0, len(names))]
    student.append(f"{first} {middle[0]}. {last}")

    # Hours spent on hw (per week)
    rawHours = np.random.random() * 10 + 0.1 # Uniform float in [0.1, 10.1)
    # log2 compresses the upper end to simulate a plateau of hours on hw.
    # It maps raw values below 0.5 to negative numbers, which are not valid
    # hours, so the result is clamped at zero.
    hoursOnHw = max(float(np.log2(rawHours) + 1), 0.0)
    student.append(hoursOnHw)

    # Grade (GPA format)
    skill = np.random.random() * 1.5 + 0.5 # Random skill multiplier in [0.5, 2.0)
    # Round to a whole grade, then clamp into the valid 1..5 range
    score = min(max(int(round(hoursOnHw * skill)), 1), 5)
    student.append(score)

    return student

# Build the roster: one generated record for each id in 0..99
students = [generateStudent(student_id) for student_id in range(100)]

# Emit the roster as CSV: header line first, then one comma-joined line per record
print("id,name,hoursOnHw,score")
for record in students:
    print(",".join(str(field) for field in record))
    
id,name,hoursOnHw,score
0,James L. James,3.237205499127559,2
1,Thomas F. Willam,1.8866150062581544,3
2,Robert L. Willam,3.9336736423431753,5
3,Fredrick H. Hunter,3.089383805772319,2
4,Thomas C. Willam,1.118440208260399,1
5,Hunter L. Leonard,1.413370650835248,2
6,Robert C. Hunter,3.4846642778582115,5
7,Robert J. Connor,2.2668694379195915,2
8,Leonard H. Thomas,2.136136212410632,1
9,Thomas C. Colin,2.2035175947710277,3
10,Thomas R. Colin,3.267364205364564,4
11,Fred C. Thomas,3.976922262810806,4
12,Jaden J. Leonard,0.008254586224481053,1
13,Jaden W. Joe,3.877247058500784,5
14,Leonard F. Hunter,3.2444904915989166,5
15,Fred R. Connor,2.3820990139856155,4
16,Colin R. Fred,3.122709988019411,3
17,Hunter E. Hunter,3.7295739088197752,5
18,Jaden C. Jaden,3.6487428868550107,4
19,Joe C. Thomas,0.5141759632476433,1
20,Ethan J. Colin,1.195405891647107,1
21,Connor J. Robert,3.554175950701118,5
22,Connor J. Fredrick,4.044149690758513,5
23,Leonard E. Fredrick,2.7704028263655465,2
24,Fred W. Fred,2.4302967881387305,4
25,Robert H. Hunter,3.1319460316268795,4
26,Colin J. Thomas,-2.11292218559518,1
27,Joe F. Joe,2.8922521099213796,4
28,Ethan R. Joe,2.4047138391074228,5
29,Leonard C. Willam,3.192671423926749,5
30,Robert F. Joe,0.6487947706176755,1
31,Joe E. Fredrick,1.17700383879629,2
32,Connor C. Leonard,1.826023723600838,2
33,Joe R. James,3.9976862761842717,5
34,Colin F. Robert,4.243492716758775,5
35,Willam C. Willam,1.0237861994044801,2
36,Ethan E. Joe,2.7842687494943696,5
37,Jaden J. Ethan,-0.784214335001546,1
38,James F. Leonard,2.5566136497701915,3
39,Leonard J. Fredrick,-2.110073446137993,1
40,James F. Fred,3.4180636446455854,5
41,Colin H. Willam,3.950700822045025,5
42,Joe T. Ethan,1.3117832788867816,1
43,Colin F. James,3.7485005439225794,5
44,Joe J. Thomas,3.64588086419979,5
45,Colin T. Thomas,3.833918514126234,5
46,Colin H. Thomas,3.192625897023759,3
47,Colin J. Joe,3.8146467830747506,2
48,Hunter W. Joe,2.074056187980434,3
49,Fred J. Hunter,3.8070147149977505,5
50,Willam J. Ethan,3.4839454196094883,2
51,Fred J. Ethan,0.5808229174013819,1
52,Colin E. Fredrick,3.3329723908985645,3
53,Colin J. Colin,4.038723252933065,5
54,Ethan L. Robert,0.2722981733313259,1
55,Willam F. Ethan,1.8533879024498976,3
56,Fred J. Leonard,3.4354661270531928,2
57,Ethan C. Joe,3.6844437989248426,5
58,Colin J. Willam,2.8557782404161176,3
59,Willam R. Colin,3.797575090780232,4
60,Willam C. Joe,3.976815460012645,5
61,Ethan J. Hunter,4.168114341593068,5
62,James T. Fred,4.176434900487779,2
63,Fred C. James,3.5508214754871945,4
64,Ethan W. Leonard,3.0758268603424,2
65,Leonard C. James,3.1157066801619626,2
66,Joe F. Ethan,4.122971604575881,2
67,Colin F. Fred,1.7903054741572055,3
68,Fred R. Hunter,4.121680902345879,3
69,Thomas F. Robert,4.172676019906641,5
70,Jaden F. Willam,-1.5304205344377828,1
71,Hunter T. Robert,3.2932969214236243,2
72,Hunter F. Robert,-1.4360322639724372,1
73,Connor R. James,1.1617598190020573,2
74,Joe H. Jaden,4.11206620670831,4
75,Joe L. Fredrick,3.8384989805382035,5
76,Fred F. Hunter,3.1598312206796746,3
77,Fredrick R. Joe,3.556702066873676,3
78,Hunter C. Hunter,3.343184629595175,5
79,Colin F. Ethan,3.7542353270054916,4
80,Jaden T. Connor,3.4790998099398807,4
81,Connor L. Thomas,3.745264709856848,5
82,Fredrick W. Hunter,3.0087495864639378,5
83,James L. Hunter,4.199223789188592,5
84,Fred J. Ethan,3.791093211679164,3
85,James R. Fredrick,3.9493574215670293,4
86,Connor J. Jaden,3.819640337390598,3
87,James J. Robert,2.807095290068344,5
88,Thomas E. Hunter,3.3295936615811637,4
89,Fredrick J. Willam,2.873428570292788,3
90,Hunter C. Fred,1.8428613619618734,3
91,Thomas J. Willam,3.745028082989526,5
92,Joe C. Thomas,3.66404709347185,5
93,Fredrick J. James,3.988131289529417,4
94,Fred C. Joe,3.9264624423064283,5
95,James L. Jaden,3.904508701836713,5
96,Fredrick L. Robert,4.284270438653071,5
97,Thomas W. Robert,3.839825401723429,2
98,Thomas T. Robert,3.6476916004296114,5
99,Connor J. Hunter,4.242607545506144,4

Pandas

Load data

# genfromtxt with dtype=str returns every cell as text, header line included.
# Split the first row off and use it as the column labels so it does not end
# up as a data row in the frame.
raw = np.genfromtxt('files/students.csv', delimiter=',', dtype=str, encoding='utf-8')
students = pd.DataFrame(raw[1:], columns=raw[0].tolist())
# Everything was read as strings; restore numeric dtypes so the columns can
# be used in calculations (column names follow the CSV header).
students = students.astype({'id': int, 'hoursOnHw': float, 'score': int})

print(students)
      0                   1                     2      3
0    id                name             hoursOnHw  score
1     0   Leonard W. Hunter     4.249093837849809      3
2     1  Fredrick R. Hunter    1.4185357714076718      2
3     2    Willam T. Willam    -0.952639561629149      1
4     3    Leonard J. Ethan   0.45393251649677235      1
..   ..                 ...                   ...    ...
96   95     Fred J. Leonard     3.415124645123977      4
97   96       Joe H. Thomas    2.6327753622042156      1
98   97    James F. Leonard    3.0453511539690554      2
99   98    Thomas L. Thomas  0.031164846240109734      1
100  99  Willam J. Fredrick    2.5751866038250726      2

[101 rows x 4 columns]

Analyze data

Find max, min, mean, and median

import pandas as pd

# Load the dataset; read_csv treats the first line of the file as the header
df = pd.read_csv('files/students.csv')

# Summary statistics for the homework-hours column
hours = df['hoursOnHw']
hoursOnHw_max, hoursOnHw_min = hours.max(), hours.min()
hoursOnHw_mean, hoursOnHw_median = hours.mean(), hours.median()

# Summary statistics for the score column
scores = df['score']
score_max, score_min = scores.max(), scores.min()
score_mean, score_median = scores.mean(), scores.median()

# One report line per statistic, printed in a single call
summary_lines = (
    f'Max hoursOnHw: {hoursOnHw_max}',
    f'Min hoursOnHw: {hoursOnHw_min}',
    f'Average hoursOnHw: {hoursOnHw_mean}',
    f'Median hoursOnHw: {hoursOnHw_median}',
    f'Max score: {score_max}',
    f'Min score: {score_min}',
    f'Average score: {score_mean}',
    f'Median score: {score_median}',
)
print('\n'.join(summary_lines))
Max hoursOnHw: 4.322522513071912
Min hoursOnHw: -1.2196529681089996
Average hoursOnHw: 2.9180861885936635
Median hoursOnHw: 3.2371786874994823
Max score: 5
Min score: 1
Average score: 3.29
Median score: 3.0

Sort data

# Reorder the rows from fewest to most homework hours (ascending is the default)
df = df.sort_values('hoursOnHw')
print("Sorted by time on hw:", df, sep="\n")
Sorted by time on hw:
    id                name  hoursOnHw  score
70  70     Jaden W. Robert  -1.219653      1
23  23    Colin T. Leonard  -1.068941      1
2    2    Willam T. Willam  -0.952640      1
98  98    Thomas L. Thomas   0.031165      1
10  10      James H. James   0.039333      1
..  ..                 ...        ...    ...
12  12    Fredrick E. Fred   4.275583      5
64  64  Connor E. Fredrick   4.276815      5
63  63   Fredrick J. James   4.285514      5
19  19    Hunter J. Hunter   4.301619      5
60  60   Leonard H. Hunter   4.322523      5

[100 rows x 4 columns]
# Sort by score, as the label below says (this cell previously re-sorted by
# 'hoursOnHw'). Many students share a score, so 'hoursOnHw' is used as a
# secondary key to make the order within each score deterministic.
df = df.sort_values(by=['score', 'hoursOnHw'])
print("Sorted by scores:")
print(df)
Sorted by scores:
    id                name  hoursOnHw  score
70  70     Jaden W. Robert  -1.219653      1
23  23    Colin T. Leonard  -1.068941      1
2    2    Willam T. Willam  -0.952640      1
98  98    Thomas L. Thomas   0.031165      1
10  10      James H. James   0.039333      1
..  ..                 ...        ...    ...
12  12    Fredrick E. Fred   4.275583      5
64  64  Connor E. Fredrick   4.276815      5
63  63   Fredrick J. James   4.285514      5
19  19    Hunter J. Hunter   4.301619      5
60  60   Leonard H. Hunter   4.322523      5

[100 rows x 4 columns]

Merge with another dataframe

# One-row frame for the extra student, with the same four columns as df
yeungdf = pd.DataFrame([{'id': 1000, 'name': 'Sean Yeung', 'hoursOnHw': 10, 'score': 5}])
# Stack it under the existing rows; ignore_index renumbers the index from 0
df = pd.concat([df, yeungdf], ignore_index=True)

print(df)
       id                name  hoursOnHw  score
0      70     Jaden W. Robert  -1.219653      1
1      23    Colin T. Leonard  -1.068941      1
2       2    Willam T. Willam  -0.952640      1
3      98    Thomas L. Thomas   0.031165      1
4      10      James H. James   0.039333      1
..    ...                 ...        ...    ...
96     64  Connor E. Fredrick   4.276815      5
97     63   Fredrick J. James   4.285514      5
98     19    Hunter J. Hunter   4.301619      5
99     60   Leonard H. Hunter   4.322523      5
100  1000          Sean Yeung  10.000000      5

[101 rows x 4 columns]

2D and 3D arrays

import numpy as np

# 3 rows x 4 columns of uniform random floats in [0, 1)
array2d = np.random.rand(3, 4)
print("2D array:", array2d, sep="\n")

# 2 stacked 3x4 matrices of uniform random floats in [0, 1)
array3d = np.random.rand(2, 3, 4)
print("3D array:", array3d, sep="\n")
2D array:
[[0.18292427 0.39108696 0.20552718 0.6509084 ]
 [0.17755348 0.90863221 0.41077616 0.5872449 ]
 [0.08287332 0.93492656 0.07489242 0.01677898]]
3D array:
[[[0.10186556 0.57711445 0.59355299 0.62382982]
  [0.98785013 0.29421096 0.23023936 0.6372783 ]
  [0.27195128 0.78486246 0.09230268 0.7529436 ]]

 [[0.55221864 0.59038316 0.24381217 0.5388756 ]
  [0.94727647 0.09412329 0.51256773 0.93349574]
  [0.47361539 0.67561061 0.99036649 0.40945497]]]