import mysql.connector as mys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# connect to database
# this is the same MySQL database used for other things in the course
# the 'student' login has restricted access
# the quiz data is updated from time from time using a different program that reads from the Canvas API
cTBL = mys.connect(host='datalab2.bus.sfu.ca',
user='student',
password='SFU!',
database='CanvasTBL')
# get quiz results for a particular term
quizzes = pd.read_sql('''SELECT CourseName, QuizTitle, Percent, TimeSpent
FROM vwQuizResults WHERE TermName='Summer 2020' ''' , cTBL)
# note that data points are anonymous--no student information
quizzes
# group by quiz title ignoring course
quizagg = quizzes.groupby(["QuizTitle"])
# use a formatting dictionary to get the number formats looking reasonable for the describe() summary
format_dict = {"count": "{:.0f}", "mean": "{:.1%}", "std": "{:.1%}", "min": "{:.1%}", "25%": "{:.1%}", "50%": "{:.1%}", "75%": "{:.1%}", "max": "{:.1%}"}
quizagg["Percent"].describe().style.format(format_dict)
# loop through each quiz name
# print mean and draw a histogram
i = 0
xticks = np.linspace(0,1,11)
xticklabels = pd.DataFrame(xticks).applymap(lambda x: "{:.0%}".format(x))
for group_key, group_value in quizagg:
print("{}: mean score: {:.1%}".format(group_key, group_value['Percent'].mean()))
i += 1
plt.figure(figsize=(10,20))
ax = plt.subplot(len(quizagg),1,i)
ax.set_xticks(xticks)
ax.set_xticklabels(xticklabels[0].values)
sns.distplot(group_value['Percent'], kde=False, hist=True)
plt.show()
# get rough statistics on time spent
quizagg['TimeSpent'].describe().round(2)
# show the distribution of time spent on each quiz
i = 0
for group_key, group_value in quizagg:
print("{}: mean time: {:.2f}".format(group_key, group_value['TimeSpent'].mean()))
i += 1
plt.figure(figsize=(10,20))
ax = plt.subplot(len(quizagg),1,i)
sns.distplot(group_value['TimeSpent'], kde=False, hist=True)
plt.show()