In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats

Tables

CS 361 students answered the survey below.

Q1: What's your name? (student names removed for privacy)

Q2: We will roll a 6-sided die. Guess how many rolls it will take to see the first 6.

Q3: If the prize were $100, what's the most you would bet (in dollars) to play this game?

Q4: Have you ever been to Las Vegas?

In [2]:
# Load the survey responses from the CSV export into a DataFrame.
# The bare name on the last line makes the notebook render the frame as the cell output.
df = pd.read_csv(filepath_or_buffer="121278.csv")
df
Out[2]:
Q2:Rolls Q3:Bet Q4:Vegas
0 1 1.00 Yes
1 6 2.00 No
2 6 5.00 No
3 3 0.00 Yes
4 6 0.00 No
5 6 1.00 Yes
6 4 15.00 No
7 3 50.00 Yes
8 6 16.66 Yes
9 1 10.00 No
10 36 99.00 Yes
11 4 25.00 Yes
12 3 10.00 No
13 3 2.00 Yes
14 2 10.00 Yes
15 3 16.60 No
16 4 10.00 Yes
17 19 10.00 No
18 3 15.00 No
19 6 0.00 No
20 4 16.66 Yes
21 3 5.00 No
22 6 50.00 No
23 4 75.00 No
24 4 0.00 No
25 10 5.00 No
26 3 16.00 No
27 4 50.00 No
28 3 10.00 Yes
29 8 0.00 Yes
... ... ... ...
91 6 50.00 Yes
92 3 20.00 No
93 3 16.00 No
94 4 3.00 No
95 4 0.00 No
96 7 16.66 Yes
97 5 10.00 No
98 6 2.00 Yes
99 3 15.00 No
100 6 16.00 No
101 4 8.00 No
102 6 30.00 Yes
103 4 20.00 No
104 15 15.00 No
105 2 5.00 Yes
106 3 11.00 Yes
107 4 3.00 No
108 1 16.00 No
109 1 1.00 No
110 4 50.00 Yes
111 23 99.00 No
112 6 16.00 No
113 70 50.00 No
114 4 50.00 No
115 3 20.00 No
116 10 10.00 No
117 3 5.00 No
118 4 10.00 No
119 8 50.00 No
120 6 25.00 Yes

121 rows × 3 columns

In [3]:
# Guess value to look up in the Q2 answers; edit this to inspect a different guess.
winning_number = 1
# Boolean-mask row selection: keep only respondents whose guess equals winning_number.
df.loc[df["Q2:Rolls"].eq(winning_number)]
Out[3]:
Q2:Rolls Q3:Bet Q4:Vegas
0 1 1.00 Yes
9 1 10.00 No
44 1 50.00 No
45 1 16.00 No
59 1 20.00 No
64 1 15.00 No
66 1 10.00 Yes
67 1 17.00 Yes
72 1 16.67 No
108 1 16.00 No
109 1 1.00 No

Bar charts

In [4]:
# Tally how many respondents gave each answer to Q4; the result is a Series
# indexed by answer, which the bar chart cell below reads.
vegas_counts = df.loc[:, "Q4:Vegas"].value_counts()
vegas_counts
Out[4]:
No     78
Yes    43
Name: Q4:Vegas, dtype: int64
In [5]:
# One bar per Q4 answer, drawn on an explicit Axes rather than through the
# implicit pyplot "current axes".
fig, ax = plt.subplots()
ax.bar(vegas_counts.index, vegas_counts.values)
ax.set_title("Have you ever been to Las Vegas?")
ax.set_ylabel("Count")
Out[5]:
Text(0, 0.5, 'Count')

Histograms

In [6]:
# Remove outlier bets above 100 (someone wanted to bet too big!).
# NOTE: this rebinds `df`, so every cell that runs after this one sees the
# filtered frame rather than the full survey.
df = df[df["Q3:Bet"] <= 100]

# Show the same bets at two bin counts to illustrate how bin width changes the
# shape of a histogram: (number of bins, panel title).
panels = [
    (100, "Histogram of bets with 100 bins"),
    (10, "Histogram of bets with 10 bins"),
]

# Make the figure twice the default width up front. The previous
# subplots_adjust(left=-0.5, right=1.5) got the same width by placing the axes
# outside the figure canvas, which only renders fully when the backend crops to
# a tight bounding box and is clipped otherwise (e.g. a plain savefig).
base_width, base_height = plt.rcParams["figure.figsize"]
fig, axes = plt.subplots(1, len(panels), figsize=(2 * base_width, base_height))
for ax, (n_bins, title) in zip(axes, panels):
    ax.hist(df["Q3:Bet"], bins=n_bins)
    ax.set_xlabel("Bet")
    ax.set_ylabel("Count")
    ax.set_title(title)
# Keep labels and titles inside the canvas without manual margin tuning.
fig.tight_layout()

Conditional histograms

In [7]:
# One panel per Q4 answer: (answer, bar color, panel title).
panels = [
    ("Yes", "r", "Histogram of bets for those who have been to Las Vegas"),
    ("No", "b", "Histogram of bets for those who haven't been to Las Vegas"),
]
# Identical bin edges (0, 10, ..., 100) in both panels so the two groups can be
# compared bar for bar.
bet_bins = np.arange(0, 101, 10)

# Make the figure twice the default width up front. The previous
# subplots_adjust(left=-0.5, right=1.5) got the same width by placing the axes
# outside the figure canvas, which only renders fully when the backend crops to
# a tight bounding box and is clipped otherwise (e.g. a plain savefig).
base_width, base_height = plt.rcParams["figure.figsize"]
fig, axes = plt.subplots(1, len(panels), figsize=(2 * base_width, base_height))
for ax, (answer, color, title) in zip(axes, panels):
    # Single .loc selection (row mask + column) instead of chained indexing.
    bets = df.loc[df["Q4:Vegas"] == answer, "Q3:Bet"]
    ax.hist(bets, bins=bet_bins, color=color, ec="k")
    ax.set_xlabel("Bet")
    ax.set_ylabel("Count")
    ax.set_title(title)
# Keep labels and titles inside the canvas without manual margin tuning.
fig.tight_layout()