In [11]:
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats

Tables

CS 361 students answered the survey below.

Q1: What's your name? (student names removed for privacy)

Q2: We will roll a 6-sided die. Guess how many rolls it will take to see the first 6.

Q3: If the prize were $100, what's the most you would bet (in dollars) to play this game?

Q4: Have you ever been to Las Vegas?

In [13]:
# Read the survey responses (answers to Q2-Q4; student names were removed) from the CSV into a DataFrame.
df = pd.read_csv("117612.csv")
# Bare last expression so the notebook renders the table as this cell's output.
df
Out[13]:
Q2:Rolls Q3:Bet Q4:Vegas
0 5 1.00 Yes
1 3 10.00 No
2 4 0.00 No
3 3 0.00 Yes
4 5 10.00 No
5 3 0.00 No
6 2 0.00 Yes
7 3 50.00 No
8 3 5.00 Yes
9 97 1.00 No
10 6 1.00 Yes
11 9 10.00 No
12 3 20.00 No
13 1 1.00 No
14 6 50.00 No
15 5 100.00 No
16 3 10.00 No
17 4 5.00 No
18 3 20.00 Yes
19 3 15.00 Yes
20 4 20.00 Yes
21 4 7.50 No
22 4 0.00 No
23 4 12.50 No
24 3 17.00 Yes
25 6 1.00 No
26 3 16.67 No
27 3 5.00 Yes
28 2 50.00 No
29 8 75.00 Yes
... ... ... ...
48 4 50.00 No
49 3 65.00 No
50 9 15.00 Yes
51 5 15.00 Yes
52 3 15.00 No
53 4 20.00 No
54 4 10.00 Yes
55 1 2.00 Yes
56 1 10.00 Yes
57 3 10.00 No
58 4 10.00 No
59 11 9.00 Yes
60 8 20.00 Yes
61 4 16.00 Yes
62 3 1.00 No
63 3 5.00 No
64 3 1.00 No
65 5 2.00 No
66 3 10.00 No
67 8 10.00 No
68 5 16.67 No
69 5 16.67 No
70 5 50.00 No
71 10 5.00 No
72 5 17.00 Yes
73 25 1.00 No
74 4 20.00 No
75 3 11.57 Yes
76 4 1.00 Yes
77 3 3.00 No

78 rows × 3 columns

In [14]:
# Roll count treated as the winning guess
# (presumably the number of rolls actually observed -- TODO confirm).
winning_number = 5
# Keep only the respondents whose Q2 guess equals the winning number; the
# selected rows (all columns) are displayed as the cell's output.
df.loc[df["Q2:Rolls"].eq(winning_number)]
Out[14]:
Q2:Rolls Q3:Bet Q4:Vegas
0 5 1.00 Yes
4 5 10.00 No
15 5 100.00 No
40 5 5.00 Yes
51 5 15.00 Yes
65 5 2.00 No
68 5 16.67 No
69 5 16.67 No
70 5 50.00 No
72 5 17.00 Yes

Bar charts

In [15]:
# Tally how many respondents gave each distinct answer to Q4 ("Yes"/"No" in the output shown).
vegas_counts = df["Q4:Vegas"].value_counts()
# Display the resulting Series of counts, indexed by answer.
vegas_counts
Out[15]:
No     48
Yes    30
Name: Q4:Vegas, dtype: int64
In [16]:
# Bar chart of the Q4 tallies: one bar per answer, bar height = how many respondents gave it.
plt.bar(vegas_counts.index, vegas_counts.values)
plt.title("Have you ever been to Las Vegas?")
# Label the y-axis last; this call's return value is what the cell echoes as its output.
plt.ylabel("Count")
Out[16]:
Text(0,0.5,'Count')

Histograms

In [17]:
# Draw the same Q3 bet data twice, side by side, so the effect of the bin
# count on the histogram's appearance can be compared directly.
bet_hist_panels = (
    # (subplot position, number of bins, panel title)
    (1, 100, "Histogram of bets with 100 bins"),
    (2, 10, "Histogram of bets with 10 bins"),
)
for panel_idx, n_bins, panel_title in bet_hist_panels:
    plt.subplot(1, 2, panel_idx)
    plt.hist(df["Q3:Bet"], bins=n_bins)
    plt.xlabel("Bet")
    plt.ylabel("Count")
    plt.title(panel_title)
# Push the subplot area past the default figure bounds (left < 0, right > 1)
# so the two panels are drawn wider.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)

Conditional histograms

In [24]:
# Histogram of Q3 bets conditioned on the Q4 answer: one panel per answer,
# both using the same bin edges (0, 10, ..., 100) so the panels are comparable.
bet_bin_edges = np.arange(0, 101, 10)
vegas_panels = (
    # (subplot position, Q4 answer to select, bar colour, panel title)
    (1, "Yes", "r", "Histogram of bets for those who have been to Las Vegas"),
    (2, "No", "b", "Histogram of bets for those who haven't been to Las Vegas"),
)
for panel_idx, vegas_answer, bar_color, panel_title in vegas_panels:
    plt.subplot(1, 2, panel_idx)
    # Select the bets of the respondents who gave this Q4 answer.
    group_bets = df.loc[df["Q4:Vegas"] == vegas_answer, "Q3:Bet"]
    plt.hist(group_bets, bins=bet_bin_edges, color=bar_color, ec="k")
    plt.xlabel("Bet")
    plt.ylabel("Count")
    plt.title(panel_title)
# Push the subplot area past the default figure bounds (left < 0, right > 1)
# so the two panels are drawn wider.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)