import matplotlib.pyplot as plt
%matplotlib notebook
import pandas as pd
import numpy as np
# Read three columns (zero-based positions 4, 5 and 6) of the body-fat
# spreadsheet; later cells access them as WEIGHT, HEIGHT and ADIPOSITY.
df = pd.read_excel(io="BodyFat.xls", usecols=[4, 5, 6])
# Preview the first five rows.
df.head(n=5)
from mpl_toolkits.mplot3d import Axes3D
%matplotlib notebook
# 3D scatter plot of the raw measurements.
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and stopped
# accepting keyword arguments in 3.6, so the 3D axes are created explicitly
# with add_subplot, which works on both old and new Matplotlib releases.
scatter3d = plt.figure().add_subplot(111, projection='3d')
scatter3d.scatter(df['WEIGHT'], df['HEIGHT'], df['ADIPOSITY'])
scatter3d.set_xlabel('Weight')
scatter3d.set_ylabel('Height')
scatter3d.set_zlabel('Adiposity')
plt.show()
import seaborn as sns
# Pairwise scatter plots / per-column distributions for every column of df.
sns.pairplot(data=df)
# Column-wise descriptive statistics: means, covariance matrix and
# Pearson correlation matrix.
df.mean(axis=0)
df.cov()
df.corr(method='pearson')
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# Remove the height outlier: keep only rows whose HEIGHT is above 40.
df_no_outlier = df.loc[df['HEIGHT'] > 40]

# Standardize every column before PCA so that no single variable dominates
# purely because of its scale.
scaler = StandardScaler()
data_standardized = scaler.fit_transform(df_no_outlier)

# Fit a PCA that keeps three components and project the standardized data
# onto them.
pca = PCA(n_components=3)
principal_components = pca.fit_transform(data_standardized)

# Wrap the component scores in a DataFrame with one named column per component.
component_names = ['pc1', 'pc2', 'pc3']
df_principal = pd.DataFrame(principal_components, columns=component_names)
%matplotlib notebook
# 3D scatter plot of the data expressed in principal-component coordinates.
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and stopped
# accepting keyword arguments in 3.6, so the 3D axes are created explicitly
# with add_subplot, which works on both old and new Matplotlib releases.
scatter3d = plt.figure().add_subplot(111, projection='3d')
scatter3d.scatter(df_principal['pc1'], df_principal['pc2'], df_principal['pc3'])
scatter3d.set_xlabel('principal component 1')
scatter3d.set_ylabel('principal component 2')
scatter3d.set_zlabel('principal component 3')
plt.show()
# Covariance matrix of the principal-component scores.
df_principal.cov()

# Project onto the plane spanned by the first two principal components.
# Work on a copy: a plain assignment would only bind a second name to the
# same DataFrame, so zeroing pc3 would also wipe df_principal's pc3 column.
df_projected = df_principal.copy()
# Bracket assignment is the unambiguous way to set a column's values.
df_projected['pc3'] = 0  # zero out principal component 3
%matplotlib notebook
# 3D scatter plot of the projected data (pc3 set to zero), which therefore
# lies in the pc1/pc2 plane.
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and stopped
# accepting keyword arguments in 3.6, so the 3D axes are created explicitly
# with add_subplot, which works on both old and new Matplotlib releases.
scatter3d = plt.figure().add_subplot(111, projection='3d')
scatter3d.scatter(df_projected['pc1'], df_projected['pc2'], df_projected['pc3'])
scatter3d.set_xlabel('principal component 1')
scatter3d.set_ylabel('principal component 2')
scatter3d.set_zlabel('principal component 3')
plt.show()