import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
# Read the iris data set. The CSV is parsed as having no header row, so the
# column labels are supplied explicitly.
column_names = ['sepal length', 'sepal width', 'petal length', 'petal width', 'species']
df = pd.read_csv("iris.csv", header=None, names=column_names)
df.head()
# Pairwise scatter plots of all features, with colour and marker chosen per species.
sns.pairplot(df, hue="species", markers=['D', 'o', 's'])
from sklearn.preprocessing import StandardScaler
# Pull the four numeric measurement columns out of the frame as a plain array
feature_columns = ['sepal length', 'sepal width', 'petal length', 'petal width']
X = df[feature_columns].values
# Standardize the features with scikit-learn's StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)
# Covariance matrix of the standardized features; rowvar=False tells NumPy
# that each column (not each row) of X is one variable.
cov_mat = np.cov(X, rowvar=False)
cov_mat
# Obtain eigenvalues and eigenvectors of the covariance matrix.
# A covariance matrix is symmetric, so np.linalg.eigh is the appropriate
# routine: it returns real eigenvalues, whereas the general-purpose
# np.linalg.eig may return a complex-typed result.
eigenvals, eigenvecs = np.linalg.eigh(cov_mat)
# Sort eigenvalues and eigenvectors in order of decreasing eigenvalue
# (eigenvectors are the columns of eigenvecs, hence the column indexing).
idx = np.argsort(eigenvals)[::-1]
eigenvals = eigenvals[idx]
eigenvecs = eigenvecs[:, idx]
# Plot eigenvalues in decreasing order; the x axis is derived from the number
# of eigenvalues rather than hard-coded, so it follows the feature count.
plt.plot(np.arange(1, len(eigenvals) + 1), eigenvals, 'ro-')
plt.title('Eigenvalues in sorted order')
eigenvals
from sklearn.decomposition import PCA
# Project the standardized features onto two principal components
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(X)
df_pca = pd.DataFrame(data=principalComponents, columns=['principal component 1', 'principal component 2'])
df_pca.head()
# Attach the species labels column-wise next to the two components
df_joined = pd.concat([df_pca, df.species], axis=1)
df_joined.head()
# Scatter plot of the two components, coloured and marked per species.
# x and y are passed by keyword: recent seaborn releases take `data` as the
# first parameter and make x/y keyword-only, so passing them positionally
# fails; the keyword form also works on older releases.
sns.lmplot(x='principal component 1', y='principal component 2', data=df_joined, fit_reg=False, hue="species", markers=['D', 'o', 's'])