ISYE 6740 - Homework 2

1. Eigenfaces and simple face recognition

Import and transform images

Here the .gif images for each subject are imported, converted to matrices, and vectorized. Each image is 243x320, so the images are downsampled to roughly 60x80 before vectorizing in order to keep the feature dimension manageable.
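A minimal sketch of the downsampling step (a synthetic array stands in for the real .gif files, which would be loaded with PIL): taking every 4th pixel of a 243x320 image gives a 61x80 image, close to the target of "about 60x80".

```python
import numpy as np

def downsample_and_vectorize(img, factor=4):
    small = img[::factor, ::factor]        # stride-based downsampling
    return small.flatten().astype(float)   # row-vectorize for the data matrix

rng = np.random.default_rng(0)
img = rng.integers(0, 256, size=(243, 320))  # stand-in greyscale image
x = downsample_and_vectorize(img)
print(x.shape)  # (4880,) -- i.e. 61 x 80
```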

Differentiate training and testing sets for each of the two subjects

Each row represents a lower resolution (i.e. 60x80), vectorized greyscale image.

Generate eigenfaces from each training set

Here we extract the "eigenfaces" corresponding to the first few principal components of each subject's training images. The features do not need to be standardized before PCA because every feature is a pixel already on the same 0-255 greyscale scale.
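A sketch of the eigenface computation under assumed names: `X` is an n x p training matrix with one vectorized image per row. The SVD of the centered data yields the principal directions without forming the p x p covariance matrix.

```python
import numpy as np

rng = np.random.default_rng(1)
X = rng.integers(0, 256, size=(10, 4880)).astype(float)  # stand-in training set

mu = X.mean(axis=0)                  # mean face over the training images
Xc = X - mu                          # center only; pixels already share 0-255
U, S, Vt = np.linalg.svd(Xc, full_matrices=False)
eigenfaces = Vt[:6]                  # top-6 eigenfaces, each reshapeable to an image
eigenvalues = S[:6]**2 / (X.shape[0] - 1)  # corresponding PCA eigenvalues
print(eigenfaces.shape)  # (6, 4880)
```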

1 (a)

Subject 1 - First 6 eigenfaces

Subject 2 - First 6 eigenfaces

Analysis:

There appear to be similar patterns in the first 6 eigenfaces for both subjects. The first eigenface, or the image reconstructed using the eigenvector with the largest eigenvalue, appears to be differentiating based on the person's face vs the background. This makes sense because that would be a robust, consistent feature of each image.

From there, the eigenfaces seem to capture progressively more detailed, less consistent features. For example, the second eigenface (for both subjects) appears to capture the shadow created by angled photos of the subject. Later eigenfaces then begin to capture the orientation of the face and individual facial features.

1 (b) Simple face recognition

Capture pixel means from each of the two training sets

Analysis:

The projection residuals computed above measure how far each vectorized test image lies from the direction spanned by a subject's eigenface. A smaller projection residual indicates greater similarity between the test image and that subject's eigenface.

Looking at the projection residual matrix `s` above, we can see that when a test image is matched with the correct eigenface, the projection residual is at least an order of magnitude smaller than in the mismatched cases (e.g., subject 2's test image against subject 1's top eigenface).

Using this knowledge, a binary classification threshold could be established: if a test image's projection residual against a subject's eigenface falls below the threshold, the image is declared a likely match for that subject.
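A sketch of the residual test under assumed names: `v` is a subject's top eigenface and `x` a vectorized, mean-subtracted test image; the residual is the squared distance from `x` to the line spanned by `v`.

```python
import numpy as np

def projection_residual(x, v):
    v = v / np.linalg.norm(v)                  # ensure unit length
    return np.linalg.norm(x - v * (v @ x))**2  # ||x - proj_v(x)||^2

rng = np.random.default_rng(2)
v = rng.standard_normal(4880)                  # stand-in eigenface
x = rng.standard_normal(4880)                  # stand-in test image
print(projection_residual(x, v) >= 0)          # True
print(projection_residual(3.0 * v, v) < 1e-9)  # True: an image along v gives ~0
```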

1 (c) - bonus

I do think this facial recognition strategy can work well. Given the opportunity to improve this system, I would look into making use of more than just the top eigenface for each class.

The top eigenvectors could be selected using an elbow diagram of the absolute values of their eigenvalues. Then, after weighting each eigenface by the magnitude of its corresponding eigenvalue, the multiple eigenfaces could be blended into a more holistic representation of the face. This blended eigenface could then be compared to test images using the projection residual calculation and a classification threshold.
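The proposed blend could be sketched as follows (all names and values here are stand-ins): weight the top-k eigenfaces by the magnitudes of their eigenvalues, sum them into one "blended" eigenface, and reuse the projection-residual test from part (b).

```python
import numpy as np

rng = np.random.default_rng(3)
V = rng.standard_normal((3, 4880))   # stand-in top-3 eigenfaces (rows)
lam = np.array([50.0, 20.0, 5.0])    # stand-in eigenvalues (descending)

blended = (np.abs(lam)[:, None] * V).sum(axis=0)  # eigenvalue-weighted sum
blended /= np.linalg.norm(blended)   # unit vector for the residual calculation

x = rng.standard_normal(4880)        # stand-in test image
residual = np.linalg.norm(x - blended * (blended @ x))**2
print(residual >= 0)  # True
```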

2. Order of faces using ISOMAP

$\epsilon$-ISOMAP (Isometric Feature Mapping) is applied in order to achieve non-linear dimensionality reduction.

Import images for ISOMAP

Create data matrix that contains 698 vectorized images as rows

2 (a)

Step 1)

Create adjacency matrix A (aka the weighted nearest neighbor graph). Without using the $\epsilon$ threshold, the mean distance is about 21.
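Step 1 can be sketched with stand-in data (the real matrix holds the 698 vectorized images): compute pairwise Euclidean distances, then set distances above the $\epsilon$ threshold to infinity so those pairs are not neighbors in the graph.

```python
import numpy as np

rng = np.random.default_rng(4)
X = rng.standard_normal((50, 64))    # stand-in data matrix

# pairwise Euclidean distance matrix via broadcasting
D = np.linalg.norm(X[:, None, :] - X[None, :, :], axis=2)
eps = np.median(D)                   # placeholder threshold; the report uses eps = 12
A = np.where(D <= eps, D, np.inf)    # weighted epsilon-neighborhood graph
np.fill_diagonal(A, 0.0)             # each image is distance 0 from itself
print(A.shape)  # (50, 50)
```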

The following two images are located on the bottom left of the adjacency matrix graph and are generally facing to the right:

The following three images are located on the right of the adjacency matrix graph and are generally facing to the left:

2 (b)

Implementing the ISOMAP algorithm

Step 2) Using the weighted graph A, compute the pairwise shortest distance matrix D

Given a distance matrix A (m x m), this returns shortest path matrix D (m x m) where D[i, j] = the shortest path from image i to image j along the graph. The shortest path is computed using the Floyd-Warshall algorithm.
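The step above can be sketched as a vectorized Floyd-Warshall: `A` is the weighted graph (with `np.inf` for non-neighbors), and `D[i, j]` becomes the shortest-path distance from node i to node j.

```python
import numpy as np

def floyd_warshall(A):
    D = A.copy()
    for k in range(D.shape[0]):
        # relax every pair (i, j) through intermediate node k
        D = np.minimum(D, D[:, [k]] + D[[k], :])
    return D

# tiny example: the path 0 -> 1 -> 2 (cost 3) beats the direct 0 -> 2 edge
A = np.array([[0.0, 1.0, 10.0],
              [1.0, 0.0, 2.0],
              [10.0, 2.0, 0.0]])
D = floyd_warshall(A)
print(D[0, 2])  # 3.0
```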

Step 3) Use the centering matrix H to compute matrix C

Step 4) Compute leading two eigenvectors w1, w2 and eigenvalues $\lambda$1, $\lambda$2 of C

Z stores the two-dimensional embedding.
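Steps 3 and 4 together can be sketched as classical MDS on the shortest-path matrix: $H = I - \frac{1}{m}\mathbf{1}\mathbf{1}^T$ centers the squared distances, $C = -\frac{1}{2} H D^2 H$, and the leading two eigenpairs of $C$ give the embedding $Z$ (rows are embedded points).

```python
import numpy as np

def mds_embedding(D):
    m = D.shape[0]
    H = np.eye(m) - np.ones((m, m)) / m     # centering matrix
    C = -0.5 * H @ (D**2) @ H
    vals, vecs = np.linalg.eigh(C)          # eigenvalues in ascending order
    idx = np.argsort(vals)[::-1][:2]        # leading two eigenpairs
    lam, W = vals[idx], vecs[:, idx]
    return W * np.sqrt(lam)                 # m x 2 embedding Z

# stand-in: distances between random 2D points (recoverable up to rotation)
rng = np.random.default_rng(5)
P = rng.standard_normal((30, 2))
D = np.linalg.norm(P[:, None] - P[None, :], axis=2)
Z = mds_embedding(D)
print(Z.shape)  # (30, 2)
```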

Scatterplot showing the overall 2D pattern similar to the lecture slides

The scatterplot above shows the embedding space with 20 images accompanying various data points. The overall pattern of the two-dimensional plot generally matches the results from the paper and the lecture slides when using $\epsilon$ = 12 (i.e., the distance threshold for the weighted nearest neighbor graph).

One key difference is that the x-axis in my visual orders the faces in a right-to-left pose progression, whereas the paper displays them left-to-right. However, like the paper and the lecture slides, the y-axis orders the images by up-down pose (i.e., faces at the bottom are looking down and faces at the top are looking up).

2 (c)

Repeat using Manhattan distance between images

When using Euclidean distance, $\epsilon$=12 resulted in 48 neighbors for the first image. Therefore, when choosing epsilon for Manhattan distance, I will start by trying to achieve a similar number of neighbors for the first image.

A similar pattern was achieved using $\epsilon$=515 with Manhattan distance. Because the distance norm changed, the neighbor threshold had to be recalibrated; the starting point for this recalibration was chosen to match the number of nearest neighbors found for the first image under Euclidean distance.
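The re-tuning of $\epsilon$ could be sketched like this with stand-in data: count the first image's neighbors under Manhattan (L1) distance, then grow $\epsilon$ until the count matches the target reached under Euclidean distance (48 in the report; a smaller stand-in target is used here).

```python
import numpy as np

rng = np.random.default_rng(6)
X = rng.standard_normal((50, 64))    # stand-in data matrix
D1 = np.abs(X[:, None, :] - X[None, :, :]).sum(axis=2)  # pairwise L1 distances

def neighbor_count(D, eps, i=0):
    return int((D[i] <= eps).sum()) - 1     # exclude the image itself

target = 10                                 # stand-in for the 48-neighbor target
for eps in np.linspace(D1.min(), D1.max(), 200):
    if neighbor_count(D1, eps) >= target:
        break
print(eps, neighbor_count(D1, eps))
```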

The overall pattern of the two-dimensional low-dimensional representation of the data looks very similar to what was achieved using Euclidean distance. However, the orientation of the faces has now been flipped. The y-axis is now down-up (it was up-down before) and the x-axis is now left-right (it was right-left before).

2 (d)

Repeat using PCA

Using the PCA implementation written for Question 1, I projected the images onto the top two principal components. The ISOMAP projection is clearly much more meaningful than the PCA projection: PCA cannot capture the manifold structure of the data, so the left-right and up-down pose patterns are much weaker in its embedding.

3. PCA: Food consumption in European countries

Import food consumption data

Create numpy array from DataFrame

3 (a)

The data matrix for this part is set up so that each column corresponds to a country and each row corresponds to a food. The country name column was removed before converting the dataframe to a matrix.

Performing PCA and extracting top 2 eigenvalue/eigenvector pairs
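This step can be sketched with a stand-in array (rows = foods, columns = countries, per the setup above): center each food across countries, eigendecompose the covariance, and project each country onto the top-2 principal directions.

```python
import numpy as np

rng = np.random.default_rng(7)
X = rng.standard_normal((20, 16))           # stand-in: 20 foods x 16 countries

Xc = X - X.mean(axis=1, keepdims=True)      # center each food (row) over countries
C = Xc @ Xc.T / X.shape[1]                  # covariance across the food dimension
vals, vecs = np.linalg.eigh(C)              # ascending eigenvalues
idx = np.argsort(vals)[::-1][:2]            # top-2 eigenvalue/eigenvector pairs
W = vecs[:, idx]                            # 20 x 2 principal directions
scores = W.T @ Xc                           # 2 x 16: one 2D point per country
print(scores.shape)  # (2, 16)
```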

There appear to be some regional groupings in primary food consumption. Northern European countries are grouped in the bottom right, while southwestern European countries are grouped on the left, suggesting that countries within those regions tend to consume similar foods. Interestingly, England is plotted relatively far from any other country, possibly meaning that its food consumption differs significantly from that of the other countries.

3 (b)

The data matrix for this part is set up so that each row corresponds to a country and each column corresponds to a food. The country name column was removed before converting the dataframe to a matrix.

Along the 1st principal component (the x-axis), garlic and olive oil sit near the middle (whereas real coffee and potatoes occupy the two extremes of the 1st principal component). However, along the 2nd principal component, garlic and olive oil have significantly lower values than the rest of the foods. This points to garlic and olive oil being foods that distinguish different countries' consumption habits.