Image Classification - Fresco Play Hands-on Solution (HackerRank)

# Task 1: Data Loading. Run the cells below to import the packages and load the data.


from keras.datasets import fashion_mnist
from keras.utils import to_categorical
import numpy as np

# load dataset
(trainX, trainy), (testX, testy) = fashion_mnist.load_data()

# helper that loads the train and test datasets, reshaped and one-hot encoded
def load_dataset():
    # load dataset
    (trainX, trainY), (testX, testY) = fashion_mnist.load_data()
    # reshape dataset to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode target values
    trainY = to_categorical(trainY)
    testY = to_categorical(testY)
    return trainX, trainY, testX, testY

'''
Output:-

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
32768/29515 [=================================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26427392/26421880 [==============================] - 3s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
8192/5148 [===============================================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
4423680/4422102 [==============================] - 2s 0us/step
'''
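For reference, here is a minimal sketch (an addition, not one of the graded cells) of calling the helper defined above; the variable names Xtr/Ytr/Xte/Yte are illustrative, and the shapes assume the standard Fashion-MNIST split of 60,000 training and 10,000 test images:

# sketch: using load_dataset(); illustrative names, standard Fashion-MNIST shapes
Xtr, Ytr, Xte, Yte = load_dataset()
print(Xtr.shape, Ytr.shape)  # (60000, 28, 28, 1) (60000, 10)
print(Xte.shape, Yte.shape)  # (10000, 28, 28, 1) (10000, 10)

The graded cells below keep working with the raw trainX/trainy arrays loaded at the top, so the helper is shown only for reference.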

# Task 2: Subset Generation.

  • Perform the data split with StratifiedShuffleSplit, using test_size = 0.08 and random_state = seed.
  • Perform the train/test split with StratifiedShuffleSplit, using test_size = 0.3 and random_state = seed.

seed = 9
from sklearn.model_selection import StratifiedShuffleSplit

# first split: hold out a stratified 8% subset of the training data
data_split = StratifiedShuffleSplit(test_size=0.08, random_state=seed)

for train_index, test_index in data_split.split(trainX, trainy):
    split_data_92, split_data_8 = trainX[train_index], trainX[test_index]
    split_label_92, split_label_8 = trainy[train_index], trainy[test_index]

# second split: of the 8% subset, 70% for training and 30% for testing
train_test_split = StratifiedShuffleSplit(test_size=0.3, random_state=seed)  # test_size=0.3 denotes that 30% of the dataset is used for testing
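As a quick sanity check (an addition, not one of the graded cells), stratification preserves the class balance; Fashion-MNIST has 6,000 images per class, so the 8% subset should contain roughly 480 of each:

# sanity check: class counts in the stratified 8% subset
classes, counts = np.unique(split_label_8, return_counts=True)
print(dict(zip(classes, counts)))  # roughly 480 per class (4800 samples / 10 classes)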

# Task 3: Data Splitting. Print the shape of train_data, train_labels, test_data and test_labels.


for train_index, test_index in train_test_split.split(split_data_8, split_label_8):
    train_data_70, test_data_30 = split_data_8[train_index], split_data_8[test_index]
    train_label_70, test_label_30 = split_label_8[train_index], split_label_8[test_index]

train_data = train_data_70     # assigning to variable train_data
train_labels = train_label_70  # assigning to variable train_labels
test_data = test_data_30
test_labels = test_label_30

print('train_data : ',train_data.shape)
print('train_labels : ',train_labels.shape)
print('test_data : ', test_data.shape)
print('test_labels : ', test_labels.shape)

'''
Output:-

train_data : (3360, 28, 28)
train_labels : (3360,)
test_data : (1440, 28, 28)
test_labels : (1440,)
'''
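The shapes follow directly from the two splits; a short sketch of the arithmetic (an annotation, not graded code):

# where the shapes come from:
# 60000 * 0.08 = 4800 samples in the 8% subset
# 4800 * 0.70 = 3360 training samples, 4800 * 0.30 = 1440 test samples
assert train_data.shape[0] + test_data.shape[0] == split_data_8.shape[0]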

# Task 4: Normalization of Dataset.

  • Subtract the mean of the data, computed with axis = (0,1,2) and keepdims = True.
  • Divide by the standard deviation of the data, computed with axis = (0,1,2), ddof = 1 and keepdims = True.
  • Print the shape of train_data and test_data.

# definition of the normalization function
def normalize(data, eps=1e-8):
    # center the data on zero
    data -= data.mean(axis=(0, 1, 2), keepdims=True)
    # scale by the sample standard deviation (ddof=1, as the task specifies)
    std = data.std(axis=(0, 1, 2), ddof=1, keepdims=True)
    # guard against division by (near-)zero
    std[std < eps] = 1.
    data /= std
    return data

train_data=train_data.astype('float64')
test_data=test_data.astype('float64')

# calling the function
train_data = normalize(train_data)
test_data = normalize(test_data)

# prints the shape of train data and test data
print('train_data: ',train_data.shape)

print('test_data: ',test_data.shape)


'''
Output:-

train_data: (3360, 28, 28)
test_data: (1440, 28, 28)
'''
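A quick check (an addition) that the normalization behaved as expected:

# after normalization the dataset should have mean ~0 and std ~1
print(train_data.mean())       # close to 0
print(train_data.std(ddof=1))  # close to 1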

# Task 5: ZCA Whitening. Print the shape of train_data_flat and test_data_flat.


# Flatten each image into a column vector (preparation for computing the whitening matrix)

train_data_flat = train_data.reshape(train_data.shape[0], -1).T
test_data_flat = test_data.reshape(test_data.shape[0], -1).T

print('train_data_flat: ',train_data_flat.shape)
print('test_data_flat: ',test_data_flat.shape)

train_data_flat_t = train_data_flat.T
test_data_flat_t = test_data_flat.T

'''
Output:-

train_data_flat: (784, 3360)
test_data_flat: (784, 1440)
'''
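Note that the cell above only flattens and transposes the data. For completeness, a minimal ZCA whitening sketch (an addition, not part of the graded solution, assuming train_data_flat is the 784 x 3360 features-by-samples matrix built above):

# ZCA whitening sketch: decorrelate pixels, then rotate back to pixel space
cov = np.cov(train_data_flat)                    # 784 x 784 pixel covariance
U, S, _ = np.linalg.svd(cov)                     # eigendecomposition via SVD
eps = 1e-5                                       # regularizer for tiny eigenvalues
zca_matrix = U @ np.diag(1.0 / np.sqrt(S + eps)) @ U.T
train_data_zca = zca_matrix @ train_data_flat    # whitened data, same shape as input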

# Task 6: Principal Component Analysis (PCA).

  • Set n_components of train_data_pca to the number of columns of train_data and fit_transform with train_data_flat.
  • Set n_components of test_data_pca to the number of columns of test_data and fit_transform with test_data_flat.
  • Print the shape of train_data_pca and test_data_pca.

from sklearn.decomposition import PCA

# n_components specifies the number of components to keep (train_data.shape[1] = 28)
train_data_pca = PCA(n_components=train_data.shape[1]).fit_transform(train_data_flat)
test_data_pca = PCA(n_components=test_data.shape[1]).fit_transform(test_data_flat)

print(train_data_pca.shape)
print(test_data_pca.shape)

train_data_pca = train_data_pca.T
test_data_pca = test_data_pca.T

'''
Output:-

(784, 28)
(784, 28)
'''
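Optionally (an addition, not required by the task), the fitted PCA object can report how much of the total variance those 28 components retain:

# inspect retained variance of the 28-component projection
pca = PCA(n_components=train_data.shape[1])
pca.fit(train_data_flat)
print(pca.explained_variance_ratio_.sum())  # fraction of total variance kept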

# Task 7: Singular Value Decomposition (SVD).

Execute the cells below to perform Singular Value Decomposition.


from skimage import color

def svdFeatures(input_data):
    svdArray_input_data = []
    size = input_data.shape[0]
    for i in range(0, size):
        # convert to grayscale (the images here are already 2-D)
        img = color.rgb2gray(input_data[i])
        U, s, V = np.linalg.svd(img, full_matrices=False)
        S = [s[j] for j in range(28)]  # keep all 28 singular values as features
        svdArray_input_data.append(S)
    svdMatrix_input_data = np.matrix(svdArray_input_data)
    return svdMatrix_input_data

# apply SVD for train and test data
train_data_svd=svdFeatures(train_data)
test_data_svd=svdFeatures(test_data)

print(train_data_svd.shape)
print(test_data_svd.shape)

'''
Output:-

(3360, 28)
(1440, 28)
'''
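To see what these features look like, a small sketch (an addition) of the singular values for a single image:

# singular values of one 28x28 image: a compact per-image descriptor
U, s, V = np.linalg.svd(train_data[0], full_matrices=False)
print(s.shape)  # (28,) - one singular value per dimension
print(s[:5])    # the largest values carry most of the image's energy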

# Task 8: Support Vector Machine (SVM).

Steps:-
  • Initialize the SVM classifier with gamma=.001 and probability=True.
  • Train the model with train_data_flat_t and train_labels.
  • Predict the output for test_data_flat_t.
  • Evaluate the classifier with score on test_data_flat_t and test_labels.
  • Print the predicted score.

from sklearn import svm

# creating an SVM classifier model
clf = svm.SVC(gamma=.001, probability=True)

# model training on the flattened training data
clf.fit(train_data_flat_t, train_labels)
predicted = clf.predict(test_data_flat_t)

score = clf.score(test_data_flat_t, test_labels)
print("score", score)

with open('output.txt', 'w') as file:
    file.write(str(np.mean(score)))

'''
Output:-

score 0.8277777777777777
'''
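Beyond the single accuracy score, scikit-learn's standard metrics give a per-class view (an addition, not required by the task):

# per-class evaluation of the predictions above
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(test_labels, predicted))
print(classification_report(test_labels, predicted))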

About the author

D Shwari
I'm a professor in the Department of Computer Science at National University. My main areas are data science and data analysis, along with project management for several computer-science-related sectors. My next project is on AI with deep learning.
