# Task 1: Data Loading
Run the cells below to import the packages and load the data.
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
import numpy as np
# load dataset
(trainX, trainy), (testX, testy) = fashion_mnist.load_data()
# load train and test dataset
def load_dataset():
    # load dataset
    (trainX, trainY), (testX, testY) = fashion_mnist.load_data()
    # reshape dataset to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode target values
    trainY = to_categorical(trainY)
    testY = to_categorical(testY)
    return trainX, trainY, testX, testY
'''
Output:-
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
32768/29515 [=================================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26427392/26421880 [==============================] - 3s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
8192/5148 [===============================================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
4423680/4422102 [==============================] - 2s 0us/step
'''
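The load_dataset() helper above is defined but never invoked; the later tasks work directly on the raw trainX/trainy arrays. For reference, a quick sanity check of the helper could look like this (illustrative only; the names tX/tY/sX/sY are placeholders):
# illustrative check of load_dataset() (not required by the later tasks)
tX, tY, sX, sY = load_dataset()
print(tX.shape, tY.shape)   # (60000, 28, 28, 1) (60000, 10)
print(sX.shape, sY.shape)   # (10000, 28, 28, 1) (10000, 10)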
# Task 2: Subset Generation
- Split the full training set with StratifiedShuffleSplit using test_size = 0.08 and random_state = seed, keeping the stratified 8% subset.
- Split that subset again with StratifiedShuffleSplit using test_size = 0.3 and random_state = seed, holding out 30% of it for testing.
seed = 9
from sklearn.model_selection import StratifiedShuffleSplit
# first split: hold out a stratified 8% subset of the training data
# (default n_splits=10, so the loop runs ten times and the variables keep the last split)
data_split = StratifiedShuffleSplit(test_size=0.08, random_state=seed)
for train_index, test_index in data_split.split(trainX, trainy):
    split_data_92, split_data_8 = trainX[train_index], trainX[test_index]
    split_label_92, split_label_8 = trainy[train_index], trainy[test_index]
# second splitter: test_size=0.3 means 30% of the subset is used for testing
# (note: the name shadows sklearn's train_test_split helper)
train_test_split = StratifiedShuffleSplit(test_size=0.3, random_state=seed)
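Because the splits are stratified, each of the 10 classes keeps roughly the same share in the 8% subset. A quick check (illustrative, not part of the required tasks):
# illustrative: class proportions in the 8% subset should each be ~0.1
print(np.bincount(split_label_8) / len(split_label_8))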
# Task 3: Data Splitting
Print the shape of train_data, train_labels, test_data and test_labels.
for train_index, test_index in train_test_split.split(split_data_8, split_label_8):
    train_data_70, test_data_30 = split_data_8[train_index], split_data_8[test_index]
    train_label_70, test_label_30 = split_label_8[train_index], split_label_8[test_index]
train_data = train_data_70   # assigning to variable train_data
train_labels = train_label_70   # assigning to variable train_labels
test_data = test_data_30   # assigning to variable test_data
test_labels = test_label_30   # assigning to variable test_labels
print('train_data : ',train_data.shape)
print('train_labels : ',train_labels.shape)
print('test_data : ', test_data.shape)
print('test_labels : ', test_labels.shape)
'''
Output:-
train_data : (3360, 28, 28)
train_labels : (3360,)
test_data : (1440, 28, 28)
test_labels : (1440,)
'''
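The printed shapes follow directly from the two splits: 60000 × 0.08 = 4800 images survive the first split, and the 70/30 split of that subset yields 4800 × 0.7 = 3360 training and 4800 × 0.3 = 1440 test images.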
# Task 4: Normalization of Dataset
- Subtract the mean of the data, computed with axis = (0,1,2) and keepdims = True.
- Divide by the standard deviation of the data, computed with axis = (0,1,2), ddof = 1 and keepdims = True.
- Print the shape of train_data and test_data.
# definition of the normalization function: zero mean and unit variance over the whole dataset
def normalize(data, eps=1e-8):
    data -= data.mean(axis=(0, 1, 2), keepdims=True)
    std = data.std(axis=(0, 1, 2), ddof=1, keepdims=True)
    std[std < eps] = 1.   # avoid division by (near-)zero
    data /= std
    return data
train_data=train_data.astype('float64')
test_data=test_data.astype('float64')
# calling the function
train_data = normalize(train_data)
test_data = normalize(test_data)
# prints the shape of train data and test data
print('train_data: ',train_data.shape)
print('test_data: ',test_data.shape)
'''
Output:-
train_data: (3360, 28, 28)
test_data: (1440, 28, 28)
'''
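After normalization the dataset should have global mean ≈ 0 and standard deviation ≈ 1, which can be verified directly (illustrative check):
# illustrative: confirm zero mean and unit variance after normalization
print(train_data.mean(), train_data.std(ddof=1))   # ~0.0 and ~1.0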
# Task 5: ZCA Whitening
Print the shape of train_data_flat and test_data_flat.
# flatten each image to a 784-vector and stack the vectors as columns (one column per image)
train_data_flat = train_data.reshape(train_data.shape[0], -1).T
test_data_flat = test_data.reshape(test_data.shape[0], -1).T
print('train_data_flat: ', train_data_flat.shape)
print('test_data_flat: ', test_data_flat.shape)
# row-major views (one row per image), used by the classifier in Task 8
train_data_flat_t = train_data_flat.T
test_data_flat_t = test_data_flat.T
'''
Output:-
train_data_flat: (784, 3360)
test_data_flat: (784, 1440)
'''
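Note that the cell above only flattens and transposes the data; no whitening matrix is actually formed. For reference, a minimal ZCA whitening sketch over the flattened training data could look like the following (an assumed illustration, not used by the later tasks; the eps value 1e-5 is a common but arbitrary choice):
# assumed ZCA sketch: W = E diag(1/sqrt(lambda + eps)) E^T from the pixel covariance
cov = np.cov(train_data_flat)                       # (784, 784) covariance across pixels
eigvals, eigvecs = np.linalg.eigh(cov)
W = eigvecs @ np.diag(1.0 / np.sqrt(eigvals + 1e-5)) @ eigvecs.T
train_data_zca = W @ train_data_flat                # whitened data, still (784, 3360)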
# Task 6: Principal Component Analysis (PCA)
- Set n_components of train_data_pca to the number of train_data columns and fit_transform with train_data_flat.
- Set n_components of test_data_pca to the number of test_data columns and fit_transform with test_data_flat.
- Print the shape of train_data_pca and test_data_pca.
from sklearn.decomposition import PCA
# n_components specifies the number of components to keep (train_data.shape[1] = 28)
train_data_pca = PCA(n_components=train_data.shape[1]).fit_transform(train_data_flat)
test_data_pca = PCA(n_components=test_data.shape[1]).fit_transform(test_data_flat)
print(train_data_pca.shape)
print(test_data_pca.shape)
train_data_pca = train_data_pca.T
test_data_pca = test_data_pca.T
'''
Output:-
(784, 28)
(784, 28)
'''
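Because train_data_flat has shape (784, 3360), scikit-learn treats the 784 pixel positions as the samples here, so keeping train_data.shape[1] = 28 components yields the (784, 28) shapes above. To see how much variance those 28 components retain (illustrative check):
# illustrative: fraction of variance captured by the 28 components
pca = PCA(n_components=train_data.shape[1]).fit(train_data_flat)
print(pca.explained_variance_ratio_.sum())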
# Task 7: Singular Value Decomposition (SVD)
Execute the cells below to perform Singular Value Decomposition.
from skimage import color
# extract the singular-value spectrum of each image as its feature vector
def svdFeatures(input_data):
    svdArray_input_data = []
    size = input_data.shape[0]
    for i in range(size):
        # images are already single-channel; convert only if an RGB image is passed
        img = color.rgb2gray(input_data[i]) if input_data[i].ndim == 3 else input_data[i]
        U, s, V = np.linalg.svd(img, full_matrices=False)
        svdArray_input_data.append(s[:28])   # keep the 28 singular values
    svdMatrix_input_data = np.array(svdArray_input_data)
    return svdMatrix_input_data
# apply SVD for train and test data
train_data_svd=svdFeatures(train_data)
test_data_svd=svdFeatures(test_data)
print(train_data_svd.shape)
print(test_data_svd.shape)
'''
Output:-
(3360, 28)
(1440, 28)
'''
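With full_matrices=False, np.linalg.svd of a 28×28 image returns min(28, 28) = 28 singular values in descending order, so each image is summarized by a sorted 28-value spectrum, giving the (3360, 28) and (1440, 28) shapes above.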
# Task 8: Support Vector Machine (SVM)
Steps:
- Initialize an SVM classifier with gamma = .001 and probability = True.
- Train the model with train_data_flat_t and train_labels.
- Predict the output for test_data_flat_t.
- Evaluate the classifier with score on test_data_flat_t and test_labels.
- Print the predicted score.
from sklearn import svm
# creating an SVM classifier
clf = svm.SVC(gamma=.001, probability=True)
# model training on the flattened training data (one row per image)
clf.fit(train_data_flat_t, train_labels)
predicted = clf.predict(test_data_flat_t)
score = clf.score(test_data_flat_t, test_labels)
print("score", score)
# write the score to a file
with open('output.txt', 'w') as file:
    file.write(str(score))
'''
Output:-
score 0.8277777777777777
'''
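Since the classifier was trained with probability=True, per-class probabilities are also available; a short usage sketch (illustrative):
# illustrative: class probabilities for the first test image (10 values, one per class)
proba = clf.predict_proba(test_data_flat_t[:1])
print(proba.round(3))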