|
@@ -8,16 +8,23 @@ import tkinter
|
|
|
from matplotlib import pyplot as plt
|
|
|
|
|
|
|
|
|
+# Read train data from csv
|
|
|
df = pd.read_csv('train.csv')
|
|
|
+
|
|
|
+# Shuffle data after read
|
|
|
df = df.sample(frac=1)
|
|
|
|
|
|
+# Separate the label column from the features, then drop it from the data
|
|
|
df_label = df['label']
|
|
|
del df['label']
|
|
|
|
|
|
+
|
|
|
+# Convert both dataframes to numpy objects
|
|
|
X = df.to_numpy()
|
|
|
y = df_label.to_numpy()
|
|
|
|
|
|
|
|
|
+# Reshape to 28x28x1 images and compute split sizes (80% train, 20% validation)
|
|
|
X = X.reshape((-1, 28, 28, 1))
|
|
|
X_train_size = math.floor(len(X)*.8)
|
|
|
X_test_size = len(X) - X_train_size
|
|
@@ -29,50 +36,48 @@ y_train = y[:X_train_size]
|
|
|
y_test = y[X_test_size+1:]
|
|
|
|
|
|
|
|
|
+# One-hot encode the labels
|
|
|
y_train = numpy.eye(10)[y_train]
|
|
|
y_test = numpy.eye(10)[y_test]
|
|
|
|
|
|
+# Normalize the Input between 0 and 1
|
|
|
X_train = X_train/255
|
|
|
X_test = X_test/255
|
|
|
|
|
|
-print(X_train[0])
|
|
|
-
|
|
|
|
|
|
+# Create the convolutional neural network model
|
|
|
model = models.Sequential()
|
|
|
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
|
|
|
model.add(layers.MaxPooling2D((2, 2)))
|
|
|
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
|
|
|
model.add(layers.MaxPooling2D((2, 2)))
|
|
|
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
model.add(layers.Flatten())
|
|
|
model.add(layers.Dense(64, activation='relu'))
|
|
|
model.add(layers.Dense(10, activation='softmax'))
|
|
|
|
|
|
|
|
|
+# Define Adam as the optimizer and MSE as the loss function
|
|
|
optmizer = tf.keras.optimizers.Adam(learning_rate=0.001)
|
|
|
model.compile(optimizer=optmizer, loss=tf.keras.losses.MeanSquaredError(),
|
|
|
- metrics=['accuracy'])
|
|
|
-history = model.fit(X_train, y_train, batch_size=2**12, epochs=30, validation_data=(X_test, y_test))
|
|
|
-
|
|
|
-
|
|
|
+ metrics=['accuracy'])
|
|
|
|
|
|
-pred = model.predict(X_test)
|
|
|
+# Train the model
|
|
|
+history = model.fit(X_train, y_train, batch_size=2**12,
|
|
|
+ epochs=30, validation_data=(X_test, y_test))
|
|
|
|
|
|
-#print(numpy.argmax(pred[255]))
|
|
|
-#plt.imshow(X_test[255].reshape(28,28))
|
|
|
-#plt.show()
|
|
|
|
|
|
+# Load test dataset as pandas dataframe and convert to numpy
|
|
|
df = pd.read_csv('test.csv')
|
|
|
X = df.to_numpy()
|
|
|
|
|
|
-y = model.predict(X.reshape(-1,28,28,1))
|
|
|
+# Reshape to the model's input shape and predict (NOTE(review): X is not divided by 255 here, unlike the training data - confirm)
|
|
|
+y = model.predict(X.reshape(-1, 28, 28, 1))
|
|
|
|
|
|
+# Write the predicted label for each input to the submission file
|
|
|
with open('submission.csv', 'w') as f:
|
|
|
f.write("ImageId,Label\n")
|
|
|
- idx=1
|
|
|
+ idx = 1
|
|
|
for i in y:
|
|
|
f.write(f"{idx},{numpy.argmax(i)}\n")
|
|
|
- idx+=1
|
|
|
+ idx += 1
|