Browse Source

refactors digit-recognizer

Douglas Andreani 5 years ago
parent
commit
46e08c237b
1 changed files with 21 additions and 16 deletions
  1. 21 16
      digit-recongnizer/nn-conv2d.py

+ 21 - 16
digit-recongnizer/app.py → digit-recongnizer/nn-conv2d.py

@@ -8,16 +8,23 @@ import tkinter
 from matplotlib import pyplot as plt
 
 
+# Read train data from csv
 df = pd.read_csv('train.csv')
+
+# Shuffle data after read
 df = df.sample(frac=1)
 
+# Separate the label column from the pixel data
 df_label = df['label']
 del df['label']
 
+
+# Convert the features and the labels to numpy arrays
 X = df.to_numpy()
 y = df_label.to_numpy()
 
 
+# Reshape to 28x28x1 images and split the data (intended: 80% train, 20% validation)
 X = X.reshape((-1, 28, 28, 1))
 X_train_size = math.floor(len(X)*.8)
 X_test_size = len(X) - X_train_size
@@ -29,50 +36,48 @@ y_train = y[:X_train_size]
 y_test = y[X_test_size+1:]
 
 
+# One-hot encode the labels
 y_train = numpy.eye(10)[y_train]
 y_test = numpy.eye(10)[y_test]
 
+# Normalize the Input between 0 and 1
 X_train = X_train/255
 X_test = X_test/255
 
-print(X_train[0])
-
 
+# Create the convolutional neural network model
 model = models.Sequential()
 model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
 model.add(layers.MaxPooling2D((2, 2)))
 model.add(layers.Conv2D(64, (3, 3), activation='relu'))
 model.add(layers.MaxPooling2D((2, 2)))
 model.add(layers.Conv2D(128, (3, 3), activation='relu'))
-
-
-
 model.add(layers.Flatten())
 model.add(layers.Dense(64, activation='relu'))
 model.add(layers.Dense(10, activation='softmax'))
 
 
+# Define Adam as the optimizer and MSE as the loss function
 optmizer = tf.keras.optimizers.Adam(learning_rate=0.001)
 model.compile(optimizer=optmizer, loss=tf.keras.losses.MeanSquaredError(),
-            metrics=['accuracy'])
-history = model.fit(X_train, y_train, batch_size=2**12, epochs=30, validation_data=(X_test, y_test))
-
-
+              metrics=['accuracy'])
 
-pred = model.predict(X_test)
+# Train the model
+history = model.fit(X_train, y_train, batch_size=2**12,
+                    epochs=30, validation_data=(X_test, y_test))
 
-#print(numpy.argmax(pred[255]))
-#plt.imshow(X_test[255].reshape(28,28))
-#plt.show()
 
+# Load test dataset as pandas dataframe and convert to numpy
 df = pd.read_csv('test.csv')
 X = df.to_numpy()
 
-y = model.predict(X.reshape(-1,28,28,1))
+# Reshape the data for the expected format of the model and predict the output
+y = model.predict(X.reshape(-1, 28, 28, 1))
 
+# Write the predicted label for each input to the submission file
 with open('submission.csv', 'w') as f:
     f.write("ImageId,Label\n")
-    idx=1
+    idx = 1
     for i in y:
         f.write(f"{idx},{numpy.argmax(i)}\n")
-        idx+=1
+        idx += 1