# nn-conv2d.py (2.1 KB)
import math
import time

import numpy
import pandas as pd
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
  7. # Read train data from csv
  8. df = pd.read_csv('train.csv')
  9. # Shuffle data after read
  10. df = df.sample(frac=1)
  11. # Drop and separate label from the data
  12. df_label = df['label']
  13. del df['label']
  14. # Convert both dataframes to numpy objects
  15. X = df.to_numpy()
  16. y = df_label.to_numpy()
  17. # Reshape and resize data (80% train data, 20% validation data)
  18. X = X.reshape((-1, 28, 28, 1))
  19. X_train_size = math.floor(len(X)*.8)
  20. X_test_size = len(X) - X_train_size
  21. X_train = X[:X_train_size]
  22. X_test = X[X_test_size+1:]
  23. y_train = y[:X_train_size]
  24. y_test = y[X_test_size+1:]
  25. # One Hot Encode the Label
  26. y_train = numpy.eye(10)[y_train]
  27. y_test = numpy.eye(10)[y_test]
  28. # Normalize the Input between 0 and 1
  29. X_train = X_train/255
  30. X_test = X_test/255
  31. # Create the convolutional neural network model
  32. model = models.Sequential()
  33. model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
  34. model.add(layers.MaxPooling2D((2, 2)))
  35. model.add(layers.Conv2D(64, (3, 3), activation='relu'))
  36. model.add(layers.MaxPooling2D((2, 2)))
  37. model.add(layers.Conv2D(128, (3, 3), activation='relu'))
  38. model.add(layers.Flatten())
  39. model.add(layers.Dense(64, activation='relu'))
  40. model.add(layers.Dense(10, activation='softmax'))
  41. # Define adam as the optmizer and MSE as the loss function
  42. optmizer = tf.keras.optimizers.Adam(learning_rate=0.001)
  43. model.compile(optimizer=optmizer, loss=tf.keras.losses.MeanSquaredError(),
  44. metrics=['accuracy'])
  45. # Train the model
  46. history = model.fit(X_train, y_train, batch_size=2**12,
  47. epochs=30, validation_data=(X_test, y_test))
  48. # Load test dataset as pandas dataframe and convert to numpy
  49. df = pd.read_csv('test.csv')
  50. X = df.to_numpy()
  51. # Reshape the data for the expected format of the model and predict the output
  52. y = model.predict(X.reshape(-1, 28, 28, 1))
  53. # For each input, save to file the predicted output
  54. with open('submission.csv', 'w') as f:
  55. f.write("ImageId,Label\n")
  56. idx = 1
  57. for i in y:
  58. f.write(f"{idx},{numpy.argmax(i)}\n")
  59. idx += 1