# cmte/keggle-competitions
							import pandas as pd
import numpy
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import math
import time


# Load the training CSV and shuffle all of its rows (sample with frac=1
# returns every row in random order).
df = pd.read_csv('train.csv').sample(frac=1)

# Pull the 'label' column out of the frame; pop returns the column and
# removes it in place, leaving only the feature columns in df.
df_label = df.pop('label')


# Features and labels as numpy arrays
X = df.to_numpy()
y = df_label.to_numpy()


# Reshape each flat row into a 28x28 single-channel image and split the
# samples: the first 80% for training, the remaining 20% for validation.
X = X.reshape((-1, 28, 28, 1))
X_train_size = math.floor(len(X)*.8)
X_test_size = len(X) - X_train_size

# The validation slice has to start exactly where the training slice ends.
# Starting it at X_test_size + 1 (an index near 20% of the data) made the
# validation set overlap most of the training rows, so validation metrics
# were measured largely on data the model had been trained on.
X_train = X[:X_train_size]
X_test = X[X_train_size:]

y_train = y[:X_train_size]
y_test = y[X_train_size:]


# One Hot Encode the Label: indexing the 10x10 identity matrix with the
# integer labels yields one row per sample with a single 1 at the class index.
y_train = numpy.eye(10)[y_train]
y_test = numpy.eye(10)[y_test]

# Normalize the Input between 0 and 1
# (assumes raw pixel values lie in 0..255 -- TODO confirm against the dataset)
X_train = X_train/255
X_test = X_test/255


# Convolutional neural network: three 3x3 convolution stages (32, 64 and 128
# filters) with 2x2 max pooling after the first two, followed by a 64-unit
# dense layer and a 10-way softmax output.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])


# Use Adam (learning rate 0.001) as the optimizer and mean squared error as
# the loss, reporting accuracy as the metric.
# NOTE(review): categorical cross-entropy is the conventional loss for a
# softmax classifier -- confirm that MSE is intentional here.
mse_loss = tf.keras.losses.MeanSquaredError()
optmizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss=mse_loss, optimizer=optmizer, metrics=['accuracy'])

# Fit for 30 epochs in batches of 4096 samples, evaluating on the held-out
# validation arrays after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=4096,
)


# Load test dataset as pandas dataframe and convert to numpy
df = pd.read_csv('test.csv')
X = df.to_numpy()

# Reshape the data to the model's (28, 28, 1) input format and apply the same
# /255 scaling that was applied to the training inputs; feeding unscaled
# pixels to a model fit on scaled inputs would make the predictions unreliable.
y = model.predict(X.reshape(-1, 28, 28, 1)/255)

# The predicted label of each input is the index of its largest output score.
predicted_labels = numpy.argmax(y, axis=1)

# For each input, save to file the predicted output (image ids are 1-based)
with open('submission.csv', 'w') as f:
    f.write("ImageId,Label\n")
    f.writelines(
        f"{image_id},{label}\n"
        for image_id, label in enumerate(predicted_labels, start=1)
    )