# people_gender_nn_sckit.py

# predict sex (0 = male, 1 = female) 
# from age, state, income, politics

# sex  age    state    income   politics
#  0   0.27   0  1  0   0.7610   0  0  1
#  1   0.19   0  0  1   0.6550   1  0  0
# state: michigan = 100, nebraska = 010, oklahoma = 001
# politics: conservative, moderate, liberal

# Anaconda3-2022.10  Python 3.9.13  scikit-learn 1.0.2
# Windows 10/11

import numpy as np 
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore')  # early-stop warnings

# ---------------------------------------------------------

def show_confusion(cm):
  """Print a square confusion matrix with row and column labels.

  Each matrix row is printed as "actual <i>:" followed by that row's
  counts. After a separator line, a "predicted" line prints the
  column indices under the counts.

  Args:
    cm: square 2-D sequence or array of counts, read as
      cm[actual][predicted]. An empty matrix prints only the
      separator and the "predicted" label.
  """
  dim = len(cm)
  # np.max() raises on a zero-size input, so fall back to 0 there
  mx = np.max(cm) if dim > 0 else 0
  # a column must fit the largest count AND the largest column index;
  # otherwise header labels run together when there are 10+ classes
  widest_label = len(str(max(dim - 1, 0)))
  wid = max(len(str(mx)), widest_label) + 1  # +1 = separating blank
  fmt = "%" + str(wid) + "d"  # like "%3d"

  for i in range(dim):
    cells = "".join(fmt % cm[i][j] for j in range(dim))
    print("actual   " + ("%3d:" % i) + cells)
  print("------------")
  # "predicted    " is as wide as "actual   " plus the "%3d:" row label
  print("predicted    " + "".join(fmt % j for j in range(dim)))

# ---------------------------------------------------------

def main():
  """Run the scikit-learn MLP binary-classification demo end to end.

  Loads tab-delimited training and test files from the Data
  directory (column 0 = sex label, columns 1-8 = predictors), trains
  an MLPClassifier, prints accuracy, a confusion matrix, precision,
  recall and F1 for the test data, and finally predicts the class of
  one hand-built input row. Everything is reported via print();
  nothing is returned.
  """
  # function-scope imports: only main() needs these names
  from pathlib import Path
  from sklearn.metrics import confusion_matrix
  from sklearn.metrics import precision_score
  from sklearn.metrics import recall_score
  from sklearn.metrics import f1_score

  # 0. get ready
  print("\nBegin scikit neural network binary example ")
  print("Predict sex from age, State, income, politics ")
  np.random.seed(1)
  np.set_printoptions(precision=4, suppress=True)

  # 1. load data
  print("\nLoading data into memory ")
  # build paths with pathlib so the separator is right on any OS
  # (the data directory is still resolved against the working dir)
  data_dir = Path("Data")

  # load, single call to loadtxt() then split technique
  train_file = data_dir / "people_train.txt"
  train_xy = np.loadtxt(train_file, usecols=range(0,9),
    delimiter="\t", comments="#", dtype=np.float32)
  train_x = train_xy[:,1:9]                  # predictors
  train_y = train_xy[:,0].astype(np.int64)   # class labels

  # load, two calls to loadtxt() technique
  test_file = data_dir / "people_test.txt"
  test_x = np.loadtxt(test_file, usecols=range(1,9),
    delimiter="\t", comments="#", dtype=np.float32)
  test_y = np.loadtxt(test_file, usecols=0,
    delimiter="\t", comments="#", dtype=np.int64)

  print("\nTraining data:")
  print(train_x[0:4])
  print(". . . \n")
  print(train_y[0:4])
  print(". . . ")

  # -------------------------------------------------------

  # 2. create network
  # MLPClassifier(hidden_layer_sizes=(100,),
  #  activation='relu', *, solver='adam', alpha=0.0001,
  #  batch_size='auto', learning_rate='constant',
  #  learning_rate_init=0.001, power_t=0.5, max_iter=200,
  #  shuffle=True, random_state=None, tol=0.0001,
  #  verbose=False, warm_start=False, momentum=0.9,
  #  nesterovs_momentum=True, early_stopping=False,
  #  validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
  #  epsilon=1e-08, n_iter_no_change=10, max_fun=15000)

  params = { 'hidden_layer_sizes' : [10,10],
    'activation' : 'tanh',
    'solver' : 'sgd',
    'alpha' : 0.001,
    'batch_size' : 10,
    'random_state' : 0,
    'tol' : 0.0001,
    'nesterovs_momentum' : False,
    'learning_rate' : 'constant',
    'learning_rate_init' : 0.01,
    'max_iter' : 500,
    'shuffle' : True,
    'n_iter_no_change' : 50,
    'verbose' : False }

  print("\nCreating 8-(10-10)-1 tanh neural network ")
  net = MLPClassifier(**params)

  # -------------------------------------------------------

  # 3. train
  print("\nTraining with bat sz = %s lrn rate = %s " %
    (params['batch_size'], params['learning_rate_init']))
  print("Stop if no change %s iterations " %
    params['n_iter_no_change'])
  net.fit(train_x, train_y)
  print("Done ")

  # -------------------------------------------------------

  # 4. evaluate model
  acc_train = net.score(train_x, train_y)
  print("\nAccuracy on train = %0.4f " % acc_train)
  acc_test = net.score(test_x, test_y)
  print("Accuracy on test = %0.4f " % acc_test)

  # predict once; every metric below reuses the same predictions
  y_predicteds = net.predict(test_x)

  cm = confusion_matrix(test_y, y_predicteds)
  print("\nConfusion matrix: \n")
  # print(cm)  # raw
  show_confusion(cm)  # custom formatted

  precision = precision_score(test_y, y_predicteds)
  print("\nPrecision on test = %0.4f " % precision)
  recall = recall_score(test_y, y_predicteds)
  print("Recall on test = %0.4f " % recall)
  f1 = f1_score(test_y, y_predicteds)
  print("F1 score on test = %0.4f " % f1)

  # -------------------------------------------------------

  # 5. use model
  print("\nSetting age = 30  Oklahoma  $40,000  moderate ")
  # same layout as a data row: age, state (3), income, politics (3)
  X = np.array([[0.30, 0,0,1, 0.4000, 0,1,0]],
    dtype=np.float32)

  probs = net.predict_proba(X)
  print("\nPrediction pseudo-probs: ")
  print(probs)

  sex = net.predict(X)
  print("\nPredicted class: ")
  print(sex)  # a vector with a single value
  if sex[0] == 0:
    print("male")
  elif sex[0] == 1:
    print("female")

  # -------------------------------------------------------

  # 6. TODO: save model using pickle
  print("\nEnd scikit binary neural network demo ")

# Run the demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()
