# synthetic_gpr.py
# scikit-learn Gaussian process regression on synthetic data

# Anaconda3-2022.10  Python 3.9.13
# scikit-learn 1.0.2  Windows 10/11

import os
import pickle

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
# other kernels available from the same module: ConstantKernel,
# Matern, RationalQuadratic, ExpSineSquared, DotProduct

# -----------------------------------------------------------

def accuracy(model, data_X, data_y, pct_close):
  """Return the fraction of items predicted within pct_close of the target.

  An item counts as correct when |pred - y| < |pct_close * y|. The
  comparison is strict, so an item whose true target is exactly 0 is
  never counted as correct.

  Parameters:
    model: object with a predict(X) method that returns one value per
      row of the 2-D array X.
    data_X: array-like of predictor rows, one entry per item.
    data_y: array-like of true target values, one per item.
    pct_close: relative tolerance, e.g. 0.10 means within 10 percent.

  Returns:
    A Python float in [0.0, 1.0]; 0.0 when data_X holds no items.

  Raises:
    ValueError: if the number of targets or of predictions differs
      from the number of items.
  """
  n_items = len(data_X)
  if n_items == 0:
    return 0.0  # nothing to score; also avoids a division by zero

  # one row per item, so the model is called once for the whole batch
  X = np.asarray(data_X).reshape(n_items, -1)

  # compare in float64 so the tolerance does not depend on the
  # precision the targets happened to be loaded with
  targets = np.asarray(data_y, dtype=np.float64).reshape(-1)
  if targets.shape[0] != n_items:
    raise ValueError("expected %d targets, got %d" %
      (n_items, targets.shape[0]))

  preds = np.asarray(model.predict(X), dtype=np.float64).reshape(-1)
  if preds.shape[0] != n_items:
    raise ValueError("expected %d predictions, got %d" %
      (n_items, preds.shape[0]))

  is_close = np.abs(preds - targets) < np.abs(pct_close * targets)
  return int(np.count_nonzero(is_close)) / n_items

# -----------------------------------------------------------

def _load_xy(data_file):
  # Read one tab-delimited data file (lines starting with "#" are
  # comments) and return (X, y): columns 0-4 as float64 predictors
  # and column 5 as float32 targets. The file is parsed only once.
  xy = np.loadtxt(data_file, delimiter="\t",
    usecols=(0,1,2,3,4,5), comments="#",
    dtype=np.float64, ndmin=2)  # ndmin=2 keeps a one-row file 2-D
  X = np.ascontiguousarray(xy[:, 0:5])
  y = xy[:, 5].astype(np.float32)
  return X, y

def main():
  """Run the GPR demo end to end.

  Loads the train and test files from ./Data, fits a
  GaussianProcessRegressor with a fixed RBF(1.0) kernel, prints the
  train/test accuracy (within 10 percent of the true target), predicts
  one hand-made item, and pickles the trained model into ./Models.
  """
  # 0. prepare
  print("\nBegin scikit Gaussian process regression ")
  print("Predict synthetic data ")
  np.random.seed(1)
  np.set_printoptions(edgeitems=5, linewidth=100,
    sign=" ", formatter={'float': '{: 7.4f}'.format})

  # ---------------------------------------------------------

  # 1. load data
  # paths are built with os.path.join so the script is not tied to
  # the Windows path separator
  print("\nLoading 200 train and 40 test data for GPR ")
  train_file = os.path.join(".", "Data", "synthetic_train.txt")
  train_X, train_y = _load_xy(train_file)

  test_file = os.path.join(".", "Data", "synthetic_test.txt")
  test_X, test_y = _load_xy(test_file)
  print("Done ")

  print("\nFirst four X data: ")
  print(train_X[0:4])
  print(". . .")
  print("\nFirst four targets: ")
  print(train_y[0:4])
  print(". . .")

  # ---------------------------------------------------------

  # 2. create and train GPR model
  print("\nCreating GPR model with RBF(1.0) kernel ")

  # GaussianProcessRegressor(kernel=None, *, alpha=1e-10,
  #  optimizer='fmin_l_bfgs_b', n_restarts_optimizer=0,
  #  normalize_y=False, copy_X_train=True, random_state=None)
  #
  # default: ConstantKernel(1.0, constant_value_bounds="fixed")
  #  * RBF(1.0, length_scale_bounds="fixed")
  # scikit-learn.org/stable/modules/gaussian_process.html

  # "fixed" bounds keep length_scale at 1.0 during fit()
  krnl = RBF(length_scale=1.0, length_scale_bounds="fixed")
  gpr_model = GaussianProcessRegressor(kernel=krnl,
    normalize_y=False, random_state=1, alpha=0.001)
  print("Done ")

  print("\nTraining model ")
  gpr_model.fit(train_X, train_y)
  print("Done ")

  # ---------------------------------------------------------

  # 3. compute model accuracy
  print("\nComputing accuracy (within 0.10 of true) ")
  acc_train = accuracy(gpr_model, train_X, train_y, 0.10)
  print("\nAccuracy on train data = %0.4f " % acc_train)
  acc_test = accuracy(gpr_model, test_X, test_y, 0.10)
  print("Accuracy on test data = %0.4f " % acc_test)

  # 4. use model to predict
  x = np.array([[-0.5, 0.5, -0.5, 0.5, -0.5]],
    dtype=np.float64)
  print("\nPredicting for x = ")
  print(x)
  (y_pred, std) = gpr_model.predict(x, return_std=True)
  # predict() returns one-element arrays for a one-item batch; take
  # element 0 explicitly rather than relying on the implicit
  # array-to-scalar conversion, which newer NumPy deprecates
  print("\nPredicted y = %0.4f " % y_pred[0])
  print("std = %0.4f " % std[0])

  # 5. save model
  print("\nSaving trained GPR model ")
  model_dir = os.path.join(".", "Models")
  os.makedirs(model_dir, exist_ok=True)  # open() would fail otherwise
  fn = os.path.join(model_dir, "gpr_model.pkl")
  with open(fn,'wb') as f:
    pickle.dump(gpr_model, f)

  # load and use model (only unpickle files you trust)
  # path = os.path.join(".", "Models", "gpr_model.pkl")
  # with open(path, 'rb') as f:
  #   loaded_model = pickle.load(f)
  # X = (set values for X)
  # y_pred = loaded_model.predict(X)

  print("\nEnd GPR prediction ")

# -----------------------------------------------------------

# Run the demo only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
  main()
