#! /usr/bin/env python3
#
def gradient_descent_test ( ):

#*****************************************************************************80
#
## gradient_descent_test() tests gradient_descent().
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    27 July 2022
#
#  Author:
#
#    John Burkardt.
#
  import platform

  print ( '' )
  print ( 'gradient_descent_test():' )
  print ( '  Python version: ' + platform.python_version ( ) )
  print ( '  gradient_descent() seeks the minimizer of a function' )
  print ( '  using the method of gradient descent.' )

  gradient_descent_linear_test ( )
  gradient_descent_nonlinear_test ( )
  gradient_descent_vector_x_test ( )
  gradient_descent_vector_f_test ( )
#
#  Terminate.
#
  print ( '' )
  print ( 'gradient_descent_test():' )
  print ( '  Normal end of execution.' )

  return

def gradient_descent_linear ( A, b, alpha, tol, it_max ):

#*****************************************************************************80
#
## gradient_descent_linear() solves a least squares problem using gradient descent.
#
#  Discussion:
#
#    We are given an MxN matrix A and a right hand side M-vector B.
#    We seek an N-vector X which minimizes the L2 norm ||A*X-B||.
#    We apply gradient descent to the function f(x) = 1/2 ||A*x-b||^2,
#    whose gradient is f'(x) = A'*(A*x-b).
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    27 July 2022
#
#  Author:
#
#    Original R code by James Howard.
#    Python version by John Burkardt.
#
#  Reference:
#
#    James Howard,
#    Computational Methods for Numerical Analysis with R,
#    CRC Press, 2017,
#    ISBN13: 978-1-4987-2363-3.
#
#  Input:
#
#    real A(M,N): the matrix.
#
#    real B(M): the right hand side.
#
#    real ALPHA: the stepsize.
#
#    real TOL: the step tolerance.
#
#    integer IT_MAX: the maximum number of steps.
#
#  Output:
#
#    real X(N): the approximate least squares solution.
#
#    integer IT: the number of steps taken.
#
  import numpy as np

  m, n = A.shape

  x = np.zeros ( n )

  for it in range ( 0, it_max ):
#
#  Compute the residual e = A*x-b and the gradient d = A'*e.
#
    e = np.dot ( A, x ) - b
    d = np.dot ( np.transpose ( A ), e )
#
#  Step in the negative gradient direction, and stop if the step was small.
#
    xold = x
    x = x - alpha * d

    if ( np.linalg.norm ( x - xold ) < tol ):
      break

  return x, it
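#
#  Illustrative sketch, not called by gradient_descent_test(): one way to
#  cross check gradient_descent_linear() is to compare it with the direct
#  least squares solution returned by numpy.linalg.lstsq().  The helper name
#  gradient_descent_linear_check() is a hypothetical addition.
#
def gradient_descent_linear_check ( A, b, alpha, tol, it_max ):

#*****************************************************************************80
#
## gradient_descent_linear_check() compares gradient descent with a direct solve.
#
#  Discussion:
#
#    This is a minimal sketch, assuming the gradient_descent_linear()
#    defined above.  It reports how far the iterative solution is from a
#    reference solution computed by numpy.linalg.lstsq().
#
  import numpy as np

  x_gd, it = gradient_descent_linear ( A, b, alpha, tol, it_max )
  x_ls, _, _, _ = np.linalg.lstsq ( A, b, rcond = None )

  print ( '' )
  print ( '  gradient descent solution (', it, 'steps ):', x_gd )
  print ( '  lstsq reference solution:', x_ls )
  print ( '  ||x_gd - x_ls|| = ', np.linalg.norm ( x_gd - x_ls ) )

  return x_gd, x_ls
#
#  Example: gradient_descent_linear_check ( A, b, 0.02, 1.0e-6, 10000 ),
#  using the A and b from gradient_descent_linear_test().
#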
def gradient_descent_linear_test ( ):

#*****************************************************************************80
#
## gradient_descent_linear_test() tests gradient_descent_linear().
#
#  Discussion:
#
#    This test illustrates how slowly a simple gradient descent method
#    converges to the solution of a linear least squares problem.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    25 July 2022
#
#  Author:
#
#    John Burkardt.
#
  import numpy as np

  print ( '' )
  print ( 'gradient_descent_linear_test():' )
  print ( '  gradient_descent_linear() approximates the solution of a linear' )
  print ( '  problem A*x=b, by minimizing ||A*x-b|| using gradient descent.' )

  A = np.array ( [ \
    [ 1.47, 1.0 ], \
    [ 1.50, 1.0 ], \
    [ 1.52, 1.0 ], \
    [ 1.55, 1.0 ], \
    [ 1.57, 1.0 ], \
    [ 1.60, 1.0 ], \
    [ 1.63, 1.0 ], \
    [ 1.65, 1.0 ], \
    [ 1.68, 1.0 ], \
    [ 1.70, 1.0 ], \
    [ 1.73, 1.0 ], \
    [ 1.75, 1.0 ], \
    [ 1.78, 1.0 ], \
    [ 1.80, 1.0 ], \
    [ 1.83, 1.0 ] ] )

  m, n = A.shape

  b = np.array ( [ \
    52.21, \
    53.12, \
    54.48, \
    55.84, \
    57.20, \
    58.57, \
    59.93, \
    61.29, \
    63.11, \
    64.47, \
    66.28, \
    68.10, \
    69.92, \
    72.19, \
    74.46 ] )

  alpha = 0.02
  tol = 1.0e-6
  it_max = 10000

  print ( '' )
  print ( '  Learning rate =      ', alpha )
  print ( '  Stepsize tolerance = ', tol )
  print ( '  Maximum iterations = ', it_max )

  x_exact = np.array ( [ 61.272, -39.062 ] )

  x, it = gradient_descent_linear ( A, b, alpha, tol, it_max )

  print ( '' )
  print ( '  Number of iterations = ', it )
  print ( '  Estimated solution:' )
  print ( x )
  print ( '  Exact solution:' )
  print ( x_exact )
  print ( '' )
  print ( '  ||x-x_exact||   = ', np.linalg.norm ( x - x_exact ) )
  print ( '  ||A*x-b||       = ', np.linalg.norm ( b - np.dot ( A, x ) ) )
  print ( '  ||A*x_exact-b|| = ', np.linalg.norm ( b - np.dot ( A, x_exact ) ) )

  return

def gradient_descent_nonlinear ( f, df, x, it_max, eta ):

#*****************************************************************************80
#
## gradient_descent_nonlinear(): gradient descent for scalar nonlinear f(x).
#
#  Discussion:
#
#    We are given a scalar nonlinear function f(x) and want to estimate
#    the location of a local minimizer.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    27 July 2022
#
#  Author:
#
#    John Burkardt.
#
#  Input:
#
#    real value = F(X): evaluates the function.
#
#    real value = DF(X): evaluates the derivative of the function.
#
#    real X: the initial guess for the minimizer.
#
#    integer IT_MAX: the maximum number of steps.
#
#    real ETA: the learning rate.
#
#  Output:
#
#    real X: the estimated minimizer.
#
#    integer IT: the number of steps taken.
#
  print_out = True

  if ( print_out ):
    print ( '' )
    print ( '   it             x           f(x)          f\'(x)' )
    print ( '' )

  for it in range ( 0, it_max + 1 ):

    if ( 0 < it ):
#
#  Take one gradient descent step: x <- x - eta * f'(x).
#
      x = x - eta * df ( x )

    if ( print_out ):
      print ( '  %3d  %12.8g  %12.8g  %12.8g' % ( it, x, f ( x ), df ( x ) ) )

  return x, it
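#
#  Illustrative sketch, not called by gradient_descent_test(): the same scalar
#  update x <- x - eta * f'(x), but with an early stop once |f'(x)| falls
#  below a tolerance, in the spirit of the TOL test used by
#  gradient_descent_linear().  The name gradient_descent_nonlinear_tol() is a
#  hypothetical addition.
#
def gradient_descent_nonlinear_tol ( df, x, it_max, eta, tol ):

#*****************************************************************************80
#
## gradient_descent_nonlinear_tol(): scalar gradient descent with a stopping test.
#
#  Input:
#
#    real value = DF(X): evaluates the derivative of the function.
#
#    real X: the initial guess for the minimizer.
#
#    integer IT_MAX: the maximum number of steps.
#
#    real ETA: the learning rate.
#
#    real TOL: stop as soon as |f'(x)| < TOL.
#
#  Output:
#
#    real X: the estimated minimizer.
#
#    integer IT: the number of steps taken.
#
  for it in range ( 0, it_max + 1 ):

    if ( abs ( df ( x ) ) < tol ):
      break

    x = x - eta * df ( x )

  return x, it
#
#  Example (using quartic_df, defined below):
#    x, it = gradient_descent_nonlinear_tol ( quartic_df, -1.5, 1000, 0.005, 1.0e-8 )
#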
def gradient_descent_nonlinear_test ( ):

#*****************************************************************************80
#
## gradient_descent_nonlinear_test() tests gradient descent for nonlinear f.
#
#  Discussion:
#
#    The quartic function is
#      f(x) = 2.0 * x^4 - 4.0 * x^2 + x + 20.0
#
#    We seek a minimizer of f(x) in the range -2 <= x <= 2.
#
#    The gradient_descent_nonlinear() method uses a very simple version of
#    the algorithm.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    27 July 2022
#
#  Author:
#
#    John Burkardt
#
  from numpy.random import default_rng
  import matplotlib.pyplot as plt
  import numpy as np

  a = -2.0
  b = +2.0

  print ( '' )
  print ( 'gradient_descent_nonlinear_test():' )
  print ( '  Seek local minimizer of a scalar function quartic(x).' )
  print ( '  Minimizer is probably in the interval [', a, ',', b, ']' )
  print ( '  Use a very simple version of the gradient descent method.' )
#
#  Plot the function.
#
  plt.clf ( )
  xvec = np.linspace ( a, b, 101 )
  fxvec = quartic ( xvec )
  plt.plot ( xvec, fxvec, linewidth = 3 )
  plt.plot ( [ a, b ], [ 0, 0 ], 'k', linewidth = 3 )
  plt.grid ( True )
  plt.xlabel ( '<-- X -->' )
  plt.ylabel ( '<-- F(X) -->' )
  plt.title ( 'Quartic function' )
  filename = 'quartic.png'
  plt.savefig ( filename )
  print ( '  Graphics saved as "' + filename + '"' )
#
#  Choose a random initial guess, or simply use x = -1.5.
#
  rng = default_rng ( )
  x0 = a + ( b - a ) * rng.random ( )
  x0 = - 1.5
#
#  Maximum number of iterations.
#
  itmax = 40
#
#  Choose a small learning rate.
#
  eta = 0.005
#
#  Seek the minimizer.
#
  x, it = gradient_descent_nonlinear ( quartic, quartic_df, x0, itmax, eta )

  print ( '' )
  print ( '  ', it, 'gradient descent steps were taken.' )
  print ( '  Initial x = ', x0, ' f(x) = ', quartic ( x0 ), ' f\'(x) = ', quartic_df ( x0 ) )
  print ( '  Final   x = ', x,  ' f(x) = ', quartic ( x ),  ' f\'(x) = ', quartic_df ( x ) )
#
#  Plot the function, marking the initial point and the computed minimizer.
#
  plt.clf ( )
  xvec = np.linspace ( a, b, 101 )
  fxvec = quartic ( xvec )
  plt.plot ( xvec, fxvec, linewidth = 3 )
  plt.plot ( [ x0, x0 ], [ 0, quartic ( x0 ) ], 'b', linewidth = 3 )
  plt.plot ( x0, 0.0, 'bo', markersize = 10 )
  plt.plot ( x0, quartic ( x0 ), 'bo', markersize = 10 )
  plt.plot ( [ x, x ], [ 0, quartic ( x ) ], 'r', linewidth = 3 )
  plt.plot ( x, 0.0, 'ro', markersize = 10 )
  plt.plot ( x, quartic ( x ), 'ro', markersize = 10 )
  plt.plot ( [ a, b ], [ 0, 0 ], 'k', linewidth = 3 )
  plt.grid ( True )
  plt.xlabel ( '<-- X -->' )
  plt.ylabel ( '<-- F(X) -->' )
  plt.title ( 'Initial point blue, minimizer red' )
  filename = 'quartic_minimizer.png'
  plt.savefig ( filename )
  print ( '  Graphics saved as "' + filename + '"' )

  return

def quartic ( x ):

#*****************************************************************************80
#
## quartic() evaluates a quartic function.
#
#  Discussion:
#
#    The quartic function is
#      f(x) = 2.0 * x^4 - 4.0 * x^2 + x + 20.0
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    27 July 2022
#
#  Author:
#
#    John Burkardt
#
#  Input:
#
#    real X: the evaluation point.
#
#  Output:
#
#    real VALUE: the function value.
#
  value = 2.0 * x**4 - 4.0 * x**2 + x + 20.0

  return value

def quartic_df ( x ):

#*****************************************************************************80
#
## quartic_df() evaluates the derivative of a quartic function.
#
#  Discussion:
#
#    The quartic function is
#      f(x) = 2.0 * x^4 - 4.0 * x^2 + x + 20.0
#
#    Its derivative is
#      f'(x) = 8.0 * x^3 - 8.0 * x + 1
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    27 July 2022
#
#  Author:
#
#    John Burkardt
#
#  Input:
#
#    real X: the evaluation point.
#
#  Output:
#
#    real VALUE: the derivative value.
#
  value = 8.0 * x**3 - 8.0 * x + 1.0

  return value
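#
#  Illustrative sketch, not called by gradient_descent_test(): a common
#  heuristic for choosing a safe learning rate is to bound the second
#  derivative.  For the quartic, f''(x) = 24 x^2 - 8, so |f''(x)| <= 88 on
#  [-2,+2], and any eta below roughly 2/88, about 0.0227, keeps the update
#  x <- x - eta * f'(x) a descent step as long as the iterates stay in the
#  interval.  The value eta = 0.005 used in gradient_descent_nonlinear_test()
#  satisfies this bound.  The helper name quartic_eta_bound() is a
#  hypothetical addition.
#
def quartic_eta_bound ( a, b ):

#*****************************************************************************80
#
## quartic_eta_bound() estimates a safe learning rate for the quartic on [A,B].
#
  import numpy as np

  xvec = np.linspace ( a, b, 1001 )
  d2f = 24.0 * xvec**2 - 8.0
  L = np.max ( np.abs ( d2f ) )

  return 2.0 / L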
def gradient_descent_vector_x ( f, df, x, it_max, alpha ):

#*****************************************************************************80
#
## gradient_descent_vector_x(): f(x) is scalar, x is a vector.
#
#  Discussion:
#
#    We are given a scalar nonlinear function f(x) of a vector argument x,
#    and want to estimate the location of a local minimizer.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    28 July 2022
#
#  Author:
#
#    John Burkardt.
#
#  Input:
#
#    real value = F(X): evaluates the function.
#
#    real value = DF(X): evaluates the gradient of the function.
#
#    real X(N): the initial guess for the minimizer.
#
#    integer IT_MAX: the maximum number of steps.
#
#    real ALPHA: the learning rate.
#
#  Output:
#
#    real X(N): the estimated minimizer.
#
  import numpy as np

  print_out = False

  if ( print_out ):
    print ( '' )
    print ( '   it          ||x||           f(x)      ||f\'(x)||' )
    print ( '' )

  for it in range ( 0, it_max + 1 ):

    if ( 0 < it ):
#
#  Take one gradient descent step: x <- x - alpha * f'(x).
#
      x = x - alpha * df ( x )

    if ( print_out ):
      print ( '  %3d  %12.8g  %12.8g  %12.8g' \
        % ( it, np.linalg.norm ( x ), f ( x ), np.linalg.norm ( df ( x ) ) ) )

  return x

def gradient_descent_vector_x_test ( ):

#*****************************************************************************80
#
## gradient_descent_vector_x_test() uses gradient descent on a vector problem.
#
#  Discussion:
#
#    We apply gradient descent to minimize a scalar function f(x)
#    of a vector argument x.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    28 July 2022
#
#  Author:
#
#    John Burkardt
#
  import numpy as np

  print ( '' )
  print ( 'gradient_descent_vector_x_test():' )
  print ( '  Seek minimizer of a function z(x,y).' )
#
#  Choose an initial guess.
#
  x0 = np.array ( [ 1.0, 1.5 ] )
#
#  Choose a small learning rate.
#
  alpha = 0.15
#
#  Maximum number of iterations.
#
  it_max = 10000

  x = gradient_descent_vector_x ( hex2, hex2_df, x0, it_max, alpha )

  print ( '' )
  print ( '  Initial x,y = ', x0, ' f(x,y) = ', hex2 ( x0 ), ' f\'(x,y) = ', hex2_df ( x0 ) )
  print ( '  Final   x,y = ', x,  ' f(x,y) = ', hex2 ( x ),  ' f\'(x,y) = ', hex2_df ( x ) )

  return

def hex2 ( xy ):

#*****************************************************************************80
#
## hex2() evaluates the hex2 function.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    28 July 2022
#
#  Input:
#
#    real XY(2): the x and y arguments.
#
#  Output:
#
#    real VALUE: the function value.
#
  x = xy[0]
  y = xy[1]

  value = 2.0 * x**2 - 1.05 * x**4 + x**6 / 6.0 + x * y + y**2

  return value

def hex2_df ( xy ):

#*****************************************************************************80
#
## hex2_df() evaluates the hex2 gradient.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    28 July 2022
#
#  Input:
#
#    real XY(2): the x and y arguments.
#
#  Output:
#
#    real VALUE(2): the gradient vector.
#
  import numpy as np

  x = xy[0]
  y = xy[1]

  value = np.array ( [ \
    4.0 * x - 4.2 * x**3 + x**5 + y, \
    x + 2.0 * y ] )

  return value
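#
#  Illustrative sketch, not called by gradient_descent_test(): a quick central
#  difference check that hex2_df() really is the gradient of hex2().  The
#  helper name hex2_gradient_check() and the step h are hypothetical
#  additions.  (hex2() itself appears to be the three-hump camel test
#  function.)
#
def hex2_gradient_check ( xy, h = 1.0e-6 ):

#*****************************************************************************80
#
## hex2_gradient_check() compares hex2_df() to a central difference estimate.
#
  import numpy as np

  g_exact = hex2_df ( xy )
  g_fd = np.zeros ( 2 )

  for i in range ( 0, 2 ):
    e = np.zeros ( 2 )
    e[i] = h
    g_fd[i] = ( hex2 ( xy + e ) - hex2 ( xy - e ) ) / ( 2.0 * h )

  return np.linalg.norm ( g_exact - g_fd )
#
#  Example: err = hex2_gradient_check ( np.array ( [ 1.0, 1.5 ] ) )
#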
def gradient_descent_vector_f ( f, df, x, it_max, alpha ):

#*****************************************************************************80
#
## gradient_descent_vector_f(): f(x) is a vector, x is a vector.
#
#  Discussion:
#
#    We are given a nonlinear vector function f(x) of a vector argument x,
#    and want to estimate a minimizer of 1/2 ||f(x)||^2.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    29 July 2022
#
#  Author:
#
#    John Burkardt.
#
#  Input:
#
#    real value = F(X): evaluates the function.
#
#    real value = DF(X): evaluates the Jacobian of the function.
#
#    real X(N): the initial guess for the minimizer.
#
#    integer IT_MAX: the maximum number of steps.
#
#    real ALPHA: the learning rate.
#
#  Output:
#
#    real X(N): the estimated minimizer.
#
  import numpy as np

  print_out = True

  if ( print_out ):
    print ( '' )
    print ( '   it          ||x||  0.5*||f(x)||^2      ||J(x)||' )
    print ( '' )

  for it in range ( 0, it_max + 1 ):

    if ( 0 < it ):
#
#  The gradient of 1/2 ||f(x)||^2 is J(x)'*f(x).
#
      x = x - alpha * np.matmul ( np.transpose ( df ( x ) ), f ( x ) )

    if ( print_out ):
      print ( '  %3d  %12.8g  %12.8g  %12.8g' % ( \
        it, \
        np.linalg.norm ( x ), \
        0.5 * ( np.linalg.norm ( f ( x ) ) )**2, \
        np.linalg.norm ( df ( x ), 'fro' ) ) )

  return x

def gradient_descent_vector_f_test ( ):

#*****************************************************************************80
#
## gradient_descent_vector_f_test() uses gradient descent on a vector problem.
#
#  Discussion:
#
#    We apply gradient descent to minimize the L2 norm of a vector function
#    f(x) of a vector argument x.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    29 July 2022
#
#  Author:
#
#    John Burkardt
#
  import numpy as np

  print ( '' )
  print ( 'gradient_descent_vector_f_test():' )
  print ( '  Seek minimizer of the norm of a vector function f(x).' )
#
#  Choose an initial guess.
#
  x0 = np.array ( [ [ 0.0 ], [ 0.0 ], [ 0.0 ] ] )
#
#  Maximum number of iterations.
#
  it_max = 83
#
#  Choose a small learning rate.
#
  alpha = 0.001

  x = gradient_descent_vector_f ( wiki3, wiki3_df, x0, it_max, alpha )

  print ( '' )
  print ( '  Initial x = (%g,%g,%g)' % ( x0[0,0], x0[1,0], x0[2,0] ) )
  temp = 0.5 * ( np.linalg.norm ( wiki3 ( x0 ) ) )**2
  print ( '  0.5*||f(x)||^2 = %g' % ( temp ) )
  temp = np.linalg.norm ( wiki3_df ( x0 ), 'fro' )
  print ( '  ||J(x)|| = %g' % ( temp ) )
  print ( '' )
  print ( '  Final x = (%g,%g,%g)' % ( x[0,0], x[1,0], x[2,0] ) )
  temp = 0.5 * ( np.linalg.norm ( wiki3 ( x ) ) )**2
  print ( '  0.5*||f(x)||^2 = %g' % ( temp ) )
  temp = np.linalg.norm ( wiki3_df ( x ), 'fro' )
  print ( '  ||J(x)|| = %g' % ( temp ) )

  return

def wiki3 ( xyz ):

#*****************************************************************************80
#
## wiki3() evaluates the wiki3 function.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    29 July 2022
#
#  Input:
#
#    real XYZ(3): the arguments.
#
#  Output:
#
#    real VALUE(3,1): the function value, as a column vector.
#
  import numpy as np
#
#  Accept xyz as a flat vector or a column vector.
#
  xyz = np.asarray ( xyz ).flatten ( )

  x = xyz[0]
  y = xyz[1]
  z = xyz[2]

  value = np.zeros ( [ 3, 1 ] )

  value[0,0] = 3.0 * x - np.cos ( y * z ) - 3.0 / 2.0
  value[1,0] = 4.0 * x**2 - 625.0 * y**2 + 2.0 * y - 1.0
  value[2,0] = np.exp ( - x * y ) + 20.0 * z + ( 10.0 * np.pi - 3.0 ) / 3.0

  return value

def wiki3_df ( xyz ):

#*****************************************************************************80
#
## wiki3_df() evaluates the wiki3 jacobian.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    29 July 2022
#
#  Input:
#
#    real XYZ(3): the arguments.
#
#  Output:
#
#    real VALUE(3,3): the Jacobian matrix.
#
  import numpy as np
#
#  Accept xyz as a flat vector or a column vector.
#
  xyz = np.asarray ( xyz ).flatten ( )

  x = xyz[0]
  y = xyz[1]
  z = xyz[2]

  value = np.zeros ( [ 3, 3 ] )

  value[0,0] = 3.0
  value[0,1] = np.sin ( y * z ) * z
  value[0,2] = np.sin ( y * z ) * y

  value[1,0] = 8.0 * x
  value[1,1] = - 1250.0 * y + 2.0
  value[1,2] = 0.0

  value[2,0] = - y * np.exp ( - x * y )
  value[2,1] = - x * np.exp ( - x * y )
  value[2,2] = 20.0

  return value

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  gradient_descent_test ( )
  timestamp ( )