Source code for pymanopt.optimizers.conjugate_gradient

import time
from copy import deepcopy

import numpy as np

from pymanopt.optimizers.line_search import AdaptiveLineSearcher
from pymanopt.optimizers.optimizer import Optimizer, OptimizerResult
from pymanopt.tools import printer


def _beta_fletcher_reeves(
    *,
    manifold,
    x,
    newx,
    grad,
    newgrad,
    Pnewgrad,
    newgradPnewgrad,
    Pgrad,
    gradPgrad,
    gradient_norm,
    oldgrad,
    descent_direction,
):
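    """Fletcher-Reeves beta rule.

    Ratio of the new to the previous preconditioned gradient norm squared,
    i.e. beta = <newgrad, Pnewgrad> / <grad, Pgrad>.
    """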
    return newgradPnewgrad / gradPgrad


def _beta_polak_ribiere(
    *,
    manifold,
    x,
    newx,
    grad,
    newgrad,
    Pnewgrad,
    newgradPnewgrad,
    Pgrad,
    gradPgrad,
    gradient_norm,
    oldgrad,
    descent_direction,
):
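    """Polak-Ribiere beta rule (non-negative 'PR+' variant).

    Computes beta = <Pnewgrad, newgrad - oldgrad> / <grad, Pgrad>, where
    oldgrad is the previous gradient transported to the new point, and
    clamps the result at zero.
    """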
    ip_diff = manifold.inner_product(newx, Pnewgrad, newgrad - oldgrad)
    return max(0, ip_diff / gradPgrad)


def _beta_hestenes_stiefel(
    *,
    manifold,
    x,
    newx,
    grad,
    newgrad,
    Pnewgrad,
    newgradPnewgrad,
    Pgrad,
    gradPgrad,
    gradient_norm,
    oldgrad,
    descent_direction,
):
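    """Hestenes-Stiefel beta rule (non-negative variant).

    Computes beta = <Pnewgrad, y> / <y, descent_direction> with
    y = newgrad - oldgrad, clamped at zero. If the division raises a
    ``ZeroDivisionError``, beta falls back to 1.
    """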
    diff = newgrad - oldgrad
    try:
        beta = max(
            0,
            manifold.inner_product(newx, Pnewgrad, diff)
            / manifold.inner_product(newx, diff, descent_direction),
        )
    except ZeroDivisionError:
        beta = 1
    return beta


def _beta_hager_zhang(
    *,
    manifold,
    x,
    newx,
    grad,
    newgrad,
    Pnewgrad,
    newgradPnewgrad,
    Pgrad,
    gradPgrad,
    gradient_norm,
    oldgrad,
    descent_direction,
):
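    """Hager-Zhang beta rule.

    Computes the preconditioned Hager-Zhang update and bounds it from below
    by eta_HZ = -1 / (||descent_direction|| * min(0.01, ||grad||)), a
    truncation that prevents beta from becoming too negative.
    """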
    diff = newgrad - oldgrad
    Poldgrad = manifold.transport(x, newx, Pgrad)
    Pdiff = Pnewgrad - Poldgrad
    denominator = manifold.inner_product(newx, diff, descent_direction)
    numerator = manifold.inner_product(newx, diff, Pnewgrad)
    numerator -= (
        2
        * manifold.inner_product(newx, diff, Pdiff)
        * manifold.inner_product(newx, descent_direction, newgrad)
        / denominator
    )
    beta = numerator / denominator
    descent_direction_norm = manifold.norm(newx, descent_direction)
    eta_HZ = -1 / (descent_direction_norm * min(0.01, gradient_norm))
    return max(beta, eta_HZ)


def _beta_liu_storey(
    *,
    manifold,
    x,
    newx,
    grad,
    newgrad,
    Pnewgrad,
    newgradPnewgrad,
    Pgrad,
    gradPgrad,
    gradient_norm,
    oldgrad,
    descent_direction,
):
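    """Liu-Storey beta rule.

    Computes the Liu-Storey value
    beta_LS = <Pnewgrad, newgrad - oldgrad> / -<grad, descent_direction>,
    caps it by the conjugate-descent value
    beta_CD = <newgrad, Pnewgrad> / -<grad, descent_direction>,
    and clamps the result at zero.
    """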
    diff = newgrad - oldgrad
    ip_diff = manifold.inner_product(newx, Pnewgrad, diff)
    denominator = -manifold.inner_product(x, grad, descent_direction)
    beta_ls = ip_diff / denominator
    beta_cd = newgradPnewgrad / denominator
    return max(0, min(beta_ls, beta_cd))


BETA_RULES = {
    "FletcherReeves": _beta_fletcher_reeves,
    "HagerZhang": _beta_hager_zhang,
    "HestenesStiefel": _beta_hestenes_stiefel,
    "PolakRibiere": _beta_polak_ribiere,
    "LiuStorey": _beta_liu_storey,
}


class ConjugateGradient(Optimizer):
    """Riemannian conjugate gradient method.

    Performs optimization using the nonlinear conjugate gradient method with
    line_searcher.
    This method first computes the gradient of the cost function and then
    optimizes by moving in a direction that is conjugate to all previous
    search directions.

    Args:
        beta_rule: Conjugate gradient beta rule used to construct the new
            search direction.
            Valid choices are ``{"FletcherReeves", "PolakRibiere",
            "HestenesStiefel", "HagerZhang", "LiuStorey"}``.
        orth_value: Parameter for Powell's restart strategy.
            An infinite value disables this strategy.
        line_searcher: The line search method.

    Notes:
        See [HZ2006]_ for details about Powell's restart strategy.
    """

    def __init__(
        self,
        beta_rule: str = "HestenesStiefel",
        orth_value=np.inf,
        line_searcher=None,
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)

        try:
            self._beta_update = BETA_RULES[beta_rule]
        except KeyError:
            raise ValueError(
                f"Invalid beta rule '{beta_rule}'. Should be one of "
                f"{list(BETA_RULES.keys())}."
            )
        self._beta_rule = beta_rule
        self._orth_value = orth_value

        if line_searcher is None:
            self._line_searcher = AdaptiveLineSearcher()
        else:
            self._line_searcher = line_searcher
        self.line_searcher = None

    def run(
        self, problem, *, initial_point=None, reuse_line_searcher=False
    ) -> OptimizerResult:
        """Run CG method.

        Args:
            problem: Pymanopt problem class instance exposing the cost
                function and the manifold to optimize over.
            initial_point: Initial point on the manifold.
                If no value is provided then a starting point will be
                randomly generated.
            reuse_line_searcher: Whether to reuse the previous line searcher.
                This allows using information from a previous call to
                :meth:`run`.

        Returns:
            Local minimum of the cost function, or the most recent iterate if
            the algorithm terminated before convergence.
        """
        manifold = problem.manifold
        objective = problem.cost
        gradient = problem.riemannian_gradient

        if not reuse_line_searcher or self.line_searcher is None:
            self.line_searcher = deepcopy(self._line_searcher)
        line_searcher = self.line_searcher

        # If no starting point is specified, generate one at random.
        if initial_point is None:
            x = manifold.random_point()
        else:
            x = initial_point

        if self._verbosity >= 1:
            print("Optimizing...")
        if self._verbosity >= 2:
            iteration_format_length = int(np.log10(self._max_iterations)) + 1
            column_printer = printer.ColumnPrinter(
                columns=[
                    ("Iteration", f"{iteration_format_length}d"),
                    ("Cost", "+.16e"),
                    ("Gradient norm", ".8e"),
                ]
            )
        else:
            column_printer = printer.VoidPrinter()
        column_printer.print_header()

        # Calculate initial cost-related quantities.
        cost = objective(x)
        grad = gradient(x)
        gradient_norm = manifold.norm(x, grad)
        Pgrad = problem.preconditioner(x, grad)
        gradPgrad = manifold.inner_product(x, grad, Pgrad)

        # Initial descent direction is the negative (preconditioned) gradient.
        descent_direction = -Pgrad

        self._initialize_log(
            optimizer_parameters={
                "beta_rule": self._beta_rule,
                "orth_value": self._orth_value,
                "line_searcher": line_searcher,
            },
        )

        # Initialize iteration counter and timer.
        iteration = 0
        step_size = np.nan
        start_time = time.time()

        while True:
            iteration += 1

            column_printer.print_row([iteration, cost, gradient_norm])

            self._add_log_entry(
                iteration=iteration,
                point=x,
                cost=cost,
                gradient_norm=gradient_norm,
            )

            stopping_criterion = self._check_stopping_criterion(
                start_time=start_time,
                gradient_norm=gradient_norm,
                iteration=iteration,
                step_size=step_size,
            )

            if stopping_criterion:
                if self._verbosity >= 1:
                    print(stopping_criterion)
                    print("")
                break

            # The line search algorithms require the directional derivative
            # of the cost at the current point x along the search direction.
            df0 = manifold.inner_product(x, grad, descent_direction)

            # If we did not get a descent direction, restart, i.e., switch to
            # the negative (preconditioned) gradient. This is equivalent to
            # resetting the CG direction to a steepest descent step, which
            # discards the past information.
            if df0 >= 0:
                if self._verbosity >= 3:
                    print(
                        "Conjugate gradient info: got an ascent direction "
                        f"(df0 = {df0:.2f}), reset to the (preconditioned) "
                        "steepest descent direction."
                    )
                # Reset to the negative gradient: this discards the CG memory.
                descent_direction = -Pgrad
                df0 = -gradPgrad

            # Execute the line search.
            step_size, newx = line_searcher.search(
                objective, manifold, x, descent_direction, cost, df0
            )

            # Compute the new cost-related quantities for newx.
            newcost = objective(newx)
            newgrad = gradient(newx)
            newgradient_norm = manifold.norm(newx, newgrad)
            Pnewgrad = problem.preconditioner(newx, newgrad)
            newgradPnewgrad = manifold.inner_product(newx, newgrad, Pnewgrad)

            # Powell's restart strategy.
            oldgrad = manifold.transport(x, newx, grad)
            orth_grads = (
                manifold.inner_product(newx, oldgrad, Pnewgrad)
                / newgradPnewgrad
            )
            if abs(orth_grads) >= self._orth_value:
                beta = 0
                descent_direction = -Pnewgrad
            else:
                # Transport latest search direction to tangent space at new
                # estimate.
                descent_direction = manifold.transport(
                    x, newx, descent_direction
                )
                beta = self._beta_update(
                    manifold=manifold,
                    x=x,
                    newx=newx,
                    grad=grad,
                    newgrad=newgrad,
                    Pnewgrad=Pnewgrad,
                    newgradPnewgrad=newgradPnewgrad,
                    Pgrad=Pgrad,
                    gradPgrad=gradPgrad,
                    gradient_norm=gradient_norm,
                    oldgrad=oldgrad,
                    descent_direction=descent_direction,
                )
                descent_direction = -Pnewgrad + beta * descent_direction

            # Update the necessary variables for the next iteration.
            x = newx
            cost = newcost
            grad = newgrad
            Pgrad = Pnewgrad
            gradient_norm = newgradient_norm
            gradPgrad = newgradPnewgrad

        return self._return_result(
            start_time=start_time,
            point=x,
            cost=cost,
            iterations=iteration,
            stopping_criterion=stopping_criterion,
            cost_evaluations=iteration,
            step_size=step_size,
            gradient_norm=gradient_norm,
        )
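
A minimal usage sketch (not part of the module above), assuming the pymanopt 2.x
top-level API: ``pymanopt.Problem``, the ``pymanopt.function.autograd`` backend
decorator, and the ``Sphere`` manifold. It computes a dominant eigenvector of a
random symmetric matrix by maximizing the Rayleigh quotient on the unit sphere
with the Polak-Ribiere rule; the matrix size and seed are illustrative.

import autograd.numpy as anp

import pymanopt
from pymanopt.manifolds import Sphere
from pymanopt.optimizers import ConjugateGradient

# Random symmetric matrix whose dominant eigenvector we want to recover.
anp.random.seed(42)
n = 100
matrix = anp.random.normal(size=(n, n))
matrix = 0.5 * (matrix + matrix.T)

manifold = Sphere(n)


@pymanopt.function.autograd(manifold)
def cost(point):
    # Negative Rayleigh quotient: minimizing it maximizes x^T A x on the sphere.
    return -point @ matrix @ point


problem = pymanopt.Problem(manifold, cost)
optimizer = ConjugateGradient(beta_rule="PolakRibiere")
result = optimizer.run(problem)
print(result.point)  # Approximate dominant eigenvector (up to sign).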