Source code for pymanopt.manifolds.psd

import numpy as np
import scipy.linalg

from pymanopt.manifolds.manifold import Manifold, RetrAsExpMixin


class _PSDFixedRank(Manifold):
    def __init__(self, n, k, name, dimension):
        self._n = n
        self._k = k
        super().__init__(name, dimension)

    @property
    def typical_dist(self):
        return 10 + self._k

    def inner_product(self, point, tangent_vector_a, tangent_vector_b):
        return np.tensordot(
            tangent_vector_a.conj(),
            tangent_vector_b,
            axes=tangent_vector_a.ndim,
        ).real

    def dist(self, point_a, point_b):
        return self.norm(point_a, self.log(point_a, point_b))

    def norm(self, point, tangent_vector):
        return np.linalg.norm(tangent_vector)

    def projection(self, point, vector):
        YtY = point.T.conj() @ point
        AS = point.T.conj() @ vector - vector.T.conj() @ point
        Omega = scipy.linalg.solve_continuous_lyapunov(YtY, AS)
        return vector - point @ Omega

    to_tangent_space = projection

    def euclidean_to_riemannian_gradient(self, point, euclidean_gradient):
        return euclidean_gradient

    def euclidean_to_riemannian_hessian(
        self, point, euclidean_gradient, euclidean_hessian, tangent_vector
    ):
        return self.projection(point, euclidean_hessian)

    def exp(self, point, tangent_vector):
        return point + tangent_vector

    retraction = exp

    def log(self, point_a, point_b):
        u, _, vh = np.linalg.svd(point_b.T.conj() @ point_a)
        return point_b @ u @ vh - point_a

    def random_point(self):
        return np.random.normal(size=(self._n, self._k))

    def random_tangent_vector(self, point):
        random_vector = self.random_point()
        tangent_vector = self.projection(point, random_vector)
        return tangent_vector / self.norm(point, tangent_vector)

    def transport(self, point_a, point_b, tangent_vector_a):
        return self.projection(point_b, tangent_vector_a)

    def _normalize(self, array):
        return array / np.linalg.norm(array)

    def zero_vector(self, point):
        return np.zeros((self._n, self._k))


[docs]class PSDFixedRank(_PSDFixedRank):
    r"""Manifold of fixed-rank positive semidefinite (PSD) matrices.

    Args:
        n: Number of rows and columns of a point in the ambient space.
        k: Rank of matrices in the ambient space.

    Note:
        A point :math:`\vmX` on the manifold is parameterized as :math:`\vmX =
        \vmY\transp{\vmY}` where :math:`\vmY` is a real matrix of size ``n x
        k`` and rank ``k``.
        As such, :math:`\vmX` is symmetric, positive semidefinite with rank
        ``k``.

        Tangent vectors :math:`\dot{\vmY}` are represented as matrices of the
        same size as points on the manifold so that tangent vectors in the
        ambient space :math:`\R^{n \times n}` correspond to :math:`\dot{\vmX} =
        \vmY\transp{\dot{\vmY}} + \dot{\vmY}\transp{\vmY}`.
        The metric is the canonical Euclidean metric on :math:`\R^{n \times
        k}`.

        Since for any orthogonal matrix :math:`\vmQ` of size :math:`k \times k`
        it holds that :math:`\vmY\vmQ\transp{(\vmY\vmQ)} = \vmY\transp{\vmY}`,
        we identify all matrices of the form :math:`\vmY\vmQ` with an
        equivalence class.
        This set of equivalence classes then forms a Riemannian quotient
        manifold which is implemented here.

        Notice that this manifold is not complete: if optimization leads points
        to be rank-deficient, the geometry will break down.
        Hence, this geometry should only be used if it is expected that the
        points of interest will have rank exactly ``k``.
        Reduce ``k`` if that is not the case.

        The quotient geometry implemented here is the simplest case presented
        in [JBA+2010]_.
    """

    def __init__(self, n: int, k: int):
        name = f"Quotient manifold of {n}x{n} psd matrices of rank {k}"
        dimension = int(k * n - k * (k - 1) / 2)
        super().__init__(n, k, name, dimension)


[docs]class PSDFixedRankComplex(_PSDFixedRank):
    r"""Manifold of fixed-rank Hermitian positive semidefinite (PSD) matrices.

    Args:
        n: Number of rows and columns of a point in the ambient space.
        k: Rank of matrices in the ambient space.

    Note:
        A point :math:`\vmX` on the manifold is parameterized as :math:`\vmX =
        \vmY\conj{\vmY}`, where :math:`\vmY` is a complex matrix of size ``n x
        k`` and rank ``k``.

        Tangent vectors are represented as matrices of the same shape as points
        on the manifold.

        For any point :math:`\vmY` on the manifold, given any complex unitary
        matrix :math:`\vmU \in \C^{k \times k}`, we say
        :math:`\vmY\vmU` is equivalent to :math:`\vmY` since :math:`\vmY`
        and :math:`\vmY\vmU` are indistinguishable in the ambient space
        :math:`\C^{n \times n}`, i.e., :math:`\vmX = \vmY\vmU\conj{(\vmY\vmU)} =
        \vmY\conj{\vmY}`.
        Therefore, the set of equivalence classes forms a Riemannian
        quotient manifold :math:`\C^{n \times k} / \U(k)` where :math:`\U(k)`
        denotes the unitary group.
        The metric is the usual real trace inner product.

        Notice that this manifold is not complete: if optimization leads points
        to be rank-deficient, the geometry will break down.
        Hence, this geometry should only be used if it is expected that the
        points of interest will have rank exactly ``k``.
        Reduce ``k`` if that is not the case.

        The implementation follows the quotient geometry originally described
        in [Yat2013]_.
    """

    def __init__(self, n, k):
        name = f"Quotient manifold of Hermitian {n}x{n} matrices of rank {k}"
        dimension = 2 * k * n - k * k
        super().__init__(n, k, name, dimension)

[docs]    def random_point(self):
        return np.random.normal(
            size=(self._n, self._k)
        ) + 1j * np.random.normal(size=(self._n, self._k))


[docs]class Elliptope(Manifold, RetrAsExpMixin):
    r"""Manifold of fixed-rank PSD matrices with unit diagonal elements.

    Args:
        n: Number of rows and columns of a point in the ambient space.
        k: Rank of matrices in the ambient space.

    Note:
        A point :math:`\vmX` on the manifold is parameterized as :math:`\vmX =
        \vmY\transp{\vmY}` where :math:`\vmY` is a matrix of size ``n x k`` and
        rank ``k``.
        As such, :math:`\vmX` is symmetric, positive semidefinite with rank
        ``k``.

        Tangent vectors are represented as matrices of the same size as points
        on the manifold so that tangent vectors in the ambient space are of the
        form :math:`\dot{\vmX} = \vmY \transp{\dot{\vmY}} +
        \dot{\vmY}\transp{\vmY}` and :math:`\dot{X}_{ii} = 0`.
        The metric is the canonical Euclidean metric on :math:`\R^{n \times
        k}`.

        The diagonal constraints on :math:`X_{ii} = 1` translate to unit-norm
        constraints on the rows of :math:`\vmY`: :math:`\norm{\vmy_i} = 1`
        where :math:`\vmy_i` denotes the i-th column of :math:`\transp{\vmY}`.
        Without any further restrictions, this coincides with the oblique
        manifold (see :class:`pymanopt.manifolds.oblique.Oblique`).
        However, since for any orthogonal matrix :math:`\vmQ` of size ``k``, it
        holds that :math:`\vmY\vmQ\transp{(\vmY\vmQ)} = \vmY\transp{\vmY}`, we
        "group" all matrices of the form :math:`\vmY\vmQ` in an equivalence
        class.
        This set of equivalence classes is a Riemannian quotient manifold that
        is implemented here.

        Note that this geometry formally breaks down at rank-deficient points.
        This does not appear to be a major issue in practice when optimization
        algorithms converge to rank-deficient points, but convergence theorems
        no longer hold.
        As an alternative, you may try using the oblique manifold since it does
        not break down at rank drop.

        The geometry is taken from [JBA+2010]_.
    """

    def __init__(self, n, k):
        self._n = n
        self._k = k

        name = (
            f"Quotient manifold of {n}x{n} psd matrices of rank {k} "
            "with unit diagonal elements"
        )
        dimension = int(n * (k - 1) - k * (k - 1) / 2)
        super().__init__(name, dimension)

    @property
    def typical_dist(self):
        return 10 * self._k

[docs]    def inner_product(self, point, tangent_vector_a, tangent_vector_b):
        return np.tensordot(
            tangent_vector_a, tangent_vector_b, axes=tangent_vector_a.ndim
        )

[docs]    def norm(self, point, tangent_vector):
        return np.sqrt(
            self.inner_product(point, tangent_vector, tangent_vector)
        )

[docs]    def projection(self, point, vector):
        eta = self._project_rows(point, vector)
        YtY = point.T @ point
        AS = point.T @ eta - vector.T @ point
        Omega = scipy.linalg.solve_continuous_lyapunov(YtY, -AS)
        return eta - point @ (Omega - Omega.T) / 2

    to_tangent_space = projection

[docs]    def retraction(self, point, tangent_vector):
        return self._normalize_rows(point + tangent_vector)

[docs]    def euclidean_to_riemannian_gradient(self, point, euclidean_gradient):
        return self._project_rows(point, euclidean_gradient)

[docs]    def euclidean_to_riemannian_hessian(
        self, point, euclidean_gradient, euclidean_hessian, tangent_vector
    ):
        scaling_grad = (euclidean_gradient * point).sum(axis=1)
        hess = euclidean_hessian - tangent_vector * scaling_grad[:, np.newaxis]
        scaling_hess = (
            tangent_vector * euclidean_gradient + point * euclidean_hessian
        ).sum(axis=1)
        hess -= point * scaling_hess[:, np.newaxis]
        return self.projection(point, hess)

[docs]    def random_point(self):
        return self._normalize_rows(np.random.normal(size=(self._n, self._k)))

[docs]    def random_tangent_vector(self, point):
        tangent_vector = self.projection(point, self.random_point())
        return tangent_vector / self.norm(point, tangent_vector)

[docs]    def transport(self, point_a, point_b, tangent_vector_a):
        return self.projection(point_b, tangent_vector_a)

    def _normalize_rows(self, array):
        return array / np.linalg.norm(array, axis=1)[:, np.newaxis]

    def _project_rows(self, point, vector):
        inner_products = (point * vector).sum(axis=1)
        return vector - point * inner_products[:, np.newaxis]

[docs]    def zero_vector(self, point):
        return np.zeros((self._n, self._k))