Source code for pymanopt.manifolds.fixed_rank

import collections

import numpy as np

from pymanopt.manifolds.manifold import RiemannianSubmanifold
from pymanopt.manifolds.stiefel import Stiefel
from pymanopt.tools import ndarraySequenceMixin, return_as_class_instance


[docs]class FixedRankEmbedded(RiemannianSubmanifold):
    r"""Manifold of fixed rank matrices.

    Args:
        m: The number of rows of matrices in the ambient space.
        n: The number of columns of matrices in the ambient space.
        k: The rank of matrices.

    The manifold of ``m x n`` real matrices of fixed rank ``k``.
    For efficiency purposes, points on the manifold are represented with a
    truncated singular value decomposition instead of full matrices of size ``m
    x n``.
    Specifically, a point is represented as a tuple ``(u, s, vt)`` of three
    arrays.
    The arrays ``u``, ``s`` and ``vt`` have shapes ``(m, k)``, ``(k,)`` and
    ``(k, n)``, respectively, and the rank ``k`` matrix which they represent
    can be recovered by the product ``u @ np.diag(s) @ vt``.

    Vectors ``Z`` in the ambient space are best represented as arrays of shape
    ``(m, n)``.
    If these are low-rank, they may also be represented as tuples of arrays
    ``(U, S, V)`` such that ``Z = U @ S @ V.T``.
    There are no restrictions on what ``U``, ``S`` and ``V`` are, as long as
    their product as indicated yields a real ``m x n`` matrix.

    Tangent vectors are represented as tuples of the form ``(Up, M, Vp)``.
    The matrices ``Up`` (of size ``m x k``) and ``Vp`` (of size ``n x k``) obey
    the conditions ``np.allclose(Up.T @ U, 0)`` and ``np.allclose(Vp.T @ V,
    0)``.
    The matrix ``M`` (of size ``k x k``) is arbitrary.
    Such a structure corresponds to the tangent vector ``Z = u @ M @ vt + Up @
    vt + u * Vp.T`` in the ambient space of ``m x n`` matrices at a point ``(u,
    s, vt)``.

    The chosen geometry yields a Riemannian submanifold of the embedding
    space :math:`\R^{m \times n}` equipped with the usual trace inner product.

    Note:
        * The implementation follows the embedded geometry described in
          [Van2013]_.
        * The class is currently not compatible with the
          :class:`pymanopt.optimizers.trust_regions.TrustRegions` optimizer.
        * Details on the implementation of
          :meth:`euclidean_to_riemannian_gradient` can be found at
          https://j-towns.github.io/papers/svd-derivative.pdf.
        * The second-order retraction follows results presented in [AM2012]_.
    """

    def __init__(self, m: int, n: int, k: int):
        self._m = m
        self._n = n
        self._k = k
        self._stiefel_m = Stiefel(m, k)
        self._stiefel_n = Stiefel(n, k)

        name = f"Embedded manifold of {m}x{n} matrices of rank {k}"
        dimension = (m + n - k) * k
        super().__init__(name, dimension, point_layout=3)

    @property
    def typical_dist(self):
        return self.dim

[docs]    def inner_product(self, point, tangent_vector_a, tangent_vector_b):
        return np.sum(
            [
                np.tensordot(a, b)
                for (a, b) in zip(tangent_vector_a, tangent_vector_b)
            ]
        )

    def _apply_ambient(self, vector, matrix):
        if isinstance(vector, (list, tuple)):
            return vector[0] @ vector[1] @ vector[2].T @ matrix
        return vector @ matrix

    def _apply_ambient_transpose(self, vector, matrix):
        if isinstance(vector, (list, tuple)):
            return vector[2] @ vector[1] @ vector[0].T @ matrix
        return vector.T @ matrix

[docs]    def projection(self, point, vector):
        """Project vector in the ambient space to the tangent space.

        Args:
            point: A point on the manifold.
            vector: A vector in the ambient space.

        Returns:
            A tangent vector parameterized as a ``(Up, M, Vp)``.

        Note:
            The argument ``vector`` must either be an array of shape ``(m, n)``
            in the ambient space, or else a tuple ``(U, S, V)`` where ``U @ S @
            V`` is in the ambient space (of low-rank matrices).
        """
        ZV = self._apply_ambient(vector, point[2].T)
        UtZV = point[0].T @ ZV
        ZtU = self._apply_ambient_transpose(vector, point[0])

        Up = ZV - point[0] @ UtZV
        M = UtZV
        Vp = ZtU - point[2].T @ UtZV.T

        return _FixedRankTangentVector(Up, M, Vp)

[docs]    def euclidean_to_riemannian_gradient(self, point, euclidean_gradient):
        u, s, vt = point
        du, ds, dvt = euclidean_gradient

        utdu = u.T @ du
        uutdu = u @ utdu
        Up = (du - uutdu) / s

        vtdv = vt @ dvt.T
        vvtdv = vt.T @ vtdv
        Vp = (dvt.T - vvtdv) / s

        identity = np.eye(self._k)
        f = 1 / (s[np.newaxis, :] ** 2 - s[:, np.newaxis] ** 2 + identity)

        M = (
            f * (utdu - utdu.T) * s
            + s[:, np.newaxis] * f * (vtdv - vtdv.T)
            + np.diag(ds)
        )

        return _FixedRankTangentVector(Up, M, Vp)

[docs]    def retraction(self, point, tangent_vector):
        u, s, vt = point
        du, ds, dvt = tangent_vector

        Qu, Ru = np.linalg.qr(du)
        Qv, Rv = np.linalg.qr(dvt)
        T = np.vstack(
            (
                np.hstack((np.diag(s) + ds, Rv.T)),
                np.hstack((Ru, np.zeros((self._k, self._k)))),
            )
        )
        # Numpy svd outputs St as a 1d vector, not a matrix.
        Ut, St, Vt = np.linalg.svd(T, full_matrices=False)

        U = np.hstack((u, Qu)) @ Ut[:, : self._k]
        S = St[: self._k] + np.spacing(1)
        V = np.hstack((vt.T, Qv)) @ Vt.T[:, : self._k]
        return _FixedRankPoint(U, S, V.T)

[docs]    def norm(self, point, tangent_vector):
        return np.sqrt(
            self.inner_product(point, tangent_vector, tangent_vector)
        )

[docs]    def random_point(self):
        u = self._stiefel_m.random_point()
        s = np.sort(np.random.uniform(size=self._k))[::-1]
        vt = self._stiefel_n.random_point().T
        return _FixedRankPoint(u, s, vt)

[docs]    def to_tangent_space(self, point, vector):
        u, _, vt = point
        Up = vector.Up - u @ u.T @ vector.Up
        Vp = vector.Vp - vt.T @ vt @ vector.Vp
        return _FixedRankTangentVector(Up, vector.M, Vp)

[docs]    def random_tangent_vector(self, point):
        Up = np.random.normal(size=(self._m, self._k))
        Vp = np.random.normal(size=(self._n, self._k))
        M = np.random.normal(size=(self._k, self._k))

        tangent_vector = self.to_tangent_space(
            point, _FixedRankTangentVector(Up, M, Vp)
        )
        return tangent_vector / self.norm(point, tangent_vector)

[docs]    def embedding(self, point, tangent_vector):
        u, _, vt = point
        U = np.hstack((u @ tangent_vector.M + tangent_vector.Up, u))
        S = np.eye(2 * self._k)
        V = np.hstack(([vt.T, tangent_vector.Vp]))
        return U, S, V

[docs]    def transport(self, point_a, point_b, tangent_vector_a):
        return self.projection(
            point_b, self.embedding(point_a, tangent_vector_a)
        )

[docs]    def zero_vector(self, point):
        return _FixedRankTangentVector(
            np.zeros((self._m, self._k)),
            np.zeros((self._k, self._k)),
            np.zeros((self._n, self._k)),
        )


class _ndarraySequence(ndarraySequenceMixin):
    @return_as_class_instance
    def __mul__(self, other):
        return [other * s for s in self]

    __rmul__ = __mul__

    @return_as_class_instance
    def __truediv__(self, other):
        return [val / other for val in self]

    @return_as_class_instance
    def __neg__(self):
        return [-val for val in self]


class _FixedRankPoint(
    _ndarraySequence,
    collections.namedtuple(
        "_FixedRankPointTuple", field_names=("u", "s", "vt")
    ),
):
    pass


class _FixedRankTangentVector(
    _ndarraySequence,
    collections.namedtuple(
        "_FixedRankTangentVectorTuple", field_names=("Up", "M", "Vp")
    ),
):
    @return_as_class_instance
    def __add__(self, other):
        return [s + o for (s, o) in zip(self, other)]

    @return_as_class_instance
    def __sub__(self, other):
        return [s - o for (s, o) in zip(self, other)]