# Source code for mindspore_gl.utils.pca

# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" pca """
import math
import numpy as np

def large_mat_mul(input_a, input_b, batch=32):
    """
    Multiply ``input_a`` by ``input_b`` one row slice at a time.

    The rows of ``input_a`` are cut into ``batch`` consecutive slices of
    ``floor(m / batch)`` rows each, every slice is multiplied by ``input_b``
    separately, and the partial products are stacked back together along
    axis 0. Rows left over after the last full slice are multiplied in one
    final step and appended.

    Args:
        input_a: Left operand; its shape must unpack into exactly two values.
        input_b: Right operand, passed unchanged to ``np.matmul``.
        batch (int): Number of equally sized row slices. Default: 32.

    Returns:
        The row-wise concatenation of all partial products.
    """
    rows, _ = input_a.shape
    step = math.floor(rows/batch)
    # One partial product per full-size slice; a slice is empty when step == 0.
    pieces = [
        np.matmul(input_a[idx*step:(idx+1)*step], input_b)
        for idx in range(batch)
    ]
    head = np.concatenate(pieces, axis=0)
    # Rows that did not fit into the ``batch`` equal slices (possibly none).
    tail = np.matmul(input_a[batch*step:rows], input_b)
    return np.concatenate((head, tail), axis=0)

def mat_mul(input_a, input_b):
    """
    Matrix product that switches to sliced multiplication for tall inputs.

    When ``input_a`` has more than 100000 rows the product is delegated to
    ``large_mat_mul``; otherwise ``np.matmul`` is called directly.

    Args:
        input_a: Left operand; its shape must unpack into exactly two values.
        input_b: Right operand.

    Returns:
        The product of ``input_a`` and ``input_b``.
    """
    row_count, _ = input_a.shape
    if row_count <= 100000:
        return np.matmul(input_a, input_b)
    return large_mat_mul(input_a, input_b)

def get_approximate_basis(matrix: np.ndarray, k: int = 6, niter: int = 2):
    """
    Return a matrix Q with k orthonormal columns such that
    `Q Q^H matrix` approximates `matrix`.

    A random ``(n, k)`` matrix drawn with ``np.random.randn`` is multiplied
    by ``matrix`` and reduced to its QR ``Q`` factor. The factor is then
    refined ``niter`` times by alternately multiplying with ``matrix.T`` and
    ``matrix``, taking the ``Q`` factor of a QR decomposition after each
    product.

    Args:
        matrix (ndarray): Input whose last dimension has size ``n``.
        k (int): Number of columns of the random start matrix. Default: 6.
        niter (int): Number of refinement iterations; ``None`` is treated
            as 2. Default: 2.

    Returns:
        ndarray, the ``Q`` factor obtained after the last QR decomposition.
    """
    if niter is None:
        niter = 2
    _, n_cols = matrix.shape[-2:]
    probe = np.random.randn(n_cols, k)
    transposed = matrix.T

    basis = np.linalg.qr(mat_mul(matrix, probe))[0]
    for _step in range(niter):
        # Re-orthonormalise after every product to keep the iteration stable.
        basis, _ = np.linalg.qr(mat_mul(transposed, basis))
        basis, _ = np.linalg.qr(mat_mul(matrix, basis))
    return basis

def pca(matrix: np.ndarray, k: int = None, niter: int = 2, norm: bool = False):
    r"""
    Perform a linear principal component analysis (PCA) on the matrix, and
    return the first k dimensionality-reduced features.

    Args:
        matrix(ndarray): Input features, shape is :math:`(B, F)`.
        k(int, optional): target dimension for dimensionality reduction.
            When None, :math:`min(6, B, F)` is used. Default: None.
        niter(int, optional): the number of subspace iterations to conduct and
            it must be a positive integer. Default: 2.
        norm(bool, optional): Whether the output is normalized. If True, the
            column-mean-centered matrix is projected; otherwise the input
            matrix itself is projected. Default: False.

    Return:
        ndarray, Features after dimensionality reduction

    Raises:
        TypeError: If `k` is neither None nor a positive int.
        TypeError: If `niter` is not a positive int.
        TypeError: If `matrix` is not a ndarray.
        TypeError: If `norm` is not a bool.
        ValueError: If `matrix` is not 2-dimensional.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore_gl.utils import pca
        >>> X = np.array([[-1, 1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
        >>> data = pca(X, 1)
        >>> print(data)
        [[ 0.33702252]
         [ 2.22871406]
         [ 3.6021826 ]
         [-1.37346854]
         [-2.22871406]
         [-3.6021826 ]]
    """
    if not isinstance(matrix, np.ndarray):
        raise TypeError(
            "The matrix type is {}, but it should be numpy.ndarray.".format(type(matrix)))
    # ``k=None`` is the documented default and must pass validation; it is
    # resolved to min(6, B, F) once the matrix shape is known.
    if k is not None and (not isinstance(k, int) or k <= 0):
        raise TypeError(
            "The k is {!r} (type {}), but it should be None or a positive int.".format(k, type(k)))
    if not isinstance(niter, int) or niter <= 0:
        raise TypeError(
            "The niter is {!r} (type {}), but it should be a positive int.".format(niter, type(niter)))
    if not isinstance(norm, bool):
        raise TypeError(
            "The norm type is {}, but it should be bool.".format(type(norm)))
    # The matmul helpers unpack ``shape`` into exactly two values, so fail
    # early with a clear message instead of an unpacking error further down.
    if matrix.ndim != 2:
        raise ValueError(
            "The matrix should be 2-dimensional, but got {} dimension(s).".format(matrix.ndim))

    m, n = matrix.shape
    if k is None:
        k = min(6, m, n)

    # Center every feature column on its mean.
    c = np.mean(matrix, axis=-2)
    norm_matrix = matrix - c

    # Approximate basis for the feature space, computed from the transposed
    # centered data, followed by an SVD of the data projected onto it.
    q = get_approximate_basis(norm_matrix.T, k, niter)
    q_c = q.conjugate()
    b_t = mat_mul(norm_matrix, q_c)
    _, _, v = np.linalg.svd(b_t, full_matrices=False)
    v_c = v.conj().transpose(-2, -1)
    v_c = mat_mul(q, v_c)

    # ``norm`` selects whether the centered or the raw matrix is projected.
    source = norm_matrix if norm else matrix
    return mat_mul(source, v_c)