# Source code for hyperlearn.decomposition.PCA


from .base import _basePCA
from ..base import *
from ..linalg import svd, eigh, svd_flip


class PCA(_basePCA):
    """
    Principal Component Analysis: reduces the dimensionality of data by
    selecting the linear combinations of columns/features that maximise
    the total variance.

    Speed
    --------------
    SKLEARN's implementation is wasteful: it uses a full SVD solver, which
    is slow and painful. HyperLearn instead uses a regularized eigenvalue
    decomposition; pass solver = 'svd' to force the full SVD path.

    If USE_GPU: uses PyTorch's SVD (sadly slow) or EIGH. Speed is OK.
    If CPU: uses Numpy's Fortran/C based SVD or EIGH. Without NUMBA,
    very fast LAPACK functions are used instead.

    Stability
    --------------
    Alpha is added for regularization: it prevents system rounding errors
    and promises better convergence rates.

    Note: svd_flip is NOT the same as SKLEARN's, so output signs may be
    reversed. A V-based decision is used since EIGH is faster and U is
    never computed.
    """

    def __init__(self, n_components = 2, solver = 'eig', alpha = None,
                 fast = True, centre = True):
        # Dispatch: only an explicit 'svd' request takes the full-SVD path;
        # every other solver value falls through to the eigen path.
        self.decomp = self._fit_svd if solver == 'svd' else self._fit_eig
        self.n_components = n_components
        self.alpha = alpha
        self.solver = solver
        self.fast = fast
        self.truncated = False  # plain (non-truncated) PCA
        self.centre = centre

    def _fit_svd(self, X):
        # Full-decomposition path. NOTE(review): `eig` is not among this
        # module's explicit imports — presumably provided by
        # `from ..base import *`; verify against ..base.
        S2, VT = eig(X, fast = self.fast)
        return S2, VT

    def _fit_eig(self, X):
        # Wide matrices (more columns than rows): fall back to the SVD
        # path, since the eigendecomposition would output U rather than VT.
        if X.shape[1] >= X.shape[0]:
            return self._fit_svd(X)
        # Tall matrices: regularized symmetric eigendecomposition of the
        # Gram matrix X.T @ X (alpha provides the regularization).
        S2, VT = eigh(X.T @ X, alpha = self.alpha,
                      fast = self.fast, positive = True)
        return S2, VT