Source code for skada._subspace

# Author: Theo Gnassounou <theo.gnassounou@inria.fr>
#         Remi Flamary <remi.flamary@polytechnique.edu>
#         Oleksii Kachaiev <kachayev@gmail.com>
#         Ruben Bueno <ruben.bueno@polytechnique.edu>
#         Antoine Collas <contact@antoinecollas.fr>
#
# License: BSD 3-Clause


import warnings

import numpy as np
import scipy.linalg
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.utils import check_random_state

from ._pipeline import make_da_pipeline
from .base import BaseAdapter
from .utils import (
    check_X_domain,
    extract_source_indices,
    source_target_merge,
    source_target_split,
    torch_minimize,
)


class SubspaceAlignmentAdapter(BaseAdapter):
    """Domain Adaptation Using Subspace Alignment.

    See [8]_ for details.

    Parameters
    ----------
    n_components : int, default=None
        The number of components to learn with PCA.
        If n_components is not set, all components are kept::

            n_components == min(n_samples, n_features)

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for dataset creation. Pass an int
        for reproducible output across multiple function calls.

    Attributes
    ----------
    `pca_source_` : object
        The PCA object fitted on the source data.
    `pca_target_` : object
        The PCA object fitted on the target data.

    References
    ----------
    .. [8] Basura Fernando et. al. Unsupervised Visual
           Domain Adaptation Using Subspace Alignment.
           In IEEE International Conference on Computer Vision, 2013.
    """

    def __init__(
        self,
        n_components=None,
        random_state=None,
    ):
        super().__init__()
        self.n_components = n_components
        self.random_state = random_state

    def fit(self, X, y=None, sample_domain=None, **kwargs):
        """Fit adaptation parameters.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        self : object
            Returns self.
        """
        X, sample_domain = check_X_domain(
            X,
            sample_domain,
            allow_multi_source=True,
            allow_multi_target=True,
        )
        X_source, X_target = source_target_split(X, sample_domain=sample_domain)
        if self.n_components is None:
            n_components = min(X.shape[0], X.shape[1])
        else:
            n_components = self.n_components
        self.random_state_ = check_random_state(self.random_state)
        self.pca_source_ = PCA(n_components, random_state=self.random_state_).fit(
            X_source
        )
        self.pca_target_ = PCA(n_components, random_state=self.random_state_).fit(
            X_target
        )
        self.n_components_ = n_components
        self.M_ = np.dot(self.pca_source_.components_, self.pca_target_.components_.T)
        return self

    def transform(
        self, X, y=None, *, sample_domain=None, allow_source=False, **params
    ) -> np.ndarray:
        """Transform the given samples into the aligned subspace.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        X_t : array-like, shape (n_samples, n_components)
            The data transformed to the target subspace.
        """
        X, sample_domain = check_X_domain(
            X,
            sample_domain,
            allow_source=allow_source,
            allow_multi_source=True,
            allow_multi_target=True,
        )
        X_source, X_target = source_target_split(X, sample_domain=sample_domain)
        if X_source.shape[0]:
            X_source = np.dot(self.pca_source_.transform(X_source), self.M_)
        if X_target.shape[0]:
            X_target = self.pca_target_.transform(X_target)
        X_adapt, _ = source_target_merge(
            X_source, X_target, sample_domain=sample_domain
        )
        return X_adapt

    def fit_transform(self, X, y=None, *, sample_domain=None, **params):
        """Fit the adapter and transform the data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        X_t : array-like, shape (n_samples, n_components)
            The data transformed to the target subspace.
        """
        self.fit(X, y, sample_domain=sample_domain)
        params["allow_source"] = True
        return self.transform(X, y, sample_domain=sample_domain, **params)


def SubspaceAlignment(
    base_estimator=None,
    n_components=None,
    random_state=None,
):
    """Domain Adaptation Using Subspace Alignment.

    See [8]_ for details.

    Parameters
    ----------
    base_estimator : object, default=None
        The estimator used for fitting and prediction.
    n_components : int, default=None
        The number of components to learn with PCA.
        If n_components is not set, all components are kept::

            n_components == min(n_samples, n_features)

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for dataset creation. Pass an int
        for reproducible output across multiple function calls.

    Returns
    -------
    pipeline : Pipeline
        A pipeline containing a SubspaceAlignmentAdapter.

    References
    ----------
    .. [8] Basura Fernando et. al. Unsupervised Visual
           Domain Adaptation Using Subspace Alignment.
           In IEEE International Conference on Computer Vision, 2013.
    """
    if base_estimator is None:
        base_estimator = SVC()

    return make_da_pipeline(
        SubspaceAlignmentAdapter(
            n_components=n_components,
            random_state=random_state,
        ),
        base_estimator,
    )
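

# Illustrative sketch (not part of the library): end-to-end use of the
# ``SubspaceAlignment`` pipeline defined above. It assumes the usual skada
# conventions (positive ``sample_domain`` entries for source samples, negative
# ones for target samples, target labels masked with -1 during ``fit``, and
# samples passed to ``predict`` being treated as target data); the toy data
# and the helper name ``_example_subspace_alignment_pipeline`` are made up.
def _example_subspace_alignment_pipeline():
    rng = np.random.default_rng(0)
    X_source = rng.normal(size=(40, 5))
    y_source = rng.integers(0, 2, size=40)
    X_target = rng.normal(loc=1.0, size=(30, 5))
    X = np.concatenate([X_source, X_target])
    y = np.concatenate([y_source, -np.ones(30, dtype=int)])  # masked target labels
    sample_domain = np.concatenate([np.ones(40), -np.ones(30)])
    pipe = SubspaceAlignment(n_components=2, random_state=0)
    pipe.fit(X, y, sample_domain=sample_domain)
    # predict labels for the (unlabeled) target samples
    return pipe.predict(X_target)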


class TransferComponentAnalysisAdapter(BaseAdapter):
    """Transfer Component Analysis.

    See [9]_ for details.

    Parameters
    ----------
    kernel : kernel object, default='rbf'
        The kernel computed between data.
    n_components : int, default=None
        The number of components of the learned subspace.
        Should be less than or equal to the total number of samples
        of the source and target data.
    mu : float, default=0.1
        The regularization parameter of the optimization problem.

    Attributes
    ----------
    `X_source_` : array
        Source data used for the optimization problem.
    `X_target_` : array
        Target data used for the optimization problem.
    `K_` : array
        Kernel distance between the data (source and target).
    `eigvects_` : array
        Highest n_components eigenvectors of the solution of the
        optimization problem, used to project into the new subspace.

    References
    ----------
    .. [9] Sinno Jialin Pan et. al. Domain Adaptation via
           Transfer Component Analysis. In IEEE Transactions
           on Neural Networks, 2011.
    """

    def __init__(self, kernel="rbf", n_components=None, mu=0.1):
        super().__init__()
        self.kernel = kernel
        self.n_components = n_components
        self.mu = mu

    def fit(self, X, y=None, *, sample_domain=None):
        """Fit adaptation parameters.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        self : object
            Returns self.
        """
        X, sample_domain = check_X_domain(
            X,
            sample_domain,
            allow_multi_source=True,
            allow_multi_target=True,
        )
        self.X_source_, self.X_target_ = source_target_split(
            X, sample_domain=sample_domain
        )

        Kss = pairwise_kernels(self.X_source_, metric=self.kernel)
        Ktt = pairwise_kernels(self.X_target_, metric=self.kernel)
        Kst = pairwise_kernels(self.X_source_, self.X_target_, metric=self.kernel)
        K = np.block([[Kss, Kst], [Kst.T, Ktt]])
        self.K_ = K

        ns = self.X_source_.shape[0]
        nt = self.X_target_.shape[0]
        Lss = 1 / ns**2 * np.ones((ns, ns))
        Ltt = 1 / nt**2 * np.ones((nt, nt))
        Lst = -1 / (ns * nt) * np.ones((ns, nt))
        L = np.block([[Lss, Lst], [Lst.T, Ltt]])

        H = np.eye(ns + nt) - 1 / (ns + nt) * np.ones((ns + nt, ns + nt))

        A = np.eye(ns + nt) + self.mu * K @ L @ K
        B = K @ H @ K
        solution = np.linalg.solve(A, B)
        eigvals, eigvects = np.linalg.eigh(solution)

        if self.n_components is None:
            n_components = min(X.shape[0], X.shape[1])
        else:
            n_components = self.n_components
        selected_components = np.argsort(np.abs(eigvals))[::-1][:n_components]
        self.eigvects_ = np.real(eigvects[:, selected_components])
        return self

    def fit_transform(self, X, y=None, *, sample_domain=None, **params):
        """Fit the adapter and transform the data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        X_t : array-like, shape (n_samples, n_components)
            The data transformed to the target subspace.
        """
        self.fit(X, y, sample_domain=sample_domain)
        params["allow_source"] = True
        return self.transform(X, y, sample_domain=sample_domain, **params)

    def transform(
        self, X, y=None, *, sample_domain=None, allow_source=False, **params
    ) -> np.ndarray:
        """Transform the given samples into the learned subspace.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        X_t : array-like, shape (n_samples, n_components)
            The data transformed to the target subspace.
        """
        X, sample_domain = check_X_domain(
            X,
            sample_domain,
            allow_source=allow_source,
            allow_multi_source=True,
            allow_multi_target=True,
        )
        X_source, X_target = source_target_split(X, sample_domain=sample_domain)
        if np.array_equal(X_source, self.X_source_) and np.array_equal(
            X_target, self.X_target_
        ):
            X_ = (self.K_ @ self.eigvects_)[: X.shape[0]]
        else:
            Ks = pairwise_kernels(X, self.X_source_, metric=self.kernel)
            Kt = pairwise_kernels(X, self.X_target_, metric=self.kernel)
            K = np.concatenate((Ks, Kt), axis=1)
            X_ = (K @ self.eigvects_)[: X.shape[0]]
        return X_


def TransferComponentAnalysis(
    base_estimator=None, kernel="rbf", n_components=None, mu=0.1
):
    """Domain Adaptation Using Transfer Component Analysis.

    See [9]_ for details.

    Parameters
    ----------
    base_estimator : object, default=None
        The estimator used for fitting and prediction.
    kernel : kernel object, default='rbf'
        The kernel computed between data.
    n_components : int, default=None
        The number of components of the learned subspace.
        Should be less than or equal to the total number of samples
        of the source and target data.
    mu : float, default=0.1
        The regularization parameter of the optimization problem.

    Returns
    -------
    pipeline : Pipeline
        A pipeline containing a TransferComponentAnalysisAdapter.

    References
    ----------
    .. [9] Sinno Jialin Pan et. al. Domain Adaptation via
           Transfer Component Analysis. In IEEE Transactions
           on Neural Networks, 2011.
    """
    if base_estimator is None:
        base_estimator = SVC()

    return make_da_pipeline(
        TransferComponentAnalysisAdapter(
            kernel=kernel, n_components=n_components, mu=mu
        ),
        base_estimator,
    )
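

# Illustrative sketch (not part of the library): direct use of the TCA adapter
# defined above. Since the projection lives in kernel space, ``n_components``
# should not exceed the total number of source and target samples. The toy
# data and the helper name ``_example_transfer_component_analysis`` are
# hypothetical; the call signatures come from the adapter above.
def _example_transfer_component_analysis():
    rng = np.random.default_rng(0)
    X_source = rng.normal(size=(25, 8))
    X_target = rng.normal(loc=0.5, size=(15, 8))
    X = np.concatenate([X_source, X_target])
    sample_domain = np.concatenate([np.ones(25), -np.ones(15)])
    adapter = TransferComponentAnalysisAdapter(kernel="rbf", n_components=5, mu=0.1)
    # returns the 40 samples projected onto the 5 leading transfer components
    return adapter.fit_transform(X, sample_domain=sample_domain)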


class TransferJointMatchingAdapter(BaseAdapter):
    """Domain Adaptation Using TJM: Transfer Joint Matching.

    See [26]_ for details.

    Parameters
    ----------
    n_components : int, default=None
        The number of components of the learned subspace.
        Should be less than or equal to the total number of samples
        of the source and target data.
    tradeoff : float, default=1e-2
        The tradeoff constant for the TJM algorithm. It serves to
        trade off feature matching and instance reweighting.
    max_iter : int>0, default=100
        The maximal number of iterations before stopping when fitting.
    kernel : kernel object, default='rbf'
        The kernel computed between data.
    tol : float, default=0.01
        The threshold on the difference between losses over two consecutive
        iterations below which the algorithm stops.
    verbose : bool, default=False
        If True, print the loss value at each iteration.

    Attributes
    ----------
    `X_source_` : array
        Source data used to fit the adapter.
    `X_target_` : array
        Target data used to fit the adapter.
    `A_` : array
        Learned projection matrix applied to the kernel features.

    References
    ----------
    .. [26] [Long et al., 2014] Long, M., Wang, J., Ding, G., Sun, J.,
            and Yu, P. (2014). Transfer joint matching for unsupervised
            domain adaptation. In IEEE Conference on Computer Vision and
            Pattern Recognition (CVPR), pages 1410-1417.
    """

    def __init__(
        self,
        n_components=None,
        tradeoff=1e-2,
        max_iter=100,
        kernel="rbf",
        tol=0.01,
        verbose=False,
    ):
        super().__init__()
        self.n_components = n_components
        self.tradeoff = tradeoff
        self.kernel = kernel
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose

    def fit_transform(self, X, y=None, *, sample_domain=None, **params):
        """Fit the adapter and transform the data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        X_t : array-like, shape (n_samples, n_components)
            The data transformed to the target subspace.
        """
        self.fit(X, y, sample_domain=sample_domain)
        params["allow_source"] = True
        return self.transform(X, y, sample_domain=sample_domain, **params)

    def transform(
        self, X, y=None, *, sample_domain=None, allow_source=False, **params
    ) -> np.ndarray:
        """Transform the given samples into the learned subspace.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        X_t : array-like, shape (n_samples, n_components)
            The data transformed to the target subspace.
        """
        X, sample_domain = check_X_domain(
            X,
            sample_domain,
            allow_source=allow_source,
            allow_multi_source=True,
            allow_multi_target=True,
        )
        X_source, X_target = source_target_split(X, sample_domain=sample_domain)
        if np.array_equal(X_source, self.X_source_) and np.array_equal(
            X_target, self.X_target_
        ):
            K = self._get_kernel_matrix(X_source, X_target)
            X_ = K @ self.A_
        else:
            Ks = pairwise_kernels(X, self.X_source_, metric=self.kernel)
            Kt = pairwise_kernels(X, self.X_target_, metric=self.kernel)
            K = np.concatenate((Ks, Kt), axis=1)
            X_ = K @ self.A_
        return X_

    def _get_mmd_matrix(self, ns, nt, sample_domain):
        Mss = (1 / (ns**2)) * np.ones((ns, ns))
        Mtt = (1 / (nt**2)) * np.ones((nt, nt))
        Mst = -(1 / (ns * nt)) * np.ones((ns, nt))
        M = np.block([[Mss, Mst], [Mst.T, Mtt]])
        return M

    def _get_kernel_matrix(self, X_source, X_target):
        Kss = pairwise_kernels(X_source, metric=self.kernel)
        Ktt = pairwise_kernels(X_target, metric=self.kernel)
        Kst = pairwise_kernels(X_source, X_target, metric=self.kernel)
        K = np.block([[Kss, Kst], [Kst.T, Ktt]])
        return K

    def fit(self, X, y=None, *, sample_domain=None):
        """Fit adaptation parameters.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        self : object
            Returns self.
        """
        X, sample_domain = check_X_domain(
            X,
            sample_domain,
            allow_multi_source=True,
            allow_multi_target=True,
        )
        X_source, X_target = source_target_split(X, sample_domain=sample_domain)
        if self.n_components is None:
            n_components = min(X.shape[0], X.shape[1])
        else:
            n_components = self.n_components
        self.X_source_ = X_source
        self.X_target_ = X_target

        n = X.shape[0]
        source_mask = extract_source_indices(sample_domain)

        H = np.identity(n) - 1 / n * np.ones((n, n))
        K = self._get_kernel_matrix(X_source, X_target)
        M = self._get_mmd_matrix(X_source.shape[0], X_target.shape[0], sample_domain)
        M /= np.linalg.norm(M, ord="fro")

        G = np.identity(n)
        EPS_eigval = 1e-10
        last_loss = -2 * self.tol
        for i in range(self.max_iter):
            # update A
            B = K @ M @ K.T + self.tradeoff * G
            C = K @ H @ K.T
            B = B + EPS_eigval * np.identity(n)
            C = C + EPS_eigval * np.identity(n)
            phi, A = scipy.linalg.eigh(B, C)
            phi = phi + EPS_eigval
            indices = np.argsort(phi)[:n_components]
            phi, A = phi[indices], A[:, indices]
            error_eigv = np.linalg.norm(B @ A - C @ A @ np.diag(phi))
            if error_eigv > 1e-5:
                warnings.warn(
                    "The solution of the generalized eigenvalue problem "
                    "is not accurate."
                )

            # update G
            A_norms = np.linalg.norm(A, axis=1)
            G = np.zeros(n, dtype=np.float64)
            G[A_norms != 0] = 1 / (2 * A_norms[A_norms != 0] + EPS_eigval)
            G[~source_mask] = 1
            G = np.diag(G)

            loss = np.trace(A.T @ K @ M @ K @ A)
            reg = (
                np.sum(np.linalg.norm(A[source_mask], axis=1))
                + np.linalg.norm(A[~source_mask]) ** 2
            )
            loss_total = loss + self.tradeoff * reg

            # print objective function and constraint satisfaction
            if self.verbose:
                print(
                    f"iter {i}: loss={loss_total:.3e}, loss_mmd={loss:.3e}, "
                    f"reg={reg:.3e}"
                )
                mat = A.T @ K @ H @ K.T @ A
                cond = np.allclose(mat, np.identity(n_components))
                dist = np.linalg.norm(mat - np.identity(n_components))
                print(f"Constraint satisfaction: {cond}, dist={dist:.3e}")
                print(f"Error of generalized eigendecomposition: {error_eigv:.3e}")

            if last_loss == 0 or np.abs(last_loss - loss_total) / last_loss < self.tol:
                break
            else:
                last_loss = loss_total

        self.A_ = A
        return self


def TransferJointMatching(
    base_estimator=None,
    n_components=None,
    tradeoff=1e-2,
    kernel="rbf",
    max_iter=100,
    tol=0.01,
):
    """Domain Adaptation Using Transfer Joint Matching.

    See [26]_ for details.

    Parameters
    ----------
    base_estimator : object, default=None
        The estimator used for fitting and prediction.
    n_components : int, default=None
        The number of components of the learned subspace.
        Should be less than or equal to the total number of samples
        of the source and target data.
    tradeoff : float, default=1e-2
        The tradeoff constant for the TJM algorithm. It serves to
        trade off feature matching and instance reweighting.
    max_iter : int>0, default=100
        The maximal number of iterations before stopping when fitting.
    kernel : kernel object, default='rbf'
        The kernel computed between data.
    tol : float, default=0.01
        The threshold on the difference between losses over two consecutive
        iterations below which the algorithm stops.

    Returns
    -------
    pipeline : Pipeline
        A pipeline containing a TransferJointMatchingAdapter.

    References
    ----------
    .. [26] [Long et al., 2014] Long, M., Wang, J., Ding, G., Sun, J.,
            and Yu, P. (2014). Transfer joint matching for unsupervised
            domain adaptation. In IEEE Conference on Computer Vision and
            Pattern Recognition (CVPR), pages 1410-1417.
    """
    if base_estimator is None:
        base_estimator = SVC()

    return make_da_pipeline(
        TransferJointMatchingAdapter(
            tradeoff=tradeoff,
            n_components=n_components,
            kernel=kernel,
            max_iter=max_iter,
            tol=tol,
        ),
        base_estimator,
    )
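

# Illustrative sketch (not part of the library): direct use of the TJM adapter.
# ``tradeoff`` balances the MMD feature-matching term against the row-sparsity
# penalty that down-weights hard-to-match source instances. The toy data and
# the helper name ``_example_transfer_joint_matching`` are hypothetical; the
# call signature is that of the adapter defined above.
def _example_transfer_joint_matching():
    rng = np.random.default_rng(0)
    X_source = rng.normal(size=(20, 6))
    X_target = rng.normal(loc=0.5, size=(20, 6))
    X = np.concatenate([X_source, X_target])
    sample_domain = np.concatenate([np.ones(20), -np.ones(20)])
    adapter = TransferJointMatchingAdapter(n_components=3, tradeoff=1e-2, max_iter=20)
    # fit alternates between updating the projection A and the reweighting G,
    # then projects all 40 samples onto 3 components
    return adapter.fit_transform(X, sample_domain=sample_domain)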


class TransferSubspaceLearningAdapter(BaseAdapter):
    """Domain Adaptation Using TSL: Transfer Subspace Learning.

    See [27]_ for details.

    Parameters
    ----------
    n_components : int, default=None
        The number of components to learn.
        Should be less than or equal to the number of samples
        of the source and target data.
    base_method : str, default='flda'
        The method used to learn the subspace.
        Possible values are 'pca', 'flda', and 'lpp'.
    length_scale : float, default=2
        The length scale of the rbf kernel used in the 'lpp' method.
    mu : float, default=0.1
        The regularization parameter of the optimization problem.
    reg : float, default=0.01
        The regularization parameter of the covariance estimator.
        Possible values:

        - None: no shrinkage.
        - float between 0 and 1: fixed shrinkage parameter.

    max_iter : int>0, default=100
        The maximal number of iterations before stopping when fitting.
    tol : float, default=0.01
        The threshold on the difference between losses over two consecutive
        iterations below which the algorithm stops.
    verbose : bool, default=False
        If True, print the final gradient norm.

    Attributes
    ----------
    `W_` : array of shape (n_features, n_components)
        The learned projection matrix.

    References
    ----------
    .. [27] [Si et al., 2010] Si, S., Tao, D. and Geng, B.
            Bregman Divergence-Based Regularization for Transfer Subspace
            Learning. In IEEE Transactions on Knowledge and Data Engineering,
            pages 929-942.
    """

    def __init__(
        self,
        n_components=None,
        base_method="flda",
        length_scale=2,
        mu=0.1,
        reg=0.01,
        max_iter=100,
        tol=0.01,
        verbose=False,
    ):
        super().__init__()
        self.n_components = n_components
        _accepted_base_methods = ["pca", "flda", "lpp"]
        if base_method not in _accepted_base_methods:
            raise ValueError(f"base_method should be in {_accepted_base_methods}")
        self.base_method = base_method
        self.length_scale = length_scale
        self.mu = mu
        if reg is not None and (reg < 0 or reg > 1):
            raise ValueError("reg should be None or between 0 and 1.")
        self.reg = 0 if reg is None else reg
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose

    def _torch_cov(self, X):
        """Compute the covariance matrix of X using torch."""
        torch = self.torch
        reg = self.reg
        n_samples, d = X.shape
        X = X - torch.mean(X, dim=0)
        cov = X.T @ X / n_samples
        cov = (1 - reg) * cov + reg * torch.trace(cov) * torch.eye(d)
        return cov

    def _D(self, W, X_source, X_target):
        """Divergence objective function."""
        torch = self.torch
        Z_source = X_source @ W
        Z_target = X_target @ W
        sigma_1 = self._torch_cov(Z_source)
        sigma_2 = self._torch_cov(Z_target)
        sigma_11 = 2 * sigma_1
        sigma_12 = sigma_1 + sigma_2
        sigma_22 = 2 * sigma_2
        L_11 = torch.linalg.cholesky(torch.linalg.inv(sigma_11))
        L_12 = torch.linalg.cholesky(torch.linalg.inv(sigma_12))
        L_22 = torch.linalg.cholesky(torch.linalg.inv(sigma_22))
        Kss = torch.exp(-0.5 * torch.cdist(Z_source @ L_11, Z_source @ L_11))
        Kst = torch.exp(-0.5 * torch.cdist(Z_source @ L_12, Z_target @ L_12))
        Ktt = torch.exp(-0.5 * torch.cdist(Z_target @ L_22, Z_target @ L_22))
        return torch.mean(Kss) + torch.mean(Ktt) - 2 * torch.mean(Kst)

    def _F(self, W, X_source, y_source):
        """Subspace learning objective function."""
        torch = self.torch
        base_method = self.base_method
        if base_method == "pca":
            cov = self._torch_cov(X_source)
            loss = -torch.trace(W.T @ cov @ W)
        elif base_method == "flda":
            classes = torch.unique(y_source)
            classes_means = torch.stack(
                [torch.mean(X_source[y_source == c], dim=0) for c in classes]
            )
            classes_n_samples = torch.stack(
                [torch.sum(y_source == c) for c in classes]
            )
            classes_means = classes_means * torch.sqrt(classes_n_samples).reshape(
                -1, 1
            )
            S_W = self._torch_cov(classes_means)
            S_B = self._torch_cov(X_source)
            loss = torch.trace(W.T @ S_W @ W) / torch.trace(W.T @ S_B @ W)
        elif base_method == "lpp":
            # E is the Gaussian kernel if (y_source)_i == (y_source)_j
            # and 0 otherwise
            E = torch.exp(-torch.cdist(X_source, X_source) / self.length_scale)
            E = E * (y_source[:, None] == y_source[None, :])
            D = torch.diag(torch.sum(E, dim=1))
            loss = -2 * torch.trace(W.T @ X_source.T @ (D - E) @ X_source @ W)
        return loss

    def fit(self, X, y=None, sample_domain=None, **kwargs):
        """Fit adaptation parameters.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        self : object
            Returns self.
        """
        try:
            import torch

            self.torch = torch
        except ImportError:
            raise ImportError(
                "TransferSubspaceLearningAdapter requires pytorch to be installed."
            )

        X, sample_domain = check_X_domain(
            X,
            sample_domain,
            allow_multi_source=True,
            allow_multi_target=True,
        )
        X_source, X_target, y_source, _ = source_target_split(
            X, y, sample_domain=sample_domain
        )
        if self.n_components is None:
            n_components = min(X.shape[0], X.shape[1])
        else:
            n_components = self.n_components

        # Convert data to torch tensors
        X_source = torch.tensor(X_source, dtype=torch.float64)
        y_source = torch.tensor(y_source)
        X_target = torch.tensor(X_target, dtype=torch.float64)

        # Solve the optimization problem
        # min_W F(W) + mu * D(W)
        # s.t. W^T W = I
        def _orth(W):
            if type(W) is np.ndarray:
                W = np.linalg.qr(W)[0]
            else:
                W = torch.linalg.qr(W)[0]
            return W

        def func(W):
            W = _orth(W)
            loss = self._F(W, X_source, y_source)
            loss = loss + self.mu * self._D(W, X_source, X_target)
            return loss

        # Optimize using torch solver
        W = torch.eye(X.shape[1], dtype=torch.float64, requires_grad=True)
        W = W[:, :n_components]
        W, _ = torch_minimize(
            func, W, tol=self.tol, max_iter=self.max_iter, verbose=self.verbose
        )
        W = _orth(W)

        # store W
        self.W_ = W
        return self

    def fit_transform(self, X, y=None, *, sample_domain=None, **params):
        """Fit the adapter and transform the data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The source and target data.
        y : array-like, shape (n_samples,)
            The source labels.
        sample_domain : array-like, shape (n_samples,)
            The domain labels.

        Returns
        -------
        X_t : array-like, shape (n_samples, n_components)
            The data transformed to the target subspace.
        """
        self.fit(X, y, sample_domain=sample_domain)
        return self.transform(X, sample_domain=sample_domain, allow_source=True)

    def transform(
        self, X, y=None, *, sample_domain=None, allow_source=False, **params
    ) -> np.ndarray:
        """Project the given samples with the learned matrix ``W_``."""
        X, sample_domain = check_X_domain(
            X,
            sample_domain,
            allow_source=allow_source,
            allow_multi_source=True,
            allow_multi_target=True,
        )
        X_source, X_target = source_target_split(X, sample_domain=sample_domain)
        if X_source.shape[0]:
            X_source = np.dot(X_source, self.W_)
        if X_target.shape[0]:
            X_target = np.dot(X_target, self.W_)
        # xxx(okachaiev): this could be done through a more high-level API
        X_adapt, _ = source_target_merge(
            X_source, X_target, sample_domain=sample_domain
        )
        return X_adapt


def TransferSubspaceLearning(
    base_estimator=None,
    n_components=None,
    base_method="flda",
    length_scale=2,
    mu=0.1,
    reg=0.01,
    max_iter=100,
    tol=0.01,
    verbose=False,
):
    """Domain Adaptation Using Transfer Subspace Learning.

    See [27]_ for details.

    Parameters
    ----------
    base_estimator : object, default=None
        The estimator used for fitting and prediction.
    n_components : int, default=None
        The number of components to learn.
        Should be less than or equal to the number of samples
        of the source and target data.
    base_method : str, default='flda'
        The method used to learn the subspace.
        Possible values are 'pca', 'flda', and 'lpp'.
    length_scale : float, default=2
        The length scale of the rbf kernel used in the 'lpp' method.
    mu : float, default=0.1
        The regularization parameter of the optimization problem.
    reg : float, default=0.01
        The regularization parameter of the covariance estimator.
        Possible values:

        - None: no shrinkage.
        - float between 0 and 1: fixed shrinkage parameter.

    max_iter : int>0, default=100
        The maximal number of iterations before stopping when fitting.
    tol : float, default=0.01
        The threshold on the difference between losses over two consecutive
        iterations below which the algorithm stops.
    verbose : bool, default=False
        If True, print the final gradient norm.

    Returns
    -------
    pipeline : Pipeline
        A pipeline containing a TransferSubspaceLearningAdapter.

    References
    ----------
    .. [27] [Si et al., 2010] Si, S., Tao, D. and Geng, B.
            Bregman Divergence-Based Regularization for Transfer Subspace
            Learning. In IEEE Transactions on Knowledge and Data Engineering,
            pages 929-942.
    """
    if base_estimator is None:
        base_estimator = KNeighborsClassifier(n_neighbors=1)

    return make_da_pipeline(
        TransferSubspaceLearningAdapter(
            n_components=n_components,
            base_method=base_method,
            length_scale=length_scale,
            mu=mu,
            reg=reg,
            max_iter=max_iter,
            tol=tol,
            verbose=verbose,
        ),
        base_estimator,
    )
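

# Illustrative sketch (not part of the library): the TSL adapter relies on
# PyTorch for its gradient-based minimization, so ``torch`` must be installed.
# The toy data and the helper name ``_example_transfer_subspace_learning`` are
# hypothetical; the pipeline conventions (positive domains for source, masked
# target labels, target-only prediction) are the same assumptions as in the
# earlier examples.
def _example_transfer_subspace_learning():
    rng = np.random.default_rng(0)
    X_source = rng.normal(size=(30, 4))
    y_source = rng.integers(0, 2, size=30)
    X_target = rng.normal(loc=1.0, size=(20, 4))
    X = np.concatenate([X_source, X_target])
    y = np.concatenate([y_source, -np.ones(20, dtype=int)])  # masked target labels
    sample_domain = np.concatenate([np.ones(30), -np.ones(20)])
    # default base_estimator is a 1-nearest-neighbor classifier
    pipe = TransferSubspaceLearning(n_components=2, base_method="pca", max_iter=50)
    pipe.fit(X, y, sample_domain=sample_domain)
    return pipe.predict(X_target)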