Source code for skscope.solver

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: Zezhi Wang
# Copyright (C) 2023 abess-team
# Licensed under the MIT License.

from sklearn.model_selection import KFold
from .base_solver import BaseSolver
from sklearn.base import BaseEstimator
import numpy as np
import jax
from jax import numpy as jnp
from . import _scope, utilities
from .numeric_solver import convex_solver_nlopt



[docs]
class ScopeSolver(BaseEstimator):
    r"""
    Get sparse optimal solution of convex objective function by Sparse-Constrained Optimization via Splicing Iteration (SCOPE) algorithm, which also can be used for variables selection.
    Specifically, ``ScopeSolver`` aims to tackle this problem: :math:`\min_{x \in R^p} f(x) \text{ s.t. } ||x||_0 \leq s`, where :math:`f(x)` is a convex objective function and :math:`s` is the sparsity level. Each element of :math:`x` can be seen as a variable, and the nonzero elements of :math:`x` are the selected variables.

    Parameters
    ----------
    dimensionality : int
        Dimension of the optimization problem, which is also the total number of variables that will be considered to select or not, denoted as :math:`p`.
    sparsity : int or array of int, optional
        The sparsity level, which is the number of nonzero elements of the optimal solution, denoted as :math:`s`.
        default is ``range(int(p/log(log(p))/log(p)))``.
        Used only when ``path_type`` is "seq".
    sample_size : int, default=1
        Sample size, denoted as :math:`n`.
    preselect : array of int, default=[]
        An array contains the indexes of variables which must be selected.
    numeric_solver : callable, optional
        A solver for the convex optimization problem. ``ScopeSolver`` will call this function to solve the convex optimization problem in each iteration.
        It should have the same interface as ``skscope.convex_solver_nlopt``.
    max_iter : int, default=20
        Maximum number of iterations taken for converging.
    ic_method : callable, optional
        A function to calculate the information criterion for choosing the sparsity level.
        ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size.
        Used only when ``sparsity`` is array and ``cv`` is 1.
        Note that ``sample_size`` must be given when using ``ic_method``.
    cv : int, default=1
        The folds number when use the cross-validation method. If ``cv`` = 1, the sparsity level will be chosen by the information criterion. If ``cv`` > 1, the sparsity level will be chosen by the cross-validation method.
    split_method: callable, optional
        A function to get the part of data used in each fold of cross-validation.
        Its interface should be ``(data, index) -> part_data`` where ``index`` is an array of int.
    cv_fold_id: array of shape (sample_size,), optional
        An array indicates different folds in CV, which samples in the same fold should be given the same number.
        The number of different elements should be equal to ``cv``.
        Used only when `cv` > 1.
    group : array of shape (dimensionality,), default=range(dimensionality)
        The group index for each variable, and it must be an incremental integer array starting from 0 without gap.
        The variables in the same group must be adjacent, and they will be selected together or not.
        Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap).
        It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables,
        and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected.
    important_search : int, default=128
        The number of important variables which need be splicing.
        This is used to reduce the computational cost. If it's too large, it would greatly increase runtime.
    screening_size : int, default=-1
        The number of variables remaining after the screening before variables select. Screening is used to reduce the computational cost.
        ``screening_size`` should be a non-negative number smaller than p, but larger than any value in sparsity. If ``screening_size`` is -1, screening will not be used. If ``screening_size`` is 0, ``screening_size`` will be set as ``int(p/log(log(p))/log(p))``.
    max_exchange_num : int, optional, default=5
        Maximum exchange number when splicing.
    is_dynamic_max_exchange_num : bool, default=True
        If ``is_dynamic_max_exchange_num`` is True, ``max_exchange_num`` will be decreased dynamically according to the number of variables exchanged in the last iteration.
    greedy : bool, default=True,
        If ``greedy`` is True, the first exchange-number which can reduce the objective function value will be selected.
        Otherwise, the exchange-number which can reduce the objective function value most will be selected.
    splicing_type : {"halve", "taper"}, default="halve"
        The type of reduce the exchange number in each iteration from ``max_exchange_num``.
        "halve" for decreasing by half, "taper" for decresing by one.
    path_type : {"seq", "gs"}, default="seq"
        The method to be used to select the optimal sparsity level. For path_type = "seq", we solve the problem for all sizes in `sparsity` successively. For path_type = "gs", we solve the problem with sparsity level ranged between `gs_lower_bound` and `gs_upper_bound`, where the specific sparsity level to be considered is determined by golden section.
    gs_lower_bound : int, default=0
        The lower bound of golden-section-search for sparsity searching.
        Used only when path_type = "gs".
    gs_upper_bound : int, optional
        The higher bound of golden-section-search for sparsity searching.
        Default is ``int(p/log(log(p))/log(p))``.
        Used only when path_type = "gs".
    thread : int, default=1
        Max number of multithreads used for cross-validation. If thread = 0, the maximum number of threads supported by the device will be used.
    random_state : int, optional
        The random seed used for cross-validation.
    console_log_level : str, default="off"
        The level of output log to console, which can be "off", "error", "warning", "debug".
        For example, if ``console_log_level`` is "warning", only error and warning log will be output to console.
    file_log_level : str, default="off"
        The level of output log to file, which can be "off", "error", "warning", "debug".
        For example, if ``file_log_level`` is "off", no log will be output to file.
    log_file_name : str, default="logs/skscope.log"
        The name (relative path) of log file, which is used to store the log information.

    Attributes
    ----------
    params :array of shape(dimensionality,)
        The sparse optimal solution.
    objective_value:float
        The value of objective function on the solution.
    support_set :array of int
        The indices of selected variables, sorted in ascending order.
    cv_test_loss :float
        If cv=1, it stores the score under chosen information criterion. If cv>1, it stores the test objective under cross-validation.
    cv_train_loss :float
        The objective on training data.

    References
    ----------
    - Junxian Zhu, Canhong Wen, Jin Zhu, Heping Zhang, and Xueqin Wang.
      A polynomial algorithm for best-subset selection problem.
      Proceedings of the National Academy of Sciences,
      117(52):33117-33123, 2020.
    """

    def __init__(
        self,
        dimensionality,
        sparsity=None,
        sample_size=1,
        *,
        preselect=[],
        numeric_solver=convex_solver_nlopt,
        max_iter=20,
        ic_method=None,
        cv=1,
        split_method=None,
        cv_fold_id=None,
        group=None,
        important_search=128,
        screening_size=-1,
        max_exchange_num=5,
        is_dynamic_max_exchange_num=True,
        greedy=True,
        splicing_type="halve",
        path_type="seq",
        gs_lower_bound=None,
        gs_upper_bound=None,
        thread=1,
        random_state=None,
        console_log_level="off",
        file_log_level="off",
        log_file_name="logs/skscope.log",
    ):
        self.model = _scope.UniversalModel()
        self.dimensionality = dimensionality
        self.sparsity = sparsity
        self.sample_size = sample_size

        self.preselect = preselect
        self.numeric_solver = numeric_solver
        self.max_iter = max_iter
        self.ic_method = ic_method
        self.cv = cv
        self.split_method = split_method
        self.deleter = None
        self.cv_fold_id = cv_fold_id
        self.group = group
        self.warm_start = True
        self.important_search = important_search
        self.screening_size = screening_size
        self.max_exchange_num = max_exchange_num
        self.is_dynamic_max_exchange_num = is_dynamic_max_exchange_num
        self.greedy = greedy
        self.use_hessian = False
        self.splicing_type = splicing_type
        self.path_type = path_type
        self.gs_lower_bound = gs_lower_bound
        self.gs_upper_bound = gs_upper_bound
        self.thread = thread
        self.jax_platform = "cpu"
        self.random_state = random_state
        self.console_log_level = console_log_level
        self.file_log_level = file_log_level
        self.log_file_name = log_file_name
        self.hessian = None
        self.cpp = False


[docs]
    def get_config(self, deep=True):
        """
        Get parameters for this solver.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this solver and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        return super().get_params(deep)



[docs]
    def set_config(self, **params):
        """Set the parameters of this solver.

        Parameters
        ----------
        **params : dict
            Solver parameters.

        Returns
        -------
        self :
            Solver instance.
        """
        return super().set_params(**params)



[docs]
    def get_estimated_params(self):
        r"""
        Get the optimal parameters of the objective function.

        Returns
        -------
        parameters : array of shape (dimensionality,)
            The optimal solution of optimization.
        """
        return self.params



[docs]
    def get_support(self):
        r"""
        Get the support set of the optimal parameters.

        Returns
        -------
        support_set : array of int
            The indices of selected variables, sorted in ascending order.
        """
        return self.support_set


    @staticmethod
    def _set_log_level(console_log_level, file_log_level, log_file_name):
        # log level
        log_level_dict = {
            "off": 6,
            "error": 4,
            "warning": 3,
            "debug": 1,
        }
        console_log_level = console_log_level.lower()
        file_log_level = file_log_level.lower()
        if (
            console_log_level not in log_level_dict
            or file_log_level not in log_level_dict
        ):
            raise ValueError(
                "console_log_level and file_log_level must be in 'off', 'error', 'warning', 'debug'"
            )
        console_log_level = log_level_dict[console_log_level]
        file_log_level = log_level_dict[file_log_level]
        # log file name
        if not isinstance(log_file_name, str):
            raise ValueError("log_file_name must be a string")

        _scope.init_spdlog(console_log_level, file_log_level, log_file_name)


[docs]
    def solve(
        self,
        objective,
        data=None,
        layers=[],
        init_support_set=None,
        init_params=None,
        gradient=None,
        jit=False,
    ):
        r"""
        Optimize the optimization objective function.

        Parameters
        ----------
        objective : callable
            The objective function to be minimized: ``objective(params, data) -> loss``
            where ``params`` is a 1-D array with shape (dimensionality,) and
            ``data`` is the fixed parameters needed to completely specify the function.
            ``objective`` must be written in ``JAX`` library if ``gradient`` is not provided.
        data : optional
            Extra arguments passed to the objective function and its derivatives (if existed).
        layers : list of ``Layer`` objects, default=[]
            ``Layer`` is a "decorator" of the objective function.
            The parameters will be processed by the ``Layer`` before entering the objective function.
            The different layers can achieve different effects,
            and they can be sequentially concatenated together to form a larger layer,
            enabling the implementation of more complex functionalities.
            The ``Layer`` objects can be found in ``skscope.layers``.
            If ``layers`` is not empty, ``objective`` must be written in ``JAX`` library.
        init_support_set : array of int, default=[]
            The index of the variables in initial active set.
        init_params : array of shape (dimensionality,), optional
            The initial value of parameters, default is an all-zero vector.
        gradient : callable, optional
            A function that returns the gradient vector of parameters: ``gradient(params, data) -> array of shape (dimensionality,)``,
            where ``params`` is a 1-D array with shape (dimensionality,) and ``data`` is the fixed parameters needed to completely specify the function.
            If ``gradient`` is not provided, ``objective`` must be written in ``JAX`` library.
        jit : bool, default=False
            If ``objective`` or ``gradient`` are written in JAX, ``jit`` can be set to True to speed up the optimization.

        Returns
        -------
        parameters : array of shape (dimensionality,)
            The optimal solution of optimization.
        """
        hessian = self.hessian
        cpp = self.cpp
        ScopeSolver._set_log_level(
            self.console_log_level, self.file_log_level, self.log_file_name
        )

        jax.config.update("jax_platform_name", self.jax_platform)

        p = self.dimensionality
        BaseSolver._check_positive_integer(p, "dimensionality")

        n = self.sample_size
        BaseSolver._check_positive_integer(n, "sample_size")

        BaseSolver._check_non_negative_integer(self.max_iter, "max_iter")

        # max_exchange_num
        BaseSolver._check_positive_integer(self.max_exchange_num, "max_exchange_num")

        # group
        if self.group is None:
            group = np.arange(p, dtype="int32")
            group_num = p  # len(np.unique(group))
        else:
            group = np.array(self.group)
            if group.ndim > 1:
                raise ValueError("Group should be an 1D array of integers.")
            if group.size != p:
                raise ValueError(
                    "The length of group should be equal to dimensionality."
                )
            group_num = len(np.unique(group))
            if group[0] != 0:
                raise ValueError("Group should start from 0.")
            if any(group[1:] - group[:-1] < 0):
                raise ValueError("Group should be an incremental integer array.")
            if not group_num == max(group) + 1:
                raise ValueError("There is a gap in group.")
            group = np.array(
                [np.where(group == i)[0][0] for i in range(group_num)], dtype="int32"
            )

        # preselect
        preselect = np.unique(np.array(self.preselect, dtype="int32"))
        if preselect.size > 0 and (preselect[0] < 0 or preselect[-1] >= group_num):
            raise ValueError("preselect should be between 0 and dimensionality.")

        # default sparsity level
        force_min_sparsity = preselect.size
        default_max_sparsity = max(
            force_min_sparsity,
            (
                group_num
                if group_num <= 5
                else int(group_num / np.log(np.log(group_num)) / np.log(group_num))
            ),
        )

        # path_type
        if self.path_type == "seq":
            path_type = 1
            gs_lower_bound, gs_upper_bound = 0, 0
            if self.sparsity is None:
                sparsity = np.arange(
                    force_min_sparsity,
                    default_max_sparsity + 1,
                    dtype="int32",
                )
            else:
                sparsity = np.unique(np.array(self.sparsity, dtype="int32"))
                if sparsity.size == 0:
                    raise ValueError("Sparsity should not be empty.")
                if sparsity[0] < force_min_sparsity or sparsity[-1] > group_num:
                    raise ValueError("There is an invalid sparsity.")
        elif self.path_type == "gs":
            if len(layers) > 0:
                raise ValueError(
                    "The path_type should be 'seq' when the layers are specified."
                )
            path_type = 2
            sparsity = np.array([0], dtype="int32")
            if self.gs_lower_bound is None:
                gs_lower_bound = force_min_sparsity
            else:
                BaseSolver._check_non_negative_integer(
                    self.gs_lower_bound, "gs_lower_bound"
                )
                gs_lower_bound = self.gs_lower_bound

            if self.gs_upper_bound is None:
                gs_upper_bound = default_max_sparsity
            else:
                BaseSolver._check_non_negative_integer(
                    self.gs_upper_bound, "gs_upper_bound"
                )
                gs_upper_bound = self.gs_upper_bound

            if gs_lower_bound < force_min_sparsity or gs_upper_bound > group_num:
                raise ValueError(
                    "gs_lower_bound and gs_upper_bound should be between 0 and dimensionality."
                )
            if gs_lower_bound > gs_upper_bound:
                raise ValueError("gs_upper_bound should be larger than gs_lower_bound.")
        else:
            raise ValueError("path_type should be 'seq' or 'gs'")

        # screening_size
        if len(layers) > 0 and self.screening_size != -1:
            raise ValueError(
                "The screening_size should be -1 when the layers are specified."
            )
        if self.screening_size == -1:
            screening_size = -1
        elif self.screening_size == 0:
            screening_size = max(sparsity[-1], gs_upper_bound, default_max_sparsity)
        else:
            screening_size = self.screening_size
            if screening_size > group_num or screening_size < max(
                sparsity[-1], gs_upper_bound
            ):
                raise ValueError(
                    "screening_size should be between sparsity and dimensionality."
                )

        # thread
        BaseSolver._check_non_negative_integer(self.thread, "thread")

        # splicing_type
        if self.splicing_type == "halve":
            splicing_type = 0
        elif self.splicing_type == "taper":
            splicing_type = 1
        else:
            raise ValueError("splicing_type should be 'halve' or 'taper'.")

        # important_search
        BaseSolver._check_non_negative_integer(
            self.important_search, "important_search"
        )

        # cv
        BaseSolver._check_positive_integer(self.cv, "cv")
        if self.cv > n:
            raise ValueError("cv should not be greater than sample_size.")
        if self.cv > 1:
            ic_method = utilities.AIC if self.ic_method is None else self.ic_method
            if self.split_method is None:
                raise ValueError("split_method should be provided when cv > 1.")
            self.model.set_slice_by_sample(self.split_method)
            self.model.set_deleter(self.deleter)
            if self.cv_fold_id is None:
                kf = KFold(
                    n_splits=self.cv, shuffle=True, random_state=self.random_state
                ).split(np.zeros(n))

                self.cv_fold_id = np.zeros(n)
                for i, (_, fold_id) in enumerate(kf):
                    self.cv_fold_id[fold_id] = i
            else:
                self.cv_fold_id = np.array(self.cv_fold_id, dtype="int32")
                if self.cv_fold_id.ndim > 1:
                    raise ValueError("cv_fold_id should be an 1D array of integers.")
                if self.cv_fold_id.size != n:
                    raise ValueError(
                        "The length of cv_fold_id should be equal to sample_size."
                    )
                if len(set(self.cv_fold_id)) != self.cv:
                    raise ValueError(
                        "The number of different elements in cv_fold_id should be equal to cv."
                    )
        else:
            self.cv_fold_id = np.array([], dtype="int32")
            if sparsity.size == 1 and self.ic_method is None:
                ic_method = utilities.AIC
            elif sparsity.size > 1 and self.ic_method is None:
                raise ValueError(
                    "ic_method should be provided for choosing sparsity level with information criterion."
                )
            elif self.sample_size <= 1:
                raise ValueError("sample_size should be given when using ic_method.")
            else:
                ic_method = self.ic_method
        if gradient is None and len(layers) > 0:
            if len(layers) == 1:
                assert layers[0].out_features == self.dimensionality
            else:
                for i in range(len(layers) - 1):
                    assert layers[i].out_features == layers[i + 1].in_features
                assert layers[-1].out_features == self.dimensionality
            loss_fn = self.__set_objective_py(objective, None, None, jit, layers)
            p = layers[0].in_features
            for layer in layers[::-1]:
                sparsity = layer.transform_sparsity(sparsity)
                group = layer.transform_group(group)
                preselect = layer.transform_preselect(preselect)
        else:
            # set optimization objective
            if cpp:
                loss_fn = self.__set_objective_cpp(objective, gradient, hessian)
            else:
                loss_fn = self.__set_objective_py(objective, gradient, hessian, jit)

        # init_support_set
        if init_support_set is None:
            init_support_set = np.array([], dtype="int32")
        else:
            init_support_set = np.array(init_support_set, dtype="int32")
            if init_support_set.ndim > 1:
                raise ValueError(
                    "The initial active set should be an 1D array of integers."
                )
            if init_support_set.min() < 0 or init_support_set.max() >= p:
                raise ValueError("init_support_set contains wrong index.")

        # init_params
        if init_params is None:
            random_init = False
            if len(layers) > 0:
                random_init = np.any(
                    np.array([layer.random_initilization for layer in layers])
                )
            if random_init:
                init_params = np.random.RandomState(self.random_state).randn(p)
            else:
                init_params = np.zeros(p, dtype=float)
        else:
            init_params = np.array(init_params, dtype=float)
            if init_params.shape != (p,):
                raise ValueError(
                    "The length of init_params should be equal to dimensionality."
                )

        result = _scope.pywrap_Universal(
            data,
            self.model,
            self.numeric_solver,
            p,
            n,
            0,
            self.max_iter,
            self.max_exchange_num,
            path_type,
            self.greedy,
            self.use_hessian,
            self.is_dynamic_max_exchange_num,
            self.warm_start,
            ic_method,
            self.cv,
            sparsity,
            np.array([0.0]),
            gs_lower_bound,
            gs_upper_bound,
            screening_size,
            group,
            preselect,
            self.thread,
            splicing_type,
            self.important_search,
            self.cv_fold_id,
            init_support_set,
            init_params,
            np.zeros(0),
        )

        self.params = np.array(result[0])
        self.objective_value = loss_fn(self.params, data)
        if len(layers) > 0:
            for layer in layers:
                self.params = layer.transform_params(self.params)

        self.support_set = np.sort(np.nonzero(self.params)[0])
        self.information_criterion = result[3]
        self.cross_validation_loss = result[2] if self.cv > 1 else None

        return self.params



[docs]
    def get_result(self):
        r"""
        Get the result of optimization.

        Returns
        -------
        results : dict
            The result of optimization, including the following keys:

            + ``params`` : array of shape (dimensionality,)
                The optimal parameters.
            + ``support_set`` : array of int
                The support set of the optimal parameters.
            + ``objective_value`` : float
                The value of objective function at the optimal parameters.
            + ``information_criterion`` : float
                The value of information criterion.
            + ``cross_validation_loss`` : float
                The mean loss of cross-validation.
        """
        return {
            "params": self.params,
            "support_set": self.support_set,
            "objective_value": self.objective_value,
            "information_criterion": self.information_criterion,
            "cross_validation_loss": self.cross_validation_loss,
        }


    def __set_objective_cpp(self, objective, gradient, hessian):
        self.model.set_loss_of_model(lambda params, data: objective(params, data))
        if gradient is None:
            self.model.set_gradient_autodiff(objective)
        else:
            self.model.set_gradient_user_defined(
                lambda params, data: (objective(params, data), gradient(params, data))
            )
        if hessian is None:
            self.model.set_hessian_autodiff(objective)
        else:
            # NOTE: Perfect Forwarding of grad and hess is neccessary for func
            # written in Pybind11_Cpp code
            self.model.set_hessian_user_defined(
                lambda params, data: hessian(params, data)
            )
        return objective

    def __set_objective_py(self, objective, gradient, hessian, jit, layers=[]):
        loss_, grad_ = BaseSolver._set_objective(objective, gradient, jit, layers)

        # hess
        if hessian is None:

            def hess_(params, data):
                return jax.hessian(loss_)(params, data)

        elif hessian.__code__.co_argcount == 1:

            def hess_(params, data):
                return hessian(params)

        else:

            def hess_(params, data):
                return hessian(params, data)

        if jit:
            hess_ = jax.jit(hess_)

        def loss_fn(params, data):
            value = loss_(params, data)
            if not np.isfinite(value):
                raise ValueError("The objective function returned {}.".format(value))
            if isinstance(value, float):
                return value
            return value.item()

        def value_and_grad(params, data):
            value, grad = grad_(params, data)
            if not np.isfinite(value):
                raise ValueError("The objective function returned {}.".format(value))
            if not np.all(np.isfinite(grad)):
                raise ValueError("The gradient returned contains NaN or Inf.")
            if isinstance(value, float):
                return value, np.array(grad)
            return value.item(), np.array(grad)

        def hess_fn(params, data):
            h = np.array(hess_(jnp.array(params), data))
            if not np.all(np.isfinite(h)):
                raise ValueError("The hessian returned contains NaN or Inf.")
            return h

        self.model.set_loss_of_model(loss_fn)
        self.model.set_gradient_user_defined(value_and_grad)
        self.model.set_hessian_user_defined(hess_fn)

        return loss_fn




[docs]
class HTPSolver(BaseSolver):
    r"""
    Get sparse optimal solution of convex objective function by Gradient Hard Thresholding Pursuit (GraHTP) algorithm.
    Specifically, ``HTPSolver`` aims to tackle this problem: :math:`\min_{x \in R^p} f(x) \text{ s.t. } ||x||_0 \leq s`, where :math:`f(x)` is a convex objective function and :math:`s` is the sparsity level. Each element of :math:`x` can be seen as a variable, and the nonzero elements of :math:`x` are the selected variables.

    Parameters
    ----------
    dimensionality : int
        Dimension of the optimization problem, which is also the total number of variables that will be considered to select or not, denoted as :math:`p`.
    sparsity : int or array of int, optional
        The sparsity level, which is the number of nonzero elements of the optimal solution, denoted as :math:`s`.
        Default is ``range(int(p/log(log(p))/log(p)))``.
    sample_size : int, default=1
        Sample size, denoted as :math:`n`.
    preselect : array of int, default=[]
        An array contains the indexes of variables which must be selected.
    step_size : float, default=0.005
        Step size of gradient descent.
    numeric_solver : callable, optional
        A solver for the convex optimization problem. ``HTPSolver`` will call this function to solve the convex optimization problem in each iteration.
        It should have the same interface as ``skscope.convex_solver_nlopt``.
    max_iter : int, default=100
        Maximum number of iterations taken for converging.
    group : array of shape (dimensionality,), default=range(dimensionality)
        The group index for each variable, and it must be an incremental integer array starting from 0 without gap.
        The variables in the same group must be adjacent, and they will be selected together or not.
        Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap).
        It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables,
        and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected.
    ic_method : callable, optional
        A function to calculate the information criterion for choosing the sparsity level.
        ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size.
        Used only when ``sparsity`` is array and ``cv`` is 1.
        Note that ``sample_size`` must be given when using ``ic_method``.
    cv : int, default=1
        The folds number when use the cross-validation method.
        - If ``cv`` = 1, the sparsity level will be chosen by the information criterion.
        - If ``cv`` > 1, the sparsity level will be chosen by the cross-validation method.
    split_method : callable, optional
        A function to get the part of data used in each fold of cross-validation.
        Its interface should be ``(data, index) -> part_data`` where ``index`` is an array of int.
    cv_fold_id : array of shape (sample_size,), optional
        An array indicates different folds in CV, which samples in the same fold should be given the same number.
        The number of different elements should be equal to ``cv``.
        Used only when `cv` > 1.
    random_state : int, optional
        The random seed used for cross-validation.

    Attributes
    ----------
    params : array of shape(dimensionality,)
        The sparse optimal solution.
    objective_value: float
        The value of objective function on the solution.
    support_set : array of int
        The indices of selected variables, sorted in ascending order.

    References
    ----------
    Yuan X T, Li P, Zhang T. Gradient Hard Thresholding Pursuit[J]. J. Mach. Learn. Res., 2017, 18(1): 6027-6069.

    """

    def __init__(
        self,
        dimensionality,
        sparsity=None,
        sample_size=1,
        *,
        preselect=[],
        step_size=0.005,
        numeric_solver=convex_solver_nlopt,
        max_iter=100,
        group=None,
        ic_method=None,
        cv=1,
        cv_fold_id=None,
        split_method=None,
        random_state=None,
    ):
        super().__init__(
            dimensionality=dimensionality,
            sparsity=sparsity,
            sample_size=sample_size,
            preselect=preselect,
            numeric_solver=numeric_solver,
            max_iter=max_iter,
            group=group,
            ic_method=ic_method,
            cv=cv,
            cv_fold_id=cv_fold_id,
            split_method=split_method,
            random_state=random_state,
        )
        self.step_size = step_size

    def _solve(
        self,
        sparsity,
        loss_fn,
        value_and_grad,
        init_support_set,
        init_params,
        data,
        preselect,
        group,
    ):
        if sparsity <= preselect.size:
            return super()._solve(
                sparsity,
                loss_fn,
                value_and_grad,
                init_support_set,
                init_params,
                data,
                preselect,
                group,
            )
        # init
        params = init_params
        best_suppport_group_tuple = None
        best_loss = np.inf
        results = {}  # key: tuple of ordered support set, value: params
        group_num = len(np.unique(group))
        group_indices = [np.where(group == i)[0] for i in range(group_num)]

        for n_iters in range(self.max_iter):
            # S1: gradient descent
            params_bias = params - self.step_size * value_and_grad(params, data)[1]
            # S2: Gradient Hard Thresholding
            score = np.array(
                [
                    np.sum(np.square(params_bias[group_indices[i]]))
                    for i in range(group_num)
                ]
            )
            score[preselect] = np.inf
            support_new_group = np.argpartition(score, -sparsity)[-sparsity:]
            support_new_group_tuple = tuple(np.sort(support_new_group))
            # terminating condition
            if support_new_group_tuple in results:
                break
            # S3: debise
            params = np.zeros_like(init_params)
            support_new = np.concatenate([group_indices[i] for i in support_new_group])
            params[support_new] = params_bias[support_new]
            loss, params = self._numeric_solver(
                loss_fn, value_and_grad, params, support_new, data
            )
            # update cache
            if loss < best_loss:
                best_loss = loss
                best_suppport_group_tuple = support_new_group_tuple
            results[support_new_group_tuple] = params

        self.n_iters = n_iters
        return results[best_suppport_group_tuple], np.concatenate(
            [group_indices[i] for i in best_suppport_group_tuple]
        )




[docs]
class IHTSolver(HTPSolver):
    r"""
    Get sparse optimal solution of convex objective function by Iterative Hard Thresholding (IHT) algorithm.
    Specifically, ``IHTSolver`` aims to tackle this problem: :math:`\min_{x \in R^p} f(x) \text{ s.t. } ||x||_0 \leq s`, where :math:`f(x)` is a convex objective function and :math:`s` is the sparsity level. Each element of :math:`x` can be seen as a variable, and the nonzero elements of :math:`x` are the selected variables.

    Parameters
    ----------
    dimensionality : int
        Dimension of the optimization problem, which is also the total number of variables that will be considered to select or not, denoted as :math:`p`.
    sparsity : int or array of int, optional
        The sparsity level, which is the number of nonzero elements of the optimal solution, denoted as :math:`s`.
        Default is ``range(int(p/log(log(p))/log(p)))``.
    sample_size : int, default=1
        Sample size, denoted as :math:`n`.
    preselect : array of int, default=[]
        An array contains the indexes of variables which must be selected.
    step_size : float, default=0.005
        Step size of gradient descent.
    numeric_solver : callable, optional
        A solver for the convex optimization problem. ``IHTSolver`` will call this function to solve the convex optimization problem in each iteration.
        It should have the same interface as ``skscope.convex_solver_nlopt``.
    max_iter : int, default=100
        Maximum number of iterations taken for converging.
    group : array of shape (dimensionality,), default=range(dimensionality)
        The group index for each variable, and it must be an incremental integer array starting from 0 without gap.
        The variables in the same group must be adjacent, and they will be selected together or not.
        Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap).
        It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables,
        and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected.
    ic_method : callable, optional
        A function to calculate the information criterion for choosing the sparsity level.
        ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size.
        Used only when ``sparsity`` is array and ``cv`` is 1.
        Note that ``sample_size`` must be given when using ``ic_method``.
    cv : int, default=1
        The folds number when use the cross-validation method.
        - If ``cv`` = 1, the sparsity level will be chosen by the information criterion.
        - If ``cv`` > 1, the sparsity level will be chosen by the cross-validation method.
    split_method : callable, optional
        A function to get the part of data used in each fold of cross-validation.
        Its interface should be ``(data, index) -> part_data`` where ``index`` is an array of int.
    cv_fold_id : array of shape (sample_size,), optional
        An array indicates different folds in CV, which samples in the same fold should be given the same number.
        The number of different elements should be equal to ``cv``.
        Used only when `cv` > 1.
    random_state : int, optional
        The random seed used for cross-validation.

    Attributes
    ----------
    params : array of shape(dimensionality,)
        The sparse optimal solution.
    objective_value: float
        The value of objective function on the solution.
    support_set : array of int
        The indices of selected variables, sorted in ascending order.

    References
    ----------
    Yuan X T, Li P, Zhang T. Gradient Hard Thresholding Pursuit[J]. J. Mach. Learn. Res., 2017, 18(1): 6027-6069.

    """

    def _solve(
        self,
        sparsity,
        loss_fn,
        value_and_grad,
        init_support_set,
        init_params,
        data,
        preselect,
        group,
    ):
        if sparsity <= preselect.size:
            return super()._solve(
                sparsity,
                loss_fn,
                value_and_grad,
                init_support_set,
                init_params,
                data,
                preselect,
                group,
            )
        # init
        params = init_params
        support_old_group = np.array([], dtype="int32")
        group_num = len(np.unique(group))
        group_indices = [np.where(group == i)[0] for i in range(group_num)]

        for n_iters in range(self.max_iter):
            # S1: gradient descent
            params_bias = params - self.step_size * value_and_grad(params, data)[1]
            # S2: Gradient Hard Thresholding
            score = np.array(
                [
                    np.sum(np.square(params_bias[group_indices[i]]))
                    for i in range(group_num)
                ]
            )
            score[preselect] = np.inf
            support_new_group = np.argpartition(score, -sparsity)[-sparsity:]
            # terminating condition
            if np.all(set(support_old_group) == set(support_new_group)):
                break
            else:
                support_old_group = support_new_group
            # S3: debise
            params = np.zeros_like(init_params)
            support_new = np.concatenate([group_indices[i] for i in support_new_group])
            params[support_new] = params_bias[support_new]

        # final optimization for IHT
        _, params = self._numeric_solver(
            loss_fn, value_and_grad, params, support_new, data
        )
        self.n_iters = n_iters
        return params, support_new




[docs]
class GraspSolver(BaseSolver):
    r"""
    Get sparse optimal solution of convex objective function by Gradient Support Pursuit (GraSP) algorithm.
    Specifically, ``GraspSolver`` aims to tackle this problem: :math:`\min_{x \in R^p} f(x) \text{ s.t. } ||x||_0 \leq s`, where :math:`f(x)` is a convex objective function and :math:`s` is the sparsity level. Each element of :math:`x` can be seen as a variable, and the nonzero elements of :math:`x` are the selected variables.


    Parameters
    ----------
    dimensionality : int
        Dimension of the optimization problem, which is also the total number of variables that will be considered to select or not, denoted as :math:`p`.
    sparsity : int or array of int, optional
        The sparsity level, which is the number of nonzero elements of the optimal solution, denoted as :math:`s`.
        Default is ``range(int(p/log(log(p))/log(p)))``.
    sample_size : int, default=1
        Sample size, denoted as :math:`n`.
    preselect : array of int, default=[]
        An array contains the indexes of variables which must be selected.
    numeric_solver : callable, optional
        A solver for the convex optimization problem. ``GraspSolver`` will call this function to solve the convex optimization problem in each iteration.
        It should have the same interface as ``skscope.convex_solver_nlopt``.
    max_iter : int, default=100
        Maximum number of iterations taken for converging.
    group : array of shape (dimensionality,), default=range(dimensionality)
        The group index for each variable, and it must be an incremental integer array starting from 0 without gap.
        The variables in the same group must be adjacent, and they will be selected together or not.
        Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap).
        It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables,
        and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected.
    ic_method : callable, optional
        A function to calculate the information criterion for choosing the sparsity level.
        ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size.
        Used only when ``sparsity`` is array and ``cv`` is 1.
        Note that ``sample_size`` must be given when using ``ic_method``.
    cv : int, default=1
        The folds number when use the cross-validation method.
        - If ``cv`` = 1, the sparsity level will be chosen by the information criterion.
        - If ``cv`` > 1, the sparsity level will be chosen by the cross-validation method.
    split_method : callable, optional
        A function to get the part of data used in each fold of cross-validation.
        Its interface should be ``(data, index) -> part_data`` where ``index`` is an array of int.
    cv_fold_id : array of shape (sample_size,), optional
        An array indicates different folds in CV, which samples in the same fold should be given the same number.
        The number of different elements should be equal to ``cv``.
        Used only when `cv` > 1.
    random_state : int, optional
        The random seed used for cross-validation.

    Attributes
    ----------
    params : array of shape(dimensionality,)
        The sparse optimal solution.
    objective_value: float
        The value of objective function on the solution.
    support_set : array of int
        The indices of selected variables, sorted in ascending order.

    References
    ----------
    Bahmani S, Raj B, Boufounos P T. Greedy sparsity-constrained optimization[J]. The Journal of Machine Learning Research, 2013, 14(1): 807-841.
    """

    def _solve(
        self,
        sparsity,
        loss_fn,
        value_and_grad,
        init_support_set,
        init_params,
        data,
        preselect,
        group,
    ):
        if sparsity <= preselect.size:
            return super()._solve(
                sparsity,
                loss_fn,
                value_and_grad,
                init_support_set,
                init_params,
                data,
                preselect,
                group,
            )
        # init
        params = init_params
        best_suppport_tuple = None
        best_loss = np.inf
        results = {}  # key: tuple of ordered support set, value: params
        group_num = len(np.unique(group))
        group_indices = [np.where(group == i)[0] for i in range(group_num)]

        for n_iters in range(self.max_iter):
            # compute local gradient
            grad_values = value_and_grad(params, data)[1]
            score = np.array(
                [
                    np.sum(np.square(grad_values[group_indices[i]]))
                    for i in range(group_num)
                ]
            )
            score[preselect] = np.inf

            # identify directions
            if 2 * sparsity < group_num:
                Omega = [
                    idx
                    for idx in np.argpartition(score, -2 * sparsity)[-2 * sparsity :]
                    if score[idx] != 0.0
                ]  # supp of top 2k largest absolute values of gradient
            else:
                Omega = np.nonzero(score)[0]  # supp(z)

            # merge supports
            support_new = np.concatenate([group_indices[i] for i in Omega])
            support_new = np.unique(np.append(support_new, params.nonzero()[0]))
            suppport_tuple = tuple(np.sort(support_new))

            # terminating condition
            if suppport_tuple in results:
                break

            # minimize
            params_bias = np.zeros_like(init_params)
            params_bias[support_new] = params[support_new]
            _, params_bias = self._numeric_solver(
                loss_fn, value_and_grad, params_bias, support_new, data
            )

            # prune estimate
            score = np.array(
                [
                    np.sum(np.square(params_bias[group_indices[i]]))
                    for i in range(group_num)
                ]
            )
            score[preselect] = np.inf
            support_set_group = np.argpartition(score, -sparsity)[-sparsity:]
            support_set = np.concatenate([group_indices[i] for i in support_set_group])
            params = np.zeros_like(init_params)
            params[support_set] = params_bias[support_set]

            # update cache
            loss = loss_fn(params, data)
            if loss < best_loss:
                best_loss = loss
                best_suppport_tuple = suppport_tuple
            results[suppport_tuple] = params

        self.n_iters = n_iters
        return results[best_suppport_tuple], np.nonzero(results[best_suppport_tuple])[0]




[docs]
class FobaSolver(BaseSolver):
    r"""
    Get sparse optimal solution of convex objective function by Forward-Backward greedy (FoBa) algorithm.
    Specifically, ``FobaSolver`` aims to tackle this problem: :math:`\min_{x \in R^p} f(x) \text{ s.t. } ||x||_0 \leq s`, where :math:`f(x)` is a convex objective function and :math:`s` is the sparsity level. Each element of :math:`x` can be seen as a variable, and the nonzero elements of :math:`x` are the selected variables.


    Parameters
    ----------
    dimensionality : int
        Dimension of the optimization problem, which is also the total number of variables that will be considered to select or not, denoted as :math:`p`.
    sparsity : int or array of int, optional
        The sparsity level, which is the number of nonzero elements of the optimal solution, denoted as :math:`s`.
        Default is ``range(int(p/log(log(p))/log(p)))``.
    sample_size : int, default=1
        Sample size, denoted as :math:`n`.
    use_gradient : bool, default=True
        Whether to use gradient information to metric the importance of variables or not.
        Using gradient information will accelerate the algorithm but the solution may be not accurate.
    threshold : float, default=0.0
        The threshold to determine whether a variable is selected or not.
    foba_threshold_ratio : float, default=0.5
        The threshold for determining whether a variable is deleted or not will be set to ``threshold`` * ``foba_threshold_ratio``.
    strict_sparsity : bool, default=True
        Whether to strictly control the sparsity level to be ``sparsity`` or not.
    preselect : array of int, default=[]
        An array contains the indexes of variables which must be selected.
    numeric_solver : callable, optional
        A solver for the convex optimization problem. ``FobaSolver`` will call this function to solve the convex optimization problem in each iteration.
        It should have the same interface as ``skscope.convex_solver_nlopt``.
    max_iter : int, default=100
        Maximum number of iterations taken for converging.
    group : array of shape (dimensionality,), default=range(dimensionality)
        The group index for each variable, and it must be an incremental integer array starting from 0 without gap.
        The variables in the same group must be adjacent, and they will be selected together or not.
        Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap).
        It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables,
        and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected.
    ic_method : callable, optional
        A function to calculate the information criterion for choosing the sparsity level.
        ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size.
        Used only when ``sparsity`` is array and ``cv`` is 1.
        Note that ``sample_size`` must be given when using ``ic_method``.
    cv : int, default=1
        The folds number when use the cross-validation method.
        - If ``cv`` = 1, the sparsity level will be chosen by the information criterion.
        - If ``cv`` > 1, the sparsity level will be chosen by the cross-validation method.
    split_method : callable, optional
        A function to get the part of data used in each fold of cross-validation.
        Its interface should be ``(data, index) -> part_data`` where ``index`` is an array of int.
    cv_fold_id : array of shape (sample_size,), optional
        An array indicates different folds in CV, which samples in the same fold should be given the same number.
        The number of different elements should be equal to ``cv``.
        Used only when `cv` > 1.
    random_state : int, optional
        The random seed used for cross-validation.

    Attributes
    ----------
    params : array of shape(dimensionality,)
        The sparse optimal solution.
    objective_value: float
        The value of objective function on the solution.
    support_set : array of int
        The indices of selected variables, sorted in ascending order.

    References
    ----------
    Liu J, Ye J, Fujimaki R. Forward-backward greedy algorithms for general convex smooth functions over a cardinality constraint[C]//International Conference on Machine Learning. PMLR, 2014: 503-511.
    """

    def __init__(
        self,
        dimensionality,
        sparsity=None,
        sample_size=1,
        *,
        use_gradient=True,
        threshold=0.0,
        foba_threshold_ratio=0.5,
        strict_sparsity=True,
        preselect=[],
        numeric_solver=convex_solver_nlopt,
        max_iter=100,
        group=None,
        ic_method=None,
        cv=1,
        cv_fold_id=None,
        split_method=None,
        random_state=None,
    ):
        super().__init__(
            dimensionality=dimensionality,
            sparsity=sparsity,
            sample_size=sample_size,
            preselect=preselect,
            numeric_solver=numeric_solver,
            max_iter=max_iter,
            group=group,
            ic_method=ic_method,
            cv=cv,
            cv_fold_id=cv_fold_id,
            split_method=split_method,
            random_state=random_state,
        )
        self.threshold = threshold
        self.use_gradient = use_gradient
        self.foba_threshold_ratio = foba_threshold_ratio
        self.strict_sparsity = strict_sparsity

    def _forward_step(
        self, loss_fn, value_and_grad, params, support_set_group, data, group_indices
    ):
        if self.use_gradient:
            # FoBa-gdt algorithm
            value_old, grad = value_and_grad(params, data)
            score = np.array(
                [
                    np.sum(np.square(grad[group_indices[i]]))
                    for i in range(len(group_indices))
                ]
            )
            score[support_set_group] = -np.inf
        else:
            # FoBa-obj algorithm
            value_old = loss_fn(params, data)
            score = np.empty(len(group_indices), dtype=float)
            for idx in range(len(group_indices)):
                if idx in support_set_group:
                    score[idx] = -np.inf
                    continue
                cache_param = params[group_indices[idx]]
                score[idx] = (
                    value_old
                    - self._numeric_solver(
                        loss_fn,
                        value_and_grad,
                        params,
                        group_indices[idx],
                        data=data,
                    )[0]
                )
                params[group_indices[idx]] = cache_param

        direction = np.argmax(score)
        if score[direction] < self.threshold:
            return params, support_set_group, -1.0
        support_set_group = np.append(support_set_group, direction)

        inactive_set = np.ones_like(params, dtype=bool)
        support_set = np.concatenate([group_indices[i] for i in support_set_group])
        inactive_set[support_set] = False
        params[inactive_set] = 0.0
        value_new, params = self._numeric_solver(
            loss_fn,
            value_and_grad,
            params,
            support_set,
            data=data,
        )

        return params, support_set_group, value_old - value_new

    def _backward_step(
        self,
        loss_fn,
        value_and_grad,
        params,
        support_set_group,
        data,
        backward_threshold,
        group_indices,
    ):
        score = np.empty(len(group_indices), dtype=float)
        for idx in range(len(group_indices)):
            if idx not in support_set_group or idx in self.preselect:
                score[idx] = np.inf
                continue
            cache_param = params[group_indices[idx]]
            params[group_indices[idx]] = 0.0
            score[idx] = loss_fn(params, data)
            params[group_indices[idx]] = cache_param
        direction = np.argmin(score)
        if score[direction] >= backward_threshold:
            return params, support_set_group, False

        support_set_group = np.delete(
            support_set_group, np.argwhere(support_set_group == direction)
        )

        inactive_set = np.ones_like(params, dtype=bool)
        support_set = np.concatenate([group_indices[i] for i in support_set_group])
        inactive_set[support_set] = False
        params[inactive_set] = 0.0
        _, params = self._numeric_solver(
            loss_fn,
            value_and_grad,
            params,
            support_set,
            data=data,
        )

        return params, support_set_group, True

    def _solve(
        self,
        sparsity,
        loss_fn,
        value_and_grad,
        init_support_set,
        init_params,
        data,
        preselect,
        group,
    ):
        if sparsity <= preselect.size:
            return super()._solve(
                sparsity,
                loss_fn,
                value_and_grad,
                init_support_set,
                init_params,
                data,
                preselect,
                group,
            )
        # init
        params = np.zeros_like(init_params)
        support_set_group = preselect
        threshold = {}
        group_num = len(np.unique(group))
        group_indices = [np.where(group == i)[0] for i in range(group_num)]

        for n_iters in range(self.max_iter):
            if support_set_group.size >= min(2 * sparsity, group_num):
                break
            params, support_set_group, backward_threshold = self._forward_step(
                loss_fn, value_and_grad, params, support_set_group, data, group_indices
            )
            if backward_threshold < 0:
                break
            threshold[support_set_group.size] = backward_threshold

            while support_set_group.size > preselect.size:
                params, support_set_group, success = self._backward_step(
                    loss_fn,
                    value_and_grad,
                    params,
                    support_set_group,
                    data,
                    loss_fn(params, data)
                    + threshold[support_set_group.size] * self.foba_threshold_ratio,
                    group_indices,
                )
                if not success:
                    break

        if self.strict_sparsity:
            while support_set_group.size > sparsity:
                params, support_set_group, _ = self._backward_step(
                    loss_fn,
                    value_and_grad,
                    params,
                    support_set_group,
                    data,
                    np.inf,
                    group_indices,
                )
            while support_set_group.size < sparsity:
                params, support_set_group, _ = self._forward_step(
                    loss_fn,
                    value_and_grad,
                    params,
                    support_set_group,
                    data,
                    group_indices,
                )

        return params, np.concatenate([group_indices[i] for i in support_set_group])




[docs]
class ForwardSolver(FobaSolver):
    r"""
    Get sparse optimal solution of convex objective function by Forward Selection algorithm.
    Specifically, ``ForwardSolver`` aims to tackle this problem: :math:`\min_{x \in R^p} f(x) \text{ s.t. } ||x||_0 \leq s`, where :math:`f(x)` is a convex objective function and :math:`s` is the sparsity level. Each element of :math:`x` can be seen as a variable, and the nonzero elements of :math:`x` are the selected variables.


    Parameters
    ----------
    dimensionality : int
        Dimension of the optimization problem, which is also the total number of variables that will be considered to select or not, denoted as :math:`p`.
    sparsity : int or array of int, optional
        The sparsity level, which is the number of nonzero elements of the optimal solution, denoted as :math:`s`.
        Default is ``range(int(p/log(log(p))/log(p)))``.
    sample_size : int, default=1
        Sample size, denoted as :math:`n`.
    threshold : float, default=0.0
        The threshold to determine whether a variable is selected or not.
    strict_sparsity : bool, default=True
        Whether to strictly control the sparsity level to be ``sparsity`` or not.
    preselect : array of int, default=[]
        An array contains the indexes of variables which must be selected.
    numeric_solver : callable, optional
        A solver for the convex optimization problem. ``ForwardSolver`` will call this function to solve the convex optimization problem in each iteration.
        It should have the same interface as ``skscope.convex_solver_nlopt``.
    max_iter : int, default=100
        Maximum number of iterations taken for converging.
    group : array of shape (dimensionality,), default=range(dimensionality)
        The group index for each variable, and it must be an incremental integer array starting from 0 without gap.
        The variables in the same group must be adjacent, and they will be selected together or not.
        Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap).
        It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables,
        and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected.
    ic_method : callable, optional
        A function to calculate the information criterion for choosing the sparsity level.
        ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size.
        Used only when ``sparsity`` is array and ``cv`` is 1.
        Note that ``sample_size`` must be given when using ``ic_method``.
    cv : int, default=1
        The folds number when use the cross-validation method.
        - If ``cv`` = 1, the sparsity level will be chosen by the information criterion.
        - If ``cv`` > 1, the sparsity level will be chosen by the cross-validation method.
    split_method : callable, optional
        A function to get the part of data used in each fold of cross-validation.
        Its interface should be ``(data, index) -> part_data`` where ``index`` is an array of int.
    cv_fold_id : array of shape (sample_size,), optional
        An array indicates different folds in CV, which samples in the same fold should be given the same number.
        The number of different elements should be equal to ``cv``.
        Used only when `cv` > 1.
    random_state : int, optional
        The random seed used for cross-validation.

    Attributes
    ----------
    params : array of shape(dimensionality,)
        The sparse optimal solution.
    objective_value: float
        The value of objective function on the solution.
    support_set : array of int
        The indices of selected variables, sorted in ascending order.

    """

    def __init__(
        self,
        dimensionality,
        sparsity=None,
        sample_size=1,
        *,
        threshold=0.0,
        strict_sparsity=True,
        preselect=[],
        numeric_solver=convex_solver_nlopt,
        max_iter=100,
        group=None,
        ic_method=None,
        cv=1,
        cv_fold_id=None,
        split_method=None,
        random_state=None,
    ):
        super().__init__(
            dimensionality=dimensionality,
            sparsity=sparsity,
            sample_size=sample_size,
            preselect=preselect,
            use_gradient=False,
            strict_sparsity=strict_sparsity,
            threshold=threshold,
            foba_threshold_ratio=0.5,
            numeric_solver=numeric_solver,
            max_iter=max_iter,
            group=group,
            ic_method=ic_method,
            cv=cv,
            cv_fold_id=cv_fold_id,
            split_method=split_method,
            random_state=random_state,
        )

    def _solve(
        self,
        sparsity,
        loss_fn,
        value_and_grad,
        init_support_set,
        init_params,
        data,
        preselect,
        group,
    ):
        if sparsity <= preselect.size:
            return super()._solve(
                sparsity,
                loss_fn,
                value_and_grad,
                init_support_set,
                init_params,
                data,
                preselect,
                group,
            )
        # init
        params = np.zeros_like(init_params)
        support_set_group = preselect
        group_num = len(np.unique(group))
        group_indices = [np.where(group == i)[0] for i in range(group_num)]

        for iter in range(sparsity - support_set_group.size):
            params, support_set_group, backward_threshold = self._forward_step(
                loss_fn, value_and_grad, params, support_set_group, data, group_indices
            )
            if backward_threshold < 0.0:
                break

        if self.strict_sparsity:
            while support_set_group.size < sparsity:
                params, support_set_group, _ = self._forward_step(
                    loss_fn,
                    value_and_grad,
                    params,
                    support_set_group,
                    data,
                    group_indices,
                )

        return params, np.concatenate([group_indices[i] for i in support_set_group])




[docs]
class OMPSolver(ForwardSolver):
    r"""
    Get sparse optimal solution of convex objective function by Orthogonal Matching Pursuit (OMP) algorithm.
    Specifically, ``OMPSolver`` aims to tackle this problem: :math:`\min_{x \in R^p} f(x) \text{ s.t. } ||x||_0 \leq s`, where :math:`f(x)` is a convex objective function and :math:`s` is the sparsity level. Each element of :math:`x` can be seen as a variable, and the nonzero elements of :math:`x` are the selected variables.


    Parameters
    ----------
    dimensionality : int
        Dimension of the optimization problem, which is also the total number of variables that will be considered to select or not, denoted as :math:`p`.
    sparsity : int or array of int, optional
        The sparsity level, which is the number of nonzero elements of the optimal solution, denoted as :math:`s`.
        Default is ``range(int(p/log(log(p))/log(p)))``.
    sample_size : int, default=1
        Sample size, denoted as :math:`n`.
    threshold : float, default=0.0
        The threshold to determine whether a variable is selected or not.
    strict_sparsity : bool, default=True
        Whether to strictly control the sparsity level to be ``sparsity`` or not.
    preselect : array of int, default=[]
        An array contains the indexes of variables which must be selected.
    numeric_solver : callable, optional
        A solver for the convex optimization problem. ``OMPSolver`` will call this function to solve the convex optimization problem in each iteration.
        It should have the same interface as ``skscope.convex_solver_nlopt``.
    max_iter : int, default=100
        Maximum number of iterations taken for converging.
    group : array of shape (dimensionality,), default=range(dimensionality)
        The group index for each variable, and it must be an incremental integer array starting from 0 without gap.
        The variables in the same group must be adjacent, and they will be selected together or not.
        Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap).
        It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables,
        and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected.
    ic_method : callable, optional
        A function to calculate the information criterion for choosing the sparsity level.
        ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size.
        Used only when ``sparsity`` is array and ``cv`` is 1.
        Note that ``sample_size`` must be given when using ``ic_method``.
    cv : int, default=1
        The folds number when use the cross-validation method.
        - If ``cv`` = 1, the sparsity level will be chosen by the information criterion.
        - If ``cv`` > 1, the sparsity level will be chosen by the cross-validation method.
    split_method : callable, optional
        A function to get the part of data used in each fold of cross-validation.
        Its interface should be ``(data, index) -> part_data`` where ``index`` is an array of int.
    cv_fold_id : array of shape (sample_size,), optional
        An array indicates different folds in CV, which samples in the same fold should be given the same number.
        The number of different elements should be equal to ``cv``.
        Used only when `cv` > 1.
    random_state : int, optional
        The random seed used for cross-validation.

    Attributes
    ----------
    params : array of shape(dimensionality,)
        The sparse optimal solution.
    objective_value: float
        The value of objective function on the solution.
    support_set : array of int
        The indices of selected variables, sorted in ascending order.

    References
    ----------
    Shalev-Shwartz S, Srebro N, Zhang T. Trading accuracy for sparsity in optimization problems with sparsity constraints[J]. SIAM Journal on Optimization, 2010, 20(6): 2807-2832.Shalev-Shwartz S, Srebro N, Zhang T. Trading accuracy for sparsity in optimization problems with sparsity constraints[J]. SIAM Journal on Optimization, 2010, 20(6): 2807-2832.
    """

    def __init__(
        self,
        dimensionality,
        sparsity=None,
        sample_size=1,
        *,
        threshold=0.0,
        strict_sparsity=True,
        preselect=[],
        numeric_solver=convex_solver_nlopt,
        max_iter=100,
        group=None,
        ic_method=None,
        cv=1,
        cv_fold_id=None,
        split_method=None,
        random_state=None,
    ):
        super().__init__(
            dimensionality=dimensionality,
            sparsity=sparsity,
            sample_size=sample_size,
            threshold=threshold,
            strict_sparsity=strict_sparsity,
            preselect=preselect,
            numeric_solver=numeric_solver,
            max_iter=max_iter,
            group=group,
            ic_method=ic_method,
            cv=cv,
            cv_fold_id=cv_fold_id,
            split_method=split_method,
            random_state=random_state,
        )
        self.use_gradient = True