import inspect

import numpy as np
from scipy.linalg import qr

from ..utils import get_arrays_tol


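# Smallest positive normal number and machine epsilon in double precision.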
TINY = np.finfo(float).tiny
EPS = np.finfo(float).eps


def tangential_byrd_omojokun(grad, hess_prod, xl, xu, delta, debug, **kwargs):
    r"""
    Minimize approximately a quadratic function subject to bound constraints in
    a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
        s^{\mathsf{T}} H s \quad \text{s.t.} \quad
        \left\{ \begin{array}{l}
            l \le s \le u,\\
            \lVert s \rVert \le \Delta,
        \end{array} \right.

    using an active-set variation of the truncated conjugate gradient method.

    Parameters
    ----------
    grad : `numpy.ndarray`, shape (n,)
        Gradient :math:`g` as shown above.
    hess_prod : callable
        Product of the Hessian matrix :math:`H` with any vector.

            ``hess_prod(s) -> numpy.ndarray, shape (n,)``

        returns the product :math:`H s`.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to perform debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.2 of [1]_. It is assumed that the
    origin is feasible with respect to the bound constraints and that `delta`
    is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
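
    Examples
    --------
    As a small illustration, consider an identity Hessian (``hess_prod`` is
    then the identity map) with the unconstrained minimizer lying outside the
    trust region, so that the returned step lies on the trust-region
    boundary:

    >>> import numpy as np
    >>> step = tangential_byrd_omojokun(
    ...     np.array([1.0, 1.0]),
    ...     lambda s: s,
    ...     -np.ones(2),
    ...     np.ones(2),
    ...     0.5,
    ...     False,
    ... )
    >>> round(float(np.linalg.norm(step)), 12)
    0.5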
    """
    if debug:
        assert isinstance(grad, np.ndarray) and grad.ndim == 1
        assert inspect.signature(hess_prod).bind(grad)
        assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
        assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
        assert isinstance(delta, float)
        assert isinstance(debug, bool)
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= tol)
        assert np.all(xu >= -tol)
        assert np.isfinite(delta) and delta > 0.0
    xl = np.minimum(xl, 0.0)
    xu = np.maximum(xu, 0.0)

    # Copy the arrays that may be modified by the code below.
    n = grad.size
    grad = np.copy(grad)
    grad_orig = np.copy(grad)

    # Calculate the initial active set.
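    # A variable is fixed at zero only if it lies on one of its bounds and the
    # steepest-descent direction -grad does not move it strictly inside the
    # feasible region.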
    free_bd = ((xl < 0.0) | (grad < 0.0)) & ((xu > 0.0) | (grad > 0.0))

    # Set the initial iterate and the initial search direction.
    step = np.zeros_like(grad)
    sd = np.zeros_like(step)
    sd[free_bd] = -grad[free_bd]

    k = 0
    reduct = 0.0
    boundary_reached = False
    while k < np.count_nonzero(free_bd):
        # Stop the computations if sd is not a descent direction.
        grad_sd = grad @ sd
        if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
            break

        # Set alpha_tr to the step size for the trust-region constraint.
        try:
            alpha_tr = _alpha_tr(step, sd, delta)
        except ZeroDivisionError:
            break

        # Stop the computations if a step along sd is expected to give a
        # relatively small reduction in the objective function.
        if -alpha_tr * grad_sd <= 1e-8 * reduct:
            break

        # Set alpha_quad to the step size for the minimization problem.
        hess_sd = hess_prod(sd)
        curv_sd = sd @ hess_sd
        if curv_sd > TINY * abs(grad_sd):
            alpha_quad = max(-grad_sd / curv_sd, 0.0)
        else:
            alpha_quad = np.inf

        # Stop the computations if the reduction in the objective function
        # provided by an unconstrained step is small.
        alpha = min(alpha_tr, alpha_quad)
        if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
            break

        # Set alpha_bd to the step size for the bound constraints.
        i_xl = (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
        i_xu = (xu < np.inf) & (sd > TINY * np.abs(xu - step))
        all_alpha_xl = np.full_like(step, np.inf)
        all_alpha_xu = np.full_like(step, np.inf)
        all_alpha_xl[i_xl] = np.maximum(
            (xl[i_xl] - step[i_xl]) / sd[i_xl],
            0.0,
        )
        all_alpha_xu[i_xu] = np.maximum(
            (xu[i_xu] - step[i_xu]) / sd[i_xu],
            0.0,
        )
        alpha_xl = np.min(all_alpha_xl)
        alpha_xu = np.min(all_alpha_xu)
        alpha_bd = min(alpha_xl, alpha_xu)

        # Update the iterate.
        alpha = min(alpha, alpha_bd)
        if alpha > 0.0:
            step[free_bd] = np.clip(
                step[free_bd] + alpha * sd[free_bd],
                xl[free_bd],
                xu[free_bd],
            )
            grad += alpha * hess_sd
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)

        if alpha < min(alpha_tr, alpha_bd):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            beta = (grad[free_bd] @ hess_sd[free_bd]) / curv_sd
            sd[free_bd] = beta * sd[free_bd] - grad[free_bd]
            sd[~free_bd] = 0.0
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound constraint. Add this bound
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
            else:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
            free_bd[i_new] = False
            sd[free_bd] = -grad[free_bd]
            sd[~free_bd] = 0.0
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bounds to the working set to prepare for the improvement
            # of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_bd[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_bd[i_new] = False
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached:
        step_base = np.copy(step)
        step_comparator = grad_orig @ step_base + 0.5 * step_base @ hess_prod(
            step_base
        )

        while np.count_nonzero(free_bd) > 0:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
            step_sq = step[free_bd] @ step[free_bd]
            grad_sq = grad[free_bd] @ grad[free_bd]
            grad_step = grad[free_bd] @ step[free_bd]
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
            sd[~free_bd] = 0.0
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd[free_bd])
            ):
                break
            sd[free_bd] /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration. The step will be updated as:
            # step = cos(theta) * step + sin(theta) * sd.
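            # With t = tan(0.5 * theta), we have sin(theta) = 2t / (1 + t^2)
            # and cos(theta) = (1 - t^2) / (1 + t^2), so that t ranges over
            # [0, 1] as theta ranges over [0, pi / 2].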
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            temp_xl[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
            )
            temp_xu[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # Calculate some curvature information.
            hess_step = hess_prod(step)
            hess_sd = hess_prod(sd)
            curv_step = step @ hess_step
            curv_sd = sd @ hess_sd
            curv_step_sd = step @ hess_sd

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_bd + 3)
            t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
            sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
            all_reduct = sin_values * (
                grad_step * t_samples
                - grad_sd
                - t_samples * curv_step
                + sin_values
                * (t_samples * curv_step_sd - 0.5 * (curv_sd - curv_step))
            )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step[free_bd] = (
                cos_value * step[free_bd] + sin_values[i_max] * sd[free_bd]
            )
            grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound constraints, add them
            # to the working set, and restart the alternative iteration.
            # Otherwise, the calculations are terminated.
            if t_bd < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_bd:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_bd[i_new] = False
                if t_xu <= t_bd:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_bd[i_new] = False
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
        if grad_orig @ step + 0.5 * step @ hess_prod(step) > step_comparator:
            step = step_base

    if debug:
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def constrained_tangential_byrd_omojokun(
    grad,
    hess_prod,
    xl,
    xu,
    aub,
    bub,
    aeq,
    delta,
    debug,
    **kwargs,
):
    r"""
    Minimize approximately a quadratic function subject to bound and linear
    constraints in a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad g^{\mathsf{T}} s + \frac{1}{2}
        s^{\mathsf{T}} H s \quad \text{s.t.} \quad
        \left\{ \begin{array}{l}
            l \le s \le u,\\
            A_{\scriptscriptstyle I} s \le b_{\scriptscriptstyle I},\\
            A_{\scriptscriptstyle E} s = 0,\\
            \lVert s \rVert \le \Delta,
        \end{array} \right.

    using an active-set variation of the truncated conjugate gradient method.

    Parameters
    ----------
    grad : `numpy.ndarray`, shape (n,)
        Gradient :math:`g` as shown above.
    hess_prod : callable
        Product of the Hessian matrix :math:`H` with any vector.

            ``hess_prod(s) -> numpy.ndarray, shape (n,)``

        returns the product :math:`H s`.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    aub : `numpy.ndarray`, shape (m_linear_ub, n)
        Coefficient matrix :math:`A_{\scriptscriptstyle I}` as shown above.
    bub : `numpy.ndarray`, shape (m_linear_ub,)
        Right-hand side :math:`b_{\scriptscriptstyle I}` as shown above.
    aeq : `numpy.ndarray`, shape (m_linear_eq, n)
        Coefficient matrix :math:`A_{\scriptscriptstyle E}` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to perform debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.3 of [1]_. It is assumed that the
    origin is feasible with respect to the bound and linear constraints, and
    that `delta` is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
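
    Examples
    --------
    As a small illustration, consider an identity Hessian, no linear
    inequality constraints, and a single linear equality constraint
    :math:`s_1 + s_2 = 0`; the returned step is the minimizer of the
    quadratic function over the null space of
    :math:`A_{\scriptscriptstyle E}`:

    >>> import numpy as np
    >>> step = constrained_tangential_byrd_omojokun(
    ...     np.array([1.0, 0.0]),
    ...     lambda s: s,
    ...     -np.ones(2),
    ...     np.ones(2),
    ...     np.zeros((0, 2)),
    ...     np.zeros(0),
    ...     np.array([[1.0, 1.0]]),
    ...     2.0,
    ...     False,
    ... )
    >>> np.round(step, 6)
    array([-0.5,  0.5])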
    """
    if debug:
        assert isinstance(grad, np.ndarray) and grad.ndim == 1
        assert inspect.signature(hess_prod).bind(grad)
        assert isinstance(xl, np.ndarray) and xl.shape == grad.shape
        assert isinstance(xu, np.ndarray) and xu.shape == grad.shape
        assert (
            isinstance(aub, np.ndarray)
            and aub.ndim == 2
            and aub.shape[1] == grad.size
        )
        assert (
            isinstance(bub, np.ndarray)
            and bub.ndim == 1
            and bub.size == aub.shape[0]
        )
        assert (
            isinstance(aeq, np.ndarray)
            and aeq.ndim == 2
            and aeq.shape[1] == grad.size
        )
        assert isinstance(delta, float)
        assert isinstance(debug, bool)
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= tol)
        assert np.all(xu >= -tol)
        assert np.all(bub >= -tol)
        assert np.isfinite(delta) and delta > 0.0
    xl = np.minimum(xl, 0.0)
    xu = np.maximum(xu, 0.0)
    bub = np.maximum(bub, 0.0)

    # Copy the arrays that may be modified by the code below.
    n = grad.size
    grad = np.copy(grad)
    grad_orig = np.copy(grad)

    # Calculate the initial active set.
    free_xl = (xl < 0.0) | (grad < 0.0)
    free_xu = (xu > 0.0) | (grad > 0.0)
    free_ub = (bub > 0.0) | (aub @ grad > 0.0)
    n_act, q = qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub)

    # Set the initial iterate and the initial search direction.
    step = np.zeros_like(grad)
    sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
    resid = np.copy(bub)

    k = 0
    reduct = 0.0
    boundary_reached = False
    while k < n - n_act:
        # Stop the computations if sd is not a descent direction.
        grad_sd = grad @ sd
        if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
            break

        # Set alpha_tr to the step size for the trust-region constraint.
        try:
            alpha_tr = _alpha_tr(step, sd, delta)
        except ZeroDivisionError:
            break

        # Stop the computations if a step along sd is expected to give a
        # relatively small reduction in the objective function.
        if -alpha_tr * grad_sd <= 1e-8 * reduct:
            break

        # Set alpha_quad to the step size for the minimization problem.
        hess_sd = hess_prod(sd)
        curv_sd = sd @ hess_sd
        if curv_sd > TINY * abs(grad_sd):
            alpha_quad = max(-grad_sd / curv_sd, 0.0)
        else:
            alpha_quad = np.inf

        # Stop the computations if the reduction in the objective function
        # provided by an unconstrained step is small.
        alpha = min(alpha_tr, alpha_quad)
        if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
            break

        # Set alpha_bd to the step size for the bound constraints.
        i_xl = free_xl & (xl > -np.inf) & (sd < -TINY * np.abs(xl - step))
        i_xu = free_xu & (xu < np.inf) & (sd > TINY * np.abs(xu - step))
        all_alpha_xl = np.full_like(step, np.inf)
        all_alpha_xu = np.full_like(step, np.inf)
        all_alpha_xl[i_xl] = np.maximum(
            (xl[i_xl] - step[i_xl]) / sd[i_xl],
            0.0,
        )
        all_alpha_xu[i_xu] = np.maximum(
            (xu[i_xu] - step[i_xu]) / sd[i_xu],
            0.0,
        )
        alpha_xl = np.min(all_alpha_xl)
        alpha_xu = np.min(all_alpha_xu)
        alpha_bd = min(alpha_xl, alpha_xu)

        # Set alpha_ub to the step size for the linear constraints.
        aub_sd = aub @ sd
        i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
        all_alpha_ub = np.full_like(bub, np.inf)
        all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
        alpha_ub = np.min(all_alpha_ub, initial=np.inf)

        # Update the iterate.
        alpha = min(alpha, alpha_bd, alpha_ub)
        if alpha > 0.0:
            step = np.clip(step + alpha * sd, xl, xu)
            grad += alpha * hess_sd
            resid = np.maximum(0.0, resid - alpha * aub_sd)
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)

        if alpha < min(alpha_tr, alpha_bd, alpha_ub):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            beta = (grad_proj @ hess_sd) / curv_sd
            sd = beta * sd - grad_proj
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound/linear constraint. Add this
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            elif alpha_xu <= alpha:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            else:
                i_new = np.argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_tangential_byrd_omojokun(
                aub,
                aeq,
                free_xl,
                free_xu,
                free_ub,
            )
            sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bound/linear constraints to the working set to prepare for
            # the improvement of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            if alpha_ub <= alpha:
                i_new = _argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_tangential_byrd_omojokun(
                aub,
                aeq,
                free_xl,
                free_xu,
                free_ub,
            )
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached and n_act < n:
        step_base = np.copy(step)
        while n_act < n:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
            step_proj = q[:, n_act:] @ (q[:, n_act:].T @ step)
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            step_sq = step_proj @ step_proj
            grad_sq = grad_proj @ grad_proj
            grad_step = grad_proj @ step_proj
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd = q[:, n_act:] @ (
                q[:, n_act:].T @ (grad_step * step - step_sq * grad)
            )
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd)
            ):
                break
            sd /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration for the bound constraints. The step
            # will be updated as:
            # step += (cos(theta) - 1) * step_proj + sin(theta) * sd.
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            temp_xl[free_xl] = sd[free_xl] ** 2.0 - dist_xl[free_xl] * (
                dist_xl[free_xl] - 2.0 * step_proj[free_xl]
            )
            temp_xu[free_xu] = sd[free_xu] ** 2.0 - dist_xu[free_xu] * (
                dist_xu[free_xu] + 2.0 * step_proj[free_xu]
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration for the linear constraints.
            temp_ub = np.zeros_like(resid)
            aub_step = aub @ step_proj
            aub_sd = aub @ sd
            temp_ub[free_ub] = aub_sd[free_ub] ** 2.0 - resid[free_ub] * (
                resid[free_ub] + 2.0 * aub_step[free_ub]
            )
            temp_ub[temp_ub > 0.0] = (
                np.sqrt(temp_ub[temp_ub > 0.0]) + aub_sd[temp_ub > 0.0]
            )
            i_ub = temp_ub > TINY * resid
            all_t_ub = np.ones_like(resid)
            all_t_ub[i_ub] = np.minimum(
                all_t_ub[i_ub],
                resid[i_ub] / temp_ub[i_ub],
            )
            t_ub = np.min(all_t_ub, initial=1.0)
            t_min = min(t_bd, t_ub)

            # Calculate some curvature information.
            hess_step = hess_prod(step_proj)
            hess_sd = hess_prod(sd)
            curv_step = step_proj @ hess_step
            curv_sd = sd @ hess_sd
            curv_step_sd = step_proj @ hess_sd

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_min + 3)
            t_samples = np.linspace(t_min / n_samples, t_min, n_samples)
            sin_values = 2.0 * t_samples / (1.0 + t_samples**2.0)
            all_reduct = sin_values * (
                grad_step * t_samples
                - grad_sd
                - sin_values
                * (
                    0.5 * t_samples**2.0 * curv_step
                    - t_samples * curv_step_sd
                    + 0.5 * curv_sd
                )
            )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step = np.clip(
                step + (cos_value - 1.0) * step_proj + sin_values[i_max] * sd,
                xl,
                xu,
            )
            grad += (cos_value - 1.0) * hess_step + sin_values[i_max] * hess_sd
            resid = np.maximum(
                0.0,
                resid
                - (cos_value - 1.0) * aub_step
                - sin_values[i_max] * aub_sd,
            )
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound or linear constraints,
            # add them to the working set, and restart the alternative
            # iteration. Otherwise, the calculations are terminated.
            if t_min < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_min:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_xl[i_new] = False
                if t_xu <= t_min:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_xu[i_new] = False
                if t_ub <= t_min:
                    i_new = _argmin(all_t_ub)
                    free_ub[i_new] = False
                n_act, q = qr_tangential_byrd_omojokun(
                    aub,
                    aeq,
                    free_xl,
                    free_xu,
                    free_ub,
                )
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
        if grad_orig @ step + 0.5 * step @ hess_prod(
            step
        ) > grad_orig @ step_base + 0.5 * step_base @ hess_prod(step_base):
            step = step_base

    if debug:
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.all(aub @ step <= bub + tol)
        assert np.all(np.abs(aeq @ step) <= tol)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def normal_byrd_omojokun(aub, bub, aeq, beq, xl, xu, delta, debug, **kwargs):
    r"""
    Minimize approximately a linear constraint violation subject to bound
    constraints in a trust region.

    This function solves approximately

    .. math::

        \min_{s \in \mathbb{R}^n} \quad \frac{1}{2} \big( \lVert \max \{
        A_{\scriptscriptstyle I} s - b_{\scriptscriptstyle I}, 0 \} \rVert^2 +
        \lVert A_{\scriptscriptstyle E} s - b_{\scriptscriptstyle E} \rVert^2
        \big) \quad \text{s.t.}
        \quad
        \left\{ \begin{array}{l}
            l \le s \le u,\\
            \lVert s \rVert \le \Delta,
        \end{array} \right.

    using a variation of the truncated conjugate gradient method.

    Parameters
    ----------
    aub : `numpy.ndarray`, shape (m_linear_ub, n)
        Matrix :math:`A_{\scriptscriptstyle I}` as shown above.
    bub : `numpy.ndarray`, shape (m_linear_ub,)
        Vector :math:`b_{\scriptscriptstyle I}` as shown above.
    aeq : `numpy.ndarray`, shape (m_linear_eq, n)
        Matrix :math:`A_{\scriptscriptstyle E}` as shown above.
    beq : `numpy.ndarray`, shape (m_linear_eq,)
        Vector :math:`b_{\scriptscriptstyle E}` as shown above.
    xl : `numpy.ndarray`, shape (n,)
        Lower bounds :math:`l` as shown above.
    xu : `numpy.ndarray`, shape (n,)
        Upper bounds :math:`u` as shown above.
    delta : float
        Trust-region radius :math:`\Delta` as shown above.
    debug : bool
        Whether to perform debugging tests during the execution.

    Returns
    -------
    `numpy.ndarray`, shape (n,)
        Approximate solution :math:`s`.

    Other Parameters
    ----------------
    improve_tcg : bool, optional
        If True, a solution generated by the truncated conjugate gradient
        method that is on the boundary of the trust region is improved by
        moving around the trust-region boundary on the two-dimensional space
        spanned by the solution and the gradient of the quadratic function at
        the solution (default is True).

    Notes
    -----
    This function implements Algorithm 6.4 of [1]_. It is assumed that the
    origin is feasible with respect to the bound constraints and that `delta`
    is finite and positive.

    References
    ----------
    .. [1] T. M. Ragonneau. *Model-Based Derivative-Free Optimization Methods
       and Software*. PhD thesis, Department of Applied Mathematics, The Hong
       Kong Polytechnic University, Hong Kong, China, 2022. URL:
       https://theses.lib.polyu.edu.hk/handle/200/12294.
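
    Examples
    --------
    As a small illustration, consider a single inactive linear inequality
    constraint and the linear equality constraints :math:`s = (1, -1)`, which
    can be satisfied exactly within the bounds and the trust region:

    >>> import numpy as np
    >>> step = normal_byrd_omojokun(
    ...     np.array([[1.0, 0.0]]),
    ...     np.array([2.0]),
    ...     np.eye(2),
    ...     np.array([1.0, -1.0]),
    ...     -np.ones(2),
    ...     np.ones(2),
    ...     3.0,
    ...     False,
    ... )
    >>> np.round(step, 6)
    array([ 1., -1.])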
    """
    if debug:
        assert isinstance(aub, np.ndarray) and aub.ndim == 2
        assert (
            isinstance(bub, np.ndarray)
            and bub.ndim == 1
            and bub.size == aub.shape[0]
        )
        assert (
            isinstance(aeq, np.ndarray)
            and aeq.ndim == 2
            and aeq.shape[1] == aub.shape[1]
        )
        assert (
            isinstance(beq, np.ndarray)
            and beq.ndim == 1
            and beq.size == aeq.shape[0]
        )
        assert isinstance(xl, np.ndarray) and xl.shape == (aub.shape[1],)
        assert isinstance(xu, np.ndarray) and xu.shape == (aub.shape[1],)
        assert isinstance(delta, float)
        assert isinstance(debug, bool)
        tol = get_arrays_tol(xl, xu)
        assert np.all(xl <= tol)
        assert np.all(xu >= -tol)
        assert np.isfinite(delta) and delta > 0.0
    xl = np.minimum(xl, 0.0)
    xu = np.maximum(xu, 0.0)

    # Calculate the initial active set.
    m_linear_ub, n = aub.shape
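    # The objective function of the reformulated problem in the extended space
    # of (step, slack) is (||aeq @ step - beq||^2 + ||slack||^2) / 2, whose
    # gradient with respect to the slack variables equals the slack variables
    # themselves. Hence, grad[n:] below stores both the corresponding gradient
    # entries and the current values of the slack variables.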
    grad = np.r_[aeq.T @ -beq, np.maximum(0.0, -bub)]
    free_xl = (xl < 0.0) | (grad[:n] < 0.0)
    free_xu = (xu > 0.0) | (grad[:n] > 0.0)
    free_slack = bub < 0.0
    free_ub = (bub > 0.0) | (aub @ grad[:n] - grad[n:] > 0.0)
    n_act, q = qr_normal_byrd_omojokun(
        aub,
        free_xl,
        free_xu,
        free_slack,
        free_ub,
    )

    # Calculate an upper bound on the norm of the slack variables. It is not
    # used in the original algorithm, but it may prevent undesired behaviors
    # engendered by computer rounding errors.
    delta_slack = np.sqrt(beq @ beq + grad[n:] @ grad[n:])

    # Set the initial iterate and the initial search direction.
    step = np.zeros(n)
    sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
    resid = bub + grad[n:]

    k = 0
    reduct = 0.0
    boundary_reached = False
    while k < n + m_linear_ub - n_act:
        # Stop the computations if sd is not a descent direction.
        grad_sd = grad @ sd
        if grad_sd >= -10.0 * EPS * n * max(1.0, np.linalg.norm(grad)):
            break

        # Set alpha_tr to the step size for the trust-region constraint.
        try:
            alpha_tr = _alpha_tr(step, sd[:n], delta)
        except ZeroDivisionError:
            alpha_tr = np.inf

        # Prevent undesired behaviors engendered by computer rounding errors by
        # considering the trust-region constraint on the slack variables.
        try:
            alpha_tr = min(alpha_tr, _alpha_tr(grad[n:], sd[n:], delta_slack))
        except ZeroDivisionError:
            pass

        # Stop the computations if a step along sd is expected to give a
        # relatively small reduction in the objective function.
        if -alpha_tr * grad_sd <= 1e-8 * reduct:
            break

        # Set alpha_quad to the step size for the minimization problem.
        hess_sd = np.r_[aeq.T @ (aeq @ sd[:n]), sd[n:]]
        curv_sd = sd @ hess_sd
        if curv_sd > TINY * abs(grad_sd):
            alpha_quad = max(-grad_sd / curv_sd, 0.0)
        else:
            alpha_quad = np.inf

        # Stop the computations if the reduction in the objective function
        # provided by an unconstrained step is small.
        alpha = min(alpha_tr, alpha_quad)
        if -alpha * (grad_sd + 0.5 * alpha * curv_sd) <= 1e-8 * reduct:
            break

        # Set alpha_bd to the step size for the bound constraints.
        i_xl = free_xl & (xl > -np.inf) & (sd[:n] < -TINY * np.abs(xl - step))
        i_xu = free_xu & (xu < np.inf) & (sd[:n] > TINY * np.abs(xu - step))
        i_slack = free_slack & (sd[n:] < -TINY * np.abs(grad[n:]))
        all_alpha_xl = np.full_like(step, np.inf)
        all_alpha_xu = np.full_like(step, np.inf)
        all_alpha_slack = np.full_like(bub, np.inf)
        all_alpha_xl[i_xl] = np.maximum(
            (xl[i_xl] - step[i_xl]) / sd[:n][i_xl],
            0.0,
        )
        all_alpha_xu[i_xu] = np.maximum(
            (xu[i_xu] - step[i_xu]) / sd[:n][i_xu],
            0.0,
        )
        all_alpha_slack[i_slack] = np.maximum(
            -grad[n:][i_slack] / sd[n:][i_slack],
            0.0,
        )
        alpha_xl = np.min(all_alpha_xl)
        alpha_xu = np.min(all_alpha_xu)
        alpha_slack = np.min(all_alpha_slack, initial=np.inf)
        alpha_bd = min(alpha_xl, alpha_xu, alpha_slack)

        # Set alpha_ub to the step size for the linear constraints.
        aub_sd = aub @ sd[:n] - sd[n:]
        i_ub = free_ub & (aub_sd > TINY * np.abs(resid))
        all_alpha_ub = np.full_like(bub, np.inf)
        all_alpha_ub[i_ub] = resid[i_ub] / aub_sd[i_ub]
        alpha_ub = np.min(all_alpha_ub, initial=np.inf)

        # Update the iterate.
        alpha = min(alpha, alpha_bd, alpha_ub)
        if alpha > 0.0:
            step = np.clip(step + alpha * sd[:n], xl, xu)
            grad += alpha * hess_sd
            resid = np.maximum(0.0, resid - alpha * aub_sd)
            reduct -= alpha * (grad_sd + 0.5 * alpha * curv_sd)

        if alpha < min(alpha_tr, alpha_bd, alpha_ub):
            # The current iteration is a conjugate gradient iteration. Update
            # the search direction so that it is conjugate (with respect to H)
            # to all the previous search directions.
            grad_proj = q[:, n_act:] @ (q[:, n_act:].T @ grad)
            beta = (grad_proj @ hess_sd) / curv_sd
            sd = beta * sd - grad_proj
            k += 1
        elif alpha < alpha_tr:
            # The iterate is restricted by a bound/linear constraint. Add this
            # constraint to the active set, and restart the calculations.
            if alpha_xl <= alpha:
                i_new = np.argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            elif alpha_xu <= alpha:
                i_new = np.argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            elif alpha_slack <= alpha:
                i_new = np.argmin(all_alpha_slack)
                free_slack[i_new] = False
            else:
                i_new = np.argmin(all_alpha_ub)
                free_ub[i_new] = False
            n_act, q = qr_normal_byrd_omojokun(
                aub, free_xl, free_xu, free_slack, free_ub
            )
            sd = -q[:, n_act:] @ (q[:, n_act:].T @ grad)
            k = 0
        else:
            # The current iterate is on the trust-region boundary. Add all the
            # active bound constraints to the working set to prepare for the
            # improvement of the solution, and stop the iterations.
            if alpha_xl <= alpha:
                i_new = _argmin(all_alpha_xl)
                step[i_new] = xl[i_new]
                free_xl[i_new] = False
            if alpha_xu <= alpha:
                i_new = _argmin(all_alpha_xu)
                step[i_new] = xu[i_new]
                free_xu[i_new] = False
            boundary_reached = True
            break

    # Attempt to improve the solution on the trust-region boundary.
    if kwargs.get("improve_tcg", True) and boundary_reached:
        step_base = np.copy(step)
        free_bd = free_xl & free_xu
        grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
            aeq @ step - beq
        )
        sd = np.zeros(n)
        while np.count_nonzero(free_bd) > 0:
            # Check whether a substantial reduction in the objective function
            # is possible, and set the search direction.
            step_sq = step[free_bd] @ step[free_bd]
            grad_sq = grad[free_bd] @ grad[free_bd]
            grad_step = grad[free_bd] @ step[free_bd]
            grad_sd = -np.sqrt(max(step_sq * grad_sq - grad_step**2.0, 0.0))
            sd[free_bd] = grad_step * step[free_bd] - step_sq * grad[free_bd]
            sd[~free_bd] = 0.0
            if grad_sd >= -1e-8 * reduct or np.any(
                grad_sd >= -TINY * np.abs(sd[free_bd])
            ):
                break
            sd[free_bd] /= -grad_sd

            # Calculate an upper bound for the tangent of half the angle theta
            # of this alternative iteration. The step will be updated as:
            # step = cos(theta) * step + sin(theta) * sd.
            temp_xl = np.zeros(n)
            temp_xu = np.zeros(n)
            temp_xl[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xl[free_bd] ** 2.0
            )
            temp_xu[free_bd] = (
                step[free_bd] ** 2.0 + sd[free_bd] ** 2.0 - xu[free_bd] ** 2.0
            )
            temp_xl[temp_xl > 0.0] = (
                np.sqrt(temp_xl[temp_xl > 0.0]) - sd[temp_xl > 0.0]
            )
            temp_xu[temp_xu > 0.0] = (
                np.sqrt(temp_xu[temp_xu > 0.0]) + sd[temp_xu > 0.0]
            )
            dist_xl = np.maximum(step - xl, 0.0)
            dist_xu = np.maximum(xu - step, 0.0)
            i_xl = temp_xl > TINY * dist_xl
            i_xu = temp_xu > TINY * dist_xu
            all_t_xl = np.ones(n)
            all_t_xu = np.ones(n)
            all_t_xl[i_xl] = np.minimum(
                all_t_xl[i_xl],
                dist_xl[i_xl] / temp_xl[i_xl],
            )
            all_t_xu[i_xu] = np.minimum(
                all_t_xu[i_xu],
                dist_xu[i_xu] / temp_xu[i_xu],
            )
            t_xl = np.min(all_t_xl)
            t_xu = np.min(all_t_xu)
            t_bd = min(t_xl, t_xu)

            # For a range of equally spaced values of tan(0.5 * theta),
            # calculate the reduction in the objective function that would be
            # obtained by accepting the corresponding angle.
            n_samples = 20
            n_samples = int((n_samples - 3) * t_bd + 3)
            t_samples = np.linspace(t_bd / n_samples, t_bd, n_samples)
            resid_ub = np.maximum(aub @ step - bub, 0.0)
            resid_eq = aeq @ step - beq
            step_proj = np.copy(step)
            step_proj[~free_bd] = 0.0
            all_reduct = np.empty(n_samples)
            for i in range(n_samples):
                sin_value = 2.0 * t_samples[i] / (1.0 + t_samples[i] ** 2.0)
                step_alt = np.clip(
                    step + sin_value * (sd - t_samples[i] * step_proj),
                    xl,
                    xu,
                )
                resid_ub_alt = np.maximum(aub @ step_alt - bub, 0.0)
                resid_eq_alt = aeq @ step_alt - beq
                all_reduct[i] = 0.5 * (
                    resid_ub @ resid_ub
                    + resid_eq @ resid_eq
                    - resid_ub_alt @ resid_ub_alt
                    - resid_eq_alt @ resid_eq_alt
                )
            if np.all(all_reduct <= 0.0):
                # No reduction in the objective function is obtained.
                break

            # Accept the angle that provides the largest reduction in the
            # objective function, and update the iterate.
            i_max = np.argmax(all_reduct)
            cos_value = (1.0 - t_samples[i_max] ** 2.0) / (
                1.0 + t_samples[i_max] ** 2.0
            )
            sin_value = 2.0 * t_samples[i_max] / (
                1.0 + t_samples[i_max] ** 2.0
            )
            step[free_bd] = cos_value * step[free_bd] + sin_value * sd[free_bd]
            grad = aub.T @ np.maximum(aub @ step - bub, 0.0) + aeq.T @ (
                aeq @ step - beq
            )
            reduct += all_reduct[i_max]

            # If the above angle is restricted by bound constraints, add them
            # to the working set, and restart the alternative iteration.
            # Otherwise, the calculations are terminated.
            if t_bd < 1.0 and i_max == n_samples - 1:
                if t_xl <= t_bd:
                    i_new = _argmin(all_t_xl)
                    step[i_new] = xl[i_new]
                    free_bd[i_new] = False
                if t_xu <= t_bd:
                    i_new = _argmin(all_t_xu)
                    step[i_new] = xu[i_new]
                    free_bd[i_new] = False
            else:
                break

        # Ensure that the alternative iteration improves the objective
        # function.
        resid_ub = np.maximum(aub @ step - bub, 0.0)
        resid_ub_base = np.maximum(aub @ step_base - bub, 0.0)
        resid_eq = aeq @ step - beq
        resid_eq_base = aeq @ step_base - beq
        if (
            resid_ub @ resid_ub + resid_eq @ resid_eq
            > resid_ub_base @ resid_ub_base + resid_eq_base @ resid_eq_base
        ):
            step = step_base

    if debug:
        assert np.all(xl <= step)
        assert np.all(step <= xu)
        assert np.linalg.norm(step) < 1.1 * delta
    return step


def qr_tangential_byrd_omojokun(aub, aeq, free_xl, free_xu, free_ub):
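    """
    Compute a QR factorization with column pivoting of the matrix whose
    columns are the gradients of the active constraints, and estimate the
    number of linearly independent active constraints from the diagonal of
    the upper triangular factor. The trailing columns ``q[:, n_act:]`` form
    an orthonormal basis of the (estimated) null space of the active
    constraints.
    """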
    n = free_xl.size
    identity = np.eye(n)
    q, r, _ = qr(
        np.block(
            [
                [aeq],
                [aub[~free_ub, :]],
                [-identity[~free_xl, :]],
                [identity[~free_xu, :]],
            ]
        ).T,
        pivoting=True,
    )
    n_act = np.count_nonzero(
        np.abs(np.diag(r))
        >= 10.0
        * EPS
        * n
        * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
    )
    return n_act, q


def qr_normal_byrd_omojokun(aub, free_xl, free_xu, free_slack, free_ub):
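    """
    Same as `qr_tangential_byrd_omojokun`, but in the extended space of the
    variables and the slack variables employed by `normal_byrd_omojokun`.
    """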
    m_linear_ub, n = aub.shape
    identity_n = np.eye(n)
    identity_m = np.eye(m_linear_ub)
    q, r, _ = qr(
        np.block(
            [
                [
                    aub[~free_ub, :],
                    -identity_m[~free_ub, :],
                ],
                [
                    np.zeros((m_linear_ub - np.count_nonzero(free_slack), n)),
                    -identity_m[~free_slack, :],
                ],
                [
                    -identity_n[~free_xl, :],
                    np.zeros((n - np.count_nonzero(free_xl), m_linear_ub)),
                ],
                [
                    identity_n[~free_xu, :],
                    np.zeros((n - np.count_nonzero(free_xu), m_linear_ub)),
                ],
            ]
        ).T,
        pivoting=True,
    )
    n_act = np.count_nonzero(
        np.abs(np.diag(r))
        >= 10.0
        * EPS
        * (n + m_linear_ub)
        * np.linalg.norm(r[: np.min(r.shape), : np.min(r.shape)], axis=0)
    )
    return n_act, q


def _alpha_tr(step, sd, delta):
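    """
    Return the largest step size ``alpha >= 0`` such that ``step + alpha *
    sd`` remains in the trust region of radius `delta`, using a formulation
    of the quadratic equation that avoids numerical cancellation. A
    `ZeroDivisionError` is raised if the computation is deemed unsafe.
    """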
    step_sd = step @ sd
    sd_sq = sd @ sd
    dist_tr_sq = delta**2.0 - step @ step
    temp = np.sqrt(max(step_sd**2.0 + sd_sq * dist_tr_sq, 0.0))
    if step_sd <= 0.0 and sd_sq > TINY * abs(temp - step_sd):
        alpha_tr = max((temp - step_sd) / sd_sq, 0.0)
    elif abs(temp + step_sd) > TINY * dist_tr_sq:
        alpha_tr = max(dist_tr_sq / (temp + step_sd), 0.0)
    else:
        raise ZeroDivisionError
    return alpha_tr


def _argmax(x):
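    """Return all the indices at which `x` attains its maximum."""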
    return np.flatnonzero(x >= np.max(x))


def _argmin(x):
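    """Return all the indices at which `x` attains its minimum."""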
    return np.flatnonzero(x <= np.min(x))
