
    >[g2                         d Z ddlZddlmZ ddlZddlmZ ddl	m
Z
mZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlm Z m!Z!  G d deee
      Z"y)z!
Nearest Centroid Classification
    N)Real)sparse   )BaseEstimatorClassifierMixin_fit_context)#DiscriminantAnalysisPredictionMixin)pairwise_distancespairwise_distances_argmin)LabelEncoder)get_tags)available_if)Interval
StrOptions)check_classification_targets)csc_median_axis_0)check_is_fittedvalidate_datac                   N    e Zd ZU dZ eddh      g eeddd      dgd ed	d
h      gdZee	d<   	 ddd
ddZ
 ed      d        Z fdZd Zd Z  ee      ej$                        Z  ee      ej&                        Z  ee      ej(                        Z fdZ xZS )NearestCentroida  Nearest centroid classifier.

    Each class is represented by its centroid, with test samples classified to
    the class with the nearest centroid.

    Read more in the :ref:`User Guide <nearest_centroid_classifier>`.

    Parameters
    ----------
    metric : {"euclidean", "manhattan"}, default="euclidean"
        Metric to use for distance computation.

        If `metric="euclidean"`, the centroid for the samples corresponding to each
        class is the arithmetic mean, which minimizes the sum of squared L1 distances.
        If `metric="manhattan"`, the centroid is the feature-wise median, which
        minimizes the sum of L1 distances.

        .. versionchanged:: 1.5
            All metrics but `"euclidean"` and `"manhattan"` were deprecated and
            now raise an error.

        .. versionchanged:: 0.19
            `metric='precomputed'` was deprecated and now raises an error

    shrink_threshold : float, default=None
        Threshold for shrinking centroids to remove features.

    priors : {"uniform", "empirical"} or array-like of shape (n_classes,),         default="uniform"
        The class prior probabilities. By default, the class proportions are
        inferred from the training data.

        .. versionadded:: 1.6

    Attributes
    ----------
    centroids_ : array-like of shape (n_classes, n_features)
        Centroid of each class.

    classes_ : array of shape (n_classes,)
        The unique classes labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    deviations_ : ndarray of shape (n_classes, n_features)
        Deviations (or shrinkages) of the centroids of each class from the
        overall centroid. Equal to eq. (18.4) if `shrink_threshold=None`,
        else (18.5) p. 653 of [2]. Can be used to identify features used
        for classification.

        .. versionadded:: 1.6

    within_class_std_dev_ : ndarray of shape (n_features,)
        Pooled or within-class standard deviation of input data.

        .. versionadded:: 1.6

    class_prior_ : ndarray of shape (n_classes,)
        The class prior probabilities.

        .. versionadded:: 1.6

    See Also
    --------
    KNeighborsClassifier : Nearest neighbors classifier.

    Notes
    -----
    When used for text classification with tf-idf vectors, this classifier is
    also known as the Rocchio classifier.

    References
    ----------
    [1] Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of
    multiple cancer types by shrunken centroids of gene expression. Proceedings
    of the National Academy of Sciences of the United States of America,
    99(10), 6567-6572. The National Academy of Sciences.

    [2] Hastie, T., Tibshirani, R., Friedman, J. (2009). The Elements of Statistical
    Learning Data Mining, Inference, and Prediction. 2nd Edition. New York, Springer.

    Examples
    --------
    >>> from sklearn.neighbors import NearestCentroid
    >>> import numpy as np
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([1, 1, 1, 2, 2, 2])
    >>> clf = NearestCentroid()
    >>> clf.fit(X, y)
    NearestCentroid()
    >>> print(clf.predict([[-0.8, -1]]))
    [1]
    	manhattan	euclideanr   Nneither)closedz
array-like	empiricaluniformmetricshrink_thresholdpriors_parameter_constraints)r   r    c                .    || _         || _        || _        y )Nr   )selfr   r   r    s       ^/var/www/html/bid-api/venv/lib/python3.12/site-packages/sklearn/neighbors/_nearest_centroid.py__init__zNearestCentroid.__init__   s      0    T)prefer_skip_nested_validationc                    | j                   dk(  rt        | ||dg      \  }}n8t        |       j                  j                  rdnd}t        | |||ddg      \  }}t        j                  |      }t        |       |j                  \  }}t               }|j                  |      }|j                  x| _        }	|	j                  }
|
dk  rt        d	|
z        | j                  d
k(  rJt        j                   |d      \  }}t        j"                  |      t%        t'        |            z  | _        nU| j                  dk(  r"t        j*                  d|
z  g|
z        | _        n$t        j*                  | j                        | _        | j(                  dk  j-                         rt        d      t        j.                  | j(                  j1                         d      sFt3        j4                  dt6               | j(                  | j(                  j1                         z  | _        t        j8                  |
|ft        j:                        | _        t        j>                  |
      }tA        |
      D ]  }||k(  }t        j0                  |      ||<   |rt        jB                  |      d   }| j                   dk(  rF|s(t        jD                  ||   d      | j<                  |<   stG        ||         | j<                  |<   ||   jI                  d      | j<                  |<    t        jJ                  || j<                  |   z
  d      dz  }t        jJ                  t        jL                  |j1                  d      ||
z
  z        d      | _'        t-        | jN                  dk(        rt3        j4                  d       d}|rTt        jP                  |jS                  d      |jU                  d      z
  jW                         dk(        rt        |      |s8t        jP                  t        jX                  |d      dk(        rt        |      |jI                  d      }t        jL                  d|z  d|z  z
        }| jN                  t        jD                  | jN                        z   }|j[                  t'        |      d      }||z  }t        jJ                  | j<                  |z
  |z  d      | _.        | j^                  rt        j`                  | j\                        }t        jb                  | j\                        | j^                  z
  | _.        t        jd                  | j\                  dd| j\                         | xj\                  |z  c_.        || j\                  z  }t        jJ                  ||z   d      | _        | S )a0  
        Fit the NearestCentroid model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.
            Note that centroid shrinking cannot be used with sparse matrices.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Fitted estimator.
        r   csc)accept_sparse	allow-nanTcsr)ensure_all_finiter*   r   z>The number of classes has to be greater than one; got %d classr   )return_inverser      r   zpriors must be non-negativeg      ?zAThe priors do not sum to 1. Normalizing such that it sums to one.dtype)axisF)copyzself.within_class_std_dev_ has at least 1 zero standard deviation.Inputs within the same classes for at least 1 feature are identical.z2All features have zero variance. Division by zero.N)out)3r   r   r   
input_tags	allow_nanspissparser   shaper   fit_transformclasses_size
ValueErrorr    npuniquebincountfloatlenclass_prior_asarrayanyisclosesumwarningswarnUserWarningemptyfloat64
centroids_zerosrangewheremedianr   meanarraysqrtwithin_class_std_dev_allmaxmintoarrayptpreshapedeviations_r   signabsclip)r#   Xyr-   is_X_sparse	n_samples
n_featuresley_indclasses	n_classes_class_countsnk	cur_classcenter_maskvarianceerr_msgdataset_centroid_msmmmssignsmsds                            r$   fitzNearestCentroid.fit   s   * ;;+% q!E7CDAq  (~88BB  !"3$enDAq kk!n$Q' !	:^  #"$++-LL	q=P 
 ;;+% ii$?OA| "L 9E#a&M ID[[I% "

A	M?Y+F GD "

4;; 7D!&&(:;;zz$++//137MMS !% 1 1D4E4E4I4I4K KD ((Iz#:"**M XXi y)I9,KFF;/ByM hh{3A6{{k)"131[>PQ1RDOOI.1B1[>1RDOOI.-.{^-@-@a-@-H	* *  88A 66UCqH%'XXGGHLLaL(I	,ABC%&
" t))Q./MMW
 G266155a5=155a5=#@"I"I"Kq"PQW%%qq(9Q(>!?W%%FFFNGGS2X#	/23 &&43M3M)NNYYs1vq!!V88__00B6U

   GGD,,-E!vvd&6&67$:O:OODGGD$$a43C3CD%t'''C hh'83'>UKDOr&   c                    t        |        t        j                  | j                  dt	        | j
                        z        j                         rbt        |       j                  j                  rdnd}t        | ||dd      }| j
                  t        || j                  | j                           S t        | =  |      S )a  Perform classification on an array of test vectors `X`.

        The predicted class `C` for each sample in `X` is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            The predicted classes.
        r/   r+   Tr,   F)r-   r*   resetr   )r   r>   rF   rC   rB   r;   rV   r   r5   r6   r   r   rM   r   superpredict)r#   r`   r-   	__class__s      r$   r|   zNearestCentroid.predict  s     	::d''S-?)?@DDF  (~88BB  "3#A ==)!T__T[[Q  7?1%%r&   c           	         t        | d       t        | |dddt        j                        }t        j                  |j
                  d   | j                  j                  ft        j                        }| j                  dk7  }|d d |fxx   | j                  |   z  cc<   | j                  j                         }|d d |fxx   | j                  |   z  cc<   t        | j                  j                        D ]t  }t        |||g   | j                        j                         }|d	z  }t        j                  | d
t        j                   | j"                  |         z  z         |d d |f<   v |S )NrM   TFr,   )r3   ry   r*   r1   r   r0   rz   r   g       @)r   r   r>   rL   rK   r9   r;   r<   rU   rM   r3   rO   r
   r   ravelsqueezelogrC   )r#   r`   X_normalizeddiscriminant_scoremaskcentroids_normalized	class_idx	distancess           r$   _decision_functionz"NearestCentroid._decision_function8  sL   l+$!$e5


  XX"DMM$6$67rzz
 ))Q.QW!;!;D!AA#335QW%)C)CD)II%t}}112I*2I;?eg  !OI/1zz
S266$*;*;I*F#GGG0q)|, 3 "!r&   c                      | j                   dk(  S )Nr   rz   )r#   s    r$   _check_euclidean_metricz'NearestCentroid._check_euclidean_metricT  s    {{k))r&   c                 `    t         |          }| j                  dk(  |j                  _        |S )Nnan_euclidean)r{   __sklearn_tags__r   r5   r6   )r#   tagsr}   s     r$   r   z NearestCentroid.__sklearn_tags__c  s*    w')$(KK?$B!r&   )r   )__name__
__module____qualname____doc__r   r   r   r!   dict__annotations__r%   r   rw   r|   r   r   r   r	   decision_functionpredict_probapredict_log_probar   __classcell__)r}   s   @r$   r   r      s    eP {K89:%dAtIFM[),D!EF$D  	 	 5{ 6{z &D"8* >%<=+== :L!89+99M >%<=+== r&   r   )#r   rH   numbersr   numpyr>   scipyr   r7   baser   r   r   discriminant_analysisr	   metrics.pairwiser
   r   preprocessingr   utilsr   utils._available_ifr   utils._param_validationr   r   utils.multiclassr   utils.sparsefuncsr   utils.validationr   r   r    r&   r$   <module>r      sP        ? ? G )  . : ; 1 =I'-Ir&   