
    bcgdQ                     ^   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZe j                  dk\  rd dl
Zd Znd dlmZ d Zd	 Zed
        Z G d de      Z G d de      Z e       Zej(                  Zej*                  ej,                  ej.                  ej0                  ej2                  ej4                  iZej*                  ej8                  ej.                  ej:                  ej2                  ej<                  iZej*                  ej@                  ej.                  ejB                  ej2                  ejD                  iZ#ej*                  ejH                  ej.                  ejJ                  ej2                  ejL                  iZ' G d d      Z(d Z) G d d      Z* G d d      Z+ G d d      Z,y)    N)	iteritems)IntEnum)contextmanager)   c                     t         j                  j                  | |      }t         j                  j                  |      }|j                  j                  |       |S N)	importlibutilspec_from_file_locationmodule_from_specloaderexec_module)namepathspecmodules       M/var/www/html/bid-api/venv/lib/python3.12/site-packages/catboost/hnsw/hnsw.pyload_dynamicr      sC    ~~55dDA006'    )r   c                    t         j                  j                  t         j                  j                  t              |       } t         j                  j                  |       rt        j                  |       ng }|D cg c]9  }|j                  d      d   dv st         j                  j                  | |      ; c}S c c}w )N.)sopyd)osr   joindirname__file__isdirlistdirsplit)dir_namelist_dirso_names      r   get_so_pathsr%      s    ww||BGGOOH5x@H')ww}}X'>rzz(#BH;Co8w}}UXGYZ\G]anGnBGGLL7+8ooos   <B="B=c                      dt         j                  v rt         j                  d   S t        d      } | D ]%  }	 t        d|      }|t         j                  d<   |c S  ddlm} |S # t        $ r Y ;w xY w)N_hnswz./z
hnsw._hnsw   )r'   )sysmodulesr%   r   ImportError r'   )so_pathsso_pathloaded_hnswr'   s       r   get_hnsw_bin_moduler0      st    #++{{7##D!H	&w8K(3CKK%	  L  		s    A##	A/.A/c               #      K   t         j                  t        j                         	 d  t         j	                          y # t         j	                          w xY wwr   )r'   _set_loggerr)   stdout_reset_logger r   r   	log_fixupr6   )   s8     	cjj!s   $AA  A AAc                       e Zd ZdZdZdZdZy)	EDistancer   r(      r   N)__name__
__module____qualname__
DotProductL1L2SqrPairVectorDistancer5   r   r   r8   r8   2   s    J	
BEr   r8   c                       e Zd ZdZdZdZy)EVectorComponentTyper   r(   r9   N)r:   r;   r<   FloatI8I32r5   r   r   rB   rB   9   s    E	
B
Cr   rB   c                   D    e Zd ZdZddZed        Zed        Zd Zd Z	y)	Poolz&
    Pool is a storage of vectors
    Nc                     || _         || _        || _        |du |du z  sJ |t        |   ||      | _        d| _        |t        |   d||      | _        || _        yy)a  
        Pool is a storage of vectors. You can create it from row-major binary file or
        binary data of vectors.

        Parameters
        ----------
        vectors_path : string
            Path to binary file with vectors.

        dtype : EVectorComponentType
            Type of vectors.

        dimension : int
            Dimension of vectors.

        vectors_bin_data : bytes
            Binary data of vectors.
        N)vectors_pathdtype	dimension_DenseVectorStorage_storage_data)selfrI   rJ   rK   vectors_bin_datas        r   __init__zPool.__init__a   sz    & )
" D(\T-ABBB#/6|YODMDJ'/6tYHXYDM)DJ (r   c                     t        |||d      S )a   
        Create pool from binary file.

        Parameters
        ----------
        vectors_path : string
            Path to binary file with vectors.

        dtype : EVectorComponentType
            Type of vectors.

        dimension : int
            Dimension of vectors.
        NrG   )clsrI   rJ   rK   s       r   	from_filezPool.from_file   s      L%D99r   c                     t        d|||      S )a  
        Create pool from binary data.

        Parameters
        ----------
        vectors_bin_data : bytes
            Binary data of vectors.

        dtype : EVectorComponentType
            Type of vectors.

        dimension : int
            Dimension of vectors.
        NrS   )rT   rP   rJ   rK   s       r   
from_byteszPool.from_bytes   s      D%,<==r   c                 8    | j                   j                  |      S z
        Get item from storage by id.

        Parameters
        ----------
        id : int
            Index of item in storage.

        Returns
        -------
        item : numpy.ndarray
        )rM   	_get_itemrO   ids     r   get_itemzPool.get_item   s     }}&&r**r   c                 6    | j                   j                         S zn
        Get the number of items in storage.

        Returns
        -------
        num_items : int
        )rM   _get_num_itemsrO   s    r   get_num_itemszPool.get_num_items   s     }}++--r   r   )
r:   r;   r<   __doc__rQ   classmethodrU   rW   r]   rb   r5   r   r   rG   rG   \   s>    *< : :" > >"+.r   rG   c                     t         j                  t        d      t        j                  | j
                        }t        | j                     | j                        |_        |S )a  
    Transform pool for fast dot product search on HNSW graph
    https://papers.nips.cc/paper/9032-mobius-transformation-for-fast-inner-product-search-on-graph.pdf

    Parameters
    ----------
    pool : Pool

    Returns
    -------
    transformed_pool : Pool
    r   )	rG   rW   bytesrB   rC   rK   _transform_mobiusrJ   rM   )pooltransformed_pools     r   transform_mobiusrj      sG     uQx1E1K1KT^^\ 1$** =dmm Lr   c                   D    e Zd ZdZd Z	 	 	 d
dZd Zd Zd Zd Z	dd	Z
y)Hnswz`
    Class for building, loading and working with Hierarchical Navigable Small World index.
    c                      d| _         d| _        y)z6
        Create object for working with HNSW.
        N)_indexrN   ra   s    r   rQ   zHnsw.__init__   s     
r   Nc                 J   i }g d}t        t                     D ]  \  }}||vs||||<    t        |j                     |j                  |      | _        t               5  | j
                  j                  t        j                  |             ddd       y# 1 sw Y   yxY w)a  
        Build index with given options.

        Parameters
        ----------
        pool : Pool
            Pool of vectors for which index will be built.

        distance : EDistance
            Distance that should be used for finding nearest vectors.

        max_neighbors : int (default=32)
            Maximum number of neighbors that every item can be connected with.

        search_neighborhood_size : int (default=300)
            Search neighborhood size for ANN-search.
            Higher values improve search quality in expense of building time.

        num_exact_candidates : int (default=100)
            Number of nearest vectors to take from batch.
            Higher values improve search quality in expense of building time.

        batch_size : int (default=1000)
            Number of items that added to graph on each step of algorithm.

        upper_level_batch_size : int (default=40000)
            Batch size for building upper levels.

        level_size_decay : int (default=max_neighbors/2)
            Base of exponent for decaying level sizes.

        num_threads : int (default=number of CPUs)
            Number of threads for building index.

        report_progress : bool (default=True)
            Print progress of building.

        verbose : bool (default=False)
            Print additional information about time of building.

        snapshot_file : string (default=None)
            Path for saving snapshots during the index building.

        snapshot_interval : int (default=600)
            Interval between saving snapshots (seconds).
            Snapshot is saved after building each level also.
        )
not_paramsrO   params	__class__rh   distanceN)
r   locals_HnswDenseVectorIndexrJ   rM   rn   r6   _buildjsondumps)rO   rh   rs   max_neighborssearch_neighborhood_sizenum_exact_candidates
batch_sizeupper_level_batch_sizelevel_size_decaynum_threadsverbosereport_progresssnapshot_filesnapshot_intervalrq   rp   keyvalues                     r   buildz
Hnsw.build   s    d V
#FH-JC*$):#s . ,DJJ7xP[KKtzz&12 [[s   !/BB"c                 2    | j                   t        d      y Nz!Index is not built and not loadedrn   HnswExceptionra   s    r   _check_indexzHnsw._check_index      ;; CDD r   c                 Z    | j                          | j                  j                  |       y)z
        Save index to file.

        Parameters
        ----------
        index_path : string
            Path to file for saving index.
        N)r   rn   _save)rO   
index_paths     r   savez	Hnsw.save  s"     	*%r   c                     t        |j                     |j                  |      | _        | j                  j	                  |       d| _        y)aK  
        Load index from file.

        Parameters
        ----------
        index_path : string
            Path to file for loading index.

        pool : Pool
            Pool of vectors for which index will be loaded.

        distance : EDistance
            Distance that should be used for finding nearest vectors.
        N)ru   rJ   rM   rn   _loadrN   )rO   r   rh   rs   s       r   loadz	Hnsw.load%  s8     ,DJJ7xP*%
r   c                     t        |j                     |j                  |      | _        | j                  j	                  |       || _        y)a>  
        Load index from bytes.

        Parameters
        ----------
        index_data : bytes
            Index binary data.

        pool : Pool
            Pool of vectors for which index will be loaded.

        distance : EDistance
            Distance that should be used for finding nearest vectors.
        N)ru   rJ   rM   rn   _load_from_bytesrN   )rO   
index_datarh   rs   s       r   load_from_byteszHnsw.load_from_bytes8  s8     ,DJJ7xP$$Z0
r   c                 ^    | j                          | j                  j                  ||||      S )a  
        Get approximate nearest neighbors for query from index.

        Parameters
        ----------
        query : list or numpy.ndarray
            Vector for which nearest neighbors should be found.

        top_size : int
            Required number of neighbors.

        search_neighborhood_size : int
            Search neighborhood size for ANN-search.
            Higher values improve search quality in expense of search time.
            It should be equal or greater than top_size.

        distance_calc_limit : int (default=0)
            Limit of distance calculation.
            To guarantee satisfactory search time at the expense of quality.
            0 is equivalent to no limit.

        Returns
        -------
        neighbors : list of tuples (id, distance)
        )r   rn   _get_nearest)rO   querytop_sizerz   distance_calc_limits        r   get_nearestzHnsw.get_nearestK  s-    4 	{{''x9QSfggr   )NNNNNNNFTNNr   )r:   r;   r<   rc   rQ   r   r   r   r   r   r   r5   r   r   rl   rl      s:     mqmrJN93vE
&& &hr   rl   c                       e Zd ZdZdej
                  ddddddfd	Zd
 Z	 	 ddZd Z	ddZ
d Zed        Zed        Z	 	 ddZy)HnswEstimatorz
    Class for building, loading and working with Hierarchical Navigable Small World index with SciKit-Learn
    Estimator compatible interface.
    Mostly drop-in replacement for sklearn.neighbors.NearestNeighbors (except for some parameters)
           i,  d   i  i@  Nc	                 ^    t        t                     D ]  \  }	}
|	dvst        | |	|
        y)a<  
        Parameters
        ----------
        n_neighbors : int, default=5
            Number of neighbors to use by default for kneighbors queries.


        distance : EDistance
            Distance that should be used for finding nearest vectors.

        max_neighbors : int (default=32)
            Maximum number of neighbors that every item can be connected with.

        search_neighborhood_size : int (default=300)
            Search neighborhood size for ANN-search.
            Higher values improve search quality in expense of building time.

        num_exact_candidates : int (default=100)
            Number of nearest vectors to take from batch.
            Higher values improve search quality in expense of building time.

        batch_size : int (default=1000)
            Number of items that added to graph on each step of algorithm.

        upper_level_batch_size : int (default=40000)
            Batch size for building upper levels.

        level_size_decay : int (default=max_neighbors/2)
            Base of exponent for decaying level sizes.
        )rO   rr   N)r   rt   setattr)rO   n_neighborsrs   ry   rz   r{   r|   r}   r~   r   r   s              r   rQ   zHnswEstimator.__init__p  s/    D $FH-JC//c5) .r   c                 2    | j                   t        d      y r   r   ra   s    r   r   zHnswEstimator._check_index  r   r   c                 ~   t         j                  || j                        \  | _        | _        | j                  d      }g d}	t        t                     D ]  \  }
}|
|	vs||||
<    |d= t               5  | j                  j                  t        j                  |             ddd       | S # 1 sw Y   | S xY w)a  
        Fit the HNSW model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_values)

        y: None
            Added to be compatible with Estimator API

        num_threads : int (default=number of CPUs)
            Number of threads for building index.

        report_progress : bool (default=True)
            Print progress of building.

        verbose : bool (default=False)
            Print additional information about time of building.

        snapshot_file : string (default=None)
            Path for saving snapshots during the index building.

        snapshot_interval : int (default=600)
            Interval between saving snapshots (seconds).

        Returns
        -------
        model : HnswEstimator

        Freturn_none)rp   rO   rq   rr   XyNrs   )r'   _init_indexrs   rn   _index_data_get_paramsr   rt   r6   rv   rw   rx   )rO   r   r   r   r   r   r   r   rq   rp   r   r   s               r   fitzHnswEstimator.fit  s    @ ).(9(9!T]](K%T%!!e!4L
#FH-JC*$):#s . :[KKtzz&12  s   9/B22B<c                 t    i }| j                   j                         D ]  \  }}|d   dk7  s|s||||<    |S )Nr   _)__dict__items)rO   r   rq   r   r   s        r   r   zHnswEstimator._get_params  sD    ----/JC1v}+%2C#s 0 r   c                 &    | j                  d      S )z4
        Get parameters for this estimator.
        Tr   )r   )rO   deeps     r   
get_paramszHnswEstimator.get_params  s     D11r   c                     |s| S | j                  d      }|j                         D ]  \  }}||vst        d       | S )z
        Set the parameters of this estimator.

        Parameters
        ----------
        **params : dict
            HnswEstimator parameters.

        Returns
        -------
        self : HnswEstimator instance
        Tr   zjInvalid parameter %s for HnswEstimator. Check the list of available parameters with `get_params().keys()`.)r   r   r   r   )rO   rq   valid_paramsr   r   s        r   
set_paramszHnswEstimator.set_params  sT     K''D'9 ,,.JC,&#2  ) r   c                     | j                   S )zk
        Returns
        -------
        Distance that should be used for finding nearest vectors.
        )rs   ra   s    r   effective_metric_zHnswEstimator.effective_metric_  s     }}r   c                 T    | j                          | j                  j                  d   S )zW
        Returns
        -------
        Number of samples in the fitted data.
        r   )r   r   shapera   s    r   n_samples_fit_zHnswEstimator.n_samples_fit_  s'     	%%a((r   c                     | j                          || j                  }|| j                  }||dz  }| j                  j	                  |||| j
                  ||      S )a  Finds the approximate K-neighbors of a point.
        Returns indices of and distances to the neighbors of each point.

        Parameters
        ----------
        X : array-like, shape (n_queries, n_features) or None
            The query point or points.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.
        n_neighbors : int, default=None
            Number of neighbors required for each sample. The default is the
            value passed to the constructor.
        return_distance : bool, default=True
            Whether or not to return the distances.

        search_neighborhood_size : int, default=None
            Search neighborhood size for ANN-search.
            Higher values improve search quality in expense of search time.
            It should be equal or greater than top_size.
            If None set to n_neighbors * 2.

        distance_calc_limit : int (default=0)
            Limit of distance calculation.
            To guarantee satisfactory search time at the expense of quality.
            0 is equivalent to no limit.

        Returns
        -------
        neigh_dist :numpy.ndarray of shape (n_queries, n_neighbors)
            Array representing the lengths to points, only present if
            return_distance=True
        neigh_ind : numpy.ndarray of shape (n_queries, n_neighbors)
            Indices of the nearest points in the population matrix.
        r9   )r   r   r   rn   _kneighborsrs   )rO   r   r   return_distancerz   r   s         r   
kneighborszHnswEstimator.kneighbors  sk    H 	9  A**K#+'2Q${{&&q+Wo':< 	<r   )NNFTNiX  )T)NNTNr   )r:   r;   r<   rc   r8   r=   rQ   r   r   r   r   r   propertyr   r   r   r5   r   r   r   r   i  s     $%#..b[^&)dSX"&$*LE cg!+Z28   ) ) cg'(.<r   r   c                   8    e Zd ZdZd	dZd Zd
dZd Zd Zd Z	y)
OnlineHnswz^
    Class for building and working with Online Hierarchical Navigable Small World index.
    Nc                     || _         || _        i }g d}	t        t                     D ]  \  }
}|
|	v s||||
<    t	        |   ||t        j                  |            | _        y)a3  
        Create object with given options.

        Parameters
        ----------
        dtype : EVectorComponentType
            Type of vectors.
        dimension : int
            Dimension of vectors.
        distance : EDistance
            Distance that should be used for finding nearest vectors.
        max_neighbors : int (default=32)
            Maximum number of neighbors that every item can be connected with.
        search_neighborhood_size : int (default=300)
            Search neighborhood size for ANN-search.
            Higher values improve search quality in expense of building time.
        num_vertices : int (default=0)
            Expected number of vectors in storage.
        level_size_decay : int (default=max_neighbors/2)
            Base of exponent for decaying level sizes.
        )ry   rz   num_verticesr~   N)rJ   rK   r   rt   _OnlineHnswDenseVectorIndexrw   rx   _online_index)rO   rJ   rK   rs   ry   rz   r   r~   rq   
all_paramsr   r   s               r   rQ   zOnlineHnsw.__init__8  sl    , 
"f
#FH-JCj U%6#s . 9?	8UYU_U_`fUghr   c                 8    | j                   j                  |      S )a  
        Get approximate nearest neighbors for query from index and add item to index

        Parameters
        ----------
        query : list or numpy.ndarray
            Vector for which nearest neighbors should be found.
            Vector which should be added in index.

        Returns
        -------
        neighbors : list of tuples (id, distance) with length = search_neighborhood_size
        )r   #_get_nearest_neighbors_and_add_item)rO   r   s     r   get_nearest_and_add_itemz#OnlineHnsw.get_nearest_and_add_itemW  s     !!EEeLLr   c                 :    | j                   j                  ||      S )aj  
        Get approximate nearest neighbors for query from index.

        Parameters
        ----------
        query : list or numpy.ndarray
            Vector for which nearest neighbors should be found.
        top_size : int
            Required number of neighbors.

        Returns
        -------
        neighbors : list of tuples (id, distance)
        )r   _get_nearest_neighbors)rO   r   r   s      r   r   zOnlineHnsw.get_nearestg  s     !!88IIr   c                 :    | j                   j                  |       y)z
        Add item in index.

        Parameters
        ----------
        item : list or numpy.ndarray
            Vector which should be added in index.
        N)r   	_add_item)rO   items     r   add_itemzOnlineHnsw.add_itemx  s     	$$T*r   c                 8    | j                   j                  |      S rY   )r   rZ   r[   s     r   r]   zOnlineHnsw.get_item  s     !!++B//r   c                 6    | j                   j                         S r_   )r   r`   ra   s    r   rb   zOnlineHnsw.get_num_items  s     !!0022r   )NNNNr   )
r:   r;   r<   rc   rQ   r   r   r   r]   rb   r5   r   r   r   r   4  s(    i>M J"	+03r   r   )-r)   r   sixr   enumr   
contextlibr   rw   version_infoimportlib.utilr	   r   impr%   r0   r6   r8   rB   r'   r   rC   _DenseFloatVectorStoragerD   _DenseI8VectorStoragerE   _DenseI32VectorStoragerL   _HnswDenseFloatVectorIndex_HnswDenseI8VectorIndex_HnswDenseI32VectorIndexru   _transform_mobius_float_transform_mobius_i8_transform_mobius_i32rg    _OnlineHnswDenseFloatVectorIndex_OnlineHnswDenseI8VectorIndex_OnlineHnswDenseI32VectorIndexr   rG   rj   rl   r   r   r5   r   r   <module>r      s   
 	   % t !p   7  	##  > >U88e::   @ @U::e<<   = =U77e99   F FU@@eBB ^. ^.B$Wh WhtH< H<Vf3 f3r   