
    =[g0                        d Z ddlmZmZ ddlmZmZmZmZm	Z	m
Z
mZmZmZ ddlZddlZddlmZ ddlmZmZmZmZ ddlmZ dd	lmZ d
dlmZ dej>                  dej@                  fdZ! edd      Z" e"dddddd      Z#de
eej@                        de
ej@                     fdZ$deejJ                     deejJ                  e&e'gdf   ddfdZ( G d de      Z)dejJ                  defdZ*d ee&e	ej@                     f   d!e
e+   d"ee&ef   d#e
e   d$ee&ef   defd%Z,deejJ                     d&e
ee&      d!e
e+   d'e'd(ee&ef   d)e'd*e'deee
e   f   fd+Z-	 	 d0d,ed ed-e
e   d.e'dej@                  f
d/Z.y)1z*Utilities for processing spark partitions.    )defaultdict
namedtuple)	AnyCallableDictIteratorListOptionalSequenceTupleUnionN)
csr_matrix)DataIterDMatrixQuantileDMatrixXGBModelconcat   )	ArrayLike   )
get_loggerseriesreturnc                 T    | j                  d      }t        j                  |      }|S )zStack a series of arrays.F)copy)to_numpynpstack)r   arrays     M/var/www/html/bid-api/venv/lib/python3.12/site-packages/xgboost/spark/data.pystack_seriesr"      s$    OOO'EHHUOEL    Alias)datalabelweightmarginvalidqidvaluesr&   r'   
baseMarginvalidationIndicatorr*   seqc                     | rt        |       S y)z&Concatenate the data if it's not None.Nr   )r.   s    r!   concat_or_noner0      s    
c{r#   iteratorappendc                    dt         j                  dt        ddffd}d}| D ]  }|t        j                  |j
                  v }|du rt        j                  |j
                  v sJ |rJ|j                  |t        j                      ddf   }|j                  |t        j                     ddf   }n|d}} ||d       | ||d        y)znExtract partitions from pyspark iterator. `append` is a user defined function for
    accepting new partition.partis_validr   Nc                      | t         j                  |        | t         j                  |        | t         j                  |        | t         j                  |        | t         j
                  |       y )N)aliasr%   r&   r'   r(   r*   )r4   r5   r2   s     r!   	make_blobz#cache_partitions.<locals>.make_blob+   sS    tUZZ*tU[[(+tU\\8,tU\\8,tUYY)r#   TF)pd	DataFrameboolr7   r)   columnsloc)r1   r2   r8   has_validationr4   trainr)   s    `     r!   cache_partitionsr@   %   s    * * * * &*N!"[[DLL8NT!;;$,,...HHd5;;//23EHHT%++.12E5E%eT" r#   c                        e Zd ZdZdeeef   dee   de	ddf fdZ
deeej                        deej                     fdZd	edefd
ZddZ xZS )PartIterz7Iterator for creating Quantile DMatrix from partitions.r%   	device_idkwargsr   Nc                 ^    d| _         || _        || _        || _        t        |   d       y )Nr   T)release_data)_iter
_device_id_data_kwargssuper__init__)selfr%   rC   rD   	__class__s       r!   rL   zPartIter.__init__H   s1     
#
d+r#   c                     |sy | j                   Udd l}dd l}|j                  j                  j                  | j                          |j                  || j                           S || j                     S Nr   )rH   cudfcupycudaruntime	setDevicer:   rG   )rM   r%   rQ   cps       r!   _fetchzPartIter._fetchR   s[    ??& GGOO%%doo6>>$tzz"233DJJr#   
input_datac                    | j                   t        | j                  t        j                           k(  ry |d| j                  | j                  t        j                           | j                  | j                  j                  t        j                  d             | j                  | j                  j                  t        j                  d             | j                  | j                  j                  t        j                  d             | j                  | j                  j                  t        j                  d             d| j                   | xj                   dz  c_         y)Nr   r%   r&   r'   base_marginr*   r    )rG   lenrI   r7   r%   rW   getr&   r'   r(   r*   rJ   )rM   rX   s     r!   nextzPartIter.nexta   s    ::TZZ

344 	
TZZ

34++djjnnU[[$?@;;tzz~~ellDABDJJNN5<<$FGDJJNN599d;<	
 ll	
 	

a
r#   c                     d| _         y rP   )rG   )rM   s    r!   resetzPartIter.reseto   s	    
r#   )r   N)__name__
__module____qualname____doc__r   strr	   r
   intr   rL   r   r9   r:   rW   r   r_   ra   __classcell__)rN   s   @r!   rB   rB   E   sy    A,dO,08,IL,	, 8HR\\$:;  @V  x C r#   rB   r4   c                 v   g dgg }}}d}t        | j                  | j                  | j                  | j                        D ]  \  }}}}|dk(  rt        |      }	|}
|}n2t        |      }	t        j                  |	t        j                        }
|}|dk(  r|	}||	k(  sJ |j                  |
       |j                  |d   t        |
      z          |j                  |        t        j                  |      }t        j                  |      }t        j                  |      }t        |||ft        |       |f      S )Nr   )dtype)shape)zipfeatureVectorTypefeatureVectorSizefeatureVectorIndicesfeatureVectorValuesrg   r]   r   arangeint32r2   r    concatenater   )r4   csr_indices_listcsr_indptr_listcsr_values_list
n_featuresvec_type	vec_size_vec_indices
vec_valuesvec_sizecsr_indices
csr_valuescsr_indptr_arrcsr_indices_arrcsr_values_arrs                  r!   )_read_csr_matrix_from_unwrapped_spark_vecr   s   s5   9;aS"oJ8;!!  	94)[* q=9~H%K#J :H))HBHH=K#J?!JX%%%,r2S5EEFz*598 XXo.Nnn%56O^^O4N	.9#d)ZAX r#   r%   dev_ordinalmetarefparamsc                     | s t        t        j                  d      |      S t        | |fi |}t        |fi |d|i}|S )z+Handle empty partition for QuantileDMatrix.r   r   )r   r   )r   r   emptyrB   )r%   r   r   r   r   itms          r!   make_qdmr      sG     rxx/S99	$	,t	,B.f.#.AHr#   feature_colsuse_qdmrD   enable_sparse_data_optimhas_validation_colc           	         t        t              t        t              ddt        j                  dt        dt
        ddffd}dt        j                  dt        dt
        ddffd}d	t        t        t        t        j                     f   d
t        t        t        f   dt        fd}	|r|}
dv rd   dk(  sJ |}
dt        t        t        t        f   t        t        t        t        t        t
        f   f   f   ffd} |       \  }}|rt!        | |
       t#        ||d|      }nO|st!        | |
        |	      }n5|rt!        | |
       t#        ||d|      }nt!        | |
        |	      }|r |rt#        ||||      }n|r	 |	      nd}nd}|#|j%                         |j%                         k(  sJ ||fS )a~  Create DMatrix from spark data partitions.

    Parameters
    ----------
    iterator :
        Pyspark partition iterator.
    feature_cols:
        A sequence of feature names, used only when rapids plugin is enabled.
    dev_ordinal:
        Device ordinal, used when GPU is enabled.
    use_qdm :
        Whether QuantileDMatrix should be used instead of DMatrix.
    kwargs :
        Metainfo for DMatrix.
    enable_sparse_data_optim :
        Whether sparse data should be unwrapped
    has_validation:
        Whether there's validation data.

    Returns
    -------
    Training DMatrix and an optional validation DMatrix.
    r   r4   namer5   r   Nc                    |t         j                  k(  s|| j                  v r|t         j                  k(  r|    j                  d   dkD  r|    }n;| |   j                  d   dkD  r$| |   }|t         j                  k(  rt	        |      }nd }|t         j                  k(  r*|(dk(  r|j                  d   |j                  d   k(  sJ |y |r|   j                  |       y |   j                  |       y y Nr   r   )r7   r%   r<   rl   r"   r2   )r4   r   r5   r    r   rx   
train_data
valid_datas       r!   append_mz0create_dmatrix_from_partitions.<locals>.append_m   s    5::!5

" ,&,,Q/!3.2<.@d!!!$q(T
5::%(/Euzz!e&7?!&QJ!U[[^333}4 ''.4 ''.5 "6r#   c                 6   |t         j                  k(  s|| j                  v rw|t         j                  k(  r3t        |       }dk(  r|j                  d   |j                  d   k(  sJ | |   }|r|   j                  |       y |   j                  |       y y r   )r7   r%   r<   r   rl   r2   )r4   r   r5   r    rx   r   r   s       r!   append_m_sparsez7create_dmatrix_from_partitions.<locals>.append_m_sparse   s     5::!5uzz!A$G?!&QJ!U[[^333T
4 ''.4 ''. "6r#   r+   rD   c           	      4   t        |       dk(  r:t        d      j                  d       t        ddt	        j
                  d      i|S t        | t        j                           }t        | j                  t        j                  d             }t        | j                  t        j                  d             }t        | j                  t        j                  d             }t        | j                  t        j                  d             }t        d|||||d|S )Nr   XGBoostPySparkz_Detected an empty partition in the training data. Consider to enable repartition_random_shuffler%   r   rZ   r\   )r]   r   warningr   r   r   r0   r7   r%   r^   r&   r'   r(   r*   )r+   rD   r%   r&   r'   r(   r*   s          r!   makez,create_dmatrix_from_partitions.<locals>.make  s    v;!'(00.
 ; 0;F;;fUZZ01vzz%++t<=

5<< >?

5<< >?VZZ		489 
U6v3
RX
 	
r#   missingg        c                  f    d} i }i }j                         D ]  \  }}|| v r|||<   |||<    ||fS )N)max_binr   silentnthreadenable_categorical)items)non_data_keysnon_data_paramsr   kvrD   s        r!   split_paramsz4create_dmatrix_from_partitions.<locals>.split_params  sR    

 LLNDAqM!%&"Q	 #
 _$$r#   )r   listr9   r:   rf   r;   r   r	   r   ndarrayr   r   r   r   rg   floatr@   r   num_col)r1   r   r   r   rD   r   r   r   r   r   	append_fnr   r   r   dtraindvalidrx   r   r   s    `  `           @@@r!   create_dmatrix_from_partitionsr      s   B /:$.?J.9$.?JJ/r|| /3 /$ /4 / /</bll /# / /$ /"
T#tBJJ//0 
$sCx. 
W 
$  #	F"vi'8C'???	%%S#XS%UD@P:Q5Q0R RS %*  >LD&G9-":{D$O		!'9-j&)		'9-*k4vF9-j&) (0Kvv)F 2DT*f-F~~6>>#33336>r#   modelr[   strict_shapec                     | j                  d      }t        ||| j                  | j                  | j                  | j
                        }| j                         j                  |dd||      S )z4Predict contributions with data with the full model.N)r[   r   r   feature_typesr   TF)pred_contribsvalidate_featuresiteration_ranger   )_get_iteration_ranger   r   n_jobsr   r   get_boosterpredict)r   r%   r[   r   r   data_dmatrixs         r!   r   r   T  sv     006O)) 33L &&'! '  r#   )NF)/re   collectionsr   r   typingr   r   r   r   r	   r
   r   r   r   numpyr   pandasr9   scipy.sparser   xgboostr   r   r   r   xgboost.compatr   _typingr   utilsr   Seriesr   r"   r$   r7   r0   r:   rf   r;   r@   rB   r   rg   r   r   r   r\   r#   r!   <module>r      s-   0 / X X X   # @ @ !   rzz  	7QRh<9NPUV"**!56 8BJJ;O #r||$#.6c47PRV7V.W#	#@+x +\(BLL (Z (V
sD$$
%# sCx. 
'		
 cN dr||$d8C=)d #d 	d
 cNd #d d 7HW%%&dT (,	
 )$ 	
 ZZr#   