
    bcg                     >    d Z ddlZddlmZ ddlmZ  G d de      Zy)zG
class Splitter.

Convenient tool for creating and working with folds.
    N   )FoldStorage)	_FoldFilec                   \    e Zd ZdZdZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zedd       Zy)	_Splitterzq
     Splitter needs providing some parameters to create folds and some "reader",
     that can read source.
    i c                     || _         | j                         \  | _        | _        t	               | _        || _        || _        t        j                  |      | _
        y N)_line_reader_read_groups_ids_line_groups_ids_groups_idsset_folds_storage_column_description_min_folds_countrandomRandom_random)selfline_readercolumn_descriptionseedmin_folds_counts        R/var/www/html/bid-api/venv/lib/python3.12/site-packages/catboost/eval/_splitter.py__init__z_Splitter.__init__   sO    '262G2G2I/t/ "e#5  /}}T*    c                     g }t               }| j                  j                         }|D ]'  \  }}|j                  |       |j	                  |       ) ||fS )z=Find all groups in dataset and which group each line belongs.)r   r
   lines_generatorappendadd)r   line_groups_ids
groups_idslinesgroup_id_s         r   r   z_Splitter._read_groups_ids"   sW    U
!!113 KHa""8,NN8$ ! 
**r   c           	         t        | j                        }|| j                  z  |k  r&t        dj	                  ||| j                              t        | j                        }| j                  j                  |       g }t        ||z  |      }t        |      D ](  }|j                  t        |||z  |dz   |z                * |S )z4Prepare test sets for folds only for one permutationzGThe size of fold is too big: count_groups: {}, fold_size: {}. Const: {}r   )lenr   r   AttributeErrorformatsortedr   shuffleminranger   r   )r   	fold_size
left_foldscount_groupspermutationresultcurrent_count_foldsis           r   _make_learn_foldsz_Splitter._make_learn_folds.   s    4++,40009< !j!q!qi)>)>"@   T--.[)!,)";ZH*+AMM#k!i-!a%99LMNO ,r   c                    | j                   j                         }|D ]  }|j                           	 g }| j                  ddj	                  |      |      }|j                          |dz  }d}t        |      D ]  \  }	\  }
}| j                  |	   }d}|D ]'  }|j                  |      s|j                  |       d}) |rI|j                  |       |dz  }|| j                  k\  so|j                  |       |j                          | j                  ddj	                  |      |      }|j                          d}|dz  } |dkD  r"|j                          |j                  |       n |j                         r|j                          |D ]  }|j                           |S # |D ]  }|j                           w xY w)ziLearn_set contains numbers of lines. The method itself store relevant lines from dataset to fold storage.Nzoffset{}_restr   r   FT)r
   r   opencreate_foldr)   	enumerater   contains_group_idr    
_REST_SIZEr   close	is_opened)r   fold_storagesnumoffset	generatorfold_storage
rest_foldsrest_fold_file	rest_sizenum_liner%   liner$   
is_writtens                 r   _write_foldsz_Splitter._write_folds?   s    %%557	 *L * 	%J!--dO4J4J64RTWXN!1HCI'0';#)1d00:"
$1L#55h?$((.%)
 %2 ""&&t,NI DOO3")).9&,,.)-)9)9$@V@VW]@^`c)d&++-$%	q! (<$ 1}$$&!!.1))+$$& -""$ !.  !.""$ !.s   A6F$ *F$ %F$ (B#F$ $F=c                     g }d}||k  r=|j                  | j                  |||z
               |d   }|t        |      z  }||k  r=|S )z&Create all folds for all permutations.r   )r   r5   r'   )r   r.   folds_countfoldspassed_folds_countcurrent_learn_foldss         r   create_fold_setsz_Splitter.create_fold_setsl   s`     ;.LL//	;I[;[\]"')#&9":: !;. r   c              #   >  K   d}|D ]  }g }g }|D ]j  }|dz  }||k  r%| j                  |d|      }|j                  |       2||k\  s8| j                  |dj                  |      |      }|j                  |       l | j                  ||z   ||      }	|||	f  yw)z?Create folds storages for all folds in folds_groups. Generator.r   r   foldzoffset{}_skippedN)r8   r   r)   rI   )
r   folds_groupsfold_offsetfold_num
fold_grouplearn_foldsskipped_folds	learn_set	fold_filerC   s
             r   fold_groups_files_generatorz%_Splitter.fold_groups_files_generatorw   s      &JKM'	A) $ 0 0FH MI&&y1 H, $ 0 0<N<U<UVa<bdl mI!((3 ( **;+FR]^J}j88 's   ABABc                     | j                  ||      }t        ||| j                  j                         | j                        }| j
                  j                  |       |S )N)sepr   )create_name_from_idr   r
   get_separatorr   r   r    )r   fold_setnameid	file_namerZ   s         r   r8   z_Splitter.create_fold   s[    ,,T26	h'"&"3"3"A"A"C151I1IK	 		*r   c                 F    | j                   D ]  }|j                           y r	   )r   delete)r   files     r   clean_foldsz_Splitter.clean_folds   s    ''DKKM (r   c                 ,    t        j                          y r	   )r   
remove_dir)r   s    r   cleanz_Splitter.clean   s     r   Nc                 `    |dj                  || ||      } | S dj                  || |      } | S )Nz,{name}{:0>{max_count_digits}}_offset{offset})ra   max_count_digitsr@   z{name}{:0>{max_count_digits}})ra   rl   )r)   )ra   rb   r@   rl   s       r   r^   z_Splitter.create_name_from_id   sP    AHH!1	 I D  399"4Zj9kDr   )N   )__name__
__module____qualname____doc__r;   r   r   r5   rI   rP   r[   r8   rg   rj   staticmethodr^    r   r   r   r      sP     J	+
+"+Z	9&! 
 
r   r   )rq   r   _fold_storager   r   objectr   rs   r   r   <module>rv      s#     & $X Xr   