
    bcgF                         d dl Zd dlZd dlmZ ddlmZ ddlm	Z	 ddl
mZ d Z G d d	e      Z G d
 d      ZddZ G d d      Z G d d      Z G d d      Zy)    N)Enum   )CatBoostError) metric_description_or_str_to_strcompute_wx_testc                      t        | |      d   S )Npvaluer   )baselinetests     Z/var/www/html/bid-api/venv/lib/python3.12/site-packages/catboost/eval/evaluation_result.pycalc_wilcoxon_testr   
   s    8T*844    c                       e Zd ZdZdZy)	ScoreTypeAbsoluteDiffRelativeDiffN)__name__
__module____qualname__AbsRel r   r   r   r      s    
C
Cr   r   c                   Z    e Zd ZdZej
                  ddddfdZed	d       Zed	d       Z	y)
ScoreConfigzB
        Config to present human-friendly evaluation results.
    d   {Gz?Tc                 J    || _         || _        || _        || _        || _        y)a  

        :param score_type: type of score. For abs difference score will be (baseline - test).mean(),
        for relative it's ((baseline - test) / baseline).mean()
        :param multiplier: multiplier to print score
        :param score_level: WX-test level. Will be used to make if tested case significantly better or worse
        :param interval_level: level to compute score confidence interval
        :param overfit_iterations_info: if information about overfit iterations should be preserved
        N)type
multiplierscore_levelinterval_leveloverfit_overfit_iterations_info)self
score_typer    r!   r"   overfit_iterations_infos         r   __init__zScoreConfig.__init__   s*     	$&,/F,r   c                 :    t        t        j                  d|       S )N   r%   r    r!   )r   r   r   levels    r   	abs_scorezScoreConfig.abs_score-   s    imm&'',. 	.r   c                 :    t        t        j                  d|       S )Nr   r*   )r   r   r   r+   s    r   	rel_scorezScoreConfig.rel_score3   s    imm&)',. 	.r   N)r   )
r   r   r   __doc__r   r   r'   staticmethodr-   r/   r   r   r   r   r      sJ    
 &MM! $)-G* . .
 . .r   r   c                    | dk(  j                         st        j                  |       } g }t        d|      D ]O  }t        j                  j                  | t        |             }|j                  t        j                  |             Q t        |      }|t        ||dz  z           }|t        |d|dz  z
  z           }||fS y)a  
    Count confidence intervals for difference each two samples.

    Args:
        :param samples: samples
        :param level: (float) Level for the confidence interval.
        :param tries: bootstrap samples to use
        :return: (left, right) border of confidence interval

    r   r         ?)r   r   )allnparrayrangerandomchoicelenappendmeansortedint)samplesr,   triesmeans_resampleleftrights           r   calc_bootstrap_ci_for_meanrF   :   s     qL((7#q%Ayy''W>HLL*+ ! uS%!),-.c%3?345U{r   c                   h    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd ZddZd ZddZy)CaseEvaluationResultzg
        CaseEvaluationResults stores aggregated statistics for one EvaluationCase and one metric.
    c                     || _         || _        t        j                         | _        t        j                         | _        t               | _        || _        y N)	_case_metric_descriptionpdSeries_fold_metric_fold_metric_iterationdict_fold_curves
_eval_step)r$   casemetric_description	eval_steps       r   r'   zCaseEvaluationResult.__init__Y   s>    
#5 IIK&(iik# F#r   c                    |j                         | j                  k7  rt        d      |j                         }|| j                  |<   | j
                  j                         rt        |      n
t        |      }| j
                  j                         rt        j                  |      nt        j                  |      }|| j                  j                  |<   || j                  j                  |<   y )Nz)Model case should be equal to result case)get_caserK   r   get_fold_idrR   rL   is_max_optimalmaxminr5   argmaxargminrO   atrP   )r$   modellearning_curvefold_idscorepositions         r   _addzCaseEvaluationResult._addc   s    >>tzz) KLL##%%3'"'+'?'?'N'N'PN#VYZhVi040H0H0W0W0Y299^,_a_h_h` ).W%2:##&&w/r   c                     t        j                  | j                  |j                  k(        xrG t        j                  | j                  |j                  k(        xr | j                  |j                  k(  S rJ   )r5   r4   rO   rP   rR   r$   others     r   __eq__zCaseEvaluationResult.__eq__q   s_    t((E,>,>>? <FF466%:V:VVW<%%););;	=r   c                     | j                   S )z4
            ExecutionCases for this result
        )rK   r$   s    r   rX   zCaseEvaluationResult.get_casev   s     zzr   c                 6    | j                   j                         S )zM

        :return: FoldsIds for which this caseResult was calculated
        )rR   keysrk   s    r   get_fold_idsz!CaseEvaluationResult.get_fold_ids|   s    
   %%''r   c                 >    | j                   |   | j                  |   fS )zr

        :param fold: id of fold to get result
        :return: best metric value, best metric iteration
        )rO   rP   r$   folds     r   get_best_metric_for_foldz-CaseEvaluationResult.get_best_metric_for_fold   s%       &(C(CD(IIIr   c                     | j                   S )zK

        :return: pandas Series with best iterations on all folds
        )rP   rk   s    r   get_best_iterationsz(CaseEvaluationResult.get_best_iterations   s    
 ***r   c                     | j                   S )zA

        :return: pandas series with best metric values
        )rO   rk   s    r   get_best_metricsz%CaseEvaluationResult.get_best_metrics   s    
    r   c                      | j                   |   S )zq

        :param fold:
        :return: fold learning curve (test scores on every eval_period iteration)
        )rR   rp   s     r   get_fold_curvez#CaseEvaluationResult.get_fold_curve   s       &&r   c                     | j                   S )zJ

        :return: Metric used to build this CaseEvaluationResult
        rL   rk   s    r   get_metric_descriptionz+CaseEvaluationResult.get_metric_description       
 '''r   c                     | j                   S )zG

        :return: step which was used for metric computations
        )rS   rk   s    r   get_eval_stepz"CaseEvaluationResult.get_eval_step   s    
 r   c                     d}d}| j                   j                         D ];  \  }}| j                  |   }|dz  t        |      z  }||kD  r|dz  }1||k  s7|dz  }= ||fS )aK  

        :param overfit_border: min fraction of iterations until overfitting starts one expects all models to have
        :param underfit_border: border, after which there should be no best_metric_scores
        :return: #models with best_metric > underfit_border * iter_count, #models, with best_metric > overfit_border
        r   r3   r)   )rR   itemsrP   r:   )	r$   overfit_borderunderfit_bordercount_overfittingcount_underfittingrb   
fold_curvebest_score_positionbest_model_size_fractions	            r   count_under_and_over_fitsz.CaseEvaluationResult.count_under_and_over_fits   s     #'#4#4#:#:#<GZ"&"="=g"F':S'@3z?'R$'.8"a'")O;!Q&! $= !"444r   c                 B    | j                         \  }}||kD  ry||kD  ryy)zX

        :return: Simple sanity check that all models overfit and not too fast
        OverfittingUnderfittingGood)r   )r$   r   r   s      r   estimate_fit_qualityz)CaseEvaluationResult.estimate_fit_quality   s4    
 150N0N0P--11  11!r   Nc                 4   ddl m} g }| j                         D ]  }| j                  |      }||}nt	        t        |      dz        }|j                  |j                  t        |t        |            D cg c]  }|t	        | j                        z   c}||d ddj                  |                    |j                  dj                  | j                        dt        d	d
dd      t        dd
d      d      }|j                  ||      }	|	S c c}w )z{

        :param offset: First iteration to plot
        :return: plotly Figure with learning curves for each fold
        r   N皙?lineszFold #{}xymodenamezLearning curves for case {}closest	Iteration   Fr   titleticklenzeroline	gridwidthMetricr   r   r   Tr   	hovermodexaxisyaxis
showlegenddatalayout)plotly.graph_objs
graph_objsrn   rx   r>   r:   r;   Scatterr7   rS   formatLayoutrK   rQ   Figure)
r$   offsetgotracesrq   scores_curve	first_idxir   figs
             r   create_learning_curves_plotz0CaseEvaluationResult.create_learning_curves_plot   s,    	'%%'D..t4L!"	L 1C 78	MM"**%PY[^_k[lJm'nJmQC,@(@Jm'n'3IJ'?*1*4*;*;D*A % C D ( /66tzzB!	 
   
  iiVFi3
- (os   6D)g333333?gffffff?rJ   )r   r   r   r0   r'   re   ri   rX   rn   rr   rt   rv   rx   r{   r~   r   r   r   r   r   r   rH   rH   T   sN    $;=
(J+!'(5&
'r   rH   c                   p    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
dd
ZddZd Zd Zd Zd ZddZd Zy	)MetricEvaluationResultz
        Evaluation result for one metric.
        Stores all ExecutionCases with specified metric scores
        Computes human-friendly tables with results and some plots
    c                     t        |      dk  r#t        dj                  t        |                  t               | _        t               | _        |D cg c]  }|j                          c}| _        |D ]!  }|j                         }|| j                  |<   # |d   j                         | _	        |d   j                         | _
        t               | _        | j                  j                         D ]  \  }}|j                         | j                  k7  rt        d      |j                         | j                         k7  rt        d      |j                         | j                         k7  s|t        d       y c c}w )Nr)   z%Need at least 2 case results, got {} r   z1Metric names should be equal for all case resultsz1Case results should be computed on the same foldsz.Eval steps should be equal for different cases)r:   r   r   rQ   _case_results_case_comparisonsrX   _casesr{   rL   _baseline_caser   _score_configr   rn   r~   )r$   case_resultscase_resultrT   s       r   r'   zMetricEvaluationResult.__init__   sP   |! G N NsS_O` abb!V!%AMN+{++-N'K'')D'2Dt$ ( $0?#I#I#K *1o668(]#'#5#5#;#;#=T;113t7O7OO#$WXX'')T->->-@@#$WXX((*d.@.@.BB#$TUU $> Os   E;c                 "    t               | _        y rJ   )rQ   r   rk   s    r   __clear_comparisonsz*MetricEvaluationResult.__clear_comparisons  s    !%r   c                 L   |t        |t              rj|t        j                  k(  rt        j	                         }nB|t        j
                  k(  rt        j                         }nt        dj                  |            | j                  |k7  r|| _	        | j                          y y y )NzUnknown scoreType {})
isinstancer   r   r   r-   r   r/   r   r   r   *_MetricEvaluationResult__clear_comparisons)r$   configs     r   _change_score_configz+MetricEvaluationResult._change_score_config  s    &),Y]]*(224Fy}},(224F'(>(E(Ef(MNN!!V+%+"((* , r   c                    t        j                         }| j                  |   j                         }| j                  |   j	                         }| j                  j                         D ]@  \  }}||k7  s|j                         }t        ||      }||z
  }	| j                  j                  t        j                  k(  r|	|j                         z  }	| j                  j                         r|	 }	|	j                         }
t        |	| j                  j                         \  }}t#        |      }||j$                  |df<   |
| j                  j&                  z  |j$                  |df<   dj)                  | j                  j*                  dz        }dj)                  d| j                  j*                  dz  z
        }|| j                  j&                  z  |j$                  ||f<   || j                  j&                  z  |j$                  ||f<   d}|| j                  j*                  k  r|
dkD  rd}n|
dk  rd	}||j$                  |d
f<   | j                  j,                  s|j	                         }t        ||      }||z
  j                         |j$                  |df<   ||j$                  |df<   C |j/                  dg| j                  j                               S )NPValueScorezQuantile {}r   r3   UNKNOWNr   GOODBADDecisionzOverfit iter diffzOverfit iter pValue)by	ascending)rM   	DataFramer   rv   rt   r   r   r   r   r   r   absrL   rZ   r<   rF   r"   strr_   r    r   r!   r#   sort_values)r$   baseline_caseresultbaseline_scoresbaseline_itersrT   r   test_scoresr
   diff	mean_diffleft_quantileright_quantile	case_nameleft_quantile_titleright_quantile_titledecision
test_iterss                     r   _compute_case_result_tablez1MetricEvaluationResult._compute_case_result_table*  s   ,,];LLN++M:NNP#'#5#5#;#;#=T;}$)::<+O[I'+5%%**imm;/"5"5"77D++::< 5D IIK	0J4KOK]K]KlKl1n-~  I	17		)X-.09D<N<N<Y<Y0Y		)W,-&3&:&:4;M;M;Y;Y\];]&^#'4';';C$BTBTB`B`cdBd<d'e$<IDL^L^LiLi<i		)%889=KdN`N`NkNk=k		)%99:$D..::: 1}#)"Q#(3;		)Z/0%%EE!,!@!@!BJ/
KFAKnA\@b@b@dFIIi)<<=BHFIIi)>>?O $>R !!gY$:R:R:a:a:c!ddr   c                     | j                   S )z`

        :return: ExecutionCases used as a baseline (with everything else is compared)
        )r   rk   s    r   get_baseline_casez(MetricEvaluationResult.get_baseline_caseZ  s    
 """r   c                     | j                   S )z4

        :return: Cases which are compared
        )r   rk   s    r   	get_casesz MetricEvaluationResult.get_casesa  s    
 {{r   c                     | j                   S )zD

        :return: Metric for which results were calculated
        rz   rk   s    r   r{   z-MetricEvaluationResult.get_metric_descriptionh  r|   r   Nc                 >    | j                   }| j                  ||      S )a  
        Method to get human-friendly table with model comparisons.

        Returns baseline vs all other computed cases result
        :param score_config: Config to present human-friendly score, optional. Instance of ScoreConfig
        :return: pandas DataFrame. Each row is related to one ExecutionCase.
        Each row describes how better (or worse) this case is compared to baseline.
        )r   get_case_comparison)r$   score_configrT   s      r   get_baseline_comparisonz.MetricEvaluationResult.get_baseline_comparisono  s"     ""''l;;r   c                     | j                  |       || j                  vr| j                  |      | j                  |<   | j                  |   S )a  
        Method to get human-friendly table with model comparisons.
        Same as get_baseline_comparison(), but with other non-baseline case specified as baseline

        :param case: use specified case as baseline
        :param score_config:
        :return: pandas DataFrame. Each row is related to one ExecutionCase.
        Each row describes how better (or worse) this case is compared to baseline.
        )r   r   r   )r$   rT   r   s      r   r   z*MetricEvaluationResult.get_case_comparison{  sL     	!!,/t---+/+J+J4+PD""4(%%d++r   c                 b    || j                   vrt        dj                  |            || _        y)zB

        :param case: new baseline case
        :return:
        z,Case {} is unknown. Can't use it as baselineN)r   r   r   r   r$   rT   s     r   change_baseline_casez+MetricEvaluationResult.change_baseline_case  s2     t))) N U UVZ [\\"r   c                      | j                   |   S )z

        :param case:
        :return: CaseEvaluationResult. Scores and other information about single execution case
        )r   r   s     r   get_case_resultz&MetricEvaluationResult.get_case_result  s     !!$''r   c                 P    | j                   | j                     j                         S )zX

        :return: Folds ids which we used for computing this evaluation result
        )r   r   rn   rk   s    r   rn   z#MetricEvaluationResult.get_fold_ids  s$    
 !!$"5"56CCEEr   c                 P    | j                   | j                     j                         S rJ   )r   r   r~   rk   s    r   r~   z$MetricEvaluationResult.get_eval_step  s"    !!$"5"56DDFFr   c                 `   ddl m} g }| j                         D ]  }| j                  |      }|j	                  |      }||}nt        t        |      dz        }|j                  |j                  t        |t        |            D 	cg c]  }	|	t        |j                               z    c}	||d ddj                  |                    |j                  dj                  | j                  |      dt        d	d
dd      t        dd
d      d      }
|j                  ||
      }|S c c}	w )z

        :param fold: FoldId to plot
        :param offset: first iteration to plot
        :return: plotly figure for all cases on specified fold
        r   Nr   r   zCase {}r   z)Learning curves for metric {} on fold #{}r   r   r   Fr   r   r   r   Tr   r   )r   r   r   r   rx   r>   r:   r;   r   r7   r~   r   r   rL   rQ   r   )r$   rq   r   r   r   rT   r   r   r   r   r   r   s               r   create_fold_learning_curvesz2MetricEvaluationResult.create_fold_learning_curves  sC    	'NN$D..t4K&55d;L!"	L 1C 78	MM

ER[]`am]nLopLoqa#k&?&?&A"BBLop))*5 ' ) 0 0 6  89 % =DDTE]E]_cd!	 
   
  iiVFi3
- qs   #D+c                     | j                   |j                   k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S rJ   )r   r   r   rg   s     r   ri   zMetricEvaluationResult.__eq__  sI    ""e&9&99 0**e.E.EE0KK5<</	1r   rJ   )r   r   r   r0   r'   r   r   r   r   r   r{   r   r   r   r   rn   r~   r   ri   r   r   r   r   r      sX    V6(+.e`#(
<,#(FG(T1r   r   c                   *    e Zd Zd Zd Zd Zd Zd Zy)EvaluationResultsc                    t        |      dk  rt        d      t               | _        t               | _        d | _        |D ]  }|j                         }|| j                  v rt        dj                  |            | j
                  |j                         | _        t        |      }|| j                  |<   || j                  |<    y )Nr)   zNeed at least one resultzDuplicate metric {})
r:   r   rQ   _results_metricsr   r{   r   r   r   )r$   metric_resultsr   rU   keys        r   r'   zEvaluationResults.__init__  s    ~" :;;$F!'!>!>!@!T]]2#$9$@$@AS$TUU{{"$..023EFC!'DMM#!3DMM# %r   c                 2    | j                   t        |         S )z^

        :param metric:
        :return: MetricEvaluationResult for specified metric
        )r   r   )r$   metrics     r   get_metric_resultsz$EvaluationResults.get_metric_results  s     }}=fEFFr   c                     | j                   S )zD

        :return: Names of the metrics which were computed
        )r   rk   s    r   get_metricszEvaluationResults.get_metrics  s    
 }}r   c                     | j                   S )z

        :return: Results are the map from metric names to computed results (instance of MetricEvaluationResult)
         on this fold
        )r   rk   s    r   get_resultszEvaluationResults.get_results  s     }}r   c                 j    | j                   j                         D ]  \  }}|j                  |        y)zY
            Could be used to change baseline cases for already computed results
        N)r   r   r   )r$   rT   r   metric_results       r   set_baseline_casez#EvaluationResults.set_baseline_case  s.     (,}}':':'<#V]..t4 (=r   N)r   r   r   r'   r   r  r  r  r   r   r   r   r     s    4$G5r   r   )g?i  )numpyr5   pandasrM   enumr    r   corer   utilsr   r   r   r   rF   rH   r   r   r   r   r   <module>r     s\        3 #5 
$. $.N4a aH[1 [1|05 05r   