The results calculated according to the formula described in the doc are different from the results displayed by the model.
ccylance opened this issue · 0 comments
ccylance commented
catboost version: catboost 1.2
Operating System: macos 14.4.1
CPU: M1
GPU: no
According to the official documentation, the L2 score function is calculated as described here: https://catboost.ai/en/docs/concepts/algorithm-score-functions
According to the formula shown on that page, the score of the best split at depth 1 should be 7.67857143, but the model shows 3.4375.
Here is my code:
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor, Pool
class RmseObjective(object):
    """Custom RMSE objective returning per-object derivative pairs.

    The derivatives are those of ``-(target - approx) ** 2 / 2`` taken with
    respect to ``approx``: the first derivative is ``target - approx`` and
    the second derivative is the constant ``-1``.
    """

    def calc_ders_range(self, approxes, targets, weights):
        """Compute (first derivative, second derivative) for every object.

        Args:
            approxes: Indexable container of current predictions.
            targets: Indexable container of target values, same length as
                ``approxes``.
            weights: Indexable container of per-object weights with the same
                length as ``approxes``, or ``None`` for unweighted objects.

        Returns:
            A list of ``(der1, der2)`` tuples, one per object, each scaled by
            the object's weight when ``weights`` is given.

        Raises:
            AssertionError: If the container lengths do not match.
        """
        assert len(approxes) == len(targets)
        if weights is not None:
            assert len(weights) == len(approxes)

        print('preds:', approxes, len(approxes))

        result = []
        # Index-based access on purpose: only len() and [] are required of
        # the containers handed in by the caller.
        for index in range(len(targets)):
            der1 = targets[index] - approxes[index]
            # The second derivative of the squared-error objective is the
            # constant -1; a value of 0 would drop the curvature entirely
            # and make the weight scaling below a no-op.
            der2 = -1
            if weights is not None:
                der1 *= weights[index]
                der2 *= weights[index]
            result.append((der1, der2))
        return result
class RmseMetric(object):
    """Custom RMSE evaluation metric.

    ``evaluate`` accumulates the weighted squared error and the total weight;
    ``get_final_error`` turns those two sums into the root mean squared error.
    """

    def get_final_error(self, error, weight):
        """Return the square root of ``error / weight``.

        A tiny constant is added to ``weight`` so that a zero total weight
        does not cause a division by zero.
        """
        return np.sqrt(error / (weight + 1e-38))

    def is_max_optimal(self):
        """Return False: smaller values of this metric are better."""
        return False

    def evaluate(self, approxes, target, weight):
        """Accumulate the weighted squared error over all objects.

        Args:
            approxes: Container holding exactly one indexable sequence of
                predictions.
            target: Indexable sequence of target values, same length as
                ``approxes[0]``.
            weight: Indexable sequence of per-object weights, or ``None`` to
                weight every object by 1.0.

        Returns:
            A ``(error_sum, weight_sum)`` tuple.

        Raises:
            AssertionError: If ``approxes`` does not hold exactly one
                sequence, or its length differs from ``target``.
        """
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        approx = approxes[0]
        error_sum = 0.0
        weight_sum = 0.0
        print("approx:", approx)
        for i in range(len(approx)):
            w = 1.0 if weight is None else weight[i]
            weight_sum += w
            error_sum += w * ((approx[i] - target[i])**2)
        return error_sum, weight_sum
# Toy dataset: one numeric feature column, target equal to the row index.
f_1_values = [
    11.625, 2.75, 10.40625, 5.8125, 4.125,
    7.60714286, 10.65, 13.3125, 8.25, 12.75,
    8.875, 8.53125, 11.625, 11.89285714, 11.375,
    7.75,
]
d = pd.DataFrame()
d['f_1'] = np.array(f_1_values)
y = np.arange(len(d))

# Regressor configuration, collected in one place and passed as keyword
# arguments; the custom objective and metric objects are instantiated here.
model_params = {
    'iterations': 1,
    'depth': 1,
    'boosting_type': 'Ordered',
    'has_time': True,
    'leaf_estimation_method': 'Simple',
    'l2_leaf_reg': 0,
    'learning_rate': 1,
    'loss_function': RmseObjective(),
    'eval_metric': RmseMetric(),
    'score_function': "L2",
    'bootstrap_type': "No",
}
model = CatBoostRegressor(**model_params)

# Train on the full dataset, then render the first (and only) tree.
pools = Pool(d, y)
model.fit(pools, silent=True)
model.plot_tree(tree_idx=0, pool=pools)
Here are the results: