Note
Click here to download the full example code
Comparison of XGBRegressor with different transformations
from ai4water.datasets import busan_beach
from ai4water.utils.utils import get_version_info
from ai4water.experiments import TransformationExperiments
from ai4water.hyperopt import Categorical, Integer, Real
from ai4water.utils.utils import dateandtime_now
for k,v in get_version_info().items():
print(f"{k} version: {v}")
python version: 3.7.9 (default, Oct 19 2020, 15:13:17)
[GCC 7.5.0]
os version: posix
ai4water version: 1.06
lightgbm version: 3.3.5
tcn version: 3.5.0
catboost version: 1.1.1
xgboost version: 1.6.2
easy_mpl version: 0.21.2
SeqMetrics version: 1.3.4
tensorflow version: 2.7.0
keras.api._v2.keras version: 2.7.0
numpy version: 1.21.1
pandas version: 1.3.4
matplotlib version: 3.5.3
h5py version: 3.8.0
joblib version: 1.2.0
data = busan_beach()
input_features = data.columns.tolist()[0:-1]
output_features = data.columns.tolist()[-1:]
class MyTransformationExperiments(TransformationExperiments):
def update_paras(self, **kwargs):
y_transformation = kwargs.pop('y_transformation')
if y_transformation == "none":
y_transformation = None
return {
'model': {"XGBRegressor": kwargs},
'y_transformation': y_transformation
}
cases = {
'model_None': {'y_transformation': 'none'},
'model_minmax': {'y_transformation': 'minmax'},
'model_zscore': {'y_transformation': 'zscore'},
'model_center': {'y_transformation': 'center'},
'model_scale': {'y_transformation': 'scale'},
'model_robust': {'y_transformation': 'robust'},
'model_quantile': {'y_transformation': 'quantile'},
'model_box_cox': {'y_transformation': {'method': 'box-cox', 'treat_negatives': True, 'replace_zeros': True}},
'model_yeo-johnson': {'y_transformation': 'yeo-johnson'},
'model_sqrt': {'y_transformation': 'sqrt'},
'model_log': {'y_transformation': {'method':'log', 'treat_negatives': True, 'replace_zeros': True}},
'model_log10': {'y_transformation': {'method':'log10', 'treat_negatives': True, 'replace_zeros': True}},
"model_pareto": {"y_transformation": "pareto"},
"model_vast": {"y_transformation": "vast"},
"model_mmad": {"y_transformation": "mmad"}
}
num_samples=10
search_space = [
# maximum number of trees that can be built
Integer(low=10, high=30, name='iterations', num_samples=num_samples),
# Used for reducing the gradient step.
Real(low=0.09, high=0.3, prior='log-uniform', name='learning_rate', num_samples=num_samples),
# Coefficient at the L2 regularization term of the cost function.
Real(low=0.5, high=5.0, name='l2_leaf_reg', num_samples=num_samples),
# arger the value, the smaller the model size.
Real(low=0.1, high=10, name='model_size_reg', num_samples=num_samples),
# percentage of features to use at each split selection, when features are selected over again at random.
Real(low=0.1, high=0.5, name='rsm', num_samples=num_samples),
# number of splits for numerical features
Integer(low=32, high=50, name='border_count', num_samples=num_samples),
# The quantization mode for numerical features. The quantization mode for numerical features.
Categorical(categories=['Median', 'Uniform', 'UniformAndQuantiles',
'MaxLogSum', 'MinEntropy', 'GreedyLogSum'], name='feature_border_type')
]
x0 = [10, 0.11, 1.0, 1.0, 0.2, 45, "Uniform"]
experiment = MyTransformationExperiments(
cases=cases,
input_features=input_features,
output_features = output_features,
param_space=search_space,
x0=x0,
verbosity=0,
split_random=True,
exp_name = f"xgb_y_exp_{dateandtime_now()}",
save=False
)
experiment.fit(data = data, run_type='dry_run')
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
running None model
[02:19:51] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
running minmax model
[02:19:54] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
running zscore model
[02:19:56] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
running center model
[02:19:59] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
running scale model
[02:20:01] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
running robust model
[02:20:04] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
running quantile model
[02:20:06] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
running box_cox model
[02:20:09] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
running yeo-johnson model
[02:20:12] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
running sqrt model
[02:20:14] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
running log model
[02:20:17] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
running log10 model
[02:20:19] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
running pareto model
[02:20:22] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
running vast model
[02:20:25] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
running mmad model
[02:20:27] WARNING: ../src/learner.cc:627:
Parameters: { "border_count", "feature_border_type", "iterations", "l2_leaf_reg", "model_size_reg", "rsm" } might not be used.
This could be a false alarm, with some parameters getting used by language bindings but
then being mistakenly passed down to XGBoost core, or some parameter actually being used
but getting flagged wrongly here. Please open an issue if you find any such cases.
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
/home/docs/checkouts/readthedocs.org/user_builds/ai4water-experiments/envs/latest/lib/python3.7/site-packages/scipy/stats/stats.py:283: RuntimeWarning: invalid value encountered in log
log_a = np.log(a)
experiment.compare_errors('rmse', data=data)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
experiment.compare_errors('r2', data=data)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
experiment.compare_errors('r2_score', data=data)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
experiment.compare_errors('nrmse', data=data)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
experiment.taylor_plot(data=data)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
<Figure size 900x700 with 2 Axes>
experiment.compare_edf_plots(data=data)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
experiment.compare_regression_plots(data=data)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
experiment.compare_residual_plots(data=data)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (121, 13)
target shape: (121, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (31, 13)
target shape: (31, 1)
********** Removing Examples with nan in labels **********
***** Test *****
input_x shape: (66, 13)
target shape: (66, 1)
<Figure size 640x480 with 16 Axes>
Total running time of the script: ( 0 minutes 52.032 seconds)