Optimizing Random Forest or LightGBM hyperparameters with Optuna

Personal memorandum

The Boston house-price dataset is used as the example.


# !pip install optuna lightgbm
from functools import partial

import optuna
import lightgbm as lgb

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.metrics import mean_absolute_error
from sklearn import datasets

def objective(X, y, trial, clf='RandomForestRegressor'):
    """Objective function to minimize: cross-validated mean absolute error.

    Args:
        X: Feature matrix handed to the model (or to ``lgb.Dataset``).
        y: Target values.
        trial: Optuna trial used to sample the hyperparameters.
        clf: Model to tune, either ``'RandomForestRegressor'`` (default)
            or ``'LGB'``.

    Returns:
        The 5-fold cross-validated mean absolute error (lower is better).

    Raises:
        ValueError: If ``clf`` is not one of the supported model names.
    """
    if clf == 'RandomForestRegressor':
        # Sample on a log scale, then truncate to the integers the
        # estimator expects.
        params = {
            'n_estimators': int(trial.suggest_loguniform('n_estimators', 1e+2, 1e+3)),
            'max_depth': int(trial.suggest_loguniform('max_depth', 2, 32)),
        }
        model = RandomForestRegressor(**params)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        scores = cross_validate(model, X=X, y=y, cv=kf, n_jobs=-1, scoring='neg_mean_absolute_error')
        # The scorer reports negated MAE; flip the sign so smaller is better.
        return -1 * scores['test_score'].mean()

    if clf == 'LGB':
        params = {
            'objective': 'regression',
            'max_bin': trial.suggest_int('max_bin', 255, 500),
            'learning_rate': 0.05,
            'num_leaves': trial.suggest_int('num_leaves', 32, 128),
            'metrics': 'mae',
        }
        lgb_train = lgb.Dataset(X, y)
        res = lgb.cv(params, lgb_train, num_boost_round=1000, early_stopping_rounds=10, nfold=5, shuffle=True, stratified=False, seed=42)
        # Last entry of the cross-validated L1 history.
        # NOTE(review): the result key name and the early_stopping_rounds
        # argument may differ between LightGBM versions - confirm.
        return res['l1-mean'][-1]

    # Fail loudly instead of hitting an unbound ``score`` variable.
    raise ValueError(
        "clf must be 'RandomForestRegressor' or 'LGB', got {!r}".format(clf)
    )

def main():
    """Run a 30-trial Optuna study on the Boston house-price data.

    The study tunes ``objective`` with its default model and prints the
    best hyperparameters found.
    """
    # NOTE(review): load_boston may be unavailable in recent scikit-learn
    # releases - confirm against the installed version.
    boston = datasets.load_boston()
    # Bind the data so the study only has to supply the trial argument.
    bound_objective = partial(objective, boston.data, boston.target)
    study = optuna.create_study()
    study.optimize(bound_objective, n_trials=30)
    print('params:', study.best_params)

# Run the tuning example only when executed as a script, not on import.
if __name__ == '__main__':
    main()

