您的位置:首页 > 编程语言 > Go语言

lightgbm algorithm case of kaggle(下)

2018-03-22 00:00 429 查看


作者简介Introduction苏高生,西南财经大学统计学硕士毕业,现就职于中国电信,主要负责企业存量客户大数据分析、数据建模。研究方向:机器学习,最喜欢的编程语言:R语言,没有之一。E-mail:sugs01@outlook.com往期回顾:Xgboost算法——Kaggle案例The rxfastforest algorithm case of kaggle


紧接上文:lightgbm algorithm case of kaggle(上)各位看客,请继续......五、二次调参
1.调试weight参数
# 1. Tune the observation-weight multiplier -----------------------------------
# Fixed hyper-parameters carried over from the first tuning round.
grid_search <- expand.grid(
    learning_rate = .125,
    num_leaves = 600,
    max_bin = 30,
    min_data_in_bin = 64,
    feature_fraction = .64,
    min_sum_hessian = .004,
    lambda_l1 = .002,
    lambda_l2 = .008,
    drop_rate = .3,
    max_drop = 5)
# FIX: the loop tries 20 weight multipliers, so the result vector must have
# length 20 (the original used nrow(grid_search) == 1 and grew it silently).
perf_weight_2 <- numeric(20)
for (i in seq_len(20)) {
    # Up-weight positive rows (TARGET == 1) by factor i, then normalise so
    # the weights sum to 1.
    lgb_weight <- (lgb_tr$TARGET * i + 1) / sum(lgb_tr$TARGET * i + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Model parameters: the single grid row.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[1, 'learning_rate'],
        num_leaves = grid_search[1, 'num_leaves'],
        max_bin = grid_search[1, 'max_bin'],
        min_data_in_bin = grid_search[1, 'min_data_in_bin'],
        feature_fraction = grid_search[1, 'feature_fraction'],
        min_sum_hessian = grid_search[1, 'min_sum_hessian'],
        lambda_l1 = grid_search[1, 'lambda_l1'],
        lambda_l2 = grid_search[1, 'lambda_l2'],
        drop_rate = grid_search[1, 'drop_rate'],
        max_drop = grid_search[1, 'max_drop']
    )
    # 10-fold stratified cross-validation with early stopping.
    # FIX: removed the stray `learning_rate = .1` argument, which conflicted
    # with the learning rate already supplied through `params` (none of the
    # other tuning loops pass it).
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    # Keep the AUC of the final recorded iteration.
    auc_hist <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_weight_2[i] <- auc_hist[length(auc_hist)]
}
ggplot(data.frame(num = 1:length(perf_weight_2), perf = perf_weight_2), aes(x = num, y = perf)) +
    geom_point() +
    geom_smooth()
结论:从此图可知,weight>=2时auc趋于稳定,weight=8时取最大值
2.调试learning_rate参数
# 2. Tune learning_rate -------------------------------------------------------
grid_search <- expand.grid(
    learning_rate = seq(.05, .5, .01),
    num_leaves = 600,
    max_bin = 30,
    min_data_in_bin = 64,
    feature_fraction = .64,
    min_sum_hessian = .004,
    lambda_l1 = .002,
    lambda_l2 = .008,
    drop_rate = .3,
    max_drop = 5)
perf_learning_rate_1 <- numeric(length = nrow(grid_search))
# FIX: the `for` keyword was garbled across several lines (page-extraction
# junk "fo / 1844e / r(") in the original; reconstructed the loop header.
for (i in seq_len(nrow(grid_search))) {
    # weight multiplier 8 was selected in the previous step
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Parameters: row i of the grid.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[i, 'learning_rate'],
        num_leaves = grid_search[i, 'num_leaves'],
        max_bin = grid_search[i, 'max_bin'],
        min_data_in_bin = grid_search[i, 'min_data_in_bin'],
        feature_fraction = grid_search[i, 'feature_fraction'],
        min_sum_hessian = grid_search[i, 'min_sum_hessian'],
        lambda_l1 = grid_search[i, 'lambda_l1'],
        lambda_l2 = grid_search[i, 'lambda_l2'],
        drop_rate = grid_search[i, 'drop_rate'],
        max_drop = grid_search[i, 'max_drop']
    )
    # 10-fold stratified CV with early stopping; record the last AUC.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    auc_hist <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_learning_rate_1[i] <- auc_hist[length(auc_hist)]
}
grid_search$perf <- perf_learning_rate_1
ggplot(data = grid_search, aes(x = learning_rate, y = perf)) +
    geom_point() +
    geom_smooth()
结论:learning_rate=.2时,auc最大
3.调试num_leaves参数
# 3. Tune num_leaves ----------------------------------------------------------
grid_search <- expand.grid(
    learning_rate = .2,
    num_leaves = seq(50, 800, 50),
    max_bin = 30,
    min_data_in_bin = 64,
    feature_fraction = .64,
    min_sum_hessian = .004,
    lambda_l1 = .002,
    lambda_l2 = .008,
    drop_rate = .3,
    max_drop = 5
)
perf_num_leaves_1 <- numeric(length = nrow(grid_search))
for (k in seq_len(nrow(grid_search))) {
    # Weight multiplier 8 was selected in the weight-tuning step.
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Row k of the grid as a LightGBM parameter list.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[k, 'learning_rate'],
        num_leaves = grid_search[k, 'num_leaves'],
        max_bin = grid_search[k, 'max_bin'],
        min_data_in_bin = grid_search[k, 'min_data_in_bin'],
        feature_fraction = grid_search[k, 'feature_fraction'],
        min_sum_hessian = grid_search[k, 'min_sum_hessian'],
        lambda_l1 = grid_search[k, 'lambda_l1'],
        lambda_l2 = grid_search[k, 'lambda_l2'],
        drop_rate = grid_search[k, 'drop_rate'],
        max_drop = grid_search[k, 'max_drop']
    )
    # 10-fold stratified CV with early stopping.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    # Keep the AUC from the final recorded iteration.
    cv_auc <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_num_leaves_1[k] <- cv_auc[length(cv_auc)]
}
grid_search$perf <- perf_num_leaves_1
ggplot(data = grid_search, aes(x = num_leaves, y = perf)) +
    geom_point() +
    geom_smooth()
结论:num_leaves=300时,auc最大
4.调试max_bin参数
# 4. Tune max_bin -------------------------------------------------------------
grid_search <- expand.grid(
    learning_rate = .2,
    num_leaves = 300,
    max_bin = seq(30, 150, 10),
    min_data_in_bin = 64,
    feature_fraction = .64,
    min_sum_hessian = .004,
    lambda_l1 = .002,
    lambda_l2 = .008,
    drop_rate = .3,
    max_drop = 5
)
perf_max_bin_1 <- numeric(length = nrow(grid_search))
for (k in seq_len(nrow(grid_search))) {
    # Weight multiplier 8 was selected in the weight-tuning step.
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Row k of the grid as a LightGBM parameter list.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[k, 'learning_rate'],
        num_leaves = grid_search[k, 'num_leaves'],
        max_bin = grid_search[k, 'max_bin'],
        min_data_in_bin = grid_search[k, 'min_data_in_bin'],
        feature_fraction = grid_search[k, 'feature_fraction'],
        min_sum_hessian = grid_search[k, 'min_sum_hessian'],
        lambda_l1 = grid_search[k, 'lambda_l1'],
        lambda_l2 = grid_search[k, 'lambda_l2'],
        drop_rate = grid_search[k, 'drop_rate'],
        max_drop = grid_search[k, 'max_drop']
    )
    # 10-fold stratified CV with early stopping.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    # Keep the AUC from the final recorded iteration.
    cv_auc <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_max_bin_1[k] <- cv_auc[length(cv_auc)]
}
grid_search$perf <- perf_max_bin_1
ggplot(data = grid_search, aes(x = max_bin, y = perf)) +
    geom_point() +
    geom_smooth()
结论:max_bin=120时,auc最大
5.调试min_data_in_bin参数
# 5. Tune min_data_in_bin -----------------------------------------------------
grid_search <- expand.grid(
    learning_rate = .2,
    num_leaves = 300,
    max_bin = 120,
    min_data_in_bin = seq(20, 100, 5),
    feature_fraction = .64,
    min_sum_hessian = .004,
    lambda_l1 = .002,
    lambda_l2 = .008,
    drop_rate = .3,
    max_drop = 5
)
perf_min_data_in_bin_1 <- numeric(length = nrow(grid_search))
for (k in seq_len(nrow(grid_search))) {
    # Weight multiplier 8 was selected in the weight-tuning step.
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Row k of the grid as a LightGBM parameter list.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[k, 'learning_rate'],
        num_leaves = grid_search[k, 'num_leaves'],
        max_bin = grid_search[k, 'max_bin'],
        min_data_in_bin = grid_search[k, 'min_data_in_bin'],
        feature_fraction = grid_search[k, 'feature_fraction'],
        min_sum_hessian = grid_search[k, 'min_sum_hessian'],
        lambda_l1 = grid_search[k, 'lambda_l1'],
        lambda_l2 = grid_search[k, 'lambda_l2'],
        drop_rate = grid_search[k, 'drop_rate'],
        max_drop = grid_search[k, 'max_drop']
    )
    # 10-fold stratified CV with early stopping.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    # Keep the AUC from the final recorded iteration.
    cv_auc <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_min_data_in_bin_1[k] <- cv_auc[length(cv_auc)]
}
grid_search$perf <- perf_min_data_in_bin_1
ggplot(data = grid_search, aes(x = min_data_in_bin, y = perf)) +
    geom_point() +
    geom_smooth()
结论:min_data_in_bin=20时,auc最大
6.调试feature_fraction参数
# 6. Tune feature_fraction ----------------------------------------------------
grid_search <- expand.grid(
    learning_rate = .2,
    num_leaves = 300,
    max_bin = 120,
    min_data_in_bin = 20,
    # NOTE(review): the original text showed a single value (.5), but the
    # conclusion below compares .5 and .62, so the search range was lost in
    # extraction — confirm the intended sequence.
    feature_fraction = seq(.5, .9, .02),
    min_sum_hessian = .004,
    lambda_l1 = .002,
    lambda_l2 = .008,
    drop_rate = .3,
    max_drop = 5)
# FIX: this assignment was fused onto the `expand.grid(...)` closing line in
# the original (a syntax error).
perf_feature_fraction_1 <- numeric(length = nrow(grid_search))
for (i in seq_len(nrow(grid_search))) {
    # Weight multiplier 8 was selected in the weight-tuning step.
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Parameters: row i of the grid.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[i, 'learning_rate'],
        num_leaves = grid_search[i, 'num_leaves'],
        max_bin = grid_search[i, 'max_bin'],
        min_data_in_bin = grid_search[i, 'min_data_in_bin'],
        feature_fraction = grid_search[i, 'feature_fraction'],
        min_sum_hessian = grid_search[i, 'min_sum_hessian'],
        lambda_l1 = grid_search[i, 'lambda_l1'],
        lambda_l2 = grid_search[i, 'lambda_l2'],
        drop_rate = grid_search[i, 'drop_rate'],
        max_drop = grid_search[i, 'max_drop']
    )
    # 10-fold stratified CV with early stopping; record the last AUC.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    auc_hist <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_feature_fraction_1[i] <- auc_hist[length(auc_hist)]
}
grid_search$perf <- perf_feature_fraction_1
ggplot(data = grid_search, aes(x = feature_fraction, y = perf)) +
    geom_point() +
    geom_smooth()
结论:feature_fraction=.5时,auc最大,=.62时也较好
7.调试min_sum_hessian参数
# 7. Tune min_sum_hessian -----------------------------------------------------
grid_search <- expand.grid(
    learning_rate = .2,
    num_leaves = 300,
    max_bin = 120,
    min_data_in_bin = 20,
    feature_fraction = .5,
    # NOTE(review): the original text showed a single value (0), but the
    # conclusion below describes a negative correlation across a range, so the
    # search sequence was lost in extraction — confirm the intended values.
    min_sum_hessian = seq(0, .01, .001),
    lambda_l1 = .002,
    lambda_l2 = .008,
    drop_rate = .3,
    max_drop = 5
)
perf_min_sum_hessian_1 <- numeric(length = nrow(grid_search))
for (i in seq_len(nrow(grid_search))) {
    # Weight multiplier 8 was selected in the weight-tuning step.
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Parameters: row i of the grid.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[i, 'learning_rate'],
        num_leaves = grid_search[i, 'num_leaves'],
        max_bin = grid_search[i, 'max_bin'],
        min_data_in_bin = grid_search[i, 'min_data_in_bin'],
        feature_fraction = grid_search[i, 'feature_fraction'],
        min_sum_hessian = grid_search[i, 'min_sum_hessian'],
        lambda_l1 = grid_search[i, 'lambda_l1'],
        lambda_l2 = grid_search[i, 'lambda_l2'],
        drop_rate = grid_search[i, 'drop_rate'],
        max_drop = grid_search[i, 'max_drop']
    )
    # 10-fold stratified CV with early stopping; record the last AUC.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    auc_hist <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_min_sum_hessian_1[i] <- auc_hist[length(auc_hist)]
}
grid_search$perf <- perf_min_sum_hessian_1
ggplot(data = grid_search, aes(x = min_sum_hessian, y = perf)) +
    geom_point() +
    geom_smooth()
结论:min_sum_hessian与auc呈负相关,故取min_sum_hessian=0
8.调试lambda参数
# 8. Tune lambda_l1 / lambda_l2 jointly ---------------------------------------
grid_search <- expand.grid(
    learning_rate = .2,
    num_leaves = 300,
    max_bin = 120,
    min_data_in_bin = 20,
    feature_fraction = .5,
    min_sum_hessian = 0,
    lambda_l1 = seq(0, .01, .002),
    lambda_l2 = seq(0, .01, .002),
    drop_rate = .3,
    max_drop = 5
)
perf_lambda_1 <- numeric(length = nrow(grid_search))
for (i in seq_len(nrow(grid_search))) {
    # Weight multiplier 8 was selected in the weight-tuning step.
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Parameters: row i of the grid.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[i, 'learning_rate'],
        num_leaves = grid_search[i, 'num_leaves'],
        max_bin = grid_search[i, 'max_bin'],
        min_data_in_bin = grid_search[i, 'min_data_in_bin'],
        feature_fraction = grid_search[i, 'feature_fraction'],
        min_sum_hessian = grid_search[i, 'min_sum_hessian'],
        lambda_l1 = grid_search[i, 'lambda_l1'],
        lambda_l2 = grid_search[i, 'lambda_l2'],
        drop_rate = grid_search[i, 'drop_rate'],
        max_drop = grid_search[i, 'max_drop']
    )
    # 10-fold stratified CV with early stopping; record the last AUC.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    auc_hist <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_lambda_1[i] <- auc_hist[length(auc_hist)]
}
# FIX: the original assigned from `perf_lamda_1` (typo), an undefined object.
grid_search$perf <- perf_lambda_1
# One panel per lambda_l2 value, AUC versus lambda_l1.
ggplot(data = grid_search, aes(x = lambda_l1, y = perf)) +
    geom_point() +
    facet_wrap(~ lambda_l2, nrow = 5)
结论:lambda与auc呈负相关,取lambda_l1=.002, lambda_l2 = .01
9.调试drop_rate参数
# 9. Tune drop_rate (DART) ----------------------------------------------------
grid_search <- expand.grid(
    learning_rate = .2,
    num_leaves = 300,
    max_bin = 120,
    min_data_in_bin = 20,
    feature_fraction = .5,
    min_sum_hessian = 0,
    lambda_l1 = .002,
    lambda_l2 = .01,
    drop_rate = seq(0, .5, .05),
    max_drop = 5)
# FIX: this assignment was fused onto the `expand.grid(...)` closing line in
# the original (a syntax error).
perf_drop_rate_1 <- numeric(length = nrow(grid_search))
for (i in seq_len(nrow(grid_search))) {
    # Weight multiplier 8 was selected in the weight-tuning step.
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Parameters: row i of the grid.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[i, 'learning_rate'],
        num_leaves = grid_search[i, 'num_leaves'],
        max_bin = grid_search[i, 'max_bin'],
        min_data_in_bin = grid_search[i, 'min_data_in_bin'],
        feature_fraction = grid_search[i, 'feature_fraction'],
        min_sum_hessian = grid_search[i, 'min_sum_hessian'],
        lambda_l1 = grid_search[i, 'lambda_l1'],
        lambda_l2 = grid_search[i, 'lambda_l2'],
        drop_rate = grid_search[i, 'drop_rate'],
        max_drop = grid_search[i, 'max_drop']
    )
    # 10-fold stratified CV with early stopping; record the last AUC.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    auc_hist <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_drop_rate_1[i] <- auc_hist[length(auc_hist)]
}
grid_search$perf <- perf_drop_rate_1
ggplot(data = grid_search, aes(x = drop_rate, y = perf)) +
    geom_point()
结论:drop_rate=.3时取到最大值,与第一次调参没有变化
10.调试max_drop参数
# 10. Tune max_drop (DART) ----------------------------------------------------
grid_search <- expand.grid(
    learning_rate = .2,
    num_leaves = 300,
    max_bin = 120,
    min_data_in_bin = 20,
    feature_fraction = .5,
    min_sum_hessian = 0,
    lambda_l1 = .002,
    lambda_l2 = .01,
    drop_rate = .3,
    max_drop = seq(19, 29, 2)
)
perf_max_drop_1 <- numeric(length = nrow(grid_search))
for (i in seq_len(nrow(grid_search))) {
    # Weight multiplier 8 was selected in the weight-tuning step.
    lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
    lgb_train <- lgb.Dataset(
        data = data.matrix(lgb_tr[, 1:137]),
        label = lgb_tr$TARGET,
        free_raw_data = FALSE,
        weight = lgb_weight
    )
    # Parameters: row i of the grid.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search[i, 'learning_rate'],
        num_leaves = grid_search[i, 'num_leaves'],
        max_bin = grid_search[i, 'max_bin'],
        min_data_in_bin = grid_search[i, 'min_data_in_bin'],
        feature_fraction = grid_search[i, 'feature_fraction'],
        min_sum_hessian = grid_search[i, 'min_sum_hessian'],
        lambda_l1 = grid_search[i, 'lambda_l1'],
        lambda_l2 = grid_search[i, 'lambda_l2'],
        drop_rate = grid_search[i, 'drop_rate'],
        max_drop = grid_search[i, 'max_drop']
    )
    # 10-fold stratified CV with early stopping; record the last AUC.
    lgb_tr_mod <- lgb.cv(
        params,
        data = lgb_train,
        nrounds = 300,
        stratified = TRUE,
        nfold = 10,
        num_threads = 2,
        early_stopping_rounds = 10
    )
    auc_hist <- unlist(lgb_tr_mod$record_evals$valid$auc$eval)
    perf_max_drop_1[i] <- auc_hist[length(auc_hist)]
}
# FIX: the loop close, this assignment, and the ggplot call were all fused
# onto one line in the original (`perf_max_drop_1ggplot...` — a syntax error).
grid_search$perf <- perf_max_drop_1
ggplot(data = grid_search, aes(x = max_drop, y = perf)) +
    geom_point()
结论:max_drop=23时取到最大值
六、集成学习
1)参数
# 1) Random hyper-parameter combinations for the ensemble ----------------------
set.seed(1)
grid_search <- expand.grid(
    learning_rate = sample(115:125, 10, replace = FALSE) / 100,
    num_leaves = sample(250:350, 10, replace = FALSE),
    max_bin = sample(115:125, 5, replace = FALSE),
    # NOTE(review): no `size` argument here, so this keeps all 5 values
    # (a permutation of 18:22) — confirm whether a subset size was intended,
    # as on the lines above.
    min_data_in_bin = sample(18:22, replace = FALSE),
    feature_fraction = c(.5, .62),
    min_sum_hessian = 0,
    lambda_l1 = .002,
    lambda_l2 = c(.008, .009, .01),
    drop_rate = sample(126:134, 4, replace = FALSE) / 1000,
    max_drop = c(23, 27, 29)
)
# Draw 100 random combinations; each becomes one ensemble member.
sample_ind <- sample(dim(grid_search)[1], 100, replace = FALSE)
# FIX: the following three statements were fused onto one line in the
# original (a syntax error).
lgb.pred <- list()
grid_search2 <- grid_search[sample_ind, ]
rm(grid_search)
2)权重
# Observation weights: positive rows (TARGET == 1) receive 8 + 1 = 9x the
# weight of negatives, normalised to sum to 1 (multiplier 8 chosen in step V.1).
lgb_weight <- (lgb_tr$TARGET * 8 + 1) / sum(lgb_tr$TARGET * 8 + 1)
3)训练数据集
# Training dataset shared by every ensemble member (raw data kept so the
# dataset can be reused across repeated training runs).
lgb_train <- lgb.Dataset(
    data = data.matrix(lgb_tr[, 1:137]),
    label = lgb_tr$TARGET,
    weight = lgb_weight,
    free_raw_data = FALSE
)
4)训练
# 4) Train one model per sampled combination; predict the test set each time.
# FIX: the original looped over `1:nrow(grid_search2)[1]` — the `[1]` was a
# no-op on a scalar; seq_len() is the safe equivalent.
for (i in seq_len(nrow(grid_search2))) {
    # Parameters: row i of the sampled grid.
    params <- list(
        objective = 'binary',
        metric = 'auc',
        learning_rate = grid_search2[i, 'learning_rate'],
        num_leaves = grid_search2[i, 'num_leaves'],
        max_bin = grid_search2[i, 'max_bin'],
        min_data_in_bin = grid_search2[i, 'min_data_in_bin'],
        feature_fraction = grid_search2[i, 'feature_fraction'],
        min_sum_hessian = grid_search2[i, 'min_sum_hessian'],
        lambda_l1 = grid_search2[i, 'lambda_l1'],
        lambda_l2 = grid_search2[i, 'lambda_l2'],
        drop_rate = grid_search2[i, 'drop_rate'],
        max_drop = grid_search2[i, 'max_drop']
    )
    # NOTE(review): early_stopping_rounds is passed without a validation set;
    # confirm how the installed lightgbm version handles this.
    lgb_mod <- lightgbm(
        params = params,
        data = lgb_train,
        nrounds = 300,
        early_stopping_rounds = 10,
        num_threads = 2
    )
    # Predicted probabilities for the test set, one element per model.
    lgb.pred[[i]] <- predict(lgb_mod, data.matrix(lgb_te))
}
5)结果
# 5) Average the ensemble members' predictions (one column per model).
# FIX: derive the column count from lgb.pred instead of hard-coding 100, so
# the code still works if the number of sampled combinations changes;
# rowMeans() replaces the slower apply(..., 1, mean) with identical results.
lgb.pred2 <- matrix(unlist(lgb.pred), ncol = length(lgb.pred))
lgb.pred3 <- data.frame(prob1 = rowMeans(lgb.pred2))
6)输出
# 6) Write the averaged predictions to disk.
# FIX: the original line was missing the closing parenthesis.
# NOTE(review): Kaggle submissions typically require row.names = FALSE and an
# ID column — confirm the expected submission format before uploading.
write.csv(lgb.pred3, "C:/Users/Administrator/Documents/kaggle/scs_lgb/lgb.pred1.csv")

 往期精彩内容整理合集 2017年R语言发展报告(国内)R语言中文社区历史文章整理(作者篇)
R语言中文社区历史文章整理(类型篇)


公众号后台回复关键字即可学习回复 R                  R语言快速入门及数据挖掘 
回复 Kaggle案例  Kaggle十大案例精讲(连载中)
回复 文本挖掘      手把手教你做文本挖掘
回复 可视化          R语言可视化在商务场景中的应用 
回复 大数据         大数据系列免费视频教程 
回复 量化投资      张丹教你如何用R语言量化投资 
回复 用户画像      京东大数据,揭秘用户画像
回复 数据挖掘     常用数据挖掘算法原理解释与应用
回复 机器学习     人工智能系列之机器学习与实践
回复 爬虫            R语言爬虫实战案例分享
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: