
Custom loss for multiclass classification #6649

Open
kosnikos opened this issue Sep 10, 2024 · 1 comment
Comments


kosnikos commented Sep 10, 2024

I am trying to write a custom loss function for multiclass classification, and I started by replicating the built-in 'multiclassova' objective:

import numpy as np

def custom_classification_loss(y_true, raw_pred):
    # Reshape raw_pred to (N, num_classes)
    num_classes = 3  # Change this to your actual number of classes
    raw_pred = raw_pred.reshape(-1, num_classes)

    # Apply sigmoid for binary classification on each class (one-vs-all)
    y_pred = 1 / (1 + np.exp(-raw_pred))  # Sigmoid

    # One-hot encode the true labels
    y_true_one_hot = np.zeros_like(raw_pred)
    y_true_one_hot[np.arange(len(y_true)), y_true.astype(int)] = 1

    # Gradient for binary cross-entropy for each class
    grad = y_pred - y_true_one_hot

    # Hessian for binary cross-entropy for each class
    hess = y_pred * (1 - y_pred)
    
    return grad.ravel(), hess.ravel()

I am calling this function with
lgb.LGBMClassifier(objective=custom_classification_loss, num_class=3)

and making predictions by applying the sigmoid function to the raw scores:
probabilities = sigmoid(raw_preds)

However, the results are completely different from (and much worse than) the predictions obtained with the built-in 'multiclassova' objective.

Does anyone know how 'multiclassova' is implemented under the hood?

@jmoralez
Collaborator

Hey @kosnikos, thanks for using LightGBM. This is most likely because of the init_score. Can you compare your results against the built-in objective with boost_from_average set to False? That would make the boosting start from zero in both cases. Also note that when using a custom objective, the predictions returned by the model are always the raw scores, so you'll have to convert them.
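For the conversion step, here is a minimal NumPy sketch, assuming the raw scores arrive as a (n_samples, n_classes) array (the example values are made up for illustration). With a one-vs-all objective each class gets its own sigmoid, so the rows do not need to sum to 1:

```python
import numpy as np

def sigmoid(x):
    # logistic function, applied elementwise
    return 1.0 / (1.0 + np.exp(-x))

# hypothetical raw scores for 4 samples and 3 classes
raw_preds = np.array([[ 2.0, -1.0, 0.5],
                      [-0.5,  1.5, 0.0],
                      [ 0.0,  0.0, 0.0],
                      [ 3.0, -2.0, 1.0]])

# one-vs-all: per-class sigmoid; rows are not normalized to sum to 1
probabilities = sigmoid(raw_preds)
predicted_class = probabilities.argmax(axis=1)
```

If you need proper probabilities that sum to 1 per row, you would have to normalize them yourself, which is also what the built-in prediction path does not do for 'multiclassova' raw scores converted this way.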

If you want to use the built-in init scores, you can find an example of how to replicate the L2 loss in #5114 (comment). Note that you'll have to adjust the calculation of the init scores to use the ones for BinaryLogloss:

double BoostFromScore(int) const override {
  double suml = 0.0f;
  double sumw = 0.0f;
  if (weights_ != nullptr) {
    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:suml, sumw) if (!deterministic_)
    for (data_size_t i = 0; i < num_data_; ++i) {
      suml += is_pos_(label_[i]) * weights_[i];
      sumw += weights_[i];
    }
  } else {
    sumw = static_cast<double>(num_data_);
    #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) reduction(+:suml) if (!deterministic_)
    for (data_size_t i = 0; i < num_data_; ++i) {
      suml += is_pos_(label_[i]);
    }
  }
  if (Network::num_machines() > 1) {
    suml = Network::GlobalSyncUpBySum(suml);
    sumw = Network::GlobalSyncUpBySum(sumw);
  }
  double pavg = suml / sumw;
  pavg = std::min(pavg, 1.0 - kEpsilon);
  pavg = std::max<double>(pavg, kEpsilon);
  double initscore = std::log(pavg / (1.0f - pavg)) / sigmoid_;
  Log::Info("[%s:%s]: pavg=%f -> initscore=%f", GetName(), __func__, pavg, initscore);
  return initscore;
}
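Stripped of the weighting, OpenMP, and distributed-training details, the C++ above reduces to the log-odds of the class frequency divided by the sigmoid parameter. A Python sketch (function and variable names are mine, not LightGBM's):

```python
import numpy as np

def binary_init_score(labels, class_id, sigmoid=1.0, weights=None, eps=1e-15):
    # Replicates the core of BinaryLogloss::BoostFromScore for one class
    # of the one-vs-all split: init = log(pavg / (1 - pavg)) / sigmoid,
    # where pavg is the (weighted) fraction of samples in class_id.
    is_pos = (np.asarray(labels).astype(int) == class_id).astype(float)
    if weights is None:
        pavg = is_pos.mean()
    else:
        pavg = np.average(is_pos, weights=weights)
    pavg = np.clip(pavg, eps, 1.0 - eps)  # mirrors the kEpsilon clamping
    return np.log(pavg / (1.0 - pavg)) / sigmoid

labels = np.array([0, 0, 0, 1, 2, 2])
init_scores = [binary_init_score(labels, c) for c in range(3)]
```

Each class of the OVA objective gets its own init score this way, which is exactly what MulticlassOVA::BoostFromScore delegates to below.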

You can find the implementation of the MulticlassOVA objective here:

class MulticlassOVA: public ObjectiveFunction {
 public:
  explicit MulticlassOVA(const Config& config) {
    num_class_ = config.num_class;
    for (int i = 0; i < num_class_; ++i) {
      binary_loss_.emplace_back(
        new BinaryLogloss(config, [i](label_t label) { return static_cast<int>(label) == i; }));
    }
    sigmoid_ = config.sigmoid;
  }

  explicit MulticlassOVA(const std::vector<std::string>& strs) {
    num_class_ = -1;
    sigmoid_ = -1;
    for (auto str : strs) {
      auto tokens = Common::Split(str.c_str(), ':');
      if (tokens.size() == 2) {
        if (tokens[0] == std::string("num_class")) {
          Common::Atoi(tokens[1].c_str(), &num_class_);
        } else if (tokens[0] == std::string("sigmoid")) {
          Common::Atof(tokens[1].c_str(), &sigmoid_);
        }
      }
    }
    if (num_class_ < 0) {
      Log::Fatal("Objective should contain num_class field");
    }
    if (sigmoid_ <= 0.0) {
      Log::Fatal("Sigmoid parameter %f should be greater than zero", sigmoid_);
    }
  }

  ~MulticlassOVA() {
  }

  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
    for (int i = 0; i < num_class_; ++i) {
      binary_loss_[i]->Init(metadata, num_data);
    }
  }

  void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
    for (int i = 0; i < num_class_; ++i) {
      int64_t offset = static_cast<int64_t>(num_data_) * i;
      binary_loss_[i]->GetGradients(score + offset, gradients + offset, hessians + offset);
    }
  }

  const char* GetName() const override {
    return "multiclassova";
  }

  void ConvertOutput(const double* input, double* output) const override {
    for (int i = 0; i < num_class_; ++i) {
      output[i] = 1.0f / (1.0f + std::exp(-sigmoid_ * input[i]));
    }
  }

  std::string ToString() const override {
    std::stringstream str_buf;
    str_buf << GetName() << " ";
    str_buf << "num_class:" << num_class_ << " ";
    str_buf << "sigmoid:" << sigmoid_;
    return str_buf.str();
  }

  bool SkipEmptyClass() const override { return true; }

  int NumModelPerIteration() const override { return num_class_; }

  int NumPredictOneRow() const override { return num_class_; }

  bool NeedAccuratePrediction() const override { return false; }

  double BoostFromScore(int class_id) const override {
    return binary_loss_[class_id]->BoostFromScore(0);
  }

  bool ClassNeedTrain(int class_id) const override {
    return binary_loss_[class_id]->ClassNeedTrain(0);
  }

 protected:
  /*! \brief Number of data */
  data_size_t num_data_;
  /*! \brief Number of classes */
  int num_class_;
  std::vector<std::unique_ptr<BinaryLogloss>> binary_loss_;
  double sigmoid_;
};
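Note two details in the C++ above that the original Python attempt misses: GetGradients dispatches to a BinaryLogloss per class, and ConvertOutput multiplies the score by the sigmoid parameter (config.sigmoid, default 1.0), which also scales the gradient and hessian. A NumPy sketch of what that amounts to, assuming the scores arrive as a (n_samples, n_classes) array (function name and layout are mine; depending on your LightGBM version the custom objective may instead receive a flat array with each class in a contiguous block, so verify the ordering before relying on this):

```python
import numpy as np

def multiclassova_gradients(y_true, raw_pred, num_classes=3, sigmoid=1.0):
    # Per-class binary cross-entropy gradients, mirroring what
    # MulticlassOVA::GetGradients computes via BinaryLogloss.
    raw_pred = np.asarray(raw_pred, dtype=float).reshape(-1, num_classes)
    # per-class probability, as in ConvertOutput: 1 / (1 + exp(-sigmoid * f))
    p = 1.0 / (1.0 + np.exp(-sigmoid * raw_pred))
    y_onehot = np.zeros_like(raw_pred)
    y_onehot[np.arange(len(y_true)), np.asarray(y_true, dtype=int)] = 1.0
    grad = sigmoid * (p - y_onehot)            # d loss / d raw score
    hess = sigmoid ** 2 * p * (1.0 - p)        # second derivative
    return grad, hess

y = np.array([0, 1, 2, 1])
scores = np.zeros((4, 3))  # boosting from zero, i.e. boost_from_average=False
grad, hess = multiclassova_gradients(y, scores)
```

With sigmoid=1.0 this collapses to the gradient in the original post, so the remaining difference really does come down to the init scores (and the raw-score conversion at prediction time).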
