Manually calculated validation loss differs from Lightning's automatic output #21039


`valid_loss_manual` is the batch-averaged validation loss I compute myself, while `val_loss_epoch` is what Lightning logs automatically. The two values always differ.

Besides, the validation loss is not plotted per step. I tried the rich progress bar, but it still does not show it.
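One way to check whether the per-step values are actually being recorded, independently of what the progress bar shows, is to attach a `CSVLogger` and inspect its `metrics.csv`. A minimal sketch, assuming your own module and dataloaders (the logger name and paths below are illustrative):

```python
# Minimal sketch: verify per-step logging via CSVLogger, independent of
# the progress bar. The name "regressive" and the paths are assumptions.
import pytorch_lightning as pl
from pytorch_lightning.loggers import CSVLogger

logger = CSVLogger("logs", name="regressive")
trainer = pl.Trainer(max_epochs=200, logger=logger)
# trainer.fit(module, datamodule=dm)  # plug in your own module / data

# After training, metrics.csv holds one row per logging event; with
# on_step=True in validation_step, a `val_loss_step` column is filled
# once per validation batch:
#
#   import pandas as pd
#   df = pd.read_csv("logs/regressive/version_0/metrics.csv")
#   print(df.filter(like="val_loss").dropna(how="all"))
```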

```python
import importlib
import inspect

import torch
import pytorch_lightning as pl


class RegressiveModule(pl.LightningModule):
    def __init__(self, model_name, train_config, model_config):
        super().__init__()
        self.save_hyperparameters(ignore=["model_name"])
        self.load_model(model_name, model_config)
        self.train_config = train_config
        self.criterion = torch.nn.MSELoss()
        self.training_step_outputs = []
        self.validation_step_outputs = []
        self.test_step_outputs = []
        self.testTime = []
        self.predictions = []
        self.targets = []

    def load_model(self, model_name, model_config):
        # Model files are named `snake_case.py`; the class inside uses the
        # corresponding `CamelCase` name.
        name = model_name
        camel_name = ''.join(i.capitalize() for i in name.split('_'))
        try:
            Model = getattr(
                importlib.import_module('.' + name, package=__package__),
                camel_name,
            )
        except (ImportError, AttributeError):
            raise ValueError(
                f'Invalid module file name or invalid class name {name}.{camel_name}!')
        self.model = self.instancialize(Model)

    def instancialize(self, Model, **other_args):
        """Instantiate a model using the matching parameters from
        self.hparams. Keyword args overwrite the corresponding
        values in self.hparams.
        """
        # getargspec was removed in Python 3.11; use getfullargspec.
        class_args = inspect.getfullargspec(Model.__init__).args[1:]
        inkeys = self.hparams.keys()
        args1 = {arg: getattr(self.hparams, arg) for arg in class_args if arg in inkeys}
        args1.update(other_args)
        return Model(**args1)

    def forward(self, img):
        return self.model(img)

    def training_step(self, batch, batch_idx):
        print('in train loop')
        signal, time_feat, target_seq, pattern_labels = batch
        water_pred_seq = self.model(signal, time_feat)
        loss = self.criterion(water_pred_seq, target_seq)
        self.training_step_outputs.append(loss)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {"loss": loss}

    def on_train_epoch_end(self):
        epoch_average = torch.stack(self.training_step_outputs).mean()
        self.log("training_epoch_average", epoch_average)
        self.training_step_outputs.clear()  # free memory

    def validation_step(self, batch, batch_idx):
        signal, time_feat, target_seq, pattern_labels = batch
        pred_seq = self.model(signal, time_feat)
        val_loss = self.criterion(pred_seq, target_seq)
        self.validation_step_outputs.append(val_loss)
        # Critical: on_epoch=True lets Lightning aggregate the batch losses.
        self.log_dict({'val_loss': val_loss},
                      on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {"val_loss": val_loss}

    def on_validation_epoch_end(self):
        epoch_average = torch.stack(self.validation_step_outputs).mean()
        self.log("validation_epoch_average", epoch_average)
        print("\n\nvalid_loss_manual", epoch_average)
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr=self.train_config.get('learningRate', 1e-3),
            weight_decay=self.train_config.get('weight_decay', 1e-3),
            betas=(0.9, 0.999),
            eps=1e-08,
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            patience=self.train_config.get('patience', 1e-3),
            factor=self.train_config.get('factor', 1e-3),
        )
        # interval/frequency belong inside the lr_scheduler config dict,
        # not at the top level of the returned dict.
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val_loss",
                "interval": "epoch",
                "frequency": 1,
            },
        }
```
```text
valid_loss_manual tensor(0.4408, device='cuda:0')
Epoch 13/199 ━━━━━━━━━━━━━━━━━ 9/9 0:00:00 • 0:00:00 95.71it/s
  v_num: 273.000  train_loss_step: 0.539  val_loss_step: 0.501
  val_loss_epoch: 0.421  train_loss_epoch: 0.313
in train loop   (printed 9x, once per training batch)
valid_loss_manual tensor(0.3893, device='cuda:0')
Epoch 14/199 ━━━━━━━━━━━━━━━━━ 9/9 0:00:00 • 0:00:00 95.85it/s
  v_num: 273.000  train_loss_step: 0.489  val_loss_step: 0.388
  val_loss_epoch: 0.390  train_loss_epoch: 0.309
in train loop   (printed 9x, once per training batch)
valid_loss_manual tensor(0.3756, device='cuda:0')
Epoch 15/199 ━━━━━━━━━━━━━━━━━ 9/9 0:00:00 • 0:00:00 95.63it/s
  v_num: 273.000  train_loss_step: 0.465  val_loss_step: 0.420
  val_loss_epoch: 0.361  train_loss_epoch: 0.306
```
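A plausible source of the gap (an assumption, not confirmed here): `self.log(..., on_epoch=True)` aggregates the epoch value as a mean weighted by the batch size Lightning infers from each batch, while `torch.stack(self.validation_step_outputs).mean()` weights every batch equally, so the two diverge whenever batches differ in size (typically a smaller last batch). A minimal sketch with made-up numbers:

```python
import torch

# Made-up per-batch validation losses and batch sizes for one epoch
# (9 batches, as in the logs above; the last batch is smaller).
batch_losses = torch.tensor([0.50, 0.42, 0.38, 0.45, 0.40, 0.39, 0.41, 0.44, 0.60])
batch_sizes = torch.tensor([32.0, 32, 32, 32, 32, 32, 32, 32, 7])

# What on_validation_epoch_end computes: every batch counts equally.
manual_mean = batch_losses.mean()

# What Lightning's on_epoch aggregation computes (assuming its default
# batch-size-weighted mean): larger batches count more.
weighted_mean = (batch_losses * batch_sizes).sum() / batch_sizes.sum()

print(manual_mean.item(), weighted_mean.item())  # differ when batch sizes are unequal
```

If this is the cause, the two numbers should coincide when the dataset size is an exact multiple of the batch size, or when `batch_size` is passed explicitly to `self.log`.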

Replies: 0 comments
