UnsupportedModuleError: [IllegalModuleConfigurationError('Model needs to be in training mode')]

Question

UnsupportedModuleError: [IllegalModuleConfigurationError('Model needs to be in training mode')]

jS5t3r opened this issue 8 months ago · 1 comments

🐛 Bug

I have installed
Python 3.8, Opacus 1.4.0, torch 2.1.1, and torchvision 0.16.1.

My implementation contains a BERT LLM sequence classifier.

import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertForSequenceClassification, BertTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from opacus import PrivacyEngine

# Load and preprocess the SST-2 dataset
dataset = load_dataset("glue", "sst2")
train_dataset = dataset["train"]

# Load pre-trained BERT model and tokenizer
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Define data loader
def collate_fn(batch):
    inputs = tokenizer(batch["sentence"], padding=True, truncation=True, max_length=128, return_tensors="pt")
    inputs["labels"] = torch.tensor(batch["label"])
    return inputs

train_loader = DataLoader(train_dataset, batch_size=32, collate_fn=collate_fn)

# Define optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Define privacy engine
privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    noise_multiplier=1.0,
    max_grad_norm=1.0,
)

# Define loss function
criterion = nn.CrossEntropyLoss()

# Train the model
for epoch in range(5):  # Adjust number of epochs as needed
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        inputs = {key: value.cuda() for key, value in batch.items()}
        outputs = model(**inputs)
        loss = criterion(outputs.logits, inputs["labels"].cuda())
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1} finished.")

Error:

UnsupportedModuleError                    Traceback (most recent call last)
Cell In[126], line 30
     28 # Define privacy engine
     29 privacy_engine = PrivacyEngine()
---> 30 model, optimizer, train_loader = privacy_engine.make_private(
     31     module=model,
     32     optimizer=optimizer,
     33     data_loader=train_loader,
     34     noise_multiplier=1.0,
     35     max_grad_norm=1.0,
     36 )
     38 # Define loss function
     39 criterion = nn.CrossEntropyLoss()

File ~/.conda/envs/def2/lib/python3.8/site-packages/opacus/privacy_engine.py:399, in PrivacyEngine.make_private(self, module, optimizer, data_loader, noise_multiplier, max_grad_norm, batch_first, loss_reduction, poisson_sampling, clipping, noise_generator, grad_sample_mode)
    393         raise ValueError(
    394             "Module parameters are different than optimizer Parameters"
    395         )
    397 distributed = isinstance(module, (DPDDP, DDP))
--> 399 module = self._prepare_model(
    400     module,
    401     batch_first=batch_first,
    402     loss_reduction=loss_reduction,
    403     grad_sample_mode=grad_sample_mode,
    404 )
    405 if poisson_sampling:
    406     module.register_backward_hook(forbid_accumulation_hook)

File ~/.conda/envs/def2/lib/python3.8/site-packages/opacus/privacy_engine.py:224, in PrivacyEngine._prepare_model(self, module, batch_first, loss_reduction, grad_sample_mode)
    214 def _prepare_model(
    215     self,
    216     module: nn.Module,
   (...)
    222     # Ideally, validation should have been taken care of by calling
    223     # `get_compatible_module()`
--> 224     self.validate(module=module, optimizer=None, data_loader=None)
    226     # wrap
    227     if isinstance(module, AbstractGradSampleModule):

File ~/.conda/envs/def2/lib/python3.8/site-packages/opacus/privacy_engine.py:289, in PrivacyEngine.validate(self, module, optimizer, data_loader)
    269 def validate(
    270     self,
    271     *,
   (...)
    274     data_loader: Optional[DataLoader],
    275 ):
    276     """
    277     Validate that task components are compatible with DP.
    278     Same as ``is_compatible()``, but raises error instead of returning bool.
   (...)
    287             If one or more modules found to be incompatible
    288     """
--> 289     ModuleValidator.validate(module, strict=True)

File ~/.conda/envs/def2/lib/python3.8/site-packages/opacus/validators/module_validator.py:69, in ModuleValidator.validate(cls, module, strict)
     67 # raise/return as needed
     68 if strict and len(errors) > 0:
---> 69     raise UnsupportedModuleError(errors)
     70 else:
     71     return errors

UnsupportedModuleError: [IllegalModuleConfigurationError('Model needs to be in training mode')]

Answer 1 · 2024-02-13T17:01:55.000Z

could you run model.train() before privacy_engine?