Confusion about the Loss computation
dyyoungg opened this issue · 0 comments
dyyoungg commented
```python
# in model/vc_dddm_mixup.py
def compute_loss(self, x, w2v_x, f0_x, x_length):
    x_mask = sequence_mask(x_length, x.size(2)).unsqueeze(1).to(x.dtype)
    spk, src_out, ftr_out = self.encoder(w2v_x, f0_x, x, x_length, mixup=True)
    mixup = torch.randint(0, 2, (x.size(0), 1, 1)).to(x.device)
    src_out_new = mixup * src_out[:x.size(0), :, :] + (1 - mixup) * src_out[x.size(0):, :, :]
    ftr_out_new = mixup * ftr_out[:x.size(0), :, :] + (1 - mixup) * ftr_out[x.size(0):, :, :]
```
Since `torch.randint(0, 2, ...)` only ever returns 0 or 1, the effect of this code is a per-sample hard switch between `src_out[:x.size(0), :, :]` and `src_out[x.size(0):, :, :]` (and likewise for `ftr_out`); no actual mixing or interpolation is performed.
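To make the degeneracy concrete, here is a minimal sketch with dummy tensors standing in for the two halves of the encoder output (the sizes `B`, `C`, `T` are made up for illustration):

```python
import torch

torch.manual_seed(0)
B, C, T = 4, 2, 3                  # hypothetical sizes, not from the repo
src = torch.zeros(B, C, T)         # stands in for src_out[:x.size(0)]
alt = torch.ones(B, C, T)          # stands in for src_out[x.size(0):]

# randint(0, 2) only ever yields 0 or 1, so each sample in the batch is
# taken entirely from `src` or entirely from `alt` -- a hard switch.
mixup = torch.randint(0, 2, (B, 1, 1)).to(src.dtype)
mixed = mixup * src + (1 - mixup) * alt
print(mixed[:, 0, 0])              # every entry is exactly 0. or 1.
```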
Would the following code be more reasonable?
```python
def compute_loss(self, x, w2v_x, f0_x, x_length):
    x_mask = sequence_mask(x_length, x.size(2)).unsqueeze(1).to(x.dtype)
    spk, src_out, ftr_out = self.encoder(w2v_x, f0_x, x, x_length, mixup=True)
    # Draw a continuous coefficient in [0, 1) so the two halves are
    # actually interpolated rather than hard-switched.
    mixup = torch.rand((x.size(0), 1, 1), device=x.device)
    src_out_new = mixup * src_out[:x.size(0), :, :] + (1 - mixup) * src_out[x.size(0):, :, :]
    ftr_out_new = mixup * ftr_out[:x.size(0), :, :] + (1 - mixup) * ftr_out[x.size(0):, :, :]
```
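For contrast, a quick self-contained check (same dummy setup as above) that `torch.rand` yields genuine interpolation weights:

```python
import torch

torch.manual_seed(0)
B, C, T = 4, 2, 3                  # same hypothetical sizes as above
src = torch.zeros(B, C, T)
alt = torch.ones(B, C, T)

# rand() draws each coefficient uniformly from [0, 1), so the result is a
# genuine per-sample convex combination lam*src + (1 - lam)*alt.
lam = torch.rand((B, 1, 1))
mixed = lam * src + (1 - lam) * alt
print(mixed[:, 0, 0])              # fractional values, not just 0. / 1.
```

For reference, the original mixup formulation (Zhang et al., 2018) samples the coefficient from Beta(α, α); `torch.rand` corresponds to the α = 1 special case, since Beta(1, 1) is the uniform distribution.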