|
|
@@ -698,7 +698,7 @@ class CosyVoice3LM(Qwen2LM):
|
|
|
lm_output, lm_output_mask = self.llm(lm_input, lm_input_len.to(device))
|
|
|
logits = self.llm_decoder(lm_output)
|
|
|
loss = self.criterion_ce(logits, lm_target.to(device))
|
|
|
- acc = th_accuracy(logits.view(-1, self.speech_token_size + 3), lm_target, ignore_label=IGNORE_ID)
|
|
|
+ acc = th_accuracy(logits.view(-1, self.speech_token_size + 200), lm_target, ignore_label=IGNORE_ID)
|
|
|
return {'loss': loss, 'acc': acc}
|
|
|
|
|
|
@torch.inference_mode()
|