# TensorRT-backed wrapper for the ONNX-exported estimator network.
- import os
- import torch
- import tensorrt as trt
- import logging
- import threading
- _min_shape = [(2, 80, 4), (2, 1, 4), (2, 80, 4), (2,), (2, 80), (2, 80, 4)]
- _opt_shape = [(2, 80, 193), (2, 1, 193), (2, 80, 193), (2,), (2, 80), (2, 80, 193)]
- _max_shape = [(2, 80, 6800), (2, 1, 6800), (2, 80, 6800), (2,), (2, 80), (2, 80, 6800)]
class EstimatorTRT:
    """Thread-safe TensorRT runner for the estimator network.

    Loads a prebuilt ``<path_prefix>.fp16.plan`` / ``.fp32.plan`` engine when
    one exists; otherwise builds it from ``<path_prefix>.fp32.onnx`` and
    (unless ``NOT_SAVE_TRT=1`` is set in the environment) caches the
    serialized plan next to the ONNX file.
    """

    def __init__(self, path_prefix: str, device: torch.device, fp16: bool = True):
        """
        Args:
            path_prefix: Filesystem prefix for the ``.fp32.onnx`` /
                ``.plan`` artifacts.
            device: CUDA device the engine runs on.
            fp16: Build/load the half-precision engine when True.
        """
        # One execution context is shared by all callers, so forward()
        # serializes access with this lock.
        self.lock = threading.Lock()
        self.device = device
        with torch.cuda.device(device):
            self.input_names = ["x", "mask", "mu", "t", "spks", "cond"]
            self.output_name = "estimator_out"
            onnx_path = path_prefix + ".fp32.onnx"
            precision = ".fp16" if fp16 else ".fp32"
            trt_path = path_prefix + precision + ".plan"
            self.fp16 = fp16
            self.logger = trt.Logger(trt.Logger.INFO)
            self.trt_runtime = trt.Runtime(self.logger)
            # NOT_SAVE_TRT=1 opts out of caching the built engine to disk.
            save_trt = os.environ.get("NOT_SAVE_TRT", "0") != "1"
            if os.path.exists(trt_path):
                self.engine = self._load_trt(trt_path)
            else:
                self.engine = self._convert_onnx_to_trt(onnx_path, trt_path, save_trt)
            self.context = self.engine.create_execution_context()

    def _convert_onnx_to_trt(
        self, onnx_path: str, trt_path: str, save_trt: bool = True
    ):
        """Build a TensorRT engine from the ONNX export.

        Args:
            onnx_path: Path to the FP32 ONNX model.
            trt_path: Destination for the serialized plan.
            save_trt: Write the serialized engine to ``trt_path`` when True.

        Returns:
            The deserialized ``ICudaEngine``.

        Raises:
            RuntimeError: If the ONNX model cannot be parsed or the engine
                fails to build.  (The original code called ``exit(1)`` here,
                which kills the whole host process from library code.)
        """
        logging.info("Converting onnx to trt...")
        network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        builder = trt.Builder(self.logger)
        network = builder.create_network(network_flags)
        parser = trt.OnnxParser(network, self.logger)
        config = builder.create_builder_config()
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 33)  # 8GB
        if self.fp16:
            config.set_flag(trt.BuilderFlag.FP16)
        profile = builder.create_optimization_profile()
        # Parse the ONNX model; collect all parser errors into the exception
        # message instead of printing and terminating the process.
        with open(onnx_path, "rb") as f:
            if not parser.parse(f.read()):
                errors = "\n".join(
                    str(parser.get_error(i)) for i in range(parser.num_errors)
                )
                raise RuntimeError(
                    "Failed to parse ONNX model {}:\n{}".format(onnx_path, errors)
                )
        # Register the dynamic-shape profile for every input tensor.
        for name, min_s, opt_s, max_s in zip(
            self.input_names, _min_shape, _opt_shape, _max_shape
        ):
            profile.set_shape(name, min_s, opt_s, max_s)
        tensor_dtype = trt.DataType.HALF if self.fp16 else trt.DataType.FLOAT
        # Pin network I/O dtypes to the requested precision.
        for i in range(network.num_inputs):
            network.get_input(i).dtype = tensor_dtype
        for i in range(network.num_outputs):
            network.get_output(i).dtype = tensor_dtype
        config.add_optimization_profile(profile)
        engine_bytes = builder.build_serialized_network(network, config)
        # build_serialized_network returns None on failure; the original
        # code would have crashed later on f.write(None)/deserialize(None).
        if engine_bytes is None:
            raise RuntimeError(
                "Failed to build TensorRT engine from {}".format(onnx_path)
            )
        if save_trt:
            with open(trt_path, "wb") as f:
                f.write(engine_bytes)
            logging.info("trt engine saved to %s", trt_path)
        return self.trt_runtime.deserialize_cuda_engine(engine_bytes)

    def _load_trt(self, trt_path: str):
        """Deserialize a previously saved engine from ``trt_path``."""
        logging.info("Found trt engine, loading...")
        with open(trt_path, "rb") as f:
            engine_bytes = f.read()
        return self.trt_runtime.deserialize_cuda_engine(engine_bytes)

    def forward(
        self,
        x: torch.Tensor,
        mask: torch.Tensor,
        mu: torch.Tensor,
        t: torch.Tensor,
        spks: torch.Tensor,
        cond: torch.Tensor,
    ):
        """Run one estimator step on the TRT engine.

        The output is written in place into ``x``'s buffer and ``x`` is
        returned.  Shapes are fixed to batch 2 / 80 mel channels with a
        variable time axis, matching the optimization profile.
        NOTE(review): input/output dtypes must match the engine precision
        (fp16 vs fp32) — confirm at the caller.
        """
        with self.lock:
            with torch.cuda.device(self.device):
                # Bind contiguous tensors to locals BEFORE taking raw
                # pointers: the original `x.contiguous().data_ptr()` pattern
                # pointed into a temporary that could be freed before
                # execute_v2 ran whenever an input was non-contiguous, and
                # the output pointer did not match the contiguous input copy.
                x = x.contiguous()
                mask = mask.contiguous()
                mu = mu.contiguous()
                t = t.contiguous()
                spks = spks.contiguous()
                cond = cond.contiguous()
                seq_len = x.size(2)
                self.context.set_input_shape("x", (2, 80, seq_len))
                self.context.set_input_shape("mask", (2, 1, seq_len))
                self.context.set_input_shape("mu", (2, 80, seq_len))
                self.context.set_input_shape("t", (2,))
                self.context.set_input_shape("spks", (2, 80))
                self.context.set_input_shape("cond", (2, 80, seq_len))
                # Run the engine; the last pointer is the output binding,
                # which deliberately aliases x's buffer (in-place result).
                self.context.execute_v2(
                    [
                        x.data_ptr(),
                        mask.data_ptr(),
                        mu.data_ptr(),
                        t.data_ptr(),
                        spks.data_ptr(),
                        cond.data_ptr(),
                        x.data_ptr(),
                    ]
                )
        return x

    def __call__(
        self,
        x: torch.Tensor,
        mask: torch.Tensor,
        mu: torch.Tensor,
        t: torch.Tensor,
        spks: torch.Tensor,
        cond: torch.Tensor,
    ):
        """Alias for :meth:`forward`."""
        return self.forward(x, mask, mu, t, spks, cond)