# config.pbtxt
  # Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #     http://www.apache.org/licenses/LICENSE-2.0
  #
  # Unless required by applicable law or agreed to in writing, software
  # distributed under the License is distributed on an "AS IS" BASIS,
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # Model identity: the model is named "token2wav_dit" and is executed by the
  # Python backend.
  name: "token2wav_dit"
  backend: "python"

  # ${triton_max_batch_size} is a placeholder, not valid text-proto on its own;
  # it must be replaced by an integer before the server loads this file
  # (presumably by a deployment/templating script -- confirm).
  max_batch_size: ${triton_max_batch_size}

  dynamic_batching {
    # Placeholder as well; the resolved value is the queue delay, in
    # microseconds, that the dynamic batcher may wait to form a batch.
    max_queue_delay_microseconds: ${max_queue_delay_microseconds}
    # Ten priority levels are declared and requests that do not specify one are
    # assigned level 10. Per the Triton model-configuration documentation,
    # level 1 is the highest priority, so unlabelled requests get the lowest.
    priority_levels: 10
    default_priority_level: 10
  }

  # Backend parameters, written as a single list so both entries use the same
  # syntax. Keys and values are unchanged.
  parameters [
    {
      # Python-backend switch; per the Triton Python backend documentation,
      # "no" allows input tensors to be handed to the model in GPU memory
      # instead of always being copied to CPU first.
      key: "FORCE_CPU_ONLY_INPUT_TENSORS"
      value: { string_value: "no" }
    },
    {
      # Placeholder for the model directory; how the Python model consumes it
      # is not visible in this file.
      key: "model_dir"
      value: { string_value: "${model_dir}" }
    }
  ]
  # Input tensors. A dimension of -1 is variable-sized. If max_batch_size
  # resolves to a value greater than 0, Triton treats the batch dimension as
  # implicit and the dims below describe a single request.
  input [
    # --- Required inputs -----------------------------------------------------
    {
      name: "target_speech_tokens"
      data_type: TYPE_INT32
      dims: [ -1 ]
    },
    {
      name: "reference_wav"
      data_type: TYPE_FP32
      dims: [ -1 ]
    },
    {
      # Single INT32 value; presumably the valid length of "reference_wav"
      # (inferred from the name only -- confirm against the Python model).
      name: "reference_wav_len"
      data_type: TYPE_INT32
      dims: [ 1 ]
    },
    # --- Optional inputs -----------------------------------------------------
    {
      # Declared as one BOOL element and reshaped to an empty shape, so the
      # backend sees it without the trailing size-1 dimension.
      name: "finalize"
      data_type: TYPE_BOOL
      dims: [ 1 ]
      reshape: { shape: [ ] }
      optional: true
    },
    # The seven optional tensors below each have an output of the same name,
    # data type and dims declared in the output section of this file.
    {
      name: "conformer_cnn_cache"
      data_type: TYPE_FP16
      dims: [ 512, -1 ]
      optional: true
    },
    {
      name: "conformer_att_cache"
      data_type: TYPE_FP16
      dims: [ 10, 8, -1, 128 ]
      optional: true
    },
    {
      name: "estimator_cnn_cache"
      data_type: TYPE_FP16
      dims: [ 10, 16, -1, 1024, 2 ]
      optional: true
    },
    {
      name: "estimator_att_cache"
      data_type: TYPE_FP16
      dims: [ 10, 16, -1, 8, -1, 128 ]
      optional: true
    },
    {
      name: "mel"
      data_type: TYPE_FP32
      dims: [ 80, -1 ]
      optional: true
    },
    {
      name: "source"
      data_type: TYPE_FP32
      dims: [ 1, -1 ]
      optional: true
    },
    {
      name: "speech"
      data_type: TYPE_FP32
      dims: [ -1 ]
      optional: true
    }
  ]
  # Output tensors. A dimension of -1 is variable-sized.
  output [
    {
      # Variable-length FP32 tensor named "waveform".
      name: "waveform"
      data_type: TYPE_FP32
      dims: [ -1 ]
    },
    # The remaining outputs reuse the names, data types and dims of the
    # optional inputs declared in the input section of this file.
    # NOTE(review): this looks like state handed back to the caller to be fed
    # into the next request -- confirm against the Python model.
    {
      name: "conformer_cnn_cache"
      data_type: TYPE_FP16
      dims: [ 512, -1 ]
    },
    {
      name: "conformer_att_cache"
      data_type: TYPE_FP16
      dims: [ 10, 8, -1, 128 ]
    },
    {
      name: "estimator_cnn_cache"
      data_type: TYPE_FP16
      dims: [ 10, 16, -1, 1024, 2 ]
    },
    {
      name: "estimator_att_cache"
      data_type: TYPE_FP16
      dims: [ 10, 16, -1, 8, -1, 128 ]
    },
    {
      name: "mel"
      data_type: TYPE_FP32
      dims: [ 80, -1 ]
    },
    {
      name: "source"
      data_type: TYPE_FP32
      dims: [ 1, -1 ]
    },
    {
      name: "speech"
      data_type: TYPE_FP32
      dims: [ -1 ]
    }
  ]
  # A single model instance, declared as KIND_CPU.
  # NOTE(review): FORCE_CPU_ONLY_INPUT_TENSORS is set to "no" in this file's
  # parameters, which suggests the Python model may manage GPU placement
  # itself despite the CPU instance kind -- confirm before changing either.
  instance_group [
    {
      count: 1
      kind: KIND_CPU
    }
  ]