speech_synthesizer.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import logging
  3. from re import I
  4. import uuid
  5. import json
  6. import threading
  7. from nls.core import NlsCore
  8. from . import logging
  9. from . import util
  10. from .exception import (StartTimeoutException,
  11. CompleteTimeoutException,
  12. InvalidParameter)
  13. __SPEECH_SYNTHESIZER_NAMESPACE__ = 'SpeechSynthesizer'
  14. __SPEECH_LONG_SYNTHESIZER_NAMESPACE__ = 'SpeechLongSynthesizer'
  15. __SPEECH_SYNTHESIZER_REQUEST_CMD__ = {
  16. 'start': 'StartSynthesis'
  17. }
  18. __URL__ = 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1'
  19. __all__ = ['NlsSpeechSynthesizer']
  20. class NlsSpeechSynthesizer:
  21. """
  22. Api for text-to-speech
  23. """
  24. def __init__(self,
  25. url=__URL__,
  26. token=None,
  27. appkey=None,
  28. long_tts=False,
  29. on_metainfo=None,
  30. on_data=None,
  31. on_completed=None,
  32. on_error=None,
  33. on_close=None,
  34. callback_args=[]):
  35. """
  36. NlsSpeechSynthesizer initialization
  37. Parameters:
  38. -----------
  39. url: str
  40. websocket url.
  41. akid: str
  42. access id from aliyun. if you provide a token, ignore this argument.
  43. appkey: str
  44. appkey from aliyun
  45. long_tts: bool
  46. whether using long-text synthesis support, default is False. long-text synthesis
  47. can support longer text but more expensive.
  48. on_metainfo: function
  49. Callback object which is called when recognition started.
  50. on_start has two arguments.
  51. The 1st argument is message which is a json format string.
  52. The 2nd argument is *args which is callback_args.
  53. on_data: function
  54. Callback object which is called when partial synthesis result arrived
  55. arrived.
  56. on_result_changed has two arguments.
  57. The 1st argument is binary data corresponding to aformat in start
  58. method.
  59. The 2nd argument is *args which is callback_args.
  60. on_completed: function
  61. Callback object which is called when recognition is completed.
  62. on_completed has two arguments.
  63. The 1st argument is message which is a json format string.
  64. The 2nd argument is *args which is callback_args.
  65. on_error: function
  66. Callback object which is called when any error occurs.
  67. on_error has two arguments.
  68. The 1st argument is message which is a json format string.
  69. The 2nd argument is *args which is callback_args.
  70. on_close: function
  71. Callback object which is called when connection closed.
  72. on_close has one arguments.
  73. The 1st argument is *args which is callback_args.
  74. callback_args: list
  75. callback_args will return in callbacks above for *args.
  76. """
  77. if not token or not appkey:
  78. raise InvalidParameter('Must provide token and appkey')
  79. self.__response_handler__ = {
  80. 'MetaInfo': self.__metainfo,
  81. 'SynthesisCompleted': self.__synthesis_completed,
  82. 'TaskFailed': self.__task_failed
  83. }
  84. self.__callback_args = callback_args
  85. self.__url = url
  86. self.__appkey = appkey
  87. self.__token = token
  88. self.__long_tts = long_tts
  89. self.__start_cond = threading.Condition()
  90. self.__start_flag = False
  91. self.__on_metainfo = on_metainfo
  92. self.__on_data = on_data
  93. self.__on_completed = on_completed
  94. self.__on_error = on_error
  95. self.__on_close = on_close
  96. self.__allow_aformat = (
  97. 'pcm', 'wav', 'mp3'
  98. )
  99. self.__allow_sample_rate = (
  100. 8000, 11025, 16000, 22050,
  101. 24000, 32000, 44100, 48000
  102. )
  103. def __handle_message(self, message):
  104. logging.debug('__handle_message')
  105. try:
  106. __result = json.loads(message)
  107. if __result['header']['name'] in self.__response_handler__:
  108. __handler = self.__response_handler__[__result['header']['name']]
  109. __handler(message)
  110. else:
  111. logging.error('cannot handle cmd{}'.format(
  112. __result['header']['name']))
  113. return
  114. except json.JSONDecodeError:
  115. logging.error('cannot parse message:{}'.format(message))
  116. return
  117. def __syn_core_on_open(self):
  118. logging.debug('__syn_core_on_open')
  119. with self.__start_cond:
  120. self.__start_flag = True
  121. self.__start_cond.notify()
  122. def __syn_core_on_data(self, data, opcode, flag):
  123. logging.debug('__syn_core_on_data')
  124. if self.__on_data:
  125. self.__on_data(data, *self.__callback_args)
  126. def __syn_core_on_msg(self, msg, *args):
  127. logging.debug('__syn_core_on_msg:msg={} args={}'.format(msg, args))
  128. self.__handle_message(msg)
  129. def __syn_core_on_error(self, msg, *args):
  130. logging.debug('__sr_core_on_error:msg={} args={}'.format(msg, args))
  131. def __syn_core_on_close(self):
  132. logging.debug('__sr_core_on_close')
  133. if self.__on_close:
  134. self.__on_close(*self.__callback_args)
  135. with self.__start_cond:
  136. self.__start_flag = False
  137. self.__start_cond.notify()
  138. def __metainfo(self, message):
  139. logging.debug('__metainfo')
  140. if self.__on_metainfo:
  141. self.__on_metainfo(message, *self.__callback_args)
  142. def __synthesis_completed(self, message):
  143. logging.debug('__synthesis_completed')
  144. self.__nls.shutdown()
  145. logging.debug('__synthesis_completed shutdown done')
  146. if self.__on_completed:
  147. self.__on_completed(message, *self.__callback_args)
  148. with self.__start_cond:
  149. self.__start_flag = False
  150. self.__start_cond.notify()
  151. def __task_failed(self, message):
  152. logging.debug('__task_failed')
  153. with self.__start_cond:
  154. self.__start_flag = False
  155. self.__start_cond.notify()
  156. if self.__on_error:
  157. self.__on_error(message, *self.__callback_args)
  158. def start(self,
  159. text=None,
  160. voice='xiaoyun',
  161. aformat='pcm',
  162. sample_rate=16000,
  163. volume=50,
  164. speech_rate=0,
  165. pitch_rate=0,
  166. wait_complete=True,
  167. start_timeout=10,
  168. completed_timeout=60,
  169. ex:dict=None):
  170. """
  171. Synthesis start
  172. Parameters:
  173. -----------
  174. text: str
  175. utf-8 text
  176. voice: str
  177. voice for text-to-speech, default is xiaoyun
  178. aformat: str
  179. audio binary format, support: 'pcm', 'wav', 'mp3', default is 'pcm'
  180. sample_rate: int
  181. audio sample rate, default is 16000, support:8000, 11025, 16000, 22050,
  182. 24000, 32000, 44100, 48000
  183. volume: int
  184. audio volume, from 0~100, default is 50
  185. speech_rate: int
  186. speech rate from -500~500, default is 0
  187. pitch_rate: int
  188. pitch for voice from -500~500, default is 0
  189. wait_complete: bool
  190. whether block until syntheis completed or timeout for completed timeout
  191. start_timeout: int
  192. timeout for connection established
  193. completed_timeout: int
  194. timeout for waiting synthesis completed from connection established
  195. ex: dict
  196. dict which will merge into 'payload' field in request
  197. """
  198. if text is None:
  199. raise InvalidParameter('Text cannot be None')
  200. self.__nls = NlsCore(
  201. url=self.__url,
  202. token=self.__token,
  203. on_open=self.__syn_core_on_open,
  204. on_message=self.__syn_core_on_msg,
  205. on_data=self.__syn_core_on_data,
  206. on_close=self.__syn_core_on_close,
  207. on_error=self.__syn_core_on_error,
  208. callback_args=[])
  209. if aformat not in self.__allow_aformat:
  210. raise InvalidParameter('format {} not support'.format(aformat))
  211. if sample_rate not in self.__allow_sample_rate:
  212. raise InvalidParameter('samplerate {} not support'.format(sample_rate))
  213. if volume < 0 or volume > 100:
  214. raise InvalidParameter('volume {} not support'.format(volume))
  215. if speech_rate < -500 or speech_rate > 500:
  216. raise InvalidParameter('speech_rate {} not support'.format(speech_rate))
  217. if pitch_rate < -500 or pitch_rate > 500:
  218. raise InvalidParameter('pitch rate {} not support'.format(pitch_rate))
  219. __id4 = uuid.uuid4().hex
  220. self.__task_id = uuid.uuid4().hex
  221. __namespace = __SPEECH_SYNTHESIZER_NAMESPACE__
  222. if self.__long_tts:
  223. __namespace = __SPEECH_LONG_SYNTHESIZER_NAMESPACE__
  224. __header = {
  225. 'message_id': __id4,
  226. 'task_id': self.__task_id,
  227. 'namespace': __namespace,
  228. 'name': __SPEECH_SYNTHESIZER_REQUEST_CMD__['start'],
  229. 'appkey': self.__appkey
  230. }
  231. __payload = {
  232. 'text': text,
  233. 'voice': voice,
  234. 'format': aformat,
  235. 'sample_rate': sample_rate,
  236. 'volume': volume,
  237. 'speech_rate': speech_rate,
  238. 'pitch_rate': pitch_rate
  239. }
  240. if ex:
  241. __payload.update(ex)
  242. __msg = {
  243. 'header': __header,
  244. 'payload': __payload,
  245. 'context': util.GetDefaultContext()
  246. }
  247. __jmsg = json.dumps(__msg)
  248. with self.__start_cond:
  249. if self.__start_flag:
  250. logging.debug('already start...')
  251. return
  252. self.__nls.start(__jmsg, ping_interval=0, ping_timeout=None)
  253. if self.__start_flag == False:
  254. if not self.__start_cond.wait(start_timeout):
  255. logging.debug('syn start timeout')
  256. raise StartTimeoutException(f'Waiting Start over {start_timeout}s')
  257. if self.__start_flag and wait_complete:
  258. if not self.__start_cond.wait(completed_timeout):
  259. raise CompleteTimeoutException(f'Waiting Complete over {completed_timeout}s')
  260. def shutdown(self):
  261. """
  262. Shutdown connection immediately
  263. """
  264. self.__nls.shutdown()