dct4_s.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. /********************************************************************************
  2. **
  3. ** ITU-T G.722.1 (2005-05) - Fixed point implementation for main body and Annex C
  4. ** > Software Release 2.1 (2008-06)
  5. ** (Simple repackaging; no change from 2005-05 Release 2.0 code)
  6. **
  7. ** © 2004 Polycom, Inc.
  8. **
  9. ** All rights reserved.
  10. **
  11. ********************************************************************************/
  12. /********************************************************************************
  13. * Filename: dct_type_iv_s.c
  14. *
  15. * Purpose: Discrete Cosine Transform, Type IV used for inverse MLT
  16. *
  17. * The basis functions are
  18. *
  19. * cos(PI*(t+0.5)*(k+0.5)/block_length)
  20. *
  21. * for time t and basis function number k. Due to the symmetry of the expression
  22. * in t and k, it is clear that the forward and inverse transforms are the same.
  23. *
  24. *********************************************************************************/
  25. /***************************************************************************
  26. Include files
  27. ***************************************************************************/
  28. #include "defs.h"
  29. #include "count.h"
  30. #include "dct4_s.h"
  31. /***************************************************************************
  32. External variable declarations
  33. ***************************************************************************/
  34. extern Word16 syn_bias_7khz[DCT_LENGTH];
  35. extern Word16 dither[DCT_LENGTH];
  36. extern Word16 max_dither[MAX_DCT_LENGTH];
  37. extern Word16 dct_core_s[DCT_LENGTH_DIV_32][DCT_LENGTH_DIV_32];
  38. extern cos_msin_t s_cos_msin_2[DCT_LENGTH_DIV_32];
  39. extern cos_msin_t s_cos_msin_4[DCT_LENGTH_DIV_16];
  40. extern cos_msin_t s_cos_msin_8[DCT_LENGTH_DIV_8];
  41. extern cos_msin_t s_cos_msin_16[DCT_LENGTH_DIV_4];
  42. extern cos_msin_t s_cos_msin_32[DCT_LENGTH_DIV_2];
  43. extern cos_msin_t s_cos_msin_64[DCT_LENGTH];
  44. extern cos_msin_t *s_cos_msin_table[];
  45. /********************************************************************************
  46. Function: dct_type_iv_s
  47. Syntax: void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
  48. Description: Discrete Cosine Transform, Type IV used for inverse MLT
  49. Design Notes:
  50. WMOPS: 7kHz | 24kbit | 32kbit
  51. -------|--------------|----------------
  52. AVG | 1.74 | 1.74
  53. -------|--------------|----------------
  54. MAX | 1.74 | 1.74
  55. -------|--------------|----------------
  56. 14kHz | 24kbit | 32kbit | 48kbit
  57. -------|--------------|----------------|----------------
  58. AVG | 3.62 | 3.62 | 3.62
  59. -------|--------------|----------------|----------------
  60. MAX | 3.62 | 3.62 | 3.62
  61. -------|--------------|----------------|----------------
  62. ********************************************************************************/
  63. void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
  64. {
  65. Word16 buffer_a[MAX_DCT_LENGTH], buffer_b[MAX_DCT_LENGTH], buffer_c[MAX_DCT_LENGTH];
  66. Word16 *in_ptr, *in_ptr_low, *in_ptr_high, *next_in_base;
  67. Word16 *out_ptr_low, *out_ptr_high, *next_out_base;
  68. Word16 *out_buffer, *in_buffer, *buffer_swap;
  69. Word16 in_val_low, in_val_high;
  70. Word16 out_val_low, out_val_high;
  71. Word16 in_low_even, in_low_odd;
  72. Word16 in_high_even, in_high_odd;
  73. Word16 out_low_even, out_low_odd;
  74. Word16 out_high_even, out_high_odd;
  75. Word16 *pair_ptr;
  76. Word16 cos_even, cos_odd, msin_even, msin_odd;
  77. Word16 set_span, set_count, set_count_log, pairs_left, sets_left;
  78. Word16 i,k;
  79. Word16 index;
  80. Word16 dummy;
  81. Word32 sum;
  82. cos_msin_t **table_ptr_ptr, *cos_msin_ptr;
  83. Word32 acca;
  84. Word16 temp;
  85. Word16 dct_length_log;
  86. Word16 *dither_ptr;
  87. /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
  88. /* Do the sum/difference butterflies, the first part of */
  89. /* converting one N-point transform into 32 - 10 point transforms */
  90. /* transforms, where N = 1 << DCT_LENGTH_LOG. */
  91. /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
  92. test();
  93. if (dct_length==DCT_LENGTH)
  94. {
  95. dct_length_log = DCT_LENGTH_LOG;
  96. move16();
  97. dither_ptr = dither;
  98. move16();
  99. }
  100. else
  101. {
  102. dct_length_log = MAX_DCT_LENGTH_LOG;
  103. move16();
  104. dither_ptr = max_dither;
  105. move16();
  106. }
  107. in_buffer = input;
  108. move16();
  109. out_buffer = buffer_a;
  110. move16();
  111. index=0;
  112. move16();
  113. i=0;
  114. move16();
  115. for (set_count_log = 0; set_count_log <= dct_length_log - 2; set_count_log++)
  116. {
  117. /*===========================================================*/
  118. /* Initialization for the loop over sets at the current size */
  119. /*===========================================================*/
  120. /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
  121. set_span = shr_nocheck(dct_length,set_count_log);
  122. set_count = shl_nocheck(1,set_count_log);
  123. in_ptr = in_buffer;
  124. move16();
  125. next_out_base = out_buffer;
  126. move16();
  127. /*=====================================*/
  128. /* Loop over all the sets of this size */
  129. /*=====================================*/
  130. temp = sub(index,1);
  131. test();
  132. if(temp < 0)
  133. {
  134. for (sets_left = set_count;sets_left > 0;sets_left--)
  135. {
  136. /*||||||||||||||||||||||||||||||||||||||||||||*/
  137. /* Set up output pointers for the current set */
  138. /*||||||||||||||||||||||||||||||||||||||||||||*/
  139. /* pointer arithmetic */
  140. out_ptr_low = next_out_base;
  141. move16();
  142. next_out_base += set_span;
  143. move16();
  144. out_ptr_high = next_out_base;
  145. move16();
  146. /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
  147. /* Loop over all the butterflies in the current set */
  148. /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
  149. do
  150. {
  151. in_val_low = *in_ptr++;
  152. move16();
  153. in_val_high = *in_ptr++;
  154. move16();
  155. /* BEST METHOD OF GETTING RID OF BIAS, BUT COMPUTATIONALLY UNPLEASANT */
  156. /* ALTERNATIVE METHOD, SMEARS BIAS OVER THE ENTIRE FRAME, COMPUTATIONALLY SIMPLEST. */
  157. /* IF THIS WORKS, IT'S PREFERABLE */
  158. dummy = add(in_val_low,dither_ptr[i++]);
  159. // blp: addition of two 16bits vars, there's no way
  160. // they'll overflow a 32bit var
  161. //acca = L_add(dummy,in_val_high);
  162. acca = dummy + in_val_high;
  163. out_val_low = extract_l(L_shr_nocheck(acca,1));
  164. dummy = add(in_val_low,dither_ptr[i++]);
  165. // blp: addition of two 16bits vars, there's no way
  166. // they'll overflow a 32bit var
  167. //acca = L_add(dummy,-in_val_high);
  168. acca = dummy - in_val_high;
  169. out_val_high = extract_l(L_shr_nocheck(acca,1));
  170. *out_ptr_low++ = out_val_low;
  171. move16();
  172. *--out_ptr_high = out_val_high;
  173. move16();
  174. test();
  175. /* this involves comparison of pointers */
  176. /* pointer arithmetic */
  177. } while (out_ptr_low < out_ptr_high);
  178. } /* End of loop over sets of the current size */
  179. }
  180. else
  181. {
  182. for (sets_left = set_count; sets_left > 0; sets_left--)
  183. {
  184. /*||||||||||||||||||||||||||||||||||||||||||||*/
  185. /* Set up output pointers for the current set */
  186. /*||||||||||||||||||||||||||||||||||||||||||||*/
  187. out_ptr_low = next_out_base;
  188. move16();
  189. next_out_base += set_span;
  190. move16();
  191. out_ptr_high = next_out_base;
  192. move16();
  193. /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
  194. /* Loop over all the butterflies in the current set */
  195. /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
  196. do
  197. {
  198. in_val_low = *in_ptr++;
  199. move16();
  200. in_val_high = *in_ptr++;
  201. move16();
  202. out_val_low = add(in_val_low,in_val_high);
  203. out_val_high = add(in_val_low,negate(in_val_high));
  204. *out_ptr_low++ = out_val_low;
  205. move16();
  206. *--out_ptr_high = out_val_high;
  207. move16();
  208. test();
  209. } while (out_ptr_low < out_ptr_high);
  210. } /* End of loop over sets of the current size */
  211. }
  212. /*============================================================*/
  213. /* Decide which buffers to use as input and output next time. */
  214. /* Except for the first time (when the input buffer is the */
  215. /* subroutine input) we just alternate the local buffers. */
  216. /*============================================================*/
  217. in_buffer = out_buffer;
  218. move16();
  219. test();
  220. if (out_buffer == buffer_a)
  221. {
  222. out_buffer = buffer_b;
  223. move16();
  224. }
  225. else
  226. {
  227. out_buffer = buffer_a;
  228. move16();
  229. }
  230. index = add(index,1);
  231. } /* End of loop over set sizes */
  232. /*++++++++++++++++++++++++++++++++*/
  233. /* Do 32 - 10 point transforms */
  234. /*++++++++++++++++++++++++++++++++*/
  235. pair_ptr = in_buffer;
  236. move16();
  237. buffer_swap = buffer_c;
  238. move16();
  239. for (pairs_left = 1 << (dct_length_log - 1); pairs_left > 0; pairs_left--)
  240. {
  241. for ( k=0; k<CORE_SIZE; k++ )
  242. {
  243. #if PJ_HAS_INT64
  244. /* blp: danger danger! not really compatible but faster */
  245. pj_int64_t sum64=0;
  246. move32();
  247. for ( i=0; i<CORE_SIZE; i++ )
  248. {
  249. sum64 += L_mult(pair_ptr[i], dct_core_s[i][k]);
  250. }
  251. sum = L_saturate(sum64);
  252. #else
  253. sum=0L;
  254. move32();
  255. for ( i=0; i<CORE_SIZE; i++ )
  256. {
  257. sum = L_mac(sum, pair_ptr[i],dct_core_s[i][k]);
  258. }
  259. #endif
  260. buffer_swap[k] = itu_round(sum);
  261. }
  262. pair_ptr += CORE_SIZE;
  263. move16();
  264. buffer_swap += CORE_SIZE;
  265. move16();
  266. }
  267. for (i=0;i<dct_length;i++)
  268. {
  269. in_buffer[i] = buffer_c[i];
  270. move16();
  271. }
  272. table_ptr_ptr = s_cos_msin_table;
  273. move16();
  274. /*++++++++++++++++++++++++++++++*/
  275. /* Perform rotation butterflies */
  276. /*++++++++++++++++++++++++++++++*/
  277. index=0;
  278. move16();
  279. for (set_count_log = dct_length_log - 2 ; set_count_log >= 0; set_count_log--)
  280. {
  281. /*===========================================================*/
  282. /* Initialization for the loop over sets at the current size */
  283. /*===========================================================*/
  284. /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
  285. set_span = shr_nocheck(dct_length,set_count_log);
  286. set_count = shl_nocheck(1,set_count_log);
  287. next_in_base = in_buffer;
  288. move16();
  289. test();
  290. if (set_count_log == 0)
  291. {
  292. next_out_base = output;
  293. move16();
  294. }
  295. else
  296. {
  297. next_out_base = out_buffer;
  298. move16();
  299. }
  300. /*=====================================*/
  301. /* Loop over all the sets of this size */
  302. /*=====================================*/
  303. for (sets_left = set_count; sets_left > 0; sets_left--)
  304. {
  305. /*|||||||||||||||||||||||||||||||||||||||||*/
  306. /* Set up the pointers for the current set */
  307. /*|||||||||||||||||||||||||||||||||||||||||*/
  308. in_ptr_low = next_in_base;
  309. move16();
  310. temp = shr_nocheck(set_span,1);
  311. in_ptr_high = in_ptr_low + temp;
  312. move16();
  313. next_in_base += set_span;
  314. move16();
  315. out_ptr_low = next_out_base;
  316. move16();
  317. next_out_base += set_span;
  318. move16();
  319. out_ptr_high = next_out_base;
  320. move16();
  321. cos_msin_ptr = *table_ptr_ptr;
  322. move16();
  323. /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
  324. /* Loop over all the butterfly pairs in the current set */
  325. /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
  326. do
  327. {
  328. in_low_even = *in_ptr_low++;
  329. move16();
  330. in_low_odd = *in_ptr_low++;
  331. move16();
  332. in_high_even = *in_ptr_high++;
  333. move16();
  334. in_high_odd = *in_ptr_high++;
  335. move16();
  336. cos_even = cos_msin_ptr[0].cosine;
  337. move16();
  338. msin_even = cos_msin_ptr[0].minus_sine;
  339. move16();
  340. cos_odd = cos_msin_ptr[1].cosine;
  341. move16();
  342. msin_odd = cos_msin_ptr[1].minus_sine;
  343. move16();
  344. cos_msin_ptr += 2;
  345. sum = 0L;
  346. move32();
  347. sum = L_mac(sum,cos_even,in_low_even);
  348. sum = L_mac(sum,negate(msin_even),in_high_even);
  349. out_low_even = itu_round(L_shl_nocheck(sum,1));
  350. sum = 0L;
  351. move32();
  352. sum = L_mac(sum,msin_even,in_low_even);
  353. sum = L_mac(sum,cos_even,in_high_even);
  354. out_high_even = itu_round(L_shl_nocheck(sum,1));
  355. sum = 0L;
  356. move32();
  357. sum = L_mac(sum,cos_odd,in_low_odd);
  358. sum = L_mac(sum,msin_odd,in_high_odd);
  359. out_low_odd = itu_round(L_shl_nocheck(sum,1));
  360. sum = 0L;
  361. move32();
  362. sum = L_mac(sum,msin_odd,in_low_odd);
  363. sum = L_mac(sum,negate(cos_odd),in_high_odd);
  364. out_high_odd = itu_round(L_shl_nocheck(sum,1));
  365. *out_ptr_low++ = out_low_even;
  366. move16();
  367. *--out_ptr_high = out_high_even;
  368. move16();
  369. *out_ptr_low++ = out_low_odd;
  370. move16();
  371. *--out_ptr_high = out_high_odd;
  372. move16();
  373. test();
  374. } while (out_ptr_low < out_ptr_high);
  375. } /* End of loop over sets of the current size */
  376. /*=============================================*/
  377. /* Swap input and output buffers for next time */
  378. /*=============================================*/
  379. buffer_swap = in_buffer;
  380. move16();
  381. in_buffer = out_buffer;
  382. move16();
  383. out_buffer = buffer_swap;
  384. move16();
  385. index = add(index,1);
  386. table_ptr_ptr++;
  387. }
  388. /*------------------------------------
  389. ADD IN BIAS FOR OUTPUT
  390. -----------------------------------*/
  391. if (dct_length==DCT_LENGTH)
  392. {
  393. for(i=0;i<320;i++)
  394. {
  395. // blp: addition of two 16bits vars, there's no way
  396. // they'll overflow a 32bit var
  397. //sum = L_add(output[i],syn_bias_7khz[i]);
  398. sum = output[i] + syn_bias_7khz[i];
  399. acca = L_sub(sum,32767);
  400. test();
  401. if (acca > 0)
  402. {
  403. sum = 32767L;
  404. move32();
  405. }
  406. // blp: addition of two 16bits vars, there's no way
  407. // they'll overflow 32bit var
  408. //acca = L_add(sum,32768L);
  409. acca = sum + 32768;
  410. test();
  411. if (acca < 0)
  412. {
  413. sum = -32768L;
  414. move32();
  415. }
  416. output[i] = extract_l(sum);
  417. }
  418. }
  419. }