dct4_a.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. /*********************************************************************************
  2. ** ITU-T G.722.1 (2005-05) - Fixed point implementation for main body and Annex C
  3. ** > Software Release 2.1 (2008-06)
  4. ** (Simple repackaging; no change from 2005-05 Release 2.0 code)
  5. **
  6. ** © 2004 Polycom, Inc.
  7. **
  8. ** All rights reserved.
  9. **
  10. *********************************************************************************/
  11. /*********************************************************************************
  12. * Filename: dct_type_iv_a.c
  13. *
  14. * Purpose: Discrete Cosine Transform, Type IV used for MLT
  15. *
  16. * The basis functions are
  17. *
  18. * cos(PI*(t+0.5)*(k+0.5)/block_length)
  19. *
  20. * for time t and basis function number k. Due to the symmetry of the expression
  21. * in t and k, it is clear that the forward and inverse transforms are the same.
  22. *
  23. *********************************************************************************/
  24. /*********************************************************************************
  25. Include files
  26. *********************************************************************************/
  27. #include "defs.h"
  28. #include "count.h"
  29. #include "dct4_a.h"
  30. /*********************************************************************************
  31. External variable declarations
  32. *********************************************************************************/
  33. extern Word16 anal_bias[DCT_LENGTH];
  34. extern Word16 dct_core_a[DCT_LENGTH_DIV_32][DCT_LENGTH_DIV_32];
  35. extern cos_msin_t a_cos_msin_2 [DCT_LENGTH_DIV_32];
  36. extern cos_msin_t a_cos_msin_4 [DCT_LENGTH_DIV_16];
  37. extern cos_msin_t a_cos_msin_8 [DCT_LENGTH_DIV_8];
  38. extern cos_msin_t a_cos_msin_16[DCT_LENGTH_DIV_4];
  39. extern cos_msin_t a_cos_msin_32[DCT_LENGTH_DIV_2];
  40. extern cos_msin_t a_cos_msin_64[DCT_LENGTH];
  41. extern cos_msin_t *a_cos_msin_table[];
  42. /*********************************************************************************
  43. Function: dct_type_iv_a
  44. Syntax: void dct_type_iv_a (input, output, dct_length)
  45. Word16 input[], output[], dct_length;
  46. Description: Discrete Cosine Transform, Type IV used for MLT
  47. Design Notes:
  48. WMOPS: | 24kbit | 32kbit
  49. -------|--------------|----------------
  50. AVG | 1.14 | 1.14
  51. -------|--------------|----------------
  52. MAX | 1.14 | 1.14
  53. -------|--------------|----------------
  54. 14kHz | 24kbit | 32kbit | 48kbit
  55. -------|--------------|----------------|----------------
  56. AVG | 2.57 | 2.57 | 2.57
  57. -------|--------------|----------------|----------------
  58. MAX | 2.57 | 2.57 | 2.57
  59. -------|--------------|----------------|----------------
  60. *********************************************************************************/
  61. void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length)
  62. {
  63. Word16 buffer_a[MAX_DCT_LENGTH], buffer_b[MAX_DCT_LENGTH], buffer_c[MAX_DCT_LENGTH];
  64. Word16 *in_ptr, *in_ptr_low, *in_ptr_high, *next_in_base;
  65. Word16 *out_ptr_low, *out_ptr_high, *next_out_base;
  66. Word16 *out_buffer, *in_buffer, *buffer_swap;
  67. Word16 in_val_low, in_val_high;
  68. Word16 out_val_low, out_val_high;
  69. Word16 in_low_even, in_low_odd;
  70. Word16 in_high_even, in_high_odd;
  71. Word16 out_low_even, out_low_odd;
  72. Word16 out_high_even, out_high_odd;
  73. Word16 *pair_ptr;
  74. Word16 cos_even, cos_odd, msin_even, msin_odd;
  75. Word16 neg_cos_odd;
  76. Word16 neg_msin_even;
  77. Word32 sum;
  78. Word16 set_span, set_count, set_count_log, pairs_left, sets_left;
  79. Word16 i,k;
  80. Word16 index;
  81. cos_msin_t **table_ptr_ptr, *cos_msin_ptr;
  82. Word16 temp;
  83. Word32 acca;
  84. Word16 dct_length_log;
  85. /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
  86. /* Do the sum/difference butterflies, the first part of */
  87. /* converting one N-point transform into N/2 two-point */
  88. /* transforms, where N = 1 << DCT_LENGTH_LOG. = 64/128 */
  89. /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
  90. test();
  91. if (dct_length==DCT_LENGTH)
  92. {
  93. dct_length_log = DCT_LENGTH_LOG;
  94. /* Add bias offsets */
  95. for (i=0;i<dct_length;i++)
  96. {
  97. input[i] = add(input[i],anal_bias[i]);
  98. move16();
  99. }
  100. }
  101. else
  102. dct_length_log = MAX_DCT_LENGTH_LOG;
  103. index = 0L;
  104. move16();
  105. in_buffer = input;
  106. move16();
  107. out_buffer = buffer_a;
  108. move16();
  109. temp = sub(dct_length_log,2);
  110. for (set_count_log=0;set_count_log<=temp;set_count_log++)
  111. {
  112. /*===========================================================*/
  113. /* Initialization for the loop over sets at the current size */
  114. /*===========================================================*/
  115. /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
  116. set_span = shr_nocheck(dct_length,set_count_log);
  117. set_count = shl_nocheck(1,set_count_log);
  118. in_ptr = in_buffer;
  119. move16();
  120. next_out_base = out_buffer;
  121. move16();
  122. /*=====================================*/
  123. /* Loop over all the sets of this size */
  124. /*=====================================*/
  125. for (sets_left=set_count;sets_left>0;sets_left--)
  126. {
  127. /*||||||||||||||||||||||||||||||||||||||||||||*/
  128. /* Set up output pointers for the current set */
  129. /*||||||||||||||||||||||||||||||||||||||||||||*/
  130. out_ptr_low = next_out_base;
  131. next_out_base = next_out_base + set_span;
  132. out_ptr_high = next_out_base;
  133. /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
  134. /* Loop over all the butterflies in the current set */
  135. /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
  136. do
  137. {
  138. in_val_low = *in_ptr++;
  139. in_val_high = *in_ptr++;
  140. // blp: addition of two 16bits vars, there's no way
  141. // they'll overflow a 32bit var
  142. //acca = L_add(in_val_low,in_val_high);
  143. acca = (in_val_low + in_val_high);
  144. acca = L_shr_nocheck(acca,1);
  145. out_val_low = extract_l(acca);
  146. acca = L_sub(in_val_low,in_val_high);
  147. acca = L_shr_nocheck(acca,1);
  148. out_val_high = extract_l(acca);
  149. *out_ptr_low++ = out_val_low;
  150. *--out_ptr_high = out_val_high;
  151. test();
  152. } while (out_ptr_low < out_ptr_high);
  153. } /* End of loop over sets of the current size */
  154. /*============================================================*/
  155. /* Decide which buffers to use as input and output next time. */
  156. /* Except for the first time (when the input buffer is the */
  157. /* subroutine input) we just alternate the local buffers. */
  158. /*============================================================*/
  159. in_buffer = out_buffer;
  160. move16();
  161. if (out_buffer == buffer_a)
  162. out_buffer = buffer_b;
  163. else
  164. out_buffer = buffer_a;
  165. index = add(index,1);
  166. } /* End of loop over set sizes */
  167. /*++++++++++++++++++++++++++++++++*/
  168. /* Do N/2 two-point transforms, */
  169. /* where N = 1 << DCT_LENGTH_LOG */
  170. /*++++++++++++++++++++++++++++++++*/
  171. pair_ptr = in_buffer;
  172. move16();
  173. buffer_swap = buffer_c;
  174. move16();
  175. temp = sub(dct_length_log,1);
  176. temp = shl_nocheck(1,temp);
  177. for (pairs_left=temp; pairs_left > 0; pairs_left--)
  178. {
  179. for ( k=0; k<CORE_SIZE; k++ )
  180. {
  181. #if PJ_HAS_INT64
  182. /* blp: danger danger! not really compatible but faster */
  183. pj_int64_t sum64=0;
  184. move32();
  185. for ( i=0; i<CORE_SIZE; i++ )
  186. {
  187. sum64 += L_mult(pair_ptr[i], dct_core_a[i][k]);
  188. }
  189. sum = L_saturate(sum64);
  190. #else
  191. sum=0L;
  192. move32();
  193. for ( i=0; i<CORE_SIZE; i++ )
  194. {
  195. sum = L_mac(sum, pair_ptr[i],dct_core_a[i][k]);
  196. }
  197. #endif
  198. buffer_swap[k] = itu_round(sum);
  199. }
  200. /* address arithmetic */
  201. pair_ptr += CORE_SIZE;
  202. buffer_swap += CORE_SIZE;
  203. }
  204. for (i=0;i<dct_length;i++)
  205. {
  206. in_buffer[i] = buffer_c[i];
  207. move16();
  208. }
  209. table_ptr_ptr = a_cos_msin_table;
  210. /*++++++++++++++++++++++++++++++*/
  211. /* Perform rotation butterflies */
  212. /*++++++++++++++++++++++++++++++*/
  213. temp = sub(dct_length_log,2);
  214. for (set_count_log = temp; set_count_log >= 0; set_count_log--)
  215. {
  216. /*===========================================================*/
  217. /* Initialization for the loop over sets at the current size */
  218. /*===========================================================*/
  219. /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
  220. set_span = shr_nocheck(dct_length,set_count_log);
  221. set_count = shl_nocheck(1,set_count_log);
  222. next_in_base = in_buffer;
  223. move16();
  224. test();
  225. if (set_count_log == 0)
  226. {
  227. next_out_base = output;
  228. }
  229. else
  230. {
  231. next_out_base = out_buffer;
  232. }
  233. /*=====================================*/
  234. /* Loop over all the sets of this size */
  235. /*=====================================*/
  236. for (sets_left = set_count; sets_left > 0;sets_left--)
  237. {
  238. /*|||||||||||||||||||||||||||||||||||||||||*/
  239. /* Set up the pointers for the current set */
  240. /*|||||||||||||||||||||||||||||||||||||||||*/
  241. in_ptr_low = next_in_base;
  242. move16();
  243. temp = shr_nocheck(set_span,1);
  244. /* address arithmetic */
  245. in_ptr_high = in_ptr_low + temp;
  246. next_in_base += set_span;
  247. out_ptr_low = next_out_base;
  248. next_out_base += set_span;
  249. out_ptr_high = next_out_base;
  250. cos_msin_ptr = *table_ptr_ptr;
  251. /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
  252. /* Loop over all the butterfly pairs in the current set */
  253. /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
  254. do
  255. {
  256. /* address arithmetic */
  257. in_low_even = *in_ptr_low++;
  258. in_low_odd = *in_ptr_low++;
  259. in_high_even = *in_ptr_high++;
  260. in_high_odd = *in_ptr_high++;
  261. cos_even = cos_msin_ptr[0].cosine;
  262. move16();
  263. msin_even = cos_msin_ptr[0].minus_sine;
  264. move16();
  265. cos_odd = cos_msin_ptr[1].cosine;
  266. move16();
  267. msin_odd = cos_msin_ptr[1].minus_sine;
  268. move16();
  269. cos_msin_ptr += 2;
  270. sum = 0L;
  271. sum=L_mac(sum,cos_even,in_low_even);
  272. neg_msin_even = negate(msin_even);
  273. sum=L_mac(sum,neg_msin_even,in_high_even);
  274. out_low_even = itu_round(sum);
  275. sum = 0L;
  276. sum=L_mac(sum,msin_even,in_low_even);
  277. sum=L_mac(sum,cos_even,in_high_even);
  278. out_high_even= itu_round(sum);
  279. sum = 0L;
  280. sum=L_mac(sum,cos_odd,in_low_odd);
  281. sum=L_mac(sum,msin_odd,in_high_odd);
  282. out_low_odd= itu_round(sum);
  283. sum = 0L;
  284. sum=L_mac(sum,msin_odd,in_low_odd);
  285. neg_cos_odd = negate(cos_odd);
  286. sum=L_mac(sum,neg_cos_odd,in_high_odd);
  287. out_high_odd= itu_round(sum);
  288. *out_ptr_low++ = out_low_even;
  289. *--out_ptr_high = out_high_even;
  290. *out_ptr_low++ = out_low_odd;
  291. *--out_ptr_high = out_high_odd;
  292. test();
  293. } while (out_ptr_low < out_ptr_high);
  294. } /* End of loop over sets of the current size */
  295. /*=============================================*/
  296. /* Swap input and output buffers for next time */
  297. /*=============================================*/
  298. buffer_swap = in_buffer;
  299. in_buffer = out_buffer;
  300. out_buffer = buffer_swap;
  301. table_ptr_ptr++;
  302. }
  303. }