/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_row.h"
#include "libyuv/row.h"
#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module provides transpose kernels for MIPS DSP ASE Rev 2, 32-bit ABI.
#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) && \
    (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32)
  19. void TransposeWx8_DSPR2(const uint8* src,
  20. int src_stride,
  21. uint8* dst,
  22. int dst_stride,
  23. int width) {
  24. __asm__ __volatile__(
  25. ".set push \n"
  26. ".set noreorder \n"
  27. "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
  28. "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
  29. "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
  30. "addu $t3, $t2, %[src_stride] \n"
  31. "addu $t5, $t4, %[src_stride] \n"
  32. "addu $t6, $t2, $t4 \n"
  33. "andi $t0, %[dst], 0x3 \n"
  34. "andi $t1, %[dst_stride], 0x3 \n"
  35. "or $t0, $t0, $t1 \n"
  36. "bnez $t0, 11f \n"
  37. " subu $t7, $t9, %[src_stride] \n"
  38. // dst + dst_stride word aligned
  39. "1: \n"
  40. "lbu $t0, 0(%[src]) \n"
  41. "lbux $t1, %[src_stride](%[src]) \n"
  42. "lbux $t8, $t2(%[src]) \n"
  43. "lbux $t9, $t3(%[src]) \n"
  44. "sll $t1, $t1, 16 \n"
  45. "sll $t9, $t9, 16 \n"
  46. "or $t0, $t0, $t1 \n"
  47. "or $t8, $t8, $t9 \n"
  48. "precr.qb.ph $s0, $t8, $t0 \n"
  49. "lbux $t0, $t4(%[src]) \n"
  50. "lbux $t1, $t5(%[src]) \n"
  51. "lbux $t8, $t6(%[src]) \n"
  52. "lbux $t9, $t7(%[src]) \n"
  53. "sll $t1, $t1, 16 \n"
  54. "sll $t9, $t9, 16 \n"
  55. "or $t0, $t0, $t1 \n"
  56. "or $t8, $t8, $t9 \n"
  57. "precr.qb.ph $s1, $t8, $t0 \n"
  58. "sw $s0, 0(%[dst]) \n"
  59. "addiu %[width], -1 \n"
  60. "addiu %[src], 1 \n"
  61. "sw $s1, 4(%[dst]) \n"
  62. "bnez %[width], 1b \n"
  63. " addu %[dst], %[dst], %[dst_stride] \n"
  64. "b 2f \n"
  65. // dst + dst_stride unaligned
  66. "11: \n"
  67. "lbu $t0, 0(%[src]) \n"
  68. "lbux $t1, %[src_stride](%[src]) \n"
  69. "lbux $t8, $t2(%[src]) \n"
  70. "lbux $t9, $t3(%[src]) \n"
  71. "sll $t1, $t1, 16 \n"
  72. "sll $t9, $t9, 16 \n"
  73. "or $t0, $t0, $t1 \n"
  74. "or $t8, $t8, $t9 \n"
  75. "precr.qb.ph $s0, $t8, $t0 \n"
  76. "lbux $t0, $t4(%[src]) \n"
  77. "lbux $t1, $t5(%[src]) \n"
  78. "lbux $t8, $t6(%[src]) \n"
  79. "lbux $t9, $t7(%[src]) \n"
  80. "sll $t1, $t1, 16 \n"
  81. "sll $t9, $t9, 16 \n"
  82. "or $t0, $t0, $t1 \n"
  83. "or $t8, $t8, $t9 \n"
  84. "precr.qb.ph $s1, $t8, $t0 \n"
  85. "swr $s0, 0(%[dst]) \n"
  86. "swl $s0, 3(%[dst]) \n"
  87. "addiu %[width], -1 \n"
  88. "addiu %[src], 1 \n"
  89. "swr $s1, 4(%[dst]) \n"
  90. "swl $s1, 7(%[dst]) \n"
  91. "bnez %[width], 11b \n"
  92. "addu %[dst], %[dst], %[dst_stride] \n"
  93. "2: \n"
  94. ".set pop \n"
  95. : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width)
  96. : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
  97. : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1");
  98. }
  99. void TransposeWx8_Fast_DSPR2(const uint8* src,
  100. int src_stride,
  101. uint8* dst,
  102. int dst_stride,
  103. int width) {
  104. __asm__ __volatile__(
  105. ".set noat \n"
  106. ".set push \n"
  107. ".set noreorder \n"
  108. "beqz %[width], 2f \n"
  109. " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
  110. "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
  111. "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
  112. "addu $t3, $t2, %[src_stride] \n"
  113. "addu $t5, $t4, %[src_stride] \n"
  114. "addu $t6, $t2, $t4 \n"
  115. "srl $AT, %[width], 0x2 \n"
  116. "andi $t0, %[dst], 0x3 \n"
  117. "andi $t1, %[dst_stride], 0x3 \n"
  118. "or $t0, $t0, $t1 \n"
  119. "bnez $t0, 11f \n"
  120. " subu $t7, $t9, %[src_stride] \n"
  121. // dst + dst_stride word aligned
  122. "1: \n"
  123. "lw $t0, 0(%[src]) \n"
  124. "lwx $t1, %[src_stride](%[src]) \n"
  125. "lwx $t8, $t2(%[src]) \n"
  126. "lwx $t9, $t3(%[src]) \n"
  127. // t0 = | 30 | 20 | 10 | 00 |
  128. // t1 = | 31 | 21 | 11 | 01 |
  129. // t8 = | 32 | 22 | 12 | 02 |
  130. // t9 = | 33 | 23 | 13 | 03 |
  131. "precr.qb.ph $s0, $t1, $t0 \n"
  132. "precr.qb.ph $s1, $t9, $t8 \n"
  133. "precrq.qb.ph $s2, $t1, $t0 \n"
  134. "precrq.qb.ph $s3, $t9, $t8 \n"
  135. // s0 = | 21 | 01 | 20 | 00 |
  136. // s1 = | 23 | 03 | 22 | 02 |
  137. // s2 = | 31 | 11 | 30 | 10 |
  138. // s3 = | 33 | 13 | 32 | 12 |
  139. "precr.qb.ph $s4, $s1, $s0 \n"
  140. "precrq.qb.ph $s5, $s1, $s0 \n"
  141. "precr.qb.ph $s6, $s3, $s2 \n"
  142. "precrq.qb.ph $s7, $s3, $s2 \n"
  143. // s4 = | 03 | 02 | 01 | 00 |
  144. // s5 = | 23 | 22 | 21 | 20 |
  145. // s6 = | 13 | 12 | 11 | 10 |
  146. // s7 = | 33 | 32 | 31 | 30 |
  147. "lwx $t0, $t4(%[src]) \n"
  148. "lwx $t1, $t5(%[src]) \n"
  149. "lwx $t8, $t6(%[src]) \n"
  150. "lwx $t9, $t7(%[src]) \n"
  151. // t0 = | 34 | 24 | 14 | 04 |
  152. // t1 = | 35 | 25 | 15 | 05 |
  153. // t8 = | 36 | 26 | 16 | 06 |
  154. // t9 = | 37 | 27 | 17 | 07 |
  155. "precr.qb.ph $s0, $t1, $t0 \n"
  156. "precr.qb.ph $s1, $t9, $t8 \n"
  157. "precrq.qb.ph $s2, $t1, $t0 \n"
  158. "precrq.qb.ph $s3, $t9, $t8 \n"
  159. // s0 = | 25 | 05 | 24 | 04 |
  160. // s1 = | 27 | 07 | 26 | 06 |
  161. // s2 = | 35 | 15 | 34 | 14 |
  162. // s3 = | 37 | 17 | 36 | 16 |
  163. "precr.qb.ph $t0, $s1, $s0 \n"
  164. "precrq.qb.ph $t1, $s1, $s0 \n"
  165. "precr.qb.ph $t8, $s3, $s2 \n"
  166. "precrq.qb.ph $t9, $s3, $s2 \n"
  167. // t0 = | 07 | 06 | 05 | 04 |
  168. // t1 = | 27 | 26 | 25 | 24 |
  169. // t8 = | 17 | 16 | 15 | 14 |
  170. // t9 = | 37 | 36 | 35 | 34 |
  171. "addu $s0, %[dst], %[dst_stride] \n"
  172. "addu $s1, $s0, %[dst_stride] \n"
  173. "addu $s2, $s1, %[dst_stride] \n"
  174. "sw $s4, 0(%[dst]) \n"
  175. "sw $t0, 4(%[dst]) \n"
  176. "sw $s6, 0($s0) \n"
  177. "sw $t8, 4($s0) \n"
  178. "sw $s5, 0($s1) \n"
  179. "sw $t1, 4($s1) \n"
  180. "sw $s7, 0($s2) \n"
  181. "sw $t9, 4($s2) \n"
  182. "addiu $AT, -1 \n"
  183. "addiu %[src], 4 \n"
  184. "bnez $AT, 1b \n"
  185. " addu %[dst], $s2, %[dst_stride] \n"
  186. "b 2f \n"
  187. // dst + dst_stride unaligned
  188. "11: \n"
  189. "lw $t0, 0(%[src]) \n"
  190. "lwx $t1, %[src_stride](%[src]) \n"
  191. "lwx $t8, $t2(%[src]) \n"
  192. "lwx $t9, $t3(%[src]) \n"
  193. // t0 = | 30 | 20 | 10 | 00 |
  194. // t1 = | 31 | 21 | 11 | 01 |
  195. // t8 = | 32 | 22 | 12 | 02 |
  196. // t9 = | 33 | 23 | 13 | 03 |
  197. "precr.qb.ph $s0, $t1, $t0 \n"
  198. "precr.qb.ph $s1, $t9, $t8 \n"
  199. "precrq.qb.ph $s2, $t1, $t0 \n"
  200. "precrq.qb.ph $s3, $t9, $t8 \n"
  201. // s0 = | 21 | 01 | 20 | 00 |
  202. // s1 = | 23 | 03 | 22 | 02 |
  203. // s2 = | 31 | 11 | 30 | 10 |
  204. // s3 = | 33 | 13 | 32 | 12 |
  205. "precr.qb.ph $s4, $s1, $s0 \n"
  206. "precrq.qb.ph $s5, $s1, $s0 \n"
  207. "precr.qb.ph $s6, $s3, $s2 \n"
  208. "precrq.qb.ph $s7, $s3, $s2 \n"
  209. // s4 = | 03 | 02 | 01 | 00 |
  210. // s5 = | 23 | 22 | 21 | 20 |
  211. // s6 = | 13 | 12 | 11 | 10 |
  212. // s7 = | 33 | 32 | 31 | 30 |
  213. "lwx $t0, $t4(%[src]) \n"
  214. "lwx $t1, $t5(%[src]) \n"
  215. "lwx $t8, $t6(%[src]) \n"
  216. "lwx $t9, $t7(%[src]) \n"
  217. // t0 = | 34 | 24 | 14 | 04 |
  218. // t1 = | 35 | 25 | 15 | 05 |
  219. // t8 = | 36 | 26 | 16 | 06 |
  220. // t9 = | 37 | 27 | 17 | 07 |
  221. "precr.qb.ph $s0, $t1, $t0 \n"
  222. "precr.qb.ph $s1, $t9, $t8 \n"
  223. "precrq.qb.ph $s2, $t1, $t0 \n"
  224. "precrq.qb.ph $s3, $t9, $t8 \n"
  225. // s0 = | 25 | 05 | 24 | 04 |
  226. // s1 = | 27 | 07 | 26 | 06 |
  227. // s2 = | 35 | 15 | 34 | 14 |
  228. // s3 = | 37 | 17 | 36 | 16 |
  229. "precr.qb.ph $t0, $s1, $s0 \n"
  230. "precrq.qb.ph $t1, $s1, $s0 \n"
  231. "precr.qb.ph $t8, $s3, $s2 \n"
  232. "precrq.qb.ph $t9, $s3, $s2 \n"
  233. // t0 = | 07 | 06 | 05 | 04 |
  234. // t1 = | 27 | 26 | 25 | 24 |
  235. // t8 = | 17 | 16 | 15 | 14 |
  236. // t9 = | 37 | 36 | 35 | 34 |
  237. "addu $s0, %[dst], %[dst_stride] \n"
  238. "addu $s1, $s0, %[dst_stride] \n"
  239. "addu $s2, $s1, %[dst_stride] \n"
  240. "swr $s4, 0(%[dst]) \n"
  241. "swl $s4, 3(%[dst]) \n"
  242. "swr $t0, 4(%[dst]) \n"
  243. "swl $t0, 7(%[dst]) \n"
  244. "swr $s6, 0($s0) \n"
  245. "swl $s6, 3($s0) \n"
  246. "swr $t8, 4($s0) \n"
  247. "swl $t8, 7($s0) \n"
  248. "swr $s5, 0($s1) \n"
  249. "swl $s5, 3($s1) \n"
  250. "swr $t1, 4($s1) \n"
  251. "swl $t1, 7($s1) \n"
  252. "swr $s7, 0($s2) \n"
  253. "swl $s7, 3($s2) \n"
  254. "swr $t9, 4($s2) \n"
  255. "swl $t9, 7($s2) \n"
  256. "addiu $AT, -1 \n"
  257. "addiu %[src], 4 \n"
  258. "bnez $AT, 11b \n"
  259. " addu %[dst], $s2, %[dst_stride] \n"
  260. "2: \n"
  261. ".set pop \n"
  262. ".set at \n"
  263. : [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width)
  264. : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
  265. : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
  266. "s2", "s3", "s4", "s5", "s6", "s7");
  267. }
  268. void TransposeUVWx8_DSPR2(const uint8* src,
  269. int src_stride,
  270. uint8* dst_a,
  271. int dst_stride_a,
  272. uint8* dst_b,
  273. int dst_stride_b,
  274. int width) {
  275. __asm__ __volatile__(
  276. ".set push \n"
  277. ".set noreorder \n"
  278. "beqz %[width], 2f \n"
  279. " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
  280. "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
  281. "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
  282. "addu $t3, $t2, %[src_stride] \n"
  283. "addu $t5, $t4, %[src_stride] \n"
  284. "addu $t6, $t2, $t4 \n"
  285. "subu $t7, $t9, %[src_stride] \n"
  286. "srl $t1, %[width], 1 \n"
  287. // check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
  288. "andi $t0, %[dst_a], 0x3 \n"
  289. "andi $t8, %[dst_b], 0x3 \n"
  290. "or $t0, $t0, $t8 \n"
  291. "andi $t8, %[dst_stride_a], 0x3 \n"
  292. "andi $s5, %[dst_stride_b], 0x3 \n"
  293. "or $t8, $t8, $s5 \n"
  294. "or $t0, $t0, $t8 \n"
  295. "bnez $t0, 11f \n"
  296. " nop \n"
  297. // dst + dst_stride word aligned (both, a & b dst addresses)
  298. "1: \n"
  299. "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
  300. "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
  301. "addu $s5, %[dst_a], %[dst_stride_a] \n"
  302. "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
  303. "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
  304. "addu $s6, %[dst_b], %[dst_stride_b] \n"
  305. "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
  306. "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
  307. "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
  308. "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
  309. "sll $t0, $t0, 16 \n"
  310. "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
  311. "sll $t9, $t9, 16 \n"
  312. "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
  313. "sw $s3, 0($s5) \n"
  314. "sw $s4, 0($s6) \n"
  315. "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
  316. "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
  317. "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
  318. "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
  319. "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
  320. "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
  321. "sw $s3, 0(%[dst_a]) \n"
  322. "sw $s4, 0(%[dst_b]) \n"
  323. "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
  324. "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
  325. "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
  326. "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
  327. "sll $t0, $t0, 16 \n"
  328. "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
  329. "sll $t9, $t9, 16 \n"
  330. "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
  331. "sw $s3, 4($s5) \n"
  332. "sw $s4, 4($s6) \n"
  333. "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
  334. "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
  335. "addiu %[src], 4 \n"
  336. "addiu $t1, -1 \n"
  337. "sll $t0, %[dst_stride_a], 1 \n"
  338. "sll $t8, %[dst_stride_b], 1 \n"
  339. "sw $s3, 4(%[dst_a]) \n"
  340. "sw $s4, 4(%[dst_b]) \n"
  341. "addu %[dst_a], %[dst_a], $t0 \n"
  342. "bnez $t1, 1b \n"
  343. " addu %[dst_b], %[dst_b], $t8 \n"
  344. "b 2f \n"
  345. " nop \n"
  346. // dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
  347. "11: \n"
  348. "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
  349. "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
  350. "addu $s5, %[dst_a], %[dst_stride_a] \n"
  351. "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
  352. "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
  353. "addu $s6, %[dst_b], %[dst_stride_b] \n"
  354. "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
  355. "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
  356. "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
  357. "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
  358. "sll $t0, $t0, 16 \n"
  359. "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
  360. "sll $t9, $t9, 16 \n"
  361. "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
  362. "swr $s3, 0($s5) \n"
  363. "swl $s3, 3($s5) \n"
  364. "swr $s4, 0($s6) \n"
  365. "swl $s4, 3($s6) \n"
  366. "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
  367. "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
  368. "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
  369. "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
  370. "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
  371. "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
  372. "swr $s3, 0(%[dst_a]) \n"
  373. "swl $s3, 3(%[dst_a]) \n"
  374. "swr $s4, 0(%[dst_b]) \n"
  375. "swl $s4, 3(%[dst_b]) \n"
  376. "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
  377. "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
  378. "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
  379. "precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
  380. "sll $t0, $t0, 16 \n"
  381. "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
  382. "sll $t9, $t9, 16 \n"
  383. "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
  384. "swr $s3, 4($s5) \n"
  385. "swl $s3, 7($s5) \n"
  386. "swr $s4, 4($s6) \n"
  387. "swl $s4, 7($s6) \n"
  388. "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
  389. "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
  390. "addiu %[src], 4 \n"
  391. "addiu $t1, -1 \n"
  392. "sll $t0, %[dst_stride_a], 1 \n"
  393. "sll $t8, %[dst_stride_b], 1 \n"
  394. "swr $s3, 4(%[dst_a]) \n"
  395. "swl $s3, 7(%[dst_a]) \n"
  396. "swr $s4, 4(%[dst_b]) \n"
  397. "swl $s4, 7(%[dst_b]) \n"
  398. "addu %[dst_a], %[dst_a], $t0 \n"
  399. "bnez $t1, 11b \n"
  400. " addu %[dst_b], %[dst_b], $t8 \n"
  401. "2: \n"
  402. ".set pop \n"
  403. : [src] "+r"(src), [dst_a] "+r"(dst_a), [dst_b] "+r"(dst_b),
  404. [width] "+r"(width), [src_stride] "+r"(src_stride)
  405. : [dst_stride_a] "r"(dst_stride_a), [dst_stride_b] "r"(dst_stride_b)
  406. : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
  407. "s2", "s3", "s4", "s5", "s6");
  408. }
#endif  // !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips_dsp) &&
        // (__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif