scale_common.cc 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341
  1. /*
  2. * Copyright 2013 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/scale.h"
  11. #include <assert.h>
  12. #include <string.h>
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/planar_functions.h" // For CopyARGB
  15. #include "libyuv/row.h"
  16. #include "libyuv/scale_row.h"
  17. #ifdef __cplusplus
  18. namespace libyuv {
  19. extern "C" {
  20. #endif
  // Returns the magnitude of v: v itself when it is non-negative, -v otherwise.
  static __inline int Abs(int v) {
    if (v < 0) {
      return -v;
    }
    return v;
  }
  24. // CPU agnostic row functions
  25. void ScaleRowDown2_C(const uint8* src_ptr,
  26. ptrdiff_t src_stride,
  27. uint8* dst,
  28. int dst_width) {
  29. int x;
  30. (void)src_stride;
  31. for (x = 0; x < dst_width - 1; x += 2) {
  32. dst[0] = src_ptr[1];
  33. dst[1] = src_ptr[3];
  34. dst += 2;
  35. src_ptr += 4;
  36. }
  37. if (dst_width & 1) {
  38. dst[0] = src_ptr[1];
  39. }
  40. }
  41. void ScaleRowDown2_16_C(const uint16* src_ptr,
  42. ptrdiff_t src_stride,
  43. uint16* dst,
  44. int dst_width) {
  45. int x;
  46. (void)src_stride;
  47. for (x = 0; x < dst_width - 1; x += 2) {
  48. dst[0] = src_ptr[1];
  49. dst[1] = src_ptr[3];
  50. dst += 2;
  51. src_ptr += 4;
  52. }
  53. if (dst_width & 1) {
  54. dst[0] = src_ptr[1];
  55. }
  56. }
  57. void ScaleRowDown2Linear_C(const uint8* src_ptr,
  58. ptrdiff_t src_stride,
  59. uint8* dst,
  60. int dst_width) {
  61. const uint8* s = src_ptr;
  62. int x;
  63. (void)src_stride;
  64. for (x = 0; x < dst_width - 1; x += 2) {
  65. dst[0] = (s[0] + s[1] + 1) >> 1;
  66. dst[1] = (s[2] + s[3] + 1) >> 1;
  67. dst += 2;
  68. s += 4;
  69. }
  70. if (dst_width & 1) {
  71. dst[0] = (s[0] + s[1] + 1) >> 1;
  72. }
  73. }
  74. void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
  75. ptrdiff_t src_stride,
  76. uint16* dst,
  77. int dst_width) {
  78. const uint16* s = src_ptr;
  79. int x;
  80. (void)src_stride;
  81. for (x = 0; x < dst_width - 1; x += 2) {
  82. dst[0] = (s[0] + s[1] + 1) >> 1;
  83. dst[1] = (s[2] + s[3] + 1) >> 1;
  84. dst += 2;
  85. s += 4;
  86. }
  87. if (dst_width & 1) {
  88. dst[0] = (s[0] + s[1] + 1) >> 1;
  89. }
  90. }
  91. void ScaleRowDown2Box_C(const uint8* src_ptr,
  92. ptrdiff_t src_stride,
  93. uint8* dst,
  94. int dst_width) {
  95. const uint8* s = src_ptr;
  96. const uint8* t = src_ptr + src_stride;
  97. int x;
  98. for (x = 0; x < dst_width - 1; x += 2) {
  99. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  100. dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
  101. dst += 2;
  102. s += 4;
  103. t += 4;
  104. }
  105. if (dst_width & 1) {
  106. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  107. }
  108. }
  109. void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
  110. ptrdiff_t src_stride,
  111. uint8* dst,
  112. int dst_width) {
  113. const uint8* s = src_ptr;
  114. const uint8* t = src_ptr + src_stride;
  115. int x;
  116. dst_width -= 1;
  117. for (x = 0; x < dst_width - 1; x += 2) {
  118. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  119. dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
  120. dst += 2;
  121. s += 4;
  122. t += 4;
  123. }
  124. if (dst_width & 1) {
  125. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  126. dst += 1;
  127. s += 2;
  128. t += 2;
  129. }
  130. dst[0] = (s[0] + t[0] + 1) >> 1;
  131. }
  132. void ScaleRowDown2Box_16_C(const uint16* src_ptr,
  133. ptrdiff_t src_stride,
  134. uint16* dst,
  135. int dst_width) {
  136. const uint16* s = src_ptr;
  137. const uint16* t = src_ptr + src_stride;
  138. int x;
  139. for (x = 0; x < dst_width - 1; x += 2) {
  140. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  141. dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
  142. dst += 2;
  143. s += 4;
  144. t += 4;
  145. }
  146. if (dst_width & 1) {
  147. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  148. }
  149. }
  150. void ScaleRowDown4_C(const uint8* src_ptr,
  151. ptrdiff_t src_stride,
  152. uint8* dst,
  153. int dst_width) {
  154. int x;
  155. (void)src_stride;
  156. for (x = 0; x < dst_width - 1; x += 2) {
  157. dst[0] = src_ptr[2];
  158. dst[1] = src_ptr[6];
  159. dst += 2;
  160. src_ptr += 8;
  161. }
  162. if (dst_width & 1) {
  163. dst[0] = src_ptr[2];
  164. }
  165. }
  166. void ScaleRowDown4_16_C(const uint16* src_ptr,
  167. ptrdiff_t src_stride,
  168. uint16* dst,
  169. int dst_width) {
  170. int x;
  171. (void)src_stride;
  172. for (x = 0; x < dst_width - 1; x += 2) {
  173. dst[0] = src_ptr[2];
  174. dst[1] = src_ptr[6];
  175. dst += 2;
  176. src_ptr += 8;
  177. }
  178. if (dst_width & 1) {
  179. dst[0] = src_ptr[2];
  180. }
  181. }
  182. void ScaleRowDown4Box_C(const uint8* src_ptr,
  183. ptrdiff_t src_stride,
  184. uint8* dst,
  185. int dst_width) {
  186. intptr_t stride = src_stride;
  187. int x;
  188. for (x = 0; x < dst_width - 1; x += 2) {
  189. dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
  190. src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
  191. src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
  192. src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
  193. src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
  194. src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
  195. src_ptr[stride * 3 + 3] + 8) >>
  196. 4;
  197. dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
  198. src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
  199. src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
  200. src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
  201. src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
  202. src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
  203. src_ptr[stride * 3 + 7] + 8) >>
  204. 4;
  205. dst += 2;
  206. src_ptr += 8;
  207. }
  208. if (dst_width & 1) {
  209. dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
  210. src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
  211. src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
  212. src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
  213. src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
  214. src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
  215. src_ptr[stride * 3 + 3] + 8) >>
  216. 4;
  217. }
  218. }
  219. void ScaleRowDown4Box_16_C(const uint16* src_ptr,
  220. ptrdiff_t src_stride,
  221. uint16* dst,
  222. int dst_width) {
  223. intptr_t stride = src_stride;
  224. int x;
  225. for (x = 0; x < dst_width - 1; x += 2) {
  226. dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
  227. src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
  228. src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
  229. src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
  230. src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
  231. src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
  232. src_ptr[stride * 3 + 3] + 8) >>
  233. 4;
  234. dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
  235. src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
  236. src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
  237. src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
  238. src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
  239. src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
  240. src_ptr[stride * 3 + 7] + 8) >>
  241. 4;
  242. dst += 2;
  243. src_ptr += 8;
  244. }
  245. if (dst_width & 1) {
  246. dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
  247. src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
  248. src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
  249. src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
  250. src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
  251. src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
  252. src_ptr[stride * 3 + 3] + 8) >>
  253. 4;
  254. }
  255. }
  256. void ScaleRowDown34_C(const uint8* src_ptr,
  257. ptrdiff_t src_stride,
  258. uint8* dst,
  259. int dst_width) {
  260. int x;
  261. (void)src_stride;
  262. assert((dst_width % 3 == 0) && (dst_width > 0));
  263. for (x = 0; x < dst_width; x += 3) {
  264. dst[0] = src_ptr[0];
  265. dst[1] = src_ptr[1];
  266. dst[2] = src_ptr[3];
  267. dst += 3;
  268. src_ptr += 4;
  269. }
  270. }
  271. void ScaleRowDown34_16_C(const uint16* src_ptr,
  272. ptrdiff_t src_stride,
  273. uint16* dst,
  274. int dst_width) {
  275. int x;
  276. (void)src_stride;
  277. assert((dst_width % 3 == 0) && (dst_width > 0));
  278. for (x = 0; x < dst_width; x += 3) {
  279. dst[0] = src_ptr[0];
  280. dst[1] = src_ptr[1];
  281. dst[2] = src_ptr[3];
  282. dst += 3;
  283. src_ptr += 4;
  284. }
  285. }
  286. // Filter rows 0 and 1 together, 3 : 1
  287. void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
  288. ptrdiff_t src_stride,
  289. uint8* d,
  290. int dst_width) {
  291. const uint8* s = src_ptr;
  292. const uint8* t = src_ptr + src_stride;
  293. int x;
  294. assert((dst_width % 3 == 0) && (dst_width > 0));
  295. for (x = 0; x < dst_width; x += 3) {
  296. uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
  297. uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
  298. uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
  299. uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
  300. uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
  301. uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
  302. d[0] = (a0 * 3 + b0 + 2) >> 2;
  303. d[1] = (a1 * 3 + b1 + 2) >> 2;
  304. d[2] = (a2 * 3 + b2 + 2) >> 2;
  305. d += 3;
  306. s += 4;
  307. t += 4;
  308. }
  309. }
  310. void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
  311. ptrdiff_t src_stride,
  312. uint16* d,
  313. int dst_width) {
  314. const uint16* s = src_ptr;
  315. const uint16* t = src_ptr + src_stride;
  316. int x;
  317. assert((dst_width % 3 == 0) && (dst_width > 0));
  318. for (x = 0; x < dst_width; x += 3) {
  319. uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
  320. uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
  321. uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
  322. uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
  323. uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
  324. uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
  325. d[0] = (a0 * 3 + b0 + 2) >> 2;
  326. d[1] = (a1 * 3 + b1 + 2) >> 2;
  327. d[2] = (a2 * 3 + b2 + 2) >> 2;
  328. d += 3;
  329. s += 4;
  330. t += 4;
  331. }
  332. }
  333. // Filter rows 1 and 2 together, 1 : 1
  334. void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
  335. ptrdiff_t src_stride,
  336. uint8* d,
  337. int dst_width) {
  338. const uint8* s = src_ptr;
  339. const uint8* t = src_ptr + src_stride;
  340. int x;
  341. assert((dst_width % 3 == 0) && (dst_width > 0));
  342. for (x = 0; x < dst_width; x += 3) {
  343. uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
  344. uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
  345. uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
  346. uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
  347. uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
  348. uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
  349. d[0] = (a0 + b0 + 1) >> 1;
  350. d[1] = (a1 + b1 + 1) >> 1;
  351. d[2] = (a2 + b2 + 1) >> 1;
  352. d += 3;
  353. s += 4;
  354. t += 4;
  355. }
  356. }
  357. void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
  358. ptrdiff_t src_stride,
  359. uint16* d,
  360. int dst_width) {
  361. const uint16* s = src_ptr;
  362. const uint16* t = src_ptr + src_stride;
  363. int x;
  364. assert((dst_width % 3 == 0) && (dst_width > 0));
  365. for (x = 0; x < dst_width; x += 3) {
  366. uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
  367. uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
  368. uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
  369. uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
  370. uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
  371. uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
  372. d[0] = (a0 + b0 + 1) >> 1;
  373. d[1] = (a1 + b1 + 1) >> 1;
  374. d[2] = (a2 + b2 + 1) >> 1;
  375. d += 3;
  376. s += 4;
  377. t += 4;
  378. }
  379. }
  380. // Scales a single row of pixels using point sampling.
  381. void ScaleCols_C(uint8* dst_ptr,
  382. const uint8* src_ptr,
  383. int dst_width,
  384. int x,
  385. int dx) {
  386. int j;
  387. for (j = 0; j < dst_width - 1; j += 2) {
  388. dst_ptr[0] = src_ptr[x >> 16];
  389. x += dx;
  390. dst_ptr[1] = src_ptr[x >> 16];
  391. x += dx;
  392. dst_ptr += 2;
  393. }
  394. if (dst_width & 1) {
  395. dst_ptr[0] = src_ptr[x >> 16];
  396. }
  397. }
  398. void ScaleCols_16_C(uint16* dst_ptr,
  399. const uint16* src_ptr,
  400. int dst_width,
  401. int x,
  402. int dx) {
  403. int j;
  404. for (j = 0; j < dst_width - 1; j += 2) {
  405. dst_ptr[0] = src_ptr[x >> 16];
  406. x += dx;
  407. dst_ptr[1] = src_ptr[x >> 16];
  408. x += dx;
  409. dst_ptr += 2;
  410. }
  411. if (dst_width & 1) {
  412. dst_ptr[0] = src_ptr[x >> 16];
  413. }
  414. }
  415. // Scales a single row of pixels up by 2x using point sampling.
  416. void ScaleColsUp2_C(uint8* dst_ptr,
  417. const uint8* src_ptr,
  418. int dst_width,
  419. int x,
  420. int dx) {
  421. int j;
  422. (void)x;
  423. (void)dx;
  424. for (j = 0; j < dst_width - 1; j += 2) {
  425. dst_ptr[1] = dst_ptr[0] = src_ptr[0];
  426. src_ptr += 1;
  427. dst_ptr += 2;
  428. }
  429. if (dst_width & 1) {
  430. dst_ptr[0] = src_ptr[0];
  431. }
  432. }
  433. void ScaleColsUp2_16_C(uint16* dst_ptr,
  434. const uint16* src_ptr,
  435. int dst_width,
  436. int x,
  437. int dx) {
  438. int j;
  439. (void)x;
  440. (void)dx;
  441. for (j = 0; j < dst_width - 1; j += 2) {
  442. dst_ptr[1] = dst_ptr[0] = src_ptr[0];
  443. src_ptr += 1;
  444. dst_ptr += 2;
  445. }
  446. if (dst_width & 1) {
  447. dst_ptr[0] = src_ptr[0];
  448. }
  449. }
  // Blend of a and b by the 16 bit fraction f.
  // (1-f)a + fb can be replaced with a + f(b-a)
  #if !defined(__arm__) && !defined(__aarch64__)
  // Intel uses 7 bit math with rounding.
  #define BLENDER(a, b, f) \
    (uint8)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
  #else
  // ARM keeps the full 16 bit fraction, with rounding.
  #define BLENDER(a, b, f) \
    (uint8)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
  #endif
  459. void ScaleFilterCols_C(uint8* dst_ptr,
  460. const uint8* src_ptr,
  461. int dst_width,
  462. int x,
  463. int dx) {
  464. int j;
  465. for (j = 0; j < dst_width - 1; j += 2) {
  466. int xi = x >> 16;
  467. int a = src_ptr[xi];
  468. int b = src_ptr[xi + 1];
  469. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  470. x += dx;
  471. xi = x >> 16;
  472. a = src_ptr[xi];
  473. b = src_ptr[xi + 1];
  474. dst_ptr[1] = BLENDER(a, b, x & 0xffff);
  475. x += dx;
  476. dst_ptr += 2;
  477. }
  478. if (dst_width & 1) {
  479. int xi = x >> 16;
  480. int a = src_ptr[xi];
  481. int b = src_ptr[xi + 1];
  482. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  483. }
  484. }
  485. void ScaleFilterCols64_C(uint8* dst_ptr,
  486. const uint8* src_ptr,
  487. int dst_width,
  488. int x32,
  489. int dx) {
  490. int64 x = (int64)(x32);
  491. int j;
  492. for (j = 0; j < dst_width - 1; j += 2) {
  493. int64 xi = x >> 16;
  494. int a = src_ptr[xi];
  495. int b = src_ptr[xi + 1];
  496. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  497. x += dx;
  498. xi = x >> 16;
  499. a = src_ptr[xi];
  500. b = src_ptr[xi + 1];
  501. dst_ptr[1] = BLENDER(a, b, x & 0xffff);
  502. x += dx;
  503. dst_ptr += 2;
  504. }
  505. if (dst_width & 1) {
  506. int64 xi = x >> 16;
  507. int a = src_ptr[xi];
  508. int b = src_ptr[xi + 1];
  509. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  510. }
  511. }
  #undef BLENDER
  // Same as 8 bit arm blender but return is cast to uint16.
  // The product f * (b - a) can reach 65535 * 65535 in magnitude, which does
  // not fit in a 32 bit int, so the multiply and rounding are done in 64 bits
  // before the result is narrowed again.
  #define BLENDER(a, b, f)  \
    (uint16)((int)(a) +     \
             (int)((((int64)(f) * ((int64)(b) - (int64)(a))) + 0x8000) >> 16))
  516. void ScaleFilterCols_16_C(uint16* dst_ptr,
  517. const uint16* src_ptr,
  518. int dst_width,
  519. int x,
  520. int dx) {
  521. int j;
  522. for (j = 0; j < dst_width - 1; j += 2) {
  523. int xi = x >> 16;
  524. int a = src_ptr[xi];
  525. int b = src_ptr[xi + 1];
  526. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  527. x += dx;
  528. xi = x >> 16;
  529. a = src_ptr[xi];
  530. b = src_ptr[xi + 1];
  531. dst_ptr[1] = BLENDER(a, b, x & 0xffff);
  532. x += dx;
  533. dst_ptr += 2;
  534. }
  535. if (dst_width & 1) {
  536. int xi = x >> 16;
  537. int a = src_ptr[xi];
  538. int b = src_ptr[xi + 1];
  539. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  540. }
  541. }
  542. void ScaleFilterCols64_16_C(uint16* dst_ptr,
  543. const uint16* src_ptr,
  544. int dst_width,
  545. int x32,
  546. int dx) {
  547. int64 x = (int64)(x32);
  548. int j;
  549. for (j = 0; j < dst_width - 1; j += 2) {
  550. int64 xi = x >> 16;
  551. int a = src_ptr[xi];
  552. int b = src_ptr[xi + 1];
  553. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  554. x += dx;
  555. xi = x >> 16;
  556. a = src_ptr[xi];
  557. b = src_ptr[xi + 1];
  558. dst_ptr[1] = BLENDER(a, b, x & 0xffff);
  559. x += dx;
  560. dst_ptr += 2;
  561. }
  562. if (dst_width & 1) {
  563. int64 xi = x >> 16;
  564. int a = src_ptr[xi];
  565. int b = src_ptr[xi + 1];
  566. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  567. }
  568. }
  569. #undef BLENDER
  570. void ScaleRowDown38_C(const uint8* src_ptr,
  571. ptrdiff_t src_stride,
  572. uint8* dst,
  573. int dst_width) {
  574. int x;
  575. (void)src_stride;
  576. assert(dst_width % 3 == 0);
  577. for (x = 0; x < dst_width; x += 3) {
  578. dst[0] = src_ptr[0];
  579. dst[1] = src_ptr[3];
  580. dst[2] = src_ptr[6];
  581. dst += 3;
  582. src_ptr += 8;
  583. }
  584. }
  585. void ScaleRowDown38_16_C(const uint16* src_ptr,
  586. ptrdiff_t src_stride,
  587. uint16* dst,
  588. int dst_width) {
  589. int x;
  590. (void)src_stride;
  591. assert(dst_width % 3 == 0);
  592. for (x = 0; x < dst_width; x += 3) {
  593. dst[0] = src_ptr[0];
  594. dst[1] = src_ptr[3];
  595. dst[2] = src_ptr[6];
  596. dst += 3;
  597. src_ptr += 8;
  598. }
  599. }
  600. // 8x3 -> 3x1
  601. void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
  602. ptrdiff_t src_stride,
  603. uint8* dst_ptr,
  604. int dst_width) {
  605. intptr_t stride = src_stride;
  606. int i;
  607. assert((dst_width % 3 == 0) && (dst_width > 0));
  608. for (i = 0; i < dst_width; i += 3) {
  609. dst_ptr[0] =
  610. (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
  611. src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
  612. src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
  613. (65536 / 9) >>
  614. 16;
  615. dst_ptr[1] =
  616. (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
  617. src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
  618. src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
  619. (65536 / 9) >>
  620. 16;
  621. dst_ptr[2] =
  622. (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
  623. src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
  624. (65536 / 6) >>
  625. 16;
  626. src_ptr += 8;
  627. dst_ptr += 3;
  628. }
  629. }
  630. void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
  631. ptrdiff_t src_stride,
  632. uint16* dst_ptr,
  633. int dst_width) {
  634. intptr_t stride = src_stride;
  635. int i;
  636. assert((dst_width % 3 == 0) && (dst_width > 0));
  637. for (i = 0; i < dst_width; i += 3) {
  638. dst_ptr[0] =
  639. (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
  640. src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
  641. src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
  642. (65536 / 9) >>
  643. 16;
  644. dst_ptr[1] =
  645. (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
  646. src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
  647. src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
  648. (65536 / 9) >>
  649. 16;
  650. dst_ptr[2] =
  651. (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
  652. src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
  653. (65536 / 6) >>
  654. 16;
  655. src_ptr += 8;
  656. dst_ptr += 3;
  657. }
  658. }
  659. // 8x2 -> 3x1
  660. void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
  661. ptrdiff_t src_stride,
  662. uint8* dst_ptr,
  663. int dst_width) {
  664. intptr_t stride = src_stride;
  665. int i;
  666. assert((dst_width % 3 == 0) && (dst_width > 0));
  667. for (i = 0; i < dst_width; i += 3) {
  668. dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
  669. src_ptr[stride + 1] + src_ptr[stride + 2]) *
  670. (65536 / 6) >>
  671. 16;
  672. dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
  673. src_ptr[stride + 4] + src_ptr[stride + 5]) *
  674. (65536 / 6) >>
  675. 16;
  676. dst_ptr[2] =
  677. (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
  678. (65536 / 4) >>
  679. 16;
  680. src_ptr += 8;
  681. dst_ptr += 3;
  682. }
  683. }
  684. void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
  685. ptrdiff_t src_stride,
  686. uint16* dst_ptr,
  687. int dst_width) {
  688. intptr_t stride = src_stride;
  689. int i;
  690. assert((dst_width % 3 == 0) && (dst_width > 0));
  691. for (i = 0; i < dst_width; i += 3) {
  692. dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
  693. src_ptr[stride + 1] + src_ptr[stride + 2]) *
  694. (65536 / 6) >>
  695. 16;
  696. dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
  697. src_ptr[stride + 4] + src_ptr[stride + 5]) *
  698. (65536 / 6) >>
  699. 16;
  700. dst_ptr[2] =
  701. (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
  702. (65536 / 4) >>
  703. 16;
  704. src_ptr += 8;
  705. dst_ptr += 3;
  706. }
  707. }
  708. void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
  709. int x;
  710. assert(src_width > 0);
  711. for (x = 0; x < src_width - 1; x += 2) {
  712. dst_ptr[0] += src_ptr[0];
  713. dst_ptr[1] += src_ptr[1];
  714. src_ptr += 2;
  715. dst_ptr += 2;
  716. }
  717. if (src_width & 1) {
  718. dst_ptr[0] += src_ptr[0];
  719. }
  720. }
  721. void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
  722. int x;
  723. assert(src_width > 0);
  724. for (x = 0; x < src_width - 1; x += 2) {
  725. dst_ptr[0] += src_ptr[0];
  726. dst_ptr[1] += src_ptr[1];
  727. src_ptr += 2;
  728. dst_ptr += 2;
  729. }
  730. if (src_width & 1) {
  731. dst_ptr[0] += src_ptr[0];
  732. }
  733. }
  734. void ScaleARGBRowDown2_C(const uint8* src_argb,
  735. ptrdiff_t src_stride,
  736. uint8* dst_argb,
  737. int dst_width) {
  738. const uint32* src = (const uint32*)(src_argb);
  739. uint32* dst = (uint32*)(dst_argb);
  740. int x;
  741. (void)src_stride;
  742. for (x = 0; x < dst_width - 1; x += 2) {
  743. dst[0] = src[1];
  744. dst[1] = src[3];
  745. src += 4;
  746. dst += 2;
  747. }
  748. if (dst_width & 1) {
  749. dst[0] = src[1];
  750. }
  751. }
  752. void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
  753. ptrdiff_t src_stride,
  754. uint8* dst_argb,
  755. int dst_width) {
  756. int x;
  757. (void)src_stride;
  758. for (x = 0; x < dst_width; ++x) {
  759. dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
  760. dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
  761. dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
  762. dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
  763. src_argb += 8;
  764. dst_argb += 4;
  765. }
  766. }
  767. void ScaleARGBRowDown2Box_C(const uint8* src_argb,
  768. ptrdiff_t src_stride,
  769. uint8* dst_argb,
  770. int dst_width) {
  771. int x;
  772. for (x = 0; x < dst_width; ++x) {
  773. dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
  774. src_argb[src_stride + 4] + 2) >>
  775. 2;
  776. dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
  777. src_argb[src_stride + 5] + 2) >>
  778. 2;
  779. dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
  780. src_argb[src_stride + 6] + 2) >>
  781. 2;
  782. dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
  783. src_argb[src_stride + 7] + 2) >>
  784. 2;
  785. src_argb += 8;
  786. dst_argb += 4;
  787. }
  788. }
  789. void ScaleARGBRowDownEven_C(const uint8* src_argb,
  790. ptrdiff_t src_stride,
  791. int src_stepx,
  792. uint8* dst_argb,
  793. int dst_width) {
  794. const uint32* src = (const uint32*)(src_argb);
  795. uint32* dst = (uint32*)(dst_argb);
  796. (void)src_stride;
  797. int x;
  798. for (x = 0; x < dst_width - 1; x += 2) {
  799. dst[0] = src[0];
  800. dst[1] = src[src_stepx];
  801. src += src_stepx * 2;
  802. dst += 2;
  803. }
  804. if (dst_width & 1) {
  805. dst[0] = src[0];
  806. }
  807. }
  808. void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
  809. ptrdiff_t src_stride,
  810. int src_stepx,
  811. uint8* dst_argb,
  812. int dst_width) {
  813. int x;
  814. for (x = 0; x < dst_width; ++x) {
  815. dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
  816. src_argb[src_stride + 4] + 2) >>
  817. 2;
  818. dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
  819. src_argb[src_stride + 5] + 2) >>
  820. 2;
  821. dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
  822. src_argb[src_stride + 6] + 2) >>
  823. 2;
  824. dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
  825. src_argb[src_stride + 7] + 2) >>
  826. 2;
  827. src_argb += src_stepx * 4;
  828. dst_argb += 4;
  829. }
  830. }
  831. // Scales a single row of pixels using point sampling.
  832. void ScaleARGBCols_C(uint8* dst_argb,
  833. const uint8* src_argb,
  834. int dst_width,
  835. int x,
  836. int dx) {
  837. const uint32* src = (const uint32*)(src_argb);
  838. uint32* dst = (uint32*)(dst_argb);
  839. int j;
  840. for (j = 0; j < dst_width - 1; j += 2) {
  841. dst[0] = src[x >> 16];
  842. x += dx;
  843. dst[1] = src[x >> 16];
  844. x += dx;
  845. dst += 2;
  846. }
  847. if (dst_width & 1) {
  848. dst[0] = src[x >> 16];
  849. }
  850. }
  851. void ScaleARGBCols64_C(uint8* dst_argb,
  852. const uint8* src_argb,
  853. int dst_width,
  854. int x32,
  855. int dx) {
  856. int64 x = (int64)(x32);
  857. const uint32* src = (const uint32*)(src_argb);
  858. uint32* dst = (uint32*)(dst_argb);
  859. int j;
  860. for (j = 0; j < dst_width - 1; j += 2) {
  861. dst[0] = src[x >> 16];
  862. x += dx;
  863. dst[1] = src[x >> 16];
  864. x += dx;
  865. dst += 2;
  866. }
  867. if (dst_width & 1) {
  868. dst[0] = src[x >> 16];
  869. }
  870. }
  871. // Scales a single row of pixels up by 2x using point sampling.
  872. void ScaleARGBColsUp2_C(uint8* dst_argb,
  873. const uint8* src_argb,
  874. int dst_width,
  875. int x,
  876. int dx) {
  877. const uint32* src = (const uint32*)(src_argb);
  878. uint32* dst = (uint32*)(dst_argb);
  879. int j;
  880. (void)x;
  881. (void)dx;
  882. for (j = 0; j < dst_width - 1; j += 2) {
  883. dst[1] = dst[0] = src[0];
  884. src += 1;
  885. dst += 2;
  886. }
  887. if (dst_width & 1) {
  888. dst[0] = src[0];
  889. }
  890. }
  // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
  // Mimics SSSE3 blender
  // BLENDER1 blends two 8 bit channel values with the 7 bit fraction f.
  // BLENDERC extracts the channel at bit offset s from the packed pixels a and
  // b, blends it and shifts the result back into place. BLENDER combines the
  // four channels of a packed 32 bit pixel.
  // Every argument and every expansion is parenthesized so the macros stay
  // correct when given expression arguments or used inside larger expressions.
  #define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
  #define BLENDERC(a, b, f, s) \
    ((uint32)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s)))
  #define BLENDER(a, b, f)                                 \
    (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) |       \
     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))
  899. void ScaleARGBFilterCols_C(uint8* dst_argb,
  900. const uint8* src_argb,
  901. int dst_width,
  902. int x,
  903. int dx) {
  904. const uint32* src = (const uint32*)(src_argb);
  905. uint32* dst = (uint32*)(dst_argb);
  906. int j;
  907. for (j = 0; j < dst_width - 1; j += 2) {
  908. int xi = x >> 16;
  909. int xf = (x >> 9) & 0x7f;
  910. uint32 a = src[xi];
  911. uint32 b = src[xi + 1];
  912. dst[0] = BLENDER(a, b, xf);
  913. x += dx;
  914. xi = x >> 16;
  915. xf = (x >> 9) & 0x7f;
  916. a = src[xi];
  917. b = src[xi + 1];
  918. dst[1] = BLENDER(a, b, xf);
  919. x += dx;
  920. dst += 2;
  921. }
  922. if (dst_width & 1) {
  923. int xi = x >> 16;
  924. int xf = (x >> 9) & 0x7f;
  925. uint32 a = src[xi];
  926. uint32 b = src[xi + 1];
  927. dst[0] = BLENDER(a, b, xf);
  928. }
  929. }
  930. void ScaleARGBFilterCols64_C(uint8* dst_argb,
  931. const uint8* src_argb,
  932. int dst_width,
  933. int x32,
  934. int dx) {
  935. int64 x = (int64)(x32);
  936. const uint32* src = (const uint32*)(src_argb);
  937. uint32* dst = (uint32*)(dst_argb);
  938. int j;
  939. for (j = 0; j < dst_width - 1; j += 2) {
  940. int64 xi = x >> 16;
  941. int xf = (x >> 9) & 0x7f;
  942. uint32 a = src[xi];
  943. uint32 b = src[xi + 1];
  944. dst[0] = BLENDER(a, b, xf);
  945. x += dx;
  946. xi = x >> 16;
  947. xf = (x >> 9) & 0x7f;
  948. a = src[xi];
  949. b = src[xi + 1];
  950. dst[1] = BLENDER(a, b, xf);
  951. x += dx;
  952. dst += 2;
  953. }
  954. if (dst_width & 1) {
  955. int64 xi = x >> 16;
  956. int xf = (x >> 9) & 0x7f;
  957. uint32 a = src[xi];
  958. uint32 b = src[xi + 1];
  959. dst[0] = BLENDER(a, b, xf);
  960. }
  961. }
  962. #undef BLENDER1
  963. #undef BLENDERC
  964. #undef BLENDER
  965. // Scale plane vertically with bilinear interpolation.
  966. void ScalePlaneVertical(int src_height,
  967. int dst_width,
  968. int dst_height,
  969. int src_stride,
  970. int dst_stride,
  971. const uint8* src_argb,
  972. uint8* dst_argb,
  973. int x,
  974. int y,
  975. int dy,
  976. int bpp,
  977. enum FilterMode filtering) {
  978. // TODO(fbarchard): Allow higher bpp.
  979. int dst_width_bytes = dst_width * bpp;
  980. void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
  981. ptrdiff_t src_stride, int dst_width,
  982. int source_y_fraction) = InterpolateRow_C;
  983. const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  984. int j;
  985. assert(bpp >= 1 && bpp <= 4);
  986. assert(src_height != 0);
  987. assert(dst_width > 0);
  988. assert(dst_height > 0);
  989. src_argb += (x >> 16) * bpp;
  990. #if defined(HAS_INTERPOLATEROW_SSSE3)
  991. if (TestCpuFlag(kCpuHasSSSE3)) {
  992. InterpolateRow = InterpolateRow_Any_SSSE3;
  993. if (IS_ALIGNED(dst_width_bytes, 16)) {
  994. InterpolateRow = InterpolateRow_SSSE3;
  995. }
  996. }
  997. #endif
  998. #if defined(HAS_INTERPOLATEROW_AVX2)
  999. if (TestCpuFlag(kCpuHasAVX2)) {
  1000. InterpolateRow = InterpolateRow_Any_AVX2;
  1001. if (IS_ALIGNED(dst_width_bytes, 32)) {
  1002. InterpolateRow = InterpolateRow_AVX2;
  1003. }
  1004. }
  1005. #endif
  1006. #if defined(HAS_INTERPOLATEROW_NEON)
  1007. if (TestCpuFlag(kCpuHasNEON)) {
  1008. InterpolateRow = InterpolateRow_Any_NEON;
  1009. if (IS_ALIGNED(dst_width_bytes, 16)) {
  1010. InterpolateRow = InterpolateRow_NEON;
  1011. }
  1012. }
  1013. #endif
  1014. #if defined(HAS_INTERPOLATEROW_DSPR2)
  1015. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
  1016. IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
  1017. IS_ALIGNED(dst_stride, 4)) {
  1018. InterpolateRow = InterpolateRow_Any_DSPR2;
  1019. if (IS_ALIGNED(dst_width_bytes, 4)) {
  1020. InterpolateRow = InterpolateRow_DSPR2;
  1021. }
  1022. }
  1023. #endif
  1024. #if defined(HAS_INTERPOLATEROW_MSA)
  1025. if (TestCpuFlag(kCpuHasMSA)) {
  1026. InterpolateRow = InterpolateRow_Any_MSA;
  1027. if (IS_ALIGNED(dst_width_bytes, 32)) {
  1028. InterpolateRow = InterpolateRow_MSA;
  1029. }
  1030. }
  1031. #endif
  1032. for (j = 0; j < dst_height; ++j) {
  1033. int yi;
  1034. int yf;
  1035. if (y > max_y) {
  1036. y = max_y;
  1037. }
  1038. yi = y >> 16;
  1039. yf = filtering ? ((y >> 8) & 255) : 0;
  1040. InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
  1041. dst_width_bytes, yf);
  1042. dst_argb += dst_stride;
  1043. y += dy;
  1044. }
  1045. }
  1046. void ScalePlaneVertical_16(int src_height,
  1047. int dst_width,
  1048. int dst_height,
  1049. int src_stride,
  1050. int dst_stride,
  1051. const uint16* src_argb,
  1052. uint16* dst_argb,
  1053. int x,
  1054. int y,
  1055. int dy,
  1056. int wpp,
  1057. enum FilterMode filtering) {
  1058. // TODO(fbarchard): Allow higher wpp.
  1059. int dst_width_words = dst_width * wpp;
  1060. void (*InterpolateRow)(uint16 * dst_argb, const uint16* src_argb,
  1061. ptrdiff_t src_stride, int dst_width,
  1062. int source_y_fraction) = InterpolateRow_16_C;
  1063. const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  1064. int j;
  1065. assert(wpp >= 1 && wpp <= 2);
  1066. assert(src_height != 0);
  1067. assert(dst_width > 0);
  1068. assert(dst_height > 0);
  1069. src_argb += (x >> 16) * wpp;
  1070. #if defined(HAS_INTERPOLATEROW_16_SSE2)
  1071. if (TestCpuFlag(kCpuHasSSE2)) {
  1072. InterpolateRow = InterpolateRow_Any_16_SSE2;
  1073. if (IS_ALIGNED(dst_width_bytes, 16)) {
  1074. InterpolateRow = InterpolateRow_16_SSE2;
  1075. }
  1076. }
  1077. #endif
  1078. #if defined(HAS_INTERPOLATEROW_16_SSSE3)
  1079. if (TestCpuFlag(kCpuHasSSSE3)) {
  1080. InterpolateRow = InterpolateRow_Any_16_SSSE3;
  1081. if (IS_ALIGNED(dst_width_bytes, 16)) {
  1082. InterpolateRow = InterpolateRow_16_SSSE3;
  1083. }
  1084. }
  1085. #endif
  1086. #if defined(HAS_INTERPOLATEROW_16_AVX2)
  1087. if (TestCpuFlag(kCpuHasAVX2)) {
  1088. InterpolateRow = InterpolateRow_Any_16_AVX2;
  1089. if (IS_ALIGNED(dst_width_bytes, 32)) {
  1090. InterpolateRow = InterpolateRow_16_AVX2;
  1091. }
  1092. }
  1093. #endif
  1094. #if defined(HAS_INTERPOLATEROW_16_NEON)
  1095. if (TestCpuFlag(kCpuHasNEON)) {
  1096. InterpolateRow = InterpolateRow_Any_16_NEON;
  1097. if (IS_ALIGNED(dst_width_bytes, 16)) {
  1098. InterpolateRow = InterpolateRow_16_NEON;
  1099. }
  1100. }
  1101. #endif
  1102. #if defined(HAS_INTERPOLATEROW_16_DSPR2)
  1103. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
  1104. IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
  1105. IS_ALIGNED(dst_stride, 4)) {
  1106. InterpolateRow = InterpolateRow_Any_16_DSPR2;
  1107. if (IS_ALIGNED(dst_width_bytes, 4)) {
  1108. InterpolateRow = InterpolateRow_16_DSPR2;
  1109. }
  1110. }
  1111. #endif
  1112. for (j = 0; j < dst_height; ++j) {
  1113. int yi;
  1114. int yf;
  1115. if (y > max_y) {
  1116. y = max_y;
  1117. }
  1118. yi = y >> 16;
  1119. yf = filtering ? ((y >> 8) & 255) : 0;
  1120. InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
  1121. dst_width_words, yf);
  1122. dst_argb += dst_stride;
  1123. y += dy;
  1124. }
  1125. }
  1126. // Simplify the filtering based on scale factors.
  1127. enum FilterMode ScaleFilterReduce(int src_width,
  1128. int src_height,
  1129. int dst_width,
  1130. int dst_height,
  1131. enum FilterMode filtering) {
  1132. if (src_width < 0) {
  1133. src_width = -src_width;
  1134. }
  1135. if (src_height < 0) {
  1136. src_height = -src_height;
  1137. }
  1138. if (filtering == kFilterBox) {
  1139. // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
  1140. if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
  1141. filtering = kFilterBilinear;
  1142. }
  1143. }
  1144. if (filtering == kFilterBilinear) {
  1145. if (src_height == 1) {
  1146. filtering = kFilterLinear;
  1147. }
  1148. // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
  1149. if (dst_height == src_height || dst_height * 3 == src_height) {
  1150. filtering = kFilterLinear;
  1151. }
  1152. // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
  1153. // avoid reading 2 pixels horizontally that causes memory exception.
  1154. if (src_width == 1) {
  1155. filtering = kFilterNone;
  1156. }
  1157. }
  1158. if (filtering == kFilterLinear) {
  1159. if (src_width == 1) {
  1160. filtering = kFilterNone;
  1161. }
  1162. // TODO(fbarchard): Detect any odd scale factor and reduce to None.
  1163. if (dst_width == src_width || dst_width * 3 == src_width) {
  1164. filtering = kFilterNone;
  1165. }
  1166. }
  1167. return filtering;
  1168. }
  1169. // Divide num by div and return as 16.16 fixed point result.
  1170. int FixedDiv_C(int num, int div) {
  1171. return (int)(((int64)(num) << 16) / div);
  1172. }
  1173. // Divide num by div and return as 16.16 fixed point result.
  1174. int FixedDiv1_C(int num, int div) {
  1175. return (int)((((int64)(num) << 16) - 0x00010001) / (div - 1));
  1176. }
  1177. #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
  1178. // Compute slope values for stepping.
  1179. void ScaleSlope(int src_width,
  1180. int src_height,
  1181. int dst_width,
  1182. int dst_height,
  1183. enum FilterMode filtering,
  1184. int* x,
  1185. int* y,
  1186. int* dx,
  1187. int* dy) {
  1188. assert(x != NULL);
  1189. assert(y != NULL);
  1190. assert(dx != NULL);
  1191. assert(dy != NULL);
  1192. assert(src_width != 0);
  1193. assert(src_height != 0);
  1194. assert(dst_width > 0);
  1195. assert(dst_height > 0);
  1196. // Check for 1 pixel and avoid FixedDiv overflow.
  1197. if (dst_width == 1 && src_width >= 32768) {
  1198. dst_width = src_width;
  1199. }
  1200. if (dst_height == 1 && src_height >= 32768) {
  1201. dst_height = src_height;
  1202. }
  1203. if (filtering == kFilterBox) {
  1204. // Scale step for point sampling duplicates all pixels equally.
  1205. *dx = FixedDiv(Abs(src_width), dst_width);
  1206. *dy = FixedDiv(src_height, dst_height);
  1207. *x = 0;
  1208. *y = 0;
  1209. } else if (filtering == kFilterBilinear) {
  1210. // Scale step for bilinear sampling renders last pixel once for upsample.
  1211. if (dst_width <= Abs(src_width)) {
  1212. *dx = FixedDiv(Abs(src_width), dst_width);
  1213. *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
  1214. } else if (dst_width > 1) {
  1215. *dx = FixedDiv1(Abs(src_width), dst_width);
  1216. *x = 0;
  1217. }
  1218. if (dst_height <= src_height) {
  1219. *dy = FixedDiv(src_height, dst_height);
  1220. *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
  1221. } else if (dst_height > 1) {
  1222. *dy = FixedDiv1(src_height, dst_height);
  1223. *y = 0;
  1224. }
  1225. } else if (filtering == kFilterLinear) {
  1226. // Scale step for bilinear sampling renders last pixel once for upsample.
  1227. if (dst_width <= Abs(src_width)) {
  1228. *dx = FixedDiv(Abs(src_width), dst_width);
  1229. *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
  1230. } else if (dst_width > 1) {
  1231. *dx = FixedDiv1(Abs(src_width), dst_width);
  1232. *x = 0;
  1233. }
  1234. *dy = FixedDiv(src_height, dst_height);
  1235. *y = *dy >> 1;
  1236. } else {
  1237. // Scale step for point sampling duplicates all pixels equally.
  1238. *dx = FixedDiv(Abs(src_width), dst_width);
  1239. *dy = FixedDiv(src_height, dst_height);
  1240. *x = CENTERSTART(*dx, 0);
  1241. *y = CENTERSTART(*dy, 0);
  1242. }
  1243. // Negative src_width means horizontally mirror.
  1244. if (src_width < 0) {
  1245. *x += (dst_width - 1) * *dx;
  1246. *dx = -*dx;
  1247. // src_width = -src_width; // Caller must do this.
  1248. }
  1249. }
  1250. #undef CENTERSTART
  1251. // Read 8x2 upsample with filtering and write 16x1.
  1252. // actually reads an extra pixel, so 9x2.
  1253. void ScaleRowUp2_16_C(const uint16* src_ptr,
  1254. ptrdiff_t src_stride,
  1255. uint16* dst,
  1256. int dst_width) {
  1257. const uint16* src2 = src_ptr + src_stride;
  1258. int x;
  1259. for (x = 0; x < dst_width - 1; x += 2) {
  1260. uint16 p0 = src_ptr[0];
  1261. uint16 p1 = src_ptr[1];
  1262. uint16 p2 = src2[0];
  1263. uint16 p3 = src2[1];
  1264. dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
  1265. dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4;
  1266. ++src_ptr;
  1267. ++src2;
  1268. dst += 2;
  1269. }
  1270. if (dst_width & 1) {
  1271. uint16 p0 = src_ptr[0];
  1272. uint16 p1 = src_ptr[1];
  1273. uint16 p2 = src2[0];
  1274. uint16 p3 = src2[1];
  1275. dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
  1276. }
  1277. }
  1278. #ifdef __cplusplus
  1279. } // extern "C"
  1280. } // namespace libyuv
  1281. #endif