scale_argb.cc 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040
  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/scale.h"
  11. #include <assert.h>
  12. #include <string.h>
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/planar_functions.h" // For CopyARGB
  15. #include "libyuv/row.h"
  16. #include "libyuv/scale_row.h"
  17. #ifdef __cplusplus
  18. namespace libyuv {
  19. extern "C" {
  20. #endif
  21. static __inline int Abs(int v) {
  22. return v >= 0 ? v : -v;
  23. }
  24. // ScaleARGB ARGB, 1/2
  25. // This is an optimized version for scaling down a ARGB to 1/2 of
  26. // its original size.
  27. static void ScaleARGBDown2(int src_width,
  28. int src_height,
  29. int dst_width,
  30. int dst_height,
  31. int src_stride,
  32. int dst_stride,
  33. const uint8* src_argb,
  34. uint8* dst_argb,
  35. int x,
  36. int dx,
  37. int y,
  38. int dy,
  39. enum FilterMode filtering) {
  40. int j;
  41. int row_stride = src_stride * (dy >> 16);
  42. void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
  43. uint8* dst_argb, int dst_width) =
  44. filtering == kFilterNone
  45. ? ScaleARGBRowDown2_C
  46. : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
  47. : ScaleARGBRowDown2Box_C);
  48. (void)src_width;
  49. (void)src_height;
  50. (void)dx;
  51. assert(dx == 65536 * 2); // Test scale factor of 2.
  52. assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
  53. // Advance to odd row, even column.
  54. if (filtering == kFilterBilinear) {
  55. src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  56. } else {
  57. src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
  58. }
  59. #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  60. if (TestCpuFlag(kCpuHasSSE2)) {
  61. ScaleARGBRowDown2 =
  62. filtering == kFilterNone
  63. ? ScaleARGBRowDown2_Any_SSE2
  64. : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
  65. : ScaleARGBRowDown2Box_Any_SSE2);
  66. if (IS_ALIGNED(dst_width, 4)) {
  67. ScaleARGBRowDown2 =
  68. filtering == kFilterNone
  69. ? ScaleARGBRowDown2_SSE2
  70. : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
  71. : ScaleARGBRowDown2Box_SSE2);
  72. }
  73. }
  74. #endif
  75. #if defined(HAS_SCALEARGBROWDOWN2_NEON)
  76. if (TestCpuFlag(kCpuHasNEON)) {
  77. ScaleARGBRowDown2 =
  78. filtering == kFilterNone
  79. ? ScaleARGBRowDown2_Any_NEON
  80. : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
  81. : ScaleARGBRowDown2Box_Any_NEON);
  82. if (IS_ALIGNED(dst_width, 8)) {
  83. ScaleARGBRowDown2 =
  84. filtering == kFilterNone
  85. ? ScaleARGBRowDown2_NEON
  86. : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
  87. : ScaleARGBRowDown2Box_NEON);
  88. }
  89. }
  90. #endif
  91. #if defined(HAS_SCALEARGBROWDOWN2_MSA)
  92. if (TestCpuFlag(kCpuHasMSA)) {
  93. ScaleARGBRowDown2 =
  94. filtering == kFilterNone
  95. ? ScaleARGBRowDown2_Any_MSA
  96. : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
  97. : ScaleARGBRowDown2Box_Any_MSA);
  98. if (IS_ALIGNED(dst_width, 4)) {
  99. ScaleARGBRowDown2 =
  100. filtering == kFilterNone
  101. ? ScaleARGBRowDown2_MSA
  102. : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
  103. : ScaleARGBRowDown2Box_MSA);
  104. }
  105. }
  106. #endif
  107. if (filtering == kFilterLinear) {
  108. src_stride = 0;
  109. }
  110. for (j = 0; j < dst_height; ++j) {
  111. ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
  112. src_argb += row_stride;
  113. dst_argb += dst_stride;
  114. }
  115. }
  116. // ScaleARGB ARGB, 1/4
  117. // This is an optimized version for scaling down a ARGB to 1/4 of
  118. // its original size.
  119. static void ScaleARGBDown4Box(int src_width,
  120. int src_height,
  121. int dst_width,
  122. int dst_height,
  123. int src_stride,
  124. int dst_stride,
  125. const uint8* src_argb,
  126. uint8* dst_argb,
  127. int x,
  128. int dx,
  129. int y,
  130. int dy) {
  131. int j;
  132. // Allocate 2 rows of ARGB.
  133. const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
  134. align_buffer_64(row, kRowSize * 2);
  135. int row_stride = src_stride * (dy >> 16);
  136. void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
  137. uint8* dst_argb, int dst_width) =
  138. ScaleARGBRowDown2Box_C;
  139. // Advance to odd row, even column.
  140. src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  141. (void)src_width;
  142. (void)src_height;
  143. (void)dx;
  144. assert(dx == 65536 * 4); // Test scale factor of 4.
  145. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
  146. #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  147. if (TestCpuFlag(kCpuHasSSE2)) {
  148. ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
  149. if (IS_ALIGNED(dst_width, 4)) {
  150. ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
  151. }
  152. }
  153. #endif
  154. #if defined(HAS_SCALEARGBROWDOWN2_NEON)
  155. if (TestCpuFlag(kCpuHasNEON)) {
  156. ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
  157. if (IS_ALIGNED(dst_width, 8)) {
  158. ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
  159. }
  160. }
  161. #endif
  162. for (j = 0; j < dst_height; ++j) {
  163. ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
  164. ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
  165. dst_width * 2);
  166. ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
  167. src_argb += row_stride;
  168. dst_argb += dst_stride;
  169. }
  170. free_aligned_buffer_64(row);
  171. }
  172. // ScaleARGB ARGB Even
  173. // This is an optimized version for scaling down a ARGB to even
  174. // multiple of its original size.
  175. static void ScaleARGBDownEven(int src_width,
  176. int src_height,
  177. int dst_width,
  178. int dst_height,
  179. int src_stride,
  180. int dst_stride,
  181. const uint8* src_argb,
  182. uint8* dst_argb,
  183. int x,
  184. int dx,
  185. int y,
  186. int dy,
  187. enum FilterMode filtering) {
  188. int j;
  189. int col_step = dx >> 16;
  190. int row_stride = (dy >> 16) * src_stride;
  191. void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
  192. int src_step, uint8* dst_argb, int dst_width) =
  193. filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
  194. (void)src_width;
  195. (void)src_height;
  196. assert(IS_ALIGNED(src_width, 2));
  197. assert(IS_ALIGNED(src_height, 2));
  198. src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  199. #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
  200. if (TestCpuFlag(kCpuHasSSE2)) {
  201. ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
  202. : ScaleARGBRowDownEven_Any_SSE2;
  203. if (IS_ALIGNED(dst_width, 4)) {
  204. ScaleARGBRowDownEven =
  205. filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
  206. }
  207. }
  208. #endif
  209. #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  210. if (TestCpuFlag(kCpuHasNEON)) {
  211. ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
  212. : ScaleARGBRowDownEven_Any_NEON;
  213. if (IS_ALIGNED(dst_width, 4)) {
  214. ScaleARGBRowDownEven =
  215. filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
  216. }
  217. }
  218. #endif
  219. #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
  220. if (TestCpuFlag(kCpuHasMSA)) {
  221. ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
  222. : ScaleARGBRowDownEven_Any_MSA;
  223. if (IS_ALIGNED(dst_width, 4)) {
  224. ScaleARGBRowDownEven =
  225. filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
  226. }
  227. }
  228. #endif
  229. if (filtering == kFilterLinear) {
  230. src_stride = 0;
  231. }
  232. for (j = 0; j < dst_height; ++j) {
  233. ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
  234. src_argb += row_stride;
  235. dst_argb += dst_stride;
  236. }
  237. }
  238. // Scale ARGB down with bilinear interpolation.
  239. static void ScaleARGBBilinearDown(int src_width,
  240. int src_height,
  241. int dst_width,
  242. int dst_height,
  243. int src_stride,
  244. int dst_stride,
  245. const uint8* src_argb,
  246. uint8* dst_argb,
  247. int x,
  248. int dx,
  249. int y,
  250. int dy,
  251. enum FilterMode filtering) {
  252. int j;
  253. void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
  254. ptrdiff_t src_stride, int dst_width,
  255. int source_y_fraction) = InterpolateRow_C;
  256. void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
  257. int dst_width, int x, int dx) =
  258. (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
  259. int64 xlast = x + (int64)(dst_width - 1) * dx;
  260. int64 xl = (dx >= 0) ? x : xlast;
  261. int64 xr = (dx >= 0) ? xlast : x;
  262. int clip_src_width;
  263. xl = (xl >> 16) & ~3; // Left edge aligned.
  264. xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
  265. xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
  266. if (xr > src_width) {
  267. xr = src_width;
  268. }
  269. clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
  270. src_argb += xl * 4;
  271. x -= (int)(xl << 16);
  272. #if defined(HAS_INTERPOLATEROW_SSSE3)
  273. if (TestCpuFlag(kCpuHasSSSE3)) {
  274. InterpolateRow = InterpolateRow_Any_SSSE3;
  275. if (IS_ALIGNED(clip_src_width, 16)) {
  276. InterpolateRow = InterpolateRow_SSSE3;
  277. }
  278. }
  279. #endif
  280. #if defined(HAS_INTERPOLATEROW_AVX2)
  281. if (TestCpuFlag(kCpuHasAVX2)) {
  282. InterpolateRow = InterpolateRow_Any_AVX2;
  283. if (IS_ALIGNED(clip_src_width, 32)) {
  284. InterpolateRow = InterpolateRow_AVX2;
  285. }
  286. }
  287. #endif
  288. #if defined(HAS_INTERPOLATEROW_NEON)
  289. if (TestCpuFlag(kCpuHasNEON)) {
  290. InterpolateRow = InterpolateRow_Any_NEON;
  291. if (IS_ALIGNED(clip_src_width, 16)) {
  292. InterpolateRow = InterpolateRow_NEON;
  293. }
  294. }
  295. #endif
  296. #if defined(HAS_INTERPOLATEROW_DSPR2)
  297. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
  298. IS_ALIGNED(src_stride, 4)) {
  299. InterpolateRow = InterpolateRow_Any_DSPR2;
  300. if (IS_ALIGNED(clip_src_width, 4)) {
  301. InterpolateRow = InterpolateRow_DSPR2;
  302. }
  303. }
  304. #endif
  305. #if defined(HAS_INTERPOLATEROW_MSA)
  306. if (TestCpuFlag(kCpuHasMSA)) {
  307. InterpolateRow = InterpolateRow_Any_MSA;
  308. if (IS_ALIGNED(clip_src_width, 32)) {
  309. InterpolateRow = InterpolateRow_MSA;
  310. }
  311. }
  312. #endif
  313. #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  314. if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
  315. ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  316. }
  317. #endif
  318. #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  319. if (TestCpuFlag(kCpuHasNEON)) {
  320. ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
  321. if (IS_ALIGNED(dst_width, 4)) {
  322. ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
  323. }
  324. }
  325. #endif
  326. #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  327. if (TestCpuFlag(kCpuHasMSA)) {
  328. ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
  329. if (IS_ALIGNED(dst_width, 8)) {
  330. ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
  331. }
  332. }
  333. #endif
  334. // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  335. // Allocate a row of ARGB.
  336. {
  337. align_buffer_64(row, clip_src_width * 4);
  338. const int max_y = (src_height - 1) << 16;
  339. if (y > max_y) {
  340. y = max_y;
  341. }
  342. for (j = 0; j < dst_height; ++j) {
  343. int yi = y >> 16;
  344. const uint8* src = src_argb + yi * src_stride;
  345. if (filtering == kFilterLinear) {
  346. ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
  347. } else {
  348. int yf = (y >> 8) & 255;
  349. InterpolateRow(row, src, src_stride, clip_src_width, yf);
  350. ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
  351. }
  352. dst_argb += dst_stride;
  353. y += dy;
  354. if (y > max_y) {
  355. y = max_y;
  356. }
  357. }
  358. free_aligned_buffer_64(row);
  359. }
  360. }
  361. // Scale ARGB up with bilinear interpolation.
  362. static void ScaleARGBBilinearUp(int src_width,
  363. int src_height,
  364. int dst_width,
  365. int dst_height,
  366. int src_stride,
  367. int dst_stride,
  368. const uint8* src_argb,
  369. uint8* dst_argb,
  370. int x,
  371. int dx,
  372. int y,
  373. int dy,
  374. enum FilterMode filtering) {
  375. int j;
  376. void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
  377. ptrdiff_t src_stride, int dst_width,
  378. int source_y_fraction) = InterpolateRow_C;
  379. void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
  380. int dst_width, int x, int dx) =
  381. filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  382. const int max_y = (src_height - 1) << 16;
  383. #if defined(HAS_INTERPOLATEROW_SSSE3)
  384. if (TestCpuFlag(kCpuHasSSSE3)) {
  385. InterpolateRow = InterpolateRow_Any_SSSE3;
  386. if (IS_ALIGNED(dst_width, 4)) {
  387. InterpolateRow = InterpolateRow_SSSE3;
  388. }
  389. }
  390. #endif
  391. #if defined(HAS_INTERPOLATEROW_AVX2)
  392. if (TestCpuFlag(kCpuHasAVX2)) {
  393. InterpolateRow = InterpolateRow_Any_AVX2;
  394. if (IS_ALIGNED(dst_width, 8)) {
  395. InterpolateRow = InterpolateRow_AVX2;
  396. }
  397. }
  398. #endif
  399. #if defined(HAS_INTERPOLATEROW_NEON)
  400. if (TestCpuFlag(kCpuHasNEON)) {
  401. InterpolateRow = InterpolateRow_Any_NEON;
  402. if (IS_ALIGNED(dst_width, 4)) {
  403. InterpolateRow = InterpolateRow_NEON;
  404. }
  405. }
  406. #endif
  407. #if defined(HAS_INTERPOLATEROW_DSPR2)
  408. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
  409. IS_ALIGNED(dst_stride, 4)) {
  410. InterpolateRow = InterpolateRow_DSPR2;
  411. }
  412. #endif
  413. #if defined(HAS_INTERPOLATEROW_MSA)
  414. if (TestCpuFlag(kCpuHasMSA)) {
  415. InterpolateRow = InterpolateRow_Any_MSA;
  416. if (IS_ALIGNED(dst_width, 8)) {
  417. InterpolateRow = InterpolateRow_MSA;
  418. }
  419. }
  420. #endif
  421. if (src_width >= 32768) {
  422. ScaleARGBFilterCols =
  423. filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  424. }
  425. #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  426. if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
  427. ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  428. }
  429. #endif
  430. #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  431. if (filtering && TestCpuFlag(kCpuHasNEON)) {
  432. ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
  433. if (IS_ALIGNED(dst_width, 4)) {
  434. ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
  435. }
  436. }
  437. #endif
  438. #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  439. if (filtering && TestCpuFlag(kCpuHasMSA)) {
  440. ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
  441. if (IS_ALIGNED(dst_width, 8)) {
  442. ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
  443. }
  444. }
  445. #endif
  446. #if defined(HAS_SCALEARGBCOLS_SSE2)
  447. if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
  448. ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  449. }
  450. #endif
  451. #if defined(HAS_SCALEARGBCOLS_NEON)
  452. if (!filtering && TestCpuFlag(kCpuHasNEON)) {
  453. ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
  454. if (IS_ALIGNED(dst_width, 8)) {
  455. ScaleARGBFilterCols = ScaleARGBCols_NEON;
  456. }
  457. }
  458. #endif
  459. #if defined(HAS_SCALEARGBCOLS_MSA)
  460. if (!filtering && TestCpuFlag(kCpuHasMSA)) {
  461. ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
  462. if (IS_ALIGNED(dst_width, 4)) {
  463. ScaleARGBFilterCols = ScaleARGBCols_MSA;
  464. }
  465. }
  466. #endif
  467. if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
  468. ScaleARGBFilterCols = ScaleARGBColsUp2_C;
  469. #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
  470. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  471. ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
  472. }
  473. #endif
  474. }
  475. if (y > max_y) {
  476. y = max_y;
  477. }
  478. {
  479. int yi = y >> 16;
  480. const uint8* src = src_argb + yi * src_stride;
  481. // Allocate 2 rows of ARGB.
  482. const int kRowSize = (dst_width * 4 + 31) & ~31;
  483. align_buffer_64(row, kRowSize * 2);
  484. uint8* rowptr = row;
  485. int rowstride = kRowSize;
  486. int lasty = yi;
  487. ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
  488. if (src_height > 1) {
  489. src += src_stride;
  490. }
  491. ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
  492. src += src_stride;
  493. for (j = 0; j < dst_height; ++j) {
  494. yi = y >> 16;
  495. if (yi != lasty) {
  496. if (y > max_y) {
  497. y = max_y;
  498. yi = y >> 16;
  499. src = src_argb + yi * src_stride;
  500. }
  501. if (yi != lasty) {
  502. ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
  503. rowptr += rowstride;
  504. rowstride = -rowstride;
  505. lasty = yi;
  506. src += src_stride;
  507. }
  508. }
  509. if (filtering == kFilterLinear) {
  510. InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
  511. } else {
  512. int yf = (y >> 8) & 255;
  513. InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
  514. }
  515. dst_argb += dst_stride;
  516. y += dy;
  517. }
  518. free_aligned_buffer_64(row);
  519. }
  520. }
  521. #ifdef YUVSCALEUP
  522. // Scale YUV to ARGB up with bilinear interpolation.
  523. static void ScaleYUVToARGBBilinearUp(int src_width,
  524. int src_height,
  525. int dst_width,
  526. int dst_height,
  527. int src_stride_y,
  528. int src_stride_u,
  529. int src_stride_v,
  530. int dst_stride_argb,
  531. const uint8* src_y,
  532. const uint8* src_u,
  533. const uint8* src_v,
  534. uint8* dst_argb,
  535. int x,
  536. int dx,
  537. int y,
  538. int dy,
  539. enum FilterMode filtering) {
  540. int j;
  541. void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
  542. const uint8* v_buf, uint8* rgb_buf, int width) =
  543. I422ToARGBRow_C;
  544. #if defined(HAS_I422TOARGBROW_SSSE3)
  545. if (TestCpuFlag(kCpuHasSSSE3)) {
  546. I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
  547. if (IS_ALIGNED(src_width, 8)) {
  548. I422ToARGBRow = I422ToARGBRow_SSSE3;
  549. }
  550. }
  551. #endif
  552. #if defined(HAS_I422TOARGBROW_AVX2)
  553. if (TestCpuFlag(kCpuHasAVX2)) {
  554. I422ToARGBRow = I422ToARGBRow_Any_AVX2;
  555. if (IS_ALIGNED(src_width, 16)) {
  556. I422ToARGBRow = I422ToARGBRow_AVX2;
  557. }
  558. }
  559. #endif
  560. #if defined(HAS_I422TOARGBROW_NEON)
  561. if (TestCpuFlag(kCpuHasNEON)) {
  562. I422ToARGBRow = I422ToARGBRow_Any_NEON;
  563. if (IS_ALIGNED(src_width, 8)) {
  564. I422ToARGBRow = I422ToARGBRow_NEON;
  565. }
  566. }
  567. #endif
  568. #if defined(HAS_I422TOARGBROW_DSPR2)
  569. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
  570. IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
  571. IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
  572. IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
  573. IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
  574. I422ToARGBRow = I422ToARGBRow_DSPR2;
  575. }
  576. #endif
  577. #if defined(HAS_I422TOARGBROW_MSA)
  578. if (TestCpuFlag(kCpuHasMSA)) {
  579. I422ToARGBRow = I422ToARGBRow_Any_MSA;
  580. if (IS_ALIGNED(src_width, 8)) {
  581. I422ToARGBRow = I422ToARGBRow_MSA;
  582. }
  583. }
  584. #endif
  585. void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
  586. ptrdiff_t src_stride, int dst_width,
  587. int source_y_fraction) = InterpolateRow_C;
  588. #if defined(HAS_INTERPOLATEROW_SSSE3)
  589. if (TestCpuFlag(kCpuHasSSSE3)) {
  590. InterpolateRow = InterpolateRow_Any_SSSE3;
  591. if (IS_ALIGNED(dst_width, 4)) {
  592. InterpolateRow = InterpolateRow_SSSE3;
  593. }
  594. }
  595. #endif
  596. #if defined(HAS_INTERPOLATEROW_AVX2)
  597. if (TestCpuFlag(kCpuHasAVX2)) {
  598. InterpolateRow = InterpolateRow_Any_AVX2;
  599. if (IS_ALIGNED(dst_width, 8)) {
  600. InterpolateRow = InterpolateRow_AVX2;
  601. }
  602. }
  603. #endif
  604. #if defined(HAS_INTERPOLATEROW_NEON)
  605. if (TestCpuFlag(kCpuHasNEON)) {
  606. InterpolateRow = InterpolateRow_Any_NEON;
  607. if (IS_ALIGNED(dst_width, 4)) {
  608. InterpolateRow = InterpolateRow_NEON;
  609. }
  610. }
  611. #endif
  612. #if defined(HAS_INTERPOLATEROW_DSPR2)
  613. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
  614. IS_ALIGNED(dst_stride_argb, 4)) {
  615. InterpolateRow = InterpolateRow_DSPR2;
  616. }
  617. #endif
  618. #if defined(HAS_INTERPOLATEROW_MSA)
  619. if (TestCpuFlag(kCpuHasMSA)) {
  620. InterpolateRow = InterpolateRow_Any_MSA;
  621. if (IS_ALIGNED(dst_width, 8)) {
  622. InterpolateRow = InterpolateRow_MSA;
  623. }
  624. }
  625. #endif
  626. void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
  627. int dst_width, int x, int dx) =
  628. filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  629. if (src_width >= 32768) {
  630. ScaleARGBFilterCols =
  631. filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  632. }
  633. #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  634. if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
  635. ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  636. }
  637. #endif
  638. #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  639. if (filtering && TestCpuFlag(kCpuHasNEON)) {
  640. ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
  641. if (IS_ALIGNED(dst_width, 4)) {
  642. ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
  643. }
  644. }
  645. #endif
  646. #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  647. if (filtering && TestCpuFlag(kCpuHasMSA)) {
  648. ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
  649. if (IS_ALIGNED(dst_width, 8)) {
  650. ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
  651. }
  652. }
  653. #endif
  654. #if defined(HAS_SCALEARGBCOLS_SSE2)
  655. if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
  656. ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  657. }
  658. #endif
  659. #if defined(HAS_SCALEARGBCOLS_NEON)
  660. if (!filtering && TestCpuFlag(kCpuHasNEON)) {
  661. ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
  662. if (IS_ALIGNED(dst_width, 8)) {
  663. ScaleARGBFilterCols = ScaleARGBCols_NEON;
  664. }
  665. }
  666. #endif
  667. #if defined(HAS_SCALEARGBCOLS_MSA)
  668. if (!filtering && TestCpuFlag(kCpuHasMSA)) {
  669. ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
  670. if (IS_ALIGNED(dst_width, 4)) {
  671. ScaleARGBFilterCols = ScaleARGBCols_MSA;
  672. }
  673. }
  674. #endif
  675. if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
  676. ScaleARGBFilterCols = ScaleARGBColsUp2_C;
  677. #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
  678. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  679. ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
  680. }
  681. #endif
  682. }
  683. const int max_y = (src_height - 1) << 16;
  684. if (y > max_y) {
  685. y = max_y;
  686. }
  687. const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
  688. int yi = y >> 16;
  689. int uv_yi = yi >> kYShift;
  690. const uint8* src_row_y = src_y + yi * src_stride_y;
  691. const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  692. const uint8* src_row_v = src_v + uv_yi * src_stride_v;
  693. // Allocate 2 rows of ARGB.
  694. const int kRowSize = (dst_width * 4 + 31) & ~31;
  695. align_buffer_64(row, kRowSize * 2);
  696. // Allocate 1 row of ARGB for source conversion.
  697. align_buffer_64(argb_row, src_width * 4);
  698. uint8* rowptr = row;
  699. int rowstride = kRowSize;
  700. int lasty = yi;
  701. // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
  702. ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
  703. if (src_height > 1) {
  704. src_row_y += src_stride_y;
  705. if (yi & 1) {
  706. src_row_u += src_stride_u;
  707. src_row_v += src_stride_v;
  708. }
  709. }
  710. ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
  711. if (src_height > 2) {
  712. src_row_y += src_stride_y;
  713. if (!(yi & 1)) {
  714. src_row_u += src_stride_u;
  715. src_row_v += src_stride_v;
  716. }
  717. }
  718. for (j = 0; j < dst_height; ++j) {
  719. yi = y >> 16;
  720. if (yi != lasty) {
  721. if (y > max_y) {
  722. y = max_y;
  723. yi = y >> 16;
  724. uv_yi = yi >> kYShift;
  725. src_row_y = src_y + yi * src_stride_y;
  726. src_row_u = src_u + uv_yi * src_stride_u;
  727. src_row_v = src_v + uv_yi * src_stride_v;
  728. }
  729. if (yi != lasty) {
  730. // TODO(fbarchard): Convert the clipped region of row.
  731. I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  732. ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
  733. rowptr += rowstride;
  734. rowstride = -rowstride;
  735. lasty = yi;
  736. src_row_y += src_stride_y;
  737. if (yi & 1) {
  738. src_row_u += src_stride_u;
  739. src_row_v += src_stride_v;
  740. }
  741. }
  742. }
  743. if (filtering == kFilterLinear) {
  744. InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
  745. } else {
  746. int yf = (y >> 8) & 255;
  747. InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
  748. }
  749. dst_argb += dst_stride_argb;
  750. y += dy;
  751. }
  752. free_aligned_buffer_64(row);
  753. free_aligned_buffer_64(row_argb);
  754. }
  755. #endif
  756. // Scale ARGB to/from any dimensions, without interpolation.
  757. // Fixed point math is used for performance: The upper 16 bits
  758. // of x and dx is the integer part of the source position and
  759. // the lower 16 bits are the fixed decimal part.
  760. static void ScaleARGBSimple(int src_width,
  761. int src_height,
  762. int dst_width,
  763. int dst_height,
  764. int src_stride,
  765. int dst_stride,
  766. const uint8* src_argb,
  767. uint8* dst_argb,
  768. int x,
  769. int dx,
  770. int y,
  771. int dy) {
  772. int j;
  773. void (*ScaleARGBCols)(uint8 * dst_argb, const uint8* src_argb, int dst_width,
  774. int x, int dx) =
  775. (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
  776. (void)src_height;
  777. #if defined(HAS_SCALEARGBCOLS_SSE2)
  778. if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
  779. ScaleARGBCols = ScaleARGBCols_SSE2;
  780. }
  781. #endif
  782. #if defined(HAS_SCALEARGBCOLS_NEON)
  783. if (TestCpuFlag(kCpuHasNEON)) {
  784. ScaleARGBCols = ScaleARGBCols_Any_NEON;
  785. if (IS_ALIGNED(dst_width, 8)) {
  786. ScaleARGBCols = ScaleARGBCols_NEON;
  787. }
  788. }
  789. #endif
  790. #if defined(HAS_SCALEARGBCOLS_MSA)
  791. if (TestCpuFlag(kCpuHasMSA)) {
  792. ScaleARGBCols = ScaleARGBCols_Any_MSA;
  793. if (IS_ALIGNED(dst_width, 4)) {
  794. ScaleARGBCols = ScaleARGBCols_MSA;
  795. }
  796. }
  797. #endif
  798. if (src_width * 2 == dst_width && x < 0x8000) {
  799. ScaleARGBCols = ScaleARGBColsUp2_C;
  800. #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
  801. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  802. ScaleARGBCols = ScaleARGBColsUp2_SSE2;
  803. }
  804. #endif
  805. }
  806. for (j = 0; j < dst_height; ++j) {
  807. ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
  808. dx);
  809. dst_argb += dst_stride;
  810. y += dy;
  811. }
  812. }
  813. // ScaleARGB a ARGB.
  814. // This function in turn calls a scaling function
  815. // suitable for handling the desired resolutions.
  816. static void ScaleARGB(const uint8* src,
  817. int src_stride,
  818. int src_width,
  819. int src_height,
  820. uint8* dst,
  821. int dst_stride,
  822. int dst_width,
  823. int dst_height,
  824. int clip_x,
  825. int clip_y,
  826. int clip_width,
  827. int clip_height,
  828. enum FilterMode filtering) {
  829. // Initial source x/y coordinate and step values as 16.16 fixed point.
  830. int x = 0;
  831. int y = 0;
  832. int dx = 0;
  833. int dy = 0;
  834. // ARGB does not support box filter yet, but allow the user to pass it.
  835. // Simplify filtering when possible.
  836. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
  837. filtering);
  838. // Negative src_height means invert the image.
  839. if (src_height < 0) {
  840. src_height = -src_height;
  841. src = src + (src_height - 1) * src_stride;
  842. src_stride = -src_stride;
  843. }
  844. ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
  845. &dx, &dy);
  846. src_width = Abs(src_width);
  847. if (clip_x) {
  848. int64 clipf = (int64)(clip_x)*dx;
  849. x += (clipf & 0xffff);
  850. src += (clipf >> 16) * 4;
  851. dst += clip_x * 4;
  852. }
  853. if (clip_y) {
  854. int64 clipf = (int64)(clip_y)*dy;
  855. y += (clipf & 0xffff);
  856. src += (clipf >> 16) * src_stride;
  857. dst += clip_y * dst_stride;
  858. }
  859. // Special case for integer step values.
  860. if (((dx | dy) & 0xffff) == 0) {
  861. if (!dx || !dy) { // 1 pixel wide and/or tall.
  862. filtering = kFilterNone;
  863. } else {
  864. // Optimized even scale down. ie 2, 4, 6, 8, 10x.
  865. if (!(dx & 0x10000) && !(dy & 0x10000)) {
  866. if (dx == 0x20000) {
  867. // Optimized 1/2 downsample.
  868. ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
  869. src_stride, dst_stride, src, dst, x, dx, y, dy,
  870. filtering);
  871. return;
  872. }
  873. if (dx == 0x40000 && filtering == kFilterBox) {
  874. // Optimized 1/4 box downsample.
  875. ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
  876. src_stride, dst_stride, src, dst, x, dx, y, dy);
  877. return;
  878. }
  879. ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
  880. src_stride, dst_stride, src, dst, x, dx, y, dy,
  881. filtering);
  882. return;
  883. }
  884. // Optimized odd scale down. ie 3, 5, 7, 9x.
  885. if ((dx & 0x10000) && (dy & 0x10000)) {
  886. filtering = kFilterNone;
  887. if (dx == 0x10000 && dy == 0x10000) {
  888. // Straight copy.
  889. ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
  890. dst, dst_stride, clip_width, clip_height);
  891. return;
  892. }
  893. }
  894. }
  895. }
  896. if (dx == 0x10000 && (x & 0xffff) == 0) {
  897. // Arbitrary scale vertically, but unscaled vertically.
  898. ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
  899. dst_stride, src, dst, x, y, dy, 4, filtering);
  900. return;
  901. }
  902. if (filtering && dy < 65536) {
  903. ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
  904. src_stride, dst_stride, src, dst, x, dx, y, dy,
  905. filtering);
  906. return;
  907. }
  908. if (filtering) {
  909. ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
  910. src_stride, dst_stride, src, dst, x, dx, y, dy,
  911. filtering);
  912. return;
  913. }
  914. ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
  915. dst_stride, src, dst, x, dx, y, dy);
  916. }
  917. LIBYUV_API
  918. int ARGBScaleClip(const uint8* src_argb,
  919. int src_stride_argb,
  920. int src_width,
  921. int src_height,
  922. uint8* dst_argb,
  923. int dst_stride_argb,
  924. int dst_width,
  925. int dst_height,
  926. int clip_x,
  927. int clip_y,
  928. int clip_width,
  929. int clip_height,
  930. enum FilterMode filtering) {
  931. if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
  932. dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
  933. clip_width > 32768 || clip_height > 32768 ||
  934. (clip_x + clip_width) > dst_width ||
  935. (clip_y + clip_height) > dst_height) {
  936. return -1;
  937. }
  938. ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
  939. dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
  940. clip_height, filtering);
  941. return 0;
  942. }
  943. // Scale an ARGB image.
  944. LIBYUV_API
  945. int ARGBScale(const uint8* src_argb,
  946. int src_stride_argb,
  947. int src_width,
  948. int src_height,
  949. uint8* dst_argb,
  950. int dst_stride_argb,
  951. int dst_width,
  952. int dst_height,
  953. enum FilterMode filtering) {
  954. if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
  955. src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
  956. return -1;
  957. }
  958. ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
  959. dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
  960. filtering);
  961. return 0;
  962. }
  963. // Scale with YUV conversion to ARGB and clipping.
  964. LIBYUV_API
  965. int YUVToARGBScaleClip(const uint8* src_y,
  966. int src_stride_y,
  967. const uint8* src_u,
  968. int src_stride_u,
  969. const uint8* src_v,
  970. int src_stride_v,
  971. uint32 src_fourcc,
  972. int src_width,
  973. int src_height,
  974. uint8* dst_argb,
  975. int dst_stride_argb,
  976. uint32 dst_fourcc,
  977. int dst_width,
  978. int dst_height,
  979. int clip_x,
  980. int clip_y,
  981. int clip_width,
  982. int clip_height,
  983. enum FilterMode filtering) {
  984. uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
  985. int r;
  986. (void)src_fourcc; // TODO(fbarchard): implement and/or assert.
  987. (void)dst_fourcc;
  988. I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
  989. argb_buffer, src_width * 4, src_width, src_height);
  990. r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
  991. dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
  992. clip_width, clip_height, filtering);
  993. free(argb_buffer);
  994. return r;
  995. }
  996. #ifdef __cplusplus
  997. } // extern "C"
  998. } // namespace libyuv
  999. #endif