00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 #include <inttypes.h>
00058 #include <string.h>
00059 #include <math.h>
00060 #include <stdio.h>
00061 #include <unistd.h>
00062 #include "config.h"
00063 #include <assert.h>
00064 #ifdef HAVE_SYS_MMAN_H
00065 #include <sys/mman.h>
00066 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
00067 #define MAP_ANONYMOUS MAP_ANON
00068 #endif
00069 #endif
00070 #include "swscale.h"
00071 #include "swscale_internal.h"
00072 #include "rgb2rgb.h"
00073 #include "libavutil/x86_cpu.h"
00074 #include "libavutil/bswap.h"
00075
/* Discard any earlier definitions of these two macro names.
 * NOTE(review): presumably the per-CPU template included further down
 * defines its own versions - confirm against swscale_template.c. */
00076 #undef MOVNTQ
00077 #undef PAVGB
00078
00079
00080
00081
00082
00083
/* Compile-time switch; it is not referenced in the visible part of this file.
 * NOTE(review): presumably consumed by the included template - confirm. */
00084 #define DITHER1XBPP
00085
/* When defined, the short ff_bgr2{Y,U,V}Coeff constants are selected in the
 * x86 constant section below instead of the long ones. */
00086 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
00087
00088 #define RET 0xC3 //near return opcode for X86
00089
/* PI: use the math header's M_PI when it is defined, otherwise a literal. */
00090 #ifdef M_PI
00091 #define PI M_PI
00092 #else
00093 #define PI 3.14159265358979323846
00094 #endif
00095
/* Non-zero when x equals one of the pixel formats listed below.
 * The argument is expanded once per comparison, so it must not have side
 * effects.
 * NOTE(review): by its name this is the set of accepted source formats; the
 * place where it is used is outside this part of the file - confirm. */
00096 #define isSupportedIn(x) ( \
00097 (x)==PIX_FMT_YUV420P \
00098 || (x)==PIX_FMT_YUVA420P \
00099 || (x)==PIX_FMT_YUYV422 \
00100 || (x)==PIX_FMT_UYVY422 \
00101 || (x)==PIX_FMT_RGB32 \
00102 || (x)==PIX_FMT_BGR24 \
00103 || (x)==PIX_FMT_BGR565 \
00104 || (x)==PIX_FMT_BGR555 \
00105 || (x)==PIX_FMT_BGR32 \
00106 || (x)==PIX_FMT_RGB24 \
00107 || (x)==PIX_FMT_RGB565 \
00108 || (x)==PIX_FMT_RGB555 \
00109 || (x)==PIX_FMT_GRAY8 \
00110 || (x)==PIX_FMT_YUV410P \
00111 || (x)==PIX_FMT_GRAY16BE \
00112 || (x)==PIX_FMT_GRAY16LE \
00113 || (x)==PIX_FMT_YUV444P \
00114 || (x)==PIX_FMT_YUV422P \
00115 || (x)==PIX_FMT_YUV411P \
00116 || (x)==PIX_FMT_PAL8 \
00117 || (x)==PIX_FMT_BGR8 \
00118 || (x)==PIX_FMT_RGB8 \
00119 || (x)==PIX_FMT_BGR4_BYTE \
00120 || (x)==PIX_FMT_RGB4_BYTE \
00121 || (x)==PIX_FMT_YUV440P \
00122 )
/* Non-zero when x is one of the listed planar/packed YUV, NV12/NV21 or gray
 * formats, or when isRGB(x) or isBGR(x) holds (both defined elsewhere).
 * The argument is expanded many times, so it must not have side effects.
 * NOTE(review): by its name this is the set of accepted destination formats;
 * the place where it is used is outside this part of the file - confirm. */
00123 #define isSupportedOut(x) ( \
00124 (x)==PIX_FMT_YUV420P \
00125 || (x)==PIX_FMT_YUYV422 \
00126 || (x)==PIX_FMT_UYVY422 \
00127 || (x)==PIX_FMT_YUV444P \
00128 || (x)==PIX_FMT_YUV422P \
00129 || (x)==PIX_FMT_YUV411P \
00130 || isRGB(x) \
00131 || isBGR(x) \
00132 || (x)==PIX_FMT_NV12 \
00133 || (x)==PIX_FMT_NV21 \
00134 || (x)==PIX_FMT_GRAY16BE \
00135 || (x)==PIX_FMT_GRAY16LE \
00136 || (x)==PIX_FMT_GRAY8 \
00137 || (x)==PIX_FMT_YUV410P \
00138 )
/* Non-zero for PAL8, YUYV422, UYVY422 and for every format accepted by
 * isRGB() or isBGR() (both defined elsewhere).
 * The argument is expanded many times, so it must not have side effects. */
00139 #define isPacked(x) ( \
00140 (x)==PIX_FMT_PAL8 \
00141 || (x)==PIX_FMT_YUYV422 \
00142 || (x)==PIX_FMT_UYVY422 \
00143 || isRGB(x) \
00144 || isBGR(x) \
00145 )
00146
/* Fixed-point conversion coefficients: each real factor is multiplied by
 * 2^RGB2YUV_SHIFT, 0.5 is added, and the result is truncated by the int cast.
 * NOTE(review): the names read as <source channel><target component>
 * (e.g. BY = blue contribution to Y) and the values look like the usual
 * limited-range BT.601 matrix - confirm before relying on either. */
00147 #define RGB2YUV_SHIFT 16
00148 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
00149 #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
00150 #define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
00151 #define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
00152 #define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
00153 #define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
00154 #define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
00155 #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
00156 #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
00157
/* 8x4 table of 32-bit values defined in another translation unit. */
00158 extern const int32_t Inverse_Table_6_9[8][4];
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
/* 64-bit constants and scratch variables that exist only when both ARCH_X86
 * and CONFIG_GPL are defined.
 * NOTE(review): presumably referenced by name from the inline assembly in the
 * included template - confirm. */
00175 #if defined(ARCH_X86) && defined (CONFIG_GPL)
/* Byte-wise and word-wise replicated patterns. */
00176 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
00177 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
00178 DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
00179 DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
/* Byte-select masks; the digits in each name mirror which of the eight bytes are 0xFF. */
00180 DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
00181 DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
00182 DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
00183 DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
00184
/* Writable, 8-byte aligned 64-bit slots; nothing in the visible part of the file assigns them. */
00185 static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
00186 static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
00187 static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
00188 static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
00189
/* Two-entry dither tables with external linkage. */
00190 const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
00191 0x0103010301030103LL,
00192 0x0200020002000200LL,};
00193
00194 const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
00195 0x0602060206020602LL,
00196 0x0004000400040004LL,};
00197
/* Channel masks replicated in each 16-bit lane:
 * 16-bit set: bits 0-4, 5-10, 11-15; 15-bit set: bits 0-4, 5-9, 10-14. */
00198 DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
00199 DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
00200 DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
00201 DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
00202 DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
00203 DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
00204
/* Every-third-byte masks: A selects bytes 0,3,6; B bytes 1,4,7; C bytes 2,5. */
00205 DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
00206 DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
00207 DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
00208
/* Packed 16-bit conversion coefficients; FAST_BGR2YV12 picks the short set. */
00209 #ifdef FAST_BGR2YV12
00210 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
00211 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
00212 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
00213 #else
00214 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
00215 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
00216 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
00217 #endif
/* Byte-replicated 0x10 and 0x80 offsets, and a word-replicated 1. */
00218 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
00219 DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
00220 DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
00221 #endif
00222
00223
/* 768-byte lookup table private to this file; it is not filled in within the
 * visible part of the file.
 * NOTE(review): the size suggests 256 entries of headroom on each side of a
 * 0..255 range - confirm where it is initialised. */
00224 static unsigned char clip_table[768];
00225
/* Forward declaration; the definition lies outside the visible part of the file. */
00226 static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
00227
/* Ordered-dither tables defined in another translation unit; the packed-output
 * code below indexes the first dimension with y&1 (2x2) or y&7 (8x8). */
00228 extern const uint8_t dither_2x2_4[2][8];
00229 extern const uint8_t dither_2x2_8[2][8];
00230 extern const uint8_t dither_8x8_32[8][8];
00231 extern const uint8_t dither_8x8_73[8][8];
00232 extern const uint8_t dither_8x8_220[8][8];
00233
00234 const char *sws_format_name(enum PixelFormat format)
00235 {
00236 switch (format) {
00237 case PIX_FMT_YUV420P:
00238 return "yuv420p";
00239 case PIX_FMT_YUVA420P:
00240 return "yuva420p";
00241 case PIX_FMT_YUYV422:
00242 return "yuyv422";
00243 case PIX_FMT_RGB24:
00244 return "rgb24";
00245 case PIX_FMT_BGR24:
00246 return "bgr24";
00247 case PIX_FMT_YUV422P:
00248 return "yuv422p";
00249 case PIX_FMT_YUV444P:
00250 return "yuv444p";
00251 case PIX_FMT_RGB32:
00252 return "rgb32";
00253 case PIX_FMT_YUV410P:
00254 return "yuv410p";
00255 case PIX_FMT_YUV411P:
00256 return "yuv411p";
00257 case PIX_FMT_RGB565:
00258 return "rgb565";
00259 case PIX_FMT_RGB555:
00260 return "rgb555";
00261 case PIX_FMT_GRAY16BE:
00262 return "gray16be";
00263 case PIX_FMT_GRAY16LE:
00264 return "gray16le";
00265 case PIX_FMT_GRAY8:
00266 return "gray8";
00267 case PIX_FMT_MONOWHITE:
00268 return "mono white";
00269 case PIX_FMT_MONOBLACK:
00270 return "mono black";
00271 case PIX_FMT_PAL8:
00272 return "Palette";
00273 case PIX_FMT_YUVJ420P:
00274 return "yuvj420p";
00275 case PIX_FMT_YUVJ422P:
00276 return "yuvj422p";
00277 case PIX_FMT_YUVJ444P:
00278 return "yuvj444p";
00279 case PIX_FMT_XVMC_MPEG2_MC:
00280 return "xvmc_mpeg2_mc";
00281 case PIX_FMT_XVMC_MPEG2_IDCT:
00282 return "xvmc_mpeg2_idct";
00283 case PIX_FMT_UYVY422:
00284 return "uyvy422";
00285 case PIX_FMT_UYYVYY411:
00286 return "uyyvyy411";
00287 case PIX_FMT_RGB32_1:
00288 return "rgb32x";
00289 case PIX_FMT_BGR32_1:
00290 return "bgr32x";
00291 case PIX_FMT_BGR32:
00292 return "bgr32";
00293 case PIX_FMT_BGR565:
00294 return "bgr565";
00295 case PIX_FMT_BGR555:
00296 return "bgr555";
00297 case PIX_FMT_BGR8:
00298 return "bgr8";
00299 case PIX_FMT_BGR4:
00300 return "bgr4";
00301 case PIX_FMT_BGR4_BYTE:
00302 return "bgr4 byte";
00303 case PIX_FMT_RGB8:
00304 return "rgb8";
00305 case PIX_FMT_RGB4:
00306 return "rgb4";
00307 case PIX_FMT_RGB4_BYTE:
00308 return "rgb4 byte";
00309 case PIX_FMT_NV12:
00310 return "nv12";
00311 case PIX_FMT_NV21:
00312 return "nv21";
00313 case PIX_FMT_YUV440P:
00314 return "yuv440p";
00315 default:
00316 return "Unknown format";
00317 }
00318 }
00319
00320 static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00321 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00322 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
00323 {
00324
00325 int i;
00326 for (i=0; i<dstW; i++)
00327 {
00328 int val=1<<18;
00329 int j;
00330 for (j=0; j<lumFilterSize; j++)
00331 val += lumSrc[j][i] * lumFilter[j];
00332
00333 dest[i]= av_clip_uint8(val>>19);
00334 }
00335
00336 if (uDest)
00337 for (i=0; i<chrDstW; i++)
00338 {
00339 int u=1<<18;
00340 int v=1<<18;
00341 int j;
00342 for (j=0; j<chrFilterSize; j++)
00343 {
00344 u += chrSrc[j][i] * chrFilter[j];
00345 v += chrSrc[j][i + VOFW] * chrFilter[j];
00346 }
00347
00348 uDest[i]= av_clip_uint8(u>>19);
00349 vDest[i]= av_clip_uint8(v>>19);
00350 }
00351 }
00352
00353 static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00354 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00355 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
00356 {
00357
00358 int i;
00359 for (i=0; i<dstW; i++)
00360 {
00361 int val=1<<18;
00362 int j;
00363 for (j=0; j<lumFilterSize; j++)
00364 val += lumSrc[j][i] * lumFilter[j];
00365
00366 dest[i]= av_clip_uint8(val>>19);
00367 }
00368
00369 if (!uDest)
00370 return;
00371
00372 if (dstFormat == PIX_FMT_NV12)
00373 for (i=0; i<chrDstW; i++)
00374 {
00375 int u=1<<18;
00376 int v=1<<18;
00377 int j;
00378 for (j=0; j<chrFilterSize; j++)
00379 {
00380 u += chrSrc[j][i] * chrFilter[j];
00381 v += chrSrc[j][i + VOFW] * chrFilter[j];
00382 }
00383
00384 uDest[2*i]= av_clip_uint8(u>>19);
00385 uDest[2*i+1]= av_clip_uint8(v>>19);
00386 }
00387 else
00388 for (i=0; i<chrDstW; i++)
00389 {
00390 int u=1<<18;
00391 int v=1<<18;
00392 int j;
00393 for (j=0; j<chrFilterSize; j++)
00394 {
00395 u += chrSrc[j][i] * chrFilter[j];
00396 v += chrSrc[j][i + VOFW] * chrFilter[j];
00397 }
00398
00399 uDest[2*i]= av_clip_uint8(v>>19);
00400 uDest[2*i+1]= av_clip_uint8(u>>19);
00401 }
00402 }
00403
/* Loop head for the N-tap vertical filter with packed output.
 *
 * Opens `for (i=0; i<(dstW>>1); i++){` and leaves it OPEN: the code that
 * follows the macro must store the pixel pair and supply the closing brace.
 * Inside the loop it defines i2 (= 2*i), Y1, Y2, U, V and unused-tolerant
 * `type` pointers r, g, b. Each value is the filtered sum plus a 1<<18
 * rounding bias, shifted right by 19.
 *
 * The clamp to 0..255 is entered only when bit 8 of (Y1|Y2|U|V) is set.
 *
 * Names expected in the expanding scope: i, dstW, lumSrc, lumFilter,
 * lumFilterSize, chrSrc, chrFilter, chrFilterSize (VOFW comes from a header). */
00404 #define YSCALE_YUV_2_PACKEDX_C(type) \
00405 for (i=0; i<(dstW>>1); i++){\
00406 int j;\
00407 int Y1 = 1<<18;\
00408 int Y2 = 1<<18;\
00409 int U = 1<<18;\
00410 int V = 1<<18;\
00411 type av_unused *r, *b, *g;\
00412 const int i2= 2*i;\
00413 \
00414 for (j=0; j<lumFilterSize; j++)\
00415 {\
00416 Y1 += lumSrc[j][i2] * lumFilter[j];\
00417 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
00418 }\
00419 for (j=0; j<chrFilterSize; j++)\
00420 {\
00421 U += chrSrc[j][i] * chrFilter[j];\
00422 V += chrSrc[j][i+VOFW] * chrFilter[j];\
00423 }\
00424 Y1>>=19;\
00425 Y2>>=19;\
00426 U >>=19;\
00427 V >>=19;\
00428 if ((Y1|Y2|U|V)&256)\
00429 {\
00430 if (Y1>255) Y1=255; \
00431 else if (Y1<0)Y1=0; \
00432 if (Y2>255) Y2=255; \
00433 else if (Y2<0)Y2=0; \
00434 if (U>255) U=255; \
00435 else if (U<0) U=0; \
00436 if (V>255) V=255; \
00437 else if (V<0) V=0; \
00438 }
00439
/* YSCALE_YUV_2_PACKEDX_C plus colour-table lookup: after the filtered values
 * are computed, r, g and b are pointed at c->table_rV[V],
 * c->table_gU[U] + c->table_gV[V] and c->table_bU[U], cast to `type *`.
 * The loop is still open when the macro ends. */
00440 #define YSCALE_YUV_2_RGBX_C(type) \
00441 YSCALE_YUV_2_PACKEDX_C(type) \
00442 r = (type *)c->table_rV[V]; \
00443 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
00444 b = (type *)c->table_bU[U]; \
00445
/* Loop head for the two-row blend: opens the per-pair loop (left open) and
 * computes Y1/Y2 as (buf0*yalpha1 + buf1*yalpha)>>19 and U/V as
 * (uvbuf0*uvalpha1 + uvbuf1*uvalpha)>>19. No clamping is done here.
 * Expects i, dstW, buf0, buf1, uvbuf0, uvbuf1, yalpha, yalpha1, uvalpha and
 * uvalpha1 in the expanding scope. */
00446 #define YSCALE_YUV_2_PACKED2_C \
00447 for (i=0; i<(dstW>>1); i++){ \
00448 const int i2= 2*i; \
00449 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
00450 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
00451 int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19; \
00452 int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \
00453
/* YSCALE_YUV_2_PACKED2_C plus declaration of r, g, b as `type *` pointing
 * into the context's colour tables selected by U and V. Loop left open. */
00454 #define YSCALE_YUV_2_RGB2_C(type) \
00455 YSCALE_YUV_2_PACKED2_C\
00456 type *r, *b, *g;\
00457 r = (type *)c->table_rV[V];\
00458 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
00459 b = (type *)c->table_bU[U];\
00460
/* Loop head for the single-row case: opens the per-pair loop (left open);
 * Y1/Y2 are buf0 samples >>7 and U/V are uvbuf1 samples >>7.
 * Note that chroma is read from uvbuf1 only, not uvbuf0. */
00461 #define YSCALE_YUV_2_PACKED1_C \
00462 for (i=0; i<(dstW>>1); i++){\
00463 const int i2= 2*i;\
00464 int Y1= buf0[i2 ]>>7;\
00465 int Y2= buf0[i2+1]>>7;\
00466 int U= (uvbuf1[i ])>>7;\
00467 int V= (uvbuf1[i+VOFW])>>7;\
00468
/* YSCALE_YUV_2_PACKED1_C plus declaration of r, g, b as `type *` pointing
 * into the context's colour tables selected by U and V. Loop left open. */
00469 #define YSCALE_YUV_2_RGB1_C(type) \
00470 YSCALE_YUV_2_PACKED1_C\
00471 type *r, *b, *g;\
00472 r = (type *)c->table_rV[V];\
00473 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
00474 b = (type *)c->table_bU[U];\
00475
/* Like YSCALE_YUV_2_PACKED1_C, except that U and V are the sum of the
 * uvbuf0 and uvbuf1 samples shifted right by 8 (an equal-weight mix of the
 * two chroma rows). Luma still comes from buf0 only. Loop left open. */
00476 #define YSCALE_YUV_2_PACKED1B_C \
00477 for (i=0; i<(dstW>>1); i++){\
00478 const int i2= 2*i;\
00479 int Y1= buf0[i2 ]>>7;\
00480 int Y2= buf0[i2+1]>>7;\
00481 int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\
00482 int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
00483
/* YSCALE_YUV_2_PACKED1B_C plus declaration of r, g, b as `type *` pointing
 * into the context's colour tables selected by U and V. Loop left open. */
00484 #define YSCALE_YUV_2_RGB1B_C(type) \
00485 YSCALE_YUV_2_PACKED1B_C\
00486 type *r, *b, *g;\
00487 r = (type *)c->table_rV[V];\
00488 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
00489 b = (type *)c->table_bU[U];\
00490
/* Complete packed-output store, dispatched on c->dstFormat.
 *
 * func  - a one-argument loop-head macro (one of the *_RGB*_C family); it must
 *         open the per-pair loop and provide i2, Y1, Y2, r, g and b. Used for
 *         every RGB/BGR case.
 * func2 - a loop-head macro without arguments that provides i2, Y1, Y2, U, V.
 *         Used for the YUYV422 / UYVY422 cases.
 * Every case supplies the closing brace of the loop its loop head opened.
 *
 * Per-case behaviour visible below:
 *  - 32-bit: one uint32_t per pixel, r[Y] + g[Y] + b[Y].
 *  - 24-bit: three bytes per pixel, dest advanced by 6 per pixel pair.
 *  - 565/555: 2x2 dither offsets chosen by y&1 are added to the table index.
 *  - RGB8/BGR8, RGB4/BGR4 (two pixels per byte, second pixel in the high
 *    nibble), *4_BYTE: 8x8 dither rows chosen by y&7. The local names d32,
 *    d64 and d128 point at dither_8x8_32, dither_8x8_73 and dither_8x8_220.
 *  - MONOBLACK: does NOT use func; it has its own 8-pixels-per-byte loop that
 *    reads buf0/buf1/yalpha/yalpha1 directly, so those names must exist in the
 *    expanding scope when this case is compiled.
 *
 * Expects c, dest, i, y and dstW in the expanding scope.
 *
 * NOTE(review): the run of number-only lines in the middle of the MONOBLACK
 * case has no line-continuation backslashes in this listing; presumably a
 * comment was stripped there - verify against the upstream file. */
00491 #define YSCALE_YUV_2_ANYRGB_C(func, func2)\
00492 switch(c->dstFormat)\
00493 {\
00494 case PIX_FMT_RGB32:\
00495 case PIX_FMT_BGR32:\
00496 func(uint32_t)\
00497 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
00498 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
00499 } \
00500 break;\
00501 case PIX_FMT_RGB24:\
00502 func(uint8_t)\
00503 ((uint8_t*)dest)[0]= r[Y1];\
00504 ((uint8_t*)dest)[1]= g[Y1];\
00505 ((uint8_t*)dest)[2]= b[Y1];\
00506 ((uint8_t*)dest)[3]= r[Y2];\
00507 ((uint8_t*)dest)[4]= g[Y2];\
00508 ((uint8_t*)dest)[5]= b[Y2];\
00509 dest+=6;\
00510 }\
00511 break;\
00512 case PIX_FMT_BGR24:\
00513 func(uint8_t)\
00514 ((uint8_t*)dest)[0]= b[Y1];\
00515 ((uint8_t*)dest)[1]= g[Y1];\
00516 ((uint8_t*)dest)[2]= r[Y1];\
00517 ((uint8_t*)dest)[3]= b[Y2];\
00518 ((uint8_t*)dest)[4]= g[Y2];\
00519 ((uint8_t*)dest)[5]= r[Y2];\
00520 dest+=6;\
00521 }\
00522 break;\
00523 case PIX_FMT_RGB565:\
00524 case PIX_FMT_BGR565:\
00525 {\
00526 const int dr1= dither_2x2_8[y&1 ][0];\
00527 const int dg1= dither_2x2_4[y&1 ][0];\
00528 const int db1= dither_2x2_8[(y&1)^1][0];\
00529 const int dr2= dither_2x2_8[y&1 ][1];\
00530 const int dg2= dither_2x2_4[y&1 ][1];\
00531 const int db2= dither_2x2_8[(y&1)^1][1];\
00532 func(uint16_t)\
00533 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
00534 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
00535 }\
00536 }\
00537 break;\
00538 case PIX_FMT_RGB555:\
00539 case PIX_FMT_BGR555:\
00540 {\
00541 const int dr1= dither_2x2_8[y&1 ][0];\
00542 const int dg1= dither_2x2_8[y&1 ][1];\
00543 const int db1= dither_2x2_8[(y&1)^1][0];\
00544 const int dr2= dither_2x2_8[y&1 ][1];\
00545 const int dg2= dither_2x2_8[y&1 ][0];\
00546 const int db2= dither_2x2_8[(y&1)^1][1];\
00547 func(uint16_t)\
00548 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
00549 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
00550 }\
00551 }\
00552 break;\
00553 case PIX_FMT_RGB8:\
00554 case PIX_FMT_BGR8:\
00555 {\
00556 const uint8_t * const d64= dither_8x8_73[y&7];\
00557 const uint8_t * const d32= dither_8x8_32[y&7];\
00558 func(uint8_t)\
00559 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
00560 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
00561 }\
00562 }\
00563 break;\
00564 case PIX_FMT_RGB4:\
00565 case PIX_FMT_BGR4:\
00566 {\
00567 const uint8_t * const d64= dither_8x8_73 [y&7];\
00568 const uint8_t * const d128=dither_8x8_220[y&7];\
00569 func(uint8_t)\
00570 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
00571 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
00572 }\
00573 }\
00574 break;\
00575 case PIX_FMT_RGB4_BYTE:\
00576 case PIX_FMT_BGR4_BYTE:\
00577 {\
00578 const uint8_t * const d64= dither_8x8_73 [y&7];\
00579 const uint8_t * const d128=dither_8x8_220[y&7];\
00580 func(uint8_t)\
00581 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
00582 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
00583 }\
00584 }\
00585 break;\
00586 case PIX_FMT_MONOBLACK:\
00587 {\
00588 const uint8_t * const d128=dither_8x8_220[y&7];\
00589 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
00590 for (i=0; i<dstW-7; i+=8){\
00591 int acc;\
00592 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
00593 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
00594 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
00595 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
00596 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
00597 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
00598 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
00599 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
00600 ((uint8_t*)dest)[0]= acc;\
00601 dest++;\
00602 }\
00603 \
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647 \
00648 }\
00649 break;\
00650 case PIX_FMT_YUYV422:\
00651 func2\
00652 ((uint8_t*)dest)[2*i2+0]= Y1;\
00653 ((uint8_t*)dest)[2*i2+1]= U;\
00654 ((uint8_t*)dest)[2*i2+2]= Y2;\
00655 ((uint8_t*)dest)[2*i2+3]= V;\
00656 } \
00657 break;\
00658 case PIX_FMT_UYVY422:\
00659 func2\
00660 ((uint8_t*)dest)[2*i2+0]= U;\
00661 ((uint8_t*)dest)[2*i2+1]= Y1;\
00662 ((uint8_t*)dest)[2*i2+2]= V;\
00663 ((uint8_t*)dest)[2*i2+3]= Y2;\
00664 } \
00665 break;\
00666 }\
00667
00668
/*
 * N-tap vertical filter writing one line of packed output.
 *
 * The destination layout is selected by c->dstFormat. For every format except
 * MONOBLACK the per-pixel-pair work is done by YSCALE_YUV_2_RGBX_C (RGB/BGR
 * targets) or YSCALE_YUV_2_PACKEDX_C (YUYV422/UYVY422); those macros open the
 * loop over dstW>>1 pixel pairs and each case below closes it.
 *
 * c             - scaler context; dstFormat and the table_rV/gU/gV/bU lookup
 *                 tables are read from it
 * lumFilter     - luma coefficients, lumFilterSize entries
 * lumSrc        - lumFilterSize luma source rows
 * chrFilter     - chroma coefficients, chrFilterSize entries
 * chrSrc        - chrFilterSize chroma source rows (second component at +VOFW)
 * dest          - output line
 * dstW          - output width in pixels
 * y             - output line number; only its low bits are used, to pick the
 *                 dither row (y&1 or y&7)
 *
 * A dstFormat that matches none of the cases writes nothing.
 */
00669 static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00670 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00671 uint8_t *dest, int dstW, int y)
00672 {
00673 int i;
00674 switch(c->dstFormat)
00675 {
/* 32-bit targets: one uint32_t per pixel, the sum of the three table entries. */
00676 case PIX_FMT_BGR32:
00677 case PIX_FMT_RGB32:
00678 YSCALE_YUV_2_RGBX_C(uint32_t)
00679 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
00680 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
00681 }
00682 break;
/* 24-bit targets: three bytes per pixel; dest itself is advanced by 6 per pair. */
00683 case PIX_FMT_RGB24:
00684 YSCALE_YUV_2_RGBX_C(uint8_t)
00685 ((uint8_t*)dest)[0]= r[Y1];
00686 ((uint8_t*)dest)[1]= g[Y1];
00687 ((uint8_t*)dest)[2]= b[Y1];
00688 ((uint8_t*)dest)[3]= r[Y2];
00689 ((uint8_t*)dest)[4]= g[Y2];
00690 ((uint8_t*)dest)[5]= b[Y2];
00691 dest+=6;
00692 }
00693 break;
00694 case PIX_FMT_BGR24:
00695 YSCALE_YUV_2_RGBX_C(uint8_t)
00696 ((uint8_t*)dest)[0]= b[Y1];
00697 ((uint8_t*)dest)[1]= g[Y1];
00698 ((uint8_t*)dest)[2]= r[Y1];
00699 ((uint8_t*)dest)[3]= b[Y2];
00700 ((uint8_t*)dest)[4]= g[Y2];
00701 ((uint8_t*)dest)[5]= r[Y2];
00702 dest+=6;
00703 }
00704 break;
/* 16-bit 565: 2x2 dither offsets for this line are added to the table index. */
00705 case PIX_FMT_RGB565:
00706 case PIX_FMT_BGR565:
00707 {
00708 const int dr1= dither_2x2_8[y&1 ][0];
00709 const int dg1= dither_2x2_4[y&1 ][0];
00710 const int db1= dither_2x2_8[(y&1)^1][0];
00711 const int dr2= dither_2x2_8[y&1 ][1];
00712 const int dg2= dither_2x2_4[y&1 ][1];
00713 const int db2= dither_2x2_8[(y&1)^1][1];
00714 YSCALE_YUV_2_RGBX_C(uint16_t)
00715 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
00716 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
00717 }
00718 }
00719 break;
/* 16-bit 555: same scheme, but all three channels use dither_2x2_8. */
00720 case PIX_FMT_RGB555:
00721 case PIX_FMT_BGR555:
00722 {
00723 const int dr1= dither_2x2_8[y&1 ][0];
00724 const int dg1= dither_2x2_8[y&1 ][1];
00725 const int db1= dither_2x2_8[(y&1)^1][0];
00726 const int dr2= dither_2x2_8[y&1 ][1];
00727 const int dg2= dither_2x2_8[y&1 ][0];
00728 const int db2= dither_2x2_8[(y&1)^1][1];
00729 YSCALE_YUV_2_RGBX_C(uint16_t)
00730 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
00731 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
00732 }
00733 }
00734 break;
/* 8-bit: 8x8 dither rows for this line; note d64 points at dither_8x8_73. */
00735 case PIX_FMT_RGB8:
00736 case PIX_FMT_BGR8:
00737 {
00738 const uint8_t * const d64= dither_8x8_73[y&7];
00739 const uint8_t * const d32= dither_8x8_32[y&7];
00740 YSCALE_YUV_2_RGBX_C(uint8_t)
00741 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
00742 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
00743 }
00744 }
00745 break;
/* 4-bit packed: two pixels per output byte, the second pixel in the high nibble. */
00746 case PIX_FMT_RGB4:
00747 case PIX_FMT_BGR4:
00748 {
00749 const uint8_t * const d64= dither_8x8_73 [y&7];
00750 const uint8_t * const d128=dither_8x8_220[y&7];
00751 YSCALE_YUV_2_RGBX_C(uint8_t)
00752 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
00753 +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
00754 }
00755 }
00756 break;
/* 4-bit, one pixel per byte. */
00757 case PIX_FMT_RGB4_BYTE:
00758 case PIX_FMT_BGR4_BYTE:
00759 {
00760 const uint8_t * const d64= dither_8x8_73 [y&7];
00761 const uint8_t * const d128=dither_8x8_220[y&7];
00762 YSCALE_YUV_2_RGBX_C(uint8_t)
00763 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
00764 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
00765 }
00766 }
00767 break;
/* 1 bit per pixel: luma only, hand-written loop (the loop-head macros are not
 * used). Two pixels are processed per iteration; acc is doubled and the next
 * table value added for each pixel, and a byte is stored once eight pixels
 * have been accumulated ((i&7)==6). Only the low 8 bits of acc reach dest. */
00768 case PIX_FMT_MONOBLACK:
00769 {
00770 const uint8_t * const d128=dither_8x8_220[y&7];
00771 uint8_t *g= c->table_gU[128] + c->table_gV[128];
00772 int acc=0;
00773 for (i=0; i<dstW-1; i+=2){
00774 int j;
00775 int Y1=1<<18;
00776 int Y2=1<<18;
00777
00778 for (j=0; j<lumFilterSize; j++)
00779 {
00780 Y1 += lumSrc[j][i] * lumFilter[j];
00781 Y2 += lumSrc[j][i+1] * lumFilter[j];
00782 }
00783 Y1>>=19;
00784 Y2>>=19;
/* the clamp is entered only when bit 8 of either value is set */
00785 if ((Y1|Y2)&256)
00786 {
00787 if (Y1>255) Y1=255;
00788 else if (Y1<0)Y1=0;
00789 if (Y2>255) Y2=255;
00790 else if (Y2<0)Y2=0;
00791 }
00792 acc+= acc + g[Y1+d128[(i+0)&7]];
00793 acc+= acc + g[Y2+d128[(i+1)&7]];
00794 if ((i&7)==6){
00795 ((uint8_t*)dest)[0]= acc;
00796 dest++;
00797 }
00798 }
00799 }
00800 break;
/* Packed 4:2:2 targets: four bytes per pixel pair, no table lookup. */
00801 case PIX_FMT_YUYV422:
00802 YSCALE_YUV_2_PACKEDX_C(void)
00803 ((uint8_t*)dest)[2*i2+0]= Y1;
00804 ((uint8_t*)dest)[2*i2+1]= U;
00805 ((uint8_t*)dest)[2*i2+2]= Y2;
00806 ((uint8_t*)dest)[2*i2+3]= V;
00807 }
00808 break;
00809 case PIX_FMT_UYVY422:
00810 YSCALE_YUV_2_PACKEDX_C(void)
00811 ((uint8_t*)dest)[2*i2+0]= U;
00812 ((uint8_t*)dest)[2*i2+1]= Y1;
00813 ((uint8_t*)dest)[2*i2+2]= V;
00814 ((uint8_t*)dest)[2*i2+3]= Y2;
00815 }
00816 break;
00817 }
00818 }
00819
00820
00821
00822
/* Decide which variants of swscale_template.c get compiled into this file.
 * The plain C variant is built unless HAVE_MMX is set, RUNTIME_CPUDETECT is
 * off and CONFIG_GPL is set. */
00823 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
00824 #define COMPILE_C
00825 #endif
00826
/* AltiVec variant: PowerPC only, needs CONFIG_GPL plus HAVE_ALTIVEC or runtime detection. */
00827 #ifdef ARCH_POWERPC
00828 #if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
00829 #define COMPILE_ALTIVEC
00830 #endif //HAVE_ALTIVEC
00831 #endif //ARCH_POWERPC
00832
/* x86 variants, all requiring CONFIG_GPL. Without runtime detection exactly
 * one of MMX / MMX2 / 3DNow is picked from the HAVE_* flags (MMX2 takes
 * precedence over 3DNow); with RUNTIME_CPUDETECT all three are built. */
00833 #if defined(ARCH_X86)
00834
00835 #if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
00836 #define COMPILE_MMX
00837 #endif
00838
00839 #if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
00840 #define COMPILE_MMX2
00841 #endif
00842
00843 #if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
00844 #define COMPILE_3DNOW
00845 #endif
00846 #endif //ARCH_X86 || ARCH_X86_64
00847
/* From here on the HAVE_* flags no longer describe the build configuration:
 * they are cleared and then re-defined per variant right before each include. */
00848 #undef HAVE_MMX
00849 #undef HAVE_MMX2
00850 #undef HAVE_3DNOW
00851
/* Each inclusion of the template is given its own RENAME() suffix. */
00852 #ifdef COMPILE_C
00853 #undef HAVE_MMX
00854 #undef HAVE_MMX2
00855 #undef HAVE_3DNOW
00856 #undef HAVE_ALTIVEC
00857 #define RENAME(a) a ## _C
00858 #include "swscale_template.c"
00859 #endif
00860
00861 #ifdef COMPILE_ALTIVEC
00862 #undef RENAME
00863 #define HAVE_ALTIVEC
00864 #define RENAME(a) a ## _altivec
00865 #include "swscale_template.c"
00866 #endif
00867
00868 #if defined(ARCH_X86)
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
/* MMX only */
00881 #ifdef COMPILE_MMX
00882 #undef RENAME
00883 #define HAVE_MMX
00884 #undef HAVE_MMX2
00885 #undef HAVE_3DNOW
00886 #define RENAME(a) a ## _MMX
00887 #include "swscale_template.c"
00888 #endif
00889
00890
/* MMX + MMX2 */
00891 #ifdef COMPILE_MMX2
00892 #undef RENAME
00893 #define HAVE_MMX
00894 #define HAVE_MMX2
00895 #undef HAVE_3DNOW
00896 #define RENAME(a) a ## _MMX2
00897 #include "swscale_template.c"
00898 #endif
00899
00900
/* MMX + 3DNow */
00901 #ifdef COMPILE_3DNOW
00902 #undef RENAME
00903 #define HAVE_MMX
00904 #undef HAVE_MMX2
00905 #define HAVE_3DNOW
00906 #define RENAME(a) a ## _3DNow
00907 #include "swscale_template.c"
00908 #endif
00909
00910 #endif //ARCH_X86 || ARCH_X86_64
00911
00912
00913
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1 the result is the cubic a + b*dist + c*dist^2 + d*dist^3
 * (evaluated in Horner form). For larger dist the coefficient set is advanced
 * to the next unit segment (a becomes 0; b, c and d are recombined as shown
 * below) and dist is reduced by 1, repeatedly, until dist <= 1.
 *
 * The segment stepping is done with a loop rather than recursion, so stack
 * usage no longer grows by one frame per unit of dist, and a NaN dist simply
 * yields NaN instead of recursing without end. Results for finite inputs are
 * unchanged.
 *
 * @param a,b,c,d  cubic coefficients of the first segment
 * @param dist     distance at which to evaluate; expected to be finite and of
 *                 moderate size (the loop runs once per whole unit above 1)
 * @return value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    while (dist > 1.0) {
        /* all three new coefficients are derived from the OLD b, c and d */
        const double nextB =  b + 2.0*c + 3.0*d;
        const double nextC =      c     + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a = 0.0;
        b = nextB;
        c = nextC;
        d = nextD;
        dist -= 1.0;
    }

    return ((d*dist + c)*dist + b)*dist + a;
}
00924
00925 static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
00926 int srcW, int dstW, int filterAlign, int one, int flags,
00927 SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
00928 {
00929 int i;
00930 int filterSize;
00931 int filter2Size;
00932 int minFilterSize;
00933 double *filter=NULL;
00934 double *filter2=NULL;
00935 int ret= -1;
00936 #if defined(ARCH_X86)
00937 if (flags & SWS_CPU_CAPS_MMX)
00938 asm volatile("emms\n\t"::: "memory");
00939 #endif
00940
00941
00942 *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
00943
00944 if (FFABS(xInc - 0x10000) <10)
00945 {
00946 int i;
00947 filterSize= 1;
00948 filter= av_malloc(dstW*sizeof(double)*filterSize);
00949 for (i=0; i<dstW*filterSize; i++) filter[i]=0;
00950
00951 for (i=0; i<dstW; i++)
00952 {
00953 filter[i*filterSize]=1;
00954 (*filterPos)[i]=i;
00955 }
00956
00957 }
00958 else if (flags&SWS_POINT)
00959 {
00960 int i;
00961 int xDstInSrc;
00962 filterSize= 1;
00963 filter= av_malloc(dstW*sizeof(double)*filterSize);
00964
00965 xDstInSrc= xInc/2 - 0x8000;
00966 for (i=0; i<dstW; i++)
00967 {
00968 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
00969
00970 (*filterPos)[i]= xx;
00971 filter[i]= 1.0;
00972 xDstInSrc+= xInc;
00973 }
00974 }
00975 else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR))
00976 {
00977 int i;
00978 int xDstInSrc;
00979 if (flags&SWS_BICUBIC) filterSize= 4;
00980 else if (flags&SWS_X ) filterSize= 4;
00981 else filterSize= 2;
00982 filter= av_malloc(dstW*sizeof(double)*filterSize);
00983
00984 xDstInSrc= xInc/2 - 0x8000;
00985 for (i=0; i<dstW; i++)
00986 {
00987 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
00988 int j;
00989
00990 (*filterPos)[i]= xx;
00991
00992 for (j=0; j<filterSize; j++)
00993 {
00994 double d= FFABS((xx<<16) - xDstInSrc)/(double)(1<<16);
00995 double coeff= 1.0 - d;
00996 if (coeff<0) coeff=0;
00997 filter[i*filterSize + j]= coeff;
00998 xx++;
00999 }
01000 xDstInSrc+= xInc;
01001 }
01002 }
01003 else
01004 {
01005 double xDstInSrc;
01006 double sizeFactor, filterSizeInSrc;
01007 const double xInc1= (double)xInc / (double)(1<<16);
01008
01009 if (flags&SWS_BICUBIC) sizeFactor= 4.0;
01010 else if (flags&SWS_X) sizeFactor= 8.0;
01011 else if (flags&SWS_AREA) sizeFactor= 1.0;
01012 else if (flags&SWS_GAUSS) sizeFactor= 8.0;
01013 else if (flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
01014 else if (flags&SWS_SINC) sizeFactor= 20.0;
01015 else if (flags&SWS_SPLINE) sizeFactor= 20.0;
01016 else if (flags&SWS_BILINEAR) sizeFactor= 2.0;
01017 else {
01018 sizeFactor= 0.0;
01019 assert(0);
01020 }
01021
01022 if (xInc1 <= 1.0) filterSizeInSrc= sizeFactor;
01023 else filterSizeInSrc= sizeFactor*srcW / (double)dstW;
01024
01025 filterSize= (int)ceil(1 + filterSizeInSrc);
01026 if (filterSize > srcW-2) filterSize=srcW-2;
01027
01028 filter= av_malloc(dstW*sizeof(double)*filterSize);
01029
01030 xDstInSrc= xInc1 / 2.0 - 0.5;
01031 for (i=0; i<dstW; i++)
01032 {
01033 int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
01034 int j;
01035 (*filterPos)[i]= xx;
01036 for (j=0; j<filterSize; j++)
01037 {
01038 double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
01039 double coeff;
01040 if (flags & SWS_BICUBIC)
01041 {
01042 double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
01043 double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;
01044
01045 if (d<1.0)
01046 coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
01047 else if (d<2.0)
01048 coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
01049 else
01050 coeff=0.0;
01051 }
01052
01053
01054
01055
01056
01057
01058 else if (flags & SWS_X)
01059 {
01060 double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
01061
01062 if (d<1.0)
01063 coeff = cos(d*PI);
01064 else
01065 coeff=-1.0;
01066 if (coeff<0.0) coeff= -pow(-coeff, A);
01067 else coeff= pow( coeff, A);
01068 coeff= coeff*0.5 + 0.5;
01069 }
01070 else if (flags & SWS_AREA)
01071 {
01072 double srcPixelSize= 1.0/xInc1;
01073 if (d + srcPixelSize/2 < 0.5) coeff= 1.0;
01074 else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
01075 else coeff=0.0;
01076 }
01077 else if (flags & SWS_GAUSS)
01078 {
01079 double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
01080 coeff = pow(2.0, - p*d*d);
01081 }
01082 else if (flags & SWS_SINC)
01083 {
01084 coeff = d ? sin(d*PI)/(d*PI) : 1.0;
01085 }
01086 else if (flags & SWS_LANCZOS)
01087 {
01088 double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
01089 coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
01090 if (d>p) coeff=0;
01091