rgb2rgb_template.c File Reference

#include <stddef.h>
#include <inttypes.h>

Go to the source code of this file.

Defines

#define __WORDSIZE   MP_WORDSIZE
#define MMREG_SIZE   8
#define PREFETCH   " # nop"
#define PREFETCHW   " # nop"
#define EMMS   "emms"
#define MOVNTQ   "movq"
#define SFENCE   " # nop"

Functions

static void RENAME() rgb24to32 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb32to24 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb15to16 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb16to15 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb32to16 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb32tobgr16 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb32to15 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb32tobgr15 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb24to16 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb24tobgr16 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb24to15 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb24tobgr15 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb15to24 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb16to24 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb15to32 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb16to32 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb32tobgr32 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() rgb24tobgr24 (const uint8_t *src, uint8_t *dst, long src_size)
static void RENAME() yuvPlanartoyuy2 (const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, long width, long height, long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
static void RENAME() yv12toyuy2 (const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, long width, long height, long lumStride, long chromStride, long dstStride)
static void RENAME() yuvPlanartouyvy (const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, long width, long height, long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
static void RENAME() yv12touyvy (const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, long width, long height, long lumStride, long chromStride, long dstStride)
static void RENAME() yuv422ptoyuy2 (const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, long width, long height, long lumStride, long chromStride, long dstStride)
static void RENAME() yuy2toyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, long width, long height, long lumStride, long chromStride, long srcStride)
static void RENAME() yvu9toyv12 (const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, long width, long height, long lumStride, long chromStride)
static void RENAME() planar2x (const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
static void RENAME() uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, long width, long height, long lumStride, long chromStride, long srcStride)
static void RENAME() rgb24toyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, long width, long height, long lumStride, long chromStride, long srcStride)
void RENAME() interleaveBytes (uint8_t *src1, uint8_t *src2, uint8_t *dest, long width, long height, long src1Stride, long src2Stride, long dstStride)
static void RENAME() vu9_to_vu12 (const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, long width, long height, long srcStride1, long srcStride2, long dstStride1, long dstStride2)
static void RENAME() yvu9_to_yuy2 (const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, uint8_t *dst, long width, long height, long srcStride1, long srcStride2, long srcStride3, long dstStride)
static void RENAME() rgb2rgb_init (void)


Define Documentation

#define __WORDSIZE   MP_WORDSIZE

Definition at line 35 of file rgb2rgb_template.c.

#define EMMS   "emms"

Definition at line 74 of file rgb2rgb_template.c.

Referenced by interleaveBytes(), planar2x(), rgb15to16(), rgb15to24(), rgb15to32(), rgb16to15(), rgb16to24(), rgb16to32(), rgb24to15(), rgb24to16(), rgb24to32(), rgb24tobgr15(), rgb24tobgr16(), rgb24tobgr24(), rgb24toyv12(), rgb32to15(), rgb32to16(), rgb32to24(), rgb32tobgr15(), rgb32tobgr16(), rgb32tobgr32(), swScale(), uyvytoyv12(), vu9_to_vu12(), yuvPlanartouyvy(), yuvPlanartoyuy2(), yuy2toyv12(), and yvu9_to_yuy2().

#define MMREG_SIZE   8

Definition at line 49 of file rgb2rgb_template.c.

#define MOVNTQ   "movq"

Definition at line 81 of file rgb2rgb_template.c.

Referenced by interleaveBytes(), planar2x(), rgb15to16(), rgb15to24(), rgb15to32(), rgb16to15(), rgb16to24(), rgb16to32(), rgb24to15(), rgb24to16(), rgb24to32(), rgb24tobgr15(), rgb24tobgr16(), rgb24tobgr24(), rgb24toyv12(), rgb32to15(), rgb32to16(), rgb32to24(), rgb32tobgr15(), rgb32tobgr16(), rgb32tobgr32(), uyvytoyv12(), vu9_to_vu12(), yuv2packed2(), yuv420_rgb16(), yuvPlanartouyvy(), yuvPlanartoyuy2(), yuy2toyv12(), and yvu9_to_yuy2().

#define PREFETCH   " # nop"

Definition at line 65 of file rgb2rgb_template.c.

#define PREFETCHW   " # nop"

Definition at line 66 of file rgb2rgb_template.c.

#define SFENCE   " # nop"

Definition at line 82 of file rgb2rgb_template.c.

Referenced by interleaveBytes(), planar2x(), rgb15to16(), rgb15to24(), rgb15to32(), rgb16to15(), rgb16to24(), rgb16to32(), rgb24to15(), rgb24to16(), rgb24to32(), rgb24tobgr15(), rgb24tobgr16(), rgb24tobgr24(), rgb24toyv12(), rgb32to15(), rgb32to16(), rgb32to24(), rgb32tobgr15(), rgb32tobgr16(), rgb32tobgr32(), swScale(), uyvytoyv12(), vu9_to_vu12(), yuvPlanartouyvy(), yuvPlanartoyuy2(), yuy2toyv12(), and yvu9_to_yuy2().


Function Documentation

void RENAME() interleaveBytes ( uint8_t *  src1,
uint8_t *  src2,
uint8_t *  dest,
long  width,
long  height,
long  src1Stride,
long  src2Stride,
long  dstStride 
)

Definition at line 2433 of file rgb2rgb_template.c.

References dest, EMMS, height, MOVNTQ, PREFETCH, REG_a, SFENCE, and width.

/*
 * interleaveBytes: for each of `height` rows, stores src1[w] at dest[2*w]
 * and src2[w] at dest[2*w+1] for w in [0, width), then advances dest, src1
 * and src2 by dstStride, src1Stride and src2Stride respectively.
 *
 * With HAVE_MMX the bulk of every row is handled 16 source bytes per
 * iteration by inline asm (an SSE2 variant or a plain MMX variant) and the
 * scalar loop only covers the tail starting at width&~15. Without HAVE_MMX
 * the scalar loop processes the whole row.
 *
 * NOTE(review): both asm loops run their body before the cmp/jb test, so
 * one 16-byte iteration executes even when width < 16; width-15 is then
 * <= 0 while jb is an unsigned comparison -- confirm that callers never
 * pass width < 16 on the MMX paths.
 * NOTE(review): the SSE2 variant uses movdqa/movntdq, which require 16-byte
 * aligned addresses -- confirm the alignment of src1, src2, dest and of the
 * strides at the call sites.
 */
02435                                                              {
02436     long h;
02437 
02438     for (h=0; h < height; h++)
02439     {
02440         long w;
02441 
02442 #ifdef HAVE_MMX
02443 #ifdef HAVE_SSE2
        /* SSE2: the same 16 bytes of src1 are loaded twice so that the low
           and high halves can each be interleaved with src2 (punpcklbw /
           punpckhbw), yielding 32 output bytes per iteration. */
02444         asm(
02445         "xor              %%"REG_a", %%"REG_a"  \n\t"
02446         "1:                                     \n\t"
02447         PREFETCH" 64(%1, %%"REG_a")             \n\t"
02448         PREFETCH" 64(%2, %%"REG_a")             \n\t"
02449         "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t"
02450         "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t"
02451         "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t"
02452         "punpcklbw           %%xmm2, %%xmm0     \n\t"
02453         "punpckhbw           %%xmm2, %%xmm1     \n\t"
02454         "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t"
02455         "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
02456         "add                    $16, %%"REG_a"  \n\t"
02457         "cmp                     %3, %%"REG_a"  \n\t"
02458         " jb                     1b             \n\t"
02459         ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
02460         : "memory", "%"REG_a""
02461         );
02462 #else
        /* MMX: two 8-byte loads per source, four 8-byte stores per
           iteration (again 16 source bytes -> 32 output bytes). */
02463         asm(
02464         "xor %%"REG_a", %%"REG_a"               \n\t"
02465         "1:                                     \n\t"
02466         PREFETCH" 64(%1, %%"REG_a")             \n\t"
02467         PREFETCH" 64(%2, %%"REG_a")             \n\t"
02468         "movq       (%1, %%"REG_a"), %%mm0      \n\t"
02469         "movq      8(%1, %%"REG_a"), %%mm2      \n\t"
02470         "movq                 %%mm0, %%mm1      \n\t"
02471         "movq                 %%mm2, %%mm3      \n\t"
02472         "movq       (%2, %%"REG_a"), %%mm4      \n\t"
02473         "movq      8(%2, %%"REG_a"), %%mm5      \n\t"
02474         "punpcklbw            %%mm4, %%mm0      \n\t"
02475         "punpckhbw            %%mm4, %%mm1      \n\t"
02476         "punpcklbw            %%mm5, %%mm2      \n\t"
02477         "punpckhbw            %%mm5, %%mm3      \n\t"
02478         MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t"
02479         MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t"
02480         MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t"
02481         MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t"
02482         "add                    $16, %%"REG_a"  \n\t"
02483         "cmp                     %3, %%"REG_a"  \n\t"
02484         " jb                     1b             \n\t"
02485         ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
02486         : "memory", "%"REG_a
02487         );
02488 #endif
        /* Scalar tail: the bytes from the last multiple of 16 up to width. */
02489         for (w= (width&(~15)); w < width; w++)
02490         {
02491             dest[2*w+0] = src1[w];
02492             dest[2*w+1] = src2[w];
02493         }
02494 #else
        /* Portable path: interleave the whole row byte by byte. */
02495         for (w=0; w < width; w++)
02496         {
02497             dest[2*w+0] = src1[w];
02498             dest[2*w+1] = src2[w];
02499         }
02500 #endif
02501         dest += dstStride;
02502                 src1 += src1Stride;
02503                 src2 += src2Stride;
02504     }
02505 #ifdef HAVE_MMX
    /* Issue EMMS and SFENCE once after all rows; what they expand to depends
       on the EMMS/SFENCE defines in effect for this instantiation. */
02506     asm(
02507         EMMS"       \n\t"
02508         SFENCE"     \n\t"
02509         ::: "memory"
02510         );
02511 #endif
02512 }

static void RENAME() planar2x ( const uint8_t *  src,
uint8_t *  dst,
long  srcWidth,
long  srcHeight,
long  srcStride,
long  dstStride 
) [inline, static]

Definition at line 1902 of file rgb2rgb_template.c.

References EMMS, HAVE_MMX2, MOVNTQ, PAVGB, REG_a, SFENCE, and src.

/*
 * planar2x: fills a dst plane of 2*srcWidth columns and 2*srcHeight rows
 * from a srcWidth x srcHeight src plane using 3:1 weighted averages of
 * neighbouring samples, computed as (3*a + b) >> 2.
 *
 *  - first dst row: horizontal 3:1 blend of the first src row; the two
 *    edge samples are copied unchanged;
 *  - for y = 1 .. srcHeight-1: two dst rows are produced from the src row
 *    pair (src, src + srcStride); the edge columns blend vertically, the
 *    interior columns blend diagonal neighbours;
 *  - last dst row: horizontal 3:1 blend of the last src row.
 *
 * With HAVE_MMX2 or HAVE_3DNOW the first mmxSize = srcWidth&~15 columns of
 * each row pair are produced by inline asm, 8 source bytes per iteration,
 * with REG_a counting from -mmxSize upwards; the scalar loop then starts at
 * x = mmxSize-1. Each pair of identical PAVGB lines presumably yields the
 * 3:1 weighting as avg(a, avg(a, b)) -- PAVGB is defined outside this
 * listing, verify.
 *
 * NOTE(review): when mmxSize is 0 (srcWidth < 16) on the asm path, the asm
 * body still executes once (the js test is at the bottom) and the scalar
 * loop starts at x = -1, which indexes src[-1] and dst[-1] -- confirm the
 * minimum width callers pass.
 * NOTE(review): the first asm iteration loads from offset -1 relative to
 * the start of both source rows.
 * NOTE(review): the asm stores through %2/%3 but declares only REG_a as a
 * clobber (no "memory" clobber, no memory output operands).
 */
01903 {
01904     long x,y;
01905 
01906     dst[0]= src[0];
01907 
01908     // first line
01909     for (x=0; x<srcWidth-1; x++){
01910         dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
01911         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
01912     }
01913     dst[2*srcWidth-1]= src[srcWidth-1];
01914 
01915         dst+= dstStride;
01916 
    /* Each iteration consumes one src row pair and emits two dst rows. */
01917     for (y=1; y<srcHeight; y++){
01918 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01919         const long mmxSize= srcWidth&~15;
01920         asm volatile(
01921         "mov           %4, %%"REG_a"            \n\t"
01922         "1:                                     \n\t"
01923         "movq         (%0, %%"REG_a"), %%mm0    \n\t"
01924         "movq         (%1, %%"REG_a"), %%mm1    \n\t"
01925         "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
01926         "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
01927         "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
01928         "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
01929         PAVGB"                  %%mm0, %%mm5    \n\t"
01930         PAVGB"                  %%mm0, %%mm3    \n\t"
01931         PAVGB"                  %%mm0, %%mm5    \n\t"
01932         PAVGB"                  %%mm0, %%mm3    \n\t"
01933         PAVGB"                  %%mm1, %%mm4    \n\t"
01934         PAVGB"                  %%mm1, %%mm2    \n\t"
01935         PAVGB"                  %%mm1, %%mm4    \n\t"
01936         PAVGB"                  %%mm1, %%mm2    \n\t"
01937         "movq                   %%mm5, %%mm7    \n\t"
01938         "movq                   %%mm4, %%mm6    \n\t"
01939         "punpcklbw              %%mm3, %%mm5    \n\t"
01940         "punpckhbw              %%mm3, %%mm7    \n\t"
01941         "punpcklbw              %%mm2, %%mm4    \n\t"
01942         "punpckhbw              %%mm2, %%mm6    \n\t"
01943 #if 1
01944         MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
01945         MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
01946         MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
01947         MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
01948 #else
01949         "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
01950         "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
01951         "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
01952         "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
01953 #endif
01954         "add                       $8, %%"REG_a"            \n\t"
01955         " js                       1b                       \n\t"
01956         :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
01957            "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
01958            "g" (-mmxSize)
01959         : "%"REG_a
01960 
01961         );
01962 #else
        /* No asm path: mmxSize 1 makes the scalar loop below start at x = 0. */
01963         const long mmxSize=1;
01964 #endif
        /* Left edge column of both output rows: vertical blend only. */
01965         dst[0        ]= (3*src[0] +   src[srcStride])>>2;
01966         dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
01967 
        /* Interior columns: each output sample blends a sample of one src
           row with the horizontally adjacent sample of the other src row. */
01968         for (x=mmxSize-1; x<srcWidth-1; x++){
01969             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
01970             dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
01971             dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
01972             dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
01973         }
        /* Right edge column of both output rows: vertical blend only. */
01974         dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
01975         dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
01976 
01977         dst+=dstStride*2;
01978         src+=srcStride;
01979     }
01980 
01981     // last line
01982 #if 1
01983     dst[0]= src[0];
01984 
01985     for (x=0; x<srcWidth-1; x++){
01986         dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
01987         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
01988     }
01989     dst[2*srcWidth-1]= src[srcWidth-1];
01990 #else
    /* Disabled alternative: plain pixel doubling of the last row. */
01991     for (x=0; x<srcWidth; x++){
01992         dst[2*x+0]=
01993         dst[2*x+1]= src[x];
01994     }
01995 #endif
01996 
01997 #ifdef HAVE_MMX
01998 asm volatile(   EMMS"       \n\t"
01999                 SFENCE"     \n\t"
02000                 :::"memory");
02001 #endif
02002 }

static void RENAME() rgb15to16 ( const uint8_t *  src,
uint8_t *  dst,
long  src_size 
) [inline, static]

Definition at line 237 of file rgb2rgb_template.c.

References EMMS, MOVNTQ, PREFETCH, SFENCE, and src.

/*
 * rgb15to16: converts src_size bytes of 16-bit words from src to dst.
 * In every 16-bit word the low 5 bits are kept and bits 5..14 are moved up
 * by one position (to bits 6..15); the scalar code does this by adding
 * (x & 0x7FE0) to (x & 0x7FFF), which doubles the upper field. Bit 15 of
 * the input is discarded and bit 5 of the output is always 0.
 *
 * With HAVE_MMX, 16 bytes are converted per asm iteration using the mask
 * loaded from mask15s (defined outside this listing). The remainder is
 * handled 4 bytes at a time, followed by at most one trailing 16-bit word.
 *
 * NOTE(review): the asm operands "=m"(*d) / "m"(*s) describe a single byte
 * while the asm reads and writes 16 bytes, and the loop asm has no "memory"
 * clobber -- confirm this is safe with the compilers in use.
 * NOTE(review): the scalar code accesses the buffers through uint32_t and
 * uint16_t casts of uint8_t pointers -- presumably the buffers are suitably
 * aligned; verify against callers.
 */
00238 {
00239     register const uint8_t* s=src;
00240     register uint8_t* d=dst;
00241     register const uint8_t *end;
00242     const uint8_t *mm_end;
00243     end = s + src_size;
00244 #ifdef HAVE_MMX
00245     asm volatile(PREFETCH"    %0"::"m"(*s));
00246     asm volatile("movq        %0, %%mm4"::"m"(mask15s));
    /* The asm loop only runs while at least 16 source bytes remain. */
00247     mm_end = end - 15;
00248     while (s<mm_end)
00249     {
00250         asm volatile(
00251         PREFETCH"  32%1         \n\t"
00252         "movq        %1, %%mm0  \n\t"
00253         "movq       8%1, %%mm2  \n\t"
00254         "movq     %%mm0, %%mm1  \n\t"
00255         "movq     %%mm2, %%mm3  \n\t"
00256         "pand     %%mm4, %%mm0  \n\t"
00257         "pand     %%mm4, %%mm2  \n\t"
00258         "paddw    %%mm1, %%mm0  \n\t"
00259         "paddw    %%mm3, %%mm2  \n\t"
00260         MOVNTQ"   %%mm0,  %0    \n\t"
00261         MOVNTQ"   %%mm2, 8%0"
00262         :"=m"(*d)
00263         :"m"(*s)
00264         );
00265         d+=16;
00266         s+=16;
00267     }
00268     asm volatile(SFENCE:::"memory");
00269     asm volatile(EMMS:::"memory");
00270 #endif
    /* Two 16-bit words per iteration; the masked low-half sum cannot carry
       into the high half (0x7FFF + 0x7FE0 < 0x10000). */
00271     mm_end = end - 3;
00272     while (s < mm_end)
00273     {
00274         register unsigned x= *((const uint32_t *)s);
00275         *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
00276         d+=4;
00277         s+=4;
00278     }
    /* At most one 16-bit word is left; s and d are not advanced here
       because nothing follows. */
00279     if (s < end)
00280     {
00281         register unsigned short x= *((const uint16_t *)s);
00282         *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
00283     }
00284 }

static void RENAME() rgb15to24 ( const uint8_t *  src,
uint8_t *  dst,
long  src_size 
) [inline, static]

Definition at line 933 of file rgb2rgb_template.c.

References EMMS, MOVNTQ, PREFETCH, SFENCE, and src.

/*
 * rgb15to24: reads src_size/2 16-bit words from src and writes 3 bytes per
 * word to dst. For each word the scalar code emits, in this order:
 *   (w & 0x1F)   << 3   -- bits 0..4
 *   (w & 0x3E0)  >> 2   -- bits 5..9
 *   (w & 0x7C00) >> 7   -- bits 10..14
 * so every 5-bit field lands in the top 5 bits of its output byte and
 * bit 15 is ignored. An odd trailing byte of src_size is ignored because
 * of the integer division.
 *
 * With HAVE_MMX, 8 words (16 source bytes) are converted to 24 output bytes
 * per iteration by two consecutive asm statements: the first expands the
 * words to one 32-bit value each using mask15b/mask15g/mask15r and the same
 * shifts as the scalar code, the second ("borrowed 32 to 24") packs them to
 * 3 bytes each and stores them.
 *
 * NOTE(review): the second asm consumes mm0, mm3, mm6 and mm7 as left by
 * the first one, so the code depends on nothing touching the MMX registers
 * between the two statements.
 * NOTE(review): the first asm declares "=m"(*d) as an output but contains
 * no store instruction.
 */
00934 {
00935     const uint16_t *end;
00936 #ifdef HAVE_MMX
00937     const uint16_t *mm_end;
00938 #endif
00939     uint8_t *d = dst;
00940     const uint16_t *s = (const uint16_t*)src;
00941     end = s + src_size/2;
00942 #ifdef HAVE_MMX
00943     asm volatile(PREFETCH"    %0"::"m"(*s):"memory");
    /* The asm loop only runs while at least 8 source words remain. */
00944     mm_end = end - 7;
00945     while (s < mm_end)
00946     {
        /* Stage 1: words 0..3 end up expanded in mm6/mm7, words 4..7 in
           mm0/mm3. */
00947         asm volatile(
00948         PREFETCH"    32%1           \n\t"
00949         "movq          %1, %%mm0    \n\t"
00950         "movq          %1, %%mm1    \n\t"
00951         "movq          %1, %%mm2    \n\t"
00952         "pand          %2, %%mm0    \n\t"
00953         "pand          %3, %%mm1    \n\t"
00954         "pand          %4, %%mm2    \n\t"
00955         "psllq         $3, %%mm0    \n\t"
00956         "psrlq         $2, %%mm1    \n\t"
00957         "psrlq         $7, %%mm2    \n\t"
00958         "movq       %%mm0, %%mm3    \n\t"
00959         "movq       %%mm1, %%mm4    \n\t"
00960         "movq       %%mm2, %%mm5    \n\t"
00961         "punpcklwd     %5, %%mm0    \n\t"
00962         "punpcklwd     %5, %%mm1    \n\t"
00963         "punpcklwd     %5, %%mm2    \n\t"
00964         "punpckhwd     %5, %%mm3    \n\t"
00965         "punpckhwd     %5, %%mm4    \n\t"
00966         "punpckhwd     %5, %%mm5    \n\t"
00967         "psllq         $8, %%mm1    \n\t"
00968         "psllq        $16, %%mm2    \n\t"
00969         "por        %%mm1, %%mm0    \n\t"
00970         "por        %%mm2, %%mm0    \n\t"
00971         "psllq         $8, %%mm4    \n\t"
00972         "psllq        $16, %%mm5    \n\t"
00973         "por        %%mm4, %%mm3    \n\t"
00974         "por        %%mm5, %%mm3    \n\t"
00975 
00976         "movq       %%mm0, %%mm6    \n\t"
00977         "movq       %%mm3, %%mm7    \n\t"
00978 
00979         "movq         8%1, %%mm0    \n\t"
00980         "movq         8%1, %%mm1    \n\t"
00981         "movq         8%1, %%mm2    \n\t"
00982         "pand          %2, %%mm0    \n\t"
00983         "pand          %3, %%mm1    \n\t"
00984         "pand          %4, %%mm2    \n\t"
00985         "psllq         $3, %%mm0    \n\t"
00986         "psrlq         $2, %%mm1    \n\t"
00987         "psrlq         $7, %%mm2    \n\t"
00988         "movq       %%mm0, %%mm3    \n\t"
00989         "movq       %%mm1, %%mm4    \n\t"
00990         "movq       %%mm2, %%mm5    \n\t"
00991         "punpcklwd     %5, %%mm0    \n\t"
00992         "punpcklwd     %5, %%mm1    \n\t"
00993         "punpcklwd     %5, %%mm2    \n\t"
00994         "punpckhwd     %5, %%mm3    \n\t"
00995         "punpckhwd     %5, %%mm4    \n\t"
00996         "punpckhwd     %5, %%mm5    \n\t"
00997         "psllq         $8, %%mm1    \n\t"
00998         "psllq        $16, %%mm2    \n\t"
00999         "por        %%mm1, %%mm0    \n\t"
01000         "por        %%mm2, %%mm0    \n\t"
01001         "psllq         $8, %%mm4    \n\t"
01002         "psllq        $16, %%mm5    \n\t"
01003         "por        %%mm4, %%mm3    \n\t"
01004         "por        %%mm5, %%mm3    \n\t"
01005 
01006         :"=m"(*d)
01007         :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
01008         :"memory");
        /* Stage 2: pack the expanded values held in the MMX registers down
           to 24 output bytes and store them. */
01009         /* borrowed 32 to 24 */
01010         asm volatile(
01011         "movq       %%mm0, %%mm4    \n\t"
01012         "movq       %%mm3, %%mm5    \n\t"
01013         "movq       %%mm6, %%mm0    \n\t"
01014         "movq       %%mm7, %%mm1    \n\t"
01015 
01016         "movq       %%mm4, %%mm6    \n\t"
01017         "movq       %%mm5, %%mm7    \n\t"
01018         "movq       %%mm0, %%mm2    \n\t"
01019         "movq       %%mm1, %%mm3    \n\t"
01020 
01021         "psrlq         $8, %%mm2    \n\t"
01022         "psrlq         $8, %%mm3    \n\t"
01023         "psrlq         $8, %%mm6    \n\t"
01024         "psrlq         $8, %%mm7    \n\t"
01025         "pand          %2, %%mm0    \n\t"
01026         "pand          %2, %%mm1    \n\t"
01027         "pand          %2, %%mm4    \n\t"
01028         "pand          %2, %%mm5    \n\t"
01029         "pand          %3, %%mm2    \n\t"
01030         "pand          %3, %%mm3    \n\t"
01031         "pand          %3, %%mm6    \n\t"
01032         "pand          %3, %%mm7    \n\t"
01033         "por        %%mm2, %%mm0    \n\t"
01034         "por        %%mm3, %%mm1    \n\t"
01035         "por        %%mm6, %%mm4    \n\t"
01036         "por        %%mm7, %%mm5    \n\t"
01037 
01038         "movq       %%mm1, %%mm2    \n\t"
01039         "movq       %%mm4, %%mm3    \n\t"
01040         "psllq        $48, %%mm2    \n\t"
01041         "psllq        $32, %%mm3    \n\t"
01042         "pand          %4, %%mm2    \n\t"
01043         "pand          %5, %%mm3    \n\t"
01044         "por        %%mm2, %%mm0    \n\t"
01045         "psrlq        $16, %%mm1    \n\t"
01046         "psrlq        $32, %%mm4    \n\t"
01047         "psllq        $16, %%mm5    \n\t"
01048         "por        %%mm3, %%mm1    \n\t"
01049         "pand          %6, %%mm5    \n\t"
01050         "por        %%mm5, %%mm4    \n\t"
01051 
01052         MOVNTQ"     %%mm0,   %0     \n\t"
01053         MOVNTQ"     %%mm1,  8%0     \n\t"
01054         MOVNTQ"     %%mm4, 16%0"
01055 
01056         :"=m"(*d)
01057         :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
01058         :"memory");
01059         d += 24;
01060         s += 8;
01061     }
01062     asm volatile(SFENCE:::"memory");
01063     asm volatile(EMMS:::"memory");
01064 #endif
    /* Scalar path: all words without HAVE_MMX, otherwise the up to 7 words
       the asm loop left over. */
01065     while (s < end)
01066     {
01067         register uint16_t bgr;
01068         bgr = *s++;
01069         *d++ = (bgr&0x1F)<<3;
01070         *d++ = (bgr&0x3E0)>>2;
01071         *d++ = (bgr&0x7C00)>>7;
01072     }
01073 }

static void RENAME() rgb15to32 ( const uint8_t *  src,
uint8_t *  dst,
long  src_size 
) [inline, static]

Definition at line 1216 of file rgb2rgb_template.c.

References EMMS, MOVNTQ, PREFETCH, SFENCE, and src.

/*
 * rgb15to32: reads src_size/2 16-bit words from src and writes 4 bytes per
 * word to dst: the three 5-bit fields of the word, each moved into the top
 * 5 bits of its own byte, plus one zero byte.
 *
 * Scalar byte order (active #else branch):
 *   without WORDS_BIGENDIAN: bits 0..4, bits 5..9, bits 10..14, 0
 *   with    WORDS_BIGENDIAN: 0, bits 10..14, bits 5..9, bits 0..4
 * Bit 15 of the input is ignored, as is an odd trailing byte of src_size.
 *
 * With HAVE_MMX, 4 words (8 source bytes) are converted to 16 output bytes
 * per asm iteration using mask15b/mask15g/mask15r (defined outside this
 * listing).
 *
 * NOTE(review): mm7 is zeroed by a separate asm statement before the loop
 * and used as the zero operand of the unpack instructions in every
 * iteration, so the code depends on mm7 staying untouched in between.
 */
01217 {
01218     const uint16_t *end;
01219 #ifdef HAVE_MMX
01220     const uint16_t *mm_end;
01221 #endif
01222     uint8_t *d = dst;
01223     const uint16_t *s = (const uint16_t *)src;
01224     end = s + src_size/2;
01225 #ifdef HAVE_MMX
01226     asm volatile(PREFETCH"    %0"::"m"(*s):"memory");
01227     asm volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
    /* The asm loop only runs while at least 4 source words remain. */
01228     mm_end = end - 3;
01229     while (s < mm_end)
01230     {
01231         asm volatile(
01232         PREFETCH"    32%1           \n\t"
01233         "movq          %1, %%mm0    \n\t"
01234         "movq          %1, %%mm1    \n\t"
01235         "movq          %1, %%mm2    \n\t"
01236         "pand          %2, %%mm0    \n\t"
01237         "pand          %3, %%mm1    \n\t"
01238         "pand          %4, %%mm2    \n\t"
01239         "psllq         $3, %%mm0    \n\t"
01240         "psrlq         $2, %%mm1    \n\t"
01241         "psrlq         $7, %%mm2    \n\t"
01242         "movq       %%mm0, %%mm3    \n\t"
01243         "movq       %%mm1, %%mm4    \n\t"
01244         "movq       %%mm2, %%mm5    \n\t"
01245         "punpcklwd  %%mm7, %%mm0    \n\t"
01246         "punpcklwd  %%mm7, %%mm1    \n\t"
01247         "punpcklwd  %%mm7, %%mm2    \n\t"
01248         "punpckhwd  %%mm7, %%mm3    \n\t"
01249         "punpckhwd  %%mm7, %%mm4    \n\t"
01250         "punpckhwd  %%mm7, %%mm5    \n\t"
01251         "psllq         $8, %%mm1    \n\t"
01252         "psllq        $16, %%mm2    \n\t"
01253         "por        %%mm1, %%mm0    \n\t"
01254         "por        %%mm2, %%mm0    \n\t"
01255         "psllq         $8, %%mm4    \n\t"
01256         "psllq        $16, %%mm5    \n\t"
01257         "por        %%mm4, %%mm3    \n\t"
01258         "por        %%mm5, %%mm3    \n\t"
01259         MOVNTQ"     %%mm0,  %0      \n\t"
01260         MOVNTQ"     %%mm3, 8%0      \n\t"
01261         :"=m"(*d)
01262         :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
01263         :"memory");
01264         d += 16;
01265         s += 4;
01266     }
01267     asm volatile(SFENCE:::"memory");
01268     asm volatile(EMMS:::"memory");
01269 #endif
    /* Scalar path: all words without HAVE_MMX, otherwise the up to 3 words
       the asm loop left over. */
01270     while (s < end)
01271     {
01272 #if 0 //slightly slower on Athlon
01273         int bgr= *s++;
01274         *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
01275 #else
01276         register uint16_t bgr;
01277         bgr = *s++;
01278 #ifdef WORDS_BIGENDIAN
01279         *d++ = 0;
01280         *d++ = (bgr&0x7C00)>>7;
01281         *d++ = (bgr&0x3E0)>>2;
01282         *d++ = (bgr&0x1F)<<3;
01283 #else
01284         *d++ = (bgr&0x1F)<<3;
01285         *d++ = (bgr&0x3E0)>>2;
01286         *d++ = (bgr&0x7C00)>>7;
01287         *d++ = 0;
01288 #endif
01289 
01290 #endif
01291     }
01292 }

static void RENAME() rgb16to15 ( const uint8_t *  src,
uint8_t *  dst,
long  src_size 
) [inline, static]

Definition at line 286 of file rgb2rgb_template.c.

References EMMS, MOVNTQ, PREFETCH, SFENCE, and src.

/*
 * rgb16to15: converts src_size bytes of 16-bit words from src to dst.
 * In every 16-bit word the low 5 bits are kept, bits 6..15 are moved down
 * by one position (to bits 5..14), bit 5 of the input is discarded and
 * bit 15 of the output is always 0; the scalar code computes
 * ((x >> 1) & 0x7FE0) | (x & 0x001F).
 *
 * With HAVE_MMX, 16 bytes are converted per asm iteration using the masks
 * loaded from mask15rg and mask15b (defined outside this listing). The
 * remainder is handled 4 bytes at a time, followed by at most one trailing
 * 16-bit word.
 *
 * NOTE(review): the asm operands "=m"(*d) / "m"(*s) describe a single byte
 * while the asm reads and writes 16 bytes, and the loop asm has no "memory"
 * clobber -- confirm this is safe with the compilers in use.
 * NOTE(review): the scalar code accesses the buffers through uint32_t and
 * uint16_t casts of uint8_t pointers -- presumably the buffers are suitably
 * aligned; verify against callers.
 */
00287 {
00288     register const uint8_t* s=src;
00289     register uint8_t* d=dst;
00290     register const uint8_t *end;
00291     const uint8_t *mm_end;
00292     end = s + src_size;
00293 #ifdef HAVE_MMX
00294     asm volatile(PREFETCH"    %0"::"m"(*s));
00295     asm volatile("movq        %0, %%mm7"::"m"(mask15rg));
00296     asm volatile("movq        %0, %%mm6"::"m"(mask15b));
    /* The asm loop only runs while at least 16 source bytes remain. */
00297     mm_end = end - 15;
00298     while (s<mm_end)
00299     {
00300         asm volatile(
00301         PREFETCH"  32%1         \n\t"
00302         "movq        %1, %%mm0  \n\t"
00303         "movq       8%1, %%mm2  \n\t"
00304         "movq     %%mm0, %%mm1  \n\t"
00305         "movq     %%mm2, %%mm3  \n\t"
00306         "psrlq       $1, %%mm0  \n\t"
00307         "psrlq       $1, %%mm2  \n\t"
00308         "pand     %%mm7, %%mm0  \n\t"
00309         "pand     %%mm7, %%mm2  \n\t"
00310         "pand     %%mm6, %%mm1  \n\t"
00311         "pand     %%mm6, %%mm3  \n\t"
00312         "por      %%mm1, %%mm0  \n\t"
00313         "por      %%mm3, %%mm2  \n\t"
00314         MOVNTQ"   %%mm0,  %0    \n\t"
00315         MOVNTQ"   %%mm2, 8%0"
00316         :"=m"(*d)
00317         :"m"(*s)
00318         );
00319         d+=16;
00320         s+=16;
00321     }
00322     asm volatile(SFENCE:::"memory");
00323     asm volatile(EMMS:::"memory");
00324 #endif
    /* Two 16-bit words per iteration; the 0x7FE07FE0 mask also removes the
       bit that the 32-bit shift moves from the high word into the low one. */
00325     mm_end = end - 3;
00326     while (s < mm_end)
00327     {
00328         register uint32_t x= *((const uint32_t*)s);
00329         *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
00330         s+=4;
00331         d+=4;
00332     }
    /* At most one 16-bit word is left. */
00333     if (s < end)
00334     {
00335         register uint16_t x= *((const uint16_t*)s);
00336         *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
00337         s+=2;
00338         d+=2;
00339     }
00340 }

static void RENAME() rgb16to24 ( const uint8_t *  src,
uint8_t *  dst,
long  src_size 
) [inline, static]

Definition at line 1075 of file rgb2rgb_template.c.

References EMMS, MOVNTQ, PREFETCH, SFENCE, and src.

/*
 * rgb16to24: reads src_size/2 16-bit words from src and writes 3 bytes per
 * word to dst. For each word the scalar code emits, in this order:
 *   (w & 0x1F)   << 3   -- bits 0..4   (5-bit field)
 *   (w & 0x7E0)  >> 3   -- bits 5..10  (6-bit field)
 *   (w & 0xF800) >> 8   -- bits 11..15 (5-bit field)
 * so every field lands in the top bits of its output byte. An odd trailing
 * byte of src_size is ignored because of the integer division.
 *
 * With HAVE_MMX, 8 words (16 source bytes) are converted to 24 output bytes
 * per iteration by two consecutive asm statements: the first expands the
 * words to one 32-bit value each using mask16b/mask16g/mask16r and the same
 * shifts as the scalar code, the second ("borrowed 32 to 24") packs them to
 * 3 bytes each and stores them.
 *
 * NOTE(review): the second asm consumes mm0, mm3, mm6 and mm7 as left by
 * the first one, so the code depends on nothing touching the MMX registers
 * between the two statements.
 * NOTE(review): the first asm declares "=m"(*d) as an output but contains
 * no store instruction.
 */
01076 {
01077     const uint16_t *end;
01078 #ifdef HAVE_MMX
01079     const uint16_t *mm_end;
01080 #endif
01081     uint8_t *d = (uint8_t *)dst;
01082     const uint16_t *s = (const uint16_t *)src;
01083     end = s + src_size/2;
01084 #ifdef HAVE_MMX
01085     asm volatile(PREFETCH"    %0"::"m"(*s):"memory");
    /* The asm loop only runs while at least 8 source words remain. */
01086     mm_end = end - 7;
01087     while (s < mm_end)
01088     {
        /* Stage 1: words 0..3 end up expanded in mm6/mm7, words 4..7 in
           mm0/mm3. */
01089         asm volatile(
01090         PREFETCH"    32%1           \n\t"
01091         "movq          %1, %%mm0    \n\t"
01092         "movq          %1, %%mm1    \n\t"
01093         "movq          %1, %%mm2    \n\t"
01094         "pand          %2, %%mm0    \n\t"
01095         "pand          %3, %%mm1    \n\t"
01096         "pand          %4, %%mm2    \n\t"
01097         "psllq         $3, %%mm0    \n\t"
01098         "psrlq         $3, %%mm1    \n\t"
01099         "psrlq         $8, %%mm2    \n\t"
01100         "movq       %%mm0, %%mm3    \n\t"
01101         "movq       %%mm1, %%mm4    \n\t"
01102         "movq       %%mm2, %%mm5    \n\t"
01103         "punpcklwd     %5, %%mm0    \n\t"
01104         "punpcklwd     %5, %%mm1    \n\t"
01105         "punpcklwd     %5, %%mm2    \n\t"
01106         "punpckhwd     %5, %%mm3    \n\t"
01107         "punpckhwd     %5, %%mm4    \n\t"
01108         "punpckhwd     %5, %%mm5    \n\t"
01109         "psllq         $8, %%mm1    \n\t"
01110         "psllq        $16, %%mm2    \n\t"
01111         "por        %%mm1, %%mm0    \n\t"
01112         "por        %%mm2, %%mm0    \n\t"
01113         "psllq         $8, %%mm4    \n\t"
01114         "psllq        $16, %%mm5    \n\t"
01115         "por        %%mm4, %%mm3    \n\t"
01116         "por        %%mm5, %%mm3    \n\t"
01117 
01118         "movq       %%mm0, %%mm6    \n\t"
01119         "movq       %%mm3, %%mm7    \n\t"
01120 
01121         "movq         8%1, %%mm0    \n\t"
01122         "movq         8%1, %%mm1    \n\t"
01123         "movq         8%1, %%mm2    \n\t"
01124         "pand          %2, %%mm0    \n\t"
01125         "pand          %3, %%mm1    \n\t"
01126         "pand          %4, %%mm2    \n\t"
01127         "psllq         $3, %%mm0    \n\t"
01128         "psrlq         $3, %%mm1    \n\t"
01129         "psrlq         $8, %%mm2    \n\t"
01130         "movq       %%mm0, %%mm3    \n\t"
01131         "movq       %%mm1, %%mm4    \n\t"
01132         "movq       %%mm2, %%mm5    \n\t"
01133         "punpcklwd     %5, %%mm0    \n\t"
01134         "punpcklwd     %5, %%mm1    \n\t"
01135         "punpcklwd     %5, %%mm2    \n\t"
01136         "punpckhwd     %5, %%mm3    \n\t"
01137         "punpckhwd     %5, %%mm4    \n\t"
01138         "punpckhwd     %5, %%mm5    \n\t"
01139         "psllq         $8, %%mm1    \n\t"
01140         "psllq        $16, %%mm2    \n\t"
01141         "por        %%mm1, %%mm0    \n\t"
01142         "por        %%mm2, %%mm0    \n\t"
01143         "psllq         $8, %%mm4    \n\t"
01144         "psllq        $16, %%mm5    \n\t"
01145         "por        %%mm4, %%mm3    \n\t"
01146         "por        %%mm5, %%mm3    \n\t"
01147         :"=m"(*d)
01148         :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
01149         :"memory");
        /* Stage 2: pack the expanded values held in the MMX registers down
           to 24 output bytes and store them. */
01150         /* borrowed 32 to 24 */
01151         asm volatile(
01152         "movq       %%mm0, %%mm4    \n\t"
01153         "movq       %%mm3, %%mm5    \n\t"
01154         "movq       %%mm6, %%mm0    \n\t"
01155         "movq       %%mm7, %%mm1    \n\t"
01156 
01157         "movq       %%mm4, %%mm6    \n\t"
01158         "movq       %%mm5, %%mm7    \n\t"
01159         "movq       %%mm0, %%mm2    \n\t"
01160         "movq       %%mm1, %%mm3    \n\t"
01161 
01162         "psrlq         $8, %%mm2    \n\t"
01163         "psrlq         $8, %%mm3    \n\t"
01164         "psrlq         $8, %%mm6    \n\t"
01165         "psrlq         $8, %%mm7    \n\t"
01166         "pand          %2, %%mm0    \n\t"
01167         "pand          %2, %%mm1    \n\t"
01168         "pand          %2, %%mm4    \n\t"
01169         "pand          %2, %%mm5    \n\t"
01170         "pand          %3, %%mm2    \n\t"
01171         "pand          %3, %%mm3    \n\t"
01172         "pand          %3, %%mm6    \n\t"
01173         "pand          %3, %%mm7    \n\t"
01174         "por        %%mm2, %%mm0    \n\t"
01175         "por        %%mm3, %%mm1    \n\t"
01176         "por        %%mm6, %%mm4    \n\t"
01177         "por        %%mm7, %%mm5    \n\t"
01178 
01179         "movq       %%mm1, %%mm2    \n\t"
01180         "movq       %%mm4, %%mm3    \n\t"
01181         "psllq        $48, %%mm2    \n\t"
01182         "psllq        $32, %%mm3    \n\t"
01183         "pand          %4, %%mm2    \n\t"
01184         "pand          %5, %%mm3    \n\t"
01185         "por        %%mm2, %%mm0    \n\t"
01186         "psrlq        $16, %%mm1    \n\t"
01187         "psrlq        $32, %%mm4    \n\t"
01188         "psllq        $16, %%mm5    \n\t"
01189         "por        %%mm3, %%mm1    \n\t"
01190         "pand          %6, %%mm5    \n\t"
01191         "por        %%mm5, %%mm4    \n\t"
01192 
01193         MOVNTQ"     %%mm0,   %0     \n\t"
01194         MOVNTQ"     %%mm1,  8%0     \n\t"
01195         MOVNTQ"     %%mm4, 16%0"
01196 
01197         :"=m"(*d)
01198         :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
01199         :"memory");
01200         d += 24;
01201         s += 8;
01202     }
01203     asm volatile(SFENCE:::"memory");
01204     asm volatile(EMMS:::"memory");
01205 #endif
    /* Scalar path: all words without HAVE_MMX, otherwise the up to 7 words
       the asm loop left over. */
01206     while (s < end)
01207     {
01208         register uint16_t bgr;
01209         bgr = *s++;
01210         *d++ = (bgr&0x1F)<<3;
01211         *d++ = (bgr&0x7E0)>>3;
01212         *d++ = (bgr&0xF800)>>8;
01213     }
01214 }

static void RENAME() rgb16to32 ( const uint8_t *  src,
uint8_t *  dst,
long  src_size 
) [inline, static]

Definition at line 1294 of file rgb2rgb_template.c.

References EMMS, MOVNTQ, PREFETCH, SFENCE, and src.

01295 {
          /*
           * Expand 16-bit pixels read from src into 4-byte pixels written to dst.
           *
           * src      - input buffer, read as an array of uint16_t pixels
           * dst      - output buffer; 4 bytes are written per input pixel, so it
           *            must hold at least 4 * (src_size / 2) bytes (no bounds
           *            checks are made here)
           * src_size - input size in bytes; src_size / 2 pixels are converted
           *            (integer division, so an odd trailing byte is ignored)
           *
           * Each pixel is split into three bit fields (masks 0x1F, 0x7E0 and
           * 0xF800 in the scalar loop), each field is shifted so that it occupies
           * the top bits of its own output byte, and the fourth output byte is 0.
           * The low bits of each output byte are left at zero (no bit replication).
           * NOTE(review): the variable name 'bgr' and the mask names
           * mask16b/mask16g/mask16r suggest the fields are blue/green/red from
           * low to high bits - confirm against the mask definitions and callers.
           */
01296     const uint16_t *end;
01297 #ifdef HAVE_MMX
01298     const uint16_t *mm_end;
01299 #endif
01300     uint8_t *d = dst;
01301     const uint16_t *s = (const uint16_t*)src;
          /* One past the last complete 16-bit input pixel. */
01302     end = s + src_size/2;
01303 #ifdef HAVE_MMX
01304     asm volatile(PREFETCH"    %0"::"m"(*s):"memory");
          /* mm7 = 0; it is used as the zero source for every unpack in the loop
           * below, so it has to survive across the separate asm statements. */
01305     asm volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
          /* The MMX loop consumes 4 pixels per iteration; s < end - 3 guarantees
           * that 4 whole pixels are still available. */
01306     mm_end = end - 3;
01307     while (s < mm_end)
01308     {
              /*
               * Operands: %0 = *d (output), %1 = *s (input), %2 = mask16b,
               * %3 = mask16g, %4 = mask16r.
               * NOTE(review): the mask values are defined outside this view;
               * presumably each one repeats, in every 16-bit word, the field
               * mask used by the scalar loop (0x1F / 0x7E0 / 0xF800) - confirm.
               * Only *d is named as an output; the "memory" clobber is what
               * covers the full 16 bytes that are stored.
               */
01309         asm volatile(
01310         PREFETCH"    32%1           \n\t"
              /* Load the same 4 input pixels (8 bytes) three times, once per field. */
01311         "movq          %1, %%mm0    \n\t"
01312         "movq          %1, %%mm1    \n\t"
01313         "movq          %1, %%mm2    \n\t"
              /* Isolate one field per register ... */
01314         "pand          %2, %%mm0    \n\t"
01315         "pand          %3, %%mm1    \n\t"
01316         "pand          %4, %%mm2    \n\t"
              /* ... and apply the same shifts as the scalar loop (<<3, >>3, >>8). */
01317         "psllq         $3, %%mm0    \n\t"
01318         "psrlq         $3, %%mm1    \n\t"
01319         "psrlq         $8, %%mm2    \n\t"
              /* Keep copies for the upper two pixels before unpacking. */
01320         "movq       %%mm0, %%mm3    \n\t"
01321         "movq       %%mm1, %%mm4    \n\t"
01322         "movq       %%mm2, %%mm5    \n\t"
              /* Widen the low two 16-bit words to 32 bits by interleaving with zero. */
01323         "punpcklwd  %%mm7, %%mm0    \n\t"
01324         "punpcklwd  %%mm7, %%mm1    \n\t"
01325         "punpcklwd  %%mm7, %%mm2    \n\t"
              /* Same for the high two words (pixels 2 and 3). */
01326         "punpckhwd  %%mm7, %%mm3    \n\t"
01327         "punpckhwd  %%mm7, %%mm4    \n\t"
01328         "punpckhwd  %%mm7, %%mm5    \n\t"
              /* Pixels 0-1: move the second field to byte 1 and the third field to
               * byte 2 of each 32-bit lane, then merge all three into mm0. */
01329         "psllq         $8, %%mm1    \n\t"
01330         "psllq        $16, %%mm2    \n\t"
01331         "por        %%mm1, %%mm0    \n\t"
01332         "por        %%mm2, %%mm0    \n\t"
              /* Pixels 2-3: same merge into mm3. */
01333         "psllq         $8, %%mm4    \n\t"
01334         "psllq        $16, %%mm5    \n\t"
01335         "por        %%mm4, %%mm3    \n\t"
01336         "por        %%mm5, %%mm3    \n\t"
              /* Store the 4 output pixels: 8 bytes at d and 8 bytes at d + 8. */
01337         MOVNTQ"     %%mm0, %0       \n\t"
01338         MOVNTQ"     %%mm3, 8%0      \n\t"
01339         :"=m"(*d)
01340         :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
01341         :"memory");
              /* 4 pixels done: 16 output bytes, 4 input words. */
01342         d += 16;
01343         s += 4;
01344     }
01345     asm volatile(SFENCE:::"memory");
01346     asm volatile(EMMS:::"memory");
01347 #endif
          /* Scalar path: converts the 0-3 pixels left over after the MMX loop, or
           * every pixel when HAVE_MMX is not defined. */
01348     while (s < end)
01349     {
01350         register uint16_t bgr;
01351         bgr = *s++;
01352 #ifdef WORDS_BIGENDIAN
              /* Big-endian byte order: zero byte first, then the fields from the
               * high bits of the pixel down to the low bits. */
01353         *d++ = 0;
01354         *d++ = (bgr&0xF800)>>8;
01355         *d++ = (bgr&0x7E0)>>3;
01356         *d++ = (bgr&0x1F)<<3;
01357 #else
              /* Otherwise: fields from the low bits of the pixel up to the high
               * bits, then the zero byte. */
01358         *d++ = (bgr&0x1F)<<3;
01359         *d++ = (bgr&0x7E0)>>3;
01360         *d++ = (bgr&0xF800)>>8;
01361         *d++ = 0;
01362 #endif
01363     }
01364 }

static void RENAME() rgb24to15 ( const uint8_t *  src,
uint8_t *  dst,
long  src_size 
) [inline, static]

Definition at line 786 of file rgb2rgb_template.c.

References EMMS, g, MOVNTQ, PREFETCH, SFENCE, and src.

00787 {
00788     const uint8_t *s = src;
00789     const uint8_t *end;
00790 #ifdef HAVE_MMX
00791     const uint8_t *mm_end;
00792 #endif
00793     uint16_t *d = (uint16_t *)dst;
00794     end = s + src_size;
00795 #ifdef HAVE_MMX
00796     asm volatile(PREFETCH"    %0"::"m"(*src):"memory");
00797     asm volatile(
00798         "movq          %0, %%mm7    \n\t"
00799         "movq          %1, %%mm6    \n\t"
00800         ::"m"(red_15mask),"m"(green_15mask));
00801     mm_end = end - 11;
00802     while (s < mm_end)
00803     {
00804         asm volatile(
00805         PREFETCH"    32%1           \n\t"
00806         "movd          %1, %%mm0    \n\t"
00807         "movd         3%1, %%mm3    \n\t"
00808         "punpckldq    6%1, %%mm0    \n\t"
00809         "punpckldq    9%1, %%mm3    \n\t"
00810         "movq       %%mm0, %%mm1    \n\t"
00811         "movq       %%mm0, %%mm2    \n\t"
00812         "movq       %%mm3, %%mm4    \n\t"
00813         "movq       %%mm3, %%mm5    \n\t"
00814         "psrlq         $3, %%mm0    \n\t"
00815         "psrlq         $3, %%mm3    \n\t"
00816         "pand          %2, %%mm0    \n\t"
00817         "pand          %2, %%mm3    \n\t"
00818         "psrlq         $6, %%mm1    \n\t"
00819         "psrlq         $6, %%mm4    \n\t"
00820         "pand       %%mm6, %%mm1    \n\t"
00821         "pand       %%mm6, %%mm4    \n\t"