fdct_mmx.c File Reference

#include "libavutil/common.h"
#include "libavcodec/dsputil.h"
#include "mmx.h"

Go to the source code of this file.

Defines

#define ATTR_ALIGN(align)   __attribute__ ((__aligned__ (align)))
#define BITS_FRW_ACC   3
#define SHIFT_FRW_COL   BITS_FRW_ACC
#define SHIFT_FRW_ROW   (BITS_FRW_ACC + 17 - 3)
#define RND_FRW_ROW   (1 << (SHIFT_FRW_ROW-1))
#define X8(x)   x,x,x,x,x,x,x,x
#define TABLE_SSE2
#define C1   22725
#define C2   21407
#define C3   19266
#define C4   16384
#define C5   12873
#define C6   8867
#define C7   4520
#define C1   31521
#define C2   29692
#define C3   26722
#define C4   22725
#define C5   17855
#define C6   12299
#define C7   6270
#define C1   29692
#define C2   27969
#define C3   25172
#define C4   21407
#define C5   16819
#define C6   11585
#define C7   5906
#define C1   26722
#define C2   25172
#define C3   22654
#define C4   19266
#define C5   15137
#define C6   10426
#define C7   5315
#define C1   22725
#define C2   21407
#define C3   19266
#define C4   16384
#define C5   12873
#define C6   8867
#define C7   4520
#define C1   26722
#define C2   25172
#define C3   22654
#define C4   19266
#define C5   15137
#define C6   10426
#define C7   5315
#define C1   29692
#define C2   27969
#define C3   25172
#define C4   21407
#define C5   16819
#define C6   11585
#define C7   5906
#define C1   31521
#define C2   29692
#define C3   26722
#define C4   22725
#define C5   17855
#define C6   12299
#define C7   6270
#define FDCT_COL(cpu, mm, mov)
#define FDCT_ROW_SSE2_H1(i, t)
#define FDCT_ROW_SSE2_H2(i, t)
#define FDCT_ROW_SSE2(i)

Functions

static const int16_t fdct_tg_all_16[24] ATTR_ALIGN (16)
static const int32_t fdct_r_row[2] ATTR_ALIGN (8)
static av_always_inline void fdct_row_sse2 (const int16_t *in, int16_t *out)
static av_always_inline void fdct_row_mmx2 (const int16_t *in, int16_t *out, const int16_t *table)
static av_always_inline void fdct_row_mmx (const int16_t *in, int16_t *out, const int16_t *table)
void ff_fdct_mmx (int16_t *block)
void ff_fdct_mmx2 (int16_t *block)
void ff_fdct_sse2 (int16_t *block)

Variables

struct {
const int32_t fdct_r_row_sse2[4] ATTR_ALIGN (16)
} fdct_r_row_sse2 ATTR_ALIGN
struct {
const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN (16)
} tab_frw_01234567_sse2 ATTR_ALIGN


Define Documentation

#define ATTR_ALIGN ( align   )     __attribute__ ((__aligned__ (align)))

Definition at line 37 of file fdct_mmx.c.

Referenced by ff_fdct_mmx(), ff_fdct_mmx2(), ff_fdct_sse2(), and idct_col().

#define BITS_FRW_ACC   3

Definition at line 50 of file fdct_mmx.c.

#define C1   31521

#define C1   29692

#define C1   26722

#define C1   22725

#define C1   26722

#define C1   29692

#define C1   31521

#define C1   22725

Referenced by DECLARE_ASM_CONST(), idct4col_add(), idct4col_put(), and imdct36().

#define C2   29692

#define C2   27969

#define C2   25172

#define C2   21407

#define C2   25172

#define C2   27969

#define C2   29692

#define C2   21407

Referenced by DECLARE_ASM_CONST(), idct4col_add(), idct4col_put(), and imdct36().

#define C3   26722

#define C3   25172

#define C3   22654

#define C3   19266

#define C3   22654

#define C3   25172

#define C3   26722

#define C3   19266

Referenced by DECLARE_ASM_CONST(), idct4col_add(), imdct12(), and imdct36().

#define C4   22725

#define C4   21407

#define C4   19266

#define C4   16384

#define C4   19266

#define C4   21407

#define C4   22725

#define C4   16384

Referenced by DECLARE_ASM_CONST(), idct_col(), and imdct36().

#define C5   17855

#define C5   16819

#define C5   15137

#define C5   12873

#define C5   15137

#define C5   16819

#define C5   17855

#define C5   12873

Referenced by DECLARE_ASM_CONST() and imdct36().

#define C6   12299

#define C6   11585

#define C6   10426

#define C6   8867

#define C6   10426

#define C6   11585

#define C6   12299

#define C6   8867

Referenced by DECLARE_ASM_CONST().

#define C7   6270

#define C7   5906

#define C7   5315

#define C7   4520

#define C7   5315

#define C7   5906

#define C7   6270

#define C7   4520

Referenced by DECLARE_ASM_CONST() and imdct36().

#define FDCT_COL ( cpu,
mm,
mov   ) 

Definition at line 289 of file fdct_mmx.c.

#define FDCT_ROW_SSE2 ( i   ) 

Value:

"movq      %%xmm2, %%xmm1       \n\t" \
        "pshuflw   $27, %%xmm0, %%xmm0  \n\t" \
        "paddsw    %%xmm0, %%xmm1       \n\t" \
        "psubsw    %%xmm0, %%xmm2       \n\t" \
        "punpckldq %%xmm2, %%xmm1       \n\t" \
        "pshufd    $78, %%xmm1, %%xmm2  \n\t" \
        "pmaddwd   %%xmm2, %%xmm3       \n\t" \
        "pmaddwd   %%xmm1, %%xmm7       \n\t" \
        "pmaddwd   %%xmm5, %%xmm2       \n\t" \
        "pmaddwd   %%xmm4, %%xmm1       \n\t" \
        "paddd     %%xmm7, %%xmm3       \n\t" \
        "paddd     %%xmm2, %%xmm1       \n\t" \
        "paddd     %%xmm6, %%xmm3       \n\t" \
        "paddd     %%xmm6, %%xmm1       \n\t" \
        "psrad     %3, %%xmm3           \n\t" \
        "psrad     %3, %%xmm1           \n\t" \
        "packssdw  %%xmm3, %%xmm1       \n\t" \
        "movdqa    %%xmm1, " #i "(%4)   \n\t"

Referenced by fdct_row_sse2().

#define FDCT_ROW_SSE2_H1 ( i,
t   ) 

Value:

"movq      " #i "(%0), %%xmm2      \n\t" \
        "movq      " #i "+8(%0), %%xmm0    \n\t" \
        "movdqa    " #t "+32(%1), %%xmm3   \n\t" \
        "movdqa    " #t "+48(%1), %%xmm7   \n\t" \
        "movdqa    " #t "(%1), %%xmm4      \n\t" \
        "movdqa    " #t "+16(%1), %%xmm5   \n\t"

Referenced by fdct_row_sse2().

#define FDCT_ROW_SSE2_H2 ( i,
t   ) 

Value:

"movq      " #i "(%0), %%xmm2      \n\t" \
        "movq      " #i "+8(%0), %%xmm0    \n\t" \
        "movdqa    " #t "+32(%1), %%xmm3   \n\t" \
        "movdqa    " #t "+48(%1), %%xmm7   \n\t"

Referenced by fdct_row_sse2().

#define RND_FRW_ROW   (1 << (SHIFT_FRW_ROW-1))

Definition at line 53 of file fdct_mmx.c.

#define SHIFT_FRW_COL   BITS_FRW_ACC

Definition at line 51 of file fdct_mmx.c.

#define SHIFT_FRW_ROW   (BITS_FRW_ACC + 17 - 3)

Definition at line 52 of file fdct_mmx.c.

Referenced by fdct_row_mmx(), fdct_row_mmx2(), and fdct_row_sse2().

#define TABLE_SSE2

Value:

C4,  C4,  C1,  C3, -C6, -C2, -C1, -C5, \
                   C4,  C4,  C5,  C7,  C2,  C6,  C3, -C7, \
                  -C4,  C4,  C7,  C3,  C6, -C2,  C7, -C5, \
                   C4, -C4,  C5, -C1,  C2, -C6,  C3, -C1,

#define X8 ( x   )     x,x,x,x,x,x,x,x

Definition at line 56 of file fdct_mmx.c.


Function Documentation

static const int32_t fdct_r_row [2] ATTR_ALIGN ( 8   )  [static]

static const int16_t fdct_tg_all_16 [24] ATTR_ALIGN ( 16   )  [static]

static av_always_inline void fdct_row_mmx ( const int16_t *  in,
int16_t *  out,
const int16_t *  table 
) [static]

Definition at line 477 of file fdct_mmx.c.

References mm0, mm1, mm2, mm3, movd_m2r, movq_m2r, movq_r2m, movq_r2r, packssdw_r2r, paddd_r2r, paddsw_r2r, pmaddwd_m2r, pmaddwd_r2r, psrad_i2r, psrlq_i2r, psubsw_r2r, punpckhdq_r2r, punpckldq_r2r, punpcklwd_m2r, punpcklwd_r2r, and SHIFT_FRW_ROW.

Referenced by ff_fdct_mmx().

00478 {
00479 //FIXME reorder (I do not have an old MMX-only CPU here to benchmark ...)
00480     movd_m2r(*(in + 6), mm1);
00481     punpcklwd_m2r(*(in + 4), mm1);
00482     movq_r2r(mm1, mm2);
00483     psrlq_i2r(0x20, mm1);
00484     movq_m2r(*(in + 0), mm0);
00485     punpcklwd_r2r(mm2, mm1);
00486     movq_r2r(mm0, mm5);
00487     paddsw_r2r(mm1, mm0);
00488     psubsw_r2r(mm1, mm5);
00489     movq_r2r(mm0, mm2);
00490     punpckldq_r2r(mm5, mm0);
00491     punpckhdq_r2r(mm5, mm2);
00492     movq_m2r(*(table + 0), mm1);
00493     movq_m2r(*(table + 4), mm3);
00494     movq_m2r(*(table + 8), mm4);
00495     movq_m2r(*(table + 12), mm5);
00496     movq_m2r(*(table + 16), mm6);
00497     movq_m2r(*(table + 20), mm7);
00498     pmaddwd_r2r(mm0, mm1);
00499     pmaddwd_r2r(mm2, mm3);
00500     pmaddwd_r2r(mm0, mm4);
00501     pmaddwd_r2r(mm2, mm5);
00502     pmaddwd_r2r(mm0, mm6);
00503     pmaddwd_r2r(mm2, mm7);
00504     pmaddwd_m2r(*(table + 24), mm0);
00505     pmaddwd_m2r(*(table + 28), mm2);
00506     paddd_r2r(mm1, mm3);
00507     paddd_r2r(mm4, mm5);
00508     paddd_r2r(mm6, mm7);
00509     paddd_r2r(mm0, mm2);
00510     movq_m2r(*fdct_r_row, mm0);
00511     paddd_r2r(mm0, mm3);
00512     paddd_r2r(mm0, mm5);
00513     paddd_r2r(mm0, mm7);
00514     paddd_r2r(mm0, mm2);
00515     psrad_i2r(SHIFT_FRW_ROW, mm3);
00516     psrad_i2r(SHIFT_FRW_ROW, mm5);
00517     psrad_i2r(SHIFT_FRW_ROW, mm7);
00518     psrad_i2r(SHIFT_FRW_ROW, mm2);
00519     packssdw_r2r(mm5, mm3);
00520     packssdw_r2r(mm2, mm7);
00521     movq_r2m(mm3, *(out + 0));
00522     movq_r2m(mm7, *(out + 4));
00523 }

static av_always_inline void fdct_row_mmx2 ( const int16_t *  in,
int16_t *  out,
const int16_t *  table 
) [static]

Definition at line 434 of file fdct_mmx.c.

References mm0, mm1, mm2, mm3, movq_m2r, movq_r2m, movq_r2r, packssdw_r2r, paddd_r2r, paddsw_r2r, pmaddwd_m2r, pmaddwd_r2r, pshufw_m2r, psrad_i2r, psubsw_r2r, punpckhdq_r2r, punpckldq_r2r, and SHIFT_FRW_ROW.

Referenced by ff_fdct_mmx2().

00435 {
00436     pshufw_m2r(*(in + 4), mm5, 0x1B);
00437     movq_m2r(*(in + 0), mm0);
00438     movq_r2r(mm0, mm1);
00439     paddsw_r2r(mm5, mm0);
00440     psubsw_r2r(mm5, mm1);
00441     movq_r2r(mm0, mm2);
00442     punpckldq_r2r(mm1, mm0);
00443     punpckhdq_r2r(mm1, mm2);
00444     movq_m2r(*(table + 0), mm1);
00445     movq_m2r(*(table + 4), mm3);
00446     movq_m2r(*(table + 8), mm4);
00447     movq_m2r(*(table + 12), mm5);
00448     movq_m2r(*(table + 16), mm6);
00449     movq_m2r(*(table + 20), mm7);
00450     pmaddwd_r2r(mm0, mm1);
00451     pmaddwd_r2r(mm2, mm3);
00452     pmaddwd_r2r(mm0, mm4);
00453     pmaddwd_r2r(mm2, mm5);
00454     pmaddwd_r2r(mm0, mm6);
00455     pmaddwd_r2r(mm2, mm7);
00456     pmaddwd_m2r(*(table + 24), mm0);
00457     pmaddwd_m2r(*(table + 28), mm2);
00458     paddd_r2r(mm1, mm3);
00459     paddd_r2r(mm4, mm5);
00460     paddd_r2r(mm6, mm7);
00461     paddd_r2r(mm0, mm2);
00462     movq_m2r(*fdct_r_row, mm0);
00463     paddd_r2r(mm0, mm3);
00464     paddd_r2r(mm0, mm5);
00465     paddd_r2r(mm0, mm7);
00466     paddd_r2r(mm0, mm2);
00467     psrad_i2r(SHIFT_FRW_ROW, mm3);
00468     psrad_i2r(SHIFT_FRW_ROW, mm5);
00469     psrad_i2r(SHIFT_FRW_ROW, mm7);
00470     psrad_i2r(SHIFT_FRW_ROW, mm2);
00471     packssdw_r2r(mm5, mm3);
00472     packssdw_r2r(mm2, mm7);
00473     movq_r2m(mm3, *(out + 0));
00474     movq_r2m(mm7, *(out + 4));
00475 }

static av_always_inline void fdct_row_sse2 ( const int16_t *  in,
int16_t *  out 
) [static]

Definition at line 372 of file fdct_mmx.c.

References FDCT_ROW_SSE2, FDCT_ROW_SSE2_H1, FDCT_ROW_SSE2_H2, and SHIFT_FRW_ROW.

Referenced by ff_fdct_sse2().

00373 {
00374     asm volatile(
00375 #define FDCT_ROW_SSE2_H1(i,t)                    \
00376         "movq      " #i "(%0), %%xmm2      \n\t" \
00377         "movq      " #i "+8(%0), %%xmm0    \n\t" \
00378         "movdqa    " #t "+32(%1), %%xmm3   \n\t" \
00379         "movdqa    " #t "+48(%1), %%xmm7   \n\t" \
00380         "movdqa    " #t "(%1), %%xmm4      \n\t" \
00381         "movdqa    " #t "+16(%1), %%xmm5   \n\t"
00382 
00383 #define FDCT_ROW_SSE2_H2(i,t)                    \
00384         "movq      " #i "(%0), %%xmm2      \n\t" \
00385         "movq      " #i "+8(%0), %%xmm0    \n\t" \
00386         "movdqa    " #t "+32(%1), %%xmm3   \n\t" \
00387         "movdqa    " #t "+48(%1), %%xmm7   \n\t"
00388 
00389 #define FDCT_ROW_SSE2(i)                      \
00390         "movq      %%xmm2, %%xmm1       \n\t" \
00391         "pshuflw   $27, %%xmm0, %%xmm0  \n\t" \
00392         "paddsw    %%xmm0, %%xmm1       \n\t" \
00393         "psubsw    %%xmm0, %%xmm2       \n\t" \
00394         "punpckldq %%xmm2, %%xmm1       \n\t" \
00395         "pshufd    $78, %%xmm1, %%xmm2  \n\t" \
00396         "pmaddwd   %%xmm2, %%xmm3       \n\t" \
00397         "pmaddwd   %%xmm1, %%xmm7       \n\t" \
00398         "pmaddwd   %%xmm5, %%xmm2       \n\t" \
00399         "pmaddwd   %%xmm4, %%xmm1       \n\t" \
00400         "paddd     %%xmm7, %%xmm3       \n\t" \
00401         "paddd     %%xmm2, %%xmm1       \n\t" \
00402         "paddd     %%xmm6, %%xmm3       \n\t" \
00403         "paddd     %%xmm6, %%xmm1       \n\t" \
00404         "psrad     %3, %%xmm3           \n\t" \
00405         "psrad     %3, %%xmm1           \n\t" \
00406         "packssdw  %%xmm3, %%xmm1       \n\t" \
00407         "movdqa    %%xmm1, " #i "(%4)   \n\t"
00408 
00409         "movdqa    (%2), %%xmm6         \n\t"
00410         FDCT_ROW_SSE2_H1(0,0)
00411         FDCT_ROW_SSE2(0)
00412         FDCT_ROW_SSE2_H2(64,0)
00413         FDCT_ROW_SSE2(64)
00414 
00415         FDCT_ROW_SSE2_H1(16,64)
00416         FDCT_ROW_SSE2(16)
00417         FDCT_ROW_SSE2_H2(112,64)
00418         FDCT_ROW_SSE2(112)
00419 
00420         FDCT_ROW_SSE2_H1(32,128)
00421         FDCT_ROW_SSE2(32)
00422         FDCT_ROW_SSE2_H2(96,128)
00423         FDCT_ROW_SSE2(96)
00424 
00425         FDCT_ROW_SSE2_H1(48,192)
00426         FDCT_ROW_SSE2(48)
00427         FDCT_ROW_SSE2_H2(80,192)
00428         FDCT_ROW_SSE2(80)
00429         :
00430         : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
00431     );
00432 }

void ff_fdct_mmx ( int16_t *  block  ) 

Definition at line 525 of file fdct_mmx.c.

References ATTR_ALIGN, and fdct_row_mmx().

00526 {
00527     int64_t align_tmp[16] ATTR_ALIGN(8);
00528     int16_t * block1= (int16_t*)align_tmp;
00529     const int16_t *table= tab_frw_01234567;
00530     int i;
00531 
00532     fdct_col_mmx(block, block1, 0);
00533     fdct_col_mmx(block, block1, 4);
00534 
00535     for(i=8;i>0;i--) {
00536         fdct_row_mmx(block1, block, table);
00537         block1 += 8;
00538         table += 32;
00539         block += 8;
00540     }
00541 }

void ff_fdct_mmx2 ( int16_t *  block  ) 

Definition at line 543 of file fdct_mmx.c.

References ATTR_ALIGN, and fdct_row_mmx2().

00544 {
00545     int64_t align_tmp[16] ATTR_ALIGN(8);
00546     int16_t *block1= (int16_t*)align_tmp;
00547     const int16_t *table= tab_frw_01234567;
00548     int i;
00549 
00550     fdct_col_mmx(block, block1, 0);
00551     fdct_col_mmx(block, block1, 4);
00552 
00553     for(i=8;i>0;i--) {
00554         fdct_row_mmx2(block1, block, table);
00555         block1 += 8;
00556         table += 32;
00557         block += 8;
00558     }
00559 }

void ff_fdct_sse2 ( int16_t *  block  ) 

Definition at line 561 of file fdct_mmx.c.

References ATTR_ALIGN, and fdct_row_sse2().

00562 {
00563     int64_t align_tmp[16] ATTR_ALIGN(16);
00564     int16_t * const block1= (int16_t*)align_tmp;
00565 
00566     fdct_col_sse2(block, block1, 0);
00567     fdct_row_sse2(block1, block);
00568 }


Variable Documentation

struct { ... } ATTR_ALIGN [static]

struct { ... } ATTR_ALIGN [static]


Generated on Fri Jan 9 15:44:58 2009 for libextractor by  doxygen 1.5.1