dct-test.c

Go to the documentation of this file.
00001 /*
00002  * (c) 2001 Fabrice Bellard
00003  *     2007 Marc Hoffman <marc.hoffman@analog.com>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 /**
00023  * @file dct-test.c
00024  * DCT test. (c) 2001 Fabrice Bellard.
00025  * Started from sample code by Juan J. Sierralta P.
00026  */
00027 
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <sys/time.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #include "libavutil/common.h"
00036 
00037 #include "simple_idct.h"
00038 #include "faandct.h"
00039 #include "faanidct.h"
00040 #include "i386/idct_xvid.h"
00041 
00042 #undef printf
00043 #undef random
00044 
00045 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
00046 
00047 /* reference fdct/idct */
00048 extern void fdct(DCTELEM *block);
00049 extern void idct(DCTELEM *block);
00050 extern void init_fdct();
00051 
00052 extern void ff_mmx_idct(DCTELEM *data);
00053 extern void ff_mmxext_idct(DCTELEM *data);
00054 
00055 extern void odivx_idct_c (short *block);
00056 
00057 // BFIN
00058 extern void ff_bfin_idct (DCTELEM *block)  ;
00059 extern void ff_bfin_fdct (DCTELEM *block) ;
00060 
00061 // ALTIVEC
00062 extern void fdct_altivec (DCTELEM *block);
00063 //extern void idct_altivec (DCTELEM *block);?? no routine
00064 
00065 
00066 struct algo {
00067   const char *name;
00068   enum { FDCT, IDCT } is_idct;
00069   void (* func) (DCTELEM *block);
00070   void (* ref)  (DCTELEM *block);
00071   enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
00072   int  mm_support;
00073 };
00074 
00075 #ifndef FAAN_POSTSCALE
00076 #define FAAN_SCALE SCALE_PERM
00077 #else
00078 #define FAAN_SCALE NO_PERM
00079 #endif
00080 
00081 static int cpu_flags;
00082 
00083 struct algo algos[] = {
00084   {"REF-DBL",         0, fdct,               fdct, NO_PERM},
00085   {"FAAN",            0, ff_faandct,         fdct, FAAN_SCALE},
00086   {"FAANI",           1, ff_faanidct,        idct, NO_PERM},
00087   {"IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM},
00088   {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM},
00089   {"REF-DBL",         1, idct,               idct, NO_PERM},
00090   {"INT",             1, j_rev_dct,          idct, MMX_PERM},
00091   {"SIMPLE-C",        1, ff_simple_idct,     idct, NO_PERM},
00092 
00093 #ifdef HAVE_MMX
00094   {"MMX",             0, ff_fdct_mmx,        fdct, NO_PERM, MM_MMX},
00095 #ifdef HAVE_MMX2
00096   {"MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM, MM_MMXEXT},
00097 #endif
00098 
00099 #ifdef CONFIG_GPL
00100   {"LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM, MM_MMX},
00101   {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM, MM_MMXEXT},
00102 #endif
00103   {"SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, MM_MMX},
00104   {"XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM, MM_MMX},
00105   {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM, MM_MMXEXT},
00106   {"XVID-SSE2",       1, ff_idct_xvid_sse2,  idct, SSE2_PERM, MM_SSE2},
00107 #endif
00108 
00109 #ifdef HAVE_ALTIVEC
00110   {"altivecfdct",     0, fdct_altivec,       fdct, NO_PERM, MM_ALTIVEC},
00111 #endif
00112 
00113 #ifdef ARCH_BFIN
00114   {"BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM},
00115   {"BFINidct",        1, ff_bfin_idct,       idct, NO_PERM},
00116 #endif
00117 
00118   { 0 }
00119 };
00120 
00121 #define AANSCALE_BITS 12
00122 static const unsigned short aanscales[64] = {
00123     /* precomputed values scaled up by 14 bits */
00124     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
00125     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
00126     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
00127     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
00128     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
00129     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
00130     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
00131     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
00132 };
00133 
00134 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
00135 
00136 int64_t gettime(void)
00137 {
00138     struct timeval tv;
00139     gettimeofday(&tv,NULL);
00140     return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
00141 }
00142 
00143 #define NB_ITS 20000
00144 #define NB_ITS_SPEED 50000
00145 
00146 static short idct_mmx_perm[64];
00147 
00148 static short idct_simple_mmx_perm[64]={
00149         0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
00150         0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
00151         0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
00152         0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
00153         0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
00154         0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
00155         0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
00156         0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
00157 };
00158 
00159 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00160 
00161 void idct_mmx_init(void)
00162 {
00163     int i;
00164 
00165     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
00166     for (i = 0; i < 64; i++) {
00167         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00168 //        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
00169     }
00170 }
00171 
00172 static DCTELEM block[64] __attribute__ ((aligned (16)));
00173 static DCTELEM block1[64] __attribute__ ((aligned (8)));
00174 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
00175 
00176 static inline void mmx_emms(void)
00177 {
00178 #ifdef HAVE_MMX
00179     if (cpu_flags & MM_MMX)
00180         asm volatile ("emms\n\t");
00181 #endif
00182 }
00183 
00184 void dct_error(const char *name, int is_idct,
00185                void (*fdct_func)(DCTELEM *block),
00186                void (*fdct_ref)(DCTELEM *block), int form, int test)
00187 {
00188     int it, i, scale;
00189     int err_inf, v;
00190     int64_t err2, ti, ti1, it1;
00191     int64_t sysErr[64], sysErrMax=0;
00192     int maxout=0;
00193     int blockSumErrMax=0, blockSumErr;
00194 
00195     srandom(0);
00196 
00197     err_inf = 0;
00198     err2 = 0;
00199     for(i=0; i<64; i++) sysErr[i]=0;
00200     for(it=0;it<NB_ITS;it++) {
00201         for(i=0;i<64;i++)
00202             block1[i] = 0;
00203         switch(test){
00204         case 0:
00205             for(i=0;i<64;i++)
00206                 block1[i] = (random() % 512) -256;
00207             if (is_idct){
00208                 fdct(block1);
00209 
00210                 for(i=0;i<64;i++)
00211                     block1[i]>>=3;
00212             }
00213         break;
00214         case 1:{
00215             int num= (random()%10)+1;
00216             for(i=0;i<num;i++)
00217                 block1[random()%64] = (random() % 512) -256;
00218         }break;
00219         case 2:
00220             block1[0]= (random()%4096)-2048;
00221             block1[63]= (block1[0]&1)^1;
00222         break;
00223         }
00224 
00225 #if 0 // simulate mismatch control
00226 { int sum=0;
00227         for(i=0;i<64;i++)
00228            sum+=block1[i];
00229 
00230         if((sum&1)==0) block1[63]^=1;
00231 }
00232 #endif
00233 
00234         for(i=0; i<64; i++)
00235             block_org[i]= block1[i];
00236 
00237         if (form == MMX_PERM) {
00238             for(i=0;i<64;i++)
00239                 block[idct_mmx_perm[i]] = block1[i];
00240             } else if (form == MMX_SIMPLE_PERM) {
00241             for(i=0;i<64;i++)
00242                 block[idct_simple_mmx_perm[i]] = block1[i];
00243 
00244         } else if (form == SSE2_PERM) {
00245             for(i=0; i<64; i++)
00246                 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
00247         } else {
00248             for(i=0; i<64; i++)
00249                 block[i]= block1[i];
00250         }
00251 #if 0 // simulate mismatch control for tested IDCT but not the ref
00252 { int sum=0;
00253         for(i=0;i<64;i++)
00254            sum+=block[i];
00255 
00256         if((sum&1)==0) block[63]^=1;
00257 }
00258 #endif
00259 
00260         fdct_func(block);
00261         mmx_emms();
00262 
00263         if (form == SCALE_PERM) {
00264             for(i=0; i<64; i++) {
00265                 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
00266                 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
00267             }
00268         }
00269 
00270         fdct_ref(block1);
00271 
00272         blockSumErr=0;
00273         for(i=0;i<64;i++) {
00274             v = abs(block[i] - block1[i]);
00275             if (v > err_inf)
00276                 err_inf = v;
00277             err2 += v * v;
00278             sysErr[i] += block[i] - block1[i];
00279             blockSumErr += v;
00280             if( abs(block[i])>maxout) maxout=abs(block[i]);
00281         }
00282         if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
00283 #if 0 // print different matrix pairs
00284         if(blockSumErr){
00285             printf("\n");
00286             for(i=0; i<64; i++){
00287                 if((i&7)==0) printf("\n");
00288                 printf("%4d ", block_org[i]);
00289             }
00290             for(i=0; i<64; i++){
00291                 if((i&7)==0) printf("\n");
00292                 printf("%4d ", block[i] - block1[i]);
00293             }
00294         }
00295 #endif
00296     }
00297     for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
00298 
00299 #if 1 // dump systematic errors
00300     for(i=0; i<64; i++){
00301         if(i%8==0) printf("\n");
00302         printf("%5d ", (int)sysErr[i]);
00303     }
00304     printf("\n");
00305 #endif
00306 
00307     printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
00308            is_idct ? "IDCT" : "DCT",
00309            name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
00310 #if 1 //Speed test
00311     /* speed test */
00312     for(i=0;i<64;i++)
00313         block1[i] = 0;
00314     switch(test){
00315     case 0:
00316         for(i=0;i<64;i++)
00317             block1[i] = (random() % 512) -256;
00318         if (is_idct){
00319             fdct(block1);
00320 
00321             for(i=0;i<64;i++)
00322                 block1[i]>>=3;
00323         }
00324     break;
00325     case 1:{
00326     case 2:
00327         block1[0] = (random() % 512) -256;
00328         block1[1] = (random() % 512) -256;
00329         block1[2] = (random() % 512) -256;
00330         block1[3] = (random() % 512) -256;
00331     }break;
00332     }
00333 
00334     if (form == MMX_PERM) {
00335         for(i=0;i<64;i++)
00336             block[idct_mmx_perm[i]] = block1[i];
00337     } else if(form == MMX_SIMPLE_PERM) {
00338         for(i=0;i<64;i++)
00339             block[idct_simple_mmx_perm[i]] = block1[i];
00340     } else {
00341         for(i=0; i<64; i++)
00342             block[i]= block1[i];
00343     }
00344 
00345     ti = gettime();
00346     it1 = 0;
00347     do {
00348         for(it=0;it<NB_ITS_SPEED;it++) {
00349             for(i=0; i<64; i++)
00350                 block[i]= block1[i];
00351 //            memcpy(block, block1, sizeof(DCTELEM) * 64);
00352 // do not memcpy especially not fastmemcpy because it does movntq !!!
00353             fdct_func(block);
00354         }
00355         it1 += NB_ITS_SPEED;
00356         ti1 = gettime() - ti;
00357     } while (ti1 < 1000000);
00358     mmx_emms();
00359 
00360     printf("%s %s: %0.1f kdct/s\n",
00361            is_idct ? "IDCT" : "DCT",
00362            name, (double)it1 * 1000.0 / (double)ti1);
00363 #endif
00364 }
00365 
00366 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
00367 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
00368 
00369 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
00370 {
00371     static int init;
00372     static double c8[8][8];
00373     static double c4[4][4];
00374     double block1[64], block2[64], block3[64];
00375     double s, sum, v;
00376     int i, j, k;
00377 
00378     if (!init) {
00379         init = 1;
00380 
00381         for(i=0;i<8;i++) {
00382             sum = 0;
00383             for(j=0;j<8;j++) {
00384                 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
00385                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
00386                 sum += c8[i][j] * c8[i][j];
00387             }
00388         }
00389 
00390         for(i=0;i<4;i++) {
00391             sum = 0;
00392             for(j=0;j<4;j++) {
00393                 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
00394                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
00395                 sum += c4[i][j] * c4[i][j];
00396             }
00397         }
00398     }
00399 
00400     /* butterfly */
00401     s = 0.5 * sqrt(2.0);
00402     for(i=0;i<4;i++) {
00403         for(j=0;j<8;j++) {
00404             block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
00405             block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
00406         }
00407     }
00408 
00409     /* idct8 on lines */
00410     for(i=0;i<8;i++) {
00411         for(j=0;j<8;j++) {
00412             sum = 0;
00413             for(k=0;k<8;k++)
00414                 sum += c8[k][j] * block1[8*i+k];
00415             block2[8*i+j] = sum;
00416         }
00417     }
00418 
00419     /* idct4 */
00420     for(i=0;i<8;i++) {
00421         for(j=0;j<4;j++) {
00422             /* top */
00423             sum = 0;
00424             for(k=0;k<4;k++)
00425                 sum += c4[k][j] * block2[8*(2*k)+i];
00426             block3[8*(2*j)+i] = sum;
00427 
00428             /* bottom */
00429             sum = 0;
00430             for(k=0;k<4;k++)
00431                 sum += c4[k][j] * block2[8*(2*k+1)+i];
00432             block3[8*(2*j+1)+i] = sum;
00433         }
00434     }
00435 
00436     /* clamp and store the result */
00437     for(i=0;i<8;i++) {
00438         for(j=0;j<8;j++) {
00439             v = block3[8*i+j];
00440             if (v < 0)
00441                 v = 0;
00442             else if (v > 255)
00443                 v = 255;
00444             dest[i * linesize + j] = (int)rint(v);
00445         }
00446     }
00447 }
00448 
00449 void idct248_error(const char *name,
00450                     void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
00451 {
00452     int it, i, it1, ti, ti1, err_max, v;
00453 
00454     srandom(0);
00455 
00456     /* just one test to see if code is correct (precision is less
00457        important here) */
00458     err_max = 0;
00459     for(it=0;it<NB_ITS;it++) {
00460 
00461         /* XXX: use forward transform to generate values */
00462         for(i=0;i<64;i++)
00463             block1[i] = (random() % 256) - 128;
00464         block1[0] += 1024;
00465 
00466         for(i=0; i<64; i++)
00467             block[i]= block1[i];
00468         idct248_ref(img_dest1, 8, block);
00469 
00470         for(i=0; i<64; i++)
00471             block[i]= block1[i];
00472         idct248_put(img_dest, 8, block);
00473 
00474         for(i=0;i<64;i++) {
00475             v = abs((int)img_dest[i] - (int)img_dest1[i]);
00476             if (v == 255)
00477                 printf("%d %d\n", img_dest[i], img_dest1[i]);
00478             if (v > err_max)
00479                 err_max = v;
00480         }
00481 #if 0
00482         printf("ref=\n");
00483         for(i=0;i<8;i++) {
00484             int j;
00485             for(j=0;j<8;j++) {
00486                 printf(" %3d", img_dest1[i*8+j]);
00487             }
00488             printf("\n");
00489         }
00490 
00491         printf("out=\n");
00492         for(i=0;i<8;i++) {
00493             int j;
00494             for(j=0;j<8;j++) {
00495                 printf(" %3d", img_dest[i*8+j]);
00496             }
00497             printf("\n");
00498         }
00499 #endif
00500     }
00501     printf("%s %s: err_inf=%d\n",
00502            1 ? "IDCT248" : "DCT248",
00503            name, err_max);
00504 
00505     ti = gettime();
00506     it1 = 0;
00507     do {
00508         for(it=0;it<NB_ITS_SPEED;it++) {
00509             for(i=0; i<64; i++)
00510                 block[i]= block1[i];
00511 //            memcpy(block, block1, sizeof(DCTELEM) * 64);
00512 // do not memcpy especially not fastmemcpy because it does movntq !!!
00513             idct248_put(img_dest, 8, block);
00514         }
00515         it1 += NB_ITS_SPEED;
00516         ti1 = gettime() - ti;
00517     } while (ti1 < 1000000);
00518     mmx_emms();
00519 
00520     printf("%s %s: %0.1f kdct/s\n",
00521            1 ? "IDCT248" : "DCT248",
00522            name, (double)it1 * 1000.0 / (double)ti1);
00523 }
00524 
00525 void help(void)
00526 {
00527     printf("dct-test [-i] [<test-number>]\n"
00528            "test-number 0 -> test with random matrixes\n"
00529            "            1 -> test with random sparse matrixes\n"
00530            "            2 -> do 3. test from mpeg4 std\n"
00531            "-i          test IDCT implementations\n"
00532            "-4          test IDCT248 implementations\n");
00533 }
00534 
00535 int main(int argc, char **argv)
00536 {
00537     int test_idct = 0, test_248_dct = 0;
00538     int c,i;
00539     int test=1;
00540     cpu_flags = mm_support();
00541 
00542     init_fdct();
00543     idct_mmx_init();
00544 
00545     for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
00546     for(i=0;i<MAX_NEG_CROP;i++) {
00547         cropTbl[i] = 0;
00548         cropTbl[i + MAX_NEG_CROP + 256] = 255;
00549     }
00550 
00551     for(;;) {
00552         c = getopt(argc, argv, "ih4");
00553         if (c == -1)
00554             break;
00555         switch(c) {
00556         case 'i':
00557             test_idct = 1;
00558             break;
00559         case '4':
00560             test_248_dct = 1;
00561             break;
00562         default :
00563         case 'h':
00564             help();
00565             return 0;
00566         }
00567     }
00568 
00569     if(optind <argc) test= atoi(argv[optind]);
00570 
00571     printf("ffmpeg DCT/IDCT test\n");
00572 
00573     if (test_248_dct) {
00574         idct248_error("SIMPLE-C", ff_simple_idct248_put);
00575     } else {
00576       for (i=0;algos[i].name;i++)
00577         if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
00578           dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
00579         }
00580     }
00581     return 0;
00582 }

Generated on Fri Jan 9 15:44:26 2009 for libextractor by  doxygen 1.5.1