00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <sys/time.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034
00035 #include "libavutil/common.h"
00036
00037 #include "simple_idct.h"
00038 #include "faandct.h"
00039 #include "faanidct.h"
00040 #include "i386/idct_xvid.h"
00041
00042 #undef printf
00043 #undef random
00044
00045 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
00046
00047
00048 extern void fdct(DCTELEM *block);
00049 extern void idct(DCTELEM *block);
00050 extern void init_fdct();
00051
00052 extern void ff_mmx_idct(DCTELEM *data);
00053 extern void ff_mmxext_idct(DCTELEM *data);
00054
00055 extern void odivx_idct_c (short *block);
00056
00057
00058 extern void ff_bfin_idct (DCTELEM *block) ;
00059 extern void ff_bfin_fdct (DCTELEM *block) ;
00060
00061
00062 extern void fdct_altivec (DCTELEM *block);
00063
00064
00065
00066 struct algo {
00067 const char *name;
00068 enum { FDCT, IDCT } is_idct;
00069 void (* func) (DCTELEM *block);
00070 void (* ref) (DCTELEM *block);
00071 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
00072 int mm_support;
00073 };
00074
00075 #ifndef FAAN_POSTSCALE
00076 #define FAAN_SCALE SCALE_PERM
00077 #else
00078 #define FAAN_SCALE NO_PERM
00079 #endif
00080
00081 static int cpu_flags;
00082
00083 struct algo algos[] = {
00084 {"REF-DBL", 0, fdct, fdct, NO_PERM},
00085 {"FAAN", 0, ff_faandct, fdct, FAAN_SCALE},
00086 {"FAANI", 1, ff_faanidct, idct, NO_PERM},
00087 {"IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM},
00088 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM},
00089 {"REF-DBL", 1, idct, idct, NO_PERM},
00090 {"INT", 1, j_rev_dct, idct, MMX_PERM},
00091 {"SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM},
00092
00093 #ifdef HAVE_MMX
00094 {"MMX", 0, ff_fdct_mmx, fdct, NO_PERM, MM_MMX},
00095 #ifdef HAVE_MMX2
00096 {"MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM, MM_MMXEXT},
00097 #endif
00098
00099 #ifdef CONFIG_GPL
00100 {"LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM, MM_MMX},
00101 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM, MM_MMXEXT},
00102 #endif
00103 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, MM_MMX},
00104 {"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, MM_MMX},
00105 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, MM_MMXEXT},
00106 {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, MM_SSE2},
00107 #endif
00108
00109 #ifdef HAVE_ALTIVEC
00110 {"altivecfdct", 0, fdct_altivec, fdct, NO_PERM, MM_ALTIVEC},
00111 #endif
00112
00113 #ifdef ARCH_BFIN
00114 {"BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM},
00115 {"BFINidct", 1, ff_bfin_idct, idct, NO_PERM},
00116 #endif
00117
00118 { 0 }
00119 };
00120
00121 #define AANSCALE_BITS 12
00122 static const unsigned short aanscales[64] = {
00123
00124 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
00125 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
00126 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
00127 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
00128 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
00129 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
00130 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
00131 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
00132 };
00133
00134 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
00135
00136 int64_t gettime(void)
00137 {
00138 struct timeval tv;
00139 gettimeofday(&tv,NULL);
00140 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
00141 }
00142
00143 #define NB_ITS 20000
00144 #define NB_ITS_SPEED 50000
00145
00146 static short idct_mmx_perm[64];
00147
00148 static short idct_simple_mmx_perm[64]={
00149 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
00150 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
00151 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
00152 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
00153 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
00154 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
00155 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
00156 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
00157 };
00158
00159 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00160
00161 void idct_mmx_init(void)
00162 {
00163 int i;
00164
00165
00166 for (i = 0; i < 64; i++) {
00167 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00168
00169 }
00170 }
00171
00172 static DCTELEM block[64] __attribute__ ((aligned (16)));
00173 static DCTELEM block1[64] __attribute__ ((aligned (8)));
00174 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
00175
00176 static inline void mmx_emms(void)
00177 {
00178 #ifdef HAVE_MMX
00179 if (cpu_flags & MM_MMX)
00180 asm volatile ("emms\n\t");
00181 #endif
00182 }
00183
00184 void dct_error(const char *name, int is_idct,
00185 void (*fdct_func)(DCTELEM *block),
00186 void (*fdct_ref)(DCTELEM *block), int form, int test)
00187 {
00188 int it, i, scale;
00189 int err_inf, v;
00190 int64_t err2, ti, ti1, it1;
00191 int64_t sysErr[64], sysErrMax=0;
00192 int maxout=0;
00193 int blockSumErrMax=0, blockSumErr;
00194
00195 srandom(0);
00196
00197 err_inf = 0;
00198 err2 = 0;
00199 for(i=0; i<64; i++) sysErr[i]=0;
00200 for(it=0;it<NB_ITS;it++) {
00201 for(i=0;i<64;i++)
00202 block1[i] = 0;
00203 switch(test){
00204 case 0:
00205 for(i=0;i<64;i++)
00206 block1[i] = (random() % 512) -256;
00207 if (is_idct){
00208 fdct(block1);
00209
00210 for(i=0;i<64;i++)
00211 block1[i]>>=3;
00212 }
00213 break;
00214 case 1:{
00215 int num= (random()%10)+1;
00216 for(i=0;i<num;i++)
00217 block1[random()%64] = (random() % 512) -256;
00218 }break;
00219 case 2:
00220 block1[0]= (random()%4096)-2048;
00221 block1[63]= (block1[0]&1)^1;
00222 break;
00223 }
00224
00225 #if 0 // simulate mismatch control
00226 { int sum=0;
00227 for(i=0;i<64;i++)
00228 sum+=block1[i];
00229
00230 if((sum&1)==0) block1[63]^=1;
00231 }
00232 #endif
00233
00234 for(i=0; i<64; i++)
00235 block_org[i]= block1[i];
00236
00237 if (form == MMX_PERM) {
00238 for(i=0;i<64;i++)
00239 block[idct_mmx_perm[i]] = block1[i];
00240 } else if (form == MMX_SIMPLE_PERM) {
00241 for(i=0;i<64;i++)
00242 block[idct_simple_mmx_perm[i]] = block1[i];
00243
00244 } else if (form == SSE2_PERM) {
00245 for(i=0; i<64; i++)
00246 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
00247 } else {
00248 for(i=0; i<64; i++)
00249 block[i]= block1[i];
00250 }
00251 #if 0 // simulate mismatch control for tested IDCT but not the ref
00252 { int sum=0;
00253 for(i=0;i<64;i++)
00254 sum+=block[i];
00255
00256 if((sum&1)==0) block[63]^=1;
00257 }
00258 #endif
00259
00260 fdct_func(block);
00261 mmx_emms();
00262
00263 if (form == SCALE_PERM) {
00264 for(i=0; i<64; i++) {
00265 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
00266 block[i] = (block[i] * scale ) >> AANSCALE_BITS;
00267 }
00268 }
00269
00270 fdct_ref(block1);
00271
00272 blockSumErr=0;
00273 for(i=0;i<64;i++) {
00274 v = abs(block[i] - block1[i]);
00275 if (v > err_inf)
00276 err_inf = v;
00277 err2 += v * v;
00278 sysErr[i] += block[i] - block1[i];
00279 blockSumErr += v;
00280 if( abs(block[i])>maxout) maxout=abs(block[i]);
00281 }
00282 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
00283 #if 0 // print different matrix pairs
00284 if(blockSumErr){
00285 printf("\n");
00286 for(i=0; i<64; i++){
00287 if((i&7)==0) printf("\n");
00288 printf("%4d ", block_org[i]);
00289 }
00290 for(i=0; i<64; i++){
00291 if((i&7)==0) printf("\n");
00292 printf("%4d ", block[i] - block1[i]);
00293 }
00294 }
00295 #endif
00296 }
00297 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
00298
00299 #if 1 // dump systematic errors
00300 for(i=0; i<64; i++){
00301 if(i%8==0) printf("\n");
00302 printf("%5d ", (int)sysErr[i]);
00303 }
00304 printf("\n");
00305 #endif
00306
00307 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
00308 is_idct ? "IDCT" : "DCT",
00309 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
00310 #if 1 //Speed test
00311
00312 for(i=0;i<64;i++)
00313 block1[i] = 0;
00314 switch(test){
00315 case 0:
00316 for(i=0;i<64;i++)
00317 block1[i] = (random() % 512) -256;
00318 if (is_idct){
00319 fdct(block1);
00320
00321 for(i=0;i<64;i++)
00322 block1[i]>>=3;
00323 }
00324 break;
00325 case 1:{
00326 case 2:
00327 block1[0] = (random() % 512) -256;
00328 block1[1] = (random() % 512) -256;
00329 block1[2] = (random() % 512) -256;
00330 block1[3] = (random() % 512) -256;
00331 }break;
00332 }
00333
00334 if (form == MMX_PERM) {
00335 for(i=0;i<64;i++)
00336 block[idct_mmx_perm[i]] = block1[i];
00337 } else if(form == MMX_SIMPLE_PERM) {
00338 for(i=0;i<64;i++)
00339 block[idct_simple_mmx_perm[i]] = block1[i];
00340 } else {
00341 for(i=0; i<64; i++)
00342 block[i]= block1[i];
00343 }
00344
00345 ti = gettime();
00346 it1 = 0;
00347 do {
00348 for(it=0;it<NB_ITS_SPEED;it++) {
00349 for(i=0; i<64; i++)
00350 block[i]= block1[i];
00351
00352
00353 fdct_func(block);
00354 }
00355 it1 += NB_ITS_SPEED;
00356 ti1 = gettime() - ti;
00357 } while (ti1 < 1000000);
00358 mmx_emms();
00359
00360 printf("%s %s: %0.1f kdct/s\n",
00361 is_idct ? "IDCT" : "DCT",
00362 name, (double)it1 * 1000.0 / (double)ti1);
00363 #endif
00364 }
00365
00366 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
00367 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
00368
00369 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
00370 {
00371 static int init;
00372 static double c8[8][8];
00373 static double c4[4][4];
00374 double block1[64], block2[64], block3[64];
00375 double s, sum, v;
00376 int i, j, k;
00377
00378 if (!init) {
00379 init = 1;
00380
00381 for(i=0;i<8;i++) {
00382 sum = 0;
00383 for(j=0;j<8;j++) {
00384 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
00385 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
00386 sum += c8[i][j] * c8[i][j];
00387 }
00388 }
00389
00390 for(i=0;i<4;i++) {
00391 sum = 0;
00392 for(j=0;j<4;j++) {
00393 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
00394 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
00395 sum += c4[i][j] * c4[i][j];
00396 }
00397 }
00398 }
00399
00400
00401 s = 0.5 * sqrt(2.0);
00402 for(i=0;i<4;i++) {
00403 for(j=0;j<8;j++) {
00404 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
00405 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
00406 }
00407 }
00408
00409
00410 for(i=0;i<8;i++) {
00411 for(j=0;j<8;j++) {
00412 sum = 0;
00413 for(k=0;k<8;k++)
00414 sum += c8[k][j] * block1[8*i+k];
00415 block2[8*i+j] = sum;
00416 }
00417 }
00418
00419
00420 for(i=0;i<8;i++) {
00421 for(j=0;j<4;j++) {
00422
00423 sum = 0;
00424 for(k=0;k<4;k++)
00425 sum += c4[k][j] * block2[8*(2*k)+i];
00426 block3[8*(2*j)+i] = sum;
00427
00428
00429 sum = 0;
00430 for(k=0;k<4;k++)
00431 sum += c4[k][j] * block2[8*(2*k+1)+i];
00432 block3[8*(2*j+1)+i] = sum;
00433 }
00434 }
00435
00436
00437 for(i=0;i<8;i++) {
00438 for(j=0;j<8;j++) {
00439 v = block3[8*i+j];
00440 if (v < 0)
00441 v = 0;
00442 else if (v > 255)
00443 v = 255;
00444 dest[i * linesize + j] = (int)rint(v);
00445 }
00446 }
00447 }
00448
00449 void idct248_error(const char *name,
00450 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
00451 {
00452 int it, i, it1, ti, ti1, err_max, v;
00453
00454 srandom(0);
00455
00456
00457
00458 err_max = 0;
00459 for(it=0;it<NB_ITS;it++) {
00460
00461
00462 for(i=0;i<64;i++)
00463 block1[i] = (random() % 256) - 128;
00464 block1[0] += 1024;
00465
00466 for(i=0; i<64; i++)
00467 block[i]= block1[i];
00468 idct248_ref(img_dest1, 8, block);
00469
00470 for(i=0; i<64; i++)
00471 block[i]= block1[i];
00472 idct248_put(img_dest, 8, block);
00473
00474 for(i=0;i<64;i++) {
00475 v = abs((int)img_dest[i] - (int)img_dest1[i]);
00476 if (v == 255)
00477 printf("%d %d\n", img_dest[i], img_dest1[i]);
00478 if (v > err_max)
00479 err_max = v;
00480 }
00481 #if 0
00482 printf("ref=\n");
00483 for(i=0;i<8;i++) {
00484 int j;
00485 for(j=0;j<8;j++) {
00486 printf(" %3d", img_dest1[i*8+j]);
00487 }
00488 printf("\n");
00489 }
00490
00491 printf("out=\n");
00492 for(i=0;i<8;i++) {
00493 int j;
00494 for(j=0;j<8;j++) {
00495 printf(" %3d", img_dest[i*8+j]);
00496 }
00497 printf("\n");
00498 }
00499 #endif
00500 }
00501 printf("%s %s: err_inf=%d\n",
00502 1 ? "IDCT248" : "DCT248",
00503 name, err_max);
00504
00505 ti = gettime();
00506 it1 = 0;
00507 do {
00508 for(it=0;it<NB_ITS_SPEED;it++) {
00509 for(i=0; i<64; i++)
00510 block[i]= block1[i];
00511
00512
00513 idct248_put(img_dest, 8, block);
00514 }
00515 it1 += NB_ITS_SPEED;
00516 ti1 = gettime() - ti;
00517 } while (ti1 < 1000000);
00518 mmx_emms();
00519
00520 printf("%s %s: %0.1f kdct/s\n",
00521 1 ? "IDCT248" : "DCT248",
00522 name, (double)it1 * 1000.0 / (double)ti1);
00523 }
00524
00525 void help(void)
00526 {
00527 printf("dct-test [-i] [<test-number>]\n"
00528 "test-number 0 -> test with random matrixes\n"
00529 " 1 -> test with random sparse matrixes\n"
00530 " 2 -> do 3. test from mpeg4 std\n"
00531 "-i test IDCT implementations\n"
00532 "-4 test IDCT248 implementations\n");
00533 }
00534
00535 int main(int argc, char **argv)
00536 {
00537 int test_idct = 0, test_248_dct = 0;
00538 int c,i;
00539 int test=1;
00540 cpu_flags = mm_support();
00541
00542 init_fdct();
00543 idct_mmx_init();
00544
00545 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
00546 for(i=0;i<MAX_NEG_CROP;i++) {
00547 cropTbl[i] = 0;
00548 cropTbl[i + MAX_NEG_CROP + 256] = 255;
00549 }
00550
00551 for(;;) {
00552 c = getopt(argc, argv, "ih4");
00553 if (c == -1)
00554 break;
00555 switch(c) {
00556 case 'i':
00557 test_idct = 1;
00558 break;
00559 case '4':
00560 test_248_dct = 1;
00561 break;
00562 default :
00563 case 'h':
00564 help();
00565 return 0;
00566 }
00567 }
00568
00569 if(optind <argc) test= atoi(argv[optind]);
00570
00571 printf("ffmpeg DCT/IDCT test\n");
00572
00573 if (test_248_dct) {
00574 idct248_error("SIMPLE-C", ff_simple_idct248_put);
00575 } else {
00576 for (i=0;algos[i].name;i++)
00577 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
00578 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
00579 }
00580 }
00581 return 0;
00582 }