h264.c

Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 /**
00023  * @file h264.c
00024  * H.264 / AVC / MPEG4 part10 codec.
00025  * @author Michael Niedermayer <michaelni@gmx.at>
00026  */
00027 
00028 #include "dsputil.h"
00029 #include "avcodec.h"
00030 #include "mpegvideo.h"
00031 #include "h264.h"
00032 #include "h264data.h"
00033 #include "h264_parser.h"
00034 #include "golomb.h"
00035 #include "rectangle.h"
00036 
00037 #include "cabac.h"
00038 #ifdef ARCH_X86
00039 #include "i386/h264_i386.h"
00040 #endif
00041 
00042 //#undef NDEBUG
00043 #include <assert.h>
00044 
00045 /**
00046  * Value of Picture.reference when Picture is not a reference picture, but
00047  * is held for delayed output.
00048  */
00049 #define DELAYED_PIC_REF 4
00050 
/*
 * File-local VLC tables. Their initialisation and use are outside this chunk.
 * NOTE(review): judging by the names these are the coeff_token, total_zeros
 * and run tables of the CAVLC residual decoder -- confirm against the code
 * that fills them.
 */
00051 static VLC coeff_token_vlc[4];
00052 static VLC chroma_dc_coeff_token_vlc;
00053 
00054 static VLC total_zeros_vlc[15];
00055 static VLC chroma_dc_total_zeros_vlc[3];
00056 
00057 static VLC run_vlc[6];
00058 static VLC run7_vlc;
00059 
/* Prototypes of static functions whose definitions lie outside this chunk. */
00060 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
00061 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
00062 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00063 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
00064 
/**
 * Packs the low 16 bits of two values into one 32-bit word.
 *
 * The half that receives each argument is selected by WORDS_BIGENDIAN:
 * without it a goes to bits 0..15 and b to bits 16..31, with it the two
 * are swapped.
 *
 * Callers pass values that may be negative (e.g. int8_t reference indices),
 * so the shift is carried out on uint32_t: left-shifting a negative int is
 * undefined behaviour in C, whereas the unsigned shift yields the same
 * two's complement bit pattern with well-defined semantics.
 *
 * @param a value whose low 16 bits are packed
 * @param b value whose low 16 bits are packed
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + ((uint32_t)a<<16);
#else
   return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
00072 
/**
 * Remainder lookup table: ff_rem6[q] == q % 6 for q in 0..51.
 * One period (0..5) per row; the last row is truncated at index 51.
 */
const uint8_t ff_rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
00076 
/**
 * Quotient lookup table: ff_div6[q] == q / 6 for q in 0..51.
 * One quotient value per row; the last row is truncated at index 51.
 */
const uint8_t ff_div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
00080 
00081 
/**
 * Loads the neighbour caches of the H264Context for the macroblock at
 * h->mb_xy.
 *
 * The function first works out the indices of the top, left, top-left and
 * top-right neighbour macroblocks (with extra adjustments when FRAME_MBAFF
 * is set) and stores h->top_mb_xy and h->left_mb_xy[]. It then fills, as
 * applicable: the intra sample availability masks, intra4x4_pred_mode_cache,
 * non_zero_count_cache, top_cbp/left_cbp (only if pps.cabac), the border
 * entries of mv_cache/ref_cache, mvd_cache and direct_cache (only if
 * pps.cabac), and finally neighbor_transform_size.
 *
 * @param h           decoder context; read and updated in place
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero when called on behalf of deblocking: neighbour
 *                    availability is then tested with slice_table[] < 255
 *                    instead of equality with h->slice_num, the top-left and
 *                    top-right types are forced to 0, and the function may
 *                    return early without touching any cache
 */
00082 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
00083     MpegEncContext * const s = &h->s;
00084     const int mb_xy= h->mb_xy;
00085     int topleft_xy, top_xy, topright_xy, left_xy[2];
00086     int topleft_type, top_type, topright_type, left_type[2];
00087     int left_block[8];
00088     int topleft_partition= -1;
00089     int i;
00090 
          /* macroblock above; the row step is mb_stride shifted left by FIELD_PICTURE */
00091     top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
00092 
00093     //FIXME deblocking could skip the intra and nnz parts.
          /* deblocking shortcut: without MBAFF nothing is reloaded when slice_num is 1
           * or the top neighbour carries the same slice_table entry */
00094     if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
00095         return;
00096 
00097     /* Wow, what a mess, why didn't they simplify the interlacing & intra
00098      * stuff, I can't imagine that these complex rules are worth it. */
00099 
          /* Defaults for the non-MBAFF case. left_block[0..3] select the rows of the
           * left neighbour used for pred modes, nnz and motion data; left_block[4..7]
           * are only used as indices into non_zero_count[] of the left neighbour. */
00100     topleft_xy = top_xy - 1;
00101     topright_xy= top_xy + 1;
00102     left_xy[1] = left_xy[0] = mb_xy-1;
00103     left_block[0]= 0;
00104     left_block[1]= 1;
00105     left_block[2]= 2;
00106     left_block[3]= 3;
00107     left_block[4]= 7;
00108     left_block[5]= 10;
00109     left_block[6]= 8;
00110     left_block[7]= 11;
00111     if(FRAME_MBAFF){
              /* pair_xy addresses the even-row macroblock of the current pair
               * (mb_y with bit 0 cleared); the *_frame_flag values are 1 when the
               * respective macroblock is not IS_INTERLACED */
00112         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
00113         const int top_pair_xy      = pair_xy     - s->mb_stride;
00114         const int topleft_pair_xy  = top_pair_xy - 1;
00115         const int topright_pair_xy = top_pair_xy + 1;
00116         const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
00117         const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
00118         const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
00119         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
00120         const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
00121         const int bottom = (s->mb_y & 1);
00122         tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
00123         if (bottom
00124                 ? !curr_mb_frame_flag // bottom macroblock
00125                 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
00126                 ) {
00127             top_xy -= s->mb_stride;
00128         }
00129         if (bottom
00130                 ? !curr_mb_frame_flag // bottom macroblock
00131                 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
00132                 ) {
00133             topleft_xy -= s->mb_stride;
00134         } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
00135             topleft_xy += s->mb_stride;
00136             // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
00137             topleft_partition = 0;
00138         }
00139         if (bottom
00140                 ? !curr_mb_frame_flag // bottom macroblock
00141                 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
00142                 ) {
00143             topright_xy -= s->mb_stride;
00144         }
              /* current and left pair differ in frame/field coding: both left
               * entries start at the even-row macroblock of the left pair and the
               * row selection in left_block[] is remapped */
00145         if (left_mb_frame_flag != curr_mb_frame_flag) {
00146             left_xy[1] = left_xy[0] = pair_xy - 1;
00147             if (curr_mb_frame_flag) {
00148                 if (bottom) {
00149                     left_block[0]= 2;
00150                     left_block[1]= 2;
00151                     left_block[2]= 3;
00152                     left_block[3]= 3;
00153                     left_block[4]= 8;
00154                     left_block[5]= 11;
00155                     left_block[6]= 8;
00156                     left_block[7]= 11;
00157                 } else {
00158                     left_block[0]= 0;
00159                     left_block[1]= 0;
00160                     left_block[2]= 1;
00161                     left_block[3]= 1;
00162                     left_block[4]= 7;
00163                     left_block[5]= 10;
00164                     left_block[6]= 7;
00165                     left_block[7]= 10;
00166                 }
00167             } else {
                      /* second left entry is the odd-row macroblock of the left pair */
00168                 left_xy[1] += s->mb_stride;
00169                 //left_block[0]= 0;
00170                 left_block[1]= 2;
00171                 left_block[2]= 0;
00172                 left_block[3]= 2;
00173                 //left_block[4]= 7;
00174                 left_block[5]= 10;
00175                 left_block[6]= 7;
00176                 left_block[7]= 10;
00177             }
00178         }
00179     }
00180 
          /* publish the chosen neighbour indices in the context */
00181     h->top_mb_xy = top_xy;
00182     h->left_mb_xy[0] = left_xy[0];
00183     h->left_mb_xy[1] = left_xy[1];
00184     if(for_deblock){
              /* deblocking: a neighbour counts as present when its slice_table entry
               * is below 255, regardless of which slice it belongs to */
00185         topleft_type = 0;
00186         topright_type = 0;
00187         top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
00188         left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
00189         left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
00190 
              /* MBAFF, non-intra macroblock: rebuild the inner part of the caches.
               * non_zero_count_cache gets one bit per block from the 16-bit mask read
               * at non_zero_count[mb_xy][14]; mv_cache/ref_cache are reloaded from the
               * current picture, or cleared / set to LIST_NOT_USED for unused lists */
00191         if(FRAME_MBAFF && !IS_INTRA(mb_type)){
00192             int list;
00193             int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
00194             for(i=0; i<16; i++)
00195                 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
00196             for(list=0; list<h->list_count; list++){
00197                 if(USES_LIST(mb_type,list)){
00198                     uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
00199                     uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
00200                     int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
00201                     for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
00202                         dst[0] = src[0];
00203                         dst[1] = src[1];
00204                         dst[2] = src[2];
00205                         dst[3] = src[3];
00206                     }
00207                     *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
00208                     *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
00209                     ref += h->b8_stride;
00210                     *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
00211                     *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
00212                 }else{
00213                     fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
00214                     fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
00215                 }
00216             }
00217         }
00218     }else{
              /* decoding: a neighbour is usable only if it belongs to the current
               * slice; otherwise its type is treated as 0 (unavailable) */
00219         topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
00220         top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
00221         topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
00222         left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
00223         left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
00224     }
00225 
          /* Intra macroblock: start from the default availability masks and clear
           * bits for every neighbour that is non-intra and either missing (type 0)
           * or excluded by pps.constrained_intra_pred. */
00226     if(IS_INTRA(mb_type)){
00227         h->topleft_samples_available=
00228         h->top_samples_available=
00229         h->left_samples_available= 0xFFFF;
00230         h->topright_samples_available= 0xEEEA;
00231 
00232         if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
00233             h->topleft_samples_available= 0xB3FF;
00234             h->top_samples_available= 0x33FF;
00235             h->topright_samples_available= 0x26EA;
00236         }
00237         for(i=0; i<2; i++){
00238             if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
00239                 h->topleft_samples_available&= 0xDF5F;
00240                 h->left_samples_available&= 0x5F5F;
00241             }
00242         }
00243 
00244         if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
00245             h->topleft_samples_available&= 0x7FFF;
00246 
00247         if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
00248             h->topright_samples_available&= 0xFBFF;
00249 
              /* Intra4x4: load the neighbours' prediction modes into the cache border.
               * A neighbour that is not intra4x4 contributes -1 when it is missing or
               * an inter macroblock under constrained_intra_pred, and 2 otherwise. */
00250         if(IS_INTRA4x4(mb_type)){
00251             if(IS_INTRA4x4(top_type)){
00252                 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
00253                 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
00254                 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
00255                 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
00256             }else{
00257                 int pred;
00258                 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
00259                     pred= -1;
00260                 else{
00261                     pred= 2;
00262                 }
00263                 h->intra4x4_pred_mode_cache[4+8*0]=
00264                 h->intra4x4_pred_mode_cache[5+8*0]=
00265                 h->intra4x4_pred_mode_cache[6+8*0]=
00266                 h->intra4x4_pred_mode_cache[7+8*0]= pred;
00267             }
00268             for(i=0; i<2; i++){
00269                 if(IS_INTRA4x4(left_type[i])){
00270                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
00271                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
00272                 }else{
00273                     int pred;
00274                     if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
00275                         pred= -1;
00276                     else{
00277                         pred= 2;
00278                     }
00279                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
00280                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
00281                 }
00282             }
00283         }
00284     }
00285 
00286 
00287 /*
00288 0 . T T. T T T T
00289 1 L . .L . . . .
00290 2 L . .L . . . .
00291 3 . T TL . . . .
00292 4 L . .L . . . .
00293 5 L . .. . . . .
00294 */
00295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
          /* Non-zero coefficient counts of the top neighbour go into the cache
           * positions marked T above. Without a top neighbour the entries are 0 for
           * CABAC non-intra macroblocks and 64 otherwise. */
00296     if(top_type){
00297         h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
00298         h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
00299         h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
00300         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
00301 
00302         h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
00303         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
00304 
00305         h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
00306         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
00307 
00308     }else{
00309         h->non_zero_count_cache[4+8*0]=
00310         h->non_zero_count_cache[5+8*0]=
00311         h->non_zero_count_cache[6+8*0]=
00312         h->non_zero_count_cache[7+8*0]=
00313 
00314         h->non_zero_count_cache[1+8*0]=
00315         h->non_zero_count_cache[2+8*0]=
00316 
00317         h->non_zero_count_cache[1+8*3]=
00318         h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
00319 
00320     }
00321 
          /* same for the two left neighbour entries (positions marked L above) */
00322     for (i=0; i<2; i++) {
00323         if(left_type[i]){
00324             h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
00325             h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
00326             h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
00327             h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
00328         }else{
00329             h->non_zero_count_cache[3+8*1 + 2*8*i]=
00330             h->non_zero_count_cache[3+8*2 + 2*8*i]=
00331             h->non_zero_count_cache[0+8*1 +   8*i]=
00332             h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
00333         }
00334     }
00335 
          /* CABAC only: coded block pattern of the top and left neighbours.
           * A missing neighbour yields 0x1C0 for intra macroblocks and 0 otherwise. */
00336     if( h->pps.cabac ) {
00337         // top_cbp
00338         if(top_type) {
00339             h->top_cbp = h->cbp_table[top_xy];
00340         } else if(IS_INTRA(mb_type)) {
00341             h->top_cbp = 0x1C0;
00342         } else {
00343             h->top_cbp = 0;
00344         }
00345         // left_cbp
00346         if (left_type[0]) {
00347             h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
00348         } else if(IS_INTRA(mb_type)) {
00349             h->left_cbp = 0x1C0;
00350         } else {
00351             h->left_cbp = 0;
00352         }
              /* bits 1 and 3 of left_cbp are taken from the cbp bit of the left
               * neighbours selected through left_block[0] and left_block[2] */
00353         if (left_type[0]) {
00354             h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
00355         }
00356         if (left_type[1]) {
00357             h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
00358         }
00359     }
00360 
00361 #if 1
          /* Inter / direct macroblock: load the border of the motion caches. */
00362     if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
00363         int list;
00364         for(list=0; list<h->list_count; list++){
                  /* a list the macroblock does not use is skipped unless the macroblock
                   * is direct or h->deblocking_filter is set */
00365             if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
00366                 /*if(!h->mv_cache_clean[list]){
00367                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
00368                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
00369                     h->mv_cache_clean[list]= 1;
00370                 }*/
00371                 continue;
00372             }
00373             h->mv_cache_clean[list]= 0;
00374 
                  /* top neighbour: the four vectors of its last 4x4 row and the two
                   * reference indices of its second 8x8 row; otherwise zero vectors and
                   * LIST_NOT_USED (neighbour exists) or PART_NOT_AVAILABLE (it does not) */
00375             if(USES_LIST(top_type, list)){
00376                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
00377                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
00378                 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
00379                 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
00380                 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
00381                 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
00382                 h->ref_cache[list][scan8[0] + 0 - 1*8]=
00383                 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
00384                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
00385                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
00386             }else{
00387                 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
00388                 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
00389                 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
00390                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
00391                 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
00392             }
00393 
                  /* left neighbours: two cache rows per left entry, taken from the last
                   * 4x4 column of the neighbour at the rows given by left_block[] */
00394             for(i=0; i<2; i++){
00395                 int cache_idx = scan8[0] - 1 + i*2*8;
00396                 if(USES_LIST(left_type[i], list)){
00397                     const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
00398                     const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
00399                     *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
00400                     *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
00401                     h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
00402                     h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
00403                 }else{
00404                     *(uint32_t*)h->mv_cache [list][cache_idx  ]=
00405                     *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
00406                     h->ref_cache[list][cache_idx  ]=
00407                     h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00408                 }
00409             }
00410 
                  /* without MBAFF, deblocking and temporal-direct macroblocks stop here:
                   * the top-left / top-right entries and everything below are not loaded */
00411             if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
00412                 continue;
00413 
                  /* top-left neighbour; with topleft_partition == -1 the masks select the
                   * extra stride offsets, with 0 they vanish */
00414             if(USES_LIST(topleft_type, list)){
00415                 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
00416                 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
00417                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
00418                 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
00419             }else{
00420                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
00421                 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00422             }
00423 
                  /* top-right neighbour: first vector of its last 4x4 row */
00424             if(USES_LIST(topright_type, list)){
00425                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
00426                 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
00427                 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
00428                 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
00429             }else{
00430                 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
00431                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
00432             }
00433 
                  /* without MBAFF, skip and direct macroblocks need nothing further */
00434             if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
00435                 continue;
00436 
                  /* mark these cache positions as not available, with zero vectors */
00437             h->ref_cache[list][scan8[5 ]+1] =
00438             h->ref_cache[list][scan8[7 ]+1] =
00439             h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
00440             h->ref_cache[list][scan8[4 ]] =
00441             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
00442             *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
00443             *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
00444             *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
00445             *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
00446             *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
00447 
                  /* CABAC only: neighbour values from mvd_table, laid out like the
                   * motion vectors above; zero where the neighbour does not use the list */
00448             if( h->pps.cabac ) {
00449                 /* XXX beurk, Load mvd */
00450                 if(USES_LIST(top_type, list)){
00451                     const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
00452                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
00453                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
00454                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
00455                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
00456                 }else{
00457                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
00458                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
00459                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
00460                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
00461                 }
00462                 if(USES_LIST(left_type[0], list)){
00463                     const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
00464                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
00465                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
00466                 }else{
00467                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
00468                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
00469                 }
00470                 if(USES_LIST(left_type[1], list)){
00471                     const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
00472                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
00473                     *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
00474                 }else{
00475                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
00476                     *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
00477                 }
00478                 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
00479                 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
00480                 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
00481                 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
00482                 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
00483 
                      /* B slices: direct_cache is cleared for the current macroblock and
                       * its top / left border is set to 1 for direct neighbours, copied
                       * from direct_table for 8x8 neighbours, and 0 otherwise */
00484                 if(h->slice_type == FF_B_TYPE){
00485                     fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
00486 
00487                     if(IS_DIRECT(top_type)){
00488                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
00489                     }else if(IS_8X8(top_type)){
00490                         int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
00491                         h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
00492                         h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
00493                     }else{
00494                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
00495                     }
00496 
00497                     if(IS_DIRECT(left_type[0]))
00498                         h->direct_cache[scan8[0] - 1 + 0*8]= 1;
00499                     else if(IS_8X8(left_type[0]))
00500                         h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
00501                     else
00502                         h->direct_cache[scan8[0] - 1 + 0*8]= 0;
00503 
00504                     if(IS_DIRECT(left_type[1]))
00505                         h->direct_cache[scan8[0] - 1 + 2*8]= 1;
00506                     else if(IS_8X8(left_type[1]))
00507                         h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
00508                     else
00509                         h->direct_cache[scan8[0] - 1 + 2*8]= 0;
00510                 }
00511             }
00512 
                  /* MBAFF: rescale cached neighbour entries whose frame/field coding
                   * differs from the current macroblock. MAP_MVS applies MAP_F2F to every
                   * border position. In the MB_FIELD case a non-interlaced neighbour with
                   * ref >= 0 gets its ref doubled and its vertical mv/mvd halved; in the
                   * other case an interlaced neighbour gets the inverse treatment. */
00513             if(FRAME_MBAFF){
00514 #define MAP_MVS\
00515                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
00516                     MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
00517                     MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
00518                     MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
00519                     MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
00520                     MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
00521                     MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
00522                     MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
00523                     MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
00524                     MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
00525                 if(MB_FIELD){
00526 #define MAP_F2F(idx, mb_type)\
00527                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
00528                         h->ref_cache[list][idx] <<= 1;\
00529                         h->mv_cache[list][idx][1] /= 2;\
00530                         h->mvd_cache[list][idx][1] /= 2;\
00531                     }
00532                     MAP_MVS
00533 #undef MAP_F2F
00534                 }else{
00535 #define MAP_F2F(idx, mb_type)\
00536                     if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
00537                         h->ref_cache[list][idx] >>= 1;\
00538                         h->mv_cache[list][idx][1] <<= 1;\
00539                         h->mvd_cache[list][idx][1] <<= 1;\
00540                     }
00541                     MAP_MVS
00542 #undef MAP_F2F
00543                 }
00544             }
00545         }
00546     }
00547 #endif
00548 
          /* number (0..2) of top / first-left neighbours flagged IS_8x8DCT */
00549     h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
00550 }
00551 
00552 static inline void write_back_intra_pred_mode(H264Context *h){
00553     const int mb_xy= h->mb_xy;
00554 
00555     h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
00556     h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
00557     h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
00558     h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
00559     h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
00560     h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
00561     h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
00562 }
00563 
00564 /**
00565  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
00566  */
00567 static inline int check_intra4x4_pred_mode(H264Context *h){
00568     MpegEncContext * const s = &h->s;
00569     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00570     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00571     int i;
00572 
00573     if(!(h->top_samples_available&0x8000)){
00574         for(i=0; i<4; i++){
00575             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00576             if(status<0){
00577                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00578                 return -1;
00579             } else if(status){
00580                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00581             }
00582         }
00583     }
00584 
00585     if(!(h->left_samples_available&0x8000)){
00586         for(i=0; i<4; i++){
00587             int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00588             if(status<0){
00589                 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00590                 return -1;
00591             } else if(status){
00592                 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00593             }
00594         }
00595     }
00596 
00597     return 0;
00598 } //FIXME cleanup like next
00599 
00600 /**
00601  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
00602  */
00603 static inline int check_intra_pred_mode(H264Context *h, int mode){
00604     MpegEncContext * const s = &h->s;
00605     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
00606     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
00607 
00608     if(mode > 6U) {
00609         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
00610         return -1;
00611     }
00612 
00613     if(!(h->top_samples_available&0x8000)){
00614         mode= top[ mode ];
00615         if(mode<0){
00616             av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00617             return -1;
00618         }
00619     }
00620 
00621     if(!(h->left_samples_available&0x8000)){
00622         mode= left[ mode ];
00623         if(mode<0){
00624             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00625             return -1;
00626         }
00627     }
00628 
00629     return mode;
00630 }
00631 
00632 /**
00633  * gets the predicted intra4x4 prediction mode.
00634  */
00635 static inline int pred_intra_mode(H264Context *h, int n){
00636     const int index8= scan8[n];
00637     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
00638     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
00639     const int min= FFMIN(left, top);
00640 
00641     tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
00642 
00643     if(min<0) return DC_PRED;
00644     else      return min;
00645 }
00646 
00647 static inline void write_back_non_zero_count(H264Context *h){
00648     const int mb_xy= h->mb_xy;
00649 
00650     h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
00651     h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
00652     h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
00653     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
00654     h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
00655     h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
00656     h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
00657 
00658     h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
00659     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
00660     h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
00661 
00662     h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
00663     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
00664     h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
00665 
00666     if(FRAME_MBAFF){
00667         // store all luma nnzs, for deblocking
00668         int v = 0, i;
00669         for(i=0; i<16; i++)
00670             v += (!!h->non_zero_count_cache[scan8[i]]) << i;
00671         *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
00672     }
00673 }
00674 
00675 /**
00676  * gets the predicted number of non zero coefficients.
00677  * @param n block index
00678  */
00679 static inline int pred_non_zero_count(H264Context *h, int n){
00680     const int index8= scan8[n];
00681     const int left= h->non_zero_count_cache[index8 - 1];
00682     const int top = h->non_zero_count_cache[index8 - 8];
00683     int i= left + top;
00684 
00685     if(i<64) i= (i+1)>>1;
00686 
00687     tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
00688 
00689     return i&31;
00690 }
00691 
00692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
00693     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
00694     MpegEncContext *s = &h->s;
00695 
00696     /* there is no consistent mapping of mvs to neighboring locations that will
00697      * make mbaff happy, so we can't move all this logic to fill_caches */
00698     if(FRAME_MBAFF){
00699         const uint32_t *mb_types = s->current_picture_ptr->mb_type;
00700         const int16_t *mv;
00701         *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
00702         *C = h->mv_cache[list][scan8[0]-2];
00703 
00704         if(!MB_FIELD
00705            && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
00706             int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
00707             if(IS_INTERLACED(mb_types[topright_xy])){
00708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
00709                 const int x4 = X4, y4 = Y4;\
00710                 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
00711                 if(!USES_LIST(mb_type,list))\
00712                     return LIST_NOT_USED;\
00713                 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
00714                 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
00715                 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
00716                 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
00717 
00718                 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
00719             }
00720         }
00721         if(topright_ref == PART_NOT_AVAILABLE
00722            && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
00723            && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
00724             if(!MB_FIELD
00725                && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
00726                 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
00727             }
00728             if(MB_FIELD
00729                && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
00730                && i >= scan8[0]+8){
00731                 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
00732                 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
00733             }
00734         }
00735 #undef SET_DIAG_MV
00736     }
00737 
00738     if(topright_ref != PART_NOT_AVAILABLE){
00739         *C= h->mv_cache[list][ i - 8 + part_width ];
00740         return topright_ref;
00741     }else{
00742         tprintf(s->avctx, "topright MV not available\n");
00743 
00744         *C= h->mv_cache[list][ i - 8 - 1 ];
00745         return h->ref_cache[list][ i - 8 - 1 ];
00746     }
00747 }
00748 
00749 /**
00750  * gets the predicted MV.
00751  * @param n the block index
00752  * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
00753  * @param mx the x component of the predicted motion vector
00754  * @param my the y component of the predicted motion vector
00755  */
00756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
00757     const int index8= scan8[n];
00758     const int top_ref=      h->ref_cache[list][ index8 - 8 ];
00759     const int left_ref=     h->ref_cache[list][ index8 - 1 ];
00760     const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
00761     const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
00762     const int16_t * C;
00763     int diagonal_ref, match_count;
00764 
00765     assert(part_width==1 || part_width==2 || part_width==4);
00766 
00767 /* mv_cache
00768   B . . A T T T T
00769   U . . L . . , .
00770   U . . L . . . .
00771   U . . L . . , .
00772   . . . L . . . .
00773 */
00774 
00775     diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
00776     match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
00777     tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
00778     if(match_count > 1){ //most common
00779         *mx= mid_pred(A[0], B[0], C[0]);
00780         *my= mid_pred(A[1], B[1], C[1]);
00781     }else if(match_count==1){
00782         if(left_ref==ref){
00783             *mx= A[0];
00784             *my= A[1];
00785         }else if(top_ref==ref){
00786             *mx= B[0];
00787             *my= B[1];
00788         }else{
00789             *mx= C[0];
00790             *my= C[1];
00791         }
00792     }else{
00793         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
00794             *mx= A[0];
00795             *my= A[1];
00796         }else{
00797             *mx= mid_pred(A[0], B[0], C[0]);
00798             *my= mid_pred(A[1], B[1], C[1]);
00799         }
00800     }
00801 
00802     tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
00803 }
00804 
00805 /**
00806  * gets the directionally predicted 16x8 MV.
00807  * @param n the block index
00808  * @param mx the x component of the predicted motion vector
00809  * @param my the y component of the predicted motion vector
00810  */
00811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
00812     if(n==0){
00813         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
00814         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
00815 
00816         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
00817 
00818         if(top_ref == ref){
00819             *mx= B[0];
00820             *my= B[1];
00821             return;
00822         }
00823     }else{
00824         const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
00825         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
00826 
00827         tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
00828 
00829         if(left_ref == ref){
00830             *mx= A[0];
00831             *my= A[1];
00832             return;
00833         }
00834     }
00835 
00836     //RARE
00837     pred_motion(h, n, 4, list, ref, mx, my);
00838 }
00839 
00840 /**
00841  * gets the directionally predicted 8x16 MV.
00842  * @param n the block index
00843  * @param mx the x component of the predicted motion vector
00844  * @param my the y component of the predicted motion vector
00845  */
00846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
00847     if(n==0){
00848         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
00849         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
00850 
00851         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
00852 
00853         if(left_ref == ref){
00854             *mx= A[0];
00855             *my= A[1];
00856             return;
00857         }
00858     }else{
00859         const int16_t * C;
00860         int diagonal_ref;
00861 
00862         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
00863 
00864         tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
00865 
00866         if(diagonal_ref == ref){
00867             *mx= C[0];
00868             *my= C[1];
00869             return;
00870         }
00871     }
00872 
00873     //RARE
00874     pred_motion(h, n, 2, list, ref, mx, my);
00875 }
00876 
00877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
00878     const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
00879     const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
00880 
00881     tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
00882 
00883     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
00884        || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
00885        || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
00886 
00887         *mx = *my = 0;
00888         return;
00889     }
00890 
00891     pred_motion(h, 0, 4, 0, 0, mx, my);
00892 
00893     return;
00894 }
00895 
00896 static inline void direct_dist_scale_factor(H264Context * const h){
00897     const int poc = h->s.current_picture_ptr->poc;
00898     const int poc1 = h->ref_list[1][0].poc;
00899     int i;
00900     for(i=0; i<h->ref_count[0]; i++){
00901         int poc0 = h->ref_list[0][i].poc;
00902         int td = av_clip(poc1 - poc0, -128, 127);
00903         if(td == 0 /* FIXME || pic0 is a long-term ref */){
00904             h->dist_scale_factor[i] = 256;
00905         }else{
00906             int tb = av_clip(poc - poc0, -128, 127);
00907             int tx = (16384 + (FFABS(td) >> 1)) / td;
00908             h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
00909         }
00910     }
00911     if(FRAME_MBAFF){
00912         for(i=0; i<h->ref_count[0]; i++){
00913             h->dist_scale_factor_field[2*i] =
00914             h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
00915         }
00916     }
00917 }
00918 static inline void direct_ref_list_init(H264Context * const h){
00919     MpegEncContext * const s = &h->s;
00920     Picture * const ref1 = &h->ref_list[1][0];
00921     Picture * const cur = s->current_picture_ptr;
00922     int list, i, j;
00923     if(cur->pict_type == FF_I_TYPE)
00924         cur->ref_count[0] = 0;
00925     if(cur->pict_type != FF_B_TYPE)
00926         cur->ref_count[1] = 0;
00927     for(list=0; list<2; list++){
00928         cur->ref_count[list] = h->ref_count[list];
00929         for(j=0; j<h->ref_count[list]; j++)
00930             cur->ref_poc[list][j] = h->ref_list[list][j].poc;
00931     }
00932     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
00933         return;
00934     for(list=0; list<2; list++){
00935         for(i=0; i<ref1->ref_count[list]; i++){
00936             const int poc = ref1->ref_poc[list][i];
00937             h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
00938             for(j=0; j<h->ref_count[list]; j++)
00939                 if(h->ref_list[list][j].poc == poc){
00940                     h->map_col_to_list0[list][i] = j;
00941                     break;
00942                 }
00943         }
00944     }
00945     if(FRAME_MBAFF){
00946         for(list=0; list<2; list++){
00947             for(i=0; i<ref1->ref_count[list]; i++){
00948                 j = h->map_col_to_list0[list][i];
00949                 h->map_col_to_list0_field[list][2*i] = 2*j;
00950                 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
00951             }
00952         }
00953     }
00954 }
00955 
00956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
00957     MpegEncContext * const s = &h->s;
00958     const int mb_xy =   h->mb_xy;
00959     const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
00960     const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
00961     const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
00962     const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
00963     const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
00964     const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
00965     const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
00966     const int is_b8x8 = IS_8X8(*mb_type);
00967     unsigned int sub_mb_type;
00968     int i8, i4;
00969 
00970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTR