// Review notes (free text ahead of the "Index:" header; not part of any hunk).
//
// Unified diff of snow.c, revision 6149 -> working copy. Visible changes:
//  - adds BLOCK_MESH (4) next to BLOCK_INTRA/BLOCK_OPT and an "int mesh" member
//    beside max_ref_frames/ref_frames;
//  - adds RAC_CTX_MESH, a state index of 11 plus the number of left/top
//    neighbours (0..2) whose type has BLOCK_MESH set;
//  - masks the neighbour types with BLOCK_INTRA in the existing intra/inter
//    context and, when s->mesh is set, codes one extra bit per block with
//    put_rac()/get_rac() under RAC_CTX_MESH;
//  - passes mesh*BLOCK_MESH as part of the type given to set_blocks();
//  - adds a prediction branch using s->dsp.gmc(), taken when rb has BLOCK_MESH,
//    plane_index==0 and the four blocks are not all intra;
//  - adds decide_mesh(), which calls add_yblock() and me_cmp once with
//    BLOCK_MESH set and once with it cleared, then keeps the flag only when
//    score[1] < score[0];
//  - writes/reads s->mesh in the header with put_rac()/get_rac() and sets
//    s->mesh = 1 right after max_ref_frames is initialised;
//  - debugging aids: "#undef printf", the mesh_stats[2] counters, and a printf
//    of them whenever a static counter, incremented on each pass, is a
//    multiple of 16.
//
// Caveats about this copy:
//  - Line breaks, indentation and blank lines had been collapsed; the layout
//    below is reconstructed and blank context lines are not restored, so hunk
//    line counts may not match.
//  - Text that sat between a '<' and a following '>' appears to have been
//    dropped in several places. The fused leftovers are kept verbatim:
//    "#include" without a header name, "for(x=0; xdsp.gmc(",
//    "for(y=0; y>= !!plane_index;", "for(y=0; y> FRAC_BITS;",
//    "for(y=y0; ymesh){" and "levelspatial_decomposition_count".
//  - NOTE(review): re-fetch the patch from its origin before trying to apply it.
//
Index: snow.c
===================================================================
--- snow.c (revision 6149)
+++ snow.c (working copy)
@@ -28,6 +28,9 @@
 #undef NDEBUG
 #include
+#undef printf
+static int mesh_stats[2] = {0,0};
+
 static const int8_t quant3[256]={
  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -377,6 +380,7 @@
 //#define TYPE_SPLIT 1
 #define BLOCK_INTRA 1
 #define BLOCK_OPT 2
+#define BLOCK_MESH 4
 //#define TYPE_NOCOLOR 4
     uint8_t level; //FIXME merge into type?
 }BlockNode;
@@ -442,6 +446,7 @@
     int temporal_decomposition_count;
     int max_ref_frames;
     int ref_frames;
+    int mesh;
     int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
     uint32_t *ref_scores[MAX_REF_FRAMES];
     DWTELEM *spatial_dwt_buffer;
@@ -481,6 +486,7 @@
 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
 static void iterative_me(SnowContext *s);
+static int decide_mesh(SnowContext *s, int mb_x, int mb_y);
 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
 {
@@ -1967,6 +1973,8 @@
     }
 }
+#define RAC_CTX_MESH (11 + (((left->type&BLOCK_MESH) + (top->type&BLOCK_MESH))>>2))
+
 //FIXME copy&paste
 #define P_LEFT P[1]
 #define P_TOP P[2]
@@ -2017,7 +2025,7 @@
     int mx_context= av_log2(2*ABS(left->mx - top->mx));
     int my_context= av_log2(2*ABS(left->my - top->my));
     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
-    int ref, best_ref, ref_score, ref_mx, ref_my;
+    int ref, best_ref, ref_score, ref_mx, ref_my, mesh;
     assert(sizeof(s->block_state) >= 256);
     if(s->keyframe){
@@ -2106,6 +2114,14 @@
             my= ref_my;
         }
     }
+
+    if(s->mesh && level == s->block_max_depth){
+        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
+        mesh = decide_mesh(s, x, y);
+        mesh_stats[mesh]++;
+    }else
+        mesh = 0;
+
     //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
     // subpel search
@@ -2116,7 +2132,9 @@
     if(level!=s->block_max_depth)
         put_rac(&pc, &p_state[4 + s_context], 1);
-    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
+    put_rac(&pc, &p_state[1 + (left->type&BLOCK_INTRA) + (top->type&BLOCK_INTRA)], 0);
+    if(s->mesh)
+        put_rac(&pc, &p_state[RAC_CTX_MESH], mesh);
     if(s->ref_frames > 1)
         put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
     pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
@@ -2147,7 +2165,9 @@
     memcpy(i_state, s->block_state, sizeof(s->block_state));
     if(level!=s->block_max_depth)
         put_rac(&ic, &i_state[4 + s_context], 1);
-    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
+    put_rac(&ic, &i_state[1 + (left->type&BLOCK_INTRA) + (top->type&BLOCK_INTRA)], 1);
+    if(s->mesh)
+        put_rac(&ic, &i_state[RAC_CTX_MESH], mesh);
     put_symbol(&ic, &i_state[32], l-pl , 1);
     put_symbol(&ic, &i_state[64], cb-pcb, 1);
     put_symbol(&ic, &i_state[96], cr-pcr, 1);
@@ -2190,7 +2210,7 @@
         s->c= ic;
         s->c.bytestream_start= pbbak_start;
         s->c.bytestream= pbbak + i_len;
-        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
+        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA + mesh*BLOCK_MESH);
         memcpy(s->block_state, i_state, sizeof(s->block_state));
         return iscore;
     }else{
@@ -2198,7 +2218,7 @@
         s->c= pc;
         s->c.bytestream_start= pbbak_start;
         s->c.bytestream= pbbak + p_len;
-        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
+        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, mesh*BLOCK_MESH);
         memcpy(s->block_state, p_state, sizeof(s->block_state));
         return score;
     }
@@ -2251,6 +2271,8 @@
     if(b->type & BLOCK_INTRA){
         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
+        if(s->mesh)
+            put_rac(&s->c, &s->block_state[RAC_CTX_MESH], b->type&BLOCK_MESH);
         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
         put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
         put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
@@ -2258,6 +2280,8 @@
     }else{
         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
+        if(s->mesh)
+            put_rac(&s->c, &s->block_state[RAC_CTX_MESH], b->type&BLOCK_MESH);
         if(s->ref_frames > 1)
             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
@@ -2294,9 +2318,11 @@
         int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
         int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
-        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
+        type= get_rac(&s->c, &s->block_state[1 + (left->type&BLOCK_INTRA) + (top->type&BLOCK_INTRA)]) ? BLOCK_INTRA : 0;
+        if(s->mesh)
+            type|= get_rac(&s->c, &s->block_state[RAC_CTX_MESH]) * BLOCK_MESH;
-        if(type){
+        if(type & BLOCK_INTRA){
             pred_mv(s, &mx, &my, 0, left, top, tr);
             l += get_symbol(&s->c, &s->block_state[32], 1);
             cb+= get_symbol(&s->c, &s->block_state[64], 1);
@@ -2608,6 +2634,55 @@
 // src += src_x + src_y*src_stride;
     ptmp= tmp + 3*tmp_step;
+
+    if((rb->type & BLOCK_MESH) && plane_index==0
+       && !(lt->type & rt->type & lb->type & rb->type & BLOCK_INTRA)){
+        //FIXME breaks with multiref
+        //FIXME breaks with frames bigger than 4k x 4k (ox,oy overflow)
+        //FIXME division
+        uint8_t *src= s->last_picture[rb->ref].data[plane_index];
+        const int bits = 4; // 16th-pel precision
+        const int shift = (s->avctx->flags & CODEC_FLAG_QPEL) ? bits-2 : bits-1;
+        int ox, oy, dxx, dxy, dyx, dyy, i;
+        dxx = (rt->mx + rb->mx - lt->mx - lb->mx) << (15+shift);
+        dyx = (rt->my + rb->my - lt->my - lb->my) << (15+shift);
+        dxy = (lb->mx + rb->mx - lt->mx - rt->mx) << (15+shift);
+        dyy = (lb->my + rb->my - lt->my - rt->my) << (15+shift);
+        ox = ((lt->mx + rt->mx + lb->mx + rb->mx) << (14+shift))
+           + (src_x << (16+bits)) - ((dxx + dxy) >> 1);
+        oy = ((lt->my + rt->my + lb->my + rb->my) << (14+shift))
+           + (src_y << (16+bits)) - ((dyx + dyy) >> 1);
+        dxx = (dxx / b_w) + (1<<(16+bits));
+        dyx = (dyx / b_w);
+        dxy = (dxy / b_h);
+        dyy = (dyy / b_h) + (1<<(16+bits));
+
+        for(x=0; xdsp.gmc(ptmp+x, src, src_stride, b_h,
+                ox, oy, dxx, dxy, dyx, dyy,
+                bits, 1<<(2*bits-1), w, h);
+            ox += dxx*8;
+            oy += dyx*8;
+        }
+
+        block[0] = block[1] = block[2] = block[3] = ptmp;
+        if(lt->type & BLOCK_INTRA){
+            block[0] = ptmp += tmp_step;
+            pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
+        }
+        if(rt->type & BLOCK_INTRA){
+            block[1] = ptmp += tmp_step;
+            pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
+        }
+        if(lb->type & BLOCK_INTRA){
+            block[2] = ptmp += tmp_step;
+            pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
+        }
+        if(rb->type & BLOCK_INTRA){
+            block[3] = ptmp += tmp_step;
+            pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
+        }
+    }else{
     block[0]= ptmp;
     ptmp+=tmp_step;
     pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
@@ -2640,6 +2715,7 @@
         block[3]= ptmp;
         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
     }
+    }
 #if 0
     for(y=0; y>= !!plane_index;
+    src_y >>= !!plane_index;
+    for(y=0; y> FRAC_BITS;
+    if(v&(~255)) v= ~(v>>31);
+    dst8[x + y*src_stride] = v;
+    }else{
+    dstp[x] -= v;
+    }
+    }
+    }
+    }else
     if(sliced){
     START_TIMER
@@ -2841,6 +2937,36 @@
     predict_slice(s, buf, plane_index, add, mb_y);
 }
+static int decide_mesh(SnowContext *s, int mb_x, int mb_y){
+    static const DWTELEM zero_dst[4096];
+    const uint8_t *obmc = obmc_tab[s->block_max_depth];
+    int stride = s->current_picture.linesize[0];
+    int b_stride = s->b_width << s->block_max_depth;
+    int block_w = MB_SIZE >> s->block_max_depth;
+    int obmc_stride = 2*block_w;
+    int x = block_w*(mb_x-1) + block_w/2;
+    int y = block_w*(mb_y-1) + block_w/2;
+    uint8_t *src = s-> input_picture.data[0];
+    uint8_t *dst = s->current_picture.data[0];
+    BlockNode *b= &s->block[mb_x + mb_y*b_stride];
+    int mesh, score[2];
+    //FIXME skip the decision if all 4 blocks use the same mv
+    //note: not same_block(), since we don't care whether some are intra.
+    b->type |= BLOCK_MESH;
+    for(mesh=1; mesh>=0; mesh--){
+        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
+                   x, y, block_w, block_w, s->plane[0].width, s->plane[0].height,
+                   0, stride, obmc_stride, mb_x-1, mb_y-1, 1, 1, 0);
+        score[mesh] = s->dsp.me_cmp[s->block_max_depth](&s->m,
+                   src+x+y*stride, dst+x+y*stride, stride, block_w);
+        b->type &= ~BLOCK_MESH;
+    }
+    mesh = (score[1] < score[0]);
+    b->type |= mesh * BLOCK_MESH;
+// printf("%d : %d \n", score[1], score[0]);
+    return mesh;
+}
+
 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
     int i, x2, y2;
     Plane *p= &s->plane[plane_index];
@@ -2946,6 +3072,7 @@
     DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
     uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
     uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
+    static const DWTELEM zero_dst[4096];
     const int b_stride = s->b_width << s->block_max_depth;
     const int b_height = s->b_height<< s->block_max_depth;
     const int w= p->width;
@@ -2963,6 +3090,17 @@
     pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
+    if(s->mesh){//FIXME limit to the blocks that need it
+        for(i=3; i>=0; i--){
+            int mb_x2= mb_x - (i&1);
+            int mb_y2= mb_y - (i>>1);
+            int x= block_w*mb_x2 + block_w/2;
+            int y= block_w*mb_y2 + block_w/2;
+            add_yblock(s, 0, NULL, zero_dst, dst, obmc,
+                       x, y, block_w, block_w, w, h, 0, ref_stride, obmc_stride,
+                       mb_x2, mb_y2, 1, 1, plane_index);
+        }
+    }else
     for(y=y0; ymesh){
+    decide_mesh(s, mb_x, mb_y);
+// if((block->type ^ backup.type) & BLOCK_MESH)
+// block->type &= ~BLOCK_OPT;
+    }
+
     if(!same_block(block, &backup)){
     if(tb ) tb ->type &= ~BLOCK_OPT;
     if(lb ) lb ->type &= ~BLOCK_OPT;
@@ -3617,6 +3761,7 @@
     put_rac(&s->c, s->header_state, s->spatial_scalability);
 // put_rac(&s->c, s->header_state, s->rate_scalability);
     put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
+    put_rac(&s->c, s->header_state, s->mesh);
     for(plane_index=0; plane_index<2; plane_index++){
         for(level=0; levelspatial_decomposition_count; level++){
@@ -3659,6 +3804,7 @@
     s->spatial_scalability= get_rac(&s->c, s->header_state);
 // s->rate_scalability= get_rac(&s->c, s->header_state);
     s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
+    s->mesh= get_rac(&s->c, s->header_state);
     for(plane_index=0; plane_index<3; plane_index++){
         for(level=0; levelspatial_decomposition_count; level++){
@@ -3942,6 +4088,7 @@
     h263_encode_init(&s->m); //mv_penalty
     s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
+    s->mesh = 1;
     if(avctx->flags&CODEC_FLAG_PASS1){
         if(!avctx->stats_out)
@@ -4274,6 +4421,13 @@
     emms_c();
+    {
+        static int i = 0;
+        i++;
+        if(!(i&15))
+            printf("mesh: %d / %d \n", mesh_stats[1], mesh_stats[0]);
+    }
+
     return ff_rac_terminate(c);
 }