Index: ffv1.c =================================================================== RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/ffv1.c,v retrieving revision 1.31 diff -u -r1.31 ffv1.c --- ffv1.c 24 Feb 2005 16:39:03 -0000 1.31 +++ ffv1.c 9 Mar 2005 00:36:49 -0000 @@ -31,6 +31,8 @@ #include "rangecoder.h" #include "golomb.h" +#undef printf + #define MAX_PLANES 4 #define CONTEXT_SIZE 32 @@ -158,7 +160,11 @@ typedef struct PlaneContext{ int context_count; + int context_count_temporal; + int context_count_spatial; + int context_count_spacetime; uint8_t (*state)[CONTEXT_SIZE]; + int32_t *state_spacetime; VlcState *vlc_state; uint8_t interlace_bit_state[2]; } PlaneContext; @@ -174,12 +180,16 @@ int flags; int picture_number; AVFrame picture; + AVFrame prev_picture; int plane_count; int ac; ///< 1-> CABAC 0-> golomb rice PlaneContext plane[MAX_PLANES]; - int16_t quant_table[5][256]; + int16_t quant_table[6][256]; int run_index; int colorspace; + int context_model; + int prediction_method; + int temporal; DSPContext dsp; }FFV1Context; @@ -196,21 +206,65 @@ return diff; } -static inline int predict(int_fast16_t *src, int_fast16_t *last){ - const int LT= last[-1]; - const int T= last[ 0]; +#define LOAD_PIXELS0 \ + const int LT= last[-1]; \ + const int T= last[ 0]; \ + const int RT= last[ 1]; \ const int L = src[-1]; +#define LOAD_PIXELS1 \ + const int PLT=ref2[-1]; \ + const int PT= ref2[ 0]; \ + const int PL= ref[-1]; \ + const int P = ref[ 0]; + +/* +static inline int decide_temporal(int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){ + LOAD_PIXELS0; + LOAD_PIXELS1; + + return ABS(quant13[(L-PL) & 0xFF]) + ABS(quant13[(LT-PLT) & 0xFF]) + ABS(quant13[(T-PT) & 0xFF]) + < ABS(quant13[(L-LT) & 0xFF]) + ABS(quant13[(LT-T) & 0xFF]) + ABS(quant13[(T-RT) & 0xFF]); +} +*/ + +static inline int get_context_spacetime(int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){ + LOAD_PIXELS0; + LOAD_PIXELS1; + + return 19 * (ABS(quant13[(L-PL) & 0xFF]) + ABS(quant13[(LT-PLT) & 0xFF]) + ABS(quant13[(T-PT) & 0xFF])) + + (ABS(quant13[(L-LT) & 0xFF]) + ABS(quant13[(LT-T) & 0xFF]) + ABS(quant13[(T-RT) & 0xFF])); +} + +static inline int decide_spacetime(int32_t state){ + return state < 0; +} + +static inline int update_spacetime(int32_t *state, int diff_space, int diff_time){ + //FIXME: quant? + *state += ABS(diff_time) - ABS(diff_space); + if(ABS(*state) > 10000) + *state >>= 1; +} + +static inline int predict(int_fast16_t *src, int_fast16_t *last){ + LOAD_PIXELS0; return mid_pred(L, L + T - LT, T); } +static inline int predict_temporal(int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){ + LOAD_PIXELS0; + LOAD_PIXELS1; + return P; +// const int G = L + T - LT; +// const int PG= PL + PT - PLT; +// return mid_pred(P, G, P + G - PG); +} + static inline int get_context(FFV1Context *f, int_fast16_t *src, int_fast16_t *last, int_fast16_t *last2){ - const int LT= last[-1]; - const int T= last[ 0]; - const int RT= last[ 1]; - const int L = src[-1]; + LOAD_PIXELS0; - if(f->quant_table[3][127]){ + if(f->context_model){ const int TT= last2[0]; const int LL= src[-2]; return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF] @@ -219,6 +273,37 @@ return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF]; } +static inline int get_context_temporal(FFV1Context *f, int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){ + LOAD_PIXELS0; + LOAD_PIXELS1; + +// printf("gct: L%02x PL%02x T%02x PT%02x q3[L]=%d q4[T]=%d q2[42]=%d q3[42]=%d q4[42]=%d\n", +// L, PL, T, PT, f->quant_table[3][(L-PL) & 0xFF], f->quant_table[4][(T-PT) & 0xFF], +// f->quant_table[2][42], f->quant_table[3][42], f->quant_table[4][42]); + //FIXME some spatial variation too? + return f->quant_table[3][(L-PL) & 0xFF] + f->quant_table[4][(T-PT) & 0xFF]; +} + +#if 0 +static inline int analyse_temporal(int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){ + LOAD_PIXELS0; + LOAD_PIXELS1; + const int V = src[ 0]; + + int cs = ABS(quant13[(L-LT) & 0xFF]) + ABS(quant13[(LT-T) & 0xFF]) + ABS(quant13[(T-RT) & 0xFF]); + int ct = ABS(quant13[(L-PL) & 0xFF]) + ABS(quant13[(T-PT) & 0xFF]) + ABS(quant13[(LT-PLT) & 0xFF]); + int ps = predict(src, last); + int pt = predict_temporal(src, last, ref, ref2); + +// printf("s%d t%d S%d T%d\n", cs, ct, ABS(V-ps), ABS(V-pt)); +// printf("cs=%d+%d+%d ct=%d+%d+%d\n", +// ABS(quant13[(L-LT) & 0xFF]), ABS(quant13[(LT-T) & 0xFF]), ABS(quant13[(T-RT) & 0xFF]), +// ABS(quant13[(L-PL) & 0xFF]), ABS(quant13[(T-PT) & 0xFF]), ABS(quant13[(LT-PLT) & 0xFF])); +// printf("L%02x LT%02x T%02x RT%02x PL%02x PLT%02x PT%02x P%02x\n", +// L, LT, T, RT, PL, PLT, PT, P); +} +#endif + static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ int i; @@ -354,7 +439,7 @@ return ret; } -static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){ +static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int_fast16_t *ref[2], int plane_index, int bits){ PlaneContext * const p= &s->plane[plane_index]; RangeCoder * const c= &s->c; int x; @@ -376,16 +461,37 @@ for(x=0; xtemporal){ + int32_t *state_spacetime= &p->state_spacetime[ get_context_spacetime(sample[0]+x, sample[1]+x, ref[0]+x, ref[1]+x) ]; + int diff_s= sample[0][x] - predict(sample[0]+x, sample[1]+x); + int diff_t= sample[0][x] - predict_temporal(sample[0]+x, sample[1]+x, ref[0]+x, ref[1]+x); + temporal= decide_spacetime(*state_spacetime); + update_spacetime(state_spacetime, diff_s, diff_t); + diff= temporal ? diff_t : diff_s; + }else{ + temporal= 0; + diff= sample[0][x] - predict(sample[0]+x, sample[1]+x); + } + + if(temporal) + context= get_context_temporal(s, sample[0]+x, sample[1]+x, ref[0]+x, ref[1]+x); + else + context= get_context(s, sample[0]+x, sample[1]+x, sample[2]+x); if(context < 0){ context = -context; diff= -diff; } + if(temporal) + context += p->context_count_spatial; + diff= fold(diff, bits); + +// if(s->temporal) +// printf("s=%d p=%d d=%d c=%d\n", sample[0][x], sample[0][x]-diff0, fold(diff0,bits), context); if(s->ac){ put_symbol(c, p->state[context], diff, 1); @@ -432,32 +538,47 @@ return 0; } -static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){ +static void encode_plane(FFV1Context *s, uint8_t *src, uint8_t *ref, int w, int h, int stride, int ref_stride, int plane_index){ int x,y,i; - const int ring_size= s->avctx->context_model ? 3 : 2; + const int ring_size= s->context_model ? 3 : 2; int_fast16_t sample_buffer[ring_size][w+6], *sample[ring_size]; + int_fast16_t ref_sample_buffer[2][w+6], *ref_sample[2]; s->run_index=0; memset(sample_buffer, 0, sizeof(sample_buffer)); + memset(ref_sample_buffer, 0, sizeof(ref_sample_buffer)); for(y=0; ytemporal){ + for(i=0; i<2; i++){ + ref_sample[i] = &ref_sample_buffer[i][3]; + if(y-i<0) continue; + for(x=0; xavctx->context_model ? 3 : 2; + const int ring_size= s->context_model ? 3 : 2; int_fast16_t sample_buffer[3][ring_size][w+6], *sample[3][ring_size]; s->run_index=0; @@ -489,7 +610,7 @@ for(p=0; p<3; p++){ sample[p][0][-1]= sample[p][1][0 ]; sample[p][1][ w]= sample[p][1][w-1]; - encode_line(s, w, sample[p], FFMIN(p, 1), 9); + encode_line(s, w, sample[p], NULL, FFMIN(p, 1), 9); } } } @@ -518,6 +639,7 @@ put_symbol(c, state, f->version, 0); put_symbol(c, state, f->avctx->coder_type, 0); + put_symbol(c, state, f->prediction_method, 0); put_symbol(c, state, f->colorspace, 0); //YUV cs type put_rac(c, state, 1); //chroma planes put_symbol(c, state, f->chroma_h_shift, 0); @@ -560,12 +682,18 @@ s->version=0; s->ac= avctx->coder_type; + s->prediction_method= avctx->prediction_method==1 && s->colorspace==0; + s->context_model= avctx->context_model && !s->prediction_method; s->plane_count=2; for(i=0; i<256; i++){ s->quant_table[0][i]= quant11[i]; s->quant_table[1][i]= 11*quant11[i]; - if(avctx->context_model==0){ + if(s->prediction_method==1){ + s->quant_table[2][i]= 11*11*quant11[i]; + s->quant_table[3][i]= quant11[i]; + s->quant_table[4][i]= 11*quant11[i]; + }else if(s->context_model==0){ s->quant_table[2][i]= 11*11*quant11[i]; s->quant_table[3][i]= s->quant_table[4][i]=0; @@ -578,18 +706,30 @@ for(i=0; iplane_count; i++){ PlaneContext * const p= &s->plane[i]; - - if(avctx->context_model==0){ - p->context_count= (11*11*11+1)/2; + + if(s->prediction_method==1){ + p->context_count_spatial= (11*11*11+1)/2; + p->context_count_temporal= (11*11+1)/2; + p->context_count_spacetime= 19*19; + }else if(s->context_model==0){ + p->context_count_spatial= (11*11*11+1)/2; + p->context_count_temporal= 0; + p->context_count_spacetime= 0; }else{ - p->context_count= (11*11*5*5*5+1)/2; + p->context_count_spatial= (11*11*5*5*5+1)/2; + p->context_count_temporal= 0; + p->context_count_spacetime= 0; } + p->context_count= p->context_count_spatial + p->context_count_temporal; if(s->ac){ if(!p->state) p->state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t)); }else{ if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState)); } + if(p->context_count_spacetime) + if(!p->state_spacetime) + p->state_spacetime= av_malloc(p->context_count_spacetime*sizeof(int32_t)); } avctx->coded_frame= &s->picture; @@ -635,6 +775,7 @@ p->vlc_state[j].count= 1; } } + memset(p->state_spacetime, 0, p->context_count_spacetime*sizeof(int32_t)); } } @@ -645,6 +786,7 @@ const int width= f->width; const int height= f->height; AVFrame * const p= &f->picture; + AVFrame * const ref= &f->prev_picture; int used_count= 0; uint8_t keystate=128; @@ -652,6 +794,7 @@ // ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); + *ref = *p; *p = *pict; p->pict_type= FF_I_TYPE; @@ -660,9 +803,12 @@ p->key_frame= 1; write_header(f); clear_state(f); + *ref= *p; + f->temporal= 0; }else{ put_rac(c, &keystate, 0); p->key_frame= 0; + f->temporal= f->prediction_method==1; } if(!f->ac){ @@ -675,10 +821,10 @@ const int chroma_width = -((-width )>>f->chroma_h_shift); const int chroma_height= -((-height)>>f->chroma_v_shift); - encode_plane(f, p->data[0], width, height, p->linesize[0], 0); + encode_plane(f, p->data[0], ref->data[0], width, height, p->linesize[0], ref->linesize[0], 0); - encode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1); - encode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1); + encode_plane(f, p->data[1], ref->data[1], chroma_width, chroma_height, p->linesize[1], ref->linesize[1], 1); + encode_plane(f, p->data[2], ref->data[2], chroma_width, chroma_height, p->linesize[2], ref->linesize[2], 1); }else{ encode_rgb_frame(f, (uint32_t*)(p->data[0]), width, height, p->linesize[0]/4); } @@ -713,7 +859,7 @@ return 0; } -static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){ +static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int_fast16_t *ref[2], int plane_index, int bits){ PlaneContext * const p= &s->plane[plane_index]; RangeCoder * const c= &s->c; int x; @@ -722,15 +868,30 @@ int run_index= s->run_index; for(x=0; xtemporal){ + state_spacetime= &p->state_spacetime[ get_context_spacetime(sample[1]+x, sample[0]+x, ref[0]+x, ref[1]+x) ]; + temporal= decide_spacetime(*state_spacetime); + }else{ + temporal= 0; + } + + if(temporal) + context= get_context_temporal(s, sample[1]+x, sample[0]+x, ref[0]+x, ref[1]+x); + else + context= get_context(s, sample[1]+x, sample[0]+x, sample[1]+x); - context= get_context(s, sample[1] + x, sample[0] + x, sample[1] + x); if(context < 0){ context= -context; sign=1; }else sign=0; + if(temporal) + context+= p->context_count_spatial; if(s->ac){ diff= get_symbol(c, p->state[context], 1); @@ -765,19 +926,38 @@ if(sign) diff= -diff; - sample[1][x]= (predict(sample[1] + x, sample[0] + x) + diff) & ((1<temporal){ + if(temporal){ + int diff_s= sample[1][x] - predict(sample[1]+x, sample[0]+x); + update_spacetime(state_spacetime, diff_s, diff); + }else{ + int diff_t= sample[1][x] - predict_temporal(sample[1]+x, sample[0]+x, ref[0]+x, ref[1]+x); + update_spacetime(state_spacetime, diff, diff_t); + } + } +// if(s->temporal) +// printf("s=%d p=%d d=%d c=%d\n", sample[1][x], pred, diff, context); } s->run_index= run_index; } -static void decode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){ +static void decode_plane(FFV1Context *s, uint8_t *src, uint8_t *ref, int w, int h, int stride, int ref_stride, int plane_index){ int x, y; int_fast16_t sample_buffer[2][w+6]; int_fast16_t *sample[2]= {sample_buffer[0]+3, sample_buffer[1]+3}; + int_fast16_t ref_sample_buffer[2][w+6], *ref_sample[2]; s->run_index=0; memset(sample_buffer, 0, sizeof(sample_buffer)); + memset(ref_sample_buffer, 0, sizeof(ref_sample_buffer)); for(y=0; ytemporal){ + int i; + for(i=0; i<2; i++){ + ref_sample[i] = &ref_sample_buffer[i][3]; + if(y-i<0) continue; + for(x=0; xc; memset(state, 128, sizeof(state)); f->version= get_symbol(c, state, 0); f->ac= f->avctx->coder_type= get_symbol(c, state, 0); + f->prediction_method= f->avctx->prediction_method= get_symbol(c, state, 0); f->colorspace= get_symbol(c, state, 0); //YUV cs type get_rac(c, state); //no chroma = false f->chroma_h_shift= get_symbol(c, state, 0); @@ -907,26 +1103,53 @@ //printf("%d %d %d\n", f->chroma_h_shift, f->chroma_v_shift,f->avctx->pix_fmt); +#define READ_QUANT(i) {\ + context_count*= read_quant_table(c, f->quant_table[i], context_count); \ + if(context_count < 0 || context_count > 32768){ \ + av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n"); \ + return -1; \ + } \ + } + context_count=1; - for(i=0; i<5; i++){ - context_count*= read_quant_table(c, f->quant_table[i], context_count); - if(context_count < 0 || context_count > 32768){ - av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n"); - return -1; - } + if(f->prediction_method==1){ + for(i=0; i<3; i++) + READ_QUANT(i); + context_count_spatial= (context_count+1)/2; + context_count=1; + for(i=3; i<5; i++) + READ_QUANT(i); + context_count_temporal= (context_count+1)/2; + context_count= context_count_spatial + context_count_temporal; + context_count_spacetime= 19*19; + }else{ + for(i=0; i<5; i++) + READ_QUANT(i); + context_count_temporal= 0; + context_count_spatial= + context_count= (context_count+1)/2; + context_count_spacetime= 0; } - context_count= (context_count+1)/2; - + +// printf("\ncontexts s: %d t: %d\n", +// context_count_spatial, context_count_temporal); + for(i=0; iplane_count; i++){ PlaneContext * const p= &f->plane[i]; p->context_count= context_count; + p->context_count_spatial= context_count_spatial; + p->context_count_temporal= context_count_temporal; + p->context_count_spacetime= context_count_spacetime; if(f->ac){ if(!p->state) p->state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t)); }else{ if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState)); } + if(p->context_count_spacetime) + if(!p->state_spacetime) + p->state_spacetime= av_malloc(p->context_count_spacetime*sizeof(int32_t)); } return 0; @@ -947,6 +1170,7 @@ const int width= f->width; const int height= f->height; AVFrame * const p= &f->picture; + AVFrame * const ref= &f->prev_picture; int bytes_read; uint8_t keystate= 128; @@ -956,13 +1180,16 @@ ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); - p->pict_type= FF_I_TYPE; //FIXME I vs. P if(get_rac(c, &keystate)){ + p->pict_type= FF_I_TYPE; p->key_frame= 1; read_header(f); clear_state(f); + f->temporal= 0; }else{ + p->pict_type= FF_P_TYPE; p->key_frame= 0; + f->temporal= f->prediction_method==1; } p->reference= 0; @@ -986,10 +1213,10 @@ if(f->colorspace==0){ const int chroma_width = -((-width )>>f->chroma_h_shift); const int chroma_height= -((-height)>>f->chroma_v_shift); - decode_plane(f, p->data[0], width, height, p->linesize[0], 0); - - decode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1); - decode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1); + decode_plane(f, p->data[0], ref->data[0], width, height, p->linesize[0], ref->linesize[0], 0); + + decode_plane(f, p->data[1], ref->data[1], chroma_width, chroma_height, p->linesize[1], ref->linesize[1], 1); + decode_plane(f, p->data[2], ref->data[2], chroma_width, chroma_height, p->linesize[2], ref->linesize[2], 1); }else{ decode_rgb_frame(f, (uint32_t*)p->data[0], width, height, p->linesize[0]/4); } @@ -999,8 +1226,14 @@ f->picture_number++; *picture= *p; - - avctx->release_buffer(avctx, p); //FIXME + + //FIXME: release p on uninit + if(f->prediction_method){ + if(ref->data[0]) + avctx->release_buffer(avctx, ref); + *ref= *p; + }else + avctx->release_buffer(avctx, p); *data_size = sizeof(AVFrame);