Index: ffv1.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/ffv1.c,v
retrieving revision 1.31
diff -u -r1.31 ffv1.c
--- ffv1.c	24 Feb 2005 16:39:03 -0000	1.31
+++ ffv1.c	9 Mar 2005 00:36:49 -0000
@@ -31,6 +31,8 @@
 #include "rangecoder.h"
 #include "golomb.h"
 
+#undef printf
+
 #define MAX_PLANES 4
 #define CONTEXT_SIZE 32
 
@@ -158,7 +160,11 @@
 
 typedef struct PlaneContext{
     int context_count;
+    int context_count_temporal;
+    int context_count_spatial;
+    int context_count_spacetime;
     uint8_t (*state)[CONTEXT_SIZE];
+    int32_t *state_spacetime;
     VlcState *vlc_state;
     uint8_t interlace_bit_state[2];
 } PlaneContext;
@@ -174,12 +180,16 @@
     int flags;
     int picture_number;
     AVFrame picture;
+    AVFrame prev_picture;
     int plane_count;
     int ac;                              ///< 1-> CABAC 0-> golomb rice
     PlaneContext plane[MAX_PLANES];
-    int16_t quant_table[5][256];
+    int16_t quant_table[6][256];
     int run_index;
     int colorspace;
+    int context_model;
+    int prediction_method;
+    int temporal;
     
     DSPContext dsp; 
 }FFV1Context;
@@ -196,21 +206,65 @@
     return diff;
 }
 
-static inline int predict(int_fast16_t *src, int_fast16_t *last){
-    const int LT= last[-1];
-    const int  T= last[ 0];
+#define LOAD_PIXELS0 \
+    const int LT= last[-1]; \
+    const int  T= last[ 0]; \
+    const int RT= last[ 1]; \
     const int L =  src[-1];
+#define LOAD_PIXELS1 \
+    const int PLT=ref2[-1]; \
+    const int PT= ref2[ 0]; \
+    const int PL=  ref[-1]; \
+    const int P =  ref[ 0];
+
+/*
+static inline int decide_temporal(int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){
+    LOAD_PIXELS0;
+    LOAD_PIXELS1;
+
+    return ABS(quant13[(L-PL) & 0xFF]) + ABS(quant13[(LT-PLT) & 0xFF]) + ABS(quant13[(T-PT) & 0xFF])
+         < ABS(quant13[(L-LT) & 0xFF]) + ABS(quant13[(LT-T) & 0xFF]) + ABS(quant13[(T-RT) & 0xFF]);
+}
+*/
+
+/* Index into state_spacetime: 19 * (sum of three |quant13| temporal differences) + (sum of three |quant13| spatial differences). */
+static inline int get_context_spacetime(int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){
+    LOAD_PIXELS0; LOAD_PIXELS1;
+    const int act_time = ABS(quant13[(L-PL) & 0xFF]) + ABS(quant13[(LT-PLT) & 0xFF]) + ABS(quant13[(T-PT) & 0xFF]);
+    const int act_space= ABS(quant13[(L-LT) & 0xFF]) + ABS(quant13[(LT-T) & 0xFF]) + ABS(quant13[(T-RT) & 0xFF]);
+    return 19*act_time + act_space;
+}
+
+static inline int decide_spacetime(int32_t state){ /* returns 1 to select the temporal predictor, 0 for the spatial one */
+    return 0 > state; /* update_spacetime() pushes state below zero while temporal residuals are the smaller ones */
+}
+
+/* Accumulate |temporal residual| - |spatial residual| into *state; a negative total makes decide_spacetime() pick temporal. */
+static inline void update_spacetime(int32_t *state, int diff_space, int diff_time){
+    *state += ABS(diff_time) - ABS(diff_space); //FIXME: quant?
+    if(ABS(*state) > 10000)
+        *state >>= 1; /* halve once the magnitude passes 10000; NOTE: >> on a negative value is implementation-defined in C */
+}
+
+static inline int predict(int_fast16_t *src, int_fast16_t *last){
+    LOAD_PIXELS0;
 
     return mid_pred(L, L + T - LT, T);
 }
 
+/* Temporal predictor: currently just P, i.e. ref[0], the sample at the same position in the reference line. */
+static inline int predict_temporal(int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){
+    LOAD_PIXELS0; LOAD_PIXELS1;
+    /* Disabled experiment, kept for reference:
+     *   const int G = L + T - LT, PG = PL + PT - PLT;
+     *   return mid_pred(P, G, P + G - PG); */
+    return P;
+}
+
 static inline int get_context(FFV1Context *f, int_fast16_t *src, int_fast16_t *last, int_fast16_t *last2){
-    const int LT= last[-1];
-    const int  T= last[ 0];
-    const int RT= last[ 1];
-    const int L =  src[-1];
+    LOAD_PIXELS0;
 
-    if(f->quant_table[3][127]){
+    if(f->context_model){
         const int TT= last2[0];
         const int LL=  src[-2];
         return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF]
@@ -219,6 +273,37 @@
         return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF];
 }
 
+/* Context for a temporally predicted sample: quant_table[3] applied to (L-PL) plus quant_table[4] applied to (T-PT).
+ * Callers treat a negative result as a mirrored context (they negate both the context and the residual). */
+static inline int get_context_temporal(FFV1Context *f, int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){
+    LOAD_PIXELS0; LOAD_PIXELS1;
+    const int16_t *q_left= f->quant_table[3];
+    const int16_t *q_top = f->quant_table[4];
+
+    //FIXME some spatial variation too?
+    return q_left[(L-PL) & 0xFF] + q_top[(T-PT) & 0xFF];
+}
+
+#if 0
+static inline int analyse_temporal(int_fast16_t *src, int_fast16_t *last, int_fast16_t *ref, int_fast16_t *ref2){
+    LOAD_PIXELS0;
+    LOAD_PIXELS1;
+    const int V =  src[ 0];
+
+    int cs = ABS(quant13[(L-LT) & 0xFF]) + ABS(quant13[(LT-T) & 0xFF]) + ABS(quant13[(T-RT) & 0xFF]);
+    int ct = ABS(quant13[(L-PL) & 0xFF]) + ABS(quant13[(T-PT) & 0xFF]) + ABS(quant13[(LT-PLT) & 0xFF]);
+    int ps = predict(src, last);
+    int pt = predict_temporal(src, last, ref, ref2);
+
+//  printf("s%d t%d S%d T%d\n", cs, ct, ABS(V-ps), ABS(V-pt));
+//  printf("cs=%d+%d+%d ct=%d+%d+%d\n",
+//          ABS(quant13[(L-LT) & 0xFF]), ABS(quant13[(LT-T) & 0xFF]), ABS(quant13[(T-RT) & 0xFF]),
+//          ABS(quant13[(L-PL) & 0xFF]), ABS(quant13[(T-PT) & 0xFF]), ABS(quant13[(LT-PLT) & 0xFF]));
+//  printf("L%02x LT%02x T%02x RT%02x  PL%02x PLT%02x PT%02x P%02x\n",
+//          L, LT, T, RT, PL, PLT, PT, P);
+}
+#endif
+
 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
     int i;
 
@@ -354,7 +439,7 @@
     return ret;
 }
 
-static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){
+static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int_fast16_t *ref[2], int plane_index, int bits){
     PlaneContext * const p= &s->plane[plane_index];
     RangeCoder * const c= &s->c;
     int x;
@@ -376,16 +461,37 @@
 
     for(x=0; x<w; x++){
         int diff, context;
-        
-        context= get_context(s, sample[0]+x, sample[1]+x, sample[2]+x);
-        diff= sample[0][x] - predict(sample[0]+x, sample[1]+x);
+        int temporal;
+
+        if(s->temporal){
+            int32_t *state_spacetime= &p->state_spacetime[ get_context_spacetime(sample[0]+x, sample[1]+x, ref[0]+x, ref[1]+x) ];
+            int diff_s= sample[0][x] - predict(sample[0]+x, sample[1]+x);
+            int diff_t= sample[0][x] - predict_temporal(sample[0]+x, sample[1]+x, ref[0]+x, ref[1]+x);
+            temporal= decide_spacetime(*state_spacetime);
+            update_spacetime(state_spacetime, diff_s, diff_t);
+            diff= temporal ? diff_t : diff_s;
+        }else{
+            temporal= 0;
+            diff= sample[0][x] - predict(sample[0]+x, sample[1]+x);
+        }
+
+        if(temporal)
+            context= get_context_temporal(s, sample[0]+x, sample[1]+x, ref[0]+x, ref[1]+x);
+        else
+            context= get_context(s, sample[0]+x, sample[1]+x, sample[2]+x);
 
         if(context < 0){
             context = -context;
             diff= -diff;
         }
 
+        if(temporal)
+            context += p->context_count_spatial;
+
         diff= fold(diff, bits);
+
+//      if(s->temporal)
+//          printf("s=%d p=%d d=%d c=%d\n", sample[0][x], sample[0][x]-diff0, fold(diff0,bits), context);
         
         if(s->ac){
             put_symbol(c, p->state[context], diff, 1);
@@ -432,32 +538,47 @@
     return 0;
 }
 
-static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){
+static void encode_plane(FFV1Context *s, uint8_t *src, uint8_t *ref, int w, int h, int stride, int ref_stride, int plane_index){
     int x,y,i;
-    const int ring_size= s->avctx->context_model ? 3 : 2;
+    const int ring_size= s->context_model ? 3 : 2;
     int_fast16_t sample_buffer[ring_size][w+6], *sample[ring_size];
+    int_fast16_t ref_sample_buffer[2][w+6], *ref_sample[2];
     s->run_index=0;
     
     memset(sample_buffer, 0, sizeof(sample_buffer));
+    memset(ref_sample_buffer, 0, sizeof(ref_sample_buffer));
     
     for(y=0; y<h; y++){
         for(i=0; i<ring_size; i++)
             sample[i]= sample_buffer[(h+i-y)%ring_size]+3;
-        
         sample[0][-1]= sample[1][0  ];
         sample[1][ w]= sample[1][w-1];
+
+        //FIXME rolling buffer
+        if(s->temporal){
+            for(i=0; i<2; i++){
+                ref_sample[i] = &ref_sample_buffer[i][3];
+                if(y-i<0) continue;
+                for(x=0; x<w; x++)
+                    ref_sample[i][x]= ref[x+(y-i)*ref_stride];
+                ref_sample[i][-1]= ref_sample[i][0];
+                ref_sample[i][ w]= ref_sample[i][w-1];
+            }
+        }
+
 //{START_TIMER
         for(x=0; x<w; x++){
             sample[0][x]= src[x + stride*y];
         }
-        encode_line(s, w, sample, plane_index, 8);
+
+        encode_line(s, w, sample, ref_sample, plane_index, 8);
 //STOP_TIMER("encode line")}
     }
 }
 
 static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){
     int x, y, p, i;
-    const int ring_size= s->avctx->context_model ? 3 : 2;
+    const int ring_size= s->context_model ? 3 : 2;
     int_fast16_t sample_buffer[3][ring_size][w+6], *sample[3][ring_size];
     s->run_index=0;
     
@@ -489,7 +610,7 @@
         for(p=0; p<3; p++){
             sample[p][0][-1]= sample[p][1][0  ];
             sample[p][1][ w]= sample[p][1][w-1];
-            encode_line(s, w, sample[p], FFMIN(p, 1), 9);
+            encode_line(s, w, sample[p], NULL, FFMIN(p, 1), 9);
         }
     }
 }
@@ -518,6 +639,7 @@
     
     put_symbol(c, state, f->version, 0);
     put_symbol(c, state, f->avctx->coder_type, 0);
+    put_symbol(c, state, f->prediction_method, 0);
     put_symbol(c, state, f->colorspace, 0); //YUV cs type 
     put_rac(c, state, 1); //chroma planes
         put_symbol(c, state, f->chroma_h_shift, 0);
@@ -560,12 +682,18 @@
  
     s->version=0;
     s->ac= avctx->coder_type;
+    s->prediction_method= avctx->prediction_method==1 && s->colorspace==0;
+    s->context_model= avctx->context_model && !s->prediction_method;
     
     s->plane_count=2;
     for(i=0; i<256; i++){
         s->quant_table[0][i]=           quant11[i];
         s->quant_table[1][i]=        11*quant11[i];
-        if(avctx->context_model==0){
+        if(s->prediction_method==1){
+            s->quant_table[2][i]= 11*11*quant11[i];
+            s->quant_table[3][i]=       quant11[i];
+            s->quant_table[4][i]=    11*quant11[i];
+        }else if(s->context_model==0){
             s->quant_table[2][i]=     11*11*quant11[i];
             s->quant_table[3][i]=
             s->quant_table[4][i]=0;
@@ -578,18 +706,30 @@
 
     for(i=0; i<s->plane_count; i++){
         PlaneContext * const p= &s->plane[i];
-               
-        if(avctx->context_model==0){
-            p->context_count= (11*11*11+1)/2;
+
+        if(s->prediction_method==1){
+            p->context_count_spatial= (11*11*11+1)/2;
+            p->context_count_temporal= (11*11+1)/2;
+            p->context_count_spacetime= 19*19;
+        }else if(s->context_model==0){
+            p->context_count_spatial= (11*11*11+1)/2;
+            p->context_count_temporal= 0;
+            p->context_count_spacetime= 0; 
         }else{        
-            p->context_count= (11*11*5*5*5+1)/2;
+            p->context_count_spatial= (11*11*5*5*5+1)/2;
+            p->context_count_temporal= 0;
+            p->context_count_spacetime= 0; 
         }
+        p->context_count= p->context_count_spatial + p->context_count_temporal;
 
         if(s->ac){
             if(!p->state) p->state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t));
         }else{
             if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState));
         }
+        if(p->context_count_spacetime)
+            if(!p->state_spacetime)
+                p->state_spacetime= av_malloc(p->context_count_spacetime*sizeof(int32_t));
     }
 
     avctx->coded_frame= &s->picture;
@@ -635,6 +775,7 @@
                 p->vlc_state[j].count= 1;
             }
         }
+        memset(p->state_spacetime, 0, p->context_count_spacetime*sizeof(int32_t));
     }
 }
 
@@ -645,6 +786,7 @@
     const int width= f->width;
     const int height= f->height;
     AVFrame * const p= &f->picture;
+    AVFrame * const ref= &f->prev_picture;
     int used_count= 0;
     uint8_t keystate=128;
 
@@ -652,6 +794,7 @@
 //    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
 
+    *ref = *p;
     *p = *pict;
     p->pict_type= FF_I_TYPE;
     
@@ -660,9 +803,12 @@
         p->key_frame= 1;
         write_header(f);
         clear_state(f);
+        *ref= *p;
+        f->temporal= 0;
     }else{
         put_rac(c, &keystate, 0);
         p->key_frame= 0;
+        f->temporal= f->prediction_method==1;
     }
 
     if(!f->ac){
@@ -675,10 +821,10 @@
         const int chroma_width = -((-width )>>f->chroma_h_shift);
         const int chroma_height= -((-height)>>f->chroma_v_shift);
 
-        encode_plane(f, p->data[0], width, height, p->linesize[0], 0);
+        encode_plane(f, p->data[0], ref->data[0], width, height, p->linesize[0], ref->linesize[0], 0);
 
-        encode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1);
-        encode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1);
+        encode_plane(f, p->data[1], ref->data[1], chroma_width, chroma_height, p->linesize[1], ref->linesize[1], 1);
+        encode_plane(f, p->data[2], ref->data[2], chroma_width, chroma_height, p->linesize[2], ref->linesize[2], 1);
     }else{
         encode_rgb_frame(f, (uint32_t*)(p->data[0]), width, height, p->linesize[0]/4);
     }
@@ -713,7 +859,7 @@
     return 0;
 }
 
-static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){
+static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int_fast16_t *ref[2], int plane_index, int bits){
     PlaneContext * const p= &s->plane[plane_index];
     RangeCoder * const c= &s->c;
     int x;
@@ -722,15 +868,30 @@
     int run_index= s->run_index;
 
     for(x=0; x<w; x++){
-        int diff, context, sign;
+        int diff, context, sign, pred;
+        int temporal;
+        int32_t *state_spacetime;
+
+        if(s->temporal){
+            state_spacetime= &p->state_spacetime[ get_context_spacetime(sample[1]+x, sample[0]+x, ref[0]+x, ref[1]+x) ];
+            temporal= decide_spacetime(*state_spacetime);
+        }else{
+            temporal= 0;
+        }
+
+        if(temporal)
+            context= get_context_temporal(s, sample[1]+x, sample[0]+x, ref[0]+x, ref[1]+x);
+        else
+            context= get_context(s, sample[1]+x, sample[0]+x, sample[1]+x);
          
-        context= get_context(s, sample[1] + x, sample[0] + x, sample[1] + x);
         if(context < 0){
             context= -context;
             sign=1;
         }else
             sign=0;
         
+        if(temporal)
+            context+= p->context_count_spatial;
 
         if(s->ac){
             diff= get_symbol(c, p->state[context], 1);
@@ -765,19 +926,38 @@
 
         if(sign) diff= -diff;
 
-        sample[1][x]= (predict(sample[1] + x, sample[0] + x) + diff) & ((1<<bits)-1);
+        if(temporal)
+            pred= predict_temporal(sample[1]+x, sample[0]+x, ref[0]+x, ref[1]+x);
+        else
+            pred= predict(sample[1]+x, sample[0]+x);
+
+        sample[1][x]= (pred + diff) & ((1<<bits)-1);
+
+        if(s->temporal){
+            if(temporal){
+                int diff_s= sample[1][x] - predict(sample[1]+x, sample[0]+x);
+                update_spacetime(state_spacetime, diff_s, diff);
+            }else{
+                int diff_t= sample[1][x] - predict_temporal(sample[1]+x, sample[0]+x, ref[0]+x, ref[1]+x);
+                update_spacetime(state_spacetime, diff, diff_t);
+            }
+        }
+//      if(s->temporal)
+//          printf("s=%d p=%d d=%d c=%d\n", sample[1][x], pred, diff, context);
     }
     s->run_index= run_index;        
 }
 
-static void decode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){
+static void decode_plane(FFV1Context *s, uint8_t *src, uint8_t *ref, int w, int h, int stride, int ref_stride, int plane_index){
     int x, y;
     int_fast16_t sample_buffer[2][w+6];
     int_fast16_t *sample[2]= {sample_buffer[0]+3, sample_buffer[1]+3};
+    int_fast16_t ref_sample_buffer[2][w+6], *ref_sample[2];
 
     s->run_index=0;
     
     memset(sample_buffer, 0, sizeof(sample_buffer));
+    memset(ref_sample_buffer, 0, sizeof(ref_sample_buffer));
     
     for(y=0; y<h; y++){
         int_fast16_t *temp= sample[0]; //FIXME try a normal buffer
@@ -787,16 +967,29 @@
 
         sample[1][-1]= sample[0][0  ];
         sample[0][ w]= sample[0][w-1];
-        
+
+        //FIXME rolling buffer
+        if(s->temporal){
+            int i;
+            for(i=0; i<2; i++){
+                ref_sample[i] = &ref_sample_buffer[i][3];
+                if(y-i<0) continue;
+                for(x=0; x<w; x++)
+                    ref_sample[i][x]= ref[x+(y-i)*ref_stride];
+                ref_sample[i][-1]= ref_sample[i][0];
+                ref_sample[i][ w]= ref_sample[i][w-1];
+            }
+        }
+
 //{START_TIMER
-        decode_line(s, w, sample, plane_index, 8);
+        decode_line(s, w, sample, ref_sample, plane_index, 8);
         for(x=0; x<w; x++){
             src[x + stride*y]= sample[1][x];
         }
 //STOP_TIMER("decode-line")}
     }
 }
-
+    
 static void decode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){
     int x, y, p;
     int_fast16_t sample_buffer[3][2][w+6];
@@ -818,7 +1011,7 @@
 
             sample[p][1][-1]= sample[p][0][0  ];
             sample[p][0][ w]= sample[p][0][w-1];
-            decode_line(s, w, sample[p], FFMIN(p, 1), 9);
+            decode_line(s, w, sample[p], NULL, FFMIN(p, 1), 9);
         }
         for(x=0; x<w; x++){
             int g= sample[0][1][x];
@@ -853,9 +1046,9 @@
         
         while(len--){
             quant_table[i] = scale*v;
-            i++;
-//printf("%2d ",v);
+//printf("%2d ",quant_table[i]);
 //if(i%16==0) printf("\n");
+            i++;
         }
     }
 
@@ -869,13 +1062,16 @@
 
 static int read_header(FFV1Context *f){
     uint8_t state[CONTEXT_SIZE];
-    int i, context_count;
+    int context_count, context_count_spatial;
+    int context_count_temporal, context_count_spacetime;
+    int i;
     RangeCoder * const c= &f->c;
     
     memset(state, 128, sizeof(state));
 
     f->version= get_symbol(c, state, 0);
     f->ac= f->avctx->coder_type= get_symbol(c, state, 0);
+    f->prediction_method= f->avctx->prediction_method= get_symbol(c, state, 0);
     f->colorspace= get_symbol(c, state, 0); //YUV cs type
     get_rac(c, state); //no chroma = false
     f->chroma_h_shift= get_symbol(c, state, 0);
@@ -907,26 +1103,53 @@
 
 //printf("%d %d %d\n", f->chroma_h_shift, f->chroma_v_shift,f->avctx->pix_fmt);
 
+#define READ_QUANT(i) do{ /* read quant_table[i], fold its return value into context_count; returns -1 from the enclosing function on a bad count */ \
+        context_count*= read_quant_table(c, f->quant_table[i], context_count); \
+        if(context_count < 0 || context_count > 32768){ \
+            av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n"); \
+            return -1; \
+        } \
+    }while(0)
+
     context_count=1;
-    for(i=0; i<5; i++){
-        context_count*= read_quant_table(c, f->quant_table[i], context_count);
-        if(context_count < 0 || context_count > 32768){
-            av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n");
-            return -1;
-        }
+    if(f->prediction_method==1){
+        for(i=0; i<3; i++)
+            READ_QUANT(i);
+        context_count_spatial= (context_count+1)/2;
+        context_count=1;
+        for(i=3; i<5; i++)
+            READ_QUANT(i);
+        context_count_temporal= (context_count+1)/2;
+        context_count= context_count_spatial + context_count_temporal;
+        context_count_spacetime= 19*19;
+    }else{
+        for(i=0; i<5; i++)
+            READ_QUANT(i);
+        context_count_temporal= 0;
+        context_count_spatial=
+        context_count= (context_count+1)/2;
+        context_count_spacetime= 0;
     }
-    context_count= (context_count+1)/2;
-    
+
+//  printf("\ncontexts s: %d t: %d\n",
+//          context_count_spatial, context_count_temporal);
+
     for(i=0; i<f->plane_count; i++){
         PlaneContext * const p= &f->plane[i];
 
         p->context_count= context_count;
+        p->context_count_spatial= context_count_spatial;
+        p->context_count_temporal= context_count_temporal;
+        p->context_count_spacetime= context_count_spacetime;
 
         if(f->ac){
             if(!p->state) p->state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t));
         }else{
             if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState));
         }
+        if(p->context_count_spacetime)
+            if(!p->state_spacetime)
+                p->state_spacetime= av_malloc(p->context_count_spacetime*sizeof(int32_t));
     }
     
     return 0;
@@ -947,6 +1170,7 @@
     const int width= f->width;
     const int height= f->height;
     AVFrame * const p= &f->picture;
+    AVFrame * const ref= &f->prev_picture;
     int bytes_read;
     uint8_t keystate= 128;
 
@@ -956,13 +1180,16 @@
     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
 
 
-    p->pict_type= FF_I_TYPE; //FIXME I vs. P
     if(get_rac(c, &keystate)){
+        p->pict_type= FF_I_TYPE;
         p->key_frame= 1;
         read_header(f);
         clear_state(f);
+        f->temporal= 0;
     }else{
+        p->pict_type= FF_P_TYPE;
         p->key_frame= 0;
+        f->temporal= f->prediction_method==1;
     }
 
     p->reference= 0;
@@ -986,10 +1213,10 @@
     if(f->colorspace==0){
         const int chroma_width = -((-width )>>f->chroma_h_shift);
         const int chroma_height= -((-height)>>f->chroma_v_shift);
-        decode_plane(f, p->data[0], width, height, p->linesize[0], 0);
-        
-        decode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1);
-        decode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1);
+        decode_plane(f, p->data[0], ref->data[0], width, height, p->linesize[0], ref->linesize[0], 0);
+
+        decode_plane(f, p->data[1], ref->data[1], chroma_width, chroma_height, p->linesize[1], ref->linesize[1], 1);
+        decode_plane(f, p->data[2], ref->data[2], chroma_width, chroma_height, p->linesize[2], ref->linesize[2], 1);
     }else{
         decode_rgb_frame(f, (uint32_t*)p->data[0], width, height, p->linesize[0]/4);
     }
@@ -999,8 +1226,14 @@
     f->picture_number++;
 
     *picture= *p;
-    
-    avctx->release_buffer(avctx, p); //FIXME
+
+    //FIXME: release p on uninit
+    if(f->prediction_method){
+        if(ref->data[0])
+            avctx->release_buffer(avctx, ref);
+        *ref= *p;
+    }else
+        avctx->release_buffer(avctx, p);
 
     *data_size = sizeof(AVFrame);