diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 24b68d7..f0a1945 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -82,6 +82,8 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o OBJS-$(CONFIG_FFV1_DECODER) += ffv1.o rangecoder.o OBJS-$(CONFIG_FFV1_ENCODER) += ffv1.o rangecoder.o +OBJS-$(CONFIG_FFV2_DECODER) += ffv2.o rangecoder.o +OBJS-$(CONFIG_FFV2_ENCODER) += ffv2.o rangecoder.o OBJS-$(CONFIG_FFVHUFF_DECODER) += huffyuv.o OBJS-$(CONFIG_FFVHUFF_ENCODER) += huffyuv.o OBJS-$(CONFIG_FLAC_DECODER) += flacdec.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 0cb0e6d..3a1cde0 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -77,6 +77,7 @@ void avcodec_register_all(void) REGISTER_DECODER (EIGHTSVX_FIB, eightsvx_fib); REGISTER_DECODER (ESCAPE124, escape124); REGISTER_ENCDEC (FFV1, ffv1); + REGISTER_ENCDEC (FFV2, ffv2); REGISTER_ENCDEC (FFVHUFF, ffvhuff); REGISTER_ENCDEC (FLASHSV, flashsv); REGISTER_DECODER (FLIC, flic); diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 39dabeb..b6df475 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -97,6 +97,7 @@ enum CodecID { CODEC_ID_ASV1, CODEC_ID_ASV2, CODEC_ID_FFV1, + CODEC_ID_FFV2, // FIXME should go at the end, but that would force manual merges CODEC_ID_4XM, CODEC_ID_VCR1, CODEC_ID_CLJR, diff --git a/libavcodec/ffv2.c b/libavcodec/ffv2.c new file mode 100644 index 0000000..ab87e3d --- /dev/null +++ b/libavcodec/ffv2.c @@ -0,0 +1,1314 @@ +/* + * Copyright (C) 2008 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Coder types (in octal):
+ * 01 = order 0 entropy. directly comparable to ffvh.
+ * 02 = sample pairs. this is done as a speed optimization in ffvh, but if it's in the standard then it improves compression too.
+ * 03 = zero run. works almost as well as the trees in some anime, but fails in not-so-compressible content.
+ * 10 = 2x2 block of zero flags. escape code for nonzero coefs.
+ * 11 = 2x2 block of [-1,1] values. escape code for bigger coefs.
+ * 12 = 2x2 block of [-2,2] values. escape code for bigger coefs.
+ * 13 = 2x2 block of [-3,3] values. escape code for bigger coefs.
+ * 14 = 2x2 block of log2 values. uncompressed signs and mantissas.
+ * 2X = 8x8, 4x4 quadtree cbp followed by one of the above block coders.
+ * 3X = 8x4 cbp followed by one of the above block coders.
+ */ + +#define DEBUG +#include "avcodec.h" +#include "dsputil.h" +#include "mathops.h" +#include "mpegvideo.h" + +#define VLC_BITS 11 +#define NUM_TABLES 5 +#define NUM_VLCS 4096 + +typedef struct { + VLC vlc; + uint8_t len[NUM_VLCS]; + uint32_t bits[NUM_VLCS]; + uint32_t stats[NUM_VLCS]; +} VLCS; + +typedef struct FFV2Context FFV2Context; +struct FFV2Context { + AVCodecContext *avctx; + DSPContext dsp; + GetBitContext gb; + PutBitContext pb; + void (*read_line)(FFV2Context *s, VLCS *vlcs, uint8_t *residual, int width); + void (*write_line)(FFV2Context *s, VLCS *vlcs, uint8_t *residual, int width); + void (*read_block)(FFV2Context *s, VLCS *vlcs, uint8_t *residual); + void (*write_block)(FFV2Context *s, VLCS *vlcs, uint8_t *residual); + uint8_t *bs, *bs_end; + VLCS vlcs[6][NUM_TABLES]; ///< [6] = (Y/U/V)*2 + (I/P) + AVFrame fenc; + AVFrame fref; + int mb_width, mb_height; + int tstride; + uint8_t *temp[4]; + uint8_t *mb_types; ///< intra: 0, inter: 1 + int16_t (*mvs)[2]; ///< current row + int16_t (*mvs_top)[2]; ///< previous row + int16_t (*mvs_base)[2]; ///< buffer that the other mv arrays point into + int16_t (*mv_plane[2])[2]; ///< mvs chosen by motion est, not necessarily those that will be coded + int16_t (*mv_plane_base)[2]; + int gop; + int initted_vlc[2]; + int coder_type, coder_tree, coder_block; +}; + +static uint16_t map_coder6[1296]; + +static void common_init(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + int i, j, k, l; + memset(s, 0, sizeof(FFV2Context)); + assert(!(avctx->flags&CODEC_FLAG_EMU_EDGE)); + dsputil_init(&s->dsp, avctx); + s->avctx = avctx; + s->mb_width = (avctx->width+7)/8; + s->mb_height= (avctx->height+7)/8; + avctx->get_buffer(avctx, &s->fenc); + avctx->get_buffer(avctx, &s->fref); + s->tstride = s->fenc.linesize[0]; + assert(s->tstride >= ((avctx->width+15)&~7)); + s->temp[0] = av_malloc(s->tstride*9+16); + s->temp[1] = av_malloc(s->tstride*9+16); + s->temp[2] = av_malloc(s->tstride*9+16); + s->temp[3] = 
av_malloc(s->tstride*9+16); + s->mb_types = av_malloc(s->mb_width); + s->mvs_base = av_mallocz(4*(s->mb_width+2)*sizeof(int16_t)); + s->mvs = s->mvs_base+1; + s->mvs_top = s->mvs+s->mb_width+2; + for(i=0; i<6; i++) + for(j=0; j<6; j++) + for(k=0; k<6; k++) + for(l=0; l<6; l++) + map_coder6[i*6*6*6+j*6*6+k*6+l] = (i<<9) + (j<<6) + (k<<3) + l; +} + +static int decode_init(AVCodecContext *avctx) { + avctx->pix_fmt= PIX_FMT_YUV420P; + common_init(avctx); + return 0; +} + +static int encode_init(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + int i, j; + common_init(avctx); + s->coder_type = avctx->coder_type; + if(s->coder_type == 0) + s->coder_type = 26; + s->coder_tree = s->coder_type >> 3; + s->coder_block = s->coder_type & 7; + if((s->coder_type > 3 && s->coder_type < 8) || s->coder_tree < 0 || s->coder_tree > 3 || s->coder_block > 4) { + av_log(avctx, AV_LOG_ERROR, "bad coder_type\n"); + return -1; + } + s->mv_plane_base = av_mallocz(4*(s->mb_height+2)*(s->mb_width+2)*sizeof(int16_t)); + s->mv_plane[0] = s->mv_plane_base+s->mb_width+3; + s->mv_plane[1] = s->mv_plane[0]+(s->mb_width+2)*(s->mb_height+2); + for(j=0; j<6*NUM_TABLES; j++) + for(i=0; ivlcs[0][j].stats[i] = 1; + return 0; +} + +static void common_end(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + avctx->release_buffer(avctx, &s->fenc); + avctx->release_buffer(avctx, &s->fref); + av_free(s->temp[0]); + av_free(s->temp[1]); + av_free(s->temp[2]); + av_free(s->temp[3]); + av_free(s->mb_types); + av_free(s->mvs_base); +} + +static int decode_end(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + int i; + for(i=0; i<6*NUM_TABLES; i++) + free_vlc(&s->vlcs[0][i].vlc); + common_end(avctx); + return 0; +} + +static int encode_end(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + common_end(avctx); + av_free(s->mv_plane_base); + return 0; +} + +// FIXME code duplication from huffyuv.c +// FIXME port optimizations back +static int 
generate_bits_table(uint32_t *dst, uint8_t *len_table, int size){ + int len, index; + uint32_t bits=1; + + for(len=1; len<32 && ~bits; len++){ + for(index=size-1; index>=0; index--){ + if(len_table[index] == len) + dst[index] = bits--; + } + bits = bits*2+1; + } + if(~bits){ + av_log(NULL, AV_LOG_ERROR, "Error generating huffman table\n"); + return -1; + } + return 0; +} + +typedef struct { + uint64_t val; + int name; +} HeapElem; + +static void heap_sift(HeapElem *h, int root, int size) +{ + while(root*2+1 < size) { + int child = root*2+1; + if(child < size-1 && h[child].val > h[child+1].val) + child++; + if(h[root].val > h[child].val) { + FFSWAP(HeapElem, h[root], h[child]); + root = child; + } else + break; + } +} + +static void generate_len_table(uint8_t *dst, uint32_t *stats, int size){ + HeapElem h[size]; + int up[2*size]; + int len[2*size]; + int offset, i, next; + + for(offset=1; ; offset<<=1){ + for(i=0; i=0; i--) + heap_sift(h, i, size); + + for(next=size; next=size; i--) + len[i] = len[up[i]] + 1; + for(i=0; i= 32) break; + } + if(i==size) break; + } +} + +// TODO: simply vlcing the len table reduces table size by about 1.7x +// reordering and gradient prediction should help further. 
+ +static int read_len_table(uint8_t *len, GetBitContext *gb, int size){ + int i, val, repeat=-1; + + for(i=0; i0 && repeat<256 && repeat>0); + if(repeat>7){ + buf[index++]= val; + buf[index++]= repeat; + }else{ + buf[index++]= val | (repeat<<5); + } + } + + return index; +} + +static int read_table(FFV2Context *s, VLCS* vlcs, int size, uint16_t *map) { + if(read_len_table(vlcs->len, &s->gb, size)) + return -1; + if(generate_bits_table(vlcs->bits, vlcs->len, size)) + return -1; + free_vlc(&vlcs->vlc); + init_vlc_sparse(&vlcs->vlc, VLC_BITS, size, vlcs->len, 1, 1, vlcs->bits, 4, 4, map, 2, 2, 0); + return 0; +} + +static void write_table(FFV2Context *s, VLCS* vlcs, int size) { + int i; + generate_len_table(vlcs->len, vlcs->stats, size); + generate_bits_table(vlcs->bits, vlcs->len, size); + s->bs += write_len_table(vlcs->len, s->bs, size); + for(i=0; istats[i] = (vlcs->stats[i]+1)>>1; +} + +// ---------- + +static void read_line_v0(FFV2Context *s, VLCS *vlcs, uint8_t *residual, int width) { + int x; + for(x=0; xgb, vlcs[0].vlc.table, VLC_BITS, 3); +} + +static void write_line_v0(FFV2Context *s, VLCS *vlcs, uint8_t *residual, int width) { + int x; + for(x=0; xpb, vlcs[0].len[v], vlcs[0].bits[v]); + vlcs[0].stats[v]++; + } +} + +// ---------- + +static void read_line_v1(FFV2Context *s, VLCS *vlcs, uint8_t *residual, int width) { + int x; + for(x=0; xgb, vlcs[0].vlc.table, VLC_BITS, 3); + if(a < 256) { + residual[x++] = a; + if((unsigned)(a - 8) >= 240 && x < width) + residual[x++] = 8 + get_vlc2(&s->gb, vlcs[1].vlc.table, VLC_BITS, 3); + } else { + residual[x++] = (int8_t)a >> 4; + residual[x++] = (int8_t)(a<<4) >> 4; + } + } +} + +static void write_line_v1(FFV2Context *s, VLCS *vlcs, uint8_t *residual, int width) { + int x; + for(x=0; xpb, vlcs[0].len[a], vlcs[0].bits[a]); + vlcs[0].stats[a]++; + x++; + } else if((unsigned)(b - 8) < 240) { + b -= 8; + put_bits(&s->pb, vlcs[0].len[a], vlcs[0].bits[a]); + put_bits(&s->pb, vlcs[1].len[b], vlcs[1].bits[b]); + 
vlcs[0].stats[a]++; + vlcs[1].stats[b]++; + x+=2; + } else { + int c = 256 + ((a&15)<<4) + (b&15); + put_bits(&s->pb, vlcs[0].len[c], vlcs[0].bits[c]); + vlcs[0].stats[c]++; + x+=2; + } + } +} + +// ---------- + +static void read_line_v2(FFV2Context *s, VLCS *vlcs, uint8_t *residual, int width) { + int x, v, rep; + for(x=0; xgb, vlcs[0].vlc.table, VLC_BITS, 3); + if(v >= 240) { + residual[x++] = v-232; + } else { + rep = v&15; + while(--rep >= 0) + residual[x++] = 0; + residual[x++] = (v>>4)-7; + if(v >= (7<<4) && v < (7<<4)+15 && xgb, vlcs[1].vlc.table, VLC_BITS, 3); + } + } +} + +static void write_line_v2(FFV2Context *s, VLCS *vlcs, uint8_t *residual, int width) { + int x, a, b, rep; + residual[width] = 128; + for(x=0; xpb, vlcs[0].len[a], vlcs[0].bits[a]); + vlcs[0].stats[a]++; + if(a >= (7<<4) && a < (7<<4)+15 && x+reppb, vlcs[1].len[b], vlcs[1].bits[b]); + vlcs[1].stats[b]++; + } + x += rep+1; + } +} + +// ---------- + +static void read_block_v0(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp = ~get_vlc2(&s->gb, vlcs[1].vlc.table, VLC_BITS, 3); + if(cbp&1) residual[0] = 1 + get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + if(cbp&2) residual[1] = 1 + get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + if(cbp&4) residual[s->tstride] = 1 + get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + if(cbp&8) residual[s->tstride+1] = 1 + get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); +} + +static void write_block_v0(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + uint8_t a = residual[0]; + uint8_t b = residual[1]; + uint8_t c = residual[s->tstride]; + uint8_t d = residual[s->tstride+1]; + int v = !a + 2*!b + 4*!c + 8*!d; + put_bits(&s->pb, vlcs[1].len[v], vlcs[1].bits[v]); + vlcs[1].stats[v]++; + if(a) { a-=1; put_bits(&s->pb, vlcs[0].len[a], vlcs[0].bits[a]); vlcs[0].stats[a]++; } + if(b) { b-=1; put_bits(&s->pb, vlcs[0].len[b], vlcs[0].bits[b]); vlcs[0].stats[b]++; } + if(c) { c-=1; put_bits(&s->pb, vlcs[0].len[c], vlcs[0].bits[c]); 
vlcs[0].stats[c]++; } + if(d) { d-=1; put_bits(&s->pb, vlcs[0].len[d], vlcs[0].bits[d]); vlcs[0].stats[d]++; } +} + +// ---------- + +static void read_block_v1(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int v = get_vlc2(&s->gb, vlcs[1].vlc.table, VLC_BITS, 3); + int a = v & 3; + residual[0] = a<3 ? a-1 : 2+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = (v>>2) & 3; + residual[1] = a<3 ? a-1 : 2+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = (v>>4) & 3; + residual[s->tstride] = a<3 ? a-1 : 2+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = v>>6; + residual[s->tstride+1] = a<3 ? a-1 : 2+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); +} + +static void write_block_v1(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + uint8_t a = 1+residual[0]; + uint8_t b = 1+residual[1]; + uint8_t c = 1+residual[s->tstride]; + uint8_t d = 1+residual[s->tstride+1]; + int v = FFMIN(a,3) + 4*FFMIN(b,3) + 16*FFMIN(c,3) + 64*FFMIN(d,3); + put_bits(&s->pb, vlcs[1].len[v], vlcs[1].bits[v]); + vlcs[1].stats[v]++; + if(a>=3) { a-=3; put_bits(&s->pb, vlcs[0].len[a], vlcs[0].bits[a]); vlcs[0].stats[a]++; } + if(b>=3) { b-=3; put_bits(&s->pb, vlcs[0].len[b], vlcs[0].bits[b]); vlcs[0].stats[b]++; } + if(c>=3) { c-=3; put_bits(&s->pb, vlcs[0].len[c], vlcs[0].bits[c]); vlcs[0].stats[c]++; } + if(d>=3) { d-=3; put_bits(&s->pb, vlcs[0].len[d], vlcs[0].bits[d]); vlcs[0].stats[d]++; } +} + +// ---------- + +static void read_block_v2(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int v = get_vlc2(&s->gb, vlcs[1].vlc.table, VLC_BITS, 3); + int a = v & 7; + residual[0] = a<5 ? a-2 : 3+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = (v>>3) & 7; + residual[1] = a<5 ? a-2 : 3+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = (v>>6) & 7; + residual[s->tstride] = a<5 ? a-2 : 3+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = v>>9; + residual[s->tstride+1] = a<5 ? 
a-2 : 3+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); +} + +static void write_block_v2(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + uint8_t a = 2+residual[0]; + uint8_t b = 2+residual[1]; + uint8_t c = 2+residual[s->tstride]; + uint8_t d = 2+residual[s->tstride+1]; + int v = FFMIN(a,5) + 6*FFMIN(b,5) + 36*FFMIN(c,5) + 216*FFMIN(d,5); + put_bits(&s->pb, vlcs[1].len[v], vlcs[1].bits[v]); + vlcs[1].stats[v]++; + if(a>=5) { a-=5; put_bits(&s->pb, vlcs[0].len[a], vlcs[0].bits[a]); vlcs[0].stats[a]++; } + if(b>=5) { b-=5; put_bits(&s->pb, vlcs[0].len[b], vlcs[0].bits[b]); vlcs[0].stats[b]++; } + if(c>=5) { c-=5; put_bits(&s->pb, vlcs[0].len[c], vlcs[0].bits[c]); vlcs[0].stats[c]++; } + if(d>=5) { d-=5; put_bits(&s->pb, vlcs[0].len[d], vlcs[0].bits[d]); vlcs[0].stats[d]++; } +} + +// ---------- + +static void read_block_v3(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int v = get_vlc2(&s->gb, vlcs[1].vlc.table, VLC_BITS, 3); + int a = v & 7; + residual[0] = a<7 ? a-3 : 4+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = (v>>3) & 7; + residual[1] = a<7 ? a-3 : 4+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = (v>>6) & 7; + residual[s->tstride] = a<7 ? a-3 : 4+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = v>>9; + residual[s->tstride+1] = a<7 ? 
a-3 : 4+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); +} + +static void write_block_v3(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + uint8_t a = 3+residual[0]; + uint8_t b = 3+residual[1]; + uint8_t c = 3+residual[s->tstride]; + uint8_t d = 3+residual[s->tstride+1]; + int v = FFMIN(a,7) + 8*FFMIN(b,7) + 64*FFMIN(c,7) + 512*FFMIN(d,7); + put_bits(&s->pb, vlcs[1].len[v], vlcs[1].bits[v]); + vlcs[1].stats[v]++; + if(a>=7) { a-=7; put_bits(&s->pb, vlcs[0].len[a], vlcs[0].bits[a]); vlcs[0].stats[a]++; } + if(b>=7) { b-=7; put_bits(&s->pb, vlcs[0].len[b], vlcs[0].bits[b]); vlcs[0].stats[b]++; } + if(c>=7) { c-=7; put_bits(&s->pb, vlcs[0].len[c], vlcs[0].bits[c]); vlcs[0].stats[c]++; } + if(d>=7) { d-=7; put_bits(&s->pb, vlcs[0].len[d], vlcs[0].bits[d]); vlcs[0].stats[d]++; } +} + +// ---------- + +static int read_coef_v4(FFV2Context *s, int ctx) { + if(ctx) { + int bits = get_bits(&s->gb, ctx); + int sign = -(bits&1); + int a = ((1<> 1; + a = (a^sign)-sign; + if(a == -127) + a -= get_bits(&s->gb, 1); + return a; + } else { + return 0; + } +} + +static void read_block_v4(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int v = get_vlc2(&s->gb, vlcs[1].vlc.table, VLC_BITS, 3); + residual[0] = read_coef_v4(s, v & 7); + residual[1] = read_coef_v4(s, (v>>3) & 7); + residual[s->tstride] = read_coef_v4(s, (v>>6) & 7); + residual[s->tstride+1] = read_coef_v4(s, v>>9); +} + +#define WRITE_COEF_V4(i, coef) {\ + int x, a, b, sign;\ + a = x = (int8_t)(coef);\ + if(x) {\ + if(x == -128) a = -127;\ + sign = x>>7;\ + a = (a^sign)-sign;\ + b = 1+av_log2(a);\ + bits <<= b;\ + bits |= ((a<<1)-(1<tstride]); + WRITE_COEF_V4(3, residual[s->tstride+1]); + put_bits(&s->pb, vlcs[1].len[v], vlcs[1].bits[v]); + vlcs[1].stats[v]++; + if(count) + put_bits(&s->pb, count, bits); +} + +// ---------- + +static void read_tree_v2_4x4(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp = ~get_vlc2(&s->gb, vlcs[2].vlc.table, VLC_BITS, 3); + if(cbp&1) s->read_block(s, vlcs, residual); + 
if(cbp&2) s->read_block(s, vlcs, residual+2); + if(cbp&4) s->read_block(s, vlcs, residual+2*s->tstride); + if(cbp&8) s->read_block(s, vlcs, residual+2*s->tstride+2); +} + +static void read_tree_v2(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp, y; + for(y=0; y<8; y++) + *(uint64_t*)(residual+y*s->tstride) = 0; + cbp = ~get_vlc2(&s->gb, vlcs[3].vlc.table, VLC_BITS, 3); + if(cbp&1) read_tree_v2_4x4(s, vlcs, residual); + if(cbp&2) read_tree_v2_4x4(s, vlcs, residual+4); + if(cbp&4) read_tree_v2_4x4(s, vlcs, residual+4*s->tstride); + if(cbp&8) read_tree_v2_4x4(s, vlcs, residual+4*s->tstride+4); +} + +static int cbp_8x8_zigzag(uint8_t *res, int stride) { + int cbp; + cbp = !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)); + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 1; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 4; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 5; + res += 2*stride; + cbp |= !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 2; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 3; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 6; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 7; + res += 2*stride; + cbp |= !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 8; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 9; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 12; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 13; + res += 2*stride; + cbp |= !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 10; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 11; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 14; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 15; + return cbp; +} + +static void write_tree_v2(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp, v, i; + cbp = cbp_8x8_zigzag(residual, s->tstride); + v = !(cbp&0xf) 
+ 2*!(cbp&0xf0) + 4*!(cbp&0xf00) + 8*!(cbp&0xf000); + put_bits(&s->pb, vlcs[3].len[v], vlcs[3].bits[v]); + vlcs[3].stats[v]++; + for(i=0; cbp; i++, cbp>>=4) { + if(cbp&15) { + uint8_t *r = residual+4*(i&1)+4*(i>>1)*s->tstride; + v = (cbp&15)^15; + put_bits(&s->pb, vlcs[2].len[v], vlcs[2].bits[v]); + vlcs[2].stats[v]++; + if(cbp&1) s->write_block(s, vlcs, r); + if(cbp&2) s->write_block(s, vlcs, r+2); + if(cbp&4) s->write_block(s, vlcs, r+2*s->tstride); + if(cbp&8) s->write_block(s, vlcs, r+2*s->tstride+2); + } + } +} + +// ---------- + +static void read_tree_v3(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp, y; + for(y=0; y<4; y++) + *(uint64_t*)(residual+y*s->tstride) = 0; + cbp = get_vlc2(&s->gb, vlcs[2].vlc.table, VLC_BITS, 3); + if(cbp&0x01) s->read_block(s, vlcs, residual); + if(cbp&0x02) s->read_block(s, vlcs, residual+2); + if(cbp&0x04) s->read_block(s, vlcs, residual+4); + if(cbp&0x08) s->read_block(s, vlcs, residual+6); + if(cbp&0x10) s->read_block(s, vlcs, residual+2*s->tstride); + if(cbp&0x20) s->read_block(s, vlcs, residual+2*s->tstride+2); + if(cbp&0x40) s->read_block(s, vlcs, residual+2*s->tstride+4); + if(cbp&0x80) s->read_block(s, vlcs, residual+2*s->tstride+6); +} + +static int cbp_8x4_raster(uint8_t *res, int stride) { + int cbp; + cbp = !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 0; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 1; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 2; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 3; + res += 2*stride; + cbp |= !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 4; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 5; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 6; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 7; + return cbp; +} + +static void write_tree_v3(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp, i; + cbp = cbp_8x4_raster(residual, s->tstride); + 
put_bits(&s->pb, vlcs[2].len[cbp], vlcs[2].bits[cbp]); + vlcs[2].stats[cbp]++; + for(i=0; cbp; i++, cbp>>=1) + if(cbp&1) + s->write_block(s, vlcs, residual+2*(i&3)+2*(i>>2)*s->tstride); +} + +// ---------- + +static void read_lines(FFV2Context *s, uint8_t *buf, int width, int height, int plane) { + int x, y; + if(s->coder_tree == 0) { + for(y=0; yread_line(s, s->vlcs[plane], buf+y*s->tstride, width); + } else if(s->coder_tree == 1) { + if(s->coder_block == 0) + memset(buf, 0, 8*s->tstride); + for(y=0; yread_block(s, s->vlcs[plane], buf+x+y*s->tstride); + } else if(s->coder_tree == 2) { + for(x=0; xvlcs[plane], buf+x); + } else if(s->coder_tree == 3) { + for(y=0; yvlcs[plane], buf+x+y*s->tstride); + } else { + abort(); + } +} + +static void write_lines(FFV2Context *s, uint8_t *buf, int width, int height, int plane) { + int x, y; + if(s->coder_tree == 0) { + for(y=0; ywrite_line(s, s->vlcs[plane], buf+y*s->tstride, width); + } else if(s->coder_tree == 1) { + for(y=0; ywrite_block(s, s->vlcs[plane], buf+x+y*s->tstride); + } else if(s->coder_tree == 2) { + for(x=0; xvlcs[plane], buf+x); + } else if(s->coder_tree == 3) { + for(y=0; yvlcs[plane], buf+x+y*s->tstride); + } else { + abort(); + } +} + +static void read_mvd(FFV2Context *s, int16_t *mv, int plane) { + VLCS *vlcs = &s->vlcs[plane*2+1][4]; + int v = get_vlc2(&s->gb, vlcs->vlc.table, VLC_BITS, 3); + int d; + mv[0] += (v&15)<15 ? (v&15)-7 + : (d=(int8_t)get_bits(&s->gb, 8)) != -128 ? d + : (int16_t)get_bits(&s->gb, 16); + v >>= 4; + mv[1] += (v&15)<15 ? (v&15)-7 + : (d=(int8_t)get_bits(&s->gb, 8)) != -128 ? 
d + : (int16_t)get_bits(&s->gb, 16); +} + +static void write_mvd(FFV2Context *s, int mvdx, int mvdy, int plane) { + VLCS *vlcs = &s->vlcs[plane*2+1][4]; + int x = FFMIN(15, (unsigned)(mvdx+7)); + int y = FFMIN(15, (unsigned)(mvdy+7)); + int v = x+(y<<4); + put_bits(&s->pb, vlcs->len[v], vlcs->bits[v]); + vlcs->stats[v]++; + if(x==15) { + if((unsigned)(mvdx+127) < 255) + put_bits(&s->pb, 8, mvdx&0xff); + else + put_bits(&s->pb, 24, (128<<16)+(mvdx&0xffff)); + } + if(y==15) { + if((unsigned)(mvdy+127) < 255) + put_bits(&s->pb, 8, mvdy&0xff); + else + put_bits(&s->pb, 24, (128<<16)+(mvdy&0xffff)); + } +} + +#define proc_table_sparse(id, size, map) {\ + if(read) err |= read_table(s, &s->vlcs[plane][id], size, map);\ + else write_table(s, &s->vlcs[plane][id], size);\ +} +#define proc_table(id, size) proc_table_sparse(id, size, NULL) + +static int proc_tables(FFV2Context *s, int read, int plane) { + int err = 0; + if(!s->fenc.key_frame && !(s->fref.key_frame && (plane&1))) { + // I-frames must contain I-tables, and the first P-frame in a GOP must contain P-tables. the rest are optional. 
+ int present; + if(read) present = get_bits(&s->gb, 8); + else present = *(s->bs++) = !(s->gop&3); + if(!present) + return 0; + } + if(plane&1) { + proc_table(4, 256); + return err; + } + if(s->coder_type == 1) { + s->read_line = read_line_v0; + s->write_line = write_line_v0; + proc_table(0, 256); + } else if(s->coder_type == 2) { + s->read_line = read_line_v1; + s->write_line = write_line_v1; + proc_table(0, 512); + proc_table(1, 240); + } else if(s->coder_type == 3) { + s->read_line = read_line_v2; + s->write_line = write_line_v2; + proc_table(0, 495); + proc_table(1, 241); + } else { + switch(s->coder_block) { + case 0: // merge 0s + s->read_block = read_block_v0; + s->write_block = write_block_v0; + proc_table(0, 255); + proc_table(1, 15 + (s->coder_tree==1)); + break; + case 1: // merge [-1,1] + s->read_block = read_block_v1; + s->write_block = write_block_v1; + proc_table(0, 253); + proc_table(1, 256); // should be 255 in the tree case, but the missing value is in the middle + break; + case 2: // merge [-2,2] + s->read_block = read_block_v2; + s->write_block = write_block_v2; + proc_table(0, 251); + proc_table_sparse(1, 1296, map_coder6); + break; + case 3: // merge [-3,3] + s->read_block = read_block_v3; + s->write_block = write_block_v3; + proc_table(0, 249); + proc_table(1, 4096); + break; + case 4: // merge log2 + s->read_block = read_block_v4; + s->write_block = write_block_v4; + proc_table(1, 4096); + break; + default: + return -2; + } + switch(s->coder_tree) { + case 1: // no tree + break; + case 2: // 8x8 quadtree + proc_table(2, 15); + proc_table(3, 16); + break; + case 3: // 8x4 octree + proc_table(2, 256); + break; + default: + return -2; + } + } + return err; +} + +static int sum_abs_int8(int8_t *buf, intptr_t stride) { + int sum=0; +#if HAVE_MMX + DECLARE_ALIGNED_8(static const uint64_t, pb_128) = 0x8080808080808080ULL; + __asm__ volatile( + "movq %4, %%mm7 \n" + "movq (%0), %%mm0 \n" + "movq (%0,%2), %%mm1 \n" + "movq (%0,%2,2), %%mm2 \n" + 
"movq (%0,%3), %%mm3 \n" + "lea (%0,%2,4), %0 \n" + "paddb %%mm7, %%mm0 \n" + "paddb %%mm7, %%mm1 \n" + "paddb %%mm7, %%mm2 \n" + "paddb %%mm7, %%mm3 \n" + "psadbw %%mm7, %%mm0 \n" + "psadbw %%mm7, %%mm1 \n" + "psadbw %%mm7, %%mm2 \n" + "psadbw %%mm7, %%mm3 \n" + "paddw %%mm2, %%mm0 \n" + "paddw %%mm3, %%mm1 \n" + "movq (%0), %%mm4 \n" + "movq (%0,%2), %%mm5 \n" + "movq (%0,%2,2), %%mm2 \n" + "movq (%0,%3), %%mm3 \n" + "paddb %%mm7, %%mm4 \n" + "paddb %%mm7, %%mm5 \n" + "paddb %%mm7, %%mm2 \n" + "paddb %%mm7, %%mm3 \n" + "psadbw %%mm7, %%mm4 \n" + "psadbw %%mm7, %%mm5 \n" + "psadbw %%mm7, %%mm2 \n" + "psadbw %%mm7, %%mm3 \n" + "paddw %%mm4, %%mm0 \n" + "paddw %%mm5, %%mm1 \n" + "paddw %%mm2, %%mm0 \n" + "paddw %%mm3, %%mm1 \n" + "paddw %%mm1, %%mm0 \n" + "movd %%mm0, %1 \n" + :"+&r"(buf), "=r"(sum) + :"r"(stride), "r"(stride*3), "m"(pb_128) + ); +#else + int x, y; + for(y=0; y<8; y++, buf+=stride) + for(x=0; x<8; x++) + sum += abs(buf[x]); +#endif + return sum; +} + +static void sub_block(uint8_t *dst, uint8_t *src, uint8_t *ref, intptr_t stride) { + __asm__ volatile( + "movq (%1), %%mm0 \n" + "movq (%1,%3), %%mm1 \n" + "movq (%1,%3,2), %%mm2 \n" + "movq (%1,%4), %%mm3 \n" + "psubb (%2), %%mm0 \n" + "psubb (%2,%3), %%mm1 \n" + "psubb (%2,%3,2), %%mm2 \n" + "psubb (%2,%4), %%mm3 \n" + "movq %%mm0, (%0) \n" + "movq %%mm1, (%0,%3) \n" + "movq %%mm2, (%0,%3,2) \n" + "movq %%mm3, (%0,%4) \n" + "lea (%0,%3,4), %0 \n" + "lea (%1,%3,4), %1 \n" + "lea (%2,%3,4), %2 \n" + "movq (%1), %%mm0 \n" + "movq (%1,%3), %%mm1 \n" + "movq (%1,%3,2), %%mm2 \n" + "movq (%1,%4), %%mm3 \n" + "psubb (%2), %%mm0 \n" + "psubb (%2,%3), %%mm1 \n" + "psubb (%2,%3,2), %%mm2 \n" + "psubb (%2,%4), %%mm3 \n" + "movq %%mm0, (%0) \n" + "movq %%mm1, (%0,%3) \n" + "movq %%mm2, (%0,%3,2) \n" + "movq %%mm3, (%0,%4) \n" + :"+&r"(dst), "+&r"(src), "+&r"(ref) + :"r"(stride), "r"(stride*3) + :"memory" + ); +} + +#define CHECK_MV(x, y) {\ + int cost = s->dsp.sad[1](NULL, src, ref+(x)+(y)*stride, stride, 8);\ + 
COPY3_IF_LT(bcost, cost, bmx, x, bmy, y);\ +} + +static void motion_search(FFV2Context *s, uint8_t *src, uint8_t *ref, int stride, int16_t (*mv_plane)[2], int mv_stride, int16_t *mv_out, int *mv_min, int *mv_max) { + int x, y, bmx=0, bmy=0, omx, omy; + int bcost = s->dsp.sad[1](NULL, src, ref, stride, 8); + for(y=-1; y<=1; y++) + for(x=-1; x<=1; x++) { + int mx = av_clip(mv_plane[x+y*mv_stride][0], mv_min[0]+1, mv_max[0]-1); + int my = av_clip(mv_plane[x+y*mv_stride][1], mv_min[1]+1, mv_max[1]-1); + if((mx|my) && ((mx-bmx)|(my-bmy))) + CHECK_MV(mx, my); + } + while(1) { + omx=bmx; omy=bmy; + CHECK_MV(omx-1, omy); + CHECK_MV(omx+1, omy); + CHECK_MV(omx, omy-1); + CHECK_MV(omx, omy+1); + if(bmx==omx && bmy==omy) + break; + if(bmx<=mv_min[0] || bmx>=mv_max[0] || bmy<=mv_min[1] || bmy>=mv_max[1]) + break; + } + mv_plane[0][0] = bmx; + mv_plane[0][1] = bmy; + if(bmx>mv_min[0] && bmxmv_min[1] && bmydsp.pix_abs[1][1](NULL, src, r-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy); + cost = s->dsp.pix_abs[1][1](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy); + cost = s->dsp.pix_abs[1][2](NULL, src, r-stride, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx, bmy, omy-1); + cost = s->dsp.pix_abs[1][2](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx, bmy, omy+1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-stride-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy-1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-stride, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy-1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy+1); + cost = s->dsp.pix_abs[1][3](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy+1); + } + mv_out[0] = bmx; + mv_out[1] = bmy; +} + +static void add_ffv2_median_prediction(uint8_t *dst, uint8_t *top, uint8_t *residual, int width, int l, int tl) { + int x; + for(x=0; xfenc.key_frame) { + for(y=0; 
ymb_types[x>>3]) { + // filter an intra block, with decoded samples as neighbors. + // then compute what would have been the inter residual, and write that back to the residual buffer. + int l = x ? dst[x-1] : 0; + int tl = x ? topi[x-1] : 0; + add_ffv2_median_prediction(dst+x, topi+x, src+x, 8, l, tl); + for(i=0; i<8; i++) + src[x+i] = dst[x+i] - ref[x+i]; + } else { + // filter an inter block, with inter residuals as neighbors, and add inter prediction. + int l = x ? src[x-1] : 0; + int tl = x ? topp[x-1] : 0; + add_ffv2_median_prediction(src+x, topp+x, src+x, 8, l, tl); + for(i=0; i<8; i++) + dst[x+i] = src[x+i] + ref[x+i]; + } + } + } + } +} + +static void enfilter_lines(FFV2Context *s, uint8_t *dst, uint8_t *src, int stride, int width, int height, int y0) { + int y=0, v; + if(!y0) { + s->dsp.diff_bytes(dst, src, src-1, width); + dst[0] = src[0]; + y = 1; + } + for(; yfenc.linesize[plane]; + int width = s->avctx->width >> !!plane; + int height = s->avctx->height >> !!plane; + int mb_width = (width+7)>>3; + int mb_height = (height+7)>>3; + uint8_t *residual = s->temp[0] + stride + 8; + uint8_t *pmc = s->temp[1] + stride; + int x, y; + s->tstride = stride; + + init_get_bits(&s->gb, s->bs, (s->bs_end - s->bs)*8); + if(proc_tables(s, 1, plane*2) < 0) + return -1; + if(!s->fenc.key_frame && proc_tables(s, 1, plane*2+1) < 0) + return -1; + + if(!s->fenc.key_frame) + s->dsp.draw_edges(s->fref.data[plane], stride, width, height, 8); + memset(residual-stride-8, 0, stride+8); + memset(s->mvs_top, 0, mb_width*sizeof(*s->mvs_top)); + + for(y=0; yfenc.data[plane] + y*stride; + uint8_t *ref = s->fref.data[plane] + y*stride; + if(s->fenc.key_frame) { + read_lines(s, residual, width, FFMIN(8,height-y), plane*2); + defilter_lines(s, dst, residual, NULL, stride, width, FFMIN(8,height-y), y); + } else { + for(x=0; xmb_types[x] = get_bits(&s->gb, 1); + s->mvs[x][0] = mid_pred(s->mvs[x-1][0], s->mvs_top[x][0], s->mvs_top[x+1][0]); + s->mvs[x][1] = mid_pred(s->mvs[x-1][1], 
s->mvs_top[x][1], s->mvs_top[x+1][1]); + if(s->mb_types[x]) + read_mvd(s, s->mvs[x], plane); + mvx = av_clip(s->mvs[x][0], -16*x-16, (mb_width-x)*16); + mvy = av_clip(s->mvs[x][1], -2*y-16, mb_height*16-2*y); + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + } + read_lines(s, residual, width, FFMIN(8,height-y), plane*2); + defilter_lines(s, dst, residual, pmc, stride, width, FFMIN(8,height-y), y); + memcpy(residual-stride, residual+7*stride, stride); + FFSWAP(void*, s->mvs, s->mvs_top); + } + } + + s->bs += (get_bits_count(&s->gb)+7)>>3; + return 0; +} + +#undef rand + +static void encode_plane(FFV2Context *s, int plane) { + int stride = s->fenc.linesize[plane]; + int width = s->avctx->width >> !!plane; + int height = s->avctx->height >> !!plane; + int mb_width = (width+7)>>3; + int mb_height = (height+7)>>3; + int mv_stride = mb_width+2; + uint8_t *imed = s->temp[0] + stride; ///< median-filtered intra samples + uint8_t *pmc = s->temp[1] + stride; ///< inter prediction + uint8_t *pdiff = s->temp[2] + stride + 8; ///< inter residual + uint8_t *pmed = s->temp[3] + stride; ///< median-filtered inter residual + int x, y; + s->tstride = stride; + + proc_tables(s, 0, plane*2); + if(!s->fenc.key_frame) + proc_tables(s, 0, plane*2+1); + init_put_bits(&s->pb, s->bs, (s->bs_end - s->bs)*8); + + if(!s->fenc.key_frame) + s->dsp.draw_edges(s->fref.data[plane], stride, width, height, 8); + memset(pdiff-stride-8, 0, 9*stride+8); + memset(s->mvs_top, 0, mb_width*sizeof(*s->mvs_top)); + + for(y=0; yfenc.data[plane] + y*stride; + uint8_t *ref = s->fref.data[plane] + y*stride; + enfilter_lines(s, imed, src, stride, width, FFMIN(8,height-y), y); + if(!s->fenc.key_frame) { + int16_t (*mv_plane)[2] = s->mv_plane[!!plane] + (y>>3)*mv_stride; + int mvx, mvy, mvpx, mvpy, isad, psad, type; + int mv_min[2] = {0, -y-8}; + int mv_max[2] = {0, mb_height*8-y}; + for(x=0; xmvs[x-1][0], s->mvs_top[x][0], s->mvs_top[x+1][0]); + mvpy = 
mid_pred(s->mvs[x-1][1], s->mvs_top[x][1], s->mvs_top[x+1][1]); + motion_search(s, src+8*x, ref+8*x, stride, mv_plane, mv_stride, s->mvs[x], mv_min, mv_max); + mvx = s->mvs[x][0]; + mvy = s->mvs[x][1]; + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + sub_block(pdiff+8*x, src+8*x, pmc+8*x, stride); + enfilter_pblock(s, pmed+8*x, pdiff+8*x, stride); + isad = sum_abs_int8(imed+8*x, stride); + psad = sum_abs_int8(pmed+8*x, stride); + type = s->mb_types[x] = psad < isad; + put_bits(&s->pb, 1, type); + if(type) { + write_mvd(s, mvx-mvpx, mvy-mvpy, plane); + s->dsp.put_pixels_tab[1][0](imed+8*x, pmed+8*x, stride, 8); + } else { + s->mvs[x][0] = mvpx; + s->mvs[x][1] = mvpy; + mvx = av_clip(mvpx, 2*mv_min[0], 2*mv_max[0]); + mvy = av_clip(mvpy, 2*mv_min[1], 2*mv_max[1]); + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + sub_block(pdiff+8*x, src+8*x, pmc+8*x, stride); + } + } + memcpy(pdiff-stride, pdiff+7*stride, stride); + FFSWAP(void*, s->mvs, s->mvs_top); + } + write_lines(s, imed, width, FFMIN(8,height-y), plane*2); + } + + s->bs += (put_bits_count(&s->pb)+7)>>3; + flush_put_bits(&s->pb); +} + +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size) { + FFV2Context *s = avctx->priv_data; + int err = 0; + s->bs = buf; + s->bs_end = buf + buf_size; + + if(s->bs[0]) { + s->fenc.key_frame = 1; + s->fenc.pict_type = FF_I_TYPE; + s->coder_type = s->bs[0]; + s->coder_tree = s->coder_type >> 3; + s->coder_block = s->coder_type & 7; + memset(s->mb_types, 0, s->mb_width); + } else { + FFSWAP(AVFrame, s->fenc, s->fref); + s->fenc.key_frame = 0; + s->fenc.pict_type = FF_P_TYPE; + } + s->bs++; + + err |= decode_plane(s, 0); + err |= decode_plane(s, 1); + err |= decode_plane(s, 2); + + *(AVFrame*)data = s->fenc; + *data_size = sizeof(s->fenc); + return err ? 
err : s->bs - buf; +} + +static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void *data) { + FFV2Context *s = avctx->priv_data; + s->bs = buf; + s->bs_end = buf + buf_size; + + if(s->gop >= avctx->gop_size) + s->gop = 0; + if(s->gop == 0) { + memset(s->mb_types, 0, s->mb_width); + } else { + FFSWAP(AVFrame, s->fenc, s->fref); + } + s->fenc.key_frame = !s->gop; + s->fenc.pict_type = s->fenc.key_frame ? FF_I_TYPE : FF_P_TYPE; + *(s->bs++) = s->fenc.key_frame ? s->coder_type : 0; + + // FIXME skip in intra-only mode + av_picture_copy((AVPicture*)&s->fenc, (AVPicture*)data, PIX_FMT_YUV420P, avctx->width, avctx->height); + + encode_plane(s, 0); + encode_plane(s, 1); + encode_plane(s, 2); + + if(!s->initted_vlc[s->fenc.key_frame]) { + // re-encode with updated vlc + // FIXME do this whenever stats change enough? + s->initted_vlc[s->fenc.key_frame] = 1; + s->bs = buf+1; + encode_plane(s, 0); + encode_plane(s, 1); + encode_plane(s, 2); + } + + s->gop++; + avctx->coded_frame = &s->fenc; + if(s->bs_end - s->bs < 4) + return -1; + return s->bs - buf; +} + +AVCodec ffv2_decoder = { + "ffv2", + CODEC_TYPE_VIDEO, + CODEC_ID_FFV2, + sizeof(FFV2Context), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, + NULL, +}; + +AVCodec ffv2_encoder = { + "ffv2", + CODEC_TYPE_VIDEO, + CODEC_ID_FFV2, + sizeof(FFV2Context), + encode_init, + encode_frame, + encode_end, + .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV420P}, +}; + diff --git a/libavformat/riff.c b/libavformat/riff.c index dfdc0fd..5e4772e 100644 --- a/libavformat/riff.c +++ b/libavformat/riff.c @@ -148,6 +148,7 @@ const AVCodecTag codec_bmp_tags[] = { { CODEC_ID_ASV2, MKTAG('A', 'S', 'V', '2') }, { CODEC_ID_VCR1, MKTAG('V', 'C', 'R', '1') }, { CODEC_ID_FFV1, MKTAG('F', 'F', 'V', '1') }, + { CODEC_ID_FFV2, MKTAG('F', 'F', 'V', '2') }, { CODEC_ID_XAN_WC4, MKTAG('X', 'x', 'a', 'n') }, { CODEC_ID_MIMIC, MKTAG('L', 'M', '2', '0') }, { CODEC_ID_MSRLE, MKTAG('m', 'r', 'l', 'e') },