diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 93420db..fa1be61 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -119,6 +119,8 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER)    += 8svx.o
 OBJS-$(CONFIG_ESCAPE124_DECODER)       += escape124.o
 OBJS-$(CONFIG_FFV1_DECODER)            += ffv1.o rangecoder.o
 OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1.o rangecoder.o
+OBJS-$(CONFIG_FFV2_DECODER)            += ffv2.o rangecoder.o
+OBJS-$(CONFIG_FFV2_ENCODER)            += ffv2.o rangecoder.o
 OBJS-$(CONFIG_FFVHUFF_DECODER)         += huffyuv.o
 OBJS-$(CONFIG_FFVHUFF_ENCODER)         += huffyuv.o
 OBJS-$(CONFIG_FLAC_DECODER)            += flacdec.o flacdata.o flac.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 5dbf1dc..937e808 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -99,6 +99,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (EIGHTSVX_FIB, eightsvx_fib);
     REGISTER_DECODER (ESCAPE124, escape124);
     REGISTER_ENCDEC  (FFV1, ffv1);
+    REGISTER_ENCDEC  (FFV2, ffv2);
     REGISTER_ENCDEC  (FFVHUFF, ffvhuff);
     REGISTER_ENCDEC  (FLASHSV, flashsv);
     REGISTER_DECODER (FLIC, flic);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 43a0695..d472c20 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -97,6 +97,7 @@ enum CodecID {
     CODEC_ID_ASV1,
     CODEC_ID_ASV2,
     CODEC_ID_FFV1,
+    CODEC_ID_FFV2, // FIXME should go at the end, but that would force manual merges
     CODEC_ID_4XM,
     CODEC_ID_VCR1,
     CODEC_ID_CLJR,
diff --git a/libavcodec/ffv2.c b/libavcodec/ffv2.c
new file mode 100644
index 0000000..8b41d15
--- /dev/null
+++ b/libavcodec/ffv2.c
@@ -0,0 +1,728 @@
+/*
+ * Copyright (C) 2010 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DEBUG
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mathops.h"
+#include "mpegvideo.h"
+#include "rectangle.h"
+#include "ffv2dsp.c"
+
+#define VLC_BITS 11
+#define NUM_TABLES 19
+#define NUM_VLCS 1296
+
+#define VLC_COEF_BLOCK 0 // (0..7)*2
+#define VLC_COEF_ESCAPE 1 // (0..7)*2+1
+#define VLC_CBP 16
+#define VLC_MV 17
+#define VLC_MBTYPE 18
+
+typedef struct {
+    VLC vlc;
+    uint8_t len[NUM_VLCS];
+    uint32_t bits[NUM_VLCS];
+    uint32_t stats[NUM_VLCS];
+} VLCS;
+
+typedef struct FFV2Context FFV2Context;
+struct FFV2Context {
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    GetBitContext gb;
+    PutBitContext pb;
+    uint8_t *bs, *bs_end;
+    VLCS vlcs[2][NUM_TABLES];
+    VLCS metavlc;
+    AVFrame fenc;
+    AVFrame fref;
+    int mb_width, mb_height;
+    int tstride;
+    uint8_t *temp[4];
+    uint8_t *mb_types; ///< intra: 0, inter: 1
+    int16_t (*mvs)[2]; ///< current row
+    int16_t (*mvs_top)[2]; ///< previous row
+    int16_t (*mvs_base)[2]; ///< buffer that the other mv arrays point into
+    int16_t (*mvps)[2];
+    int16_t (*mv_plane[2])[2]; ///< mvs chosen by motion est, not necessarily those that will be coded
+    int16_t (*mv_plane_base)[2];
+    int gop;
+    int initted_vlc[2];
+    int coder_type, coder_tree, coder_block;
+    int rd_bits; ///< ok so there's no D in RD, but it's the same concept
+    uint8_t quantize_block_context[37];
+};
+
+static uint8_t map_escape[251];
+static uint16_t map_coder3[81];
+static uint32_t map_coder6[1296];
+static uint8_t lut_block_sum[1296];
+
+static av_always_inline uint32_t pack8to32(int a, int b, int c, int d) {
+#if HAVE_BIGENDIAN
+    return (d&0xFF) + ((c&0xFF)<<8) + ((b&0xFF)<<16) + (a<<24);
+#else
+    return (a&0xFF) + ((b&0xFF)<<8) + ((c&0xFF)<<16) + (d<<24);
+#endif
+}
+
+static void lut_init(void) {
+    int i, j, k, l;
+    for(i=0; i<3; i++)
+    for(j=0; j<3; j++)
+    for(k=0; k<3; k++)
+    for(l=0; l<3; l++)
+        map_coder3[i*3*3*3+j*3*3+k*3+l] = (i<<6) + (j<<4) + (k<<2) + l;
+    for(i=0; i<6; i++)
+    for(j=0; j<6; j++)
+    for(k=0; k<6; k++)
+    for(l=0; l<6; l++) {
+        map_coder6[i*6*6*6+j*6*6+k*6+l] = (l==5) | (k==5)<<1 | (j==5)<<2 | (i==5)<<3 | pack8to32(l-2, k-2, j-2, i-2)<<4;
+        lut_block_sum[i*6*6*6+j*6*6+k*6+l] = abs(l-2) + abs(k-2) + abs(j-2) + abs(i-2);
+    }
+    for(i=0; i<251; i++)
+        map_escape[i] = i+3;
+}
+
+static void common_init(AVCodecContext *avctx) {
+    FFV2Context *s = avctx->priv_data;
+    memset(s, 0, sizeof(FFV2Context));
+    assert(!(avctx->flags&CODEC_FLAG_EMU_EDGE));
+    dsputil_init(&s->dsp, avctx);
+    s->avctx = avctx;
+    s->mb_width = (avctx->width+7)/8;
+    s->mb_height= (avctx->height+7)/8;
+    avctx->get_buffer(avctx, &s->fenc);
+    avctx->get_buffer(avctx, &s->fref);
+    s->tstride = s->fenc.linesize[0];
+    assert(s->tstride >= ((avctx->width+15)&~7));
+    s->temp[0] = av_malloc(s->tstride*10+16);
+    s->temp[1] = av_malloc(s->tstride*10+16);
+    s->temp[2] = av_malloc(s->tstride*10+16);
+    s->temp[3] = av_malloc(s->tstride*10+16);
+    s->mb_types = av_mallocz(s->mb_width+3);
+    s->mvs_base = av_mallocz(6*(s->mb_width+5)*sizeof(int16_t));
+    s->mvs = s->mvs_base+1;
+    s->mvs_top = s->mvs+s->mb_width+5;
+    s->mvps = s->mvs_top+s->mb_width+5;
+    memcpy(s->quantize_block_context, (uint8_t[]){0,0,0,1,1,2,2,3,3,4,4,4,5,5,5,5,5,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7}, 37);
+}
+
+static int decode_init(AVCodecContext *avctx) {
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+    common_init(avctx);
+    lut_init();
+    return 0;
+}
+
+static int encode_init(AVCodecContext *avctx) {
+    FFV2Context *s = avctx->priv_data;
+    int i, j, k;
+    common_init(avctx);
+    s->coder_type = avctx->coder_type;
+    if(s->coder_type == 0)
+        s->coder_type = 26;
+    s->coder_tree = s->coder_type >> 3;
+    s->coder_block = s->coder_type & 7;
+    if((s->coder_type > 3 && s->coder_type < 8) || s->coder_tree < 0 || s->coder_tree > 3 || s->coder_block > 4) {
+        av_log(avctx, AV_LOG_ERROR, "bad coder_type\n");
+        return -1;
+    }
+    s->mv_plane_base = av_mallocz(4*(s->mb_height+2)*(s->mb_width+2)*sizeof(int16_t));
+    s->mv_plane[0] = s->mv_plane_base+s->mb_width+3;
+    s->mv_plane[1] = s->mv_plane[0]+(s->mb_width+2)*(s->mb_height+2);
+    for(k=0; k<2; k++)
+    for(j=0; j<NUM_TABLES; j++)
+    for(i=0; i<NUM_VLCS; i++)
+        s->vlcs[k][j].stats[i] = 1;
+    return 0;
+}
+
+static void common_end(AVCodecContext *avctx) {
+    FFV2Context *s = avctx->priv_data;
+    avctx->release_buffer(avctx, &s->fenc);
+    avctx->release_buffer(avctx, &s->fref);
+    av_free(s->temp[0]);
+    av_free(s->temp[1]);
+    av_free(s->temp[2]);
+    av_free(s->temp[3]);
+    av_free(s->mb_types);
+    av_free(s->mvs_base);
+}
+
+static int decode_end(AVCodecContext *avctx) {
+    FFV2Context *s = avctx->priv_data;
+    int i, j;
+    for(j=0; j<2; j++)
+    for(i=0; i<NUM_TABLES; i++)
+        free_vlc(&s->vlcs[j][i].vlc);
+    free_vlc(&s->metavlc.vlc);
+    common_end(avctx);
+    return 0;
+}
+
+static int encode_end(AVCodecContext *avctx) {
+    FFV2Context *s = avctx->priv_data;
+    common_end(avctx);
+    av_free(s->mv_plane_base);
+    return 0;
+}
+
+static inline void put_vlc(FFV2Context *s, VLCS *vlcs, int v) {
+    put_bits(&s->pb, vlcs->len[v], vlcs->bits[v]);
+    vlcs->stats[v]++;
+}
+
+static inline void size_vlc(FFV2Context *s, VLCS *vlcs, int v) {
+    s->rd_bits += vlcs->len[v];
+}
+
+#define RDO_SKIP_BS 0
+#include "ffv2bitstream.c"
+#undef RDO_SKIP_BS
+#define RDO_SKIP_BS 1
+#include "ffv2bitstream.c"
+#undef RDO_SKIP_BS
+
+// FIXME code duplication from huffyuv.c
+// FIXME port optimizations back
+static int generate_bits_table(uint32_t *dst, uint8_t *len_table, int size){
+    int len, index;
+    uint32_t bits=1;
+
+    for(len=1; len<32 && ~bits; len++){
+        for(index=size-1; index>=0; index--){
+            if(len_table[index] == len)
+                dst[index] = bits--;
+        }
+        bits = bits*2+1;
+    }
+    if(~bits){
+        av_log(NULL, AV_LOG_ERROR, "Error generating huffman table\n");
+        return -1;
+    }
+    return 0;
+}
+
+typedef struct {
+    uint64_t val;
+    int name;
+} HeapElem;
+
+static void heap_sift(HeapElem *h, int root, int size)
+{
+    while(root*2+1 < size) {
+        int child = root*2+1;
+        if(child < size-1 && h[child].val > h[child+1].val)
+            child++;
+        if(h[root].val > h[child].val) {
+            FFSWAP(HeapElem, h[root], h[child]);
+            root = child;
+        } else
+            break;
+    }
+}
+
+static void generate_len_table(uint8_t *dst, uint32_t *stats, int size){
+    HeapElem h[size];
+    int up[2*size];
+    int len[2*size];
+    int offset, i, next;
+
+    for(offset=1; ; offset<<=1){
+        for(i=0; i<size; i++){
+            h[i].name = i;
+            h[i].val = (stats[i] << 8) + offset;
+        }
+        for(i=size/2-1; i>=0; i--)
+            heap_sift(h, i, size);
+
+        for(next=size; next<size*2-1; next++){
+            // merge the two smallest entries, and put it back in the heap
+            uint64_t min1v = h[0].val;
+            up[h[0].name] = next;
+            h[0].val = INT64_MAX;
+            heap_sift(h, 0, size);
+            up[h[0].name] = next;
+            h[0].name = next;
+            h[0].val += min1v;
+            heap_sift(h, 0, size);
+        }
+
+        len[2*size-2] = 0;
+        for(i=2*size-3; i>=size; i--)
+            len[i] = len[up[i]] + 1;
+        for(i=0; i<size; i++) {
+            dst[i] = len[up[i]] + 1;
+            if(dst[i] >= 32) break;
+        }
+        if(i==size) break;
+    }
+}
+
+static int read_table(FFV2Context *s, VLCS* vlcs, int size, void *map, int map_elem, int use_metavlc) {
+    if(!s->fenc.key_frame && !get_bits1(&s->gb))
+        return 0;
+    if(read_len_table(s, vlcs->len, size, use_metavlc))
+        return -1;
+    if(generate_bits_table(vlcs->bits, vlcs->len, size))
+        return -1;
+    free_vlc(&vlcs->vlc);
+    init_vlc_sparse(&vlcs->vlc, VLC_BITS, size, vlcs->len, 1, 1, vlcs->bits, 4, 4, map, map_elem, map_elem, 0);
+    return 0;
+}
+
+static void write_table(FFV2Context *s, VLCS* vlcs, int size, int
use_metavlc) { + int i; + if(s->fenc.key_frame) { + generate_len_table(vlcs->len, vlcs->stats, size); + } else { + uint8_t len[NUM_VLCS]; + int entropy_old=0, entropy_new=0; + int present=0; + if(s->gop<4 || !(s->gop&1)) { // skip on some frames to save encoder time + generate_len_table(len, vlcs->stats, size); + for(i=0; istats[i] * vlcs->len[i]; + entropy_new += vlcs->stats[i] * len[i]; + } + s->rd_bits = 0; + size_len_table(s, len, size, use_metavlc); + entropy_new += s->rd_bits; + entropy_new += size; // arbitrary penalty for cputime and possibility of future stats mismatch + present = entropy_old - entropy_new > 0; + } + put_bits(&s->pb, 1, present); + if(!present) return; + memcpy(vlcs->len, len, size); + } + generate_bits_table(vlcs->bits, vlcs->len, size); + write_len_table(s, vlcs->len, size, use_metavlc); + for(i=0; istats[i] = (vlcs->stats[i]+1)>>1; +} + +#define proc_table_sparse(id, size, map, map_elem) {\ + if(read) err |= read_table(s, &s->vlcs[plane][id], size, map, map_elem, 1);\ + else write_table(s, &s->vlcs[plane][id], size, 1);\ +} +#define proc_table(id, size) proc_table_sparse(id, size, NULL, 0) + +static int proc_tables(FFV2Context *s, int read, int plane) { + int err = 0; + int i; + if(s->fenc.key_frame && plane==0) { + if(read) err |= read_table(s, &s->metavlc, 256, NULL, 0, 0); + else write_table(s, &s->metavlc, 256, 0); + } + for(i=0; i<8; i++) + proc_table(i*2+VLC_COEF_BLOCK, 1296); + for(i=0; i<8; i++) + proc_table_sparse(i*2+VLC_COEF_ESCAPE, 251, map_escape, sizeof(*map_escape)); + proc_table(VLC_CBP, 256); + proc_table(VLC_MV, 256); + proc_table_sparse(VLC_MBTYPE, 81, map_coder3, sizeof(*map_coder3)); + return err; +} + +static void read_lines(FFV2Context *s, uint8_t *buf, uint8_t *sums, int width, int height, int plane) { + int x, y; + int sstride = s->tstride>>1; + for(x=0; xvlcs[plane], buf+x+y*s->tstride, sums+(x>>1)+(y>>1)*sstride); + memcpy(sums-sstride, sums+sstride*3, sstride); +} + +static void write_lines(FFV2Context *s, uint8_t *buf, int width, int height, int plane) { + int x, y; + for(x=0; xvlcs[plane], buf+x+y*s->tstride); +} + +static int size_mb(FFV2Context *s, uint8_t *buf, int type, int mvdx, int mvdy, int plane) { + s->rd_bits = 0; + size_tree(s, s->vlcs[plane], buf); + size_tree(s, s->vlcs[plane], buf+4*s->tstride); + if(type) + size_mvd(s, mvdx, mvdy, plane); + return s->rd_bits; +} + +static void read_mvs(FFV2Context *s, int mb_width, int plane) { + int x, i; + for(i=0; igb, s->vlcs[plane][VLC_MBTYPE].vlc.table, VLC_BITS, 3); + for(x=i; x>=2) { + s->mvs[x][0] = mid_pred(s->mvs[x-1][0], s->mvs_top[x][0], s->mvs_top[x+1][0]); + s->mvs[x][1] = mid_pred(s->mvs[x-1][1], s->mvs_top[x][1], s->mvs_top[x+1][1]); + s->mb_types[x] = (v&3)>0; + if((v&3)==2) + read_mvd(s, s->mvs[x], plane); + } + } +} + +static void write_mvs(FFV2Context *s, int mb_width, int plane) { + int x, i; + for(i=0; i=i; x--) { + v *= 3; + v += !s->mb_types[x] ? 0 : *(uint32_t*)s->mvs[x] == *(uint32_t*)s->mvps[x] ? 
1 : 2; + } + put_vlc(s, &s->vlcs[plane][VLC_MBTYPE], v); + for(x=i; xmvs[x][0]-s->mvps[x][0]; + int dy = s->mvs[x][1]-s->mvps[x][1]; + if(dx|dy) + write_mvd(s, dx, dy, plane); + } + } +} + +#define CHECK_MV(x, y) {\ + int cost = s->dsp.sad[1](NULL, src, ref+(x)+(y)*stride, stride, 8);\ + COPY3_IF_LT(bcost, cost, bmx, x, bmy, y);\ +} + +static void motion_search(FFV2Context *s, uint8_t *src, uint8_t *ref, int stride, int16_t (*mv_plane)[2], int mv_stride, int16_t *mv_out, int *mv_min, int *mv_max) { + int x, y, bmx=0, bmy=0, omx, omy; + int bcost = s->dsp.sad[1](NULL, src, ref, stride, 8); + for(y=-1; y<=1; y++) + for(x=-1; x<=1; x++) { + int mx = av_clip(mv_plane[x+y*mv_stride][0], mv_min[0]+1, mv_max[0]-1); + int my = av_clip(mv_plane[x+y*mv_stride][1], mv_min[1]+1, mv_max[1]-1); + if((mx|my) && ((mx-bmx)|(my-bmy))) + CHECK_MV(mx, my); + } + while(1) { + omx=bmx; omy=bmy; + CHECK_MV(omx-1, omy); + CHECK_MV(omx+1, omy); + CHECK_MV(omx, omy-1); + CHECK_MV(omx, omy+1); + if(bmx==omx && bmy==omy) + break; + if(bmx<=mv_min[0] || bmx>=mv_max[0] || bmy<=mv_min[1] || bmy>=mv_max[1]) + break; + } + mv_plane[0][0] = bmx; + mv_plane[0][1] = bmy; + if(bmx>mv_min[0] && bmxmv_min[1] && bmydsp.pix_abs[1][1](NULL, src, r-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy); + cost = s->dsp.pix_abs[1][1](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy); + cost = s->dsp.pix_abs[1][2](NULL, src, r-stride, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx, bmy, omy-1); + cost = s->dsp.pix_abs[1][2](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx, bmy, omy+1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-stride-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy-1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-stride, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy-1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy+1); + cost = s->dsp.pix_abs[1][3](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy+1); + } + mv_out[0] = bmx; + mv_out[1] = bmy; +} + +static void defilter_lines(FFV2Context *s, uint8_t *dst, uint8_t *src, uint8_t *ref, int stride, int width, int height) { + int x, y, i; + for(y=0; y<8; y++) + *(uint16_t*)&dst[-2+y*stride] = dst[width+y*stride] = 0; + if(s->fenc.key_frame) { + for(y=0; ymb_types[x>>3]) { + // filter an intra block, with decoded samples as neighbors. + // then compute what would have been the inter residual, and write that back to the residual buffer. + add_ffv2_median_prediction(dst+x, src+x, stride, 8); + for(i=0; i<8; i++) + src[x+i] = dst[x+i] - ref[x+i]; + } else { + // filter an inter block, with inter residuals as neighbors, and add inter prediction. 
+ add_ffv2_median_prediction(src+x, src+x, stride, 8); + for(i=0; i<8; i++) + dst[x+i] = src[x+i] + ref[x+i]; + } + } + } + } +} + +static void enfilter_lines(FFV2Context *s, uint8_t *dst, uint8_t *src, int stride, int width, int height) { + int y; + for(y=0; y<8; y++) + *(uint16_t*)&src[-2+y*stride] = src[width+y*stride] = 0; + for(y=0; yfenc.linesize[plane]; + int width = s->avctx->width >> !!plane; + int height = s->avctx->height >> !!plane; + int mb_width = (width+7)>>3; + int mb_height = (height+7)>>3; + uint8_t *residual = s->temp[0] + 2*stride + 8; + uint8_t *pmc = s->temp[1] + 2*stride + 8; + uint8_t *sums = s->temp[2] + 2*stride + 8; + int x, y; + s->tstride = stride; + + init_get_bits(&s->gb, s->bs, (s->bs_end - s->bs)*8); + if(plane < 2) + if(proc_tables(s, 1, plane) < 0) + return -1; + + if(!s->fenc.key_frame) + s->dsp.draw_edges(s->fref.data[plane], stride, width, height, 8); + memset(residual-2*stride-8, 0, 10*stride+8); + memset(sums-stride-8, 0, 3*stride+8); // a bit overkill + memset(s->mvs_base, 0, 6*(s->mb_width+5)*sizeof(int16_t)); + memset(s->mb_types, 0, mb_width+3); + memset(s->fenc.data[plane]-2*stride, 0, 2*stride); + + for(y=0; yfenc.data[plane] + y*stride; + uint8_t *ref = s->fref.data[plane] + y*stride; + if(s->fenc.key_frame) { + read_lines(s, residual, sums, width, FFMIN(8,height-y), !!plane); + defilter_lines(s, dst, residual, NULL, stride, width, FFMIN(8,height-y)); + } else { + read_mvs(s, mb_width, !!plane); + for(x=0; xmvs[x][0], -16*x-16, (mb_width-x)*16); + mvy = av_clip(s->mvs[x][1], -2*y-16, mb_height*16-2*y); + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + } + read_lines(s, residual, sums, width, FFMIN(8,height-y), !!plane); + defilter_lines(s, dst, residual, pmc, stride, width, FFMIN(8,height-y)); + memcpy(residual-2*stride, residual+6*stride, 2*stride); + FFSWAP(void*, s->mvs, s->mvs_top); + } + } + + s->bs += (get_bits_count(&s->gb)+7)>>3; + return 0; +} + +static void encode_plane(FFV2Context *s, int plane) { + int stride = s->fenc.linesize[plane]; + int width = s->avctx->width >> !!plane; + int height = s->avctx->height >> !!plane; + int mb_width = (width+7)>>3; + int mb_height = (height+7)>>3; + int mv_stride = mb_width+2; + uint8_t *imed = s->temp[0] + 2*stride + 8; ///< median-filtered intra samples + uint8_t *pmc = s->temp[1] + 2*stride + 8; ///< inter prediction + uint8_t *pdiff = s->temp[2] + 2*stride + 8; ///< inter residual + uint8_t *pmed = s->temp[3] + 2*stride + 8; ///< median-filtered inter residual + int x, y; + s->tstride = stride; + + init_put_bits(&s->pb, s->bs, (s->bs_end - s->bs)*8); + if(plane < 2) + proc_tables(s, 0, plane); + + if(!s->fenc.key_frame) + s->dsp.draw_edges(s->fref.data[plane], stride, width, height, 8); + memset(imed-2*stride-8, 0, 10*stride+8); + memset(pdiff-2*stride-8, 0, 10*stride+8); + memset(s->mvs_base, 0, 6*(s->mb_width+5)*sizeof(int16_t)); + memset(s->mb_types, 0, mb_width+3); + memset(s->fenc.data[plane]-2*stride, 0, 2*stride); + + for(y=0; yfenc.data[plane] + y*stride; + uint8_t *ref = s->fref.data[plane] + y*stride; + enfilter_lines(s, imed, src, stride, width, FFMIN(8,height-y)); + if(!s->fenc.key_frame) { + int16_t (*mv_plane)[2] = s->mv_plane[!!plane] + (y>>3)*mv_stride; + int mvx, mvy, mvpx, mvpy, isad, psad, type; + int mv_min[2] = {0, -y-8}; + int mv_max[2] = {0, mb_height*8-y}; + for(x=0; xmvps[x][0] = mid_pred(s->mvs[x-1][0], s->mvs_top[x][0], s->mvs_top[x+1][0]); + mvpy = s->mvps[x][1] = mid_pred(s->mvs[x-1][1], s->mvs_top[x][1], 
s->mvs_top[x+1][1]); + motion_search(s, src+8*x, ref+8*x, stride, mv_plane, mv_stride, s->mvs[x], mv_min, mv_max); + mvx = s->mvs[x][0]; + mvy = s->mvs[x][1]; + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + sub_block(pdiff+8*x, src+8*x, pmc+8*x, stride); + if(x>1))+(mvy>>1)*stride, stride); + enfilter_pblock(s, pmed+8*x, pdiff+8*x, stride); + if(s->avctx->mb_decision) { + isad = size_mb(s, imed+8*x, 0, 0, 0, !!plane); + psad = size_mb(s, pmed+8*x, 1, mvx-mvpx, mvy-mvpy, !!plane); + } else { + isad = sum_abs_int8(imed+8*x, stride); + psad = sum_abs_int8(pmed+8*x, stride); + } + type = s->mb_types[x] = psad < isad; + if(!type) { + s->mvs[x][0] = mvpx; + s->mvs[x][1] = mvpy; + mvx = av_clip(mvpx, 2*mv_min[0], 2*mv_max[0]); + mvy = av_clip(mvpy, 2*mv_min[1], 2*mv_max[1]); + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + sub_block(pdiff+8*x, src+8*x, pmc+8*x, stride); + } + } + for(x=0; xmb_types[x]) // FIXME only needs to fixup the rightmost column + enfilter_pblock(s, imed+8*x, pdiff+8*x, stride); + write_mvs(s, mb_width, !!plane); + memcpy(pdiff-2*stride, pdiff+6*stride, 2*stride); + FFSWAP(void*, s->mvs, s->mvs_top); + } + write_lines(s, imed, width, FFMIN(8,height-y), !!plane); + memcpy(imed-2*stride, imed+6*stride, 2*stride); + } + + s->bs += (put_bits_count(&s->pb)+7)>>3; + flush_put_bits(&s->pb); +} + +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { + uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + FFV2Context *s = avctx->priv_data; + int err = 0; + s->bs = buf; + s->bs_end = buf + buf_size; + + if(s->bs[0]) { + s->fenc.key_frame = 1; + s->fenc.pict_type = FF_I_TYPE; + s->coder_type = s->bs[0]; + s->coder_tree = s->coder_type >> 3; + s->coder_block = s->coder_type & 7; + } else { + FFSWAP(AVFrame, s->fenc, s->fref); + s->fenc.key_frame = 0; + s->fenc.pict_type = FF_P_TYPE; + } + s->bs++; + + err |= decode_plane(s, 0); + err |= decode_plane(s, 1); + err |= decode_plane(s, 2); + + *(AVFrame*)data = s->fenc; + *data_size = sizeof(s->fenc); + return err ? err : s->bs - buf; +} + +static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void *data) { + FFV2Context *s = avctx->priv_data; + s->bs = buf; + s->bs_end = buf + buf_size; + + if(s->gop >= avctx->gop_size) + s->gop = 0; + if(s->gop) { + FFSWAP(AVFrame, s->fenc, s->fref); + } + s->fenc.key_frame = !s->gop; + s->fenc.pict_type = s->fenc.key_frame ? FF_I_TYPE : FF_P_TYPE; + *(s->bs++) = s->fenc.key_frame ? s->coder_type : 0; + + // FIXME skip in intra-only mode + av_picture_copy((AVPicture*)&s->fenc, (AVPicture*)data, PIX_FMT_YUV420P, avctx->width, avctx->height); + + encode_plane(s, 0); + encode_plane(s, 1); + encode_plane(s, 2); + + if(!s->initted_vlc[s->fenc.key_frame]) { + // re-encode with updated vlc + // FIXME do this whenever stats change enough? 
+ s->bs = buf+1; + encode_plane(s, 0); + encode_plane(s, 1); + encode_plane(s, 2); + s->initted_vlc[s->fenc.key_frame] = 1; + } + + s->gop++; + avctx->coded_frame = &s->fenc; + if(s->bs_end - s->bs < 4) + return -1; + return s->bs - buf; +} + +AVCodec ffv2_decoder = { + "ffv2", + CODEC_TYPE_VIDEO, + CODEC_ID_FFV2, + sizeof(FFV2Context), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, + NULL, +}; + +AVCodec ffv2_encoder = { + "ffv2", + CODEC_TYPE_VIDEO, + CODEC_ID_FFV2, + sizeof(FFV2Context), + encode_init, + encode_frame, + encode_end, + .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV420P}, +}; + diff --git a/libavcodec/ffv2bitstream.c b/libavcodec/ffv2bitstream.c new file mode 100644 index 0000000..f9a228c --- /dev/null +++ b/libavcodec/ffv2bitstream.c @@ -0,0 +1,205 @@ +/* + * Copyright (C) 2010 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +// This file is compiled twice, to provide the bitstream writer and +// corresponding functions that just count how many bits would be written. +// The reader doesn't need to be duplicated, but is here just so that it's +// next to the writer. + +#if RDO_SKIP_BS +#define put_bits(pb,n,x) s->rd_bits += n +#define put_vlc size_vlc +#define write_len_table size_len_table +#define write_mvd size_mvd +#define write_block size_block +#define write_tree size_tree +#endif + +#if !RDO_SKIP_BS +// TODO: gradient prediction? predict from the previous contents of the same table? +static int read_len_table(FFV2Context *s, uint8_t *len, int size, int use_metavlc){ + int val=1, repeat=-1, code, i; + + for(i=0; igb, s->metavlc.vlc.table, VLC_BITS, 3) : get_bits(&s->gb, 8); + repeat = code >> 5; + val = (val+code) & 31; + if(repeat==0) + repeat = get_bits(&s->gb, 8) + 8; + while(repeat-- && i0 && repeat<264 && repeat>0); + + code = (val - prev_val) & 31; + prev_val = val; + if(repeat<8) + code |= repeat<<5; + if(use_metavlc) + put_vlc(s, &s->metavlc, code); + else + put_bits(&s->pb, 8, code); + if(repeat>=8) + put_bits(&s->pb, 8, repeat-8); + } +} + +#if !RDO_SKIP_BS +static void read_mvd(FFV2Context *s, int16_t *mv, int plane) { + VLCS *vlcs = &s->vlcs[plane][VLC_MV]; + int v = get_vlc2(&s->gb, vlcs->vlc.table, VLC_BITS, 3); + int d; + mv[0] += (v&15)<15 ? (v&15)-7 + : (d=(int8_t)get_bits(&s->gb, 8)) != -128 ? d + : (int16_t)get_bits(&s->gb, 16); + v >>= 4; + mv[1] += (v&15)<15 ? (v&15)-7 + : (d=(int8_t)get_bits(&s->gb, 8)) != -128 ? 
d + : (int16_t)get_bits(&s->gb, 16); +} +#endif //!RDO_SKIP_BS + +static void write_mvd(FFV2Context *s, int mvdx, int mvdy, int plane) { + VLCS *vlcs = &s->vlcs[plane][VLC_MV]; + int x = FFMIN(15, (unsigned)(mvdx+7)); + int y = FFMIN(15, (unsigned)(mvdy+7)); + int v = x+(y<<4); + put_vlc(s, vlcs, v); + if(x==15) { + if((unsigned)(mvdx+127) < 255) + put_bits(&s->pb, 8, mvdx&0xff); + else + put_bits(&s->pb, 24, (128<<16)+(mvdx&0xffff)); + } + if(y==15) { + if((unsigned)(mvdy+127) < 255) + put_bits(&s->pb, 8, mvdy&0xff); + else + put_bits(&s->pb, 24, (128<<16)+(mvdy&0xffff)); + } +} + +#if !RDO_SKIP_BS +static int block_context(int8_t *residual, int stride) { + // TODO simd or lut + int sum = 0; + int x,y; + for(y=-2; y<0; y++) + for(x=-2; x<2; x++) + sum += FFMIN(3,abs(residual[x+y*stride])); + for(y=0; y<2; y++) + for(x=-2; x<0; x++) + sum += FFMIN(3,abs(residual[x+y*stride])); + return sum; +} + +static av_always_inline void read_escape(FFV2Context *s, VLCS *vlcs, int8_t *residual, int flags) { + if(flags&1) residual[0] = get_vlc2(&s->gb, vlcs[VLC_COEF_ESCAPE].vlc.table, VLC_BITS, 3); + if(flags&2) residual[1] = get_vlc2(&s->gb, vlcs[VLC_COEF_ESCAPE].vlc.table, VLC_BITS, 3); + if(flags&4) residual[s->tstride] = get_vlc2(&s->gb, vlcs[VLC_COEF_ESCAPE].vlc.table, VLC_BITS, 3); + if(flags&8) residual[s->tstride+1] = get_vlc2(&s->gb, vlcs[VLC_COEF_ESCAPE].vlc.table, VLC_BITS, 3); +} + +#define DECL_READ_ESCAPE(flags) \ +static void read_escape##flags(FFV2Context *s, VLCS *vlcs, int8_t *residual) {\ + read_escape(s, vlcs, residual, flags);\ +} +#define NAME_READ_ESCAPE(flags) read_escape##flags, +#define OP16(op) op(0) op(1) op(2) op(3) op(4) op(5) op(6) op(7) op(8) op(9) op(10) op(11) op(12) op(13) op(14) op(15) +OP16(DECL_READ_ESCAPE) +static void (*read_escape_tab[16])(FFV2Context *s, VLCS *vlcs, int8_t *residual) = { OP16(NAME_READ_ESCAPE) }; + +static void read_block(FFV2Context *s, VLCS *vlcs, int8_t *residual, uint8_t *sums) { + int code, v, ctx; + ctx = sums[-1] + sums[-(s->tstride>>1)] + sums[-1-(s->tstride>>1)]; + ctx = s->quantize_block_context[ctx]; + vlcs += 2*ctx; + code = get_vlc2(&s->gb, vlcs[VLC_COEF_BLOCK].vlc.table, VLC_BITS, 3); + sums[0] = lut_block_sum[code]; + v = map_coder6[code]; + AV_WN16(residual, v>>4); + AV_WN16(residual+s->tstride, v>>20); + read_escape_tab[v&15](s, vlcs, residual); +} + +static void read_tree(FFV2Context *s, VLCS *vlcs, uint8_t *residual, uint8_t *sums) { + int cbp; + int bstride = s->tstride<<1; + int sstride = s->tstride>>1; + fill_rectangle(residual, 4, 4, sstride, 0, 2); + fill_rectangle(sums, 4, 2, sstride, 0, 1); + cbp = get_vlc2(&s->gb, vlcs[VLC_CBP].vlc.table, VLC_BITS, 3); + if(cbp&0x01) read_block(s, vlcs, residual+0, sums+0); + if(cbp&0x02) read_block(s, vlcs, residual+2, sums+1); + if(cbp&0x04) read_block(s, vlcs, residual+4, sums+2); + if(cbp&0x08) read_block(s, vlcs, residual+6, sums+3); + if(cbp&0x10) read_block(s, vlcs, residual+bstride+0, sums+sstride+0); + if(cbp&0x20) read_block(s, vlcs, residual+bstride+2, sums+sstride+1); + if(cbp&0x40) read_block(s, vlcs, residual+bstride+4, sums+sstride+2); + if(cbp&0x80) read_block(s, vlcs, residual+bstride+6, sums+sstride+3); +} +#endif //!RDO_SKIP_BS + +static void write_block(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + uint8_t a = 2+residual[0]; + uint8_t b = 2+residual[1]; + uint8_t c = 2+residual[s->tstride]; + uint8_t d = 2+residual[s->tstride+1]; + int v = FFMIN(a,5) + 6*FFMIN(b,5) + 36*FFMIN(c,5) + 216*FFMIN(d,5); + vlcs += 2*s->quantize_block_context[block_context(residual, 
s->tstride)]; + put_vlc(s, &vlcs[VLC_COEF_BLOCK], v); + if(a>=5) put_vlc(s, &vlcs[VLC_COEF_ESCAPE], a-5); + if(b>=5) put_vlc(s, &vlcs[VLC_COEF_ESCAPE], b-5); + if(c>=5) put_vlc(s, &vlcs[VLC_COEF_ESCAPE], c-5); + if(d>=5) put_vlc(s, &vlcs[VLC_COEF_ESCAPE], d-5); +} + +static void write_tree(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp, i; + cbp = cbp_8x4_raster(residual, s->tstride); + put_vlc(s, &vlcs[VLC_CBP], cbp); + for(i=0; cbp; i++, cbp>>=1) + if(cbp&1) + write_block(s, vlcs, residual+2*(i&3)+2*(i>>2)*s->tstride); +} + +#undef put_bits +#undef put_vlc +#undef write_len_table +#undef write_mvd +#undef write_block +#undef write_tree diff --git a/libavcodec/ffv2dsp.c b/libavcodec/ffv2dsp.c new file mode 100644 index 0000000..858799b --- /dev/null +++ b/libavcodec/ffv2dsp.c @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2010 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +static int cbp_8x4_raster(uint8_t *res, int stride) { + int cbp; + cbp = !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 0; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 1; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 2; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 3; + res += 2*stride; + cbp |= !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 4; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 5; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 6; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 7; + return cbp; +} + +static int sum_abs_int8(int8_t *buf, intptr_t stride) { + int sum=0; +#if HAVE_MMX + DECLARE_ASM_CONST(8, uint64_t, pb_128) = 0x8080808080808080ULL; + __asm__ volatile( + "movq %4, %%mm7 \n" + "movq (%0), %%mm0 \n" + "movq (%0,%2), %%mm1 \n" + "movq (%0,%2,2), %%mm2 \n" + "movq (%0,%3), %%mm3 \n" + "lea (%0,%2,4), %0 \n" + "paddb %%mm7, %%mm0 \n" + "paddb %%mm7, %%mm1 \n" + "paddb %%mm7, %%mm2 \n" + "paddb %%mm7, %%mm3 \n" + "psadbw %%mm7, %%mm0 \n" + "psadbw %%mm7, %%mm1 \n" + "psadbw %%mm7, %%mm2 \n" + "psadbw %%mm7, %%mm3 \n" + "paddw %%mm2, %%mm0 \n" + "paddw %%mm3, %%mm1 \n" + "movq (%0), %%mm4 \n" + "movq (%0,%2), %%mm5 \n" + "movq (%0,%2,2), %%mm2 \n" + "movq (%0,%3), %%mm3 \n" + "paddb %%mm7, %%mm4 \n" + "paddb %%mm7, %%mm5 \n" + "paddb %%mm7, %%mm2 \n" + "paddb %%mm7, %%mm3 \n" + "psadbw %%mm7, %%mm4 \n" + "psadbw %%mm7, %%mm5 \n" + "psadbw %%mm7, %%mm2 \n" + "psadbw %%mm7, %%mm3 \n" + "paddw %%mm4, %%mm0 \n" + "paddw %%mm5, %%mm1 \n" + "paddw %%mm2, %%mm0 \n" + "paddw %%mm3, %%mm1 \n" + "paddw %%mm1, %%mm0 \n" + "movd %%mm0, %1 \n" + :"+&r"(buf), "=r"(sum) + :"r"(stride), "r"(stride*3), "m"(pb_128) + ); +#else + int x, y; + for(y=0; y<8; y++, buf+=stride) + for(x=0; x<8; x++) + sum += abs(buf[x]); +#endif + return sum; +} + +static void 
sub_block(uint8_t *dst, uint8_t *src, uint8_t *ref, intptr_t stride) { + __asm__ volatile( + "movq (%1), %%mm0 \n" + "movq (%1,%3), %%mm1 \n" + "movq (%1,%3,2), %%mm2 \n" + "movq (%1,%4), %%mm3 \n" + "psubb (%2), %%mm0 \n" + "psubb (%2,%3), %%mm1 \n" + "psubb (%2,%3,2), %%mm2 \n" + "psubb (%2,%4), %%mm3 \n" + "movq %%mm0, (%0) \n" + "movq %%mm1, (%0,%3) \n" + "movq %%mm2, (%0,%3,2) \n" + "movq %%mm3, (%0,%4) \n" + "lea (%0,%3,4), %0 \n" + "lea (%1,%3,4), %1 \n" + "lea (%2,%3,4), %2 \n" + "movq (%1), %%mm0 \n" + "movq (%1,%3), %%mm1 \n" + "movq (%1,%3,2), %%mm2 \n" + "movq (%1,%4), %%mm3 \n" + "psubb (%2), %%mm0 \n" + "psubb (%2,%3), %%mm1 \n" + "psubb (%2,%3,2), %%mm2 \n" + "psubb (%2,%4), %%mm3 \n" + "movq %%mm0, (%0) \n" + "movq %%mm1, (%0,%3) \n" + "movq %%mm2, (%0,%3,2) \n" + "movq %%mm3, (%0,%4) \n" + :"+&r"(dst), "+&r"(src), "+&r"(ref) + :"r"(stride), "r"(stride*3) + :"memory" + ); +} + +static inline int16_t median5(int a, int b, int c, int d, int e) { +#if 0 // C + if(b>c) FFSWAP(int, b, c); + if(c>d) FFSWAP(int, c, d); + if(a>b) FFSWAP(int, a, b); + if(b>c) FFSWAP(int, b, c); + if(d>e) d = e; + if(a>b) b = a; + if(c>d) c = d; + if(b>c) c = b; +} +#elif 0 // AMD + int i; +#define CMPXCHG(a,b)\ + "cmp "#b","#a"\n"\ + "mov "#b", %0 \n"\ + "cmovg "#a","#b"\n"\ + "cmovg %0 ,"#a"\n" + __asm__ volatile( + CMPXCHG(%2,%3) + CMPXCHG(%3,%4) + CMPXCHG(%1,%2) + CMPXCHG(%2,%3) + "cmp %5, %4 \n" + "cmovg %5, %4 \n" + "cmp %1, %2 \n" + "cmovl %1, %2 \n" + "cmp %4, %3 \n" + "cmovg %4, %3 \n" + "cmp %2, %3 \n" + "cmovl %2, %3 \n" + :"=&r"(i), "+&r"(a), "+&r"(b), "+&r"(c), "+&r"(d) + :"r"(e) + ); +#else // Intel, int16 +#define CMPXCHG(a,b)\ + "movq "#a", %%mm0 \n"\ + "pminsw "#b", "#a" \n"\ + "pmaxsw %%mm0, "#b" \n" + __asm__ volatile( + "movd %1, %%mm1 \n" + "movd %2, %%mm2 \n" + "movd %3, %%mm3 \n" + "movd %4, %%mm4 \n" + "movd %5, %%mm5 \n" + CMPXCHG(%%mm2, %%mm3) + CMPXCHG(%%mm3, %%mm4) + CMPXCHG(%%mm1, %%mm2) + CMPXCHG(%%mm2, %%mm3) + "pminsw %%mm5, %%mm4 \n" + "pmaxsw %%mm1, %%mm2 \n" + "pminsw %%mm4, %%mm3 \n" + "pmaxsw %%mm2, %%mm3 \n" + "movd %%mm3, %0 \n" + :"=r"(c) + :"r"(a), "r"(b), "r"(c), "r"(d), "r"(e) + ); +#endif +#undef CMPXCHG + return c; +} + + +// median(l, tr, l+t-tl) except with l subtracted off each element to minimize overflows + +static void add_ffv2_median_prediction(int8_t *dst, int8_t *residual, int stride, int width) { + int x; + int l = dst[-1]; + int tl = dst[-1-stride]; + int t = dst[-stride]; + int8_t *top = dst-2*stride; + for(x=0; x