diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 24b68d7..f0a1945 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -82,6 +82,8 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o OBJS-$(CONFIG_FFV1_DECODER) += ffv1.o rangecoder.o OBJS-$(CONFIG_FFV1_ENCODER) += ffv1.o rangecoder.o +OBJS-$(CONFIG_FFV2_DECODER) += ffv2.o rangecoder.o +OBJS-$(CONFIG_FFV2_ENCODER) += ffv2.o rangecoder.o OBJS-$(CONFIG_FFVHUFF_DECODER) += huffyuv.o OBJS-$(CONFIG_FFVHUFF_ENCODER) += huffyuv.o OBJS-$(CONFIG_FLAC_DECODER) += flacdec.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 0cb0e6d..3a1cde0 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -77,6 +77,7 @@ void avcodec_register_all(void) REGISTER_DECODER (EIGHTSVX_FIB, eightsvx_fib); REGISTER_DECODER (ESCAPE124, escape124); REGISTER_ENCDEC (FFV1, ffv1); + REGISTER_ENCDEC (FFV2, ffv2); REGISTER_ENCDEC (FFVHUFF, ffvhuff); REGISTER_ENCDEC (FLASHSV, flashsv); REGISTER_DECODER (FLIC, flic); diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 39dabeb..b6df475 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -97,6 +97,7 @@ enum CodecID { CODEC_ID_ASV1, CODEC_ID_ASV2, CODEC_ID_FFV1, + CODEC_ID_FFV2, // FIXME should go at the end, but that would force manual merges CODEC_ID_4XM, CODEC_ID_VCR1, CODEC_ID_CLJR, diff --git a/libavcodec/ffv2.c b/libavcodec/ffv2.c new file mode 100644 index 0000000..f368e64 --- /dev/null +++ b/libavcodec/ffv2.c @@ -0,0 +1,722 @@ +/* + * Copyright (C) 2009 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define DEBUG +#include "avcodec.h" +#include "dsputil.h" +#include "mathops.h" +#include "mpegvideo.h" +#include "ffv2dsp.c" + +#define VLC_BITS 11 +#define NUM_TABLES 5 +#define NUM_VLCS 4096 + +/* One VLC table: the decoder lookup table plus, per symbol, the code length, the code bits and a usage counter (stats is incremented by put_vlc). */ +typedef struct { + VLC vlc; + uint8_t len[NUM_VLCS]; + uint32_t bits[NUM_VLCS]; + uint32_t stats[NUM_VLCS]; +} VLCS; + +typedef struct FFV2Context FFV2Context; +struct FFV2Context { + AVCodecContext *avctx; + DSPContext dsp; + GetBitContext gb; + PutBitContext pb; + uint8_t *bs, *bs_end; + VLCS vlcs[3][NUM_TABLES]; + AVFrame fenc; + AVFrame fref; + int mb_width, mb_height; + int tstride; + uint8_t *temp[4]; + uint8_t *mb_types; ///< intra: 0, inter: 1 + int16_t (*mvs)[2]; ///< current row + int16_t (*mvs_top)[2]; ///< previous row + int16_t (*mvs_base)[2]; ///< buffer that the other mv arrays point into + int16_t (*mvps)[2]; + int16_t (*mv_plane[2])[2]; ///< mvs chosen by motion est, not necessarily those that will be coded + int16_t (*mv_plane_base)[2]; + int gop; + int initted_vlc[2]; + int coder_type, coder_tree, coder_block; + int rd_bits; ///< ok so there's no D in RD, but it's the same concept +}; + +/* Symbol maps filled by common_init: map_coder3 packs four base-3 digits into 2-bit fields, map_coder6 packs four base-6 digits into 3-bit fields. */ +static uint16_t map_coder3[81]; +static uint16_t map_coder6[1296]; + +/* Setup shared by encoder and decoder: zeroes the context, derives the block counts from the picture size in units of 8, requests both frame buffers, allocates the four scratch buffers and the mv arrays, and fills the two symbol maps. The results of get_buffer and av_malloc are not checked here. */ +static void common_init(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + int i, j, k, l; + memset(s, 0, sizeof(FFV2Context)); + assert(!(avctx->flags&CODEC_FLAG_EMU_EDGE)); + dsputil_init(&s->dsp, avctx); + s->avctx = avctx; + s->mb_width = (avctx->width+7)/8; + s->mb_height= (avctx->height+7)/8; + 
avctx->get_buffer(avctx, &s->fenc); + avctx->get_buffer(avctx, &s->fref); + s->tstride = s->fenc.linesize[0]; + assert(s->tstride >= ((avctx->width+15)&~7)); + s->temp[0] = av_malloc(s->tstride*9+16); + s->temp[1] = av_malloc(s->tstride*9+16); + s->temp[2] = av_malloc(s->tstride*9+16); + s->temp[3] = av_malloc(s->tstride*9+16); + s->mb_types = av_mallocz(s->mb_width+3); + s->mvs_base = av_mallocz(6*(s->mb_width+5)*sizeof(int16_t)); + /* mvs, mvs_top and mvps start out as three consecutive rows of mb_width+5 entries inside mvs_base; the +1 leaves one entry in front of index 0 */ + s->mvs = s->mvs_base+1; + s->mvs_top = s->mvs+s->mb_width+5; + s->mvps = s->mvs_top+s->mb_width+5; + for(i=0; i<3; i++) + for(j=0; j<3; j++) + for(k=0; k<3; k++) + for(l=0; l<3; l++) + map_coder3[i*3*3*3+j*3*3+k*3+l] = (i<<6) + (j<<4) + (k<<2) + l; + for(i=0; i<6; i++) + for(j=0; j<6; j++) + for(k=0; k<6; k++) + for(l=0; l<6; l++) + map_coder6[i*6*6*6+j*6*6+k*6+l] = (i<<9) + (j<<6) + (k<<3) + l; +} + +/* Decoder init: sets the pixel format to YUV420P, then runs the shared setup. Always returns 0. */ +static int decode_init(AVCodecContext *avctx) { + avctx->pix_fmt= PIX_FMT_YUV420P; + common_init(avctx); + return 0; +} + +/* Encoder init: runs the shared setup, takes coder_type from avctx (0 selects 26), splits it into coder_tree (upper bits) and coder_block (low 3 bits), rejects unsupported combinations with -1, allocates the motion-search mv planes and sets VLC stats entries to 1. NOTE(review): the header of the inner stats loop appears to have lost text in this copy. */ +static int encode_init(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + int i, j; + common_init(avctx); + s->coder_type = avctx->coder_type; + if(s->coder_type == 0) + s->coder_type = 26; + s->coder_tree = s->coder_type >> 3; + s->coder_block = s->coder_type & 7; + if((s->coder_type > 3 && s->coder_type < 8) || s->coder_tree < 0 || s->coder_tree > 3 || s->coder_block > 4) { + av_log(avctx, AV_LOG_ERROR, "bad coder_type\n"); + return -1; + } + s->mv_plane_base = av_mallocz(4*(s->mb_height+2)*(s->mb_width+2)*sizeof(int16_t)); + s->mv_plane[0] = s->mv_plane_base+s->mb_width+3; + s->mv_plane[1] = s->mv_plane[0]+(s->mb_width+2)*(s->mb_height+2); + for(j=0; j<3*NUM_TABLES; j++) + for(i=0; ivlcs[0][j].stats[i] = 1; + return 0; +} + +/* Releases both frame buffers and frees what common_init allocated. */ +static void common_end(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + avctx->release_buffer(avctx, &s->fenc); + avctx->release_buffer(avctx, &s->fref); + av_free(s->temp[0]); + av_free(s->temp[1]); + av_free(s->temp[2]); + av_free(s->temp[3]); + 
av_free(s->mb_types); + av_free(s->mvs_base); +} + +/* Decoder teardown: frees all 3*NUM_TABLES VLC lookup tables, then the shared buffers. */ +static int decode_end(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + int i; + for(i=0; i<3*NUM_TABLES; i++) + free_vlc(&s->vlcs[0][i].vlc); + common_end(avctx); + return 0; +} + +/* Encoder teardown: frees the shared buffers, then the motion-search mv planes. */ +static int encode_end(AVCodecContext *avctx) { + FFV2Context *s = avctx->priv_data; + common_end(avctx); + av_free(s->mv_plane_base); + return 0; +} + +/* Writes symbol v with its current code and counts the use in stats. */ +static inline void put_vlc(FFV2Context *s, VLCS *vlcs, int v) { + put_bits(&s->pb, vlcs->len[v], vlcs->bits[v]); + vlcs->stats[v]++; +} + +/* Bit-counting twin of put_vlc: adds the code length of v to rd_bits and writes nothing. */ +static inline void size_vlc(FFV2Context *s, VLCS *vlcs, int v) { + s->rd_bits += vlcs->len[v]; +} + +/* ffv2bitstream.c is included twice: once with RDO_SKIP_BS set to 0 and once with it set to 1. */ +#define RDO_SKIP_BS 0 +#include "ffv2bitstream.c" +#undef RDO_SKIP_BS +#define RDO_SKIP_BS 1 +#include "ffv2bitstream.c" +#undef RDO_SKIP_BS + +// FIXME code duplication from huffyuv.c +// FIXME port optimizations back +/* Derives the code bits of every symbol from its code length. Returns 0 on success, or -1 after logging an error when the final check on the running code value fails. */ +static int generate_bits_table(uint32_t *dst, uint8_t *len_table, int size){ + int len, index; + uint32_t bits=1; + + for(len=1; len<32 && ~bits; len++){ + for(index=size-1; index>=0; index--){ + if(len_table[index] == len) + dst[index] = bits--; + } + bits = bits*2+1; + } + if(~bits){ + av_log(NULL, AV_LOG_ERROR, "Error generating huffman table\n"); + return -1; + } + return 0; +} + +typedef struct { + uint64_t val; + int name; +} HeapElem; + +/* Sifts h[root] down a binary heap kept in array order, swapping with the smaller child while the parent has the larger val. */ +static void heap_sift(HeapElem *h, int root, int size) +{ + while(root*2+1 < size) { + int child = root*2+1; + if(child < size-1 && h[child].val > h[child+1].val) + child++; + if(h[root].val > h[child].val) { + FFSWAP(HeapElem, h[root], h[child]); + root = child; + } else + break; + } +} + +/* NOTE(review): several loop headers and statements of this function appear to be missing in this copy; the text is left exactly as found. What remains shows code lengths being derived through the up[] links and the outer loop repeating with a doubled offset whenever a length reaches 32. */ +static void generate_len_table(uint8_t *dst, uint32_t *stats, int size){ + HeapElem h[size]; + int up[2*size]; + int len[2*size]; + int offset, i, next; + + for(offset=1; ; offset<<=1){ + for(i=0; i=0; i--) + heap_sift(h, i, size); + + for(next=size; next=size; i--) + len[i] = len[up[i]] + 1; + for(i=0; i= 32) break; + } + if(i==size) break; + } +} + +// TODO: simply vlcing the len table reduces 
table size by about 1.7x +// reordering and gradient prediction should help further. + +/* NOTE(review): the body of read_len_table and the head of the length-table writer that follows it appear to be fused by lost text in this copy. The surviving tail stores each run either as two bytes (value, repeat) when repeat exceeds 7, or as one byte with the repeat count in the bits above bit 4, and returns the number of bytes produced. */ +static int read_len_table(uint8_t *len, GetBitContext *gb, int size){ + int i, val, repeat=-1; + + for(i=0; i0 && repeat<256 && repeat>0); + if(repeat>7){ + buf[index++]= val; + buf[index++]= repeat; + }else{ + buf[index++]= val | (repeat<<5); + } + } + + return index; +} + +/* Reads a length table from s->gb, derives the code bits and rebuilds the VLC lookup table (map is handed to init_vlc_sparse as the symbol list). Returns -1 if either of the first two steps fails, else 0. */ +static int read_table(FFV2Context *s, VLCS* vlcs, int size, uint16_t *map) { + if(read_len_table(vlcs->len, &s->gb, size)) + return -1; + if(generate_bits_table(vlcs->bits, vlcs->len, size)) + return -1; + free_vlc(&vlcs->vlc); + init_vlc_sparse(&vlcs->vlc, VLC_BITS, size, vlcs->len, 1, 1, vlcs->bits, 4, 4, map, 2, 2, 0); + return 0; +} + +/* Builds lengths and codes from the collected stats and appends the length table at s->bs, advancing s->bs. NOTE(review): the header of the final loop appears truncated in this copy; the visible statement halves a stats entry, rounding up. */ +static void write_table(FFV2Context *s, VLCS* vlcs, int size) { + int i; + generate_len_table(vlcs->len, vlcs->stats, size); + generate_bits_table(vlcs->bits, vlcs->len, size); + s->bs += write_len_table(vlcs->len, s->bs, size); + for(i=0; istats[i] = (vlcs->stats[i]+1)>>1; +} + +/* NOTE(review): the loop headers and the call name are damaged in this copy; the surviving arguments show a per-position call taking the VLC tables of the plane and buf+x+y*tstride. */ +static void read_lines(FFV2Context *s, uint8_t *buf, int width, int height, int plane) { + int x, y; + for(y=0; yvlcs[plane], buf+x+y*s->tstride); +} + +/* NOTE(review): damaged in the same way as read_lines. */ +static void write_lines(FFV2Context *s, uint8_t *buf, int width, int height, int plane) { + int x, y; + for(y=0; yvlcs[plane], buf+x+y*s->tstride); +} + +/* Returns the number of bits counted for coding the tree at buf and the tree four rows below it, plus the mv difference when type is non-zero. Uses s->rd_bits as the accumulator. */ +static int size_mb(FFV2Context *s, uint8_t *buf, int type, int mvdx, int mvdy, int plane) { + s->rd_bits = 0; + size_tree(s, s->vlcs[plane], buf); + size_tree(s, s->vlcs[plane], buf+4*s->tstride); + if(type) + size_mvd(s, mvdx, mvdy, plane); + return s->rd_bits; +} + +/* NOTE(review): loop headers are damaged in this copy. The visible body takes one symbol from table 4, consumes two bits of it per block, predicts each mv as the median of the left, top and top-right neighbours, marks the block inter when its two bits are non-zero and reads an mv difference when they equal 2. */ +static void read_mvs(FFV2Context *s, int mb_width, int plane) { + int x, i; + for(i=0; igb, s->vlcs[plane][4].vlc.table, VLC_BITS, 3); + for(x=i; x>=2) { + s->mvs[x][0] = mid_pred(s->mvs[x-1][0], s->mvs_top[x][0], s->mvs_top[x+1][0]); + s->mvs[x][1] = mid_pred(s->mvs[x-1][1], s->mvs_top[x][1], s->mvs_top[x+1][1]); + s->mb_types[x] = (v&3)>0; + if((v&3)==2) + read_mvd(s, s->mvs[x], plane); + } + } +} + +static void 
write_mvs(FFV2Context *s, int mb_width, int plane) { + int x, i; + for(i=0; i=i; x--) { + v *= 3; + v += !s->mb_types[x] ? 0 : *(uint32_t*)s->mvs[x] == *(uint32_t*)s->mvps[x] ? 1 : 2; + } + put_vlc(s, s->vlcs[plane]+4, v); + for(x=i; xmvs[x][0]-s->mvps[x][0]; + int dy = s->mvs[x][1]-s->mvps[x][1]; + if(dx|dy) + write_mvd(s, dx, dy, plane); + } + } +} + +#define proc_table_sparse(id, size, map) {\ + if(read) err |= read_table(s, &s->vlcs[plane][id], size, map);\ + else write_table(s, &s->vlcs[plane][id], size);\ +} +#define proc_table(id, size) proc_table_sparse(id, size, NULL) + +/* Reads (read non-zero) or writes the five VLC tables of one plane. On non-key frames a presence byte comes first; the encoder sets it when the low two bits of gop are zero or while initted_vlc[0] is unset, and the tables are skipped when it is zero. Returns the OR of the read_table results, which is 0 when writing. */ +static int proc_tables(FFV2Context *s, int read, int plane) { + int err = 0; + if(!s->fenc.key_frame) { + // I-frames must contain tables. optional in P-frames. + int present; + if(read) present = get_bits(&s->gb, 8); + else present = *(s->bs++) = !(s->gop&3) || !s->initted_vlc[0]; + if(!present) + return 0; + } + proc_table(0, 251); // coef_esc + proc_table_sparse(1, 1296, map_coder6); // coef_block + proc_table(2, 256); // cbp + proc_table(3, 256); // mv + proc_table_sparse(4, 81, map_coder3); // mb_type + return err; +} + +/* Computes the SAD of src against ref displaced by (x,y) and hands it to COPY3_IF_LT together with bcost, bmx and bmy. */ +#define CHECK_MV(x, y) {\ + int cost = s->dsp.sad[1](NULL, src, ref+(x)+(y)*stride, stride, 8);\ + COPY3_IF_LT(bcost, cost, bmx, x, bmy, y);\ +} + +/* Motion search for one block. Starts from the SAD at zero displacement, tries the clipped candidate vectors of the 3x3 neighbourhood in mv_plane, then repeats a four-neighbour step until the best vector stops changing or touches the mv_min/mv_max limits. The result is stored in mv_plane[0] and, after the refinement stage further down, in mv_out. */ +static void motion_search(FFV2Context *s, uint8_t *src, uint8_t *ref, int stride, int16_t (*mv_plane)[2], int mv_stride, int16_t *mv_out, int *mv_min, int *mv_max) { + int x, y, bmx=0, bmy=0, omx, omy; + int bcost = s->dsp.sad[1](NULL, src, ref, stride, 8); + for(y=-1; y<=1; y++) + for(x=-1; x<=1; x++) { + int mx = av_clip(mv_plane[x+y*mv_stride][0], mv_min[0]+1, mv_max[0]-1); + int my = av_clip(mv_plane[x+y*mv_stride][1], mv_min[1]+1, mv_max[1]-1); + if((mx|my) && ((mx-bmx)|(my-bmy))) + CHECK_MV(mx, my); + } + while(1) { + omx=bmx; omy=bmy; + CHECK_MV(omx-1, omy); + CHECK_MV(omx+1, omy); + CHECK_MV(omx, omy-1); + CHECK_MV(omx, omy+1); + if(bmx==omx && bmy==omy) + break; + if(bmx<=mv_min[0] || bmx>=mv_max[0] || 
bmy<=mv_min[1] || bmy>=mv_max[1]) + break; + } + /* the search result so far is stored back into mv_plane. NOTE(review): the guard and setup of the refinement stage below are damaged in this copy; the surviving pix_abs calls test the eight offsets around (omx, omy), presumably at half-pel resolution - confirm against an intact copy */ + mv_plane[0][0] = bmx; + mv_plane[0][1] = bmy; + if(bmx>mv_min[0] && bmxmv_min[1] && bmydsp.pix_abs[1][1](NULL, src, r-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy); + cost = s->dsp.pix_abs[1][1](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy); + cost = s->dsp.pix_abs[1][2](NULL, src, r-stride, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx, bmy, omy-1); + cost = s->dsp.pix_abs[1][2](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx, bmy, omy+1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-stride-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy-1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-stride, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy-1); + cost = s->dsp.pix_abs[1][3](NULL, src, r-1, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx-1, bmy, omy+1); + cost = s->dsp.pix_abs[1][3](NULL, src, r, stride, 8); + COPY3_IF_LT(bcost, cost, bmx, omx+1, bmy, omy+1); + } + mv_out[0] = bmx; + mv_out[1] = bmy; +} + +DECLARE_ALIGNED_16(static uint8_t, zero[4096]); // FIXME size + +/* Reverses the encoder-side filtering for a band of rows. For the first band (y0 zero) the zero buffer stands in for the row above. NOTE(review): the loop headers and the key-frame branch are damaged in this copy; the surviving per-block branches are described by the original comments inside them. */ +static void defilter_lines(FFV2Context *s, uint8_t *dst, uint8_t *src, uint8_t *ref, int stride, int width, int height, int y0) { + int x, y, v, i; + uint8_t *topi = y0 ? dst-stride : zero; + uint8_t *topp = y0 ? src-stride : zero; + if(s->fenc.key_frame) { + for(y=0; ymb_types[x>>3]) { + // filter an intra block, with decoded samples as neighbors. + // then compute what would have been the inter residual, and write that back to the residual buffer. + int l = x ? dst[x-1] : 0; + int tl = x ? topi[x-1] : 0; + add_ffv2_median_prediction(dst+x, topi+x, src+x, 8, l, tl); + for(i=0; i<8; i++) + src[x+i] = dst[x+i] - ref[x+i]; + } else { + // filter an inter block, with inter residuals as neighbors, and add inter prediction. + int l = x ? src[x-1] : 0; + int tl = x ? 
topp[x-1] : 0; + add_ffv2_median_prediction(src+x, topp+x, src+x, 8, l, tl); + for(i=0; i<8; i++) + dst[x+i] = src[x+i] + ref[x+i]; + } + } + } + } +} + +/* NOTE(review): the text between the row loop of enfilter_lines and the local declarations of the plane decoder is missing in this copy, so two functions run together below. The visible start codes the first row of a plane as left differences with the first sample kept as is. The visible decoder part reads the tables, then per band either reads and defilters residual lines (key frame) or first reads mvs and builds the motion-compensated prediction in pmc. */ +static void enfilter_lines(FFV2Context *s, uint8_t *dst, uint8_t *src, int stride, int width, int height, int y0) { + int y=0, v; + if(!y0) { + s->dsp.diff_bytes(dst, src, src-1, width); + dst[0] = src[0]; + y = 1; + } + for(; yfenc.linesize[plane]; + int width = s->avctx->width >> !!plane; + int height = s->avctx->height >> !!plane; + int mb_width = (width+7)>>3; + int mb_height = (height+7)>>3; + uint8_t *residual = s->temp[0] + stride + 8; + uint8_t *pmc = s->temp[1] + stride; + int x, y; + s->tstride = stride; + + init_get_bits(&s->gb, s->bs, (s->bs_end - s->bs)*8); + if(proc_tables(s, 1, plane) < 0) + return -1; + + if(!s->fenc.key_frame) + s->dsp.draw_edges(s->fref.data[plane], stride, width, height, 8); + memset(residual-stride-8, 0, stride+8); + memset(s->mvs_base, 0, 6*(s->mb_width+5)*sizeof(int16_t)); + memset(s->mb_types, 0, mb_width+3); + + for(y=0; yfenc.data[plane] + y*stride; + uint8_t *ref = s->fref.data[plane] + y*stride; + if(s->fenc.key_frame) { + read_lines(s, residual, width, FFMIN(8,height-y), plane); + defilter_lines(s, dst, residual, NULL, stride, width, FFMIN(8,height-y), y); + } else { + read_mvs(s, mb_width, plane); + for(x=0; xmvs[x][0], -16*x-16, (mb_width-x)*16); + mvy = av_clip(s->mvs[x][1], -2*y-16, mb_height*16-2*y); + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + } + read_lines(s, residual, width, FFMIN(8,height-y), plane); + defilter_lines(s, dst, residual, pmc, stride, width, FFMIN(8,height-y), y); + memcpy(residual-stride, residual+7*stride, stride); + FFSWAP(void*, s->mvs, s->mvs_top); + } + } + + /* advance the byte cursor past the bits consumed for this plane, rounded up to a whole byte */ + s->bs += (get_bits_count(&s->gb)+7)>>3; + return 0; +} + +/* Encodes one plane (planes 1 and 2 use width and height shifted right by one). Writes the tables, then works in bands of up to 8 rows: filters the source rows, and on non-key frames searches a vector per 8-pixel block, chooses intra or inter for it and writes the mvs, before writing the residual lines of the band. NOTE(review): the band and block loop headers are damaged in this copy. */ +static void encode_plane(FFV2Context *s, int plane) { + int stride = s->fenc.linesize[plane]; + int width = s->avctx->width >> !!plane; + int height = s->avctx->height >> 
!!plane; + int mb_width = (width+7)>>3; + int mb_height = (height+7)>>3; + int mv_stride = mb_width+2; + uint8_t *imed = s->temp[0] + stride; ///< median-filtered intra samples + uint8_t *pmc = s->temp[1] + stride; ///< inter prediction + uint8_t *pdiff = s->temp[2] + stride + 8; ///< inter residual + uint8_t *pmed = s->temp[3] + stride; ///< median-filtered inter residual + int x, y; + s->tstride = stride; + + /* the tables are written straight to s->bs first; the bit writer is then started at the advanced s->bs */ + proc_tables(s, 0, plane); + init_put_bits(&s->pb, s->bs, (s->bs_end - s->bs)*8); + + if(!s->fenc.key_frame) + s->dsp.draw_edges(s->fref.data[plane], stride, width, height, 8); + memset(pdiff-stride-8, 0, 9*stride+8); + memset(s->mvs_base, 0, 6*(s->mb_width+5)*sizeof(int16_t)); + memset(s->mb_types, 0, mb_width+3); + + for(y=0; yfenc.data[plane] + y*stride; + uint8_t *ref = s->fref.data[plane] + y*stride; + enfilter_lines(s, imed, src, stride, width, FFMIN(8,height-y), y); + if(!s->fenc.key_frame) { + int16_t (*mv_plane)[2] = s->mv_plane[!!plane] + (y>>3)*mv_stride; + int mvx, mvy, mvpx, mvpy, isad, psad, type; + int mv_min[2] = {0, -y-8}; + int mv_max[2] = {0, mb_height*8-y}; + for(x=0; xmvps[x][0] = mid_pred(s->mvs[x-1][0], s->mvs_top[x][0], s->mvs_top[x+1][0]); + mvpy = s->mvps[x][1] = mid_pred(s->mvs[x-1][1], s->mvs_top[x][1], s->mvs_top[x+1][1]); + motion_search(s, src+8*x, ref+8*x, stride, mv_plane, mv_stride, s->mvs[x], mv_min, mv_max); + mvx = s->mvs[x][0]; + mvy = s->mvs[x][1]; + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + sub_block(pdiff+8*x, src+8*x, pmc+8*x, stride); + enfilter_pblock(s, pmed+8*x, pdiff+8*x, stride); + /* mode decision: with mb_decision set the two candidates are compared by counted bits, otherwise by the sum of absolute filtered residuals; inter is chosen only when its cost is strictly lower */ + if(s->avctx->mb_decision) { + isad = size_mb(s, imed+8*x, 0, 0, 0, plane); + psad = size_mb(s, pmed+8*x, 1, mvx-mvpx, mvy-mvpy, plane); + } else { + isad = sum_abs_int8(imed+8*x, stride); + psad = sum_abs_int8(pmed+8*x, stride); + } + type = s->mb_types[x] = psad < isad; + if(type) { + s->dsp.put_pixels_tab[1][0](imed+8*x, pmed+8*x, stride, 8); + } else { + s->mvs[x][0] = 
mvpx; + s->mvs[x][1] = mvpy; + /* intra block: the stored mv has just been reset to the predictor, and the inter residual for the clipped predictor is still computed into pdiff */ + mvx = av_clip(mvpx, 2*mv_min[0], 2*mv_max[0]); + mvy = av_clip(mvpy, 2*mv_min[1], 2*mv_max[1]); + s->dsp.put_pixels_tab[1][(mvx&1)+2*(mvy&1)](pmc+8*x, ref+8*x+(mvx>>1)+(mvy>>1)*stride, stride, 8); + sub_block(pdiff+8*x, src+8*x, pmc+8*x, stride); + } + } + write_mvs(s, mb_width, plane); + memcpy(pdiff-stride, pdiff+7*stride, stride); + FFSWAP(void*, s->mvs, s->mvs_top); + } + write_lines(s, imed, width, FFMIN(8,height-y), plane); + } + + s->bs += (put_bits_count(&s->pb)+7)>>3; + flush_put_bits(&s->pb); +} + +/* Decodes one packet. A non-zero first byte marks a key frame and carries coder_type; a zero first byte marks a P-frame, for which the previous output frame becomes the reference. The three planes are then decoded in order. Returns the number of bytes consumed, or the OR of the plane errors when any plane failed. NOTE(review): buf[0] is read without a check that buf_size is non-zero. */ +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size) { + FFV2Context *s = avctx->priv_data; + int err = 0; + s->bs = buf; + s->bs_end = buf + buf_size; + + if(s->bs[0]) { + s->fenc.key_frame = 1; + s->fenc.pict_type = FF_I_TYPE; + s->coder_type = s->bs[0]; + s->coder_tree = s->coder_type >> 3; + s->coder_block = s->coder_type & 7; + } else { + FFSWAP(AVFrame, s->fenc, s->fref); + s->fenc.key_frame = 0; + s->fenc.pict_type = FF_P_TYPE; + } + s->bs++; + + err |= decode_plane(s, 0); + err |= decode_plane(s, 1); + err |= decode_plane(s, 2); + + *(AVFrame*)data = s->fenc; + *data_size = sizeof(s->fenc); + return err ? err : s->bs - buf; +} + +/* Encodes one picture. The gop counter restarts at gop_size and a key frame is coded whenever it is zero; the first output byte is coder_type for key frames and 0 otherwise. The input picture is copied into fenc and the three planes are encoded. While initted_vlc for the frame type is unset, the planes are encoded a second time from buf+1 with the updated VLCs. Returns the number of bytes written, or -1 when fewer than 4 bytes of the buffer remain. */ +static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void *data) { + FFV2Context *s = avctx->priv_data; + s->bs = buf; + s->bs_end = buf + buf_size; + + if(s->gop >= avctx->gop_size) + s->gop = 0; + if(s->gop) { + FFSWAP(AVFrame, s->fenc, s->fref); + } + s->fenc.key_frame = !s->gop; + s->fenc.pict_type = s->fenc.key_frame ? FF_I_TYPE : FF_P_TYPE; + *(s->bs++) = s->fenc.key_frame ? 
s->coder_type : 0; + + // FIXME skip in intra-only mode + av_picture_copy((AVPicture*)&s->fenc, (AVPicture*)data, PIX_FMT_YUV420P, avctx->width, avctx->height); + + encode_plane(s, 0); + encode_plane(s, 1); + encode_plane(s, 2); + + if(!s->initted_vlc[s->fenc.key_frame]) { + // re-encode with updated vlc + // FIXME do this whenever stats change enough? + s->bs = buf+1; + encode_plane(s, 0); + encode_plane(s, 1); + encode_plane(s, 2); + s->initted_vlc[s->fenc.key_frame] = 1; + } + + s->gop++; + avctx->coded_frame = &s->fenc; + if(s->bs_end - s->bs < 4) + return -1; + return s->bs - buf; +} + +AVCodec ffv2_decoder = { + "ffv2", + CODEC_TYPE_VIDEO, + CODEC_ID_FFV2, + sizeof(FFV2Context), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, + NULL, +}; + +/* NOTE(review): the pix_fmts list below holds a single entry and no terminator; other codecs end this list with a terminating entry - confirm against the AVCodec documentation. */ +AVCodec ffv2_encoder = { + "ffv2", + CODEC_TYPE_VIDEO, + CODEC_ID_FFV2, + sizeof(FFV2Context), + encode_init, + encode_frame, + encode_end, + .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV420P}, +}; + diff --git a/libavcodec/ffv2bitstream.c b/libavcodec/ffv2bitstream.c new file mode 100644 index 0000000..f9fd565 --- /dev/null +++ b/libavcodec/ffv2bitstream.c @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2009 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +// This file is compiled twice, to provide the bitstream writer and +// corresponding functions that just count how many bits would be written. +// The reader doesn't need to be duplicated, but is here just so that it's +// next to the writer. + +/* In the counting pass the writer entry points are renamed to their size_ twins and put_bits only adds its bit count to s->rd_bits. */ +#if RDO_SKIP_BS +#define put_bits(pb,n,x) s->rd_bits += n +#define put_vlc size_vlc +#define write_mvd size_mvd +#define write_block size_block +#define write_tree size_tree +#endif + +#if !RDO_SKIP_BS +/* Reads one mv difference with table 3 of the plane and adds it to mv[0] and mv[1]. Each 4-bit field of the symbol below 15 is the difference plus 7. The value 15 escapes to a signed 8-bit value, and -128 in that byte escapes again to a signed 16-bit value. */ +static void read_mvd(FFV2Context *s, int16_t *mv, int plane) { + VLCS *vlcs = &s->vlcs[plane][3]; + int v = get_vlc2(&s->gb, vlcs->vlc.table, VLC_BITS, 3); + int d; + mv[0] += (v&15)<15 ? (v&15)-7 + : (d=(int8_t)get_bits(&s->gb, 8)) != -128 ? d + : (int16_t)get_bits(&s->gb, 16); + v >>= 4; + mv[1] += (v&15)<15 ? (v&15)-7 + : (d=(int8_t)get_bits(&s->gb, 8)) != -128 ? d + : (int16_t)get_bits(&s->gb, 16); +} + +/* Reads one 2x2 block of residuals. The symbol from table 1 holds four 3-bit fields; a field below 5 is the residual plus 2, any other field means the residual is 3 plus a symbol read from table 0. */ +static void read_block(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int v = get_vlc2(&s->gb, vlcs[1].vlc.table, VLC_BITS, 3); + int a = v & 7; + residual[0] = a<5 ? a-2 : 3+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = (v>>3) & 7; + residual[1] = a<5 ? a-2 : 3+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = (v>>6) & 7; + residual[s->tstride] = a<5 ? a-2 : 3+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); + a = v>>9; + residual[s->tstride+1] = a<5 ? 
a-2 : 3+get_vlc2(&s->gb, vlcs[0].vlc.table, VLC_BITS, 3); +} + +/* Reads an 8x4 residual area: clears four rows of eight bytes, reads the coded-block pattern from table 2 and decodes each 2x2 block whose bit is set. Bits 0 to 3 cover the upper two rows from left to right, bits 4 to 7 the lower two rows. */ +static void read_tree(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp, y; + for(y=0; y<4; y++) + *(uint64_t*)(residual+y*s->tstride) = 0; + cbp = get_vlc2(&s->gb, vlcs[2].vlc.table, VLC_BITS, 3); + if(cbp&0x01) read_block(s, vlcs, residual); + if(cbp&0x02) read_block(s, vlcs, residual+2); + if(cbp&0x04) read_block(s, vlcs, residual+4); + if(cbp&0x08) read_block(s, vlcs, residual+6); + if(cbp&0x10) read_block(s, vlcs, residual+2*s->tstride); + if(cbp&0x20) read_block(s, vlcs, residual+2*s->tstride+2); + if(cbp&0x40) read_block(s, vlcs, residual+2*s->tstride+4); + if(cbp&0x80) read_block(s, vlcs, residual+2*s->tstride+6); +} +#endif //!RDO_SKIP_BS + +/* Writer counterpart of read_mvd. A component within -7..7 is coded inside the symbol as the value plus 7; anything else uses field value 15 followed by 8 bits when the component lies within -127..127, or by the byte 128 and 16 bits otherwise. */ +static void write_mvd(FFV2Context *s, int mvdx, int mvdy, int plane) { + VLCS *vlcs = &s->vlcs[plane][3]; + int x = FFMIN(15, (unsigned)(mvdx+7)); + int y = FFMIN(15, (unsigned)(mvdy+7)); + int v = x+(y<<4); + put_vlc(s, vlcs, v); + if(x==15) { + if((unsigned)(mvdx+127) < 255) + put_bits(&s->pb, 8, mvdx&0xff); + else + put_bits(&s->pb, 24, (128<<16)+(mvdx&0xffff)); + } + if(y==15) { + if((unsigned)(mvdy+127) < 255) + put_bits(&s->pb, 8, mvdy&0xff); + else + put_bits(&s->pb, 24, (128<<16)+(mvdy&0xffff)); + } +} + +/* Writer counterpart of read_block. a..d are the four residuals plus 2 as unsigned bytes; they are capped at 5 and combined as base-6 digits into the table 1 symbol, and every value of 5 or more is followed by the excess over 5 coded with table 0. */ +static void write_block(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + uint8_t a = 2+residual[0]; + uint8_t b = 2+residual[1]; + uint8_t c = 2+residual[s->tstride]; + uint8_t d = 2+residual[s->tstride+1]; + int v = FFMIN(a,5) + 6*FFMIN(b,5) + 36*FFMIN(c,5) + 216*FFMIN(d,5); + put_vlc(s, vlcs+1, v); + if(a>=5) put_vlc(s, vlcs, a-5); + if(b>=5) put_vlc(s, vlcs, b-5); + if(c>=5) put_vlc(s, vlcs, c-5); + if(d>=5) put_vlc(s, vlcs, d-5); +} + +/* Writer counterpart of read_tree: codes the pattern returned by cbp_8x4_raster with table 2, then writes each flagged 2x2 block, four blocks per pair of rows. */ +static void write_tree(FFV2Context *s, VLCS *vlcs, uint8_t *residual) { + int cbp, i; + cbp = cbp_8x4_raster(residual, s->tstride); + put_vlc(s, vlcs+2, cbp); + for(i=0; cbp; i++, cbp>>=1) + if(cbp&1) + write_block(s, vlcs, residual+2*(i&3)+2*(i>>2)*s->tstride); +} + +#undef put_bits +#undef put_vlc 
+#undef write_mvd +#undef write_block +#undef write_tree diff --git a/libavcodec/ffv2dsp.c b/libavcodec/ffv2dsp.c new file mode 100644 index 0000000..6be3128 --- /dev/null +++ b/libavcodec/ffv2dsp.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2009 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Returns an 8-bit mask for an 8x4 area: bit i is set when the i-th 2x2 block contains a non-zero byte. Bits 0 to 3 cover the upper two rows from left to right, bits 4 to 7 the lower two rows. Rows are read through 16-bit loads. */ +static int cbp_8x4_raster(uint8_t *res, int stride) { + int cbp; + cbp = !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 0; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 1; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 2; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 3; + res += 2*stride; + cbp |= !!(*(uint16_t*)(res+0) | *(uint16_t*)(res+stride+0)) << 4; + cbp |= !!(*(uint16_t*)(res+2) | *(uint16_t*)(res+stride+2)) << 5; + cbp |= !!(*(uint16_t*)(res+4) | *(uint16_t*)(res+stride+4)) << 6; + cbp |= !!(*(uint16_t*)(res+6) | *(uint16_t*)(res+stride+6)) << 7; + return cbp; +} + +/* Sums the absolute values of an 8x8 block of signed bytes. The MMX path adds 128 to every byte and takes the sum of absolute differences against 128; the fallback is the plain double loop. */ +static int sum_abs_int8(int8_t *buf, intptr_t stride) { + int sum=0; +#if HAVE_MMX + DECLARE_ALIGNED_8(static const uint64_t, pb_128) = 0x8080808080808080ULL; + __asm__ volatile( + "movq %4, %%mm7 \n" + "movq (%0), %%mm0 \n" + "movq (%0,%2), %%mm1 \n" + "movq (%0,%2,2), %%mm2 \n" + "movq (%0,%3), %%mm3 \n" + 
"lea (%0,%2,4), %0 \n" + "paddb %%mm7, %%mm0 \n" + "paddb %%mm7, %%mm1 \n" + "paddb %%mm7, %%mm2 \n" + "paddb %%mm7, %%mm3 \n" + "psadbw %%mm7, %%mm0 \n" + "psadbw %%mm7, %%mm1 \n" + "psadbw %%mm7, %%mm2 \n" + "psadbw %%mm7, %%mm3 \n" + "paddw %%mm2, %%mm0 \n" + "paddw %%mm3, %%mm1 \n" + "movq (%0), %%mm4 \n" + "movq (%0,%2), %%mm5 \n" + "movq (%0,%2,2), %%mm2 \n" + "movq (%0,%3), %%mm3 \n" + "paddb %%mm7, %%mm4 \n" + "paddb %%mm7, %%mm5 \n" + "paddb %%mm7, %%mm2 \n" + "paddb %%mm7, %%mm3 \n" + "psadbw %%mm7, %%mm4 \n" + "psadbw %%mm7, %%mm5 \n" + "psadbw %%mm7, %%mm2 \n" + "psadbw %%mm7, %%mm3 \n" + "paddw %%mm4, %%mm0 \n" + "paddw %%mm5, %%mm1 \n" + "paddw %%mm2, %%mm0 \n" + "paddw %%mm3, %%mm1 \n" + "paddw %%mm1, %%mm0 \n" + "movd %%mm0, %1 \n" + :"+&r"(buf), "=r"(sum) + :"r"(stride), "r"(stride*3), "m"(pb_128) + ); +#else + int x, y; + for(y=0; y<8; y++, buf+=stride) + for(x=0; x<8; x++) + sum += abs(buf[x]); +#endif + return sum; +} + +/* Bytewise dst = src - ref over 8 rows of 8 bytes, with one stride shared by all three pointers. Only an MMX implementation is present here; there is no C fallback and no HAVE_MMX guard. */ +static void sub_block(uint8_t *dst, uint8_t *src, uint8_t *ref, intptr_t stride) { + __asm__ volatile( + "movq (%1), %%mm0 \n" + "movq (%1,%3), %%mm1 \n" + "movq (%1,%3,2), %%mm2 \n" + "movq (%1,%4), %%mm3 \n" + "psubb (%2), %%mm0 \n" + "psubb (%2,%3), %%mm1 \n" + "psubb (%2,%3,2), %%mm2 \n" + "psubb (%2,%4), %%mm3 \n" + "movq %%mm0, (%0) \n" + "movq %%mm1, (%0,%3) \n" + "movq %%mm2, (%0,%3,2) \n" + "movq %%mm3, (%0,%4) \n" + "lea (%0,%3,4), %0 \n" + "lea (%1,%3,4), %1 \n" + "lea (%2,%3,4), %2 \n" + "movq (%1), %%mm0 \n" + "movq (%1,%3), %%mm1 \n" + "movq (%1,%3,2), %%mm2 \n" + "movq (%1,%4), %%mm3 \n" + "psubb (%2), %%mm0 \n" + "psubb (%2,%3), %%mm1 \n" + "psubb (%2,%3,2), %%mm2 \n" + "psubb (%2,%4), %%mm3 \n" + "movq %%mm0, (%0) \n" + "movq %%mm1, (%0,%3) \n" + "movq %%mm2, (%0,%3,2) \n" + "movq %%mm3, (%0,%4) \n" + :"+&r"(dst), "+&r"(src), "+&r"(ref) + :"r"(stride), "r"(stride*3) + :"memory" + ); +} + +/* NOTE(review): the body of this function is cut off in this copy; nothing beyond the signature and the start of its loop is visible. */ +static void add_ffv2_median_prediction(uint8_t *dst, uint8_t *top, uint8_t *residual, int width, int l, int tl) { + int x; + 
for(x=0; x