diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 9836b47..85a9e8b 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -117,10 +117,10 @@ OBJS-$(CONFIG_EIGHTBPS_DECODER) += 8bps.o OBJS-$(CONFIG_EIGHTSVX_EXP_DECODER) += 8svx.o OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o -OBJS-$(CONFIG_FFV1_DECODER) += ffv1.o rangecoder.o -OBJS-$(CONFIG_FFV1_ENCODER) += ffv1.o rangecoder.o -OBJS-$(CONFIG_FFV2_DECODER) += ffv2.o rangecoder.o -OBJS-$(CONFIG_FFV2_ENCODER) += ffv2.o rangecoder.o +OBJS-$(CONFIG_FFV1_DECODER) += ffv1.o rangecoder.o cabgt.o +OBJS-$(CONFIG_FFV1_ENCODER) += ffv1.o rangecoder.o cabgt.o +OBJS-$(CONFIG_FFV2_DECODER) += ffv2.o rangecoder.o cabgt.o +OBJS-$(CONFIG_FFV2_ENCODER) += ffv2.o rangecoder.o cabgt.o OBJS-$(CONFIG_FFVHUFF_DECODER) += huffyuv.o OBJS-$(CONFIG_FFVHUFF_ENCODER) += huffyuv.o OBJS-$(CONFIG_FLAC_DECODER) += flacdec.o flacdata.o flac.o diff --git a/libavcodec/bitstream.c b/libavcodec/bitstream.c index 1e24099..03d70ac 100644 --- a/libavcodec/bitstream.c +++ b/libavcodec/bitstream.c @@ -117,13 +117,6 @@ static int alloc_table(VLC *vlc, int size, int use_static) return index; } -static av_always_inline uint32_t bitswap_32(uint32_t x) { - return av_reverse[x&0xFF]<<24 - | av_reverse[(x>>8)&0xFF]<<16 - | av_reverse[(x>>16)&0xFF]<<8 - | av_reverse[x>>24]; -} - typedef struct { uint8_t bits; uint16_t symbol; diff --git a/libavcodec/cabgt.c b/libavcodec/cabgt.c new file mode 100644 index 0000000..b746dd4 --- /dev/null +++ b/libavcodec/cabgt.c @@ -0,0 +1,233 @@ +/* + * Context Adaptive Binary Group Tester + * Copyright (c) 2010 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "avcodec.h" +#include "cabgt.h" +#include "cabgt_data.h" +#include "bytestream.h" + +//FIXME code duplication + +static int swizzle_state(int state) { + return FFMIN(state, 255-state)<<1 | state>>7; +} + +static void swizzle_states(uint8_t (*dst)[2], const uint8_t *zeros, const uint8_t *ones) { + for(int i=0; i<256; i++) { + dst[swizzle_state(i)][0] = swizzle_state(zeros[i]); + dst[swizzle_state(i)][1] = swizzle_state(ones[i]); + } +} + +static void build_cabgt_states(CABGTContext *c, int factor, int max_p){ + uint8_t zero_state[256] = {0}; + uint8_t one_state[256] = {0}; + const int64_t one= 1LL<<32; + int64_t p; + int last_p8, p8, i; + + last_p8= 0; + p= one/2; + for(i=0; i<128; i++){ + p8= (256*p + one/2) >> 32; //FIXME try without the one + if(p8 <= last_p8) p8= last_p8+1; + if(last_p8 && last_p8<256 && p8<=max_p) + one_state[last_p8]= p8; + + p+= ((one-p)*factor + one/2) >> 32; + last_p8= p8; + } + + for(i=256-max_p; i<=max_p; i++){ + if(one_state[i]) + continue; + + p= (i*one + 128) >> 8; + p+= ((one-p)*factor + one/2) >> 32; + p8= (256*p + one/2) >> 32; //FIXME try without the one + if(p8 <= i) p8= i+1; + if(p8 > max_p) p8= max_p; + one_state[ i]= p8; + } + + for(i=1; i<255; i++) + zero_state[i]= 256-one_state[256-i]; + + swizzle_states(c->transition, zero_state, one_state); +} + +static int generate_bits_table(uint8_t *dst, uint8_t *len_table, int size){ + int len, index; + uint32_t bits=1; + + for(len=1; len<=8 && ~bits; len++){ + for(index=size-1; index>=0; index--){ + if(len_table[index] == len) + dst[index] = bits--; + } + bits = bits*2+1; + } + if(~bits){ + av_log(NULL, AV_LOG_ERROR, "Error generating huffman table\n"); + return -1; + } + return 0; +} + +static void alloc_cabgt_common(CABGTContext *c, int encoding){ + memset(c, 0, sizeof(*c)); + build_cabgt_states(c, 0.05*(1LL<<32), 256-8); + for(int group=0; groupgroups[group].codes; + uint8_t *bits = c->groups[group].bits; + uint8_t *lens = c->groups[group].lens; + int i; + for(i=0; i>1) | (1<<31)) >> (31-av_log2(code)); + lens[i] = len; + codes[i] = code; + } + int size = c->groups[group].size = i; + generate_bits_table(bits, lens, size); + if(encoding) { + int max_code = 0; + for(i=0; igroups[group].inv_codes = av_malloc(max_code+1); + memset(c->groups[group].inv_codes, -1, max_code+1); + for(i=0; igroups[group].inv_codes[codes[i]] = i; + } else { + init_vlc_sparse(&c->groups[group].vlc, MAX_VLC_BITS, size, lens, 1, 1, bits, 1, 1, codes, 2, 2, 0); + } + } +} + +void ff_alloc_cabgt_encoder(CABGTContext *c){ + alloc_cabgt_common(c, 1); +} + +void ff_alloc_cabgt_decoder(CABGTContext *c){ + alloc_cabgt_common(c, 0); +} + +void ff_dealloc_cabgt_coder(CABGTContext *c){ + for(int i=0; igroups[i].vlc); + av_freep(&c->groups[i].inv_codes); + } +} + +void ff_init_cabgt_encoder(CABGTContext *c, uint8_t *buf, int buf_size){ + for(int i=0; igroups[i].buffer_index = 0; + c->codes_buffered[0].code = 0; + c->buffer_index_old = 1; + c->buffer_index_new = 1; + init_put_bits(&c->pb, buf, buf_size); +} + +void ff_init_cabgt_decoder(CABGTContext *c, const uint8_t *buf, int buf_size){ + memset(c->codes_read, 0, sizeof(c->codes_read)); + init_get_bits(&c->gb, buf, buf_size); +} + +/** + * + * @return the number of bytes written + */ +int ff_cabgt_terminate(CABGTContext *c){ + int buffer_index = c->buffer_index_old; + while(buffer_index != c->buffer_index_new){ + int group = c->codes_buffered[buffer_index].group; + int code = c->codes_buffered[buffer_index].code; + int codename = c->codes_buffered[buffer_index].codename; + if(code) // FIXME not quite optimal at finding the cheapest extension of the given prefix + do { + code <<= 1; + codename = get_codename(c, group, code); + } while(codename < 0); + put_bits(&c->pb, c->groups[group].lens[codename], c->groups[group].bits[codename]); + buffer_index++; + buffer_index &= MAX_BUFFER-1; + } + + flush_put_bits(&c->pb); + return put_bits_count(&c->pb)/8; +} + +#ifdef TEST +#define SIZE 10240 + +#include "libavutil/lfg.h" + +int main(void){ + CABGTContext c; + uint8_t b[9*SIZE]; + uint8_t r[9*SIZE]; + int i; + uint8_t state[10]= {0}; + AVLFG prng; + + av_lfg_init(&prng, 1); + + ff_alloc_cabgt_encoder(&c); + ff_init_cabgt_encoder(&c, b, SIZE); + + memset(state, 255, sizeof(state)); + + for(i=0; i +#include +#include "libavutil/common.h" +#include "libavutil/internal.h" +#include "get_bits.h" +#include "put_bits.h" + +#define CABGT_NEUTRAL_STATE 255 // 50% probability + +#define MAX_CODES 17 +#define NUM_GROUPS 8 +#define MAX_VLC_BITS 8 +#define MAX_BUFFER 65536 + +typedef struct CABGTContext{ + GetBitContext gb; + PutBitContext pb; + int buffer_index_old; + int buffer_index_new; + uint32_t codes_read[NUM_GROUPS]; + uint8_t transition[256][2]; + struct { + VLC vlc; + int size; + uint16_t codes[MAX_CODES]; + uint8_t bits[MAX_CODES]; + uint8_t lens[MAX_CODES]; + int8_t *inv_codes; + int buffer_index; + } groups[NUM_GROUPS]; + struct { + uint16_t code; + uint8_t codename; + uint8_t group; + } codes_buffered[MAX_BUFFER]; +}CABGTContext; + +void ff_alloc_cabgt_encoder(CABGTContext *c); +void ff_alloc_cabgt_decoder(CABGTContext *c); +void ff_dealloc_cabgt_coder(CABGTContext *c); +void ff_init_cabgt_encoder(CABGTContext *c, uint8_t *buf, int buf_size); +void ff_init_cabgt_decoder(CABGTContext *c, const uint8_t *buf, int buf_size); +int ff_cabgt_terminate(CABGTContext *c); + +static inline int get_codename(CABGTContext *c, int group, int code){ + return c->groups[group].inv_codes[code]; +} + +// codes_buffered[] contains everything from the earliest code that hasn't yet been completed, +// up to the latest code that has been allocated an order in the bitstream. +// it is potentially unbounded, and large buffers are not especially rare; +// an all-black 1080p frame in ffv1 could cause it to buffer 200000 codes. +// +// groups[].buffer_index points to the entry in codes_buffered[] that that group is currently updating. +// if a group finishes a code, groups[].buffer_index isn't updated until it starts the next code. +// this matches the decoder behavior of reading a code only when you need to know a bit from it. +// +// buffer_index_new is the slot that will be allocated to the next group that needs one. +// +// buffer_index_old is the code that is blocking the bitstream writing. +// when this code completes, we immediately write it and any subsequent complete codes. + +static inline void put_cabgt(CABGTContext *c, uint8_t * const state, int bit){ + int group = *state >> 5; + int buffer_index = c->groups[group].buffer_index; + int code = c->codes_buffered[buffer_index].code; + + if(!code) { + buffer_index = c->buffer_index_new & (MAX_BUFFER-1); + c->buffer_index_new = (buffer_index+1) & (MAX_BUFFER-1); + c->groups[group].buffer_index = buffer_index; + c->codes_buffered[buffer_index].group = group; + code = 1; + assert(c->buffer_index_new != c->buffer_index_old); + } + code = (code << 1) + (bit ^ (*state & 1)); + *state = c->transition[*state][bit]; + int codename = get_codename(c, group, code); + if(codename < 0) { + c->codes_buffered[buffer_index].code = code; + } else { + c->codes_buffered[buffer_index].code = 0; + c->codes_buffered[buffer_index].codename = codename; + if(buffer_index == c->buffer_index_old) { + put_bits(&c->pb, c->groups[group].lens[codename], c->groups[group].bits[codename]); + while((buffer_index = (buffer_index+1) & (MAX_BUFFER-1)) != c->buffer_index_new) { + if(c->codes_buffered[buffer_index].code) + break; + group = c->codes_buffered[buffer_index].group; + codename = c->codes_buffered[buffer_index].codename; + put_bits(&c->pb, c->groups[group].lens[codename], c->groups[group].bits[codename]); + } + c->buffer_index_old = buffer_index; + } + } +} + +static inline int get_cabgt(CABGTContext *c, uint8_t * const restrict state){ + unsigned group = *state >> 5; + unsigned code = c->codes_read[group]; + if(__builtin_expect(code <= 1, 0)) + code = (uint16_t)get_vlc2(&c->gb, c->groups[group].vlc.table, MAX_VLC_BITS, 1); + unsigned bit = (code ^ *state) & 1; + *state = c->transition[*state][bit]; + c->codes_read[group] = code >> 1; + return bit; +} + +#endif /* AVCODEC_CABGT_H */ diff --git a/libavcodec/cabgt_data.h b/libavcodec/cabgt_data.h new file mode 100644 index 0000000..8f55732 --- /dev/null +++ b/libavcodec/cabgt_data.h @@ -0,0 +1,179 @@ +/* + * Context Adaptive Binary Group Tester + * Copyright (c) 2010 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "cabgt.h" + +// { huffman length, symbol name } +// symbols are big-endian, as generated by the cabgt encoder. +// the decoder wants little-endian. +// both want lsb-aligned, with a stop-bit in the msb, so only the payload part should be reversed. +static uint16_t cabgt_tables[NUM_GROUPS][MAX_CODES][2] = { +// p=0.0326 +// ideal entropy: 0.207268 +// cost: 0.210906 + {{ 1, 0b1000000000000000 }, + { 4, 0b11 }, + { 5, 0b101 }, + { 5, 0b1001 }, + { 5, 0b10001 }, + { 5, 0b100001 }, + { 5, 0b1000001 }, + { 5, 0b10000001 }, + { 5, 0b100000001 }, + { 5, 0b1000000001 }, + { 5, 0b10000000001 }, + { 5, 0b100000000001 }, + { 5, 0b1000000000001 }, + { 5, 0b10000000000001 }, + { 5, 0b100000000000001 }, + { 5, 0b1000000000000001 }}, + +// p=0.0943 +// ideal entropy: 0.450662 +// cost: 0.451236 + {{ 1, 0b10000000 }, + { 4, 0b10010 }, + { 4, 0b10001 }, + { 4, 0b110000 }, + { 4, 0b101000 }, + { 4, 0b100001 }, + { 4, 0b1000001 }, + { 4, 0b10000001 }, + { 7, 0b111 }, + { 7, 0b1101 }, + { 7, 0b1011 }, + { 7, 0b11001 }, + { 7, 0b10101 }, + { 7, 0b10011 }, + { 7, 0b110001 }, + { 7, 0b101001 }}, + +// p=0.1526 +// ideal entropy: 0.616309 +// cost: 0.617387 + {{ 1, 0b10000 }, + { 3, 0b110 }, + { 3, 0b101 }, + { 3, 0b1001 }, + { 4, 0b10001000 }, + { 6, 0b11100 }, + { 6, 0b100011 }, + { 6, 0b1000101 }, + { 7, 0b10001001 }, + { 8, 0b1111 }, + { 8, 0b11101 }}, + +// p=0.2168 +// ideal entropy: 0.754281 +// cost: 0.755098 + {{ 1, 0b1000 }, + { 3, 0b1100 }, + { 3, 0b1010 }, + { 3, 0b1001 }, + { 5, 0b11100 }, + { 5, 0b11010 }, + { 5, 0b10110 }, + { 7, 0b1111 }, + { 7, 0b11101 }, + { 7, 0b11011 }, + { 7, 0b10111 }}, + +// p=0.2835 +// ideal entropy: 0.860176 +// cost: 0.861949 + {{ 1, 0b100 }, + { 4, 0b101000 }, + { 4, 0b1110 }, + { 4, 0b1101 }, + { 4, 0b1011 }, + { 4, 0b1100000 }, + { 5, 0b110010 }, + { 5, 0b110001 }, + { 5, 0b101010 }, + { 5, 0b101001 }, + { 6, 0b1100001 }, + { 6, 0b11110 }, + { 7, 0b1100110 }, + { 7, 0b1010110 }, + { 7, 0b11111 }, + { 8, 0b1100111 }, + { 8, 0b1010111 }}, + +// p=0.3440 +// ideal entropy: 0.928595 +// cost: 0.930027 + {{ 2, 0b101 }, + { 3, 0b100000 }, + { 3, 0b111 }, + { 4, 0b1101 }, + { 4, 0b110000 }, + { 4, 0b100100 }, + { 4, 0b100010 }, + { 4, 0b100001 }, + { 5, 0b110001 }, + { 5, 0b100101 }, + { 5, 0b100011 }, + { 5, 0b110010 }, + { 5, 0b100110 }, + { 6, 0b110011 }, + { 6, 0b100111 }}, + +// p=0.4100 +// ideal entropy: 0.976500 +// cost: 0.977799 + {{ 2, 0b110 }, + { 2, 0b101 }, + { 3, 0b1001 }, + { 3, 0b10000 }, + { 4, 0b1111 }, + { 4, 0b11100 }, + { 5, 0b1000100 }, + { 6, 0b111011 }, + { 6, 0b1110100 }, + { 6, 0b1000111 }, + { 7, 0b1110101 }, + { 7, 0b10001011 }, + { 7, 0b10001101 }, + { 7, 0b100010100 }, + { 7, 0b100011000 }, + { 8, 0b100010101 }, + { 8, 0b100011001 }}, + +// p=0.4737 +// ideal entropy: 0.998003 +// cost: 1.000000 + {{ 4, 0b10000 }, + { 4, 0b10001 }, + { 4, 0b10010 }, + { 4, 0b10011 }, + { 4, 0b10100 }, + { 4, 0b10101 }, + { 4, 0b10110 }, + { 4, 0b10111 }, + { 4, 0b11000 }, + { 4, 0b11001 }, + { 4, 0b11010 }, + { 4, 0b11011 }, + { 4, 0b11100 }, + { 4, 0b11101 }, + { 4, 0b11110 }, + { 4, 0b11111 }}, +}; diff --git a/libavcodec/cabgt_sim.c b/libavcodec/cabgt_sim.c new file mode 100644 index 0000000..6e4c133 --- /dev/null +++ b/libavcodec/cabgt_sim.c @@ -0,0 +1,234 @@ +#define DEBUG +#include +#include "avcodec.h" +#include "cabgt_sim.h" + +#define MAX_ELEMS 256 +#define MAX_HUFF_DEPTH 8 +#define MAX_GROUP_DEPTH 15 + +typedef struct HeapElem { + uint64_t val; + uint32_t name; +} HeapElem; + +static void heap_sift(HeapElem *h, int root, int size) +{ + int child = root*2+1; + do { + child += h[child].val > h[child+1].val; + if(h[root].val <= h[child].val) + break; + FFSWAP(HeapElem, h[root], h[child]); + root = child; + child = root*2+1; + } while(child < size); +} + +static void build_len_table(uint8_t *dst, uint64_t *stats, int size, int max_depth) { + HeapElem h[size+1]; + int up[2*size]; + int len[2*size]; + int offset, i, next; + + assert(size <= 1ULL<=0; i--) + heap_sift(h, i, size); + + for(next=size; next=size; i--) + len[i] = len[up[i]] + 1; + for(i=0; i max_depth) break; + } + if(i==size) break; + } +} + +typedef struct { + double prob; + uint32_t code; +} TreeElem; + +static int treeelem_cmp(TreeElem *a, TreeElem *b) { + return a->prob > b->prob ? -1 : a->prob < b->prob; +} + +static double cost_len_table(TreeElem *tree, int size, int max_depth, int print) { + uint64_t stats[size]; + uint8_t lens[size]; + for(int i=0; i> 28; + int code = tree2[i].code & ((1<<28)-1); + printf("%.6f %d ", tree2[i].prob, len); + for(int j=av_log2(code)-1; j>=0; j--) + printf("%d", (code>>j)&1); + printf("\n"); + } + printf("\n"); + } + return cost; +} + +#define dump_tree(tree, size) cost_len_table(tree, size, MAX_HUFF_DEPTH, 1) + +static inline int try_tree(TreeElem *tree, int size, TreeElem *btree, int *bsize, double *bcost) { + double cost = cost_len_table(tree, size, 31, 0); + if(cost < *bcost) // limiting huffman length is slow, so don't do it unless we're going to use the score + cost = cost_len_table(tree, size, MAX_HUFF_DEPTH, 0); + if(cost < *bcost) { + *bcost = cost; + *bsize = size; + memcpy(btree, tree, size*sizeof(*tree)); + return 1; + } + return 0; +} + +#define BRUTEFORCE_MAX 18 +static void bruteforce_tables(double p, TreeElem *tree, int size, int left_pos, int max_size, TreeElem *btree, int *bsize, double *bcost) { + for(int i=left_pos; i= MAX_GROUP_DEPTH) + continue; + TreeElem backup = tree[i]; + tree[size].prob = tree[i].prob * p; + tree[size].code = (tree[i].code<<1)+1; + tree[i].prob *= 1-p; + tree[i].code <<= 1; + try_tree(tree, size+1, btree, bsize, bcost); + if(size+1 < max_size) + bruteforce_tables(p, tree, size+1, i, max_size, btree, bsize, bcost); + tree[i] = backup; + } +} + +static double build_grouptest_vlc_table(double p) { + TreeElem tree[MAX_ELEMS]; + TreeElem btree[MAX_ELEMS]; + int size = 2; + tree[0] = (TreeElem){ 1-p, 2 }; + tree[1] = (TreeElem){ p, 3 }; + double bcost = cost_len_table(tree, size, MAX_HUFF_DEPTH, 0); + int bsize = size; + memcpy(btree, tree, size*sizeof(*tree)); + bruteforce_tables(p, tree, size, 0, BRUTEFORCE_MAX, btree, &bsize, &bcost); + size = bsize; + memcpy(tree, btree, bsize*sizeof(*tree)); + while(size<=MAX_ELEMS) { + int improved = 0; + for(int i=0; i= MAX_GROUP_DEPTH) + continue; + TreeElem backup = tree[i]; + tree[size].prob = tree[i].prob * p; + tree[size].code = (tree[i].code<<1)+1; + tree[i].prob *= 1-p; + tree[i].code <<= 1; + if(try_tree(tree, size+1, btree, &bsize, &bcost)) + improved = 1; + tree[i] = backup; + } + if(!improved) + break; + size = bsize; + memcpy(tree, btree, bsize*sizeof(*tree)); + } + printf("p=%.4f\n", p); + printf("ideal entropy: %.6f\n", p*-log2(p)+(1-p)*-log2(1-p)); + dump_tree(tree, size); + return bcost; +} + +static double ideal_entropy(double p) { + return p>0 && p<1 ? p*-log2(p)+(1-p)*-log2(1-p) : 0; +} + +static inline double vlc_entropy(double p) { + int i = p*1000; + double f = p*1000-i; + return (1-f)*gt_entropy[i] + f*gt_entropy[i+1]; +} + +// assumes a uniform distribution of inputs. +// for any other distribution, this would involve an integral and a centroid. +#define part_cost(left,right) ((right-left)*vlc_entropy((right+left)*.5)*2) + +// tweaking partitions gains only a tiny amount of compression from the nonlinearity of the entropy curve. +// it would be more important if the input distribution were nonuniform. +static void build_partitions(int npart) { + double borders[npart+1]; + for(int i=0; i<=npart; i++) + borders[i] = .5*i/npart; + double prev_cost = 0; + for(int pass=0; pass<10; pass++) { + double total_cost = 0; + for(int i=0; i 1) { + $group_bits_tab{$code}[$bits&1]++; + $bits >>= 1; + } + $group_bits_tab{$code}[2] = $group_bits_tab{$code}[0] + $group_bits_tab{$code}[1]; + } +} + +sub dump_codes { + my @codes = @_; +# @codes = sort {$b->[0] <=> $a->[0]} @codes; + my $cost = cost_len_table($huffsize, @codes); + printf "cost %.5f\n", $cost; + my $toggle; + my @lens = build_len_table($huffsize, grep {$toggle ^= 1} @codes); + for(0..$#codes/2) { + my $code = $codes[$_*2+1]; + my $bits = $group_bits_tab{$code}[2]; + $code &= (1<<$bits)-1; + printf "%0*b %d %.5f\n", $bits, $code, $lens[$_], $codes[$_*2+0]; + } + print "\n"; +} + +sub build_grouptest_vlc_table { + my ($p) = @_; + init_group_bits_tab(); + my @codes = (($p,0b11), (1-$p,0b10)); + my $bcost = cost_len_table($huffsize, @codes); + while(1) { + my $improved; + print "new iteration\n"; + dump_codes(@codes); + foreach $i (0..$#codes/2) { + my @codes2 = @codes; + my ($leafp, $leafcode) = splice @codes2, $i*2, 2; + next if $leafcode >= 1<<$groupsize; + push @codes2, ($leafp*$p, ($leafcode<<1)+1); + push @codes2, ($leafp*(1-$p), $leafcode<<1); + my $cost = cost_len_table($huffsize, @codes2); + dump_codes(@codes2); + if($cost < $bcost) { + $improved = 1; + $bcost = $cost; + @bcodes = @codes2; + } + } + last if !$improved; + @codes = @bcodes; + last if @codes == (1<<$huffsize); + } + print "final\n"; + dump_codes(@codes); +} + +__END__ +__C__ + +#include + +#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) +#define FFMAX(a,b) ((a) > (b) ? (a) : (b)) + +typedef struct HeapElem { + uint64_t val; + uint32_t name; +} HeapElem; + +static void heap_sift(HeapElem *h, int root, int size) +{ + int child = root*2+1; + do { + child += h[child].val > h[child+1].val; + if(h[root].val <= h[child].val) + break; + FFSWAP(HeapElem, h[root], h[child]); + root = child; + child = root*2+1; + } while(child < size); +} + +void build_len_table_internal(uint8_t *dst, uint64_t *stats, int size, int max_depth) { + HeapElem h[size+1]; + int up[2*size]; + int len[2*size]; + int offset, i, next; + + assert(size <= 1ULL<=0; i--) + heap_sift(h, i, size); + + for(next=size; next=size; i--) + len[i] = len[up[i]] + 1; + for(i=0; i max_depth) break; + } + if(i==size) break; + } +} + +void build_len_table(int max_depth, ...) { + Inline_Stack_Vars; + int n = Inline_Stack_Items-1; + int i; + uint64_t stats[n]; + uint8_t lens[n]; + for(i=0; i>= 1; + int i; + float statsf[n]; + uint64_t stats[n]; + uint32_t codes[n]; + uint8_t lens[n]; + for(i=0; iac){ - if(c->bytestream_end - c->bytestream < w*20){ + if(c->pb.buf_end - c->pb.buf_ptr < w*20){ // not a very good estimate av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); return -1; } @@ -559,7 +567,7 @@ static void write_quant_table(RangeCoder *c, int16_t *quant_table){ int last=0; int i; uint8_t state[CONTEXT_SIZE]; - memset(state, 128, sizeof(state)); + memset(state, CABGT_NEUTRAL_STATE, sizeof(state)); for(i=1; i<128 ; i++){ if(quant_table[i] != quant_table[i-1]){ @@ -575,7 +583,7 @@ static void write_header(FFV1Context *f){ int i; RangeCoder * const c= &f->c; - memset(state, 128, sizeof(state)); + memset(state, CABGT_NEUTRAL_STATE, sizeof(state)); put_symbol(c, state, f->version, 0); put_symbol(c, state, f->avctx->coder_type, 0); @@ -619,6 +627,9 @@ static av_cold int encode_init(AVCodecContext *avctx) s->version=0; s->ac= avctx->coder_type; + if(s->ac) + ff_alloc_cabgt_encoder(&s->c); + s->plane_count=2; for(i=0; i<256; i++){ if(avctx->bits_per_raw_sample <=8){ @@ -707,12 +718,12 @@ static void clear_state(FFV1Context *f){ for(i=0; iplane_count; i++){ PlaneContext *p= &f->plane[i]; - p->interlace_bit_state[0]= 128; - p->interlace_bit_state[1]= 128; + p->interlace_bit_state[0]= CABGT_NEUTRAL_STATE; + p->interlace_bit_state[1]= CABGT_NEUTRAL_STATE; for(j=0; jcontext_count; j++){ if(f->ac){ - memset(p->state[j], 128, sizeof(uint8_t)*CONTEXT_SIZE); + memset(p->state[j], CABGT_NEUTRAL_STATE, sizeof(uint8_t)*CONTEXT_SIZE); }else{ p->vlc_state[j].drift= 0; p->vlc_state[j].error_sum= 4; //FFMAX((RANGE + 32)/64, 2); @@ -732,7 +743,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, const int height= f->height; AVFrame * const p= &f->picture; int used_count= 0; - uint8_t keystate=128; + uint8_t keystate= CABGT_NEUTRAL_STATE; ff_init_range_encoder(c, buf, buf_size); ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); @@ -791,6 +802,8 @@ static av_cold int common_end(AVCodecContext *avctx){ av_freep(&p->vlc_state); } + ff_dealloc_cabgt_coder(&s->c); + return 0; } @@ -935,7 +948,7 @@ static int read_quant_table(RangeCoder *c, int16_t *quant_table, int scale){ int i=0; uint8_t state[CONTEXT_SIZE]; - memset(state, 128, sizeof(state)); + memset(state, CABGT_NEUTRAL_STATE, sizeof(state)); for(v=0; i<128 ; v++){ int len= get_symbol(c, state, 0) + 1; @@ -963,7 +976,7 @@ static int read_header(FFV1Context *f){ int i, context_count; RangeCoder * const c= &f->c; - memset(state, 128, sizeof(state)); + memset(state, CABGT_NEUTRAL_STATE, sizeof(state)); f->version= get_symbol(c, state, 0); f->ac= f->avctx->coder_type= get_symbol(c, state, 0); @@ -1038,10 +1051,12 @@ static int read_header(FFV1Context *f){ static av_cold int decode_init(AVCodecContext *avctx) { -// FFV1Context *s = avctx->priv_data; + FFV1Context *s = avctx->priv_data; common_init(avctx); + ff_alloc_cabgt_decoder(&s->c); + return 0; } @@ -1054,7 +1069,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac const int height= f->height; AVFrame * const p= &f->picture; int bytes_read; - uint8_t keystate= 128; + uint8_t keystate= CABGT_NEUTRAL_STATE; AVFrame *picture = data; @@ -1084,7 +1099,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac av_log(avctx, AV_LOG_ERROR, "keyframe:%d coder:%d\n", p->key_frame, f->ac); if(!f->ac){ - bytes_read = c->bytestream - c->bytestream_start - 1; + bytes_read = get_bits_count(&c->gb)/8; if(bytes_read ==0) av_log(avctx, AV_LOG_ERROR, "error at end of AC stream\n"); //FIXME //printf("pos=%d\n", bytes_read); init_get_bits(&f->gb, buf + bytes_read, buf_size - bytes_read); @@ -1114,7 +1129,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac *data_size = sizeof(AVFrame); if(f->ac){ - bytes_read= c->bytestream - c->bytestream_start - 1; + bytes_read = get_bits_count(&c->gb)/8; if(bytes_read ==0) av_log(f->avctx, AV_LOG_ERROR, "error at end of frame\n"); }else{ bytes_read+= (get_bits_count(&f->gb)+7)/8; diff --git a/libavcodec/rangecoder.c b/libavcodec/rangecoder.c index d750e65..b0b8676 100644 --- a/libavcodec/rangecoder.c +++ b/libavcodec/rangecoder.c @@ -130,12 +130,12 @@ int main(void){ memset(state, 128, sizeof(state)); for(i=0; i #include "config.h" #include "attributes.h" +#include "common.h" #if ARCH_ARM # include "arm/bswap.h" @@ -98,4 +99,11 @@ static inline uint64_t av_const bswap_64(uint64_t x) #define le2me_64(x) (x) #endif +static av_always_inline uint32_t bitswap_32(uint32_t x) { + return av_reverse[x&0xFF]<<24 + | av_reverse[(x>>8)&0xFF]<<16 + | av_reverse[(x>>16)&0xFF]<<8 + | av_reverse[x>>24]; +} + #endif /* AVUTIL_BSWAP_H */