X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fcabac.c;h=75eee3f042302cff9048b93920ac57c4f2316272;hb=94b9141609d17ebbeb3184a8a5fc0660725a4cf2;hp=ad5d203f0cc87dccabb5f05c6144ff2c3e3e386f;hpb=aaced0861e76767a5c0ce24a94214a261d9eb459;p=x264 diff --git a/common/cabac.c b/common/cabac.c index ad5d203f..75eee3f0 100644 --- a/common/cabac.c +++ b/common/cabac.c @@ -1,10 +1,11 @@ /***************************************************************************** - * cabac.c: h264 encoder library + * cabac.c: arithmetic coder ***************************************************************************** - * Copyright (C) 2003 Laurent Aimar - * $Id: cabac.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $ + * Copyright (C) 2003-2011 x264 project * * Authors: Laurent Aimar + * Loren Merritt + * Fiona Glaser * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,7 +19,10 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
*****************************************************************************/ #include "common.h" @@ -664,187 +668,127 @@ static const int8_t x264_cabac_context_init_PB[3][460][2] = } }; -/* FIXME could avoid this duplication by reversing the order of states - * with MPS=0, but that would uglify the other tables */ -const uint8_t x264_cabac_range_lps[128][4] = +const uint8_t x264_cabac_range_lps[64][4] = { - { 2, 2, 2, 2 }, - { 6, 7, 8, 9 }, { 6, 7, 9, 10 }, { 6, 8, 9, 11 }, - { 7, 8, 10, 11 }, { 7, 9, 10, 12 }, { 7, 9, 11, 12 }, - { 8, 9, 11, 13 }, { 8, 10, 12, 14 }, { 9, 11, 12, 14 }, - { 9, 11, 13, 15 }, { 10, 12, 14, 16 }, { 10, 12, 15, 17 }, - { 11, 13, 15, 18 }, { 11, 14, 16, 19 }, { 12, 14, 17, 20 }, - { 12, 15, 18, 21 }, { 13, 16, 19, 22 }, { 14, 17, 20, 23 }, - { 14, 18, 21, 24 }, { 15, 19, 22, 25 }, { 16, 20, 23, 27 }, - { 17, 21, 25, 28 }, { 18, 22, 26, 30 }, { 19, 23, 27, 31 }, - { 20, 24, 29, 33 }, { 21, 26, 30, 35 }, { 22, 27, 32, 37 }, - { 23, 28, 33, 39 }, { 24, 30, 35, 41 }, { 26, 31, 37, 43 }, - { 27, 33, 39, 45 }, { 29, 35, 41, 48 }, { 30, 37, 43, 50 }, - { 32, 39, 46, 53 }, { 33, 41, 48, 56 }, { 35, 43, 51, 59 }, - { 37, 45, 54, 62 }, { 39, 48, 56, 65 }, { 41, 50, 59, 69 }, - { 43, 53, 63, 72 }, { 46, 56, 66, 76 }, { 48, 59, 69, 80 }, - { 51, 62, 73, 85 }, { 53, 65, 77, 89 }, { 56, 69, 81, 94 }, - { 59, 72, 86, 99 }, { 62, 76, 90, 104 }, { 66, 80, 95, 110 }, - { 69, 85, 100, 116 }, { 73, 89, 105, 122 }, { 77, 94, 111, 128 }, - { 81, 99, 117, 135 }, { 85, 104, 123, 142 }, { 90, 110, 130, 150 }, - { 95, 116, 137, 158 }, { 100, 122, 144, 166 }, { 105, 128, 152, 175 }, - { 111, 135, 160, 185 }, { 116, 142, 169, 195 }, { 123, 150, 178, 205 }, - { 128, 158, 187, 216 }, { 128, 167, 197, 227 }, { 128, 176, 208, 240 }, - - { 128, 176, 208, 240 }, { 128, 167, 197, 227 }, { 128, 158, 187, 216 }, - { 123, 150, 178, 205 }, { 116, 142, 169, 195 }, { 111, 135, 160, 185 }, - { 105, 128, 152, 175 }, { 100, 122, 144, 166 }, { 95, 116, 137, 158 }, - { 90, 110, 
130, 150 }, { 85, 104, 123, 142 }, { 81, 99, 117, 135 }, - { 77, 94, 111, 128 }, { 73, 89, 105, 122 }, { 69, 85, 100, 116 }, - { 66, 80, 95, 110 }, { 62, 76, 90, 104 }, { 59, 72, 86, 99 }, - { 56, 69, 81, 94 }, { 53, 65, 77, 89 }, { 51, 62, 73, 85 }, - { 48, 59, 69, 80 }, { 46, 56, 66, 76 }, { 43, 53, 63, 72 }, - { 41, 50, 59, 69 }, { 39, 48, 56, 65 }, { 37, 45, 54, 62 }, - { 35, 43, 51, 59 }, { 33, 41, 48, 56 }, { 32, 39, 46, 53 }, - { 30, 37, 43, 50 }, { 29, 35, 41, 48 }, { 27, 33, 39, 45 }, - { 26, 31, 37, 43 }, { 24, 30, 35, 41 }, { 23, 28, 33, 39 }, - { 22, 27, 32, 37 }, { 21, 26, 30, 35 }, { 20, 24, 29, 33 }, - { 19, 23, 27, 31 }, { 18, 22, 26, 30 }, { 17, 21, 25, 28 }, - { 16, 20, 23, 27 }, { 15, 19, 22, 25 }, { 14, 18, 21, 24 }, - { 14, 17, 20, 23 }, { 13, 16, 19, 22 }, { 12, 15, 18, 21 }, - { 12, 14, 17, 20 }, { 11, 14, 16, 19 }, { 11, 13, 15, 18 }, - { 10, 12, 15, 17 }, { 10, 12, 14, 16 }, { 9, 11, 13, 15 }, - { 9, 11, 12, 14 }, { 8, 10, 12, 14 }, { 8, 9, 11, 13 }, - { 7, 9, 11, 12 }, { 7, 9, 10, 12 }, { 7, 8, 10, 11 }, - { 6, 8, 9, 11 }, { 6, 7, 9, 10 }, { 6, 7, 8, 9 }, - { 2, 2, 2, 2 }, + { 2, 2, 2, 2}, { 6, 7, 8, 9}, { 6, 7, 9, 10}, { 6, 8, 9, 11}, + { 7, 8, 10, 11}, { 7, 9, 10, 12}, { 7, 9, 11, 12}, { 8, 9, 11, 13}, + { 8, 10, 12, 14}, { 9, 11, 12, 14}, { 9, 11, 13, 15}, { 10, 12, 14, 16}, + { 10, 12, 15, 17}, { 11, 13, 15, 18}, { 11, 14, 16, 19}, { 12, 14, 17, 20}, + { 12, 15, 18, 21}, { 13, 16, 19, 22}, { 14, 17, 20, 23}, { 14, 18, 21, 24}, + { 15, 19, 22, 25}, { 16, 20, 23, 27}, { 17, 21, 25, 28}, { 18, 22, 26, 30}, + { 19, 23, 27, 31}, { 20, 24, 29, 33}, { 21, 26, 30, 35}, { 22, 27, 32, 37}, + { 23, 28, 33, 39}, { 24, 30, 35, 41}, { 26, 31, 37, 43}, { 27, 33, 39, 45}, + { 29, 35, 41, 48}, { 30, 37, 43, 50}, { 32, 39, 46, 53}, { 33, 41, 48, 56}, + { 35, 43, 51, 59}, { 37, 45, 54, 62}, { 39, 48, 56, 65}, { 41, 50, 59, 69}, + { 43, 53, 63, 72}, { 46, 56, 66, 76}, { 48, 59, 69, 80}, { 51, 62, 73, 85}, + { 53, 65, 77, 89}, { 56, 69, 81, 94}, { 59, 72, 
86, 99}, { 62, 76, 90, 104}, + { 66, 80, 95, 110}, { 69, 85, 100, 116}, { 73, 89, 105, 122}, { 77, 94, 111, 128}, + { 81, 99, 117, 135}, { 85, 104, 123, 142}, { 90, 110, 130, 150}, { 95, 116, 137, 158}, + {100, 122, 144, 166}, {105, 128, 152, 175}, {111, 135, 160, 185}, {116, 142, 169, 195}, + {123, 150, 178, 205}, {128, 158, 187, 216}, {128, 167, 197, 227}, {128, 176, 208, 240} }; const uint8_t x264_cabac_transition[128][2] = { - { 0, 0}, { 1, 25}, { 1, 25}, { 2, 26}, { 3, 26}, { 4, 26}, { 5, 27}, { 6, 27}, - { 7, 27}, { 8, 28}, { 9, 28}, { 10, 28}, { 11, 29}, { 12, 29}, { 13, 30}, { 14, 30}, - { 15, 30}, { 16, 31}, { 17, 31}, { 18, 32}, { 19, 33}, { 20, 33}, { 21, 33}, { 22, 34}, - { 23, 34}, { 24, 35}, { 25, 36}, { 26, 36}, { 27, 37}, { 28, 37}, { 29, 38}, { 30, 39}, - { 31, 39}, { 32, 40}, { 33, 41}, { 34, 41}, { 35, 42}, { 36, 42}, { 37, 44}, { 38, 44}, - { 39, 45}, { 40, 45}, { 41, 47}, { 42, 47}, { 43, 48}, { 44, 48}, { 45, 50}, { 46, 50}, - { 47, 51}, { 48, 52}, { 49, 52}, { 50, 54}, { 51, 54}, { 52, 55}, { 53, 56}, { 54, 57}, - { 55, 58}, { 56, 59}, { 57, 59}, { 58, 61}, { 59, 61}, { 60, 62}, { 61, 63}, { 62, 64}, - { 63, 65}, { 64, 66}, { 65, 67}, { 66, 68}, { 66, 69}, { 68, 70}, { 68, 71}, { 69, 72}, - { 70, 73}, { 71, 74}, { 72, 75}, { 73, 76}, { 73, 77}, { 75, 78}, { 75, 79}, { 76, 80}, - { 77, 81}, { 77, 82}, { 79, 83}, { 79, 84}, { 80, 85}, { 80, 86}, { 82, 87}, { 82, 88}, - { 83, 89}, { 83, 90}, { 85, 91}, { 85, 92}, { 86, 93}, { 86, 94}, { 87, 95}, { 88, 96}, - { 88, 97}, { 89, 98}, { 90, 99}, { 90,100}, { 91,101}, { 91,102}, { 92,103}, { 93,104}, - { 93,105}, { 94,106}, { 94,107}, { 94,108}, { 95,109}, { 96,110}, { 96,111}, { 97,112}, - { 97,113}, { 97,114}, { 98,115}, { 98,116}, { 99,117}, { 99,118}, { 99,119}, {100,120}, - {100,121}, {100,122}, {101,123}, {101,124}, {101,125}, {102,126}, {102,126}, {127,127}, -}; - -const uint8_t x264_cabac_renorm_shift[64]= { - 6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + { 0, 0}, { 1, 1}, { 2, 50}, { 51, 3}, { 2, 50}, { 51, 3}, { 4, 52}, { 53, 5}, + { 6, 52}, { 53, 7}, { 8, 52}, { 53, 9}, { 10, 54}, { 55, 11}, { 12, 54}, { 55, 13}, + { 14, 54}, { 55, 15}, { 16, 56}, { 57, 17}, { 18, 56}, { 57, 19}, { 20, 56}, { 57, 21}, + { 22, 58}, { 59, 23}, { 24, 58}, { 59, 25}, { 26, 60}, { 61, 27}, { 28, 60}, { 61, 29}, + { 30, 60}, { 61, 31}, { 32, 62}, { 63, 33}, { 34, 62}, { 63, 35}, { 36, 64}, { 65, 37}, + { 38, 66}, { 67, 39}, { 40, 66}, { 67, 41}, { 42, 66}, { 67, 43}, { 44, 68}, { 69, 45}, + { 46, 68}, { 69, 47}, { 48, 70}, { 71, 49}, { 50, 72}, { 73, 51}, { 52, 72}, { 73, 53}, + { 54, 74}, { 75, 55}, { 56, 74}, { 75, 57}, { 58, 76}, { 77, 59}, { 60, 78}, { 79, 61}, + { 62, 78}, { 79, 63}, { 64, 80}, { 81, 65}, { 66, 82}, { 83, 67}, { 68, 82}, { 83, 69}, + { 70, 84}, { 85, 71}, { 72, 84}, { 85, 73}, { 74, 88}, { 89, 75}, { 76, 88}, { 89, 77}, + { 78, 90}, { 91, 79}, { 80, 90}, { 91, 81}, { 82, 94}, { 95, 83}, { 84, 94}, { 95, 85}, + { 86, 96}, { 97, 87}, { 88, 96}, { 97, 89}, { 90, 100}, {101, 91}, { 92, 100}, {101, 93}, + { 94, 102}, {103, 95}, { 96, 104}, {105, 97}, { 98, 104}, {105, 99}, {100, 108}, {109, 101}, + {102, 108}, {109, 103}, {104, 110}, {111, 105}, {106, 112}, {113, 107}, {108, 114}, {115, 109}, + {110, 116}, {117, 111}, {112, 118}, {119, 113}, {114, 118}, {119, 115}, {116, 122}, {123, 117}, + {118, 122}, {123, 119}, {120, 124}, {125, 121}, {122, 126}, {127, 123}, {124, 127}, {126, 125} }; -static const uint8_t x264_cabac_probability[128] = +const uint8_t x264_cabac_renorm_shift[64] = { - FIX8(0.9812), FIX8(0.9802), FIX8(0.9792), FIX8(0.9781), - FIX8(0.9769), FIX8(0.9757), FIX8(0.9744), FIX8(0.9730), - FIX8(0.9716), FIX8(0.9700), FIX8(0.9684), FIX8(0.9667), - FIX8(0.9650), FIX8(0.9631), FIX8(0.9611), FIX8(0.9590), - FIX8(0.9568), FIX8(0.9545), FIX8(0.9521), FIX8(0.9495), - FIX8(0.9468), FIX8(0.9440), FIX8(0.9410), FIX8(0.9378), - FIX8(0.9345), 
FIX8(0.9310), FIX8(0.9273), FIX8(0.9234), - FIX8(0.9193), FIX8(0.9150), FIX8(0.9105), FIX8(0.9057), - FIX8(0.9006), FIX8(0.8953), FIX8(0.8897), FIX8(0.8838), - FIX8(0.8776), FIX8(0.8710), FIX8(0.8641), FIX8(0.8569), - FIX8(0.8492), FIX8(0.8411), FIX8(0.8326), FIX8(0.8237), - FIX8(0.8143), FIX8(0.8043), FIX8(0.7938), FIX8(0.7828), - FIX8(0.7712), FIX8(0.7590), FIX8(0.7461), FIX8(0.7325), - FIX8(0.7182), FIX8(0.7031), FIX8(0.6872), FIX8(0.6705), - FIX8(0.6528), FIX8(0.6343), FIX8(0.6147), FIX8(0.5941), - FIX8(0.5724), FIX8(0.5495), FIX8(0.5254), FIX8(0.5000), - FIX8(0.5000), FIX8(0.4746), FIX8(0.4505), FIX8(0.4276), - FIX8(0.4059), FIX8(0.3853), FIX8(0.3657), FIX8(0.3472), - FIX8(0.3295), FIX8(0.3128), FIX8(0.2969), FIX8(0.2818), - FIX8(0.2675), FIX8(0.2539), FIX8(0.2410), FIX8(0.2288), - FIX8(0.2172), FIX8(0.2062), FIX8(0.1957), FIX8(0.1857), - FIX8(0.1763), FIX8(0.1674), FIX8(0.1589), FIX8(0.1508), - FIX8(0.1431), FIX8(0.1359), FIX8(0.1290), FIX8(0.1224), - FIX8(0.1162), FIX8(0.1103), FIX8(0.1047), FIX8(0.0994), - FIX8(0.0943), FIX8(0.0895), FIX8(0.0850), FIX8(0.0807), - FIX8(0.0766), FIX8(0.0727), FIX8(0.0690), FIX8(0.0655), - FIX8(0.0622), FIX8(0.0590), FIX8(0.0560), FIX8(0.0532), - FIX8(0.0505), FIX8(0.0479), FIX8(0.0455), FIX8(0.0432), - FIX8(0.0410), FIX8(0.0389), FIX8(0.0369), FIX8(0.0350), - FIX8(0.0333), FIX8(0.0316), FIX8(0.0300), FIX8(0.0284), - FIX8(0.0270), FIX8(0.0256), FIX8(0.0243), FIX8(0.0231), - FIX8(0.0219), FIX8(0.0208), FIX8(0.0198), FIX8(0.0187) + 6,5,4,4,3,3,3,3,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; + /* -ln2(probability) */ -#define F(a,b) {FIX8(a),FIX8(b)} -const uint16_t x264_cabac_entropy[128][2] = +const uint16_t x264_cabac_entropy[128] = { - F(0.0273,5.7370), F(0.0288,5.6618), F(0.0303,5.5866), F(0.0320,5.5114), - F(0.0337,5.4362), F(0.0355,5.3610), F(0.0375,5.2859), F(0.0395,5.2106), - F(0.0416,5.1354), F(0.0439,5.0602), F(0.0463,4.9851), 
F(0.0488,4.9099), - F(0.0515,4.8347), F(0.0543,4.7595), F(0.0572,4.6843), F(0.0604,4.6091), - F(0.0637,4.5339), F(0.0671,4.4588), F(0.0708,4.3836), F(0.0747,4.3083), - F(0.0788,4.2332), F(0.0832,4.1580), F(0.0878,4.0828), F(0.0926,4.0076), - F(0.0977,3.9324), F(0.1032,3.8572), F(0.1089,3.7820), F(0.1149,3.7068), - F(0.1214,3.6316), F(0.1282,3.5565), F(0.1353,3.4813), F(0.1429,3.4061), - F(0.1510,3.3309), F(0.1596,3.2557), F(0.1686,3.1805), F(0.1782,3.1053), - F(0.1884,3.0301), F(0.1992,2.9549), F(0.2107,2.8797), F(0.2229,2.8046), - F(0.2358,2.7294), F(0.2496,2.6542), F(0.2642,2.5790), F(0.2798,2.5038), - F(0.2964,2.4286), F(0.3142,2.3534), F(0.3331,2.2782), F(0.3532,2.2030), - F(0.3748,2.1278), F(0.3979,2.0527), F(0.4226,1.9775), F(0.4491,1.9023), - F(0.4776,1.8271), F(0.5082,1.7519), F(0.5412,1.6767), F(0.5768,1.6015), - F(0.6152,1.5263), F(0.6568,1.4511), F(0.7020,1.3759), F(0.7513,1.3008), - F(0.8050,1.2256), F(0.8638,1.1504), F(0.9285,1.0752), F(1.0000,1.0000), - F(1.0000,1.0000), F(1.0752,0.9285), F(1.1504,0.8638), F(1.2256,0.8050), - F(1.3008,0.7513), F(1.3759,0.7020), F(1.4511,0.6568), F(1.5263,0.6152), - F(1.6015,0.5768), F(1.6767,0.5412), F(1.7519,0.5082), F(1.8271,0.4776), - F(1.9023,0.4491), F(1.9775,0.4226), F(2.0527,0.3979), F(2.1278,0.3748), - F(2.2030,0.3532), F(2.2782,0.3331), F(2.3534,0.3142), F(2.4286,0.2964), - F(2.5038,0.2798), F(2.5790,0.2642), F(2.6542,0.2496), F(2.7294,0.2358), - F(2.8046,0.2229), F(2.8797,0.2107), F(2.9549,0.1992), F(3.0301,0.1884), - F(3.1053,0.1782), F(3.1805,0.1686), F(3.2557,0.1596), F(3.3309,0.1510), - F(3.4061,0.1429), F(3.4813,0.1353), F(3.5565,0.1282), F(3.6316,0.1214), - F(3.7068,0.1149), F(3.7820,0.1089), F(3.8572,0.1032), F(3.9324,0.0977), - F(4.0076,0.0926), F(4.0828,0.0878), F(4.1580,0.0832), F(4.2332,0.0788), - F(4.3083,0.0747), F(4.3836,0.0708), F(4.4588,0.0671), F(4.5339,0.0637), - F(4.6091,0.0604), F(4.6843,0.0572), F(4.7595,0.0543), F(4.8347,0.0515), - F(4.9099,0.0488), F(4.9851,0.0463), F(5.0602,0.0439), 
F(5.1354,0.0416), - F(5.2106,0.0395), F(5.2859,0.0375), F(5.3610,0.0355), F(5.4362,0.0337), - F(5.5114,0.0320), F(5.5866,0.0303), F(5.6618,0.0288), F(5.7370,0.0273), + FIX8(0.0273), FIX8(5.7370), FIX8(0.0288), FIX8(5.6618), + FIX8(0.0303), FIX8(5.5866), FIX8(0.0320), FIX8(5.5114), + FIX8(0.0337), FIX8(5.4362), FIX8(0.0355), FIX8(5.3610), + FIX8(0.0375), FIX8(5.2859), FIX8(0.0395), FIX8(5.2106), + FIX8(0.0416), FIX8(5.1354), FIX8(0.0439), FIX8(5.0602), + FIX8(0.0463), FIX8(4.9851), FIX8(0.0488), FIX8(4.9099), + FIX8(0.0515), FIX8(4.8347), FIX8(0.0543), FIX8(4.7595), + FIX8(0.0572), FIX8(4.6843), FIX8(0.0604), FIX8(4.6091), + FIX8(0.0637), FIX8(4.5339), FIX8(0.0671), FIX8(4.4588), + FIX8(0.0708), FIX8(4.3836), FIX8(0.0747), FIX8(4.3083), + FIX8(0.0788), FIX8(4.2332), FIX8(0.0832), FIX8(4.1580), + FIX8(0.0878), FIX8(4.0828), FIX8(0.0926), FIX8(4.0076), + FIX8(0.0977), FIX8(3.9324), FIX8(0.1032), FIX8(3.8572), + FIX8(0.1089), FIX8(3.7820), FIX8(0.1149), FIX8(3.7068), + FIX8(0.1214), FIX8(3.6316), FIX8(0.1282), FIX8(3.5565), + FIX8(0.1353), FIX8(3.4813), FIX8(0.1429), FIX8(3.4061), + FIX8(0.1510), FIX8(3.3309), FIX8(0.1596), FIX8(3.2557), + FIX8(0.1686), FIX8(3.1805), FIX8(0.1782), FIX8(3.1053), + FIX8(0.1884), FIX8(3.0301), FIX8(0.1992), FIX8(2.9549), + FIX8(0.2107), FIX8(2.8797), FIX8(0.2229), FIX8(2.8046), + FIX8(0.2358), FIX8(2.7294), FIX8(0.2496), FIX8(2.6542), + FIX8(0.2642), FIX8(2.5790), FIX8(0.2798), FIX8(2.5038), + FIX8(0.2964), FIX8(2.4286), FIX8(0.3142), FIX8(2.3534), + FIX8(0.3331), FIX8(2.2782), FIX8(0.3532), FIX8(2.2030), + FIX8(0.3748), FIX8(2.1278), FIX8(0.3979), FIX8(2.0527), + FIX8(0.4226), FIX8(1.9775), FIX8(0.4491), FIX8(1.9023), + FIX8(0.4776), FIX8(1.8271), FIX8(0.5082), FIX8(1.7519), + FIX8(0.5412), FIX8(1.6767), FIX8(0.5768), FIX8(1.6015), + FIX8(0.6152), FIX8(1.5263), FIX8(0.6568), FIX8(1.4511), + FIX8(0.7020), FIX8(1.3759), FIX8(0.7513), FIX8(1.3008), + FIX8(0.8050), FIX8(1.2256), FIX8(0.8638), FIX8(1.1504), + FIX8(0.9285), FIX8(1.0752), 
FIX8(1.0000), FIX8(1.0000) }; +uint8_t x264_cabac_contexts[4][QP_MAX_SPEC+1][460]; + +void x264_cabac_init( void ) +{ + for( int i = 0; i < 4; i++ ) + { + const int8_t (*cabac_context_init)[460][2] = i == 0 ? &x264_cabac_context_init_I + : &x264_cabac_context_init_PB[i-1]; + for( int qp = 0; qp <= QP_MAX_SPEC; qp++ ) + for( int j = 0; j < 460; j++ ) + { + int state = x264_clip3( (((*cabac_context_init)[j][0] * qp) >> 4) + (*cabac_context_init)[j][1], 1, 126 ); + x264_cabac_contexts[i][qp][j] = (X264_MIN( state, 127-state ) << 1) | (state >> 6); + } + } +} /***************************************************************************** * *****************************************************************************/ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model ) { - const int8_t (*cabac_context_init)[460][2]; - int i; - - if( i_slice_type == SLICE_TYPE_I ) - { - cabac_context_init = &x264_cabac_context_init_I; - } - else - { - cabac_context_init = &x264_cabac_context_init_PB[i_model]; - } - - for( i = 0; i < 460; i++ ) - { - cb->state[i] = x264_clip3( (((*cabac_context_init)[i][0] * i_qp) >> 4) + (*cabac_context_init)[i][1], 1, 126 ); - } + memcpy( cb->state, x264_cabac_contexts[i_slice_type == SLICE_TYPE_I ? 
0 : i_model + 1][i_qp], 460 ); } -void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end ) +void x264_cabac_encode_init_core( x264_cabac_t *cb ) { cb->i_low = 0; cb->i_range = 0x01FE; - cb->i_queue = -1; // the first bit will be shifted away and not written + cb->i_queue = -9; // the first bit will be shifted away and not written cb->i_bytes_outstanding = 0; +} + +void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end ) +{ + x264_cabac_encode_init_core( cb ); cb->p_start = p_data; cb->p = p_data; cb->p_end = p_end; @@ -852,22 +796,18 @@ void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end ) static inline void x264_cabac_putbyte( x264_cabac_t *cb ) { - if( cb->i_queue >= 8 ) + if( cb->i_queue >= 0 ) { - int out = cb->i_low >> (cb->i_queue+2); - cb->i_low &= (4<<cb->i_queue)-1; + int out = cb->i_low >> (cb->i_queue+10); + cb->i_low &= (0x400<<cb->i_queue)-1; cb->i_queue -= 8; if( (out & 0xff) == 0xff ) - { cb->i_bytes_outstanding++; - } else { int carry = out >> 8; int bytes_outstanding = cb->i_bytes_outstanding; - if( cb->p + bytes_outstanding + 1 >= cb->p_end ) - return; // this can't modify before the beginning of the stream because // that would correspond to a probability > 1. // it will write before the beginning of the stream, which is ok @@ -895,13 +835,15 @@ static inline void x264_cabac_encode_renorm( x264_cabac_t *cb ) x264_cabac_putbyte( cb ); } -#ifndef HAVE_MMX -void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx, int b ) +/* Making custom versions of this function, even in asm, for the cases where + * b is known to be 0 or 1, proved to be somewhat useful on x86_32 with GCC 3.4 + * but nearly useless with GCC 4.3 and worse than useless on x86_64. 
*/ +void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b ) { int i_state = cb->state[i_ctx]; - int i_range_lps = x264_cabac_range_lps[i_state][(cb->i_range>>6)&0x03]; + int i_range_lps = x264_cabac_range_lps[i_state>>1][(cb->i_range>>6)-4]; cb->i_range -= i_range_lps; - if( b != (i_state >> 6) ) + if( b != (i_state & 1) ) { cb->i_low += cb->i_range; cb->i_range = i_range_lps; @@ -909,25 +851,29 @@ void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx, int b ) cb->state[i_ctx] = x264_cabac_transition[i_state][b]; x264_cabac_encode_renorm( cb ); } -#endif -void x264_cabac_encode_bypass( x264_cabac_t *cb, int b ) +/* Note: b is negated for this function */ +void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b ) { cb->i_low <<= 1; - cb->i_low += -b & cb->i_range; + cb->i_low += b & cb->i_range; cb->i_queue += 1; x264_cabac_putbyte( cb ); } +static const int bypass_lut[16] = +{ + -1, 0x2, 0x14, 0x68, 0x1d0, 0x7a0, 0x1f40, 0x7e80, + 0x1fd00, 0x7fa00, 0x1ff400, 0x7fe800, 0x1ffd000, 0x7ffa000, 0x1fff4000, 0x7ffe8000 +}; + void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val ) { - int k, i; - uint32_t x; - for( k = exp_bits; val >= (1<<k); k++ ) - val -= 1<<k; - x = (((1<<(k-exp_bits))-1)<<(k+1))+val; + uint32_t v = val + (1<<exp_bits); + int k = 31 - x264_clz( v ); + uint32_t x = (bypass_lut[k-exp_bits]<<exp_bits) + v; k = 2*k+1-exp_bits; - i = ((k-1)&7)+1; + int i = ((k-1)&7)+1; do { k -= i; cb->i_low <<= i; @@ -938,7 +884,7 @@ void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val ) } while( k > 0 ); } -void x264_cabac_encode_terminal( x264_cabac_t *cb ) +void x264_cabac_encode_terminal_c( x264_cabac_t *cb ) { cb->i_range -= 2; x264_cabac_encode_renorm( cb ); @@ -952,14 +898,11 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb ) cb->i_queue += 9; x264_cabac_putbyte( cb ); x264_cabac_putbyte( cb ); - cb->i_low <<= 8 - cb->i_queue; + cb->i_low <<= -cb->i_queue; cb->i_low |= (0x35a4e4f5 >> (h->i_frame & 31) & 1) << 10; - cb->i_queue = 8; + cb->i_queue = 0; x264_cabac_putbyte( cb ); - if( cb->p + cb->i_bytes_outstanding + 1 >= cb->p_end ) - return; //FIXME throw an error instead of silently truncating the frame - while( cb->i_bytes_outstanding > 0 ) { 
*(cb->p++) = 0xff;