Geant4 Cross Reference |
1 2 // G.Barrand: pure header version of toojpeg found at https://github.com/stbrumme/toojpeg 3 4 // ////////////////////////////////////////////////////////// 5 // toojpeg.cpp 6 // written by Stephan Brumme, 2018-2019 7 // see https://create.stephan-brumme.com/toojpeg/ 8 // 9 10 #include <cstddef> //size_t 11 12 // - the "official" specifications: https://www.w3.org/Graphics/JPEG/itu-t81.pdf and https://www.w3.org/Graphics/JPEG/jfif3.pdf 13 // - Wikipedia has a short description of the JFIF/JPEG file format: https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format 14 // - the popular STB Image library includes Jon's JPEG encoder as well: https://github.com/nothings/stb/blob/master/stb_image_write.h 15 // - the most readable JPEG book (from a developer's perspective) is Miano's "Compressed Image File Formats" (1999, ISBN 0-201-60443-4), 16 // used copies are really cheap nowadays and include a CD with C++ sources as well (plus great format descriptions of GIF & PNG) 17 // - much more detailled is Mitchell/Pennebaker's "JPEG: Still Image Data Compression Standard" (1993, ISBN 0-442-01272-1) 18 // which contains the official JPEG standard, too - fun fact: I bought a signed copy in a second-hand store without noticing 19 20 namespace tools { 21 namespace toojpeg { 22 // //////////////////////////////////////// 23 // data types 24 typedef unsigned char uint8_t; 25 typedef unsigned short uint16_t; 26 typedef short int16_t; 27 typedef int int32_t; // at least four bytes 28 29 // //////////////////////////////////////// 30 // constants 31 32 // quantization tables from JPEG Standard, Annex K 33 const uint8_t DefaultQuantLuminance[8*8] = 34 { 16, 11, 10, 16, 24, 40, 51, 61, // there are a few experts proposing slightly more efficient values, 35 12, 12, 14, 19, 26, 58, 60, 55, // e.g. https://www.imagemagick.org/discourse-server/viewtopic.php?t=20333 36 14, 13, 16, 24, 40, 57, 69, 56, // btw: Google's Guetzli project optimizes the quantization tables per image 37 14, 17, 22, 29, 51, 87, 80, 62, 38 18, 22, 37, 56, 68,109,103, 77, 39 24, 35, 55, 64, 81,104,113, 92, 40 49, 64, 78, 87,103,121,120,101, 41 72, 92, 95, 98,112,100,103, 99 }; 42 const uint8_t DefaultQuantChrominance[8*8] = 43 { 17, 18, 24, 47, 99, 99, 99, 99, 44 18, 21, 26, 66, 99, 99, 99, 99, 45 24, 26, 56, 99, 99, 99, 99, 99, 46 47, 66, 99, 99, 99, 99, 99, 99, 47 99, 99, 99, 99, 99, 99, 99, 99, 48 99, 99, 99, 99, 99, 99, 99, 99, 49 99, 99, 99, 99, 99, 99, 99, 99, 50 99, 99, 99, 99, 99, 99, 99, 99 }; 51 52 // 8x8 blocks are processed in zig-zag order 53 // most encoders use a zig-zag "forward" table, I switched to its inverse for performance reasons 54 // note: ZigZagInv[ZigZag[i]] = i 55 const uint8_t ZigZagInv[8*8] = 56 { 0, 1, 8,16, 9, 2, 3,10, // ZigZag[] = 0, 1, 5, 6,14,15,27,28, 57 17,24,32,25,18,11, 4, 5, // 2, 4, 7,13,16,26,29,42, 58 12,19,26,33,40,48,41,34, // 3, 8,12,17,25,30,41,43, 59 27,20,13, 6, 7,14,21,28, // 9,11,18,24,31,40,44,53, 60 35,42,49,56,57,50,43,36, // 10,19,23,32,39,45,52,54, 61 29,22,15,23,30,37,44,51, // 20,22,33,38,46,51,55,60, 62 58,59,52,45,38,31,39,46, // 21,34,37,47,50,56,59,61, 63 53,60,61,54,47,55,62,63 }; // 35,36,48,49,57,58,62,63 64 65 // static Huffman code tables from JPEG standard Annex K 66 // - CodesPerBitsize tables define how many Huffman codes will have a certain bitsize (plus 1 because there nothing with zero bits), 67 // e.g. DcLuminanceCodesPerBitsize[2] = 5 because there are 5 Huffman codes being 2+1=3 bits long 68 // - Values tables are a list of values ordered by their Huffman code bitsize, 69 // e.g. AcLuminanceValues => Huffman(0x01,0x02 and 0x03) will have 2 bits, Huffman(0x00) will have 3 bits, Huffman(0x04,0x11 and 0x05) will have 4 bits, ... 70 71 // Huffman definitions for first DC/AC tables (luminance / Y channel) 72 const uint8_t DcLuminanceCodesPerBitsize[16] = { 0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 }; // sum = 12 73 const uint8_t DcLuminanceValues [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; // => 12 codes 74 const uint8_t AcLuminanceCodesPerBitsize[16] = { 0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,125 }; // sum = 162 75 const uint8_t AcLuminanceValues [162] = // => 162 codes 76 { 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08, // 16*10+2 symbols because 77 0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28, // upper 4 bits can be 0..F 78 0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59, // while lower 4 bits can be 1..A 79 0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89, // plus two special codes 0x00 and 0xF0 80 0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6, // order of these symbols was determined empirically by JPEG committee 81 0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE1,0xE2, 82 0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA }; 83 // Huffman definitions for second DC/AC tables (chrominance / Cb and Cr channels) 84 const uint8_t DcChrominanceCodesPerBitsize[16] = { 0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; // sum = 12 85 const uint8_t DcChrominanceValues [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; // => 12 codes (identical to DcLuminanceValues) 86 const uint8_t AcChrominanceCodesPerBitsize[16] = { 0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,119 }; // sum = 162 87 const uint8_t AcChrominanceValues [162] = // => 162 codes 88 { 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, // same number of symbol, just different order 89 0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26, // (which is more efficient for AC coding) 90 0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58, 91 0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87, 92 0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4, 93 0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA, 94 0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA }; 95 const int16_t CodeWordLimit = 2048; // +/-2^11, maximum value after DCT 96 97 // //////////////////////////////////////// 98 // structs 99 100 // represent a single Huffman code 101 struct BitCode 102 { 103 //BitCode() = default; // undefined state, must be initialized at a later time 104 BitCode():code(0),numBits(0) {} 105 BitCode(const BitCode& a_from):code(a_from.code),numBits(a_from.numBits) {} 106 BitCode& operator=(const BitCode& a_from) { 107 code = a_from.code; 108 numBits = a_from.numBits; 109 return *this; 110 } 111 112 BitCode(uint16_t code_, uint8_t numBits_) 113 : code(code_), numBits(numBits_) {} 114 uint16_t code; // JPEG's Huffman codes are limited to 16 bits 115 uint8_t numBits; // number of valid bits 116 }; 117 118 // wrapper for bit output operations 119 struct BitWriter 120 { 121 // user-supplied callback that writes/stores one byte 122 WRITE_ONE_BYTE output; 123 void* tag; 124 // initialize writer 125 explicit BitWriter(WRITE_ONE_BYTE output_,void* tag_) : output(output_),tag(tag_) { 126 buffer.data = 0; 127 buffer.numBits = 0; 128 } 129 130 // store the most recently encoded bits that are not written yet 131 struct BitBuffer 132 { 133 int32_t data /*= 0*/; // actually only at most 24 bits are used 134 uint8_t numBits /*= 0*/; // number of valid bits (the right-most bits) 135 } buffer; 136 137 // write Huffman bits stored in BitCode, keep excess bits in BitBuffer 138 BitWriter& operator<<(const BitCode& data) 139 { 140 // append the new bits to those bits leftover from previous call(s) 141 buffer.numBits += data.numBits; 142 buffer.data <<= data.numBits; 143 buffer.data |= data.code; 144 145 // write all "full" bytes 146 while (buffer.numBits >= 8) 147 { 148 // extract highest 8 bits 149 buffer.numBits -= 8; 150 uint8_t oneByte = uint8_t(buffer.data >> buffer.numBits); 151 output(oneByte,tag); 152 153 if (oneByte == 0xFF) // 0xFF has a special meaning for JPEGs (it's a block marker) 154 output(0,tag); // therefore pad a zero to indicate "nope, this one ain't a marker, it's just a coincidence" 155 156 // note: I don't clear those written bits, therefore buffer.bits may contain garbage in the high bits 157 // if you really want to "clean up" (e.g. for debugging purposes) then uncomment the following line 158 //buffer.bits &= (1 << buffer.numBits) - 1; 159 } 160 return *this; 161 } 162 163 // write all non-yet-written bits, fill gaps with 1s (that's a strange JPEG thing) 164 void flush() 165 { 166 // at most seven set bits needed to "fill" the last byte: 0x7F = binary 0111 1111 167 *this << BitCode(0x7F, 7); // I should set buffer.numBits = 0 but since there are no single bits written after flush() I can safely ignore it 168 } 169 170 // NOTE: all the following BitWriter functions IGNORE the BitBuffer and write straight to output ! 171 // write a single byte 172 BitWriter& operator<<(uint8_t oneByte) 173 { 174 output(oneByte,tag); 175 return *this; 176 } 177 178 // write an array of bytes 179 template <typename T, int Size> 180 BitWriter& operator<<(T (&manyBytes)[Size]) 181 { 182 //for (auto c : manyBytes) 183 // output(c); 184 for(size_t i=0;i<Size;i++) output(manyBytes[i],tag); 185 return *this; 186 } 187 188 // start a new JFIF block 189 void addMarker(uint8_t id, uint16_t length) 190 { 191 output(0xFF,tag); output(id,tag); // ID, always preceded by 0xFF 192 output(uint8_t(length >> 8),tag); // length of the block (big-endian, includes the 2 length bytes as well) 193 output(uint8_t(length & 0xFF),tag); 194 } 195 }; 196 197 // //////////////////////////////////////// 198 // functions / templates 199 200 // same as std::min() 201 template <typename Number> 202 inline Number minimum(Number value, Number maximum) 203 { 204 return value <= maximum ? value : maximum; 205 } 206 207 // restrict a value to the interval [minimum, maximum] 208 template <typename Number, typename Limit> 209 inline Number clamp(Number value, Limit minValue, Limit maxValue) 210 { 211 if (value <= minValue) return minValue; // never smaller than the minimum 212 if (value >= maxValue) return maxValue; // never bigger than the maximum 213 return value; // value was inside interval, keep it 214 } 215 216 // convert from RGB to YCbCr, constants are similar to ITU-R, see https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion 217 inline float rgb2y (float r, float g, float b) { return +0.299f * r +0.587f * g +0.114f * b; } 218 inline float rgb2cb(float r, float g, float b) { return -0.16874f * r -0.33126f * g +0.5f * b; } 219 inline float rgb2cr(float r, float g, float b) { return +0.5f * r -0.41869f * g -0.08131f * b; } 220 221 // forward DCT computation "in one dimension" (fast AAN algorithm by Arai, Agui and Nakajima: "A fast DCT-SQ scheme for images") 222 inline void DCT(float block[8*8], uint8_t stride) // stride must be 1 (=horizontal) or 8 (=vertical) 223 { 224 const float SqrtHalfSqrt = 1.306562965f; // sqrt((2 + sqrt(2)) / 2) = cos(pi * 1 / 8) * sqrt(2) 225 const float InvSqrt = 0.707106781f; // 1 / sqrt(2) = cos(pi * 2 / 8) 226 const float HalfSqrtSqrt = 0.382683432f; // sqrt(2 - sqrt(2)) / 2 = cos(pi * 3 / 8) 227 const float InvSqrtSqrt = 0.541196100f; // 1 / sqrt(2 - sqrt(2)) = cos(pi * 3 / 8) * sqrt(2) 228 229 // modify in-place 230 float& block0 = block[0 ]; 231 float& block1 = block[1 * stride]; 232 float& block2 = block[2 * stride]; 233 float& block3 = block[3 * stride]; 234 float& block4 = block[4 * stride]; 235 float& block5 = block[5 * stride]; 236 float& block6 = block[6 * stride]; 237 float& block7 = block[7 * stride]; 238 239 // based on https://dev.w3.org/Amaya/libjpeg/jfdctflt.c , the original variable names can be found in my comments 240 float add07 = block0 + block7; float sub07 = block0 - block7; // tmp0, tmp7 241 float add16 = block1 + block6; float sub16 = block1 - block6; // tmp1, tmp6 242 float add25 = block2 + block5; float sub25 = block2 - block5; // tmp2, tmp5 243 float add34 = block3 + block4; float sub34 = block3 - block4; // tmp3, tmp4 244 245 float add0347 = add07 + add34; float sub07_34 = add07 - add34; // tmp10, tmp13 ("even part" / "phase 2") 246 float add1256 = add16 + add25; float sub16_25 = add16 - add25; // tmp11, tmp12 247 248 block0 = add0347 + add1256; block4 = add0347 - add1256; // "phase 3" 249 250 float z1 = (sub16_25 + sub07_34) * InvSqrt; // all temporary z-variables kept their original names 251 block2 = sub07_34 + z1; block6 = sub07_34 - z1; // "phase 5" 252 253 float sub23_45 = sub25 + sub34; // tmp10 ("odd part" / "phase 2") 254 float sub12_56 = sub16 + sub25; // tmp11 255 float sub01_67 = sub16 + sub07; // tmp12 256 257 float z5 = (sub23_45 - sub01_67) * HalfSqrtSqrt; 258 float z2 = sub23_45 * InvSqrtSqrt + z5; 259 float z3 = sub12_56 * InvSqrt; 260 float z4 = sub01_67 * SqrtHalfSqrt + z5; 261 float z6 = sub07 + z3; // z11 ("phase 5") 262 float z7 = sub07 - z3; // z13 263 block1 = z6 + z4; block7 = z6 - z4; // "phase 6" 264 block5 = z7 + z2; block3 = z7 - z2; 265 } 266 267 // run DCT, quantize and write Huffman bit codes 268 inline int16_t encodeBlock(BitWriter& writer, float block[8][8], const float scaled[8*8], int16_t lastDC, 269 const BitCode huffmanDC[256], const BitCode huffmanAC[256], const BitCode* codewords) 270 { 271 // "linearize" the 8x8 block, treat it as a flat array of 64 floats 272 float* block64 = (float*) block; 273 274 // DCT: rows 275 for (size_t offset = 0; offset < 8; offset++) 276 DCT(block64 + offset*8, 1); 277 // DCT: columns 278 for (size_t offset = 0; offset < 8; offset++) 279 DCT(block64 + offset*1, 8); 280 281 // scale 282 for (size_t i = 0; i < 8*8; i++) 283 block64[i] *= scaled[i]; 284 285 // encode DC (the first coefficient is the "average color" of the 8x8 block) 286 int DC = int(block64[0] + (block64[0] >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect 287 288 // quantize and zigzag the other 63 coefficients 289 size_t posNonZero = 0; // find last coefficient which is not zero (because trailing zeros are encoded differently) 290 int16_t quantized[8*8]; 291 for (size_t i = 1; i < 8*8; i++) // start at 1 because block64[0]=DC was already processed 292 { 293 float value = block64[ZigZagInv[i]]; 294 // round to nearest integer 295 quantized[i] = int(value + (value >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect 296 // remember offset of last non-zero coefficient 297 if (quantized[i] != 0) 298 posNonZero = i; 299 } 300 301 // same "average color" as previous block ? 302 int diff = DC - lastDC; 303 if (diff == 0) 304 writer << huffmanDC[0x00]; // yes, write a special short symbol 305 else 306 { 307 const BitCode bits = codewords[diff]; // nope, encode the difference to previous block's average color 308 writer << huffmanDC[bits.numBits] << bits; 309 } 310 311 // encode ACs (quantized[1..63]) 312 size_t offset = 0; // upper 4 bits count the number of consecutive zeros 313 for (size_t i = 1; i <= posNonZero; i++) // quantized[0] was already written, skip all trailing zeros, too 314 { 315 // zeros are encoded in a special way 316 while (quantized[i] == 0) // found another zero ? 317 { 318 offset += 0x10; // add 1 to the upper 4 bits 319 // split into blocks of at most 16 consecutive zeros 320 if (offset > 0xF0) // remember, the counter is in the upper 4 bits, 0xF = 15 321 { 322 writer << huffmanAC[0xF0]; // 0xF0 is a special code for "16 zeros" 323 offset = 0; 324 } 325 i++; 326 } 327 328 const BitCode encoded = codewords[quantized[i]]; 329 // combine number of zeros with the number of bits of the next non-zero value 330 writer << huffmanAC[offset + encoded.numBits] << encoded; // and the value itself 331 offset = 0; 332 } 333 334 // send end-of-block code (0x00), only needed if there are trailing zeros 335 if (posNonZero < 8*8 - 1) // = 63 336 writer << huffmanAC[0x00]; 337 338 return DC; 339 } 340 341 // Jon's code includes the pre-generated Huffman codes 342 // I don't like these "magic constants" and compute them on my own :-) 343 inline void generateHuffmanTable(const uint8_t numCodes[16], const uint8_t* values, BitCode result[256]) 344 { 345 // process all bitsizes 1 thru 16, no JPEG Huffman code is allowed to exceed 16 bits 346 uint16_t huffmanCode = 0; 347 for (uint8_t numBits = 1; numBits <= 16; numBits++) 348 { 349 // ... and each code of these bitsizes 350 for (uint8_t i = 0; i < numCodes[numBits - 1]; i++) // note: numCodes array starts at zero, but smallest bitsize is 1 351 result[*values++] = BitCode(huffmanCode++, numBits); 352 353 // next Huffman code needs to be one bit wider 354 huffmanCode <<= 1; 355 } 356 } 357 358 // -------------------- externally visible code -------------------- 359 360 // the only exported function ... 361 inline bool writeJpeg(WRITE_ONE_BYTE output, void* tag,const void* pixels_, unsigned short width, unsigned short height, 362 bool isRGB, unsigned char quality_, bool downsample, const char* comment) 363 { 364 // reject invalid pointers 365 if (output == 0/*nullptr*/ || pixels_ == 0/*nullptr*/) 366 return false; 367 // check image format 368 if (width == 0 || height == 0) 369 return false; 370 371 // number of components 372 const uint16_t numComponents = isRGB ? 3 : 1; 373 // note: if there is just one component (=grayscale), then only luminance needs to be stored in the file 374 // thus everything related to chrominance need not to be written to the JPEG 375 // I still compute a few things, like quantization tables to avoid a complete code mess 376 377 // grayscale images can't be downsampled (because there are no Cb + Cr channels) 378 if (!isRGB) 379 downsample = false; 380 381 // wrapper for all output operations 382 BitWriter bitWriter(output,tag); 383 384 // //////////////////////////////////////// 385 // JFIF headers 386 const uint8_t HeaderJfif[2+2+16] = 387 { 0xFF,0xD8, // SOI marker (start of image) 388 0xFF,0xE0, // JFIF APP0 tag 389 0,16, // length: 16 bytes (14 bytes payload + 2 bytes for this length field) 390 'J','F','I','F',0, // JFIF identifier, zero-terminated 391 1,1, // JFIF version 1.1 392 0, // no density units specified 393 0,1,0,1, // density: 1 pixel "per pixel" horizontally and vertically 394 0,0 }; // no thumbnail (size 0 x 0) 395 bitWriter << HeaderJfif; 396 397 // //////////////////////////////////////// 398 // comment (optional) 399 if (comment != 0/*nullptr*/) 400 { 401 // look for zero terminator 402 uint16_t length = 0; // = strlen(comment); 403 while (comment[length] != 0) 404 length++; 405 406 // write COM marker 407 bitWriter.addMarker(0xFE, 2+length); // block size is number of bytes (without zero terminator) + 2 bytes for this length field 408 // ... and write the comment itself 409 for (uint16_t i = 0; i < length; i++) 410 bitWriter << comment[i]; 411 } 412 413 // //////////////////////////////////////// 414 // adjust quantization tables to desired quality 415 416 // quality level must be in 1 ... 100 417 uint16_t quality = clamp<uint16_t>(quality_, 1, 100); 418 // convert to an internal JPEG quality factor, formula taken from libjpeg 419 quality = quality < 50 ? 5000 / quality : 200 - quality * 2; 420 421 uint8_t quantLuminance [8*8]; 422 uint8_t quantChrominance[8*8]; 423 for (size_t i = 0; i < 8*8; i++) 424 { 425 int luminance = (DefaultQuantLuminance [ZigZagInv[i]] * quality + 50) / 100; 426 int chrominance = (DefaultQuantChrominance[ZigZagInv[i]] * quality + 50) / 100; 427 428 // clamp to 1..255 429 quantLuminance [i] = clamp(luminance, 1, 255); 430 quantChrominance[i] = clamp(chrominance, 1, 255); 431 } 432 433 // write quantization tables 434 bitWriter.addMarker(0xDB, 2 + (isRGB ? 2 : 1) * (1 + 8*8)); // length: 65 bytes per table + 2 bytes for this length field 435 // each table has 64 entries and is preceded by an ID byte 436 437 bitWriter << 0x00 << quantLuminance; // first quantization table 438 if (isRGB) 439 bitWriter << 0x01 << quantChrominance; // second quantization table, only relevant for color images 440 441 // //////////////////////////////////////// 442 // write image infos (SOF0 - start of frame) 443 bitWriter.addMarker(0xC0, 2+6+3*numComponents); // length: 6 bytes general info + 3 per channel + 2 bytes for this length field 444 445 // 8 bits per channel 446 bitWriter << 0x08 447 // image dimensions (big-endian) 448 << (height >> 8) << (height & 0xFF) 449 << (width >> 8) << (width & 0xFF); 450 451 // sampling and quantization tables for each component 452 bitWriter << numComponents; // 1 component (grayscale, Y only) or 3 components (Y,Cb,Cr) 453 for (uint16_t id = 1; id <= numComponents; id++) 454 bitWriter << id // component ID (Y=1, Cb=2, Cr=3) 455 // bitmasks for sampling: highest 4 bits: horizontal, lowest 4 bits: vertical 456 << (id == 1 && downsample ? 0x22 : 0x11) // 0x11 is default YCbCr 4:4:4 and 0x22 stands for YCbCr 4:2:0 457 << (id == 1 ? 0 : 1); // use quantization table 0 for Y, table 1 for Cb and Cr 458 459 // //////////////////////////////////////// 460 // Huffman tables 461 // DHT marker - define Huffman tables 462 bitWriter.addMarker(0xC4, isRGB ? (2+208+208) : (2+208)); 463 // 2 bytes for the length field, store chrominance only if needed 464 // 1+16+12 for the DC luminance 465 // 1+16+162 for the AC luminance (208 = 1+16+12 + 1+16+162) 466 // 1+16+12 for the DC chrominance 467 // 1+16+162 for the AC chrominance (208 = 1+16+12 + 1+16+162, same as above) 468 469 // store luminance's DC+AC Huffman table definitions 470 bitWriter << 0x00 // highest 4 bits: 0 => DC, lowest 4 bits: 0 => Y (baseline) 471 << DcLuminanceCodesPerBitsize 472 << DcLuminanceValues; 473 bitWriter << 0x10 // highest 4 bits: 1 => AC, lowest 4 bits: 0 => Y (baseline) 474 << AcLuminanceCodesPerBitsize 475 << AcLuminanceValues; 476 477 // compute actual Huffman code tables (see Jon's code for precalculated tables) 478 BitCode huffmanLuminanceDC[256]; 479 BitCode huffmanLuminanceAC[256]; 480 generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC); 481 generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC); 482 483 // chrominance is only relevant for color images 484 BitCode huffmanChrominanceDC[256]; 485 BitCode huffmanChrominanceAC[256]; 486 if (isRGB) 487 { 488 // store luminance's DC+AC Huffman table definitions 489 bitWriter << 0x01 // highest 4 bits: 0 => DC, lowest 4 bits: 1 => Cr,Cb (baseline) 490 << DcChrominanceCodesPerBitsize 491 << DcChrominanceValues; 492 bitWriter << 0x11 // highest 4 bits: 1 => AC, lowest 4 bits: 1 => Cr,Cb (baseline) 493 << AcChrominanceCodesPerBitsize 494 << AcChrominanceValues; 495 496 // compute actual Huffman code tables (see Jon's code for precalculated tables) 497 generateHuffmanTable(DcChrominanceCodesPerBitsize, DcChrominanceValues, huffmanChrominanceDC); 498 generateHuffmanTable(AcChrominanceCodesPerBitsize, AcChrominanceValues, huffmanChrominanceAC); 499 } 500 501 // //////////////////////////////////////// 502 // start of scan (there is only a single scan for baseline JPEGs) 503 bitWriter.addMarker(0xDA, 2+1+2*numComponents+3); // 2 bytes for the length field, 1 byte for number of components, 504 // then 2 bytes for each component and 3 bytes for spectral selection 505 506 // assign Huffman tables to each component 507 bitWriter << numComponents; 508 for (uint16_t id = 1; id <= numComponents; id++) 509 // highest 4 bits: DC Huffman table, lowest 4 bits: AC Huffman table 510 bitWriter << id << (id == 1 ? 0x00 : 0x11); // Y: tables 0 for DC and AC; Cb + Cr: tables 1 for DC and AC 511 512 // constant values for our baseline JPEGs (which have a single sequential scan) 513 static const uint8_t Spectral[3] = { 0, 63, 0 }; // spectral selection: must be from 0 to 63; successive approximation must be 0 514 bitWriter << Spectral; 515 516 // //////////////////////////////////////// 517 // adjust quantization tables with AAN scaling factors to simplify DCT 518 float scaledLuminance [8*8]; 519 float scaledChrominance[8*8]; 520 for (size_t i = 0; i < 8*8; i++) 521 { 522 size_t row = ZigZagInv[i] / 8; // same as ZigZagInv[i] >> 3 523 size_t column = ZigZagInv[i] % 8; // same as ZigZagInv[i] & 7 524 525 // scaling constants for AAN DCT algorithm: AanScaleFactors[0] = 1, AanScaleFactors[k=1..7] = cos(k*PI/16) * sqrt(2) 526 static const float AanScaleFactors[8] = { 1, 1.387039845f, 1.306562965f, 1.175875602f, 1, 0.785694958f, 0.541196100f, 0.275899379f }; 527 float factor = 1 / (AanScaleFactors[row] * AanScaleFactors[column] * 8); 528 scaledLuminance [ZigZagInv[i]] = factor / quantLuminance [i]; 529 scaledChrominance[ZigZagInv[i]] = factor / quantChrominance[i]; 530 // if you really want JPEGs that are bitwise identical to Jon Olick's code then you need slightly different formulas (note: sqrt(8) = 2.828427125f) 531 //static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; // line 240 of jo_jpeg.cpp 532 //scaledLuminance [ZigZagInv[i]] = 1 / (quantLuminance [i] * aasf[row] * aasf[column]); // lines 266-267 of jo_jpeg.cpp 533 //scaledChrominance[ZigZagInv[i]] = 1 / (quantChrominance[i] * aasf[row] * aasf[column]); 534 } 535 536 // //////////////////////////////////////// 537 // precompute JPEG codewords for quantized DCT 538 BitCode codewordsArray[2 * CodeWordLimit]; // note: quantized[i] is found at codewordsArray[quantized[i] + CodeWordLimit] 539 BitCode* codewords = &codewordsArray[CodeWordLimit]; // allow negative indices, so quantized[i] is at codewords[quantized[i]] 540 uint8_t numBits = 1; // each codeword has at least one bit (value == 0 is undefined) 541 int32_t mask = 1; // mask is always 2^numBits - 1, initial value 2^1-1 = 2-1 = 1 542 for (int16_t value = 1; value < CodeWordLimit; value++) 543 { 544 // numBits = position of highest set bit (ignoring the sign) 545 // mask = (2^numBits) - 1 546 if (value > mask) // one more bit ? 547 { 548 numBits++; 549 mask = (mask << 1) | 1; // append a set bit 550 } 551 codewords[-value] = BitCode(mask - value, numBits); // note that I use a negative index => codewords[-value] = codewordsArray[CodeWordLimit value] 552 codewords[+value] = BitCode( value, numBits); 553 } 554 555 // just convert image data from void* 556 const uint8_t* pixels = (const uint8_t*)pixels_; 557 558 // the next two variables are frequently used when checking for image borders 559 const unsigned short maxWidth = width - 1; // "last row" 560 const unsigned short maxHeight = height - 1; // "bottom line" 561 562 // process MCUs (minimum codes units) => image is subdivided into a grid of 8x8 or 16x16 tiles 563 const unsigned short sampling = downsample ? 2 : 1; // 1x1 or 2x2 sampling 564 const unsigned short mcuSize = 8 * sampling; 565 566 // average color of the previous MCU 567 int16_t lastYDC = 0, lastCbDC = 0, lastCrDC = 0; 568 // convert from RGB to YCbCr 569 float Y[8][8], Cb[8][8], Cr[8][8]; 570 571 for (unsigned short mcuY = 0; mcuY < height; mcuY += mcuSize) // each step is either 8 or 16 (=mcuSize) 572 for (unsigned short mcuX = 0; mcuX < width; mcuX += mcuSize) 573 { 574 // YCbCr 4:4:4 format: each MCU is a 8x8 block - the same applies to grayscale images, too 575 // YCbCr 4:2:0 format: each MCU represents a 16x16 block, stored as 4x 8x8 Y-blocks plus 1x 8x8 Cb and 1x 8x8 Cr block) 576 for (unsigned short blockY = 0; blockY < mcuSize; blockY += 8) // iterate once (YCbCr444 and grayscale) or twice (YCbCr420) 577 for (unsigned short blockX = 0; blockX < mcuSize; blockX += 8) 578 { 579 // now we finally have an 8x8 block ... 580 for (unsigned short deltaY = 0; deltaY < 8; deltaY++) 581 { 582 size_t column = minimum(uint16_t(mcuX + blockX) , maxWidth); // must not exceed image borders, replicate last row/column if needed 583 size_t row = minimum(uint16_t(mcuY + blockY + deltaY), maxHeight); 584 for (size_t deltaX = 0; deltaX < 8; deltaX++) 585 { 586 // find actual pixel position within the current image 587 size_t pixelPos = row * int(width) + column; // the cast ensures that we don't run into multiplication overflows 588 if (column < maxWidth) 589 column++; 590 591 // grayscale images have solely a Y channel which can be easily derived from the input pixel by shifting it by 128 592 if (!isRGB) 593 { 594 Y[deltaY][deltaX] = pixels[pixelPos] - 128.f; 595 continue; 596 } 597 598 // RGB: 3 bytes per pixel (whereas grayscale images have only 1 byte per pixel) 599 uint8_t r = pixels[3 * pixelPos ]; 600 uint8_t g = pixels[3 * pixelPos + 1]; 601 uint8_t b = pixels[3 * pixelPos + 2]; 602 603 Y [deltaY][deltaX] = rgb2y (r, g, b) - 128; // again, the JPEG standard requires Y to be shifted by 128 604 // YCbCr444 is easy - the more complex YCbCr420 has to be computed about 20 lines below in a second pass 605 if (!downsample) 606 { 607 Cb[deltaY][deltaX] = rgb2cb(r, g, b); // standard RGB-to-YCbCr conversion 608 Cr[deltaY][deltaX] = rgb2cr(r, g, b); 609 } 610 } 611 } 612 613 // encode Y channel 614 lastYDC = encodeBlock(bitWriter, Y, scaledLuminance, lastYDC, huffmanLuminanceDC, huffmanLuminanceAC, codewords); 615 // Cb and Cr are encoded about 50 lines below 616 } 617 618 // grayscale images don't need any Cb and Cr information 619 if (!isRGB) 620 continue; 621 622 // //////////////////////////////////////// 623 // the following lines are only relevant for YCbCr420: 624 // average/downsample chrominance of four pixels while respecting the image borders 625 if (downsample) 626 for (short deltaY = 7; downsample && deltaY >= 0; deltaY--) // iterating loop in reverse increases cache read efficiency 627 { 628 size_t row = minimum(uint16_t(mcuY + 2*deltaY), maxHeight); // each deltaX/Y step covers a 2x2 area 629 size_t column = mcuX; // column is updated inside next loop 630 size_t pixelPos = (row * int(width) + column) * 3; // numComponents = 3 631 632 // deltas (in bytes) to next row / column, must not exceed image borders 633 size_t rowStep = (row < maxHeight) ? 3 * int(width) : 0; // always numComponents*width except for bottom line 634 size_t columnStep = (column < maxWidth ) ? 3 : 0; // always numComponents except for rightmost pixel 635 636 for (short deltaX = 0; deltaX < 8; deltaX++) 637 { 638 // let's add all four samples (2x2 area) 639 size_t right = pixelPos + columnStep; 640 size_t down = pixelPos + rowStep; 641 size_t downRight = pixelPos + columnStep + rowStep; 642 643 // note: cast from 8 bits to >8 bits to avoid overflows when adding 644 short r = short(pixels[pixelPos ]) + pixels[right ] + pixels[down ] + pixels[downRight ]; 645 short g = short(pixels[pixelPos + 1]) + pixels[right + 1] + pixels[down + 1] + pixels[downRight + 1]; 646 short b = short(pixels[pixelPos + 2]) + pixels[right + 2] + pixels[down + 2] + pixels[downRight + 2]; 647 648 // convert to Cb and Cr 649 Cb[deltaY][deltaX] = rgb2cb(r, g, b) / 4; // I still have to divide r,g,b by 4 to get their average values 650 Cr[deltaY][deltaX] = rgb2cr(r, g, b) / 4; // it's a bit faster if done AFTER CbCr conversion 651 652 // step forward to next 2x2 area 653 pixelPos += 2*3; // 2 pixels => 6 bytes (2*numComponents) 654 column += 2; 655 656 // reached right border ? 657 if (column >= maxWidth) 658 { 659 columnStep = 0; 660 pixelPos = ((row + 1) * int(width) - 1) * 3; // same as (row * width + maxWidth) * numComponents => current's row last pixel 661 } 662 } 663 } // end of YCbCr420 code for Cb and Cr 664 665 // encode Cb and Cr 666 lastCbDC = encodeBlock(bitWriter, Cb, scaledChrominance, lastCbDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords); 667 lastCrDC = encodeBlock(bitWriter, Cr, scaledChrominance, lastCrDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords); 668 } 669 670 bitWriter.flush(); // now image is completely encoded, write any bits still left in the buffer 671 672 // /////////////////////////// 673 // EOI marker 674 bitWriter << 0xFF << 0xD9; // this marker has no length, therefore I can't use addMarker() 675 return true; 676 } // writeJpeg() 677 678 }} 679