Geant4 Cross Reference |
1 1 2 // G.Barrand: pure header version of toojpeg f 2 // G.Barrand: pure header version of toojpeg found at https://github.com/stbrumme/toojpeg 3 3 4 // /////////////////////////////////////////// 4 // ////////////////////////////////////////////////////////// 5 // toojpeg.cpp 5 // toojpeg.cpp 6 // written by Stephan Brumme, 2018-2019 6 // written by Stephan Brumme, 2018-2019 7 // see https://create.stephan-brumme.com/toojp 7 // see https://create.stephan-brumme.com/toojpeg/ 8 // 8 // 9 9 10 #include <cstddef> //size_t 10 #include <cstddef> //size_t 11 11 12 // - the "official" specifications: https://ww 12 // - the "official" specifications: https://www.w3.org/Graphics/JPEG/itu-t81.pdf and https://www.w3.org/Graphics/JPEG/jfif3.pdf 13 // - Wikipedia has a short description of the 13 // - Wikipedia has a short description of the JFIF/JPEG file format: https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format 14 // - the popular STB Image library includes Jo 14 // - the popular STB Image library includes Jon's JPEG encoder as well: https://github.com/nothings/stb/blob/master/stb_image_write.h 15 // - the most readable JPEG book (from a devel 15 // - the most readable JPEG book (from a developer's perspective) is Miano's "Compressed Image File Formats" (1999, ISBN 0-201-60443-4), 16 // used copies are really cheap nowadays and 16 // used copies are really cheap nowadays and include a CD with C++ sources as well (plus great format descriptions of GIF & PNG) 17 // - much more detailled is Mitchell/Pennebake 17 // - much more detailled is Mitchell/Pennebaker's "JPEG: Still Image Data Compression Standard" (1993, ISBN 0-442-01272-1) 18 // which contains the official JPEG standard 18 // which contains the official JPEG standard, too - fun fact: I bought a signed copy in a second-hand store without noticing 19 19 20 namespace tools { 20 namespace tools { 21 namespace toojpeg { 21 namespace toojpeg { 22 // //////////////////////////////////////// 22 // //////////////////////////////////////// 23 // data types 23 // data types 24 typedef unsigned char uint8_t; 24 typedef unsigned char uint8_t; 25 typedef unsigned short uint16_t; 25 typedef unsigned short uint16_t; 26 typedef short int16_t; 26 typedef short int16_t; 27 typedef int int32_t; // at least four bytes 27 typedef int int32_t; // at least four bytes 28 28 29 // //////////////////////////////////////// 29 // //////////////////////////////////////// 30 // constants 30 // constants 31 31 32 // quantization tables from JPEG Standard, Ann 32 // quantization tables from JPEG Standard, Annex K 33 const uint8_t DefaultQuantLuminance[8*8] = 33 const uint8_t DefaultQuantLuminance[8*8] = 34 { 16, 11, 10, 16, 24, 40, 51, 61, // there 34 { 16, 11, 10, 16, 24, 40, 51, 61, // there are a few experts proposing slightly more efficient values, 35 12, 12, 14, 19, 26, 58, 60, 55, // e.g. 35 12, 12, 14, 19, 26, 58, 60, 55, // e.g. https://www.imagemagick.org/discourse-server/viewtopic.php?t=20333 36 14, 13, 16, 24, 40, 57, 69, 56, // btw: 36 14, 13, 16, 24, 40, 57, 69, 56, // btw: Google's Guetzli project optimizes the quantization tables per image 37 14, 17, 22, 29, 51, 87, 80, 62, 37 14, 17, 22, 29, 51, 87, 80, 62, 38 18, 22, 37, 56, 68,109,103, 77, 38 18, 22, 37, 56, 68,109,103, 77, 39 24, 35, 55, 64, 81,104,113, 92, 39 24, 35, 55, 64, 81,104,113, 92, 40 49, 64, 78, 87,103,121,120,101, 40 49, 64, 78, 87,103,121,120,101, 41 72, 92, 95, 98,112,100,103, 99 }; 41 72, 92, 95, 98,112,100,103, 99 }; 42 const uint8_t DefaultQuantChrominance[8*8] = 42 const uint8_t DefaultQuantChrominance[8*8] = 43 { 17, 18, 24, 47, 99, 99, 99, 99, 43 { 17, 18, 24, 47, 99, 99, 99, 99, 44 18, 21, 26, 66, 99, 99, 99, 99, 44 18, 21, 26, 66, 99, 99, 99, 99, 45 24, 26, 56, 99, 99, 99, 99, 99, 45 24, 26, 56, 99, 99, 99, 99, 99, 46 47, 66, 99, 99, 99, 99, 99, 99, 46 47, 66, 99, 99, 99, 99, 99, 99, 47 99, 99, 99, 99, 99, 99, 99, 99, 47 99, 99, 99, 99, 99, 99, 99, 99, 48 99, 99, 99, 99, 99, 99, 99, 99, 48 99, 99, 99, 99, 99, 99, 99, 99, 49 99, 99, 99, 99, 99, 99, 99, 99, 49 99, 99, 99, 99, 99, 99, 99, 99, 50 99, 99, 99, 99, 99, 99, 99, 99 }; 50 99, 99, 99, 99, 99, 99, 99, 99 }; 51 51 52 // 8x8 blocks are processed in zig-zag order 52 // 8x8 blocks are processed in zig-zag order 53 // most encoders use a zig-zag "forward" table 53 // most encoders use a zig-zag "forward" table, I switched to its inverse for performance reasons 54 // note: ZigZagInv[ZigZag[i]] = i 54 // note: ZigZagInv[ZigZag[i]] = i 55 const uint8_t ZigZagInv[8*8] = 55 const uint8_t ZigZagInv[8*8] = 56 { 0, 1, 8,16, 9, 2, 3,10, // ZigZag[] = 56 { 0, 1, 8,16, 9, 2, 3,10, // ZigZag[] = 0, 1, 5, 6,14,15,27,28, 57 17,24,32,25,18,11, 4, 5, // 57 17,24,32,25,18,11, 4, 5, // 2, 4, 7,13,16,26,29,42, 58 12,19,26,33,40,48,41,34, // 58 12,19,26,33,40,48,41,34, // 3, 8,12,17,25,30,41,43, 59 27,20,13, 6, 7,14,21,28, // 59 27,20,13, 6, 7,14,21,28, // 9,11,18,24,31,40,44,53, 60 35,42,49,56,57,50,43,36, // 60 35,42,49,56,57,50,43,36, // 10,19,23,32,39,45,52,54, 61 29,22,15,23,30,37,44,51, // 61 29,22,15,23,30,37,44,51, // 20,22,33,38,46,51,55,60, 62 58,59,52,45,38,31,39,46, // 62 58,59,52,45,38,31,39,46, // 21,34,37,47,50,56,59,61, 63 53,60,61,54,47,55,62,63 }; // 63 53,60,61,54,47,55,62,63 }; // 35,36,48,49,57,58,62,63 64 64 65 // static Huffman code tables from JPEG standa 65 // static Huffman code tables from JPEG standard Annex K 66 // - CodesPerBitsize tables define how many Hu 66 // - CodesPerBitsize tables define how many Huffman codes will have a certain bitsize (plus 1 because there nothing with zero bits), 67 // e.g. DcLuminanceCodesPerBitsize[2] = 5 be 67 // e.g. DcLuminanceCodesPerBitsize[2] = 5 because there are 5 Huffman codes being 2+1=3 bits long 68 // - Values tables are a list of values ordere 68 // - Values tables are a list of values ordered by their Huffman code bitsize, 69 // e.g. AcLuminanceValues => Huffman(0x01,0x 69 // e.g. AcLuminanceValues => Huffman(0x01,0x02 and 0x03) will have 2 bits, Huffman(0x00) will have 3 bits, Huffman(0x04,0x11 and 0x05) will have 4 bits, ... 70 70 71 // Huffman definitions for first DC/AC tables 71 // Huffman definitions for first DC/AC tables (luminance / Y channel) 72 const uint8_t DcLuminanceCodesPerBitsize[16] 72 const uint8_t DcLuminanceCodesPerBitsize[16] = { 0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 }; // sum = 12 73 const uint8_t DcLuminanceValues [12] 73 const uint8_t DcLuminanceValues [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; // => 12 codes 74 const uint8_t AcLuminanceCodesPerBitsize[16] 74 const uint8_t AcLuminanceCodesPerBitsize[16] = { 0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,125 }; // sum = 162 75 const uint8_t AcLuminanceValues [162] 75 const uint8_t AcLuminanceValues [162] = // => 162 codes 76 { 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12, 76 { 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08, // 16*10+2 symbols because 77 0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0, 77 0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28, // upper 4 bits can be 0..F 78 0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39, 78 0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59, // while lower 4 bits can be 1..A 79 0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69, 79 0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89, // plus two special codes 0x00 and 0xF0 80 0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98, 80 0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6, // order of these symbols was determined empirically by JPEG committee 81 0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5, 81 0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE1,0xE2, 82 0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA, 82 0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA }; 83 // Huffman definitions for second DC/AC tables 83 // Huffman definitions for second DC/AC tables (chrominance / Cb and Cr channels) 84 const uint8_t DcChrominanceCodesPerBitsize[16] 84 const uint8_t DcChrominanceCodesPerBitsize[16] = { 0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; // sum = 12 85 const uint8_t DcChrominanceValues [12] 85 const uint8_t DcChrominanceValues [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; // => 12 codes (identical to DcLuminanceValues) 86 const uint8_t AcChrominanceCodesPerBitsize[16] 86 const uint8_t AcChrominanceCodesPerBitsize[16] = { 0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,119 }; // sum = 162 87 const uint8_t AcChrominanceValues [162] 87 const uint8_t AcChrominanceValues [162] = // => 162 codes 88 { 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21, 88 { 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, // same number of symbol, just different order 89 0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0, 89 0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26, // (which is more efficient for AC coding) 90 0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38, 90 0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58, 91 0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68, 91 0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87, 92 0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96, 92 0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4, 93 0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3, 93 0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA, 94 0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9, 94 0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA }; 95 const int16_t CodeWordLimit = 2048; // +/-2^11 95 const int16_t CodeWordLimit = 2048; // +/-2^11, maximum value after DCT 96 96 97 // //////////////////////////////////////// 97 // //////////////////////////////////////// 98 // structs 98 // structs 99 99 100 // represent a single Huffman code 100 // represent a single Huffman code 101 struct BitCode 101 struct BitCode 102 { 102 { 103 //BitCode() = default; // undefined state, m 103 //BitCode() = default; // undefined state, must be initialized at a later time 104 BitCode():code(0),numBits(0) {} 104 BitCode():code(0),numBits(0) {} 105 BitCode(const BitCode& a_from):code(a_from.c 105 BitCode(const BitCode& a_from):code(a_from.code),numBits(a_from.numBits) {} 106 BitCode& operator=(const BitCode& a_from) { 106 BitCode& operator=(const BitCode& a_from) { 107 code = a_from.code; 107 code = a_from.code; 108 numBits = a_from.numBits; 108 numBits = a_from.numBits; 109 return *this; 109 return *this; 110 } 110 } 111 111 112 BitCode(uint16_t code_, uint8_t numBits_) 112 BitCode(uint16_t code_, uint8_t numBits_) 113 : code(code_), numBits(numBits_) {} 113 : code(code_), numBits(numBits_) {} 114 uint16_t code; // JPEG's Huffman codes 114 uint16_t code; // JPEG's Huffman codes are limited to 16 bits 115 uint8_t numBits; // number of valid bits 115 uint8_t numBits; // number of valid bits 116 }; 116 }; 117 117 118 // wrapper for bit output operations 118 // wrapper for bit output operations 119 struct BitWriter 119 struct BitWriter 120 { 120 { 121 // user-supplied callback that writes/stores 121 // user-supplied callback that writes/stores one byte 122 WRITE_ONE_BYTE output; 122 WRITE_ONE_BYTE output; 123 void* tag; 123 void* tag; 124 // initialize writer 124 // initialize writer 125 explicit BitWriter(WRITE_ONE_BYTE output_,vo 125 explicit BitWriter(WRITE_ONE_BYTE output_,void* tag_) : output(output_),tag(tag_) { 126 buffer.data = 0; 126 buffer.data = 0; 127 buffer.numBits = 0; 127 buffer.numBits = 0; 128 } 128 } 129 129 130 // store the most recently encoded bits that 130 // store the most recently encoded bits that are not written yet 131 struct BitBuffer 131 struct BitBuffer 132 { 132 { 133 int32_t data /*= 0*/; // actually only 133 int32_t data /*= 0*/; // actually only at most 24 bits are used 134 uint8_t numBits /*= 0*/; // number of vali 134 uint8_t numBits /*= 0*/; // number of valid bits (the right-most bits) 135 } buffer; 135 } buffer; 136 136 137 // write Huffman bits stored in BitCode, kee 137 // write Huffman bits stored in BitCode, keep excess bits in BitBuffer 138 BitWriter& operator<<(const BitCode& data) 138 BitWriter& operator<<(const BitCode& data) 139 { 139 { 140 // append the new bits to those bits lefto 140 // append the new bits to those bits leftover from previous call(s) 141 buffer.numBits += data.numBits; 141 buffer.numBits += data.numBits; 142 buffer.data <<= data.numBits; 142 buffer.data <<= data.numBits; 143 buffer.data |= data.code; 143 buffer.data |= data.code; 144 144 145 // write all "full" bytes 145 // write all "full" bytes 146 while (buffer.numBits >= 8) 146 while (buffer.numBits >= 8) 147 { 147 { 148 // extract highest 8 bits 148 // extract highest 8 bits 149 buffer.numBits -= 8; 149 buffer.numBits -= 8; 150 uint8_t oneByte = uint8_t(buffer.data >> 150 uint8_t oneByte = uint8_t(buffer.data >> buffer.numBits); 151 output(oneByte,tag); 151 output(oneByte,tag); 152 152 153 if (oneByte == 0xFF) // 0xFF has a speci 153 if (oneByte == 0xFF) // 0xFF has a special meaning for JPEGs (it's a block marker) 154 output(0,tag); // therefore pa 154 output(0,tag); // therefore pad a zero to indicate "nope, this one ain't a marker, it's just a coincidence" 155 155 156 // note: I don't clear those written bit 156 // note: I don't clear those written bits, therefore buffer.bits may contain garbage in the high bits 157 // if you really want to "clean up 157 // if you really want to "clean up" (e.g. for debugging purposes) then uncomment the following line 158 //buffer.bits &= (1 << buffer.numBits) - 158 //buffer.bits &= (1 << buffer.numBits) - 1; 159 } 159 } 160 return *this; 160 return *this; 161 } 161 } 162 162 163 // write all non-yet-written bits, fill gaps 163 // write all non-yet-written bits, fill gaps with 1s (that's a strange JPEG thing) 164 void flush() 164 void flush() 165 { 165 { 166 // at most seven set bits needed to "fill" 166 // at most seven set bits needed to "fill" the last byte: 0x7F = binary 0111 1111 167 *this << BitCode(0x7F, 7); // I should set 167 *this << BitCode(0x7F, 7); // I should set buffer.numBits = 0 but since there are no single bits written after flush() I can safely ignore it 168 } 168 } 169 169 170 // NOTE: all the following BitWriter functio 170 // NOTE: all the following BitWriter functions IGNORE the BitBuffer and write straight to output ! 171 // write a single byte 171 // write a single byte 172 BitWriter& operator<<(uint8_t oneByte) 172 BitWriter& operator<<(uint8_t oneByte) 173 { 173 { 174 output(oneByte,tag); 174 output(oneByte,tag); 175 return *this; 175 return *this; 176 } 176 } 177 177 178 // write an array of bytes 178 // write an array of bytes 179 template <typename T, int Size> 179 template <typename T, int Size> 180 BitWriter& operator<<(T (&manyBytes)[Size]) 180 BitWriter& operator<<(T (&manyBytes)[Size]) 181 { 181 { 182 //for (auto c : manyBytes) 182 //for (auto c : manyBytes) 183 // output(c); 183 // output(c); 184 for(size_t i=0;i<Size;i++) output(manyByte 184 for(size_t i=0;i<Size;i++) output(manyBytes[i],tag); 185 return *this; 185 return *this; 186 } 186 } 187 187 188 // start a new JFIF block 188 // start a new JFIF block 189 void addMarker(uint8_t id, uint16_t length) 189 void addMarker(uint8_t id, uint16_t length) 190 { 190 { 191 output(0xFF,tag); output(id,tag); // I 191 output(0xFF,tag); output(id,tag); // ID, always preceded by 0xFF 192 output(uint8_t(length >> 8),tag); // lengt 192 output(uint8_t(length >> 8),tag); // length of the block (big-endian, includes the 2 length bytes as well) 193 output(uint8_t(length & 0xFF),tag); 193 output(uint8_t(length & 0xFF),tag); 194 } 194 } 195 }; 195 }; 196 196 197 // //////////////////////////////////////// 197 // //////////////////////////////////////// 198 // functions / templates 198 // functions / templates 199 199 200 // same as std::min() 200 // same as std::min() 201 template <typename Number> 201 template <typename Number> 202 inline Number minimum(Number value, Number max 202 inline Number minimum(Number value, Number maximum) 203 { 203 { 204 return value <= maximum ? value : maximum; 204 return value <= maximum ? value : maximum; 205 } 205 } 206 206 207 // restrict a value to the interval [minimum, 207 // restrict a value to the interval [minimum, maximum] 208 template <typename Number, typename Limit> 208 template <typename Number, typename Limit> 209 inline Number clamp(Number value, Limit minVal 209 inline Number clamp(Number value, Limit minValue, Limit maxValue) 210 { 210 { 211 if (value <= minValue) return minValue; // n 211 if (value <= minValue) return minValue; // never smaller than the minimum 212 if (value >= maxValue) return maxValue; // n 212 if (value >= maxValue) return maxValue; // never bigger than the maximum 213 return value; // v 213 return value; // value was inside interval, keep it 214 } 214 } 215 215 216 // convert from RGB to YCbCr, constants are si 216 // convert from RGB to YCbCr, constants are similar to ITU-R, see https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion 217 inline float rgb2y (float r, float g, float b) 217 inline float rgb2y (float r, float g, float b) { return +0.299f * r +0.587f * g +0.114f * b; } 218 inline float rgb2cb(float r, float g, float b) 218 inline float rgb2cb(float r, float g, float b) { return -0.16874f * r -0.33126f * g +0.5f * b; } 219 inline float rgb2cr(float r, float g, float b) 219 inline float rgb2cr(float r, float g, float b) { return +0.5f * r -0.41869f * g -0.08131f * b; } 220 220 221 // forward DCT computation "in one dimension" 221 // forward DCT computation "in one dimension" (fast AAN algorithm by Arai, Agui and Nakajima: "A fast DCT-SQ scheme for images") 222 inline void DCT(float block[8*8], uint8_t stri 222 inline void DCT(float block[8*8], uint8_t stride) // stride must be 1 (=horizontal) or 8 (=vertical) 223 { 223 { 224 const float SqrtHalfSqrt = 1.306562965f; // 224 const float SqrtHalfSqrt = 1.306562965f; // sqrt((2 + sqrt(2)) / 2) = cos(pi * 1 / 8) * sqrt(2) 225 const float InvSqrt = 0.707106781f; // 225 const float InvSqrt = 0.707106781f; // 1 / sqrt(2) = cos(pi * 2 / 8) 226 const float HalfSqrtSqrt = 0.382683432f; // 226 const float HalfSqrtSqrt = 0.382683432f; // sqrt(2 - sqrt(2)) / 2 = cos(pi * 3 / 8) 227 const float InvSqrtSqrt = 0.541196100f; // 227 const float InvSqrtSqrt = 0.541196100f; // 1 / sqrt(2 - sqrt(2)) = cos(pi * 3 / 8) * sqrt(2) 228 228 229 // modify in-place 229 // modify in-place 230 float& block0 = block[0 ]; 230 float& block0 = block[0 ]; 231 float& block1 = block[1 * stride]; 231 float& block1 = block[1 * stride]; 232 float& block2 = block[2 * stride]; 232 float& block2 = block[2 * stride]; 233 float& block3 = block[3 * stride]; 233 float& block3 = block[3 * stride]; 234 float& block4 = block[4 * stride]; 234 float& block4 = block[4 * stride]; 235 float& block5 = block[5 * stride]; 235 float& block5 = block[5 * stride]; 236 float& block6 = block[6 * stride]; 236 float& block6 = block[6 * stride]; 237 float& block7 = block[7 * stride]; 237 float& block7 = block[7 * stride]; 238 238 239 // based on https://dev.w3.org/Amaya/libjpeg 239 // based on https://dev.w3.org/Amaya/libjpeg/jfdctflt.c , the original variable names can be found in my comments 240 float add07 = block0 + block7; float sub07 = 240 float add07 = block0 + block7; float sub07 = block0 - block7; // tmp0, tmp7 241 float add16 = block1 + block6; float sub16 = 241 float add16 = block1 + block6; float sub16 = block1 - block6; // tmp1, tmp6 242 float add25 = block2 + block5; float sub25 = 242 float add25 = block2 + block5; float sub25 = block2 - block5; // tmp2, tmp5 243 float add34 = block3 + block4; float sub34 = 243 float add34 = block3 + block4; float sub34 = block3 - block4; // tmp3, tmp4 244 244 245 float add0347 = add07 + add34; float sub07_3 245 float add0347 = add07 + add34; float sub07_34 = add07 - add34; // tmp10, tmp13 ("even part" / "phase 2") 246 float add1256 = add16 + add25; float sub16_2 246 float add1256 = add16 + add25; float sub16_25 = add16 - add25; // tmp11, tmp12 247 247 248 block0 = add0347 + add1256; block4 = add0347 248 block0 = add0347 + add1256; block4 = add0347 - add1256; // "phase 3" 249 249 250 float z1 = (sub16_25 + sub07_34) * InvSqrt; 250 float z1 = (sub16_25 + sub07_34) * InvSqrt; // all temporary z-variables kept their original names 251 block2 = sub07_34 + z1; block6 = sub07_34 - 251 block2 = sub07_34 + z1; block6 = sub07_34 - z1; // "phase 5" 252 252 253 float sub23_45 = sub25 + sub34; // tmp10 ("o 253 float sub23_45 = sub25 + sub34; // tmp10 ("odd part" / "phase 2") 254 float sub12_56 = sub16 + sub25; // tmp11 254 float sub12_56 = sub16 + sub25; // tmp11 255 float sub01_67 = sub16 + sub07; // tmp12 255 float sub01_67 = sub16 + sub07; // tmp12 256 256 257 float z5 = (sub23_45 - sub01_67) * HalfSqrtS 257 float z5 = (sub23_45 - sub01_67) * HalfSqrtSqrt; 258 float z2 = sub23_45 * InvSqrtSqrt + z5; 258 float z2 = sub23_45 * InvSqrtSqrt + z5; 259 float z3 = sub12_56 * InvSqrt; 259 float z3 = sub12_56 * InvSqrt; 260 float z4 = sub01_67 * SqrtHalfSqrt + z5; 260 float z4 = sub01_67 * SqrtHalfSqrt + z5; 261 float z6 = sub07 + z3; // z11 ("phase 5") 261 float z6 = sub07 + z3; // z11 ("phase 5") 262 float z7 = sub07 - z3; // z13 262 float z7 = sub07 - z3; // z13 263 block1 = z6 + z4; block7 = z6 - z4; // "phas 263 block1 = z6 + z4; block7 = z6 - z4; // "phase 6" 264 block5 = z7 + z2; block3 = z7 - z2; 264 block5 = z7 + z2; block3 = z7 - z2; 265 } 265 } 266 266 267 // run DCT, quantize and write Huffman bit cod 267 // run DCT, quantize and write Huffman bit codes 268 inline int16_t encodeBlock(BitWriter& writer, 268 inline int16_t encodeBlock(BitWriter& writer, float block[8][8], const float scaled[8*8], int16_t lastDC, 269 const BitCode huffmanDC[25 269 const BitCode huffmanDC[256], const BitCode huffmanAC[256], const BitCode* codewords) 270 { 270 { 271 // "linearize" the 8x8 block, treat it as a 271 // "linearize" the 8x8 block, treat it as a flat array of 64 floats 272 float* block64 = (float*) block; 272 float* block64 = (float*) block; 273 273 274 // DCT: rows 274 // DCT: rows 275 for (size_t offset = 0; offset < 8; offset++ 275 for (size_t offset = 0; offset < 8; offset++) 276 DCT(block64 + offset*8, 1); 276 DCT(block64 + offset*8, 1); 277 // DCT: columns 277 // DCT: columns 278 for (size_t offset = 0; offset < 8; offset++ 278 for (size_t offset = 0; offset < 8; offset++) 279 DCT(block64 + offset*1, 8); 279 DCT(block64 + offset*1, 8); 280 280 281 // scale 281 // scale 282 for (size_t i = 0; i < 8*8; i++) 282 for (size_t i = 0; i < 8*8; i++) 283 block64[i] *= scaled[i]; 283 block64[i] *= scaled[i]; 284 284 285 // encode DC (the first coefficient is the " 285 // encode DC (the first coefficient is the "average color" of the 8x8 block) 286 int DC = int(block64[0] + (block64[0] >= 0 ? 286 int DC = int(block64[0] + (block64[0] >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect 287 287 288 // quantize and zigzag the other 63 coeffici 288 // quantize and zigzag the other 63 coefficients 289 size_t posNonZero = 0; // find last coeffici 289 size_t posNonZero = 0; // find last coefficient which is not zero (because trailing zeros are encoded differently) 290 int16_t quantized[8*8]; 290 int16_t quantized[8*8]; 291 for (size_t i = 1; i < 8*8; i++) // start at 291 for (size_t i = 1; i < 8*8; i++) // start at 1 because block64[0]=DC was already processed 292 { 292 { 293 float value = block64[ZigZagInv[i]]; 293 float value = block64[ZigZagInv[i]]; 294 // round to nearest integer 294 // round to nearest integer 295 quantized[i] = int(value + (value >= 0 ? + 295 quantized[i] = int(value + (value >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect 296 // remember offset of last non-zero coeffi 296 // remember offset of last non-zero coefficient 297 if (quantized[i] != 0) 297 if (quantized[i] != 0) 298 posNonZero = i; 298 posNonZero = i; 299 } 299 } 300 300 301 // same "average color" as previous block ? 301 // same "average color" as previous block ? 302 int diff = DC - lastDC; 302 int diff = DC - lastDC; 303 if (diff == 0) 303 if (diff == 0) 304 writer << huffmanDC[0x00]; // yes, write 304 writer << huffmanDC[0x00]; // yes, write a special short symbol 305 else 305 else 306 { 306 { 307 const BitCode bits = codewords[diff]; // n 307 const BitCode bits = codewords[diff]; // nope, encode the difference to previous block's average color 308 writer << huffmanDC[bits.numBits] << bits; 308 writer << huffmanDC[bits.numBits] << bits; 309 } 309 } 310 310 311 // encode ACs (quantized[1..63]) 311 // encode ACs (quantized[1..63]) 312 size_t offset = 0; // upper 4 bits count the 312 size_t offset = 0; // upper 4 bits count the number of consecutive zeros 313 for (size_t i = 1; i <= posNonZero; i++) // 313 for (size_t i = 1; i <= posNonZero; i++) // quantized[0] was already written, skip all trailing zeros, too 314 { 314 { 315 // zeros are encoded in a special way 315 // zeros are encoded in a special way 316 while (quantized[i] == 0) // found another 316 while (quantized[i] == 0) // found another zero ? 317 { 317 { 318 offset += 0x10; // add 1 to the upper 318 offset += 0x10; // add 1 to the upper 4 bits 319 // split into blocks of at most 16 conse 319 // split into blocks of at most 16 consecutive zeros 320 if (offset > 0xF0) // remember, the coun 320 if (offset > 0xF0) // remember, the counter is in the upper 4 bits, 0xF = 15 321 { 321 { 322 writer << huffmanAC[0xF0]; // 0xF0 is 322 writer << huffmanAC[0xF0]; // 0xF0 is a special code for "16 zeros" 323 offset = 0; 323 offset = 0; 324 } 324 } 325 i++; 325 i++; 326 } 326 } 327 327 328 const BitCode encoded = codewords[quantize 328 const BitCode encoded = codewords[quantized[i]]; 329 // combine number of zeros with the number 329 // combine number of zeros with the number of bits of the next non-zero value 330 writer << huffmanAC[offset + encoded.numBi 330 writer << huffmanAC[offset + encoded.numBits] << encoded; // and the value itself 331 offset = 0; 331 offset = 0; 332 } 332 } 333 333 334 // send end-of-block code (0x00), only neede 334 // send end-of-block code (0x00), only needed if there are trailing zeros 335 if (posNonZero < 8*8 - 1) // = 63 335 if (posNonZero < 8*8 - 1) // = 63 336 writer << huffmanAC[0x00]; 336 writer << huffmanAC[0x00]; 337 337 338 return DC; 338 return DC; 339 } 339 } 340 340 341 // Jon's code includes the pre-generated Huffm 341 // Jon's code includes the pre-generated Huffman codes 342 // I don't like these "magic constants" and co 342 // I don't like these "magic constants" and compute them on my own :-) 343 inline void generateHuffmanTable(const uint8_t 343 inline void generateHuffmanTable(const uint8_t numCodes[16], const uint8_t* values, BitCode result[256]) 344 { 344 { 345 // process all bitsizes 1 thru 16, no JPEG H 345 // process all bitsizes 1 thru 16, no JPEG Huffman code is allowed to exceed 16 bits 346 uint16_t huffmanCode = 0; 346 uint16_t huffmanCode = 0; 347 for (uint8_t numBits = 1; numBits <= 16; num 347 for (uint8_t numBits = 1; numBits <= 16; numBits++) 348 { 348 { 349 // ... and each code of these bitsizes 349 // ... and each code of these bitsizes 350 for (uint8_t i = 0; i < numCodes[numBits - 350 for (uint8_t i = 0; i < numCodes[numBits - 1]; i++) // note: numCodes array starts at zero, but smallest bitsize is 1 351 result[*values++] = BitCode(huffmanCode+ 351 result[*values++] = BitCode(huffmanCode++, numBits); 352 352 353 // next Huffman code needs to be one bit w 353 // next Huffman code needs to be one bit wider 354 huffmanCode <<= 1; 354 huffmanCode <<= 1; 355 } 355 } 356 } 356 } 357 357 358 // -------------------- externally visible cod 358 // -------------------- externally visible code -------------------- 359 359 360 // the only exported function ... 360 // the only exported function ... 361 inline bool writeJpeg(WRITE_ONE_BYTE output, v 361 inline bool writeJpeg(WRITE_ONE_BYTE output, void* tag,const void* pixels_, unsigned short width, unsigned short height, 362 bool isRGB, unsigned char quali 362 bool isRGB, unsigned char quality_, bool downsample, const char* comment) 363 { 363 { 364 // reject invalid pointers 364 // reject invalid pointers 365 if (output == 0/*nullptr*/ || pixels_ == 0/* 365 if (output == 0/*nullptr*/ || pixels_ == 0/*nullptr*/) 366 return false; 366 return false; 367 // check image format 367 // check image format 368 if (width == 0 || height == 0) 368 if (width == 0 || height == 0) 369 return false; 369 return false; 370 370 371 // number of components 371 // number of components 372 const uint16_t numComponents = isRGB ? 3 : 1 372 const uint16_t numComponents = isRGB ? 3 : 1; 373 // note: if there is just one component (=gr 373 // note: if there is just one component (=grayscale), then only luminance needs to be stored in the file 374 // thus everything related to chromina 374 // thus everything related to chrominance need not to be written to the JPEG 375 // I still compute a few things, like 375 // I still compute a few things, like quantization tables to avoid a complete code mess 376 376 377 // grayscale images can't be downsampled (be 377 // grayscale images can't be downsampled (because there are no Cb + Cr channels) 378 if (!isRGB) 378 if (!isRGB) 379 downsample = false; 379 downsample = false; 380 380 381 // wrapper for all output operations 381 // wrapper for all output operations 382 BitWriter bitWriter(output,tag); 382 BitWriter bitWriter(output,tag); 383 383 384 // //////////////////////////////////////// 384 // //////////////////////////////////////// 385 // JFIF headers 385 // JFIF headers 386 const uint8_t HeaderJfif[2+2+16] = 386 const uint8_t HeaderJfif[2+2+16] = 387 { 0xFF,0xD8, // SOI marker (star 387 { 0xFF,0xD8, // SOI marker (start of image) 388 0xFF,0xE0, // JFIF APP0 tag 388 0xFF,0xE0, // JFIF APP0 tag 389 0,16, // length: 16 bytes 389 0,16, // length: 16 bytes (14 bytes payload + 2 bytes for this length field) 390 'J','F','I','F',0, // JFIF identifier, 390 'J','F','I','F',0, // JFIF identifier, zero-terminated 391 1,1, // JFIF version 1.1 391 1,1, // JFIF version 1.1 392 0, // no density units 392 0, // no density units specified 393 0,1,0,1, // density: 1 pixel 393 0,1,0,1, // density: 1 pixel "per pixel" horizontally and vertically 394 0,0 }; // no thumbnail (si 394 0,0 }; // no thumbnail (size 0 x 0) 395 bitWriter << HeaderJfif; 395 bitWriter << HeaderJfif; 396 396 397 // //////////////////////////////////////// 397 // //////////////////////////////////////// 398 // comment (optional) 398 // comment (optional) 399 if (comment != 0/*nullptr*/) 399 if (comment != 0/*nullptr*/) 400 { 400 { 401 // look for zero terminator 401 // look for zero terminator 402 uint16_t length = 0; // = strlen(comment); 402 uint16_t length = 0; // = strlen(comment); 403 while (comment[length] != 0) 403 while (comment[length] != 0) 404 length++; 404 length++; 405 405 406 // write COM marker 406 // write COM marker 407 bitWriter.addMarker(0xFE, 2+length); // bl 407 bitWriter.addMarker(0xFE, 2+length); // block size is number of bytes (without zero terminator) + 2 bytes for this length field 408 // ... and write the comment itself 408 // ... and write the comment itself 409 for (uint16_t i = 0; i < length; i++) 409 for (uint16_t i = 0; i < length; i++) 410 bitWriter << comment[i]; 410 bitWriter << comment[i]; 411 } 411 } 412 412 413 // //////////////////////////////////////// 413 // //////////////////////////////////////// 414 // adjust quantization tables to desired qua 414 // adjust quantization tables to desired quality 415 415 416 // quality level must be in 1 ... 100 416 // quality level must be in 1 ... 100 417 uint16_t quality = clamp<uint16_t>(quality_, 417 uint16_t quality = clamp<uint16_t>(quality_, 1, 100); 418 // convert to an internal JPEG quality facto 418 // convert to an internal JPEG quality factor, formula taken from libjpeg 419 quality = quality < 50 ? 5000 / quality : 20 419 quality = quality < 50 ? 5000 / quality : 200 - quality * 2; 420 420 421 uint8_t quantLuminance [8*8]; 421 uint8_t quantLuminance [8*8]; 422 uint8_t quantChrominance[8*8]; 422 uint8_t quantChrominance[8*8]; 423 for (size_t i = 0; i < 8*8; i++) 423 for (size_t i = 0; i < 8*8; i++) 424 { 424 { 425 int luminance = (DefaultQuantLuminance 425 int luminance = (DefaultQuantLuminance [ZigZagInv[i]] * quality + 50) / 100; 426 int chrominance = (DefaultQuantChrominance 426 int chrominance = (DefaultQuantChrominance[ZigZagInv[i]] * quality + 50) / 100; 427 427 428 // clamp to 1..255 428 // clamp to 1..255 429 quantLuminance [i] = clamp(luminance, 1 429 quantLuminance [i] = clamp(luminance, 1, 255); 430 quantChrominance[i] = clamp(chrominance, 1 430 quantChrominance[i] = clamp(chrominance, 1, 255); 431 } 431 } 432 432 433 // write quantization tables 433 // write quantization tables 434 bitWriter.addMarker(0xDB, 2 + (isRGB ? 2 : 1 434 bitWriter.addMarker(0xDB, 2 + (isRGB ? 2 : 1) * (1 + 8*8)); // length: 65 bytes per table + 2 bytes for this length field 435 435 // each table has 64 entries and is preceded by an ID byte 436 436 437 bitWriter << 0x00 << quantLuminance; // 437 bitWriter << 0x00 << quantLuminance; // first quantization table 438 if (isRGB) 438 if (isRGB) 439 bitWriter << 0x01 << quantChrominance; // 439 bitWriter << 0x01 << quantChrominance; // second quantization table, only relevant for color images 440 440 441 // //////////////////////////////////////// 441 // //////////////////////////////////////// 442 // write image infos (SOF0 - start of frame) 442 // write image infos (SOF0 - start of frame) 443 bitWriter.addMarker(0xC0, 2+6+3*numComponent 443 bitWriter.addMarker(0xC0, 2+6+3*numComponents); // length: 6 bytes general info + 3 per channel + 2 bytes for this length field 444 444 445 // 8 bits per channel 445 // 8 bits per channel 446 bitWriter << 0x08 446 bitWriter << 0x08 447 // image dimensions (big-endian) 447 // image dimensions (big-endian) 448 << (height >> 8) << (height & 0xFF 448 << (height >> 8) << (height & 0xFF) 449 << (width >> 8) << (width & 0xFF 449 << (width >> 8) << (width & 0xFF); 450 450 451 // sampling and quantization tables for each 451 // sampling and quantization tables for each component 452 bitWriter << numComponents; // 1 compo 452 bitWriter << numComponents; // 1 component (grayscale, Y only) or 3 components (Y,Cb,Cr) 453 for (uint16_t id = 1; id <= numComponents; i 453 for (uint16_t id = 1; id <= numComponents; id++) 454 bitWriter << id // compone 454 bitWriter << id // component ID (Y=1, Cb=2, Cr=3) 455 // bitmasks for sampling: highest 4 bits: 455 // bitmasks for sampling: highest 4 bits: horizontal, lowest 4 bits: vertical 456 << (id == 1 && downsample ? 0x22 456 << (id == 1 && downsample ? 0x22 : 0x11) // 0x11 is default YCbCr 4:4:4 and 0x22 stands for YCbCr 4:2:0 457 << (id == 1 ? 0 : 1); // use qua 457 << (id == 1 ? 0 : 1); // use quantization table 0 for Y, table 1 for Cb and Cr 458 458 459 // //////////////////////////////////////// 459 // //////////////////////////////////////// 460 // Huffman tables 460 // Huffman tables 461 // DHT marker - define Huffman tables 461 // DHT marker - define Huffman tables 462 bitWriter.addMarker(0xC4, isRGB ? (2+208+208 462 bitWriter.addMarker(0xC4, isRGB ? (2+208+208) : (2+208)); 463 // 2 bytes for the 463 // 2 bytes for the length field, store chrominance only if needed 464 // 1+16+12 for 464 // 1+16+12 for the DC luminance 465 // 1+16+162 for 465 // 1+16+162 for the AC luminance (208 = 1+16+12 + 1+16+162) 466 // 1+16+12 for 466 // 1+16+12 for the DC chrominance 467 // 1+16+162 for 467 // 1+16+162 for the AC chrominance (208 = 1+16+12 + 1+16+162, same as above) 468 468 469 // store luminance's DC+AC Huffman table def 469 // store luminance's DC+AC Huffman table definitions 470 bitWriter << 0x00 // highest 4 bits: 0 => DC 470 bitWriter << 0x00 // highest 4 bits: 0 => DC, lowest 4 bits: 0 => Y (baseline) 471 << DcLuminanceCodesPerBitsize 471 << DcLuminanceCodesPerBitsize 472 << DcLuminanceValues; 472 << DcLuminanceValues; 473 bitWriter << 0x10 // highest 4 bits: 1 => AC 473 bitWriter << 0x10 // highest 4 bits: 1 => AC, lowest 4 bits: 0 => Y (baseline) 474 << AcLuminanceCodesPerBitsize 474 << AcLuminanceCodesPerBitsize 475 << AcLuminanceValues; 475 << AcLuminanceValues; 476 476 477 // compute actual Huffman code tables (see J 477 // compute actual Huffman code tables (see Jon's code for precalculated tables) 478 BitCode huffmanLuminanceDC[256]; 478 BitCode huffmanLuminanceDC[256]; 479 BitCode huffmanLuminanceAC[256]; 479 BitCode huffmanLuminanceAC[256]; 480 generateHuffmanTable(DcLuminanceCodesPerBits 480 generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC); 481 generateHuffmanTable(AcLuminanceCodesPerBits 481 generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC); 482 482 483 // chrominance is only relevant for color im 483 // chrominance is only relevant for color images 484 BitCode huffmanChrominanceDC[256]; 484 BitCode huffmanChrominanceDC[256]; 485 BitCode huffmanChrominanceAC[256]; 485 BitCode huffmanChrominanceAC[256]; 486 if (isRGB) 486 if (isRGB) 487 { 487 { 488 // store luminance's DC+AC Huffman table d 488 // store luminance's DC+AC Huffman table definitions 489 bitWriter << 0x01 // highest 4 bits: 0 => 489 bitWriter << 0x01 // highest 4 bits: 0 => DC, lowest 4 bits: 1 => Cr,Cb (baseline) 490 << DcChrominanceCodesPerBitsize 490 << DcChrominanceCodesPerBitsize 491 << DcChrominanceValues; 491 << DcChrominanceValues; 492 bitWriter << 0x11 // highest 4 bits: 1 => 492 bitWriter << 0x11 // highest 4 bits: 1 => AC, lowest 4 bits: 1 => Cr,Cb (baseline) 493 << AcChrominanceCodesPerBitsize 493 << AcChrominanceCodesPerBitsize 494 << AcChrominanceValues; 494 << AcChrominanceValues; 495 495 496 // compute actual Huffman code tables (see 496 // compute actual Huffman code tables (see Jon's code for precalculated tables) 497 generateHuffmanTable(DcChrominanceCodesPer 497 generateHuffmanTable(DcChrominanceCodesPerBitsize, DcChrominanceValues, huffmanChrominanceDC); 498 generateHuffmanTable(AcChrominanceCodesPer 498 generateHuffmanTable(AcChrominanceCodesPerBitsize, AcChrominanceValues, huffmanChrominanceAC); 499 } 499 } 500 500 501 // //////////////////////////////////////// 501 // //////////////////////////////////////// 502 // start of scan (there is only a single sca 502 // start of scan (there is only a single scan for baseline JPEGs) 503 bitWriter.addMarker(0xDA, 2+1+2*numComponent 503 bitWriter.addMarker(0xDA, 2+1+2*numComponents+3); // 2 bytes for the length field, 1 byte for number of components, 504 504 // then 2 bytes for each component and 3 bytes for spectral selection 505 505 506 // assign Huffman tables to each component 506 // assign Huffman tables to each component 507 bitWriter << numComponents; 507 bitWriter << numComponents; 508 for (uint16_t id = 1; id <= numComponents; i 508 for (uint16_t id = 1; id <= numComponents; id++) 509 // highest 4 bits: DC Huffman table, lowes 509 // highest 4 bits: DC Huffman table, lowest 4 bits: AC Huffman table 510 bitWriter << id << (id == 1 ? 0x00 : 0x11) 510 bitWriter << id << (id == 1 ? 0x00 : 0x11); // Y: tables 0 for DC and AC; Cb + Cr: tables 1 for DC and AC 511 511 512 // constant values for our baseline JPEGs (w 512 // constant values for our baseline JPEGs (which have a single sequential scan) 513 static const uint8_t Spectral[3] = { 0, 63, 513 static const uint8_t Spectral[3] = { 0, 63, 0 }; // spectral selection: must be from 0 to 63; successive approximation must be 0 514 bitWriter << Spectral; 514 bitWriter << Spectral; 515 515 516 // //////////////////////////////////////// 516 // //////////////////////////////////////// 517 // adjust quantization tables with AAN scali 517 // adjust quantization tables with AAN scaling factors to simplify DCT 518 float scaledLuminance [8*8]; 518 float scaledLuminance [8*8]; 519 float scaledChrominance[8*8]; 519 float scaledChrominance[8*8]; 520 for (size_t i = 0; i < 8*8; i++) 520 for (size_t i = 0; i < 8*8; i++) 521 { 521 { 522 size_t row = ZigZagInv[i] / 8; // same 522 size_t row = ZigZagInv[i] / 8; // same as ZigZagInv[i] >> 3 523 size_t column = ZigZagInv[i] % 8; // same 523 size_t column = ZigZagInv[i] % 8; // same as ZigZagInv[i] & 7 524 524 525 // scaling constants for AAN DCT algorithm 525 // scaling constants for AAN DCT algorithm: AanScaleFactors[0] = 1, AanScaleFactors[k=1..7] = cos(k*PI/16) * sqrt(2) 526 static const float AanScaleFactors[8] = { 526 static const float AanScaleFactors[8] = { 1, 1.387039845f, 1.306562965f, 1.175875602f, 1, 0.785694958f, 0.541196100f, 0.275899379f }; 527 float factor = 1 / (AanScaleFactors[row] * 527 float factor = 1 / (AanScaleFactors[row] * AanScaleFactors[column] * 8); 528 scaledLuminance [ZigZagInv[i]] = factor / 528 scaledLuminance [ZigZagInv[i]] = factor / quantLuminance [i]; 529 scaledChrominance[ZigZagInv[i]] = factor / 529 scaledChrominance[ZigZagInv[i]] = factor / quantChrominance[i]; 530 // if you really want JPEGs that are bitwi 530 // if you really want JPEGs that are bitwise identical to Jon Olick's code then you need slightly different formulas (note: sqrt(8) = 2.828427125f) 531 //static const float aasf[] = { 1.0f * 2.8 531 //static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; // line 240 of jo_jpeg.cpp 532 //scaledLuminance [ZigZagInv[i]] = 1 / (q 532 //scaledLuminance [ZigZagInv[i]] = 1 / (quantLuminance [i] * aasf[row] * aasf[column]); // lines 266-267 of jo_jpeg.cpp 533 //scaledChrominance[ZigZagInv[i]] = 1 / (q 533 //scaledChrominance[ZigZagInv[i]] = 1 / (quantChrominance[i] * aasf[row] * aasf[column]); 534 } 534 } 535 535 536 // //////////////////////////////////////// 536 // //////////////////////////////////////// 537 // precompute JPEG codewords for quantized D 537 // precompute JPEG codewords for quantized DCT 538 BitCode codewordsArray[2 * CodeWordLimit]; 538 BitCode codewordsArray[2 * CodeWordLimit]; // note: quantized[i] is found at codewordsArray[quantized[i] + CodeWordLimit] 539 BitCode* codewords = &codewordsArray[CodeWor 539 BitCode* codewords = &codewordsArray[CodeWordLimit]; // allow negative indices, so quantized[i] is at codewords[quantized[i]] 540 uint8_t numBits = 1; // each codeword has at 540 uint8_t numBits = 1; // each codeword has at least one bit (value == 0 is undefined) 541 int32_t mask = 1; // mask is always 2^num 541 int32_t mask = 1; // mask is always 2^numBits - 1, initial value 2^1-1 = 2-1 = 1 542 for (int16_t value = 1; value < CodeWordLimi 542 for (int16_t value = 1; value < CodeWordLimit; value++) 543 { 543 { 544 // numBits = position of highest set bit ( 544 // numBits = position of highest set bit (ignoring the sign) 545 // mask = (2^numBits) - 1 545 // mask = (2^numBits) - 1 546 if (value > mask) // one more bit ? 546 if (value > mask) // one more bit ? 547 { 547 { 548 numBits++; 548 numBits++; 549 mask = (mask << 1) | 1; // append a set 549 mask = (mask << 1) | 1; // append a set bit 550 } 550 } 551 codewords[-value] = BitCode(mask - value, 551 codewords[-value] = BitCode(mask - value, numBits); // note that I use a negative index => codewords[-value] = codewordsArray[CodeWordLimit value] 552 codewords[+value] = BitCode( value, 552 codewords[+value] = BitCode( value, numBits); 553 } 553 } 554 554 555 // just convert image data from void* 555 // just convert image data from void* 556 const uint8_t* pixels = (const uint8_t*)pixe 556 const uint8_t* pixels = (const uint8_t*)pixels_; 557 557 558 // the next two variables are frequently use 558 // the next two variables are frequently used when checking for image borders 559 const unsigned short maxWidth = width - 1; 559 const unsigned short maxWidth = width - 1; // "last row" 560 const unsigned short maxHeight = height - 1; 560 const unsigned short maxHeight = height - 1; // "bottom line" 561 561 562 // process MCUs (minimum codes units) => ima 562 // process MCUs (minimum codes units) => image is subdivided into a grid of 8x8 or 16x16 tiles 563 const unsigned short sampling = downsample ? 563 const unsigned short sampling = downsample ? 2 : 1; // 1x1 or 2x2 sampling 564 const unsigned short mcuSize = 8 * sampling 564 const unsigned short mcuSize = 8 * sampling; 565 565 566 // average color of the previous MCU 566 // average color of the previous MCU 567 int16_t lastYDC = 0, lastCbDC = 0, lastCrDC 567 int16_t lastYDC = 0, lastCbDC = 0, lastCrDC = 0; 568 // convert from RGB to YCbCr 568 // convert from RGB to YCbCr 569 float Y[8][8], Cb[8][8], Cr[8][8]; 569 float Y[8][8], Cb[8][8], Cr[8][8]; 570 570 571 for (unsigned short mcuY = 0; mcuY < height; 571 for (unsigned short mcuY = 0; mcuY < height; mcuY += mcuSize) // each step is either 8 or 16 (=mcuSize) 572 for (unsigned short mcuX = 0; mcuX < width 572 for (unsigned short mcuX = 0; mcuX < width; mcuX += mcuSize) 573 { 573 { 574 // YCbCr 4:4:4 format: each MCU is a 8x8 574 // YCbCr 4:4:4 format: each MCU is a 8x8 block - the same applies to grayscale images, too 575 // YCbCr 4:2:0 format: each MCU represen 575 // YCbCr 4:2:0 format: each MCU represents a 16x16 block, stored as 4x 8x8 Y-blocks plus 1x 8x8 Cb and 1x 8x8 Cr block) 576 for (unsigned short blockY = 0; blockY < 576 for (unsigned short blockY = 0; blockY < mcuSize; blockY += 8) // iterate once (YCbCr444 and grayscale) or twice (YCbCr420) 577 for (unsigned short blockX = 0; blockX 577 for (unsigned short blockX = 0; blockX < mcuSize; blockX += 8) 578 { 578 { 579 // now we finally have an 8x8 block 579 // now we finally have an 8x8 block ... 580 for (unsigned short deltaY = 0; delt 580 for (unsigned short deltaY = 0; deltaY < 8; deltaY++) 581 { 581 { 582 size_t column = minimum(uint16_t(m 582 size_t column = minimum(uint16_t(mcuX + blockX) , maxWidth); // must not exceed image borders, replicate last row/column if needed 583 size_t row = minimum(uint16_t(m 583 size_t row = minimum(uint16_t(mcuY + blockY + deltaY), maxHeight); 584 for (size_t deltaX = 0; deltaX < 8 584 for (size_t deltaX = 0; deltaX < 8; deltaX++) 585 { 585 { 586 // find actual pixel position wi 586 // find actual pixel position within the current image 587 size_t pixelPos = row * int(widt 587 size_t pixelPos = row * int(width) + column; // the cast ensures that we don't run into multiplication overflows 588 if (column < maxWidth) 588 if (column < maxWidth) 589 column++; 589 column++; 590 590 591 // grayscale images have solely 591 // grayscale images have solely a Y channel which can be easily derived from the input pixel by shifting it by 128 592 if (!isRGB) 592 if (!isRGB) 593 { 593 { 594 Y[deltaY][deltaX] = pixels[pix 594 Y[deltaY][deltaX] = pixels[pixelPos] - 128.f; 595 continue; 595 continue; 596 } 596 } 597 597 598 // RGB: 3 bytes per pixel (where 598 // RGB: 3 bytes per pixel (whereas grayscale images have only 1 byte per pixel) 599 uint8_t r = pixels[3 * pixelPos 599 uint8_t r = pixels[3 * pixelPos ]; 600 uint8_t g = pixels[3 * pixelPos 600 uint8_t g = pixels[3 * pixelPos + 1]; 601 uint8_t b = pixels[3 * pixelPos 601 uint8_t b = pixels[3 * pixelPos + 2]; 602 602 603 Y [deltaY][deltaX] = rgb2y (r, 603 Y [deltaY][deltaX] = rgb2y (r, g, b) - 128; // again, the JPEG standard requires Y to be shifted by 128 604 // YCbCr444 is easy - the more c 604 // YCbCr444 is easy - the more complex YCbCr420 has to be computed about 20 lines below in a second pass 605 if (!downsample) 605 if (!downsample) 606 { 606 { 607 Cb[deltaY][deltaX] = rgb2cb(r, 607 Cb[deltaY][deltaX] = rgb2cb(r, g, b); // standard RGB-to-YCbCr conversion 608 Cr[deltaY][deltaX] = rgb2cr(r, 608 Cr[deltaY][deltaX] = rgb2cr(r, g, b); 609 } 609 } 610 } 610 } 611 } 611 } 612 612 613 // encode Y channel 613 // encode Y channel 614 lastYDC = encodeBlock(bitWriter, Y, sc 614 lastYDC = encodeBlock(bitWriter, Y, scaledLuminance, lastYDC, huffmanLuminanceDC, huffmanLuminanceAC, codewords); 615 // Cb and Cr are encoded about 50 line 615 // Cb and Cr are encoded about 50 lines below 616 } 616 } 617 617 618 // grayscale images don't need any Cb an 618 // grayscale images don't need any Cb and Cr information 619 if (!isRGB) 619 if (!isRGB) 620 continue; 620 continue; 621 621 622 // ///////////////////////////////////// 622 // //////////////////////////////////////// 623 // the following lines are only relevant 623 // the following lines are only relevant for YCbCr420: 624 // average/downsample chrominance of fou 624 // average/downsample chrominance of four pixels while respecting the image borders 625 if (downsample) 625 if (downsample) 626 for (short deltaY = 7; downsample && d 626 for (short deltaY = 7; downsample && deltaY >= 0; deltaY--) // iterating loop in reverse increases cache read efficiency 627 { 627 { 628 size_t row = minimum(uint16_t(m 628 size_t row = minimum(uint16_t(mcuY + 2*deltaY), maxHeight); // each deltaX/Y step covers a 2x2 area 629 size_t column = mcuX; 629 size_t column = mcuX; // column is updated inside next loop 630 size_t pixelPos = (row * int(width) 630 size_t pixelPos = (row * int(width) + column) * 3; // numComponents = 3 631 631 632 // deltas (in bytes) to next row / c 632 // deltas (in bytes) to next row / column, must not exceed image borders 633 size_t rowStep = (row < maxHei 633 size_t rowStep = (row < maxHeight) ? 3 * int(width) : 0; // always numComponents*width except for bottom line 634 size_t columnStep = (column < maxWid 634 size_t columnStep = (column < maxWidth ) ? 3 : 0; // always numComponents except for rightmost pixel 635 635 636 for (short deltaX = 0; deltaX < 8; d 636 for (short deltaX = 0; deltaX < 8; deltaX++) 637 { 637 { 638 // let's add all four samples (2x2 638 // let's add all four samples (2x2 area) 639 size_t right = pixelPos + colu 639 size_t right = pixelPos + columnStep; 640 size_t down = pixelPos + 640 size_t down = pixelPos + rowStep; 641 size_t downRight = pixelPos + colu 641 size_t downRight = pixelPos + columnStep + rowStep; 642 642 643 // note: cast from 8 bits to >8 bi 643 // note: cast from 8 bits to >8 bits to avoid overflows when adding 644 short r = short(pixels[pixelPos 644 short r = short(pixels[pixelPos ]) + pixels[right ] + pixels[down ] + pixels[downRight ]; 645 short g = short(pixels[pixelPos + 645 short g = short(pixels[pixelPos + 1]) + pixels[right + 1] + pixels[down + 1] + pixels[downRight + 1]; 646 short b = short(pixels[pixelPos + 646 short b = short(pixels[pixelPos + 2]) + pixels[right + 2] + pixels[down + 2] + pixels[downRight + 2]; 647 647 648 // convert to Cb and Cr 648 // convert to Cb and Cr 649 Cb[deltaY][deltaX] = rgb2cb(r, g, 649 Cb[deltaY][deltaX] = rgb2cb(r, g, b) / 4; // I still have to divide r,g,b by 4 to get their average values 650 Cr[deltaY][deltaX] = rgb2cr(r, g, 650 Cr[deltaY][deltaX] = rgb2cr(r, g, b) / 4; // it's a bit faster if done AFTER CbCr conversion 651 651 652 // step forward to next 2x2 area 652 // step forward to next 2x2 area 653 pixelPos += 2*3; // 2 pixels => 6 653 pixelPos += 2*3; // 2 pixels => 6 bytes (2*numComponents) 654 column += 2; 654 column += 2; 655 655 656 // reached right border ? 656 // reached right border ? 657 if (column >= maxWidth) 657 if (column >= maxWidth) 658 { 658 { 659 columnStep = 0; 659 columnStep = 0; 660 pixelPos = ((row + 1) * int(widt 660 pixelPos = ((row + 1) * int(width) - 1) * 3; // same as (row * width + maxWidth) * numComponents => current's row last pixel 661 } 661 } 662 } 662 } 663 } // end of YCbCr420 code for Cb and C 663 } // end of YCbCr420 code for Cb and Cr 664 664 665 // encode Cb and Cr 665 // encode Cb and Cr 666 lastCbDC = encodeBlock(bitWriter, Cb, sc 666 lastCbDC = encodeBlock(bitWriter, Cb, scaledChrominance, lastCbDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords); 667 lastCrDC = encodeBlock(bitWriter, Cr, sc 667 lastCrDC = encodeBlock(bitWriter, Cr, scaledChrominance, lastCrDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords); 668 } 668 } 669 669 670 bitWriter.flush(); // now image is completel 670 bitWriter.flush(); // now image is completely encoded, write any bits still left in the buffer 671 671 672 // /////////////////////////// 672 // /////////////////////////// 673 // EOI marker 673 // EOI marker 674 bitWriter << 0xFF << 0xD9; // this marker ha 674 bitWriter << 0xFF << 0xD9; // this marker has no length, therefore I can't use addMarker() 675 return true; 675 return true; 676 } // writeJpeg() 676 } // writeJpeg() 677 677 678 }} 678 }} 679 679