Diff markup

Differences between /externals/g4tools/include/tools/toojpeg.icc (Version 11.3.0) and /externals/g4tools/include/tools/toojpeg.icc (Version 11.1.1)

  1                                                     1 
  2 // G.Barrand: pure header version of toojpeg f      2 // G.Barrand: pure header version of toojpeg found at https://github.com/stbrumme/toojpeg
  3                                                     3 
  4 // ///////////////////////////////////////////      4 // //////////////////////////////////////////////////////////
  5 // toojpeg.cpp                                      5 // toojpeg.cpp
  6 // written by Stephan Brumme, 2018-2019             6 // written by Stephan Brumme, 2018-2019
  7 // see https://create.stephan-brumme.com/toojp      7 // see https://create.stephan-brumme.com/toojpeg/
  8 //                                                  8 //
  9                                                     9 
 10 #include <cstddef> //size_t                        10 #include <cstddef> //size_t
 11                                                    11 
 12 // - the "official" specifications: https://ww     12 // - the "official" specifications: https://www.w3.org/Graphics/JPEG/itu-t81.pdf and https://www.w3.org/Graphics/JPEG/jfif3.pdf
 13 // - Wikipedia has a short description of the      13 // - Wikipedia has a short description of the JFIF/JPEG file format: https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format
 14 // - the popular STB Image library includes Jo     14 // - the popular STB Image library includes Jon's JPEG encoder as well: https://github.com/nothings/stb/blob/master/stb_image_write.h
 15 // - the most readable JPEG book (from a devel     15 // - the most readable JPEG book (from a developer's perspective) is Miano's "Compressed Image File Formats" (1999, ISBN 0-201-60443-4),
 16 //   used copies are really cheap nowadays and     16 //   used copies are really cheap nowadays and include a CD with C++ sources as well (plus great format descriptions of GIF & PNG)
 17 // - much more detailled is Mitchell/Pennebake     17 // - much more detailled is Mitchell/Pennebaker's "JPEG: Still Image Data Compression Standard" (1993, ISBN 0-442-01272-1)
 18 //   which contains the official JPEG standard     18 //   which contains the official JPEG standard, too - fun fact: I bought a signed copy in a second-hand store without noticing
 19                                                    19 
 20 namespace tools {                                  20 namespace tools {
 21 namespace toojpeg {                                21 namespace toojpeg {
 22 // ////////////////////////////////////////        22 // ////////////////////////////////////////
 23 // data types                                      23 // data types
 24 typedef unsigned char uint8_t;                     24 typedef unsigned char uint8_t;
 25 typedef unsigned short uint16_t;                   25 typedef unsigned short uint16_t;
 26 typedef short int16_t;                             26 typedef short int16_t;
 27 typedef int int32_t; // at least four bytes        27 typedef int int32_t; // at least four bytes
 28                                                    28 
 29 // ////////////////////////////////////////        29 // ////////////////////////////////////////
 30 // constants                                       30 // constants
 31                                                    31 
 32 // quantization tables from JPEG Standard, Ann     32 // quantization tables from JPEG Standard, Annex K
 33 const uint8_t DefaultQuantLuminance[8*8] =         33 const uint8_t DefaultQuantLuminance[8*8] =
 34     { 16, 11, 10, 16, 24, 40, 51, 61, // there     34     { 16, 11, 10, 16, 24, 40, 51, 61, // there are a few experts proposing slightly more efficient values,
 35       12, 12, 14, 19, 26, 58, 60, 55, // e.g.      35       12, 12, 14, 19, 26, 58, 60, 55, // e.g. https://www.imagemagick.org/discourse-server/viewtopic.php?t=20333
 36       14, 13, 16, 24, 40, 57, 69, 56, // btw:      36       14, 13, 16, 24, 40, 57, 69, 56, // btw: Google's Guetzli project optimizes the quantization tables per image
 37       14, 17, 22, 29, 51, 87, 80, 62,              37       14, 17, 22, 29, 51, 87, 80, 62,
 38       18, 22, 37, 56, 68,109,103, 77,              38       18, 22, 37, 56, 68,109,103, 77,
 39       24, 35, 55, 64, 81,104,113, 92,              39       24, 35, 55, 64, 81,104,113, 92,
 40       49, 64, 78, 87,103,121,120,101,              40       49, 64, 78, 87,103,121,120,101,
 41       72, 92, 95, 98,112,100,103, 99 };            41       72, 92, 95, 98,112,100,103, 99 };
 42 const uint8_t DefaultQuantChrominance[8*8] =       42 const uint8_t DefaultQuantChrominance[8*8] =
 43     { 17, 18, 24, 47, 99, 99, 99, 99,              43     { 17, 18, 24, 47, 99, 99, 99, 99,
 44       18, 21, 26, 66, 99, 99, 99, 99,              44       18, 21, 26, 66, 99, 99, 99, 99,
 45       24, 26, 56, 99, 99, 99, 99, 99,              45       24, 26, 56, 99, 99, 99, 99, 99,
 46       47, 66, 99, 99, 99, 99, 99, 99,              46       47, 66, 99, 99, 99, 99, 99, 99,
 47       99, 99, 99, 99, 99, 99, 99, 99,              47       99, 99, 99, 99, 99, 99, 99, 99,
 48       99, 99, 99, 99, 99, 99, 99, 99,              48       99, 99, 99, 99, 99, 99, 99, 99,
 49       99, 99, 99, 99, 99, 99, 99, 99,              49       99, 99, 99, 99, 99, 99, 99, 99,
 50       99, 99, 99, 99, 99, 99, 99, 99 };            50       99, 99, 99, 99, 99, 99, 99, 99 };
 51                                                    51 
 52 // 8x8 blocks are processed in zig-zag order       52 // 8x8 blocks are processed in zig-zag order
 53 // most encoders use a zig-zag "forward" table     53 // most encoders use a zig-zag "forward" table, I switched to its inverse for performance reasons
 54 // note: ZigZagInv[ZigZag[i]] = i                  54 // note: ZigZagInv[ZigZag[i]] = i
 55 const uint8_t ZigZagInv[8*8] =                     55 const uint8_t ZigZagInv[8*8] =
 56     {  0, 1, 8,16, 9, 2, 3,10,   // ZigZag[] =     56     {  0, 1, 8,16, 9, 2, 3,10,   // ZigZag[] =  0, 1, 5, 6,14,15,27,28,
 57       17,24,32,25,18,11, 4, 5,   //                57       17,24,32,25,18,11, 4, 5,   //             2, 4, 7,13,16,26,29,42,
 58       12,19,26,33,40,48,41,34,   //                58       12,19,26,33,40,48,41,34,   //             3, 8,12,17,25,30,41,43,
 59       27,20,13, 6, 7,14,21,28,   //                59       27,20,13, 6, 7,14,21,28,   //             9,11,18,24,31,40,44,53,
 60       35,42,49,56,57,50,43,36,   //                60       35,42,49,56,57,50,43,36,   //            10,19,23,32,39,45,52,54,
 61       29,22,15,23,30,37,44,51,   //                61       29,22,15,23,30,37,44,51,   //            20,22,33,38,46,51,55,60,
 62       58,59,52,45,38,31,39,46,   //                62       58,59,52,45,38,31,39,46,   //            21,34,37,47,50,56,59,61,
 63       53,60,61,54,47,55,62,63 }; //                63       53,60,61,54,47,55,62,63 }; //            35,36,48,49,57,58,62,63
 64                                                    64 
 65 // static Huffman code tables from JPEG standa     65 // static Huffman code tables from JPEG standard Annex K
 66 // - CodesPerBitsize tables define how many Hu     66 // - CodesPerBitsize tables define how many Huffman codes will have a certain bitsize (plus 1 because there nothing with zero bits),
 67 //   e.g. DcLuminanceCodesPerBitsize[2] = 5 be     67 //   e.g. DcLuminanceCodesPerBitsize[2] = 5 because there are 5 Huffman codes being 2+1=3 bits long
 68 // - Values tables are a list of values ordere     68 // - Values tables are a list of values ordered by their Huffman code bitsize,
 69 //   e.g. AcLuminanceValues => Huffman(0x01,0x     69 //   e.g. AcLuminanceValues => Huffman(0x01,0x02 and 0x03) will have 2 bits, Huffman(0x00) will have 3 bits, Huffman(0x04,0x11 and 0x05) will have 4 bits, ...
 70                                                    70 
 71 // Huffman definitions for first DC/AC tables      71 // Huffman definitions for first DC/AC tables (luminance / Y channel)
 72 const uint8_t DcLuminanceCodesPerBitsize[16]       72 const uint8_t DcLuminanceCodesPerBitsize[16]   = { 0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };   // sum = 12
 73 const uint8_t DcLuminanceValues         [12]       73 const uint8_t DcLuminanceValues         [12]   = { 0,1,2,3,4,5,6,7,8,9,10,11 };         // => 12 codes
 74 const uint8_t AcLuminanceCodesPerBitsize[16]       74 const uint8_t AcLuminanceCodesPerBitsize[16]   = { 0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,125 }; // sum = 162
 75 const uint8_t AcLuminanceValues        [162]       75 const uint8_t AcLuminanceValues        [162]   =                                        // => 162 codes
 76     { 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,     76     { 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08, // 16*10+2 symbols because
 77       0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,     77       0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28, // upper 4 bits can be 0..F
 78       0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,     78       0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59, // while lower 4 bits can be 1..A
 79       0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,     79       0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89, // plus two special codes 0x00 and 0xF0
 80       0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,     80       0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6, // order of these symbols was determined empirically by JPEG committee
 81       0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,     81       0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,
 82       0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,     82       0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA };
 83 // Huffman definitions for second DC/AC tables     83 // Huffman definitions for second DC/AC tables (chrominance / Cb and Cr channels)
 84 const uint8_t DcChrominanceCodesPerBitsize[16]     84 const uint8_t DcChrominanceCodesPerBitsize[16] = { 0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };   // sum = 12
 85 const uint8_t DcChrominanceValues         [12]     85 const uint8_t DcChrominanceValues         [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 };         // => 12 codes (identical to DcLuminanceValues)
 86 const uint8_t AcChrominanceCodesPerBitsize[16]     86 const uint8_t AcChrominanceCodesPerBitsize[16] = { 0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,119 }; // sum = 162
 87 const uint8_t AcChrominanceValues        [162]     87 const uint8_t AcChrominanceValues        [162] =                                        // => 162 codes
 88     { 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,     88     { 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, // same number of symbol, just different order
 89       0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,     89       0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26, // (which is more efficient for AC coding)
 90       0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,     90       0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,
 91       0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,     91       0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
 92       0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,     92       0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,
 93       0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,     93       0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,
 94       0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,     94       0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA };
 95 const int16_t CodeWordLimit = 2048; // +/-2^11     95 const int16_t CodeWordLimit = 2048; // +/-2^11, maximum value after DCT
 96                                                    96 
 97 // ////////////////////////////////////////        97 // ////////////////////////////////////////
 98 // structs                                         98 // structs
 99                                                    99 
100 // represent a single Huffman code                100 // represent a single Huffman code
101 struct BitCode                                    101 struct BitCode
102 {                                                 102 {
103   //BitCode() = default; // undefined state, m    103   //BitCode() = default; // undefined state, must be initialized at a later time
104   BitCode():code(0),numBits(0) {}                 104   BitCode():code(0),numBits(0) {}
105   BitCode(const BitCode& a_from):code(a_from.c    105   BitCode(const BitCode& a_from):code(a_from.code),numBits(a_from.numBits) {}
106   BitCode& operator=(const BitCode& a_from) {     106   BitCode& operator=(const BitCode& a_from) {
107     code = a_from.code;                           107     code = a_from.code;
108     numBits = a_from.numBits;                     108     numBits = a_from.numBits;
109     return *this;                                 109     return *this;
110   }                                               110   }    
111                                                   111   
112   BitCode(uint16_t code_, uint8_t numBits_)       112   BitCode(uint16_t code_, uint8_t numBits_)
113   : code(code_), numBits(numBits_) {}             113   : code(code_), numBits(numBits_) {}
114   uint16_t code;       // JPEG's Huffman codes    114   uint16_t code;       // JPEG's Huffman codes are limited to 16 bits
115   uint8_t  numBits;    // number of valid bits    115   uint8_t  numBits;    // number of valid bits
116 };                                                116 };
117                                                   117 
118 // wrapper for bit output operations              118 // wrapper for bit output operations
119 struct BitWriter                                  119 struct BitWriter
120 {                                                 120 {
121   // user-supplied callback that writes/stores    121   // user-supplied callback that writes/stores one byte
122   WRITE_ONE_BYTE output;                          122   WRITE_ONE_BYTE output;
123   void* tag;                                      123   void* tag;
124   // initialize writer                            124   // initialize writer
125   explicit BitWriter(WRITE_ONE_BYTE output_,vo    125   explicit BitWriter(WRITE_ONE_BYTE output_,void* tag_) : output(output_),tag(tag_) {
126     buffer.data = 0;                              126     buffer.data = 0;
127     buffer.numBits = 0;                           127     buffer.numBits = 0;
128   }                                               128   }
129                                                   129 
130   // store the most recently encoded bits that    130   // store the most recently encoded bits that are not written yet
131   struct BitBuffer                                131   struct BitBuffer
132   {                                               132   {
133     int32_t data    /*= 0*/; // actually only     133     int32_t data    /*= 0*/; // actually only at most 24 bits are used
134     uint8_t numBits /*= 0*/; // number of vali    134     uint8_t numBits /*= 0*/; // number of valid bits (the right-most bits)
135   } buffer;                                       135   } buffer;
136                                                   136 
137   // write Huffman bits stored in BitCode, kee    137   // write Huffman bits stored in BitCode, keep excess bits in BitBuffer
138   BitWriter& operator<<(const BitCode& data)      138   BitWriter& operator<<(const BitCode& data)
139   {                                               139   {
140     // append the new bits to those bits lefto    140     // append the new bits to those bits leftover from previous call(s)
141     buffer.numBits += data.numBits;               141     buffer.numBits += data.numBits;
142     buffer.data   <<= data.numBits;               142     buffer.data   <<= data.numBits;
143     buffer.data    |= data.code;                  143     buffer.data    |= data.code;
144                                                   144 
145     // write all "full" bytes                     145     // write all "full" bytes
146     while (buffer.numBits >= 8)                   146     while (buffer.numBits >= 8)
147     {                                             147     {
148       // extract highest 8 bits                   148       // extract highest 8 bits
149       buffer.numBits -= 8;                        149       buffer.numBits -= 8;
150       uint8_t oneByte = uint8_t(buffer.data >>    150       uint8_t oneByte = uint8_t(buffer.data >> buffer.numBits);
151       output(oneByte,tag);                        151       output(oneByte,tag);
152                                                   152 
153       if (oneByte == 0xFF) // 0xFF has a speci    153       if (oneByte == 0xFF) // 0xFF has a special meaning for JPEGs (it's a block marker)
154         output(0,tag);         // therefore pa    154         output(0,tag);         // therefore pad a zero to indicate "nope, this one ain't a marker, it's just a coincidence"
155                                                   155 
156       // note: I don't clear those written bit    156       // note: I don't clear those written bits, therefore buffer.bits may contain garbage in the high bits
157       //       if you really want to "clean up    157       //       if you really want to "clean up" (e.g. for debugging purposes) then uncomment the following line
158       //buffer.bits &= (1 << buffer.numBits) -    158       //buffer.bits &= (1 << buffer.numBits) - 1;
159     }                                             159     }
160     return *this;                                 160     return *this;
161   }                                               161   }
162                                                   162 
163   // write all non-yet-written bits, fill gaps    163   // write all non-yet-written bits, fill gaps with 1s (that's a strange JPEG thing)
164   void flush()                                    164   void flush()
165   {                                               165   {
166     // at most seven set bits needed to "fill"    166     // at most seven set bits needed to "fill" the last byte: 0x7F = binary 0111 1111
167     *this << BitCode(0x7F, 7); // I should set    167     *this << BitCode(0x7F, 7); // I should set buffer.numBits = 0 but since there are no single bits written after flush() I can safely ignore it
168   }                                               168   }
169                                                   169 
170   // NOTE: all the following BitWriter functio    170   // NOTE: all the following BitWriter functions IGNORE the BitBuffer and write straight to output !
171   // write a single byte                          171   // write a single byte
172   BitWriter& operator<<(uint8_t oneByte)          172   BitWriter& operator<<(uint8_t oneByte)
173   {                                               173   {
174     output(oneByte,tag);                          174     output(oneByte,tag);
175     return *this;                                 175     return *this;
176   }                                               176   }
177                                                   177 
178   // write an array of bytes                      178   // write an array of bytes
179   template <typename T, int Size>                 179   template <typename T, int Size>
180   BitWriter& operator<<(T (&manyBytes)[Size])     180   BitWriter& operator<<(T (&manyBytes)[Size])
181   {                                               181   {
182   //for (auto c : manyBytes)                      182   //for (auto c : manyBytes)
183   //  output(c);                                  183   //  output(c);
184     for(size_t i=0;i<Size;i++) output(manyByte    184     for(size_t i=0;i<Size;i++) output(manyBytes[i],tag);
185     return *this;                                 185     return *this;
186   }                                               186   }
187                                                   187 
188   // start a new JFIF block                       188   // start a new JFIF block
189   void addMarker(uint8_t id, uint16_t length)     189   void addMarker(uint8_t id, uint16_t length)
190   {                                               190   {
191     output(0xFF,tag); output(id,tag);     // I    191     output(0xFF,tag); output(id,tag);     // ID, always preceded by 0xFF
192     output(uint8_t(length >> 8),tag); // lengt    192     output(uint8_t(length >> 8),tag); // length of the block (big-endian, includes the 2 length bytes as well)
193     output(uint8_t(length & 0xFF),tag);           193     output(uint8_t(length & 0xFF),tag);
194   }                                               194   }
195 };                                                195 };
196                                                   196 
197 // ////////////////////////////////////////       197 // ////////////////////////////////////////
198 // functions / templates                          198 // functions / templates
199                                                   199 
200 // same as std::min()                             200 // same as std::min()
201 template <typename Number>                        201 template <typename Number>
202 inline Number minimum(Number value, Number max    202 inline Number minimum(Number value, Number maximum)
203 {                                                 203 {
204   return value <= maximum ? value : maximum;      204   return value <= maximum ? value : maximum;
205 }                                                 205 }
206                                                   206 
207 // restrict a value to the interval [minimum,     207 // restrict a value to the interval [minimum, maximum]
208 template <typename Number, typename Limit>        208 template <typename Number, typename Limit>
209 inline Number clamp(Number value, Limit minVal    209 inline Number clamp(Number value, Limit minValue, Limit maxValue)
210 {                                                 210 {
211   if (value <= minValue) return minValue; // n    211   if (value <= minValue) return minValue; // never smaller than the minimum
212   if (value >= maxValue) return maxValue; // n    212   if (value >= maxValue) return maxValue; // never bigger  than the maximum
213   return value;                           // v    213   return value;                           // value was inside interval, keep it
214 }                                                 214 }
215                                                   215 
216 // convert from RGB to YCbCr, constants are si    216 // convert from RGB to YCbCr, constants are similar to ITU-R, see https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion
217 inline float rgb2y (float r, float g, float b)    217 inline float rgb2y (float r, float g, float b) { return +0.299f   * r +0.587f   * g +0.114f   * b; }
218 inline float rgb2cb(float r, float g, float b)    218 inline float rgb2cb(float r, float g, float b) { return -0.16874f * r -0.33126f * g +0.5f     * b; }
219 inline float rgb2cr(float r, float g, float b)    219 inline float rgb2cr(float r, float g, float b) { return +0.5f     * r -0.41869f * g -0.08131f * b; }
220                                                   220 
221 // forward DCT computation "in one dimension"     221 // forward DCT computation "in one dimension" (fast AAN algorithm by Arai, Agui and Nakajima: "A fast DCT-SQ scheme for images")
222 inline void DCT(float block[8*8], uint8_t stri    222 inline void DCT(float block[8*8], uint8_t stride) // stride must be 1 (=horizontal) or 8 (=vertical)
223 {                                                 223 {
224   const float SqrtHalfSqrt = 1.306562965f; //     224   const float SqrtHalfSqrt = 1.306562965f; //    sqrt((2 + sqrt(2)) / 2) = cos(pi * 1 / 8) * sqrt(2)
225   const float InvSqrt      = 0.707106781f; //     225   const float InvSqrt      = 0.707106781f; // 1 / sqrt(2)                = cos(pi * 2 / 8)
226   const float HalfSqrtSqrt = 0.382683432f; //     226   const float HalfSqrtSqrt = 0.382683432f; //     sqrt(2 - sqrt(2)) / 2  = cos(pi * 3 / 8)
227   const float InvSqrtSqrt  = 0.541196100f; //     227   const float InvSqrtSqrt  = 0.541196100f; // 1 / sqrt(2 - sqrt(2))      = cos(pi * 3 / 8) * sqrt(2)
228                                                   228 
229   // modify in-place                              229   // modify in-place
230   float& block0 = block[0         ];              230   float& block0 = block[0         ];
231   float& block1 = block[1 * stride];              231   float& block1 = block[1 * stride];
232   float& block2 = block[2 * stride];              232   float& block2 = block[2 * stride];
233   float& block3 = block[3 * stride];              233   float& block3 = block[3 * stride];
234   float& block4 = block[4 * stride];              234   float& block4 = block[4 * stride];
235   float& block5 = block[5 * stride];              235   float& block5 = block[5 * stride];
236   float& block6 = block[6 * stride];              236   float& block6 = block[6 * stride];
237   float& block7 = block[7 * stride];              237   float& block7 = block[7 * stride];
238                                                   238 
239   // based on https://dev.w3.org/Amaya/libjpeg    239   // based on https://dev.w3.org/Amaya/libjpeg/jfdctflt.c , the original variable names can be found in my comments
240   float add07 = block0 + block7; float sub07 =    240   float add07 = block0 + block7; float sub07 = block0 - block7; // tmp0, tmp7
241   float add16 = block1 + block6; float sub16 =    241   float add16 = block1 + block6; float sub16 = block1 - block6; // tmp1, tmp6
242   float add25 = block2 + block5; float sub25 =    242   float add25 = block2 + block5; float sub25 = block2 - block5; // tmp2, tmp5
243   float add34 = block3 + block4; float sub34 =    243   float add34 = block3 + block4; float sub34 = block3 - block4; // tmp3, tmp4
244                                                   244 
245   float add0347 = add07 + add34; float sub07_3    245   float add0347 = add07 + add34; float sub07_34 = add07 - add34; // tmp10, tmp13 ("even part" / "phase 2")
246   float add1256 = add16 + add25; float sub16_2    246   float add1256 = add16 + add25; float sub16_25 = add16 - add25; // tmp11, tmp12
247                                                   247 
248   block0 = add0347 + add1256; block4 = add0347    248   block0 = add0347 + add1256; block4 = add0347 - add1256; // "phase 3"
249                                                   249 
250   float z1 = (sub16_25 + sub07_34) * InvSqrt;     250   float z1 = (sub16_25 + sub07_34) * InvSqrt; // all temporary z-variables kept their original names
251   block2 = sub07_34 + z1; block6 = sub07_34 -     251   block2 = sub07_34 + z1; block6 = sub07_34 - z1; // "phase 5"
252                                                   252 
253   float sub23_45 = sub25 + sub34; // tmp10 ("o    253   float sub23_45 = sub25 + sub34; // tmp10 ("odd part" / "phase 2")
254   float sub12_56 = sub16 + sub25; // tmp11        254   float sub12_56 = sub16 + sub25; // tmp11
255   float sub01_67 = sub16 + sub07; // tmp12        255   float sub01_67 = sub16 + sub07; // tmp12
256                                                   256 
257   float z5 = (sub23_45 - sub01_67) * HalfSqrtS    257   float z5 = (sub23_45 - sub01_67) * HalfSqrtSqrt;
258   float z2 = sub23_45 * InvSqrtSqrt  + z5;        258   float z2 = sub23_45 * InvSqrtSqrt  + z5;
259   float z3 = sub12_56 * InvSqrt;                  259   float z3 = sub12_56 * InvSqrt;
260   float z4 = sub01_67 * SqrtHalfSqrt + z5;        260   float z4 = sub01_67 * SqrtHalfSqrt + z5;
261   float z6 = sub07 + z3; // z11 ("phase 5")       261   float z6 = sub07 + z3; // z11 ("phase 5")
262   float z7 = sub07 - z3; // z13                   262   float z7 = sub07 - z3; // z13
263   block1 = z6 + z4; block7 = z6 - z4; // "phas    263   block1 = z6 + z4; block7 = z6 - z4; // "phase 6"
264   block5 = z7 + z2; block3 = z7 - z2;             264   block5 = z7 + z2; block3 = z7 - z2;
265 }                                                 265 }
266                                                   266 
267 // run DCT, quantize and write Huffman bit cod    267 // run DCT, quantize and write Huffman bit codes
268 inline int16_t encodeBlock(BitWriter& writer,     268 inline int16_t encodeBlock(BitWriter& writer, float block[8][8], const float scaled[8*8], int16_t lastDC,
269                     const BitCode huffmanDC[25    269                     const BitCode huffmanDC[256], const BitCode huffmanAC[256], const BitCode* codewords)
270 {                                                 270 {
271   // "linearize" the 8x8 block, treat it as a     271   // "linearize" the 8x8 block, treat it as a flat array of 64 floats
272   float* block64 = (float*) block;                272   float* block64 = (float*) block;
273                                                   273 
274   // DCT: rows                                    274   // DCT: rows
275   for (size_t offset = 0; offset < 8; offset++    275   for (size_t offset = 0; offset < 8; offset++)
276     DCT(block64 + offset*8, 1);                   276     DCT(block64 + offset*8, 1);
277   // DCT: columns                                 277   // DCT: columns
278   for (size_t offset = 0; offset < 8; offset++    278   for (size_t offset = 0; offset < 8; offset++)
279     DCT(block64 + offset*1, 8);                   279     DCT(block64 + offset*1, 8);
280                                                   280 
281   // scale                                        281   // scale
282   for (size_t i = 0; i < 8*8; i++)                282   for (size_t i = 0; i < 8*8; i++)
283     block64[i] *= scaled[i];                      283     block64[i] *= scaled[i];
284                                                   284 
285   // encode DC (the first coefficient is the "    285   // encode DC (the first coefficient is the "average color" of the 8x8 block)
286   int DC = int(block64[0] + (block64[0] >= 0 ?    286   int DC = int(block64[0] + (block64[0] >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect
287                                                   287 
288   // quantize and zigzag the other 63 coeffici    288   // quantize and zigzag the other 63 coefficients
289   size_t posNonZero = 0; // find last coeffici    289   size_t posNonZero = 0; // find last coefficient which is not zero (because trailing zeros are encoded differently)
290   int16_t quantized[8*8];                         290   int16_t quantized[8*8];
291   for (size_t i = 1; i < 8*8; i++) // start at    291   for (size_t i = 1; i < 8*8; i++) // start at 1 because block64[0]=DC was already processed
292   {                                               292   {
293     float value = block64[ZigZagInv[i]];          293     float value = block64[ZigZagInv[i]];
294     // round to nearest integer                   294     // round to nearest integer
295     quantized[i] = int(value + (value >= 0 ? +    295     quantized[i] = int(value + (value >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect
296     // remember offset of last non-zero coeffi    296     // remember offset of last non-zero coefficient
297     if (quantized[i] != 0)                        297     if (quantized[i] != 0)
298       posNonZero = i;                             298       posNonZero = i;
299   }                                               299   }
300                                                   300 
301   // same "average color" as previous block ?     301   // same "average color" as previous block ?
302   int diff = DC - lastDC;                         302   int diff = DC - lastDC;
303   if (diff == 0)                                  303   if (diff == 0)
304     writer << huffmanDC[0x00];   // yes, write    304     writer << huffmanDC[0x00];   // yes, write a special short symbol
305   else                                            305   else
306   {                                               306   {
307     const BitCode bits = codewords[diff]; // n    307     const BitCode bits = codewords[diff]; // nope, encode the difference to previous block's average color
308     writer << huffmanDC[bits.numBits] << bits;    308     writer << huffmanDC[bits.numBits] << bits;
309   }                                               309   }
310                                                   310 
311   // encode ACs (quantized[1..63])                311   // encode ACs (quantized[1..63])
312   size_t offset = 0; // upper 4 bits count the    312   size_t offset = 0; // upper 4 bits count the number of consecutive zeros
313   for (size_t i = 1; i <= posNonZero; i++) //     313   for (size_t i = 1; i <= posNonZero; i++) // quantized[0] was already written, skip all trailing zeros, too
314   {                                               314   {
315     // zeros are encoded in a special way         315     // zeros are encoded in a special way
316     while (quantized[i] == 0) // found another    316     while (quantized[i] == 0) // found another zero ?
317     {                                             317     {
318       offset    += 0x10; // add 1 to the upper    318       offset    += 0x10; // add 1 to the upper 4 bits
319       // split into blocks of at most 16 conse    319       // split into blocks of at most 16 consecutive zeros
320       if (offset > 0xF0) // remember, the coun    320       if (offset > 0xF0) // remember, the counter is in the upper 4 bits, 0xF = 15
321       {                                           321       {
322         writer << huffmanAC[0xF0]; // 0xF0 is     322         writer << huffmanAC[0xF0]; // 0xF0 is a special code for "16 zeros"
323         offset = 0;                               323         offset = 0;
324       }                                           324       }
325       i++;                                        325       i++;
326     }                                             326     }
327                                                   327 
328     const BitCode encoded = codewords[quantize    328     const BitCode encoded = codewords[quantized[i]];
329     // combine number of zeros with the number    329     // combine number of zeros with the number of bits of the next non-zero value
330     writer << huffmanAC[offset + encoded.numBi    330     writer << huffmanAC[offset + encoded.numBits] << encoded; // and the value itself
331     offset = 0;                                   331     offset = 0;
332   }                                               332   }
333                                                   333 
334   // send end-of-block code (0x00), only neede    334   // send end-of-block code (0x00), only needed if there are trailing zeros
335   if (posNonZero < 8*8 - 1) // = 63               335   if (posNonZero < 8*8 - 1) // = 63
336     writer << huffmanAC[0x00];                    336     writer << huffmanAC[0x00];
337                                                   337 
338   return DC;                                      338   return DC;
339 }                                                 339 }
340                                                   340 
341 // Jon's code includes the pre-generated Huffm    341 // Jon's code includes the pre-generated Huffman codes
342 // I don't like these "magic constants" and co    342 // I don't like these "magic constants" and compute them on my own :-)
343 inline void generateHuffmanTable(const uint8_t    343 inline void generateHuffmanTable(const uint8_t numCodes[16], const uint8_t* values, BitCode result[256])
344 {                                                 344 {
345   // process all bitsizes 1 thru 16, no JPEG H    345   // process all bitsizes 1 thru 16, no JPEG Huffman code is allowed to exceed 16 bits
346   uint16_t huffmanCode = 0;                       346   uint16_t huffmanCode = 0;
347   for (uint8_t numBits = 1; numBits <= 16; num    347   for (uint8_t numBits = 1; numBits <= 16; numBits++)
348   {                                               348   {
349     // ... and each code of these bitsizes        349     // ... and each code of these bitsizes
350     for (uint8_t i = 0; i < numCodes[numBits -    350     for (uint8_t i = 0; i < numCodes[numBits - 1]; i++) // note: numCodes array starts at zero, but smallest bitsize is 1
351       result[*values++] = BitCode(huffmanCode+    351       result[*values++] = BitCode(huffmanCode++, numBits);
352                                                   352 
353     // next Huffman code needs to be one bit w    353     // next Huffman code needs to be one bit wider
354     huffmanCode <<= 1;                            354     huffmanCode <<= 1;
355   }                                               355   }
356 }                                                 356 }
357                                                   357 
358 // -------------------- externally visible cod    358 // -------------------- externally visible code --------------------
359                                                   359 
360 // the only exported function ...                 360 // the only exported function ...
361 inline bool writeJpeg(WRITE_ONE_BYTE output, v    361 inline bool writeJpeg(WRITE_ONE_BYTE output, void* tag,const void* pixels_, unsigned short width, unsigned short height,
362                bool isRGB, unsigned char quali    362                bool isRGB, unsigned char quality_, bool downsample, const char* comment)
363 {                                                 363 {
364   // reject invalid pointers                      364   // reject invalid pointers
365   if (output == 0/*nullptr*/ || pixels_ == 0/*    365   if (output == 0/*nullptr*/ || pixels_ == 0/*nullptr*/)
366     return false;                                 366     return false;
367   // check image format                           367   // check image format
368   if (width == 0 || height == 0)                  368   if (width == 0 || height == 0)
369     return false;                                 369     return false;
370                                                   370 
371   // number of components                         371   // number of components
372   const uint16_t numComponents = isRGB ? 3 : 1    372   const uint16_t numComponents = isRGB ? 3 : 1;
373   // note: if there is just one component (=gr    373   // note: if there is just one component (=grayscale), then only luminance needs to be stored in the file
374   //       thus everything related to chromina    374   //       thus everything related to chrominance need not to be written to the JPEG
375   //       I still compute a few things, like     375   //       I still compute a few things, like quantization tables to avoid a complete code mess
376                                                   376 
377   // grayscale images can't be downsampled (be    377   // grayscale images can't be downsampled (because there are no Cb + Cr channels)
378   if (!isRGB)                                     378   if (!isRGB)
379     downsample = false;                           379     downsample = false;
380                                                   380 
381   // wrapper for all output operations            381   // wrapper for all output operations
382   BitWriter bitWriter(output,tag);                382   BitWriter bitWriter(output,tag);
383                                                   383 
384   // ////////////////////////////////////////     384   // ////////////////////////////////////////
385   // JFIF headers                                 385   // JFIF headers
386   const uint8_t HeaderJfif[2+2+16] =              386   const uint8_t HeaderJfif[2+2+16] =
387       { 0xFF,0xD8,         // SOI marker (star    387       { 0xFF,0xD8,         // SOI marker (start of image)
388         0xFF,0xE0,         // JFIF APP0 tag       388         0xFF,0xE0,         // JFIF APP0 tag
389         0,16,              // length: 16 bytes    389         0,16,              // length: 16 bytes (14 bytes payload + 2 bytes for this length field)
390         'J','F','I','F',0, // JFIF identifier,    390         'J','F','I','F',0, // JFIF identifier, zero-terminated
391         1,1,               // JFIF version 1.1    391         1,1,               // JFIF version 1.1
392         0,                 // no density units    392         0,                 // no density units specified
393         0,1,0,1,           // density: 1 pixel    393         0,1,0,1,           // density: 1 pixel "per pixel" horizontally and vertically
394         0,0 };             // no thumbnail (si    394         0,0 };             // no thumbnail (size 0 x 0)
395   bitWriter << HeaderJfif;                        395   bitWriter << HeaderJfif;
396                                                   396 
397   // ////////////////////////////////////////     397   // ////////////////////////////////////////
398   // comment (optional)                           398   // comment (optional)
399   if (comment != 0/*nullptr*/)                    399   if (comment != 0/*nullptr*/)
400   {                                               400   {
401     // look for zero terminator                   401     // look for zero terminator
402     uint16_t length = 0; // = strlen(comment);    402     uint16_t length = 0; // = strlen(comment);
403     while (comment[length] != 0)                  403     while (comment[length] != 0)
404       length++;                                   404       length++;
405                                                   405 
406     // write COM marker                           406     // write COM marker
407     bitWriter.addMarker(0xFE, 2+length); // bl    407     bitWriter.addMarker(0xFE, 2+length); // block size is number of bytes (without zero terminator) + 2 bytes for this length field
408     // ... and write the comment itself           408     // ... and write the comment itself
409     for (uint16_t i = 0; i < length; i++)         409     for (uint16_t i = 0; i < length; i++)
410       bitWriter << comment[i];                    410       bitWriter << comment[i];
411   }                                               411   }
412                                                   412 
413   // ////////////////////////////////////////     413   // ////////////////////////////////////////
414   // adjust quantization tables to desired qua    414   // adjust quantization tables to desired quality
415                                                   415 
416   // quality level must be in 1 ... 100           416   // quality level must be in 1 ... 100
417   uint16_t quality = clamp<uint16_t>(quality_,    417   uint16_t quality = clamp<uint16_t>(quality_, 1, 100);
418   // convert to an internal JPEG quality facto    418   // convert to an internal JPEG quality factor, formula taken from libjpeg
419   quality = quality < 50 ? 5000 / quality : 20    419   quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
420                                                   420 
421   uint8_t quantLuminance  [8*8];                  421   uint8_t quantLuminance  [8*8];
422   uint8_t quantChrominance[8*8];                  422   uint8_t quantChrominance[8*8];
423   for (size_t i = 0; i < 8*8; i++)                423   for (size_t i = 0; i < 8*8; i++)
424   {                                               424   {
425     int luminance   = (DefaultQuantLuminance      425     int luminance   = (DefaultQuantLuminance  [ZigZagInv[i]] * quality + 50) / 100;
426     int chrominance = (DefaultQuantChrominance    426     int chrominance = (DefaultQuantChrominance[ZigZagInv[i]] * quality + 50) / 100;
427                                                   427 
428     // clamp to 1..255                            428     // clamp to 1..255
429     quantLuminance  [i] = clamp(luminance,   1    429     quantLuminance  [i] = clamp(luminance,   1, 255);
430     quantChrominance[i] = clamp(chrominance, 1    430     quantChrominance[i] = clamp(chrominance, 1, 255);
431   }                                               431   }
432                                                   432 
433   // write quantization tables                    433   // write quantization tables
434   bitWriter.addMarker(0xDB, 2 + (isRGB ? 2 : 1    434   bitWriter.addMarker(0xDB, 2 + (isRGB ? 2 : 1) * (1 + 8*8)); // length: 65 bytes per table + 2 bytes for this length field
435                                                   435                                                               // each table has 64 entries and is preceded by an ID byte
436                                                   436 
437   bitWriter   << 0x00 << quantLuminance;   //     437   bitWriter   << 0x00 << quantLuminance;   // first  quantization table
438   if (isRGB)                                      438   if (isRGB)
439     bitWriter << 0x01 << quantChrominance; //     439     bitWriter << 0x01 << quantChrominance; // second quantization table, only relevant for color images
440                                                   440 
441   // ////////////////////////////////////////     441   // ////////////////////////////////////////
442   // write image infos (SOF0 - start of frame)    442   // write image infos (SOF0 - start of frame)
443   bitWriter.addMarker(0xC0, 2+6+3*numComponent    443   bitWriter.addMarker(0xC0, 2+6+3*numComponents); // length: 6 bytes general info + 3 per channel + 2 bytes for this length field
444                                                   444 
445   // 8 bits per channel                           445   // 8 bits per channel
446   bitWriter << 0x08                               446   bitWriter << 0x08
447   // image dimensions (big-endian)                447   // image dimensions (big-endian)
448             << (height >> 8) << (height & 0xFF    448             << (height >> 8) << (height & 0xFF)
449             << (width  >> 8) << (width  & 0xFF    449             << (width  >> 8) << (width  & 0xFF);
450                                                   450 
451   // sampling and quantization tables for each    451   // sampling and quantization tables for each component
452   bitWriter << numComponents;       // 1 compo    452   bitWriter << numComponents;       // 1 component (grayscale, Y only) or 3 components (Y,Cb,Cr)
453   for (uint16_t id = 1; id <= numComponents; i    453   for (uint16_t id = 1; id <= numComponents; id++)
454     bitWriter <<  id                // compone    454     bitWriter <<  id                // component ID (Y=1, Cb=2, Cr=3)
455     // bitmasks for sampling: highest 4 bits:     455     // bitmasks for sampling: highest 4 bits: horizontal, lowest 4 bits: vertical
456               << (id == 1 && downsample ? 0x22    456               << (id == 1 && downsample ? 0x22 : 0x11) // 0x11 is default YCbCr 4:4:4 and 0x22 stands for YCbCr 4:2:0
457               << (id == 1 ? 0 : 1); // use qua    457               << (id == 1 ? 0 : 1); // use quantization table 0 for Y, table 1 for Cb and Cr
458                                                   458 
459   // ////////////////////////////////////////     459   // ////////////////////////////////////////
460   // Huffman tables                               460   // Huffman tables
461   // DHT marker - define Huffman tables           461   // DHT marker - define Huffman tables
462   bitWriter.addMarker(0xC4, isRGB ? (2+208+208    462   bitWriter.addMarker(0xC4, isRGB ? (2+208+208) : (2+208));
463                             // 2 bytes for the    463                             // 2 bytes for the length field, store chrominance only if needed
464                             //   1+16+12  for     464                             //   1+16+12  for the DC luminance
465                             //   1+16+162 for     465                             //   1+16+162 for the AC luminance   (208 = 1+16+12 + 1+16+162)
466                             //   1+16+12  for     466                             //   1+16+12  for the DC chrominance
467                             //   1+16+162 for     467                             //   1+16+162 for the AC chrominance (208 = 1+16+12 + 1+16+162, same as above)
468                                                   468 
469   // store luminance's DC+AC Huffman table def    469   // store luminance's DC+AC Huffman table definitions
470   bitWriter << 0x00 // highest 4 bits: 0 => DC    470   bitWriter << 0x00 // highest 4 bits: 0 => DC, lowest 4 bits: 0 => Y (baseline)
471             << DcLuminanceCodesPerBitsize         471             << DcLuminanceCodesPerBitsize
472             << DcLuminanceValues;                 472             << DcLuminanceValues;
473   bitWriter << 0x10 // highest 4 bits: 1 => AC    473   bitWriter << 0x10 // highest 4 bits: 1 => AC, lowest 4 bits: 0 => Y (baseline)
474             << AcLuminanceCodesPerBitsize         474             << AcLuminanceCodesPerBitsize
475             << AcLuminanceValues;                 475             << AcLuminanceValues;
476                                                   476 
477   // compute actual Huffman code tables (see J    477   // compute actual Huffman code tables (see Jon's code for precalculated tables)
478   BitCode huffmanLuminanceDC[256];                478   BitCode huffmanLuminanceDC[256];
479   BitCode huffmanLuminanceAC[256];                479   BitCode huffmanLuminanceAC[256];
480   generateHuffmanTable(DcLuminanceCodesPerBits    480   generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC);
481   generateHuffmanTable(AcLuminanceCodesPerBits    481   generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC);
482                                                   482 
483   // chrominance is only relevant for color im    483   // chrominance is only relevant for color images
484   BitCode huffmanChrominanceDC[256];              484   BitCode huffmanChrominanceDC[256];
485   BitCode huffmanChrominanceAC[256];              485   BitCode huffmanChrominanceAC[256];
486   if (isRGB)                                      486   if (isRGB)
487   {                                               487   {
488     // store luminance's DC+AC Huffman table d    488     // store luminance's DC+AC Huffman table definitions
489     bitWriter << 0x01 // highest 4 bits: 0 =>     489     bitWriter << 0x01 // highest 4 bits: 0 => DC, lowest 4 bits: 1 => Cr,Cb (baseline)
490               << DcChrominanceCodesPerBitsize     490               << DcChrominanceCodesPerBitsize
491               << DcChrominanceValues;             491               << DcChrominanceValues;
492     bitWriter << 0x11 // highest 4 bits: 1 =>     492     bitWriter << 0x11 // highest 4 bits: 1 => AC, lowest 4 bits: 1 => Cr,Cb (baseline)
493               << AcChrominanceCodesPerBitsize     493               << AcChrominanceCodesPerBitsize
494               << AcChrominanceValues;             494               << AcChrominanceValues;
495                                                   495 
496     // compute actual Huffman code tables (see    496     // compute actual Huffman code tables (see Jon's code for precalculated tables)
497     generateHuffmanTable(DcChrominanceCodesPer    497     generateHuffmanTable(DcChrominanceCodesPerBitsize, DcChrominanceValues, huffmanChrominanceDC);
498     generateHuffmanTable(AcChrominanceCodesPer    498     generateHuffmanTable(AcChrominanceCodesPerBitsize, AcChrominanceValues, huffmanChrominanceAC);
499   }                                               499   }
500                                                   500 
501   // ////////////////////////////////////////     501   // ////////////////////////////////////////
502   // start of scan (there is only a single sca    502   // start of scan (there is only a single scan for baseline JPEGs)
503   bitWriter.addMarker(0xDA, 2+1+2*numComponent    503   bitWriter.addMarker(0xDA, 2+1+2*numComponents+3); // 2 bytes for the length field, 1 byte for number of components,
504                                                   504                                                     // then 2 bytes for each component and 3 bytes for spectral selection
505                                                   505 
506   // assign Huffman tables to each component      506   // assign Huffman tables to each component
507   bitWriter << numComponents;                     507   bitWriter << numComponents;
508   for (uint16_t id = 1; id <= numComponents; i    508   for (uint16_t id = 1; id <= numComponents; id++)
509     // highest 4 bits: DC Huffman table, lowes    509     // highest 4 bits: DC Huffman table, lowest 4 bits: AC Huffman table
510     bitWriter << id << (id == 1 ? 0x00 : 0x11)    510     bitWriter << id << (id == 1 ? 0x00 : 0x11); // Y: tables 0 for DC and AC; Cb + Cr: tables 1 for DC and AC
511                                                   511 
512   // constant values for our baseline JPEGs (w    512   // constant values for our baseline JPEGs (which have a single sequential scan)
513   static const uint8_t Spectral[3] = { 0, 63,     513   static const uint8_t Spectral[3] = { 0, 63, 0 }; // spectral selection: must be from 0 to 63; successive approximation must be 0
514   bitWriter << Spectral;                          514   bitWriter << Spectral;
515                                                   515 
516   // ////////////////////////////////////////     516   // ////////////////////////////////////////
517   // adjust quantization tables with AAN scali    517   // adjust quantization tables with AAN scaling factors to simplify DCT
518   float scaledLuminance  [8*8];                   518   float scaledLuminance  [8*8];
519   float scaledChrominance[8*8];                   519   float scaledChrominance[8*8];
520   for (size_t i = 0; i < 8*8; i++)                520   for (size_t i = 0; i < 8*8; i++)
521   {                                               521   {
522     size_t row    = ZigZagInv[i] / 8; // same     522     size_t row    = ZigZagInv[i] / 8; // same as ZigZagInv[i] >> 3
523     size_t column = ZigZagInv[i] % 8; // same     523     size_t column = ZigZagInv[i] % 8; // same as ZigZagInv[i] &  7
524                                                   524 
525     // scaling constants for AAN DCT algorithm    525     // scaling constants for AAN DCT algorithm: AanScaleFactors[0] = 1, AanScaleFactors[k=1..7] = cos(k*PI/16) * sqrt(2)
526     static const float AanScaleFactors[8] = {     526     static const float AanScaleFactors[8] = { 1, 1.387039845f, 1.306562965f, 1.175875602f, 1, 0.785694958f, 0.541196100f, 0.275899379f };
527     float factor = 1 / (AanScaleFactors[row] *    527     float factor = 1 / (AanScaleFactors[row] * AanScaleFactors[column] * 8);
528     scaledLuminance  [ZigZagInv[i]] = factor /    528     scaledLuminance  [ZigZagInv[i]] = factor / quantLuminance  [i];
529     scaledChrominance[ZigZagInv[i]] = factor /    529     scaledChrominance[ZigZagInv[i]] = factor / quantChrominance[i];
530     // if you really want JPEGs that are bitwi    530     // if you really want JPEGs that are bitwise identical to Jon Olick's code then you need slightly different formulas (note: sqrt(8) = 2.828427125f)
531     //static const float aasf[] = { 1.0f * 2.8    531     //static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; // line 240 of jo_jpeg.cpp
532     //scaledLuminance  [ZigZagInv[i]] = 1 / (q    532     //scaledLuminance  [ZigZagInv[i]] = 1 / (quantLuminance  [i] * aasf[row] * aasf[column]); // lines 266-267 of jo_jpeg.cpp
533     //scaledChrominance[ZigZagInv[i]] = 1 / (q    533     //scaledChrominance[ZigZagInv[i]] = 1 / (quantChrominance[i] * aasf[row] * aasf[column]);
534   }                                               534   }
535                                                   535 
536   // ////////////////////////////////////////     536   // ////////////////////////////////////////
537   // precompute JPEG codewords for quantized D    537   // precompute JPEG codewords for quantized DCT
538   BitCode  codewordsArray[2 * CodeWordLimit];     538   BitCode  codewordsArray[2 * CodeWordLimit];          // note: quantized[i] is found at codewordsArray[quantized[i] + CodeWordLimit]
539   BitCode* codewords = &codewordsArray[CodeWor    539   BitCode* codewords = &codewordsArray[CodeWordLimit]; // allow negative indices, so quantized[i] is at codewords[quantized[i]]
540   uint8_t numBits = 1; // each codeword has at    540   uint8_t numBits = 1; // each codeword has at least one bit (value == 0 is undefined)
541   int32_t mask    = 1; // mask is always 2^num    541   int32_t mask    = 1; // mask is always 2^numBits - 1, initial value 2^1-1 = 2-1 = 1
542   for (int16_t value = 1; value < CodeWordLimi    542   for (int16_t value = 1; value < CodeWordLimit; value++)
543   {                                               543   {
544     // numBits = position of highest set bit (    544     // numBits = position of highest set bit (ignoring the sign)
545     // mask    = (2^numBits) - 1                  545     // mask    = (2^numBits) - 1
546     if (value > mask) // one more bit ?           546     if (value > mask) // one more bit ?
547     {                                             547     {
548       numBits++;                                  548       numBits++;
549       mask = (mask << 1) | 1; // append a set     549       mask = (mask << 1) | 1; // append a set bit
550     }                                             550     }
551     codewords[-value] = BitCode(mask - value,     551     codewords[-value] = BitCode(mask - value, numBits); // note that I use a negative index => codewords[-value] = codewordsArray[CodeWordLimit  value]
552     codewords[+value] = BitCode(       value,     552     codewords[+value] = BitCode(       value, numBits);
553   }                                               553   }
554                                                   554 
555   // just convert image data from void*           555   // just convert image data from void*
556   const uint8_t* pixels = (const uint8_t*)pixe    556   const uint8_t* pixels = (const uint8_t*)pixels_;
557                                                   557 
558   // the next two variables are frequently use    558   // the next two variables are frequently used when checking for image borders
559   const unsigned short maxWidth  = width  - 1;    559   const unsigned short maxWidth  = width  - 1; // "last row"
560   const unsigned short maxHeight = height - 1;    560   const unsigned short maxHeight = height - 1; // "bottom line"
561                                                   561 
562   // process MCUs (minimum codes units) => ima    562   // process MCUs (minimum codes units) => image is subdivided into a grid of 8x8 or 16x16 tiles
563   const unsigned short sampling = downsample ?    563   const unsigned short sampling = downsample ? 2 : 1; // 1x1 or 2x2 sampling
564   const unsigned short mcuSize  = 8 * sampling    564   const unsigned short mcuSize  = 8 * sampling;
565                                                   565 
566   // average color of the previous MCU            566   // average color of the previous MCU
567   int16_t lastYDC = 0, lastCbDC = 0, lastCrDC     567   int16_t lastYDC = 0, lastCbDC = 0, lastCrDC = 0;
568   // convert from RGB to YCbCr                    568   // convert from RGB to YCbCr
569   float Y[8][8], Cb[8][8], Cr[8][8];              569   float Y[8][8], Cb[8][8], Cr[8][8];
570                                                   570 
571   for (unsigned short mcuY = 0; mcuY < height;    571   for (unsigned short mcuY = 0; mcuY < height; mcuY += mcuSize) // each step is either 8 or 16 (=mcuSize)
572     for (unsigned short mcuX = 0; mcuX < width    572     for (unsigned short mcuX = 0; mcuX < width; mcuX += mcuSize)
573     {                                             573     {
574       // YCbCr 4:4:4 format: each MCU is a 8x8    574       // YCbCr 4:4:4 format: each MCU is a 8x8 block - the same applies to grayscale images, too
575       // YCbCr 4:2:0 format: each MCU represen    575       // YCbCr 4:2:0 format: each MCU represents a 16x16 block, stored as 4x 8x8 Y-blocks plus 1x 8x8 Cb and 1x 8x8 Cr block)
576       for (unsigned short blockY = 0; blockY <    576       for (unsigned short blockY = 0; blockY < mcuSize; blockY += 8) // iterate once (YCbCr444 and grayscale) or twice (YCbCr420)
577         for (unsigned short blockX = 0; blockX    577         for (unsigned short blockX = 0; blockX < mcuSize; blockX += 8)
578         {                                         578         {
579           // now we finally have an 8x8 block     579           // now we finally have an 8x8 block ...
580           for (unsigned short deltaY = 0; delt    580           for (unsigned short deltaY = 0; deltaY < 8; deltaY++)
581           {                                       581           {
582             size_t column = minimum(uint16_t(m    582             size_t column = minimum(uint16_t(mcuX + blockX)         , maxWidth); // must not exceed image borders, replicate last row/column if needed
583             size_t row    = minimum(uint16_t(m    583             size_t row    = minimum(uint16_t(mcuY + blockY + deltaY), maxHeight);
584             for (size_t deltaX = 0; deltaX < 8    584             for (size_t deltaX = 0; deltaX < 8; deltaX++)
585             {                                     585             {
586               // find actual pixel position wi    586               // find actual pixel position within the current image
587               size_t pixelPos = row * int(widt    587               size_t pixelPos = row * int(width) + column; // the cast ensures that we don't run into multiplication overflows
588               if (column < maxWidth)              588               if (column < maxWidth)
589                 column++;                         589                 column++;
590                                                   590 
591               // grayscale images have solely     591               // grayscale images have solely a Y channel which can be easily derived from the input pixel by shifting it by 128
592               if (!isRGB)                         592               if (!isRGB)
593               {                                   593               {
594                 Y[deltaY][deltaX] = pixels[pix    594                 Y[deltaY][deltaX] = pixels[pixelPos] - 128.f;
595                 continue;                         595                 continue;
596               }                                   596               }
597                                                   597 
598               // RGB: 3 bytes per pixel (where    598               // RGB: 3 bytes per pixel (whereas grayscale images have only 1 byte per pixel)
599               uint8_t r = pixels[3 * pixelPos     599               uint8_t r = pixels[3 * pixelPos    ];
600               uint8_t g = pixels[3 * pixelPos     600               uint8_t g = pixels[3 * pixelPos + 1];
601               uint8_t b = pixels[3 * pixelPos     601               uint8_t b = pixels[3 * pixelPos + 2];
602                                                   602 
603               Y   [deltaY][deltaX] = rgb2y (r,    603               Y   [deltaY][deltaX] = rgb2y (r, g, b) - 128; // again, the JPEG standard requires Y to be shifted by 128
604               // YCbCr444 is easy - the more c    604               // YCbCr444 is easy - the more complex YCbCr420 has to be computed about 20 lines below in a second pass
605               if (!downsample)                    605               if (!downsample)
606               {                                   606               {
607                 Cb[deltaY][deltaX] = rgb2cb(r,    607                 Cb[deltaY][deltaX] = rgb2cb(r, g, b); // standard RGB-to-YCbCr conversion
608                 Cr[deltaY][deltaX] = rgb2cr(r,    608                 Cr[deltaY][deltaX] = rgb2cr(r, g, b);
609               }                                   609               }
610             }                                     610             }
611           }                                       611           }
612                                                   612 
613         // encode Y channel                       613         // encode Y channel
614         lastYDC = encodeBlock(bitWriter, Y, sc    614         lastYDC = encodeBlock(bitWriter, Y, scaledLuminance, lastYDC, huffmanLuminanceDC, huffmanLuminanceAC, codewords);
615         // Cb and Cr are encoded about 50 line    615         // Cb and Cr are encoded about 50 lines below
616       }                                           616       }
617                                                   617 
618       // grayscale images don't need any Cb an    618       // grayscale images don't need any Cb and Cr information
619       if (!isRGB)                                 619       if (!isRGB)
620         continue;                                 620         continue;
621                                                   621 
622       // /////////////////////////////////////    622       // ////////////////////////////////////////
623       // the following lines are only relevant    623       // the following lines are only relevant for YCbCr420:
624       // average/downsample chrominance of fou    624       // average/downsample chrominance of four pixels while respecting the image borders
625       if (downsample)                             625       if (downsample)
626         for (short deltaY = 7; downsample && d    626         for (short deltaY = 7; downsample && deltaY >= 0; deltaY--) // iterating loop in reverse increases cache read efficiency
627         {                                         627         {
628           size_t row      = minimum(uint16_t(m    628           size_t row      = minimum(uint16_t(mcuY + 2*deltaY), maxHeight); // each deltaX/Y step covers a 2x2 area
629           size_t column   =         mcuX;         629           size_t column   =         mcuX;                        // column is updated inside next loop
630           size_t pixelPos = (row * int(width)     630           size_t pixelPos = (row * int(width) + column) * 3;     // numComponents = 3
631                                                   631 
632           // deltas (in bytes) to next row / c    632           // deltas (in bytes) to next row / column, must not exceed image borders
633           size_t rowStep    = (row    < maxHei    633           size_t rowStep    = (row    < maxHeight) ? 3 * int(width) : 0; // always numComponents*width except for bottom    line
634           size_t columnStep = (column < maxWid    634           size_t columnStep = (column < maxWidth ) ? 3              : 0; // always numComponents       except for rightmost pixel
635                                                   635 
636           for (short deltaX = 0; deltaX < 8; d    636           for (short deltaX = 0; deltaX < 8; deltaX++)
637           {                                       637           {
638             // let's add all four samples (2x2    638             // let's add all four samples (2x2 area)
639             size_t right     = pixelPos + colu    639             size_t right     = pixelPos + columnStep;
640             size_t down      = pixelPos +         640             size_t down      = pixelPos +              rowStep;
641             size_t downRight = pixelPos + colu    641             size_t downRight = pixelPos + columnStep + rowStep;
642                                                   642 
643             // note: cast from 8 bits to >8 bi    643             // note: cast from 8 bits to >8 bits to avoid overflows when adding
644             short r = short(pixels[pixelPos       644             short r = short(pixels[pixelPos    ]) + pixels[right    ] + pixels[down    ] + pixels[downRight    ];
645             short g = short(pixels[pixelPos +     645             short g = short(pixels[pixelPos + 1]) + pixels[right + 1] + pixels[down + 1] + pixels[downRight + 1];
646             short b = short(pixels[pixelPos +     646             short b = short(pixels[pixelPos + 2]) + pixels[right + 2] + pixels[down + 2] + pixels[downRight + 2];
647                                                   647 
648             // convert to Cb and Cr               648             // convert to Cb and Cr
649             Cb[deltaY][deltaX] = rgb2cb(r, g,     649             Cb[deltaY][deltaX] = rgb2cb(r, g, b) / 4; // I still have to divide r,g,b by 4 to get their average values
650             Cr[deltaY][deltaX] = rgb2cr(r, g,     650             Cr[deltaY][deltaX] = rgb2cr(r, g, b) / 4; // it's a bit faster if done AFTER CbCr conversion
651                                                   651 
652             // step forward to next 2x2 area      652             // step forward to next 2x2 area
653             pixelPos += 2*3; // 2 pixels => 6     653             pixelPos += 2*3; // 2 pixels => 6 bytes (2*numComponents)
654             column   += 2;                        654             column   += 2;
655                                                   655 
656             // reached right border ?             656             // reached right border ?
657             if (column >= maxWidth)               657             if (column >= maxWidth)
658             {                                     658             {
659               columnStep = 0;                     659               columnStep = 0;
660               pixelPos = ((row + 1) * int(widt    660               pixelPos = ((row + 1) * int(width) - 1) * 3; // same as (row * width + maxWidth) * numComponents => current's row last pixel
661             }                                     661             }
662           }                                       662           }
663         } // end of YCbCr420 code for Cb and C    663         } // end of YCbCr420 code for Cb and Cr
664                                                   664 
665       // encode Cb and Cr                         665       // encode Cb and Cr
666       lastCbDC = encodeBlock(bitWriter, Cb, sc    666       lastCbDC = encodeBlock(bitWriter, Cb, scaledChrominance, lastCbDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords);
667       lastCrDC = encodeBlock(bitWriter, Cr, sc    667       lastCrDC = encodeBlock(bitWriter, Cr, scaledChrominance, lastCrDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords);
668     }                                             668     }
669                                                   669 
670   bitWriter.flush(); // now image is completel    670   bitWriter.flush(); // now image is completely encoded, write any bits still left in the buffer
671                                                   671 
672   // ///////////////////////////                  672   // ///////////////////////////
673   // EOI marker                                   673   // EOI marker
674   bitWriter << 0xFF << 0xD9; // this marker ha    674   bitWriter << 0xFF << 0xD9; // this marker has no length, therefore I can't use addMarker()
675   return true;                                    675   return true;
676 } // writeJpeg()                                  676 } // writeJpeg()
677                                                   677 
678 }}                                                678 }}
679                                                   679
Cross-Referencing Geant4 Geant4/externals/g4tools/include/tools/toojpeg.icc

Diff markup

Differences between /externals/g4tools/include/tools/toojpeg.icc (Version 11.3.0) and /externals/g4tools/include/tools/toojpeg.icc (Version 11.1.1)

Cross-Referencing Geant4
Geant4/externals/g4tools/include/tools/toojpeg.icc