Geant4 Cross Reference

Cross-Referencing   Geant4
Geant4/externals/expat/src/xmltok_impl.c

Version: [ ReleaseNotes ] [ 1.0 ] [ 1.1 ] [ 2.0 ] [ 3.0 ] [ 3.1 ] [ 3.2 ] [ 4.0 ] [ 4.0.p1 ] [ 4.0.p2 ] [ 4.1 ] [ 4.1.p1 ] [ 5.0 ] [ 5.0.p1 ] [ 5.1 ] [ 5.1.p1 ] [ 5.2 ] [ 5.2.p1 ] [ 5.2.p2 ] [ 6.0 ] [ 6.0.p1 ] [ 6.1 ] [ 6.2 ] [ 6.2.p1 ] [ 6.2.p2 ] [ 7.0 ] [ 7.0.p1 ] [ 7.1 ] [ 7.1.p1 ] [ 8.0 ] [ 8.0.p1 ] [ 8.1 ] [ 8.1.p1 ] [ 8.1.p2 ] [ 8.2 ] [ 8.2.p1 ] [ 8.3 ] [ 8.3.p1 ] [ 8.3.p2 ] [ 9.0 ] [ 9.0.p1 ] [ 9.0.p2 ] [ 9.1 ] [ 9.1.p1 ] [ 9.1.p2 ] [ 9.1.p3 ] [ 9.2 ] [ 9.2.p1 ] [ 9.2.p2 ] [ 9.2.p3 ] [ 9.2.p4 ] [ 9.3 ] [ 9.3.p1 ] [ 9.3.p2 ] [ 9.4 ] [ 9.4.p1 ] [ 9.4.p2 ] [ 9.4.p3 ] [ 9.4.p4 ] [ 9.5 ] [ 9.5.p1 ] [ 9.5.p2 ] [ 9.6 ] [ 9.6.p1 ] [ 9.6.p2 ] [ 9.6.p3 ] [ 9.6.p4 ] [ 10.0 ] [ 10.0.p1 ] [ 10.0.p2 ] [ 10.0.p3 ] [ 10.0.p4 ] [ 10.1 ] [ 10.1.p1 ] [ 10.1.p2 ] [ 10.1.p3 ] [ 10.2 ] [ 10.2.p1 ] [ 10.2.p2 ] [ 10.2.p3 ] [ 10.3 ] [ 10.3.p1 ] [ 10.3.p2 ] [ 10.3.p3 ] [ 10.4 ] [ 10.4.p1 ] [ 10.4.p2 ] [ 10.4.p3 ] [ 10.5 ] [ 10.5.p1 ] [ 10.6 ] [ 10.6.p1 ] [ 10.6.p2 ] [ 10.6.p3 ] [ 10.7 ] [ 10.7.p1 ] [ 10.7.p2 ] [ 10.7.p3 ] [ 10.7.p4 ] [ 11.0 ] [ 11.0.p1 ] [ 11.0.p2 ] [ 11.0.p3 ] [ 11.0.p4 ] [ 11.1 ] [ 11.1.1 ] [ 11.1.2 ] [ 11.1.3 ] [ 11.2 ] [ 11.2.1 ] [ 11.2.2 ] [ 11.3.0 ]

Diff markup

Differences between /externals/expat/src/xmltok_impl.c (Version 11.3.0) and /externals/expat/src/xmltok_impl.c (Version 11.2)


  1 /* This file is included (from xmltok.c, 1-3 t      1 /* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)!
  2                             __  __                  2                             __  __            _
  3                          ___\ \/ /_ __   __ _|      3                          ___\ \/ /_ __   __ _| |_
  4                         / _ \\  /| '_ \ / _` |      4                         / _ \\  /| '_ \ / _` | __|
  5                        |  __//  \| |_) | (_| |      5                        |  __//  \| |_) | (_| | |_
  6                         \___/_/\_\ .__/ \__,_|      6                         \___/_/\_\ .__/ \__,_|\__|
  7                                  |_| XML parse      7                                  |_| XML parser
  8                                                     8 
  9    Copyright (c) 1997-2000 Thai Open Source So      9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
 10    Copyright (c) 2000      Clark Cooper <coope     10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
 11    Copyright (c) 2002      Fred L. Drake, Jr.      11    Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
 12    Copyright (c) 2002-2016 Karl Waclawek <karl     12    Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
 13    Copyright (c) 2016-2022 Sebastian Pipping <     13    Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
 14    Copyright (c) 2017      Rhodri James <rhodr     14    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
 15    Copyright (c) 2018      Benjamin Peterson <     15    Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
 16    Copyright (c) 2018      Anton Maklakov <ant     16    Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
 17    Copyright (c) 2019      David Loffredo <lof     17    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
 18    Copyright (c) 2020      Boris Kolpackov <bo     18    Copyright (c) 2020      Boris Kolpackov <boris@codesynthesis.com>
 19    Copyright (c) 2022      Martin Ettl <ettl.m     19    Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
 20    Licensed under the MIT license:                 20    Licensed under the MIT license:
 21                                                    21 
 22    Permission is  hereby granted,  free of cha     22    Permission is  hereby granted,  free of charge,  to any  person obtaining
 23    a  copy  of  this  software   and  associat     23    a  copy  of  this  software   and  associated  documentation  files  (the
 24    "Software"),  to  deal in  the  Software  w     24    "Software"),  to  deal in  the  Software  without restriction,  including
 25    without  limitation the  rights  to use,  c     25    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
 26    distribute, sublicense, and/or sell copies      26    distribute, sublicense, and/or sell copies of the Software, and to permit
 27    persons  to whom  the Software  is  furnish     27    persons  to whom  the Software  is  furnished to  do so,  subject to  the
 28    following conditions:                           28    following conditions:
 29                                                    29 
 30    The above copyright  notice and this permis     30    The above copyright  notice and this permission notice  shall be included
 31    in all copies or substantial portions of th     31    in all copies or substantial portions of the Software.
 32                                                    32 
 33    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WIT     33    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
 34    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT L     34    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
 35    MERCHANTABILITY, FITNESS FOR A PARTICULAR P     35    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 36    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HO     36    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 37    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN     37    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
 38    OTHERWISE, ARISING FROM, OUT OF OR IN CONNE     38    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 39    USE OR OTHER DEALINGS IN THE SOFTWARE.          39    USE OR OTHER DEALINGS IN THE SOFTWARE.
 40 */                                                 40 */
 41                                                    41 
 42 #ifdef XML_TOK_IMPL_C                              42 #ifdef XML_TOK_IMPL_C
 43                                                    43 
 44 #  ifndef IS_INVALID_CHAR // i.e. for UTF-16 a     44 #  ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined
 45 #    define IS_INVALID_CHAR(enc, ptr, n) (0)       45 #    define IS_INVALID_CHAR(enc, ptr, n) (0)
 46 #  endif                                           46 #  endif
 47                                                    47 
 48 #  define INVALID_LEAD_CASE(n, ptr, nextTokPtr     48 #  define INVALID_LEAD_CASE(n, ptr, nextTokPtr)                                \
 49   case BT_LEAD##n:                                 49   case BT_LEAD##n:                                                             \
 50     if (end - ptr < n)                             50     if (end - ptr < n)                                                         \
 51       return XML_TOK_PARTIAL_CHAR;                 51       return XML_TOK_PARTIAL_CHAR;                                             \
 52     if (IS_INVALID_CHAR(enc, ptr, n)) {            52     if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
 53       *(nextTokPtr) = (ptr);                       53       *(nextTokPtr) = (ptr);                                                   \
 54       return XML_TOK_INVALID;                      54       return XML_TOK_INVALID;                                                  \
 55     }                                              55     }                                                                          \
 56     ptr += n;                                      56     ptr += n;                                                                  \
 57     break;                                         57     break;
 58                                                    58 
 59 #  define INVALID_CASES(ptr, nextTokPtr)           59 #  define INVALID_CASES(ptr, nextTokPtr)                                       \
 60     INVALID_LEAD_CASE(2, ptr, nextTokPtr)          60     INVALID_LEAD_CASE(2, ptr, nextTokPtr)                                      \
 61     INVALID_LEAD_CASE(3, ptr, nextTokPtr)          61     INVALID_LEAD_CASE(3, ptr, nextTokPtr)                                      \
 62     INVALID_LEAD_CASE(4, ptr, nextTokPtr)          62     INVALID_LEAD_CASE(4, ptr, nextTokPtr)                                      \
 63   case BT_NONXML:                                  63   case BT_NONXML:                                                              \
 64   case BT_MALFORM:                                 64   case BT_MALFORM:                                                             \
 65   case BT_TRAIL:                                   65   case BT_TRAIL:                                                               \
 66     *(nextTokPtr) = (ptr);                         66     *(nextTokPtr) = (ptr);                                                     \
 67     return XML_TOK_INVALID;                        67     return XML_TOK_INVALID;
 68                                                    68 
 69 #  define CHECK_NAME_CASE(n, enc, ptr, end, ne     69 #  define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr)                        \
 70   case BT_LEAD##n:                                 70   case BT_LEAD##n:                                                             \
 71     if (end - ptr < n)                             71     if (end - ptr < n)                                                         \
 72       return XML_TOK_PARTIAL_CHAR;                 72       return XML_TOK_PARTIAL_CHAR;                                             \
 73     if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_N     73     if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
 74       *nextTokPtr = ptr;                           74       *nextTokPtr = ptr;                                                       \
 75       return XML_TOK_INVALID;                      75       return XML_TOK_INVALID;                                                  \
 76     }                                              76     }                                                                          \
 77     ptr += n;                                      77     ptr += n;                                                                  \
 78     break;                                         78     break;
 79                                                    79 
 80 #  define CHECK_NAME_CASES(enc, ptr, end, next     80 #  define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)                          \
 81   case BT_NONASCII:                                81   case BT_NONASCII:                                                            \
 82     if (! IS_NAME_CHAR_MINBPC(enc, ptr)) {         82     if (! IS_NAME_CHAR_MINBPC(enc, ptr)) {                                     \
 83       *nextTokPtr = ptr;                           83       *nextTokPtr = ptr;                                                       \
 84       return XML_TOK_INVALID;                      84       return XML_TOK_INVALID;                                                  \
 85     }                                              85     }                                                                          \
 86     /* fall through */                             86     /* fall through */                                                         \
 87   case BT_NMSTRT:                                  87   case BT_NMSTRT:                                                              \
 88   case BT_HEX:                                     88   case BT_HEX:                                                                 \
 89   case BT_DIGIT:                                   89   case BT_DIGIT:                                                               \
 90   case BT_NAME:                                    90   case BT_NAME:                                                                \
 91   case BT_MINUS:                                   91   case BT_MINUS:                                                               \
 92     ptr += MINBPC(enc);                            92     ptr += MINBPC(enc);                                                        \
 93     break;                                         93     break;                                                                     \
 94     CHECK_NAME_CASE(2, enc, ptr, end, nextTokP     94     CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr)                              \
 95     CHECK_NAME_CASE(3, enc, ptr, end, nextTokP     95     CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr)                              \
 96     CHECK_NAME_CASE(4, enc, ptr, end, nextTokP     96     CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
 97                                                    97 
 98 #  define CHECK_NMSTRT_CASE(n, enc, ptr, end,      98 #  define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr)                      \
 99   case BT_LEAD##n:                                 99   case BT_LEAD##n:                                                             \
100     if ((end) - (ptr) < (n))                      100     if ((end) - (ptr) < (n))                                                   \
101       return XML_TOK_PARTIAL_CHAR;                101       return XML_TOK_PARTIAL_CHAR;                                             \
102     if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_N    102     if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
103       *nextTokPtr = ptr;                          103       *nextTokPtr = ptr;                                                       \
104       return XML_TOK_INVALID;                     104       return XML_TOK_INVALID;                                                  \
105     }                                             105     }                                                                          \
106     ptr += n;                                     106     ptr += n;                                                                  \
107     break;                                        107     break;
108                                                   108 
109 #  define CHECK_NMSTRT_CASES(enc, ptr, end, ne    109 #  define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)                        \
110   case BT_NONASCII:                               110   case BT_NONASCII:                                                            \
111     if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {      111     if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {                                   \
112       *nextTokPtr = ptr;                          112       *nextTokPtr = ptr;                                                       \
113       return XML_TOK_INVALID;                     113       return XML_TOK_INVALID;                                                  \
114     }                                             114     }                                                                          \
115     /* fall through */                            115     /* fall through */                                                         \
116   case BT_NMSTRT:                                 116   case BT_NMSTRT:                                                              \
117   case BT_HEX:                                    117   case BT_HEX:                                                                 \
118     ptr += MINBPC(enc);                           118     ptr += MINBPC(enc);                                                        \
119     break;                                        119     break;                                                                     \
120     CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTo    120     CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr)                            \
121     CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTo    121     CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr)                            \
122     CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTo    122     CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
123                                                   123 
124 #  ifndef PREFIX                                  124 #  ifndef PREFIX
125 #    define PREFIX(ident) ident                   125 #    define PREFIX(ident) ident
126 #  endif                                          126 #  endif
127                                                   127 
128 #  define HAS_CHARS(enc, ptr, end, count)         128 #  define HAS_CHARS(enc, ptr, end, count)                                      \
129     ((end) - (ptr) >= ((count)*MINBPC(enc)))      129     ((end) - (ptr) >= ((count)*MINBPC(enc)))
130                                                   130 
131 #  define HAS_CHAR(enc, ptr, end) HAS_CHARS(en    131 #  define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)
132                                                   132 
133 #  define REQUIRE_CHARS(enc, ptr, end, count)     133 #  define REQUIRE_CHARS(enc, ptr, end, count)                                  \
134     {                                             134     {                                                                          \
135       if (! HAS_CHARS(enc, ptr, end, count)) {    135       if (! HAS_CHARS(enc, ptr, end, count)) {                                 \
136         return XML_TOK_PARTIAL;                   136         return XML_TOK_PARTIAL;                                                \
137       }                                           137       }                                                                        \
138     }                                             138     }
139                                                   139 
140 #  define REQUIRE_CHAR(enc, ptr, end) REQUIRE_    140 #  define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)
141                                                   141 
142 /* ptr points to character following "<!-" */     142 /* ptr points to character following "<!-" */
143                                                   143 
144 static int PTRCALL                                144 static int PTRCALL
145 PREFIX(scanComment)(const ENCODING *enc, const    145 PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
146                     const char **nextTokPtr) {    146                     const char **nextTokPtr) {
147   if (HAS_CHAR(enc, ptr, end)) {                  147   if (HAS_CHAR(enc, ptr, end)) {
148     if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS))    148     if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
149       *nextTokPtr = ptr;                          149       *nextTokPtr = ptr;
150       return XML_TOK_INVALID;                     150       return XML_TOK_INVALID;
151     }                                             151     }
152     ptr += MINBPC(enc);                           152     ptr += MINBPC(enc);
153     while (HAS_CHAR(enc, ptr, end)) {             153     while (HAS_CHAR(enc, ptr, end)) {
154       switch (BYTE_TYPE(enc, ptr)) {              154       switch (BYTE_TYPE(enc, ptr)) {
155         INVALID_CASES(ptr, nextTokPtr)            155         INVALID_CASES(ptr, nextTokPtr)
156       case BT_MINUS:                              156       case BT_MINUS:
157         ptr += MINBPC(enc);                       157         ptr += MINBPC(enc);
158         REQUIRE_CHAR(enc, ptr, end);              158         REQUIRE_CHAR(enc, ptr, end);
159         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS    159         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
160           ptr += MINBPC(enc);                     160           ptr += MINBPC(enc);
161           REQUIRE_CHAR(enc, ptr, end);            161           REQUIRE_CHAR(enc, ptr, end);
162           if (! CHAR_MATCHES(enc, ptr, ASCII_G    162           if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
163             *nextTokPtr = ptr;                    163             *nextTokPtr = ptr;
164             return XML_TOK_INVALID;               164             return XML_TOK_INVALID;
165           }                                       165           }
166           *nextTokPtr = ptr + MINBPC(enc);        166           *nextTokPtr = ptr + MINBPC(enc);
167           return XML_TOK_COMMENT;                 167           return XML_TOK_COMMENT;
168         }                                         168         }
169         break;                                    169         break;
170       default:                                    170       default:
171         ptr += MINBPC(enc);                       171         ptr += MINBPC(enc);
172         break;                                    172         break;
173       }                                           173       }
174     }                                             174     }
175   }                                               175   }
176   return XML_TOK_PARTIAL;                         176   return XML_TOK_PARTIAL;
177 }                                                 177 }
178                                                   178 
179 /* ptr points to character following "<!" */      179 /* ptr points to character following "<!" */
180                                                   180 
181 static int PTRCALL                                181 static int PTRCALL
182 PREFIX(scanDecl)(const ENCODING *enc, const ch    182 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
183                  const char **nextTokPtr) {       183                  const char **nextTokPtr) {
184   REQUIRE_CHAR(enc, ptr, end);                    184   REQUIRE_CHAR(enc, ptr, end);
185   switch (BYTE_TYPE(enc, ptr)) {                  185   switch (BYTE_TYPE(enc, ptr)) {
186   case BT_MINUS:                                  186   case BT_MINUS:
187     return PREFIX(scanComment)(enc, ptr + MINB    187     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
188   case BT_LSQB:                                   188   case BT_LSQB:
189     *nextTokPtr = ptr + MINBPC(enc);              189     *nextTokPtr = ptr + MINBPC(enc);
190     return XML_TOK_COND_SECT_OPEN;                190     return XML_TOK_COND_SECT_OPEN;
191   case BT_NMSTRT:                                 191   case BT_NMSTRT:
192   case BT_HEX:                                    192   case BT_HEX:
193     ptr += MINBPC(enc);                           193     ptr += MINBPC(enc);
194     break;                                        194     break;
195   default:                                        195   default:
196     *nextTokPtr = ptr;                            196     *nextTokPtr = ptr;
197     return XML_TOK_INVALID;                       197     return XML_TOK_INVALID;
198   }                                               198   }
199   while (HAS_CHAR(enc, ptr, end)) {               199   while (HAS_CHAR(enc, ptr, end)) {
200     switch (BYTE_TYPE(enc, ptr)) {                200     switch (BYTE_TYPE(enc, ptr)) {
201     case BT_PERCNT:                               201     case BT_PERCNT:
202       REQUIRE_CHARS(enc, ptr, end, 2);            202       REQUIRE_CHARS(enc, ptr, end, 2);
203       /* don't allow <!ENTITY% foo "whatever">    203       /* don't allow <!ENTITY% foo "whatever"> */
204       switch (BYTE_TYPE(enc, ptr + MINBPC(enc)    204       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
205       case BT_S:                                  205       case BT_S:
206       case BT_CR:                                 206       case BT_CR:
207       case BT_LF:                                 207       case BT_LF:
208       case BT_PERCNT:                             208       case BT_PERCNT:
209         *nextTokPtr = ptr;                        209         *nextTokPtr = ptr;
210         return XML_TOK_INVALID;                   210         return XML_TOK_INVALID;
211       }                                           211       }
212       /* fall through */                          212       /* fall through */
213     case BT_S:                                    213     case BT_S:
214     case BT_CR:                                   214     case BT_CR:
215     case BT_LF:                                   215     case BT_LF:
216       *nextTokPtr = ptr;                          216       *nextTokPtr = ptr;
217       return XML_TOK_DECL_OPEN;                   217       return XML_TOK_DECL_OPEN;
218     case BT_NMSTRT:                               218     case BT_NMSTRT:
219     case BT_HEX:                                  219     case BT_HEX:
220       ptr += MINBPC(enc);                         220       ptr += MINBPC(enc);
221       break;                                      221       break;
222     default:                                      222     default:
223       *nextTokPtr = ptr;                          223       *nextTokPtr = ptr;
224       return XML_TOK_INVALID;                     224       return XML_TOK_INVALID;
225     }                                             225     }
226   }                                               226   }
227   return XML_TOK_PARTIAL;                         227   return XML_TOK_PARTIAL;
228 }                                                 228 }
229                                                   229 
230 static int PTRCALL                                230 static int PTRCALL
231 PREFIX(checkPiTarget)(const ENCODING *enc, con    231 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
232                       int *tokPtr) {              232                       int *tokPtr) {
233   int upper = 0;                                  233   int upper = 0;
234   UNUSED_P(enc);                                  234   UNUSED_P(enc);
235   *tokPtr = XML_TOK_PI;                           235   *tokPtr = XML_TOK_PI;
236   if (end - ptr != MINBPC(enc) * 3)               236   if (end - ptr != MINBPC(enc) * 3)
237     return 1;                                     237     return 1;
238   switch (BYTE_TO_ASCII(enc, ptr)) {              238   switch (BYTE_TO_ASCII(enc, ptr)) {
239   case ASCII_x:                                   239   case ASCII_x:
240     break;                                        240     break;
241   case ASCII_X:                                   241   case ASCII_X:
242     upper = 1;                                    242     upper = 1;
243     break;                                        243     break;
244   default:                                        244   default:
245     return 1;                                     245     return 1;
246   }                                               246   }
247   ptr += MINBPC(enc);                             247   ptr += MINBPC(enc);
248   switch (BYTE_TO_ASCII(enc, ptr)) {              248   switch (BYTE_TO_ASCII(enc, ptr)) {
249   case ASCII_m:                                   249   case ASCII_m:
250     break;                                        250     break;
251   case ASCII_M:                                   251   case ASCII_M:
252     upper = 1;                                    252     upper = 1;
253     break;                                        253     break;
254   default:                                        254   default:
255     return 1;                                     255     return 1;
256   }                                               256   }
257   ptr += MINBPC(enc);                             257   ptr += MINBPC(enc);
258   switch (BYTE_TO_ASCII(enc, ptr)) {              258   switch (BYTE_TO_ASCII(enc, ptr)) {
259   case ASCII_l:                                   259   case ASCII_l:
260     break;                                        260     break;
261   case ASCII_L:                                   261   case ASCII_L:
262     upper = 1;                                    262     upper = 1;
263     break;                                        263     break;
264   default:                                        264   default:
265     return 1;                                     265     return 1;
266   }                                               266   }
267   if (upper)                                      267   if (upper)
268     return 0;                                     268     return 0;
269   *tokPtr = XML_TOK_XML_DECL;                     269   *tokPtr = XML_TOK_XML_DECL;
270   return 1;                                       270   return 1;
271 }                                                 271 }
272                                                   272 
273 /* ptr points to character following "<?" */      273 /* ptr points to character following "<?" */
274                                                   274 
275 static int PTRCALL                                275 static int PTRCALL
276 PREFIX(scanPi)(const ENCODING *enc, const char    276 PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
277                const char **nextTokPtr) {         277                const char **nextTokPtr) {
278   int tok;                                        278   int tok;
279   const char *target = ptr;                       279   const char *target = ptr;
280   REQUIRE_CHAR(enc, ptr, end);                    280   REQUIRE_CHAR(enc, ptr, end);
281   switch (BYTE_TYPE(enc, ptr)) {                  281   switch (BYTE_TYPE(enc, ptr)) {
282     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP    282     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
283   default:                                        283   default:
284     *nextTokPtr = ptr;                            284     *nextTokPtr = ptr;
285     return XML_TOK_INVALID;                       285     return XML_TOK_INVALID;
286   }                                               286   }
287   while (HAS_CHAR(enc, ptr, end)) {               287   while (HAS_CHAR(enc, ptr, end)) {
288     switch (BYTE_TYPE(enc, ptr)) {                288     switch (BYTE_TYPE(enc, ptr)) {
289       CHECK_NAME_CASES(enc, ptr, end, nextTokP    289       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
290     case BT_S:                                    290     case BT_S:
291     case BT_CR:                                   291     case BT_CR:
292     case BT_LF:                                   292     case BT_LF:
293       if (! PREFIX(checkPiTarget)(enc, target,    293       if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
294         *nextTokPtr = ptr;                        294         *nextTokPtr = ptr;
295         return XML_TOK_INVALID;                   295         return XML_TOK_INVALID;
296       }                                           296       }
297       ptr += MINBPC(enc);                         297       ptr += MINBPC(enc);
298       while (HAS_CHAR(enc, ptr, end)) {           298       while (HAS_CHAR(enc, ptr, end)) {
299         switch (BYTE_TYPE(enc, ptr)) {            299         switch (BYTE_TYPE(enc, ptr)) {
300           INVALID_CASES(ptr, nextTokPtr)          300           INVALID_CASES(ptr, nextTokPtr)
301         case BT_QUEST:                            301         case BT_QUEST:
302           ptr += MINBPC(enc);                     302           ptr += MINBPC(enc);
303           REQUIRE_CHAR(enc, ptr, end);            303           REQUIRE_CHAR(enc, ptr, end);
304           if (CHAR_MATCHES(enc, ptr, ASCII_GT)    304           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
305             *nextTokPtr = ptr + MINBPC(enc);      305             *nextTokPtr = ptr + MINBPC(enc);
306             return tok;                           306             return tok;
307           }                                       307           }
308           break;                                  308           break;
309         default:                                  309         default:
310           ptr += MINBPC(enc);                     310           ptr += MINBPC(enc);
311           break;                                  311           break;
312         }                                         312         }
313       }                                           313       }
314       return XML_TOK_PARTIAL;                     314       return XML_TOK_PARTIAL;
315     case BT_QUEST:                                315     case BT_QUEST:
316       if (! PREFIX(checkPiTarget)(enc, target,    316       if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
317         *nextTokPtr = ptr;                        317         *nextTokPtr = ptr;
318         return XML_TOK_INVALID;                   318         return XML_TOK_INVALID;
319       }                                           319       }
320       ptr += MINBPC(enc);                         320       ptr += MINBPC(enc);
321       REQUIRE_CHAR(enc, ptr, end);                321       REQUIRE_CHAR(enc, ptr, end);
322       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {     322       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
323         *nextTokPtr = ptr + MINBPC(enc);          323         *nextTokPtr = ptr + MINBPC(enc);
324         return tok;                               324         return tok;
325       }                                           325       }
326       /* fall through */                          326       /* fall through */
327     default:                                      327     default:
328       *nextTokPtr = ptr;                          328       *nextTokPtr = ptr;
329       return XML_TOK_INVALID;                     329       return XML_TOK_INVALID;
330     }                                             330     }
331   }                                               331   }
332   return XML_TOK_PARTIAL;                         332   return XML_TOK_PARTIAL;
333 }                                                 333 }
334                                                   334 
335 static int PTRCALL                                335 static int PTRCALL
336 PREFIX(scanCdataSection)(const ENCODING *enc,     336 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
337                          const char **nextTokP    337                          const char **nextTokPtr) {
338   static const char CDATA_LSQB[]                  338   static const char CDATA_LSQB[]
339       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, A    339       = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
340   int i;                                          340   int i;
341   UNUSED_P(enc);                                  341   UNUSED_P(enc);
342   /* CDATA[ */                                    342   /* CDATA[ */
343   REQUIRE_CHARS(enc, ptr, end, 6);                343   REQUIRE_CHARS(enc, ptr, end, 6);
344   for (i = 0; i < 6; i++, ptr += MINBPC(enc))     344   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
345     if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i]    345     if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
346       *nextTokPtr = ptr;                          346       *nextTokPtr = ptr;
347       return XML_TOK_INVALID;                     347       return XML_TOK_INVALID;
348     }                                             348     }
349   }                                               349   }
350   *nextTokPtr = ptr;                              350   *nextTokPtr = ptr;
351   return XML_TOK_CDATA_SECT_OPEN;                 351   return XML_TOK_CDATA_SECT_OPEN;
352 }                                                 352 }
353                                                   353 
354 static int PTRCALL                                354 static int PTRCALL
355 PREFIX(cdataSectionTok)(const ENCODING *enc, c    355 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
356                         const char **nextTokPt    356                         const char **nextTokPtr) {
357   if (ptr >= end)                                 357   if (ptr >= end)
358     return XML_TOK_NONE;                          358     return XML_TOK_NONE;
359   if (MINBPC(enc) > 1) {                          359   if (MINBPC(enc) > 1) {
360     size_t n = end - ptr;                         360     size_t n = end - ptr;
361     if (n & (MINBPC(enc) - 1)) {                  361     if (n & (MINBPC(enc) - 1)) {
362       n &= ~(MINBPC(enc) - 1);                    362       n &= ~(MINBPC(enc) - 1);
363       if (n == 0)                                 363       if (n == 0)
364         return XML_TOK_PARTIAL;                   364         return XML_TOK_PARTIAL;
365       end = ptr + n;                              365       end = ptr + n;
366     }                                             366     }
367   }                                               367   }
368   switch (BYTE_TYPE(enc, ptr)) {                  368   switch (BYTE_TYPE(enc, ptr)) {
369   case BT_RSQB:                                   369   case BT_RSQB:
370     ptr += MINBPC(enc);                           370     ptr += MINBPC(enc);
371     REQUIRE_CHAR(enc, ptr, end);                  371     REQUIRE_CHAR(enc, ptr, end);
372     if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))     372     if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
373       break;                                      373       break;
374     ptr += MINBPC(enc);                           374     ptr += MINBPC(enc);
375     REQUIRE_CHAR(enc, ptr, end);                  375     REQUIRE_CHAR(enc, ptr, end);
376     if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {     376     if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
377       ptr -= MINBPC(enc);                         377       ptr -= MINBPC(enc);
378       break;                                      378       break;
379     }                                             379     }
380     *nextTokPtr = ptr + MINBPC(enc);              380     *nextTokPtr = ptr + MINBPC(enc);
381     return XML_TOK_CDATA_SECT_CLOSE;              381     return XML_TOK_CDATA_SECT_CLOSE;
382   case BT_CR:                                     382   case BT_CR:
383     ptr += MINBPC(enc);                           383     ptr += MINBPC(enc);
384     REQUIRE_CHAR(enc, ptr, end);                  384     REQUIRE_CHAR(enc, ptr, end);
385     if (BYTE_TYPE(enc, ptr) == BT_LF)             385     if (BYTE_TYPE(enc, ptr) == BT_LF)
386       ptr += MINBPC(enc);                         386       ptr += MINBPC(enc);
387     *nextTokPtr = ptr;                            387     *nextTokPtr = ptr;
388     return XML_TOK_DATA_NEWLINE;                  388     return XML_TOK_DATA_NEWLINE;
389   case BT_LF:                                     389   case BT_LF:
390     *nextTokPtr = ptr + MINBPC(enc);              390     *nextTokPtr = ptr + MINBPC(enc);
391     return XML_TOK_DATA_NEWLINE;                  391     return XML_TOK_DATA_NEWLINE;
392     INVALID_CASES(ptr, nextTokPtr)                392     INVALID_CASES(ptr, nextTokPtr)
393   default:                                        393   default:
394     ptr += MINBPC(enc);                           394     ptr += MINBPC(enc);
395     break;                                        395     break;
396   }                                               396   }
397   while (HAS_CHAR(enc, ptr, end)) {               397   while (HAS_CHAR(enc, ptr, end)) {
398     switch (BYTE_TYPE(enc, ptr)) {                398     switch (BYTE_TYPE(enc, ptr)) {
399 #  define LEAD_CASE(n)                            399 #  define LEAD_CASE(n)                                                         \
400   case BT_LEAD##n:                                400   case BT_LEAD##n:                                                             \
401     if (end - ptr < n || IS_INVALID_CHAR(enc,     401     if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
402       *nextTokPtr = ptr;                          402       *nextTokPtr = ptr;                                                       \
403       return XML_TOK_DATA_CHARS;                  403       return XML_TOK_DATA_CHARS;                                               \
404     }                                             404     }                                                                          \
405     ptr += n;                                     405     ptr += n;                                                                  \
406     break;                                        406     break;
407       LEAD_CASE(2)                                407       LEAD_CASE(2)
408       LEAD_CASE(3)                                408       LEAD_CASE(3)
409       LEAD_CASE(4)                                409       LEAD_CASE(4)
410 #  undef LEAD_CASE                                410 #  undef LEAD_CASE
411     case BT_NONXML:                               411     case BT_NONXML:
412     case BT_MALFORM:                              412     case BT_MALFORM:
413     case BT_TRAIL:                                413     case BT_TRAIL:
414     case BT_CR:                                   414     case BT_CR:
415     case BT_LF:                                   415     case BT_LF:
416     case BT_RSQB:                                 416     case BT_RSQB:
417       *nextTokPtr = ptr;                          417       *nextTokPtr = ptr;
418       return XML_TOK_DATA_CHARS;                  418       return XML_TOK_DATA_CHARS;
419     default:                                      419     default:
420       ptr += MINBPC(enc);                         420       ptr += MINBPC(enc);
421       break;                                      421       break;
422     }                                             422     }
423   }                                               423   }
424   *nextTokPtr = ptr;                              424   *nextTokPtr = ptr;
425   return XML_TOK_DATA_CHARS;                      425   return XML_TOK_DATA_CHARS;
426 }                                                 426 }
427                                                   427 
428 /* ptr points to character following "</" */      428 /* ptr points to character following "</" */
429                                                   429 
430 static int PTRCALL                                430 static int PTRCALL
431 PREFIX(scanEndTag)(const ENCODING *enc, const     431 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
432                    const char **nextTokPtr) {     432                    const char **nextTokPtr) {
433   REQUIRE_CHAR(enc, ptr, end);                    433   REQUIRE_CHAR(enc, ptr, end);
434   switch (BYTE_TYPE(enc, ptr)) {                  434   switch (BYTE_TYPE(enc, ptr)) {
435     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP    435     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
436   default:                                        436   default:
437     *nextTokPtr = ptr;                            437     *nextTokPtr = ptr;
438     return XML_TOK_INVALID;                       438     return XML_TOK_INVALID;
439   }                                               439   }
440   while (HAS_CHAR(enc, ptr, end)) {               440   while (HAS_CHAR(enc, ptr, end)) {
441     switch (BYTE_TYPE(enc, ptr)) {                441     switch (BYTE_TYPE(enc, ptr)) {
442       CHECK_NAME_CASES(enc, ptr, end, nextTokP    442       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
443     case BT_S:                                    443     case BT_S:
444     case BT_CR:                                   444     case BT_CR:
445     case BT_LF:                                   445     case BT_LF:
446       for (ptr += MINBPC(enc); HAS_CHAR(enc, p    446       for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
447         switch (BYTE_TYPE(enc, ptr)) {            447         switch (BYTE_TYPE(enc, ptr)) {
448         case BT_S:                                448         case BT_S:
449         case BT_CR:                               449         case BT_CR:
450         case BT_LF:                               450         case BT_LF:
451           break;                                  451           break;
452         case BT_GT:                               452         case BT_GT:
453           *nextTokPtr = ptr + MINBPC(enc);        453           *nextTokPtr = ptr + MINBPC(enc);
454           return XML_TOK_END_TAG;                 454           return XML_TOK_END_TAG;
455         default:                                  455         default:
456           *nextTokPtr = ptr;                      456           *nextTokPtr = ptr;
457           return XML_TOK_INVALID;                 457           return XML_TOK_INVALID;
458         }                                         458         }
459       }                                           459       }
460       return XML_TOK_PARTIAL;                     460       return XML_TOK_PARTIAL;
461 #  ifdef XML_NS                                   461 #  ifdef XML_NS
462     case BT_COLON:                                462     case BT_COLON:
463       /* no need to check qname syntax here,      463       /* no need to check qname syntax here,
464          since end-tag must match exactly */      464          since end-tag must match exactly */
465       ptr += MINBPC(enc);                         465       ptr += MINBPC(enc);
466       break;                                      466       break;
467 #  endif                                          467 #  endif
468     case BT_GT:                                   468     case BT_GT:
469       *nextTokPtr = ptr + MINBPC(enc);            469       *nextTokPtr = ptr + MINBPC(enc);
470       return XML_TOK_END_TAG;                     470       return XML_TOK_END_TAG;
471     default:                                      471     default:
472       *nextTokPtr = ptr;                          472       *nextTokPtr = ptr;
473       return XML_TOK_INVALID;                     473       return XML_TOK_INVALID;
474     }                                             474     }
475   }                                               475   }
476   return XML_TOK_PARTIAL;                         476   return XML_TOK_PARTIAL;
477 }                                                 477 }
478                                                   478 
479 /* ptr points to character following "&#X" */     479 /* ptr points to character following "&#X" */
480                                                   480 
481 static int PTRCALL                                481 static int PTRCALL
482 PREFIX(scanHexCharRef)(const ENCODING *enc, co    482 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
483                        const char **nextTokPtr    483                        const char **nextTokPtr) {
484   if (HAS_CHAR(enc, ptr, end)) {                  484   if (HAS_CHAR(enc, ptr, end)) {
485     switch (BYTE_TYPE(enc, ptr)) {                485     switch (BYTE_TYPE(enc, ptr)) {
486     case BT_DIGIT:                                486     case BT_DIGIT:
487     case BT_HEX:                                  487     case BT_HEX:
488       break;                                      488       break;
489     default:                                      489     default:
490       *nextTokPtr = ptr;                          490       *nextTokPtr = ptr;
491       return XML_TOK_INVALID;                     491       return XML_TOK_INVALID;
492     }                                             492     }
493     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr    493     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
494       switch (BYTE_TYPE(enc, ptr)) {              494       switch (BYTE_TYPE(enc, ptr)) {
495       case BT_DIGIT:                              495       case BT_DIGIT:
496       case BT_HEX:                                496       case BT_HEX:
497         break;                                    497         break;
498       case BT_SEMI:                               498       case BT_SEMI:
499         *nextTokPtr = ptr + MINBPC(enc);          499         *nextTokPtr = ptr + MINBPC(enc);
500         return XML_TOK_CHAR_REF;                  500         return XML_TOK_CHAR_REF;
501       default:                                    501       default:
502         *nextTokPtr = ptr;                        502         *nextTokPtr = ptr;
503         return XML_TOK_INVALID;                   503         return XML_TOK_INVALID;
504       }                                           504       }
505     }                                             505     }
506   }                                               506   }
507   return XML_TOK_PARTIAL;                         507   return XML_TOK_PARTIAL;
508 }                                                 508 }
509                                                   509 
510 /* ptr points to character following "&#" */      510 /* ptr points to character following "&#" */
511                                                   511 
512 static int PTRCALL                                512 static int PTRCALL
513 PREFIX(scanCharRef)(const ENCODING *enc, const    513 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
514                     const char **nextTokPtr) {    514                     const char **nextTokPtr) {
515   if (HAS_CHAR(enc, ptr, end)) {                  515   if (HAS_CHAR(enc, ptr, end)) {
516     if (CHAR_MATCHES(enc, ptr, ASCII_x))          516     if (CHAR_MATCHES(enc, ptr, ASCII_x))
517       return PREFIX(scanHexCharRef)(enc, ptr +    517       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
518     switch (BYTE_TYPE(enc, ptr)) {                518     switch (BYTE_TYPE(enc, ptr)) {
519     case BT_DIGIT:                                519     case BT_DIGIT:
520       break;                                      520       break;
521     default:                                      521     default:
522       *nextTokPtr = ptr;                          522       *nextTokPtr = ptr;
523       return XML_TOK_INVALID;                     523       return XML_TOK_INVALID;
524     }                                             524     }
525     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr    525     for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
526       switch (BYTE_TYPE(enc, ptr)) {              526       switch (BYTE_TYPE(enc, ptr)) {
527       case BT_DIGIT:                              527       case BT_DIGIT:
528         break;                                    528         break;
529       case BT_SEMI:                               529       case BT_SEMI:
530         *nextTokPtr = ptr + MINBPC(enc);          530         *nextTokPtr = ptr + MINBPC(enc);
531         return XML_TOK_CHAR_REF;                  531         return XML_TOK_CHAR_REF;
532       default:                                    532       default:
533         *nextTokPtr = ptr;                        533         *nextTokPtr = ptr;
534         return XML_TOK_INVALID;                   534         return XML_TOK_INVALID;
535       }                                           535       }
536     }                                             536     }
537   }                                               537   }
538   return XML_TOK_PARTIAL;                         538   return XML_TOK_PARTIAL;
539 }                                                 539 }
540                                                   540 
541 /* ptr points to character following "&" */       541 /* ptr points to character following "&" */
542                                                   542 
543 static int PTRCALL                                543 static int PTRCALL
544 PREFIX(scanRef)(const ENCODING *enc, const cha    544 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
545                 const char **nextTokPtr) {        545                 const char **nextTokPtr) {
546   REQUIRE_CHAR(enc, ptr, end);                    546   REQUIRE_CHAR(enc, ptr, end);
547   switch (BYTE_TYPE(enc, ptr)) {                  547   switch (BYTE_TYPE(enc, ptr)) {
548     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP    548     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
549   case BT_NUM:                                    549   case BT_NUM:
550     return PREFIX(scanCharRef)(enc, ptr + MINB    550     return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
551   default:                                        551   default:
552     *nextTokPtr = ptr;                            552     *nextTokPtr = ptr;
553     return XML_TOK_INVALID;                       553     return XML_TOK_INVALID;
554   }                                               554   }
555   while (HAS_CHAR(enc, ptr, end)) {               555   while (HAS_CHAR(enc, ptr, end)) {
556     switch (BYTE_TYPE(enc, ptr)) {                556     switch (BYTE_TYPE(enc, ptr)) {
557       CHECK_NAME_CASES(enc, ptr, end, nextTokP    557       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
558     case BT_SEMI:                                 558     case BT_SEMI:
559       *nextTokPtr = ptr + MINBPC(enc);            559       *nextTokPtr = ptr + MINBPC(enc);
560       return XML_TOK_ENTITY_REF;                  560       return XML_TOK_ENTITY_REF;
561     default:                                      561     default:
562       *nextTokPtr = ptr;                          562       *nextTokPtr = ptr;
563       return XML_TOK_INVALID;                     563       return XML_TOK_INVALID;
564     }                                             564     }
565   }                                               565   }
566   return XML_TOK_PARTIAL;                         566   return XML_TOK_PARTIAL;
567 }                                                 567 }
568                                                   568 
569 /* ptr points to character following first cha    569 /* ptr points to character following first character of attribute name */
570                                                   570 
571 static int PTRCALL                                571 static int PTRCALL
572 PREFIX(scanAtts)(const ENCODING *enc, const ch    572 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
573                  const char **nextTokPtr) {       573                  const char **nextTokPtr) {
574 #  ifdef XML_NS                                   574 #  ifdef XML_NS
575   int hadColon = 0;                               575   int hadColon = 0;
576 #  endif                                          576 #  endif
577   while (HAS_CHAR(enc, ptr, end)) {               577   while (HAS_CHAR(enc, ptr, end)) {
578     switch (BYTE_TYPE(enc, ptr)) {                578     switch (BYTE_TYPE(enc, ptr)) {
579       CHECK_NAME_CASES(enc, ptr, end, nextTokP    579       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
580 #  ifdef XML_NS                                   580 #  ifdef XML_NS
581     case BT_COLON:                                581     case BT_COLON:
582       if (hadColon) {                             582       if (hadColon) {
583         *nextTokPtr = ptr;                        583         *nextTokPtr = ptr;
584         return XML_TOK_INVALID;                   584         return XML_TOK_INVALID;
585       }                                           585       }
586       hadColon = 1;                               586       hadColon = 1;
587       ptr += MINBPC(enc);                         587       ptr += MINBPC(enc);
588       REQUIRE_CHAR(enc, ptr, end);                588       REQUIRE_CHAR(enc, ptr, end);
589       switch (BYTE_TYPE(enc, ptr)) {              589       switch (BYTE_TYPE(enc, ptr)) {
590         CHECK_NMSTRT_CASES(enc, ptr, end, next    590         CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
591       default:                                    591       default:
592         *nextTokPtr = ptr;                        592         *nextTokPtr = ptr;
593         return XML_TOK_INVALID;                   593         return XML_TOK_INVALID;
594       }                                           594       }
595       break;                                      595       break;
596 #  endif                                          596 #  endif
597     case BT_S:                                    597     case BT_S:
598     case BT_CR:                                   598     case BT_CR:
599     case BT_LF:                                   599     case BT_LF:
600       for (;;) {                                  600       for (;;) {
601         int t;                                    601         int t;
602                                                   602 
603         ptr += MINBPC(enc);                       603         ptr += MINBPC(enc);
604         REQUIRE_CHAR(enc, ptr, end);              604         REQUIRE_CHAR(enc, ptr, end);
605         t = BYTE_TYPE(enc, ptr);                  605         t = BYTE_TYPE(enc, ptr);
606         if (t == BT_EQUALS)                       606         if (t == BT_EQUALS)
607           break;                                  607           break;
608         switch (t) {                              608         switch (t) {
609         case BT_S:                                609         case BT_S:
610         case BT_LF:                               610         case BT_LF:
611         case BT_CR:                               611         case BT_CR:
612           break;                                  612           break;
613         default:                                  613         default:
614           *nextTokPtr = ptr;                      614           *nextTokPtr = ptr;
615           return XML_TOK_INVALID;                 615           return XML_TOK_INVALID;
616         }                                         616         }
617       }                                           617       }
618       /* fall through */                          618       /* fall through */
619     case BT_EQUALS: {                             619     case BT_EQUALS: {
620       int open;                                   620       int open;
621 #  ifdef XML_NS                                   621 #  ifdef XML_NS
622       hadColon = 0;                               622       hadColon = 0;
623 #  endif                                          623 #  endif
624       for (;;) {                                  624       for (;;) {
625         ptr += MINBPC(enc);                       625         ptr += MINBPC(enc);
626         REQUIRE_CHAR(enc, ptr, end);              626         REQUIRE_CHAR(enc, ptr, end);
627         open = BYTE_TYPE(enc, ptr);               627         open = BYTE_TYPE(enc, ptr);
628         if (open == BT_QUOT || open == BT_APOS    628         if (open == BT_QUOT || open == BT_APOS)
629           break;                                  629           break;
630         switch (open) {                           630         switch (open) {
631         case BT_S:                                631         case BT_S:
632         case BT_LF:                               632         case BT_LF:
633         case BT_CR:                               633         case BT_CR:
634           break;                                  634           break;
635         default:                                  635         default:
636           *nextTokPtr = ptr;                      636           *nextTokPtr = ptr;
637           return XML_TOK_INVALID;                 637           return XML_TOK_INVALID;
638         }                                         638         }
639       }                                           639       }
640       ptr += MINBPC(enc);                         640       ptr += MINBPC(enc);
641       /* in attribute value */                    641       /* in attribute value */
642       for (;;) {                                  642       for (;;) {
643         int t;                                    643         int t;
644         REQUIRE_CHAR(enc, ptr, end);              644         REQUIRE_CHAR(enc, ptr, end);
645         t = BYTE_TYPE(enc, ptr);                  645         t = BYTE_TYPE(enc, ptr);
646         if (t == open)                            646         if (t == open)
647           break;                                  647           break;
648         switch (t) {                              648         switch (t) {
649           INVALID_CASES(ptr, nextTokPtr)          649           INVALID_CASES(ptr, nextTokPtr)
650         case BT_AMP: {                            650         case BT_AMP: {
651           int tok = PREFIX(scanRef)(enc, ptr +    651           int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
652           if (tok <= 0) {                         652           if (tok <= 0) {
653             if (tok == XML_TOK_INVALID)           653             if (tok == XML_TOK_INVALID)
654               *nextTokPtr = ptr;                  654               *nextTokPtr = ptr;
655             return tok;                           655             return tok;
656           }                                       656           }
657           break;                                  657           break;
658         }                                         658         }
659         case BT_LT:                               659         case BT_LT:
660           *nextTokPtr = ptr;                      660           *nextTokPtr = ptr;
661           return XML_TOK_INVALID;                 661           return XML_TOK_INVALID;
662         default:                                  662         default:
663           ptr += MINBPC(enc);                     663           ptr += MINBPC(enc);
664           break;                                  664           break;
665         }                                         665         }
666       }                                           666       }
667       ptr += MINBPC(enc);                         667       ptr += MINBPC(enc);
668       REQUIRE_CHAR(enc, ptr, end);                668       REQUIRE_CHAR(enc, ptr, end);
669       switch (BYTE_TYPE(enc, ptr)) {              669       switch (BYTE_TYPE(enc, ptr)) {
670       case BT_S:                                  670       case BT_S:
671       case BT_CR:                                 671       case BT_CR:
672       case BT_LF:                                 672       case BT_LF:
673         break;                                    673         break;
674       case BT_SOL:                                674       case BT_SOL:
675         goto sol;                                 675         goto sol;
676       case BT_GT:                                 676       case BT_GT:
677         goto gt;                                  677         goto gt;
678       default:                                    678       default:
679         *nextTokPtr = ptr;                        679         *nextTokPtr = ptr;
680         return XML_TOK_INVALID;                   680         return XML_TOK_INVALID;
681       }                                           681       }
682       /* ptr points to closing quote */           682       /* ptr points to closing quote */
683       for (;;) {                                  683       for (;;) {
684         ptr += MINBPC(enc);                       684         ptr += MINBPC(enc);
685         REQUIRE_CHAR(enc, ptr, end);              685         REQUIRE_CHAR(enc, ptr, end);
686         switch (BYTE_TYPE(enc, ptr)) {            686         switch (BYTE_TYPE(enc, ptr)) {
687           CHECK_NMSTRT_CASES(enc, ptr, end, ne    687           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
688         case BT_S:                                688         case BT_S:
689         case BT_CR:                               689         case BT_CR:
690         case BT_LF:                               690         case BT_LF:
691           continue;                               691           continue;
692         case BT_GT:                               692         case BT_GT:
693         gt:                                       693         gt:
694           *nextTokPtr = ptr + MINBPC(enc);        694           *nextTokPtr = ptr + MINBPC(enc);
695           return XML_TOK_START_TAG_WITH_ATTS;     695           return XML_TOK_START_TAG_WITH_ATTS;
696         case BT_SOL:                              696         case BT_SOL:
697         sol:                                      697         sol:
698           ptr += MINBPC(enc);                     698           ptr += MINBPC(enc);
699           REQUIRE_CHAR(enc, ptr, end);            699           REQUIRE_CHAR(enc, ptr, end);
700           if (! CHAR_MATCHES(enc, ptr, ASCII_G    700           if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
701             *nextTokPtr = ptr;                    701             *nextTokPtr = ptr;
702             return XML_TOK_INVALID;               702             return XML_TOK_INVALID;
703           }                                       703           }
704           *nextTokPtr = ptr + MINBPC(enc);        704           *nextTokPtr = ptr + MINBPC(enc);
705           return XML_TOK_EMPTY_ELEMENT_WITH_AT    705           return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
706         default:                                  706         default:
707           *nextTokPtr = ptr;                      707           *nextTokPtr = ptr;
708           return XML_TOK_INVALID;                 708           return XML_TOK_INVALID;
709         }                                         709         }
710         break;                                    710         break;
711       }                                           711       }
712       break;                                      712       break;
713     }                                             713     }
714     default:                                      714     default:
715       *nextTokPtr = ptr;                          715       *nextTokPtr = ptr;
716       return XML_TOK_INVALID;                     716       return XML_TOK_INVALID;
717     }                                             717     }
718   }                                               718   }
719   return XML_TOK_PARTIAL;                         719   return XML_TOK_PARTIAL;
720 }                                                 720 }
721                                                   721 
722 /* ptr points to character following "<" */       722 /* ptr points to character following "<" */
723                                                   723 
/* Scans the markup that begins right after a less-than sign.
 *
 * The first character selects the construct:
 *   - BT_EXCL followed by BT_MINUS -> delegated to scanComment
 *   - BT_EXCL followed by BT_LSQB  -> delegated to scanCdataSection
 *   - BT_EXCL followed by anything else -> XML_TOK_INVALID
 *   - BT_QUEST -> delegated to scanPi
 *   - BT_SOL   -> delegated to scanEndTag
 *   - the cases supplied by CHECK_NMSTRT_CASES -> start-tag scanning below
 *     (macro defined outside this view; presumably it accepts a name-start
 *     character and advances ptr past it -- TODO confirm)
 *   - any other byte type -> XML_TOK_INVALID
 *
 * Start-tag scanning returns XML_TOK_START_TAG_NO_ATTS,
 * XML_TOK_EMPTY_ELEMENT_NO_ATTS, the result of scanAtts, XML_TOK_INVALID,
 * or XML_TOK_PARTIAL when the input ends before the tag is complete.
 * On the paths visible here, *nextTokPtr is set to the offending character
 * for XML_TOK_INVALID and to the character after the closing GT for the
 * two *_NO_ATTS tokens.
 */
724 static int PTRCALL                                724 static int PTRCALL
725 PREFIX(scanLt)(const ENCODING *enc, const char    725 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
726                const char **nextTokPtr) {         726                const char **nextTokPtr) {
727 #  ifdef XML_NS                                   727 #  ifdef XML_NS
      /* Set once a colon has been seen in the tag name (namespace builds only). */
728   int hadColon;                                   728   int hadColon;
729 #  endif                                          729 #  endif
      /* REQUIRE_CHAR is defined outside this view; presumably it returns early
         when no complete character is available at ptr -- TODO confirm. */
730   REQUIRE_CHAR(enc, ptr, end);                    730   REQUIRE_CHAR(enc, ptr, end);
731   switch (BYTE_TYPE(enc, ptr)) {                  731   switch (BYTE_TYPE(enc, ptr)) {
732     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP    732     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
733   case BT_EXCL:                                   733   case BT_EXCL:
734     ptr += MINBPC(enc);                           734     ptr += MINBPC(enc);
735     REQUIRE_CHAR(enc, ptr, end);                  735     REQUIRE_CHAR(enc, ptr, end);
736     switch (BYTE_TYPE(enc, ptr)) {                736     switch (BYTE_TYPE(enc, ptr)) {
737     case BT_MINUS:                                737     case BT_MINUS:
738       return PREFIX(scanComment)(enc, ptr + MI    738       return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
739     case BT_LSQB:                                 739     case BT_LSQB:
740       return PREFIX(scanCdataSection)(enc, ptr    740       return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
741     }                                             741     }
        /* Neither a comment nor a CDATA section follows the exclamation mark. */
742     *nextTokPtr = ptr;                            742     *nextTokPtr = ptr;
743     return XML_TOK_INVALID;                       743     return XML_TOK_INVALID;
744   case BT_QUEST:                                  744   case BT_QUEST:
745     return PREFIX(scanPi)(enc, ptr + MINBPC(en    745     return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
746   case BT_SOL:                                    746   case BT_SOL:
747     return PREFIX(scanEndTag)(enc, ptr + MINBP    747     return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
748   default:                                        748   default:
749     *nextTokPtr = ptr;                            749     *nextTokPtr = ptr;
750     return XML_TOK_INVALID;                       750     return XML_TOK_INVALID;
751   }                                               751   }
752 #  ifdef XML_NS                                   752 #  ifdef XML_NS
753   hadColon = 0;                                   753   hadColon = 0;
754 #  endif                                          754 #  endif
755   /* we have a start-tag */                       755   /* we have a start-tag */
756   while (HAS_CHAR(enc, ptr, end)) {               756   while (HAS_CHAR(enc, ptr, end)) {
757     switch (BYTE_TYPE(enc, ptr)) {                757     switch (BYTE_TYPE(enc, ptr)) {
758       CHECK_NAME_CASES(enc, ptr, end, nextTokP    758       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
759 #  ifdef XML_NS                                   759 #  ifdef XML_NS
760     case BT_COLON:                                760     case BT_COLON:
          /* A second colon in the tag name is rejected. */
761       if (hadColon) {                             761       if (hadColon) {
762         *nextTokPtr = ptr;                        762         *nextTokPtr = ptr;
763         return XML_TOK_INVALID;                   763         return XML_TOK_INVALID;
764       }                                           764       }
765       hadColon = 1;                               765       hadColon = 1;
766       ptr += MINBPC(enc);                         766       ptr += MINBPC(enc);
767       REQUIRE_CHAR(enc, ptr, end);                767       REQUIRE_CHAR(enc, ptr, end);
          /* The character after the colon must match one of the
             CHECK_NMSTRT_CASES cases; anything else is invalid. */
768       switch (BYTE_TYPE(enc, ptr)) {              768       switch (BYTE_TYPE(enc, ptr)) {
769         CHECK_NMSTRT_CASES(enc, ptr, end, next    769         CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
770       default:                                    770       default:
771         *nextTokPtr = ptr;                        771         *nextTokPtr = ptr;
772         return XML_TOK_INVALID;                   772         return XML_TOK_INVALID;
773       }                                           773       }
774       break;                                      774       break;
775 #  endif                                          775 #  endif
776     case BT_S:                                    776     case BT_S:
777     case BT_CR:                                   777     case BT_CR:
778     case BT_LF: {                                 778     case BT_LF: {
          /* Whitespace ends the tag name. Skip any further whitespace, then
             decide between end of tag (gt / sol labels below) and attributes. */
779       ptr += MINBPC(enc);                         779       ptr += MINBPC(enc);
780       while (HAS_CHAR(enc, ptr, end)) {           780       while (HAS_CHAR(enc, ptr, end)) {
781         switch (BYTE_TYPE(enc, ptr)) {            781         switch (BYTE_TYPE(enc, ptr)) {
782           CHECK_NMSTRT_CASES(enc, ptr, end, ne    782           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
783         case BT_GT:                               783         case BT_GT:
784           goto gt;                                784           goto gt;
785         case BT_SOL:                              785         case BT_SOL:
786           goto sol;                               786           goto sol;
787         case BT_S:                                787         case BT_S:
788         case BT_CR:                               788         case BT_CR:
789         case BT_LF:                               789         case BT_LF:
790           ptr += MINBPC(enc);                     790           ptr += MINBPC(enc);
791           continue;                               791           continue;
792         default:                                  792         default:
793           *nextTokPtr = ptr;                      793           *nextTokPtr = ptr;
794           return XML_TOK_INVALID;                 794           return XML_TOK_INVALID;
795         }                                         795         }
            /* Reached only when the switch is left by a break, which none of the
               explicit cases above do; presumably the CHECK_NMSTRT_CASES cases
               break after accepting the first character of an attribute name
               -- TODO confirm against the macro definition. */
796         return PREFIX(scanAtts)(enc, ptr, end,    796         return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
797       }                                           797       }
          /* Input ended while skipping whitespace inside the tag. */
798       return XML_TOK_PARTIAL;                     798       return XML_TOK_PARTIAL;
799     }                                             799     }
800     case BT_GT:                                   800     case BT_GT:
801     gt:                                           801     gt:
          /* Also the target of goto gt from the whitespace loop above. */
802       *nextTokPtr = ptr + MINBPC(enc);            802       *nextTokPtr = ptr + MINBPC(enc);
803       return XML_TOK_START_TAG_NO_ATTS;           803       return XML_TOK_START_TAG_NO_ATTS;
804     case BT_SOL:                                  804     case BT_SOL:
805     sol:                                          805     sol:
          /* A solidus must be followed immediately by ASCII_GT to form an
             empty-element tag; anything else is invalid. */
806       ptr += MINBPC(enc);                         806       ptr += MINBPC(enc);
807       REQUIRE_CHAR(enc, ptr, end);                807       REQUIRE_CHAR(enc, ptr, end);
808       if (! CHAR_MATCHES(enc, ptr, ASCII_GT))     808       if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
809         *nextTokPtr = ptr;                        809         *nextTokPtr = ptr;
810         return XML_TOK_INVALID;                   810         return XML_TOK_INVALID;
811       }                                           811       }
812       *nextTokPtr = ptr + MINBPC(enc);            812       *nextTokPtr = ptr + MINBPC(enc);
813       return XML_TOK_EMPTY_ELEMENT_NO_ATTS;       813       return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
814     default:                                      814     default:
815       *nextTokPtr = ptr;                          815       *nextTokPtr = ptr;
816       return XML_TOK_INVALID;                     816       return XML_TOK_INVALID;
817     }                                             817     }
818   }                                               818   }
      /* Input ended in the middle of the tag name. */
819   return XML_TOK_PARTIAL;                         819   return XML_TOK_PARTIAL;
820 }                                                 820 }
821                                                   821 
/* Tokenizes element content starting at ptr.
 *
 * Returns XML_TOK_NONE when ptr >= end. For encodings where MINBPC(enc) > 1,
 * end is first rounded down so that the span is a whole number of MINBPC
 * units; if no whole unit remains the result is XML_TOK_PARTIAL.
 *
 * The first character decides the token:
 *   - BT_LT  -> result of scanLt
 *   - BT_AMP -> result of scanRef
 *   - BT_CR  -> XML_TOK_TRAILING_CR if it is the last character available,
 *               otherwise XML_TOK_DATA_NEWLINE (a directly following BT_LF
 *               is consumed as part of the same newline)
 *   - BT_LF  -> XML_TOK_DATA_NEWLINE
 *   - BT_RSQB -> XML_TOK_TRAILING_RSQB if too little input follows to decide,
 *               XML_TOK_INVALID if it starts the sequence RSQB RSQB GT,
 *               otherwise it begins a run of character data
 *   - the cases supplied by INVALID_CASES (macro defined outside this view)
 *   - anything else begins a run of character data
 *
 * A run of character data is returned as XML_TOK_DATA_CHARS with
 * *nextTokPtr at the first character that is not part of the run.
 */
822 static int PTRCALL                                822 static int PTRCALL
823 PREFIX(contentTok)(const ENCODING *enc, const     823 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
824                    const char **nextTokPtr) {     824                    const char **nextTokPtr) {
825   if (ptr >= end)                                 825   if (ptr >= end)
826     return XML_TOK_NONE;                          826     return XML_TOK_NONE;
827   if (MINBPC(enc) > 1) {                          827   if (MINBPC(enc) > 1) {
        /* Drop trailing bytes that do not make up a whole MINBPC unit. The
           masking assumes MINBPC(enc) is a power of two -- TODO confirm. */
828     size_t n = end - ptr;                         828     size_t n = end - ptr;
829     if (n & (MINBPC(enc) - 1)) {                  829     if (n & (MINBPC(enc) - 1)) {
830       n &= ~(MINBPC(enc) - 1);                    830       n &= ~(MINBPC(enc) - 1);
831       if (n == 0)                                 831       if (n == 0)
832         return XML_TOK_PARTIAL;                   832         return XML_TOK_PARTIAL;
833       end = ptr + n;                              833       end = ptr + n;
834     }                                             834     }
835   }                                               835   }
836   switch (BYTE_TYPE(enc, ptr)) {                  836   switch (BYTE_TYPE(enc, ptr)) {
837   case BT_LT:                                     837   case BT_LT:
838     return PREFIX(scanLt)(enc, ptr + MINBPC(en    838     return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
839   case BT_AMP:                                    839   case BT_AMP:
840     return PREFIX(scanRef)(enc, ptr + MINBPC(e    840     return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
841   case BT_CR:                                     841   case BT_CR:
842     ptr += MINBPC(enc);                           842     ptr += MINBPC(enc);
        /* A CR at the very end may be the first half of a CR LF pair, so it is
           reported separately instead of as a complete newline. */
843     if (! HAS_CHAR(enc, ptr, end))                843     if (! HAS_CHAR(enc, ptr, end))
844       return XML_TOK_TRAILING_CR;                 844       return XML_TOK_TRAILING_CR;
845     if (BYTE_TYPE(enc, ptr) == BT_LF)             845     if (BYTE_TYPE(enc, ptr) == BT_LF)
846       ptr += MINBPC(enc);                         846       ptr += MINBPC(enc);
847     *nextTokPtr = ptr;                            847     *nextTokPtr = ptr;
848     return XML_TOK_DATA_NEWLINE;                  848     return XML_TOK_DATA_NEWLINE;
849   case BT_LF:                                     849   case BT_LF:
850     *nextTokPtr = ptr + MINBPC(enc);              850     *nextTokPtr = ptr + MINBPC(enc);
851     return XML_TOK_DATA_NEWLINE;                  851     return XML_TOK_DATA_NEWLINE;
852   case BT_RSQB:                                   852   case BT_RSQB:
        /* Look ahead for RSQB RSQB GT. Leaving this case by break continues
           into the data-character loop below with ptr already past the first
           RSQB. */
853     ptr += MINBPC(enc);                           853     ptr += MINBPC(enc);
854     if (! HAS_CHAR(enc, ptr, end))                854     if (! HAS_CHAR(enc, ptr, end))
855       return XML_TOK_TRAILING_RSQB;               855       return XML_TOK_TRAILING_RSQB;
856     if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))     856     if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
857       break;                                      857       break;
858     ptr += MINBPC(enc);                           858     ptr += MINBPC(enc);
859     if (! HAS_CHAR(enc, ptr, end))                859     if (! HAS_CHAR(enc, ptr, end))
860       return XML_TOK_TRAILING_RSQB;               860       return XML_TOK_TRAILING_RSQB;
861     if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {     861     if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
          /* Step back onto the second RSQB so the loop below examines it as
             the possible start of another RSQB RSQB GT sequence. */
862       ptr -= MINBPC(enc);                         862       ptr -= MINBPC(enc);
863       break;                                      863       break;
864     }                                             864     }
        /* RSQB RSQB GT found: invalid here, reported at the GT. */
865     *nextTokPtr = ptr;                            865     *nextTokPtr = ptr;
866     return XML_TOK_INVALID;                       866     return XML_TOK_INVALID;
867     INVALID_CASES(ptr, nextTokPtr)                867     INVALID_CASES(ptr, nextTokPtr)
868   default:                                        868   default:
869     ptr += MINBPC(enc);                           869     ptr += MINBPC(enc);
870     break;                                        870     break;
871   }                                               871   }
      /* Extend the run of character data until a character that must start its
         own token, or the end of the input, is reached. */
872   while (HAS_CHAR(enc, ptr, end)) {               872   while (HAS_CHAR(enc, ptr, end)) {
873     switch (BYTE_TYPE(enc, ptr)) {                873     switch (BYTE_TYPE(enc, ptr)) {
        /* LEAD_CASE(n): for byte type BT_LEADn, end the run at ptr when fewer
           than n bytes remain or IS_INVALID_CHAR reports the sequence invalid;
           otherwise skip the n bytes and keep scanning. */
874 #  define LEAD_CASE(n)                            874 #  define LEAD_CASE(n)                                                         \
875   case BT_LEAD##n:                                875   case BT_LEAD##n:                                                             \
876     if (end - ptr < n || IS_INVALID_CHAR(enc,     876     if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
877       *nextTokPtr = ptr;                          877       *nextTokPtr = ptr;                                                       \
878       return XML_TOK_DATA_CHARS;                  878       return XML_TOK_DATA_CHARS;                                               \
879     }                                             879     }                                                                          \
880     ptr += n;                                     880     ptr += n;                                                                  \
881     break;                                        881     break;
882       LEAD_CASE(2)                                882       LEAD_CASE(2)
883       LEAD_CASE(3)                                883       LEAD_CASE(3)
884       LEAD_CASE(4)                                884       LEAD_CASE(4)
885 #  undef LEAD_CASE                                885 #  undef LEAD_CASE
886     case BT_RSQB:                                 886     case BT_RSQB:
          /* With enough lookahead, an RSQB that does not start RSQB RSQB GT is
             ordinary data; a complete RSQB RSQB GT is invalid and reported at
             the GT. With too little lookahead to decide, fall through and end
             the run just before this RSQB. */
887       if (HAS_CHARS(enc, ptr, end, 2)) {          887       if (HAS_CHARS(enc, ptr, end, 2)) {
888         if (! CHAR_MATCHES(enc, ptr + MINBPC(e    888         if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
889           ptr += MINBPC(enc);                     889           ptr += MINBPC(enc);
890           break;                                  890           break;
891         }                                         891         }
892         if (HAS_CHARS(enc, ptr, end, 3)) {        892         if (HAS_CHARS(enc, ptr, end, 3)) {
893           if (! CHAR_MATCHES(enc, ptr + 2 * MI    893           if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
894             ptr += MINBPC(enc);                   894             ptr += MINBPC(enc);
895             break;                                895             break;
896           }                                       896           }
897           *nextTokPtr = ptr + 2 * MINBPC(enc);    897           *nextTokPtr = ptr + 2 * MINBPC(enc);
898           return XML_TOK_INVALID;                 898           return XML_TOK_INVALID;
899         }                                         899         }
900       }                                           900       }
901       /* fall through */                          901       /* fall through */
902     case BT_AMP:                                  902     case BT_AMP:
903     case BT_LT:                                   903     case BT_LT:
904     case BT_NONXML:                               904     case BT_NONXML:
905     case BT_MALFORM:                              905     case BT_MALFORM:
906     case BT_TRAIL:                                906     case BT_TRAIL:
907     case BT_CR:                                   907     case BT_CR:
908     case BT_LF:                                   908     case BT_LF:
          /* These byte types end the data run; the character itself is left
             for the next call. */
909       *nextTokPtr = ptr;                          909       *nextTokPtr = ptr;
910       return XML_TOK_DATA_CHARS;                  910       return XML_TOK_DATA_CHARS;
911     default:                                      911     default:
912       ptr += MINBPC(enc);                         912       ptr += MINBPC(enc);
913       break;                                      913       break;
914     }                                             914     }
915   }                                               915   }
      /* Input exhausted: everything scanned so far is character data. */
916   *nextTokPtr = ptr;                              916   *nextTokPtr = ptr;
917   return XML_TOK_DATA_CHARS;                      917   return XML_TOK_DATA_CHARS;
918 }                                                 918 }
919                                                   919 
920 /* ptr points to character following "%" */       920 /* ptr points to character following "%" */
921                                                   921 
/* Scans what follows a percent sign.
 *
 * Returns:
 *   - XML_TOK_PERCENT when the next character is BT_S, BT_LF, BT_CR or
 *     BT_PERCNT; *nextTokPtr is left on that character (it is not consumed)
 *   - XML_TOK_PARAM_ENTITY_REF when a name terminated by BT_SEMI follows;
 *     *nextTokPtr points just past the semicolon
 *   - XML_TOK_INVALID, with *nextTokPtr on the offending character, for any
 *     other first character or any other character ending the name
 *   - XML_TOK_PARTIAL when the input ends inside the name
 *
 * REQUIRE_CHAR, CHECK_NMSTRT_CASES and CHECK_NAME_CASES are macros defined
 * outside this view and may add further early returns -- TODO confirm.
 */
922 static int PTRCALL                                922 static int PTRCALL
923 PREFIX(scanPercent)(const ENCODING *enc, const    923 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
924                     const char **nextTokPtr) {    924                     const char **nextTokPtr) {
925   REQUIRE_CHAR(enc, ptr, end);                    925   REQUIRE_CHAR(enc, ptr, end);
926   switch (BYTE_TYPE(enc, ptr)) {                  926   switch (BYTE_TYPE(enc, ptr)) {
927     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP    927     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
928   case BT_S:                                      928   case BT_S:
929   case BT_LF:                                     929   case BT_LF:
930   case BT_CR:                                     930   case BT_CR:
931   case BT_PERCNT:                                 931   case BT_PERCNT:
        /* No name follows, so the percent sign is a token by itself. */
932     *nextTokPtr = ptr;                            932     *nextTokPtr = ptr;
933     return XML_TOK_PERCENT;                       933     return XML_TOK_PERCENT;
934   default:                                        934   default:
935     *nextTokPtr = ptr;                            935     *nextTokPtr = ptr;
936     return XML_TOK_INVALID;                       936     return XML_TOK_INVALID;
937   }                                               937   }
      /* Consume the rest of the name; only a semicolon may end it. */
938   while (HAS_CHAR(enc, ptr, end)) {               938   while (HAS_CHAR(enc, ptr, end)) {
939     switch (BYTE_TYPE(enc, ptr)) {                939     switch (BYTE_TYPE(enc, ptr)) {
940       CHECK_NAME_CASES(enc, ptr, end, nextTokP    940       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
941     case BT_SEMI:                                 941     case BT_SEMI:
942       *nextTokPtr = ptr + MINBPC(enc);            942       *nextTokPtr = ptr + MINBPC(enc);
943       return XML_TOK_PARAM_ENTITY_REF;            943       return XML_TOK_PARAM_ENTITY_REF;
944     default:                                      944     default:
945       *nextTokPtr = ptr;                          945       *nextTokPtr = ptr;
946       return XML_TOK_INVALID;                     946       return XML_TOK_INVALID;
947     }                                             947     }
948   }                                               948   }
949   return XML_TOK_PARTIAL;                         949   return XML_TOK_PARTIAL;
950 }                                                 950 }
951                                                   951 
/* Scans a name and reports it as XML_TOK_POUND_NAME.
 * NOTE(review): judging by the function name, ptr presumably points just
 * after a pound sign; the caller is outside this view -- confirm.
 *
 * Returns:
 *   - XML_TOK_POUND_NAME when the name is ended by BT_CR, BT_LF, BT_S,
 *     BT_RPAR, BT_GT, BT_PERCNT or BT_VERBAR; *nextTokPtr is left on that
 *     terminating character
 *   - XML_TOK_INVALID, with *nextTokPtr on the offending character, when the
 *     first character is not accepted by CHECK_NMSTRT_CASES or the name is
 *     ended by any other character
 *   - -XML_TOK_POUND_NAME (negated) when the input ends while still inside
 *     the name; the visible code does not set *nextTokPtr on this path
 *
 * REQUIRE_CHAR, CHECK_NMSTRT_CASES and CHECK_NAME_CASES are macros defined
 * outside this view and may add further early returns -- TODO confirm.
 */
952 static int PTRCALL                                952 static int PTRCALL
953 PREFIX(scanPoundName)(const ENCODING *enc, con    953 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
954                       const char **nextTokPtr)    954                       const char **nextTokPtr) {
955   REQUIRE_CHAR(enc, ptr, end);                    955   REQUIRE_CHAR(enc, ptr, end);
956   switch (BYTE_TYPE(enc, ptr)) {                  956   switch (BYTE_TYPE(enc, ptr)) {
957     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP    957     CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
958   default:                                        958   default:
959     *nextTokPtr = ptr;                            959     *nextTokPtr = ptr;
960     return XML_TOK_INVALID;                       960     return XML_TOK_INVALID;
961   }                                               961   }
962   while (HAS_CHAR(enc, ptr, end)) {               962   while (HAS_CHAR(enc, ptr, end)) {
963     switch (BYTE_TYPE(enc, ptr)) {                963     switch (BYTE_TYPE(enc, ptr)) {
964       CHECK_NAME_CASES(enc, ptr, end, nextTokP    964       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
965     case BT_CR:                                   965     case BT_CR:
966     case BT_LF:                                   966     case BT_LF:
967     case BT_S:                                    967     case BT_S:
968     case BT_RPAR:                                 968     case BT_RPAR:
969     case BT_GT:                                   969     case BT_GT:
970     case BT_PERCNT:                               970     case BT_PERCNT:
971     case BT_VERBAR:                               971     case BT_VERBAR:
          /* Accepted terminators: the name ends here, terminator not consumed. */
972       *nextTokPtr = ptr;                          972       *nextTokPtr = ptr;
973       return XML_TOK_POUND_NAME;                  973       return XML_TOK_POUND_NAME;
974     default:                                      974     default:
975       *nextTokPtr = ptr;                          975       *nextTokPtr = ptr;
976       return XML_TOK_INVALID;                     976       return XML_TOK_INVALID;
977     }                                             977     }
978   }                                               978   }
      /* Ran out of input before a terminator was seen. */
979   return -XML_TOK_POUND_NAME;                     979   return -XML_TOK_POUND_NAME;
980 }                                                 980 }
981                                                   981 
/* Scans a quoted literal whose opening quote has already been consumed.
 *
 * open is the byte type of the opening quote; prologTok in this file passes
 * BT_QUOT or BT_APOS. A quote character of the other type is treated as
 * ordinary literal content.
 *
 * Returns:
 *   - XML_TOK_LITERAL when the matching closing quote is followed by BT_S,
 *     BT_CR, BT_LF, BT_GT, BT_PERCNT or BT_LSQB
 *   - XML_TOK_INVALID when the closing quote is followed by anything else
 *   - -XML_TOK_LITERAL (negated) when the closing quote is the last
 *     character available; *nextTokPtr is not set on this path
 *   - XML_TOK_PARTIAL when the input ends before a closing quote is found
 *   - whatever the INVALID_CASES cases return (macro defined outside this
 *     view)
 * For the first two results *nextTokPtr points just past the closing quote.
 */
982 static int PTRCALL                                982 static int PTRCALL
983 PREFIX(scanLit)(int open, const ENCODING *enc,    983 PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end,
984                 const char **nextTokPtr) {        984                 const char **nextTokPtr) {
985   while (HAS_CHAR(enc, ptr, end)) {               985   while (HAS_CHAR(enc, ptr, end)) {
986     int t = BYTE_TYPE(enc, ptr);                  986     int t = BYTE_TYPE(enc, ptr);
987     switch (t) {                                  987     switch (t) {
988       INVALID_CASES(ptr, nextTokPtr)              988       INVALID_CASES(ptr, nextTokPtr)
989     case BT_QUOT:                                 989     case BT_QUOT:
990     case BT_APOS:                                 990     case BT_APOS:
991       ptr += MINBPC(enc);                         991       ptr += MINBPC(enc);
          /* A quote of the other type is literal content; keep scanning. */
992       if (t != open)                              992       if (t != open)
993         break;                                    993         break;
          /* Closing quote found, but the following character is needed to
             validate the literal and none is available yet. */
994       if (! HAS_CHAR(enc, ptr, end))              994       if (! HAS_CHAR(enc, ptr, end))
995         return -XML_TOK_LITERAL;                  995         return -XML_TOK_LITERAL;
996       *nextTokPtr = ptr;                          996       *nextTokPtr = ptr;
          /* The character after the closing quote is inspected, not consumed. */
997       switch (BYTE_TYPE(enc, ptr)) {              997       switch (BYTE_TYPE(enc, ptr)) {
998       case BT_S:                                  998       case BT_S:
999       case BT_CR:                                 999       case BT_CR:
1000       case BT_LF:                                1000       case BT_LF:
1001       case BT_GT:                                1001       case BT_GT:
1002       case BT_PERCNT:                            1002       case BT_PERCNT:
1003       case BT_LSQB:                              1003       case BT_LSQB:
1004         return XML_TOK_LITERAL;                  1004         return XML_TOK_LITERAL;
1005       default:                                   1005       default:
1006         return XML_TOK_INVALID;                  1006         return XML_TOK_INVALID;
1007       }                                          1007       }
1008     default:                                     1008     default:
1009       ptr += MINBPC(enc);                        1009       ptr += MINBPC(enc);
1010       break;                                     1010       break;
1011     }                                            1011     }
1012   }                                              1012   }
1013   return XML_TOK_PARTIAL;                        1013   return XML_TOK_PARTIAL;
1014 }                                                1014 }
1015                                                  1015 
1016 static int PTRCALL                               1016 static int PTRCALL
1017 PREFIX(prologTok)(const ENCODING *enc, const     1017 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
1018                   const char **nextTokPtr) {     1018                   const char **nextTokPtr) {
1019   int tok;                                       1019   int tok;
1020   if (ptr >= end)                                1020   if (ptr >= end)
1021     return XML_TOK_NONE;                         1021     return XML_TOK_NONE;
1022   if (MINBPC(enc) > 1) {                         1022   if (MINBPC(enc) > 1) {
1023     size_t n = end - ptr;                        1023     size_t n = end - ptr;
1024     if (n & (MINBPC(enc) - 1)) {                 1024     if (n & (MINBPC(enc) - 1)) {
1025       n &= ~(MINBPC(enc) - 1);                   1025       n &= ~(MINBPC(enc) - 1);
1026       if (n == 0)                                1026       if (n == 0)
1027         return XML_TOK_PARTIAL;                  1027         return XML_TOK_PARTIAL;
1028       end = ptr + n;                             1028       end = ptr + n;
1029     }                                            1029     }
1030   }                                              1030   }
1031   switch (BYTE_TYPE(enc, ptr)) {                 1031   switch (BYTE_TYPE(enc, ptr)) {
1032   case BT_QUOT:                                  1032   case BT_QUOT:
1033     return PREFIX(scanLit)(BT_QUOT, enc, ptr     1033     return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1034   case BT_APOS:                                  1034   case BT_APOS:
1035     return PREFIX(scanLit)(BT_APOS, enc, ptr     1035     return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1036   case BT_LT: {                                  1036   case BT_LT: {
1037     ptr += MINBPC(enc);                          1037     ptr += MINBPC(enc);
1038     REQUIRE_CHAR(enc, ptr, end);                 1038     REQUIRE_CHAR(enc, ptr, end);
1039     switch (BYTE_TYPE(enc, ptr)) {               1039     switch (BYTE_TYPE(enc, ptr)) {
1040     case BT_EXCL:                                1040     case BT_EXCL:
1041       return PREFIX(scanDecl)(enc, ptr + MINB    1041       return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1042     case BT_QUEST:                               1042     case BT_QUEST:
1043       return PREFIX(scanPi)(enc, ptr + MINBPC    1043       return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1044     case BT_NMSTRT:                              1044     case BT_NMSTRT:
1045     case BT_HEX:                                 1045     case BT_HEX:
1046     case BT_NONASCII:                            1046     case BT_NONASCII:
1047     case BT_LEAD2:                               1047     case BT_LEAD2:
1048     case BT_LEAD3:                               1048     case BT_LEAD3:
1049     case BT_LEAD4:                               1049     case BT_LEAD4:
1050       *nextTokPtr = ptr - MINBPC(enc);           1050       *nextTokPtr = ptr - MINBPC(enc);
1051       return XML_TOK_INSTANCE_START;             1051       return XML_TOK_INSTANCE_START;
1052     }                                            1052     }
1053     *nextTokPtr = ptr;                           1053     *nextTokPtr = ptr;
1054     return XML_TOK_INVALID;                      1054     return XML_TOK_INVALID;
1055   }                                              1055   }
1056   case BT_CR:                                    1056   case BT_CR:
1057     if (ptr + MINBPC(enc) == end) {              1057     if (ptr + MINBPC(enc) == end) {
1058       *nextTokPtr = end;                         1058       *nextTokPtr = end;
1059       /* indicate that this might be part of     1059       /* indicate that this might be part of a CR/LF pair */
1060       return -XML_TOK_PROLOG_S;                  1060       return -XML_TOK_PROLOG_S;
1061     }                                            1061     }
1062     /* fall through */                           1062     /* fall through */
1063   case BT_S:                                     1063   case BT_S:
1064   case BT_LF:                                    1064   case BT_LF:
1065     for (;;) {                                   1065     for (;;) {
1066       ptr += MINBPC(enc);                        1066       ptr += MINBPC(enc);
1067       if (! HAS_CHAR(enc, ptr, end))             1067       if (! HAS_CHAR(enc, ptr, end))
1068         break;                                   1068         break;
1069       switch (BYTE_TYPE(enc, ptr)) {             1069       switch (BYTE_TYPE(enc, ptr)) {
1070       case BT_S:                                 1070       case BT_S:
1071       case BT_LF:                                1071       case BT_LF:
1072         break;                                   1072         break;
1073       case BT_CR:                                1073       case BT_CR:
1074         /* don't split CR/LF pair */             1074         /* don't split CR/LF pair */
1075         if (ptr + MINBPC(enc) != end)            1075         if (ptr + MINBPC(enc) != end)
1076           break;                                 1076           break;
1077         /* fall through */                       1077         /* fall through */
1078       default:                                   1078       default:
1079         *nextTokPtr = ptr;                       1079         *nextTokPtr = ptr;
1080         return XML_TOK_PROLOG_S;                 1080         return XML_TOK_PROLOG_S;
1081       }                                          1081       }
1082     }                                            1082     }
1083     *nextTokPtr = ptr;                           1083     *nextTokPtr = ptr;
1084     return XML_TOK_PROLOG_S;                     1084     return XML_TOK_PROLOG_S;
1085   case BT_PERCNT:                                1085   case BT_PERCNT:
1086     return PREFIX(scanPercent)(enc, ptr + MIN    1086     return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1087   case BT_COMMA:                                 1087   case BT_COMMA:
1088     *nextTokPtr = ptr + MINBPC(enc);             1088     *nextTokPtr = ptr + MINBPC(enc);
1089     return XML_TOK_COMMA;                        1089     return XML_TOK_COMMA;
1090   case BT_LSQB:                                  1090   case BT_LSQB:
1091     *nextTokPtr = ptr + MINBPC(enc);             1091     *nextTokPtr = ptr + MINBPC(enc);
1092     return XML_TOK_OPEN_BRACKET;                 1092     return XML_TOK_OPEN_BRACKET;
1093   case BT_RSQB:                                  1093   case BT_RSQB:
1094     ptr += MINBPC(enc);                          1094     ptr += MINBPC(enc);
1095     if (! HAS_CHAR(enc, ptr, end))               1095     if (! HAS_CHAR(enc, ptr, end))
1096       return -XML_TOK_CLOSE_BRACKET;             1096       return -XML_TOK_CLOSE_BRACKET;
1097     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {    1097     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1098       REQUIRE_CHARS(enc, ptr, end, 2);           1098       REQUIRE_CHARS(enc, ptr, end, 2);
1099       if (CHAR_MATCHES(enc, ptr + MINBPC(enc)    1099       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1100         *nextTokPtr = ptr + 2 * MINBPC(enc);     1100         *nextTokPtr = ptr + 2 * MINBPC(enc);
1101         return XML_TOK_COND_SECT_CLOSE;          1101         return XML_TOK_COND_SECT_CLOSE;
1102       }                                          1102       }
1103     }                                            1103     }
1104     *nextTokPtr = ptr;                           1104     *nextTokPtr = ptr;
1105     return XML_TOK_CLOSE_BRACKET;                1105     return XML_TOK_CLOSE_BRACKET;
1106   case BT_LPAR:                                  1106   case BT_LPAR:
1107     *nextTokPtr = ptr + MINBPC(enc);             1107     *nextTokPtr = ptr + MINBPC(enc);
1108     return XML_TOK_OPEN_PAREN;                   1108     return XML_TOK_OPEN_PAREN;
1109   case BT_RPAR:                                  1109   case BT_RPAR:
1110     ptr += MINBPC(enc);                          1110     ptr += MINBPC(enc);
1111     if (! HAS_CHAR(enc, ptr, end))               1111     if (! HAS_CHAR(enc, ptr, end))
1112       return -XML_TOK_CLOSE_PAREN;               1112       return -XML_TOK_CLOSE_PAREN;
1113     switch (BYTE_TYPE(enc, ptr)) {               1113     switch (BYTE_TYPE(enc, ptr)) {
1114     case BT_AST:                                 1114     case BT_AST:
1115       *nextTokPtr = ptr + MINBPC(enc);           1115       *nextTokPtr = ptr + MINBPC(enc);
1116       return XML_TOK_CLOSE_PAREN_ASTERISK;       1116       return XML_TOK_CLOSE_PAREN_ASTERISK;
1117     case BT_QUEST:                               1117     case BT_QUEST:
1118       *nextTokPtr = ptr + MINBPC(enc);           1118       *nextTokPtr = ptr + MINBPC(enc);
1119       return XML_TOK_CLOSE_PAREN_QUESTION;       1119       return XML_TOK_CLOSE_PAREN_QUESTION;
1120     case BT_PLUS:                                1120     case BT_PLUS:
1121       *nextTokPtr = ptr + MINBPC(enc);           1121       *nextTokPtr = ptr + MINBPC(enc);
1122       return XML_TOK_CLOSE_PAREN_PLUS;           1122       return XML_TOK_CLOSE_PAREN_PLUS;
1123     case BT_CR:                                  1123     case BT_CR:
1124     case BT_LF:                                  1124     case BT_LF:
1125     case BT_S:                                   1125     case BT_S:
1126     case BT_GT:                                  1126     case BT_GT:
1127     case BT_COMMA:                               1127     case BT_COMMA:
1128     case BT_VERBAR:                              1128     case BT_VERBAR:
1129     case BT_RPAR:                                1129     case BT_RPAR:
1130       *nextTokPtr = ptr;                         1130       *nextTokPtr = ptr;
1131       return XML_TOK_CLOSE_PAREN;                1131       return XML_TOK_CLOSE_PAREN;
1132     }                                            1132     }
1133     *nextTokPtr = ptr;                           1133     *nextTokPtr = ptr;
1134     return XML_TOK_INVALID;                      1134     return XML_TOK_INVALID;
1135   case BT_VERBAR:                                1135   case BT_VERBAR:
1136     *nextTokPtr = ptr + MINBPC(enc);             1136     *nextTokPtr = ptr + MINBPC(enc);
1137     return XML_TOK_OR;                           1137     return XML_TOK_OR;
1138   case BT_GT:                                    1138   case BT_GT:
1139     *nextTokPtr = ptr + MINBPC(enc);             1139     *nextTokPtr = ptr + MINBPC(enc);
1140     return XML_TOK_DECL_CLOSE;                   1140     return XML_TOK_DECL_CLOSE;
1141   case BT_NUM:                                   1141   case BT_NUM:
1142     return PREFIX(scanPoundName)(enc, ptr + M    1142     return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1143 #  define LEAD_CASE(n)                           1143 #  define LEAD_CASE(n)                                                         \
1144   case BT_LEAD##n:                               1144   case BT_LEAD##n:                                                             \
1145     if (end - ptr < n)                           1145     if (end - ptr < n)                                                         \
1146       return XML_TOK_PARTIAL_CHAR;               1146       return XML_TOK_PARTIAL_CHAR;                                             \
1147     if (IS_INVALID_CHAR(enc, ptr, n)) {          1147     if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
1148       *nextTokPtr = ptr;                         1148       *nextTokPtr = ptr;                                                       \
1149       return XML_TOK_INVALID;                    1149       return XML_TOK_INVALID;                                                  \
1150     }                                            1150     }                                                                          \
1151     if (IS_NMSTRT_CHAR(enc, ptr, n)) {           1151     if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
1152       ptr += n;                                  1152       ptr += n;                                                                \
1153       tok = XML_TOK_NAME;                        1153       tok = XML_TOK_NAME;                                                      \
1154       break;                                     1154       break;                                                                   \
1155     }                                            1155     }                                                                          \
1156     if (IS_NAME_CHAR(enc, ptr, n)) {             1156     if (IS_NAME_CHAR(enc, ptr, n)) {                                           \
1157       ptr += n;                                  1157       ptr += n;                                                                \
1158       tok = XML_TOK_NMTOKEN;                     1158       tok = XML_TOK_NMTOKEN;                                                   \
1159       break;                                     1159       break;                                                                   \
1160     }                                            1160     }                                                                          \
1161     *nextTokPtr = ptr;                           1161     *nextTokPtr = ptr;                                                         \
1162     return XML_TOK_INVALID;                      1162     return XML_TOK_INVALID;
1163     LEAD_CASE(2)                                 1163     LEAD_CASE(2)
1164     LEAD_CASE(3)                                 1164     LEAD_CASE(3)
1165     LEAD_CASE(4)                                 1165     LEAD_CASE(4)
1166 #  undef LEAD_CASE                               1166 #  undef LEAD_CASE
1167   case BT_NMSTRT:                                1167   case BT_NMSTRT:
1168   case BT_HEX:                                   1168   case BT_HEX:
1169     tok = XML_TOK_NAME;                          1169     tok = XML_TOK_NAME;
1170     ptr += MINBPC(enc);                          1170     ptr += MINBPC(enc);
1171     break;                                       1171     break;
1172   case BT_DIGIT:                                 1172   case BT_DIGIT:
1173   case BT_NAME:                                  1173   case BT_NAME:
1174   case BT_MINUS:                                 1174   case BT_MINUS:
1175 #  ifdef XML_NS                                  1175 #  ifdef XML_NS
1176   case BT_COLON:                                 1176   case BT_COLON:
1177 #  endif                                         1177 #  endif
1178     tok = XML_TOK_NMTOKEN;                       1178     tok = XML_TOK_NMTOKEN;
1179     ptr += MINBPC(enc);                          1179     ptr += MINBPC(enc);
1180     break;                                       1180     break;
1181   case BT_NONASCII:                              1181   case BT_NONASCII:
1182     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {       1182     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1183       ptr += MINBPC(enc);                        1183       ptr += MINBPC(enc);
1184       tok = XML_TOK_NAME;                        1184       tok = XML_TOK_NAME;
1185       break;                                     1185       break;
1186     }                                            1186     }
1187     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {         1187     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1188       ptr += MINBPC(enc);                        1188       ptr += MINBPC(enc);
1189       tok = XML_TOK_NMTOKEN;                     1189       tok = XML_TOK_NMTOKEN;
1190       break;                                     1190       break;
1191     }                                            1191     }
1192     /* fall through */                           1192     /* fall through */
1193   default:                                       1193   default:
1194     *nextTokPtr = ptr;                           1194     *nextTokPtr = ptr;
1195     return XML_TOK_INVALID;                      1195     return XML_TOK_INVALID;
1196   }                                              1196   }
1197   while (HAS_CHAR(enc, ptr, end)) {              1197   while (HAS_CHAR(enc, ptr, end)) {
1198     switch (BYTE_TYPE(enc, ptr)) {               1198     switch (BYTE_TYPE(enc, ptr)) {
1199       CHECK_NAME_CASES(enc, ptr, end, nextTok    1199       CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1200     case BT_GT:                                  1200     case BT_GT:
1201     case BT_RPAR:                                1201     case BT_RPAR:
1202     case BT_COMMA:                               1202     case BT_COMMA:
1203     case BT_VERBAR:                              1203     case BT_VERBAR:
1204     case BT_LSQB:                                1204     case BT_LSQB:
1205     case BT_PERCNT:                              1205     case BT_PERCNT:
1206     case BT_S:                                   1206     case BT_S:
1207     case BT_CR:                                  1207     case BT_CR:
1208     case BT_LF:                                  1208     case BT_LF:
1209       *nextTokPtr = ptr;                         1209       *nextTokPtr = ptr;
1210       return tok;                                1210       return tok;
1211 #  ifdef XML_NS                                  1211 #  ifdef XML_NS
1212     case BT_COLON:                               1212     case BT_COLON:
1213       ptr += MINBPC(enc);                        1213       ptr += MINBPC(enc);
1214       switch (tok) {                             1214       switch (tok) {
1215       case XML_TOK_NAME:                         1215       case XML_TOK_NAME:
1216         REQUIRE_CHAR(enc, ptr, end);             1216         REQUIRE_CHAR(enc, ptr, end);
1217         tok = XML_TOK_PREFIXED_NAME;             1217         tok = XML_TOK_PREFIXED_NAME;
1218         switch (BYTE_TYPE(enc, ptr)) {           1218         switch (BYTE_TYPE(enc, ptr)) {
1219           CHECK_NAME_CASES(enc, ptr, end, nex    1219           CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1220         default:                                 1220         default:
1221           tok = XML_TOK_NMTOKEN;                 1221           tok = XML_TOK_NMTOKEN;
1222           break;                                 1222           break;
1223         }                                        1223         }
1224         break;                                   1224         break;
1225       case XML_TOK_PREFIXED_NAME:                1225       case XML_TOK_PREFIXED_NAME:
1226         tok = XML_TOK_NMTOKEN;                   1226         tok = XML_TOK_NMTOKEN;
1227         break;                                   1227         break;
1228       }                                          1228       }
1229       break;                                     1229       break;
1230 #  endif                                         1230 #  endif
1231     case BT_PLUS:                                1231     case BT_PLUS:
1232       if (tok == XML_TOK_NMTOKEN) {              1232       if (tok == XML_TOK_NMTOKEN) {
1233         *nextTokPtr = ptr;                       1233         *nextTokPtr = ptr;
1234         return XML_TOK_INVALID;                  1234         return XML_TOK_INVALID;
1235       }                                          1235       }
1236       *nextTokPtr = ptr + MINBPC(enc);           1236       *nextTokPtr = ptr + MINBPC(enc);
1237       return XML_TOK_NAME_PLUS;                  1237       return XML_TOK_NAME_PLUS;
1238     case BT_AST:                                 1238     case BT_AST:
1239       if (tok == XML_TOK_NMTOKEN) {              1239       if (tok == XML_TOK_NMTOKEN) {
1240         *nextTokPtr = ptr;                       1240         *nextTokPtr = ptr;
1241         return XML_TOK_INVALID;                  1241         return XML_TOK_INVALID;
1242       }                                          1242       }
1243       *nextTokPtr = ptr + MINBPC(enc);           1243       *nextTokPtr = ptr + MINBPC(enc);
1244       return XML_TOK_NAME_ASTERISK;              1244       return XML_TOK_NAME_ASTERISK;
1245     case BT_QUEST:                               1245     case BT_QUEST:
1246       if (tok == XML_TOK_NMTOKEN) {              1246       if (tok == XML_TOK_NMTOKEN) {
1247         *nextTokPtr = ptr;                       1247         *nextTokPtr = ptr;
1248         return XML_TOK_INVALID;                  1248         return XML_TOK_INVALID;
1249       }                                          1249       }
1250       *nextTokPtr = ptr + MINBPC(enc);           1250       *nextTokPtr = ptr + MINBPC(enc);
1251       return XML_TOK_NAME_QUESTION;              1251       return XML_TOK_NAME_QUESTION;
1252     default:                                     1252     default:
1253       *nextTokPtr = ptr;                         1253       *nextTokPtr = ptr;
1254       return XML_TOK_INVALID;                    1254       return XML_TOK_INVALID;
1255     }                                            1255     }
1256   }                                              1256   }
1257   return -tok;                                   1257   return -tok;
1258 }                                                1258 }
1259                                                  1259 
1260 static int PTRCALL                               1260 static int PTRCALL
1261 PREFIX(attributeValueTok)(const ENCODING *enc    1261 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1262                           const char **nextTo    1262                           const char **nextTokPtr) {
1263   const char *start;                             1263   const char *start;
1264   if (ptr >= end)                                1264   if (ptr >= end)
1265     return XML_TOK_NONE;                         1265     return XML_TOK_NONE;
1266   else if (! HAS_CHAR(enc, ptr, end)) {          1266   else if (! HAS_CHAR(enc, ptr, end)) {
1267     /* This line cannot be executed.  The inc    1267     /* This line cannot be executed.  The incoming data has already
1268      * been tokenized once, so incomplete cha    1268      * been tokenized once, so incomplete characters like this have
1269      * already been eliminated from the input    1269      * already been eliminated from the input.  Retaining the paranoia
1270      * check is still valuable, however.         1270      * check is still valuable, however.
1271      */                                          1271      */
1272     return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE    1272     return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1273   }                                              1273   }
1274   start = ptr;                                   1274   start = ptr;
1275   while (HAS_CHAR(enc, ptr, end)) {              1275   while (HAS_CHAR(enc, ptr, end)) {
1276     switch (BYTE_TYPE(enc, ptr)) {               1276     switch (BYTE_TYPE(enc, ptr)) {
1277 #  define LEAD_CASE(n)                           1277 #  define LEAD_CASE(n)                                                         \
1278   case BT_LEAD##n:                               1278   case BT_LEAD##n:                                                             \
1279     ptr += n; /* NOTE: The encoding has alrea    1279     ptr += n; /* NOTE: The encoding has already been validated. */             \
1280     break;                                       1280     break;
1281       LEAD_CASE(2)                               1281       LEAD_CASE(2)
1282       LEAD_CASE(3)                               1282       LEAD_CASE(3)
1283       LEAD_CASE(4)                               1283       LEAD_CASE(4)
1284 #  undef LEAD_CASE                               1284 #  undef LEAD_CASE
1285     case BT_AMP:                                 1285     case BT_AMP:
1286       if (ptr == start)                          1286       if (ptr == start)
1287         return PREFIX(scanRef)(enc, ptr + MIN    1287         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1288       *nextTokPtr = ptr;                         1288       *nextTokPtr = ptr;
1289       return XML_TOK_DATA_CHARS;                 1289       return XML_TOK_DATA_CHARS;
1290     case BT_LT:                                  1290     case BT_LT:
1291       /* this is for inside entity references    1291       /* this is for inside entity references */
1292       *nextTokPtr = ptr;                         1292       *nextTokPtr = ptr;
1293       return XML_TOK_INVALID;                    1293       return XML_TOK_INVALID;
1294     case BT_LF:                                  1294     case BT_LF:
1295       if (ptr == start) {                        1295       if (ptr == start) {
1296         *nextTokPtr = ptr + MINBPC(enc);         1296         *nextTokPtr = ptr + MINBPC(enc);
1297         return XML_TOK_DATA_NEWLINE;             1297         return XML_TOK_DATA_NEWLINE;
1298       }                                          1298       }
1299       *nextTokPtr = ptr;                         1299       *nextTokPtr = ptr;
1300       return XML_TOK_DATA_CHARS;                 1300       return XML_TOK_DATA_CHARS;
1301     case BT_CR:                                  1301     case BT_CR:
1302       if (ptr == start) {                        1302       if (ptr == start) {
1303         ptr += MINBPC(enc);                      1303         ptr += MINBPC(enc);
1304         if (! HAS_CHAR(enc, ptr, end))           1304         if (! HAS_CHAR(enc, ptr, end))
1305           return XML_TOK_TRAILING_CR;            1305           return XML_TOK_TRAILING_CR;
1306         if (BYTE_TYPE(enc, ptr) == BT_LF)        1306         if (BYTE_TYPE(enc, ptr) == BT_LF)
1307           ptr += MINBPC(enc);                    1307           ptr += MINBPC(enc);
1308         *nextTokPtr = ptr;                       1308         *nextTokPtr = ptr;
1309         return XML_TOK_DATA_NEWLINE;             1309         return XML_TOK_DATA_NEWLINE;
1310       }                                          1310       }
1311       *nextTokPtr = ptr;                         1311       *nextTokPtr = ptr;
1312       return XML_TOK_DATA_CHARS;                 1312       return XML_TOK_DATA_CHARS;
1313     case BT_S:                                   1313     case BT_S:
1314       if (ptr == start) {                        1314       if (ptr == start) {
1315         *nextTokPtr = ptr + MINBPC(enc);         1315         *nextTokPtr = ptr + MINBPC(enc);
1316         return XML_TOK_ATTRIBUTE_VALUE_S;        1316         return XML_TOK_ATTRIBUTE_VALUE_S;
1317       }                                          1317       }
1318       *nextTokPtr = ptr;                         1318       *nextTokPtr = ptr;
1319       return XML_TOK_DATA_CHARS;                 1319       return XML_TOK_DATA_CHARS;
1320     default:                                     1320     default:
1321       ptr += MINBPC(enc);                        1321       ptr += MINBPC(enc);
1322       break;                                     1322       break;
1323     }                                            1323     }
1324   }                                              1324   }
1325   *nextTokPtr = ptr;                             1325   *nextTokPtr = ptr;
1326   return XML_TOK_DATA_CHARS;                     1326   return XML_TOK_DATA_CHARS;
1327 }                                                1327 }
1328                                                  1328 
1329 static int PTRCALL                               1329 static int PTRCALL
1330 PREFIX(entityValueTok)(const ENCODING *enc, c    1330 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1331                        const char **nextTokPt    1331                        const char **nextTokPtr) {
1332   const char *start;                             1332   const char *start;
1333   if (ptr >= end)                                1333   if (ptr >= end)
1334     return XML_TOK_NONE;                         1334     return XML_TOK_NONE;
1335   else if (! HAS_CHAR(enc, ptr, end)) {          1335   else if (! HAS_CHAR(enc, ptr, end)) {
1336     /* This line cannot be executed.  The inc    1336     /* This line cannot be executed.  The incoming data has already
1337      * been tokenized once, so incomplete cha    1337      * been tokenized once, so incomplete characters like this have
1338      * already been eliminated from the input    1338      * already been eliminated from the input.  Retaining the paranoia
1339      * check is still valuable, however.         1339      * check is still valuable, however.
1340      */                                          1340      */
1341     return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE    1341     return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1342   }                                              1342   }
1343   start = ptr;                                   1343   start = ptr;
1344   while (HAS_CHAR(enc, ptr, end)) {              1344   while (HAS_CHAR(enc, ptr, end)) {
1345     switch (BYTE_TYPE(enc, ptr)) {               1345     switch (BYTE_TYPE(enc, ptr)) {
1346 #  define LEAD_CASE(n)                           1346 #  define LEAD_CASE(n)                                                         \
1347   case BT_LEAD##n:                               1347   case BT_LEAD##n:                                                             \
1348     ptr += n; /* NOTE: The encoding has alrea    1348     ptr += n; /* NOTE: The encoding has already been validated. */             \
1349     break;                                       1349     break;
1350       LEAD_CASE(2)                               1350       LEAD_CASE(2)
1351       LEAD_CASE(3)                               1351       LEAD_CASE(3)
1352       LEAD_CASE(4)                               1352       LEAD_CASE(4)
1353 #  undef LEAD_CASE                               1353 #  undef LEAD_CASE
1354     case BT_AMP:                                 1354     case BT_AMP:
1355       if (ptr == start)                          1355       if (ptr == start)
1356         return PREFIX(scanRef)(enc, ptr + MIN    1356         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1357       *nextTokPtr = ptr;                         1357       *nextTokPtr = ptr;
1358       return XML_TOK_DATA_CHARS;                 1358       return XML_TOK_DATA_CHARS;
1359     case BT_PERCNT:                              1359     case BT_PERCNT:
1360       if (ptr == start) {                        1360       if (ptr == start) {
1361         int tok = PREFIX(scanPercent)(enc, pt    1361         int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1362         return (tok == XML_TOK_PERCENT) ? XML    1362         return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1363       }                                          1363       }
1364       *nextTokPtr = ptr;                         1364       *nextTokPtr = ptr;
1365       return XML_TOK_DATA_CHARS;                 1365       return XML_TOK_DATA_CHARS;
1366     case BT_LF:                                  1366     case BT_LF:
1367       if (ptr == start) {                        1367       if (ptr == start) {
1368         *nextTokPtr = ptr + MINBPC(enc);         1368         *nextTokPtr = ptr + MINBPC(enc);
1369         return XML_TOK_DATA_NEWLINE;             1369         return XML_TOK_DATA_NEWLINE;
1370       }                                          1370       }
1371       *nextTokPtr = ptr;                         1371       *nextTokPtr = ptr;
1372       return XML_TOK_DATA_CHARS;                 1372       return XML_TOK_DATA_CHARS;
1373     case BT_CR:                                  1373     case BT_CR:
1374       if (ptr == start) {                        1374       if (ptr == start) {
1375         ptr += MINBPC(enc);                      1375         ptr += MINBPC(enc);
1376         if (! HAS_CHAR(enc, ptr, end))           1376         if (! HAS_CHAR(enc, ptr, end))
1377           return XML_TOK_TRAILING_CR;            1377           return XML_TOK_TRAILING_CR;
1378         if (BYTE_TYPE(enc, ptr) == BT_LF)        1378         if (BYTE_TYPE(enc, ptr) == BT_LF)
1379           ptr += MINBPC(enc);                    1379           ptr += MINBPC(enc);
1380         *nextTokPtr = ptr;                       1380         *nextTokPtr = ptr;
1381         return XML_TOK_DATA_NEWLINE;             1381         return XML_TOK_DATA_NEWLINE;
1382       }                                          1382       }
1383       *nextTokPtr = ptr;                         1383       *nextTokPtr = ptr;
1384       return XML_TOK_DATA_CHARS;                 1384       return XML_TOK_DATA_CHARS;
1385     default:                                     1385     default:
1386       ptr += MINBPC(enc);                        1386       ptr += MINBPC(enc);
1387       break;                                     1387       break;
1388     }                                            1388     }
1389   }                                              1389   }
1390   *nextTokPtr = ptr;                             1390   *nextTokPtr = ptr;
1391   return XML_TOK_DATA_CHARS;                     1391   return XML_TOK_DATA_CHARS;
1392 }                                                1392 }
1393                                                  1393 
1394 #  ifdef XML_DTD                                 1394 #  ifdef XML_DTD
1395                                                  1395 
1396 static int PTRCALL                               1396 static int PTRCALL
1397 PREFIX(ignoreSectionTok)(const ENCODING *enc,    1397 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
1398                          const char **nextTok    1398                          const char **nextTokPtr) {
1399   int level = 0;                                 1399   int level = 0;
1400   if (MINBPC(enc) > 1) {                         1400   if (MINBPC(enc) > 1) {
1401     size_t n = end - ptr;                        1401     size_t n = end - ptr;
1402     if (n & (MINBPC(enc) - 1)) {                 1402     if (n & (MINBPC(enc) - 1)) {
1403       n &= ~(MINBPC(enc) - 1);                   1403       n &= ~(MINBPC(enc) - 1);
1404       end = ptr + n;                             1404       end = ptr + n;
1405     }                                            1405     }
1406   }                                              1406   }
1407   while (HAS_CHAR(enc, ptr, end)) {              1407   while (HAS_CHAR(enc, ptr, end)) {
1408     switch (BYTE_TYPE(enc, ptr)) {               1408     switch (BYTE_TYPE(enc, ptr)) {
1409       INVALID_CASES(ptr, nextTokPtr)             1409       INVALID_CASES(ptr, nextTokPtr)
1410     case BT_LT:                                  1410     case BT_LT:
1411       ptr += MINBPC(enc);                        1411       ptr += MINBPC(enc);
1412       REQUIRE_CHAR(enc, ptr, end);               1412       REQUIRE_CHAR(enc, ptr, end);
1413       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL))    1413       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1414         ptr += MINBPC(enc);                      1414         ptr += MINBPC(enc);
1415         REQUIRE_CHAR(enc, ptr, end);             1415         REQUIRE_CHAR(enc, ptr, end);
1416         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB    1416         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1417           ++level;                               1417           ++level;
1418           ptr += MINBPC(enc);                    1418           ptr += MINBPC(enc);
1419         }                                        1419         }
1420       }                                          1420       }
1421       break;                                     1421       break;
1422     case BT_RSQB:                                1422     case BT_RSQB:
1423       ptr += MINBPC(enc);                        1423       ptr += MINBPC(enc);
1424       REQUIRE_CHAR(enc, ptr, end);               1424       REQUIRE_CHAR(enc, ptr, end);
1425       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB))    1425       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1426         ptr += MINBPC(enc);                      1426         ptr += MINBPC(enc);
1427         REQUIRE_CHAR(enc, ptr, end);             1427         REQUIRE_CHAR(enc, ptr, end);
1428         if (CHAR_MATCHES(enc, ptr, ASCII_GT))    1428         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1429           ptr += MINBPC(enc);                    1429           ptr += MINBPC(enc);
1430           if (level == 0) {                      1430           if (level == 0) {
1431             *nextTokPtr = ptr;                   1431             *nextTokPtr = ptr;
1432             return XML_TOK_IGNORE_SECT;          1432             return XML_TOK_IGNORE_SECT;
1433           }                                      1433           }
1434           --level;                               1434           --level;
1435         }                                        1435         }
1436       }                                          1436       }
1437       break;                                     1437       break;
1438     default:                                     1438     default:
1439       ptr += MINBPC(enc);                        1439       ptr += MINBPC(enc);
1440       break;                                     1440       break;
1441     }                                            1441     }
1442   }                                              1442   }
1443   return XML_TOK_PARTIAL;                        1443   return XML_TOK_PARTIAL;
1444 }                                                1444 }
1445                                                  1445 
1446 #  endif /* XML_DTD */                           1446 #  endif /* XML_DTD */
1447                                                  1447 
1448 static int PTRCALL                               1448 static int PTRCALL
1449 PREFIX(isPublicId)(const ENCODING *enc, const    1449 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1450                    const char **badPtr) {        1450                    const char **badPtr) {
1451   ptr += MINBPC(enc);                            1451   ptr += MINBPC(enc);
1452   end -= MINBPC(enc);                            1452   end -= MINBPC(enc);
1453   for (; HAS_CHAR(enc, ptr, end); ptr += MINB    1453   for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1454     switch (BYTE_TYPE(enc, ptr)) {               1454     switch (BYTE_TYPE(enc, ptr)) {
1455     case BT_DIGIT:                               1455     case BT_DIGIT:
1456     case BT_HEX:                                 1456     case BT_HEX:
1457     case BT_MINUS:                               1457     case BT_MINUS:
1458     case BT_APOS:                                1458     case BT_APOS:
1459     case BT_LPAR:                                1459     case BT_LPAR:
1460     case BT_RPAR:                                1460     case BT_RPAR:
1461     case BT_PLUS:                                1461     case BT_PLUS:
1462     case BT_COMMA:                               1462     case BT_COMMA:
1463     case BT_SOL:                                 1463     case BT_SOL:
1464     case BT_EQUALS:                              1464     case BT_EQUALS:
1465     case BT_QUEST:                               1465     case BT_QUEST:
1466     case BT_CR:                                  1466     case BT_CR:
1467     case BT_LF:                                  1467     case BT_LF:
1468     case BT_SEMI:                                1468     case BT_SEMI:
1469     case BT_EXCL:                                1469     case BT_EXCL:
1470     case BT_AST:                                 1470     case BT_AST:
1471     case BT_PERCNT:                              1471     case BT_PERCNT:
1472     case BT_NUM:                                 1472     case BT_NUM:
1473 #  ifdef XML_NS                                  1473 #  ifdef XML_NS
1474     case BT_COLON:                               1474     case BT_COLON:
1475 #  endif                                         1475 #  endif
1476       break;                                     1476       break;
1477     case BT_S:                                   1477     case BT_S:
1478       if (CHAR_MATCHES(enc, ptr, ASCII_TAB))     1478       if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1479         *badPtr = ptr;                           1479         *badPtr = ptr;
1480         return 0;                                1480         return 0;
1481       }                                          1481       }
1482       break;                                     1482       break;
1483     case BT_NAME:                                1483     case BT_NAME:
1484     case BT_NMSTRT:                              1484     case BT_NMSTRT:
1485       if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)    1485       if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1486         break;                                   1486         break;
1487       /* fall through */                         1487       /* fall through */
1488     default:                                     1488     default:
1489       switch (BYTE_TO_ASCII(enc, ptr)) {         1489       switch (BYTE_TO_ASCII(enc, ptr)) {
1490       case 0x24: /* $ */                         1490       case 0x24: /* $ */
1491       case 0x40: /* @ */                         1491       case 0x40: /* @ */
1492         break;                                   1492         break;
1493       default:                                   1493       default:
1494         *badPtr = ptr;                           1494         *badPtr = ptr;
1495         return 0;                                1495         return 0;
1496       }                                          1496       }
1497       break;                                     1497       break;
1498     }                                            1498     }
1499   }                                              1499   }
1500   return 1;                                      1500   return 1;
1501 }                                                1501 }
1502                                                  1502 
1503 /* This must only be called for a well-formed    1503 /* This must only be called for a well-formed start-tag or empty
1504    element tag.  Returns the number of attrib    1504    element tag.  Returns the number of attributes.  Pointers to the
1505    first attsMax attributes are stored in att    1505    first attsMax attributes are stored in atts.
1506 */                                               1506 */
1507                                                  1507 
1508 static int PTRCALL                               1508 static int PTRCALL
1509 PREFIX(getAtts)(const ENCODING *enc, const ch    1509 PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
1510                 ATTRIBUTE *atts) {               1510                 ATTRIBUTE *atts) {
1511   enum { other, inName, inValue } state = inN    1511   enum { other, inName, inValue } state = inName;
1512   int nAtts = 0;                                 1512   int nAtts = 0;
1513   int open = 0; /* defined when state == inVa    1513   int open = 0; /* defined when state == inValue;
1514                    initialization just to shu    1514                    initialization just to shut up compilers */
1515                                                  1515 
1516   for (ptr += MINBPC(enc);; ptr += MINBPC(enc    1516   for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1517     switch (BYTE_TYPE(enc, ptr)) {               1517     switch (BYTE_TYPE(enc, ptr)) {
1518 #  define START_NAME                             1518 #  define START_NAME                                                           \
1519     if (state == other) {                        1519     if (state == other) {                                                      \
1520       if (nAtts < attsMax) {                     1520       if (nAtts < attsMax) {                                                   \
1521         atts[nAtts].name = ptr;                  1521         atts[nAtts].name = ptr;                                                \
1522         atts[nAtts].normalized = 1;              1522         atts[nAtts].normalized = 1;                                            \
1523       }                                          1523       }                                                                        \
1524       state = inName;                            1524       state = inName;                                                          \
1525     }                                            1525     }
1526 #  define LEAD_CASE(n)                           1526 #  define LEAD_CASE(n)                                                         \
1527   case BT_LEAD##n: /* NOTE: The encoding has     1527   case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
1528     START_NAME ptr += (n - MINBPC(enc));         1528     START_NAME ptr += (n - MINBPC(enc));                                       \
1529     break;                                       1529     break;
1530       LEAD_CASE(2)                               1530       LEAD_CASE(2)
1531       LEAD_CASE(3)                               1531       LEAD_CASE(3)
1532       LEAD_CASE(4)                               1532       LEAD_CASE(4)
1533 #  undef LEAD_CASE                               1533 #  undef LEAD_CASE
1534     case BT_NONASCII:                            1534     case BT_NONASCII:
1535     case BT_NMSTRT:                              1535     case BT_NMSTRT:
1536     case BT_HEX:                                 1536     case BT_HEX:
1537       START_NAME                                 1537       START_NAME
1538       break;                                     1538       break;
1539 #  undef START_NAME                              1539 #  undef START_NAME
1540     case BT_QUOT:                                1540     case BT_QUOT:
1541       if (state != inValue) {                    1541       if (state != inValue) {
1542         if (nAtts < attsMax)                     1542         if (nAtts < attsMax)
1543           atts[nAtts].valuePtr = ptr + MINBPC    1543           atts[nAtts].valuePtr = ptr + MINBPC(enc);
1544         state = inValue;                         1544         state = inValue;
1545         open = BT_QUOT;                          1545         open = BT_QUOT;
1546       } else if (open == BT_QUOT) {              1546       } else if (open == BT_QUOT) {
1547         state = other;                           1547         state = other;
1548         if (nAtts < attsMax)                     1548         if (nAtts < attsMax)
1549           atts[nAtts].valueEnd = ptr;            1549           atts[nAtts].valueEnd = ptr;
1550         nAtts++;                                 1550         nAtts++;
1551       }                                          1551       }
1552       break;                                     1552       break;
1553     case BT_APOS:                                1553     case BT_APOS:
1554       if (state != inValue) {                    1554       if (state != inValue) {
1555         if (nAtts < attsMax)                     1555         if (nAtts < attsMax)
1556           atts[nAtts].valuePtr = ptr + MINBPC    1556           atts[nAtts].valuePtr = ptr + MINBPC(enc);
1557         state = inValue;                         1557         state = inValue;
1558         open = BT_APOS;                          1558         open = BT_APOS;
1559       } else if (open == BT_APOS) {              1559       } else if (open == BT_APOS) {
1560         state = other;                           1560         state = other;
1561         if (nAtts < attsMax)                     1561         if (nAtts < attsMax)
1562           atts[nAtts].valueEnd = ptr;            1562           atts[nAtts].valueEnd = ptr;
1563         nAtts++;                                 1563         nAtts++;
1564       }                                          1564       }
1565       break;                                     1565       break;
1566     case BT_AMP:                                 1566     case BT_AMP:
1567       if (nAtts < attsMax)                       1567       if (nAtts < attsMax)
1568         atts[nAtts].normalized = 0;              1568         atts[nAtts].normalized = 0;
1569       break;                                     1569       break;
1570     case BT_S:                                   1570     case BT_S:
1571       if (state == inName)                       1571       if (state == inName)
1572         state = other;                           1572         state = other;
1573       else if (state == inValue && nAtts < at    1573       else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
1574                && (ptr == atts[nAtts].valuePt    1574                && (ptr == atts[nAtts].valuePtr
1575                    || BYTE_TO_ASCII(enc, ptr)    1575                    || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1576                    || BYTE_TO_ASCII(enc, ptr     1576                    || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1577                    || BYTE_TYPE(enc, ptr + MI    1577                    || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1578         atts[nAtts].normalized = 0;              1578         atts[nAtts].normalized = 0;
1579       break;                                     1579       break;
1580     case BT_CR:                                  1580     case BT_CR:
1581     case BT_LF:                                  1581     case BT_LF:
1582       /* This case ensures that the first att    1582       /* This case ensures that the first attribute name is counted
1583          Apart from that we could just change    1583          Apart from that we could just change state on the quote. */
1584       if (state == inName)                       1584       if (state == inName)
1585         state = other;                           1585         state = other;
1586       else if (state == inValue && nAtts < at    1586       else if (state == inValue && nAtts < attsMax)
1587         atts[nAtts].normalized = 0;              1587         atts[nAtts].normalized = 0;
1588       break;                                     1588       break;
1589     case BT_GT:                                  1589     case BT_GT:
1590     case BT_SOL:                                 1590     case BT_SOL:
1591       if (state != inValue)                      1591       if (state != inValue)
1592         return nAtts;                            1592         return nAtts;
1593       break;                                     1593       break;
1594     default:                                     1594     default:
1595       break;                                     1595       break;
1596     }                                            1596     }
1597   }                                              1597   }
1598   /* not reached */                              1598   /* not reached */
1599 }                                                1599 }
1600                                                  1600 
1601 static int PTRFASTCALL                           1601 static int PTRFASTCALL
1602 PREFIX(charRefNumber)(const ENCODING *enc, co    1602 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
1603   int result = 0;                                1603   int result = 0;
1604   /* skip &# */                                  1604   /* skip &# */
1605   UNUSED_P(enc);                                 1605   UNUSED_P(enc);
1606   ptr += 2 * MINBPC(enc);                        1606   ptr += 2 * MINBPC(enc);
1607   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {         1607   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1608     for (ptr += MINBPC(enc); ! CHAR_MATCHES(e    1608     for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1609          ptr += MINBPC(enc)) {                   1609          ptr += MINBPC(enc)) {
1610       int c = BYTE_TO_ASCII(enc, ptr);           1610       int c = BYTE_TO_ASCII(enc, ptr);
1611       switch (c) {                               1611       switch (c) {
1612       case ASCII_0:                              1612       case ASCII_0:
1613       case ASCII_1:                              1613       case ASCII_1:
1614       case ASCII_2:                              1614       case ASCII_2:
1615       case ASCII_3:                              1615       case ASCII_3:
1616       case ASCII_4:                              1616       case ASCII_4:
1617       case ASCII_5:                              1617       case ASCII_5:
1618       case ASCII_6:                              1618       case ASCII_6:
1619       case ASCII_7:                              1619       case ASCII_7:
1620       case ASCII_8:                              1620       case ASCII_8:
1621       case ASCII_9:                              1621       case ASCII_9:
1622         result <<= 4;                            1622         result <<= 4;
1623         result |= (c - ASCII_0);                 1623         result |= (c - ASCII_0);
1624         break;                                   1624         break;
1625       case ASCII_A:                              1625       case ASCII_A:
1626       case ASCII_B:                              1626       case ASCII_B:
1627       case ASCII_C:                              1627       case ASCII_C:
1628       case ASCII_D:                              1628       case ASCII_D:
1629       case ASCII_E:                              1629       case ASCII_E:
1630       case ASCII_F:                              1630       case ASCII_F:
1631         result <<= 4;                            1631         result <<= 4;
1632         result += 10 + (c - ASCII_A);            1632         result += 10 + (c - ASCII_A);
1633         break;                                   1633         break;
1634       case ASCII_a:                              1634       case ASCII_a:
1635       case ASCII_b:                              1635       case ASCII_b:
1636       case ASCII_c:                              1636       case ASCII_c:
1637       case ASCII_d:                              1637       case ASCII_d:
1638       case ASCII_e:                              1638       case ASCII_e:
1639       case ASCII_f:                              1639       case ASCII_f:
1640         result <<= 4;                            1640         result <<= 4;
1641         result += 10 + (c - ASCII_a);            1641         result += 10 + (c - ASCII_a);
1642         break;                                   1642         break;
1643       }                                          1643       }
1644       if (result >= 0x110000)                    1644       if (result >= 0x110000)
1645         return -1;                               1645         return -1;
1646     }                                            1646     }
1647   } else {                                       1647   } else {
1648     for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEM    1648     for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1649       int c = BYTE_TO_ASCII(enc, ptr);           1649       int c = BYTE_TO_ASCII(enc, ptr);
1650       result *= 10;                              1650       result *= 10;
1651       result += (c - ASCII_0);                   1651       result += (c - ASCII_0);
1652       if (result >= 0x110000)                    1652       if (result >= 0x110000)
1653         return -1;                               1653         return -1;
1654     }                                            1654     }
1655   }                                              1655   }
1656   return checkCharRefNumber(result);             1656   return checkCharRefNumber(result);
1657 }                                                1657 }
1658                                                  1658 
1659 static int PTRCALL                               1659 static int PTRCALL
1660 PREFIX(predefinedEntityName)(const ENCODING *    1660 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
1661                              const char *end)    1661                              const char *end) {
1662   UNUSED_P(enc);                                 1662   UNUSED_P(enc);
1663   switch ((end - ptr) / MINBPC(enc)) {           1663   switch ((end - ptr) / MINBPC(enc)) {
1664   case 2:                                        1664   case 2:
1665     if (CHAR_MATCHES(enc, ptr + MINBPC(enc),     1665     if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1666       switch (BYTE_TO_ASCII(enc, ptr)) {         1666       switch (BYTE_TO_ASCII(enc, ptr)) {
1667       case ASCII_l:                              1667       case ASCII_l:
1668         return ASCII_LT;                         1668         return ASCII_LT;
1669       case ASCII_g:                              1669       case ASCII_g:
1670         return ASCII_GT;                         1670         return ASCII_GT;
1671       }                                          1671       }
1672     }                                            1672     }
1673     break;                                       1673     break;
1674   case 3:                                        1674   case 3:
1675     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {       1675     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1676       ptr += MINBPC(enc);                        1676       ptr += MINBPC(enc);
1677       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {     1677       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1678         ptr += MINBPC(enc);                      1678         ptr += MINBPC(enc);
1679         if (CHAR_MATCHES(enc, ptr, ASCII_p))     1679         if (CHAR_MATCHES(enc, ptr, ASCII_p))
1680           return ASCII_AMP;                      1680           return ASCII_AMP;
1681       }                                          1681       }
1682     }                                            1682     }
1683     break;                                       1683     break;
1684   case 4:                                        1684   case 4:
1685     switch (BYTE_TO_ASCII(enc, ptr)) {           1685     switch (BYTE_TO_ASCII(enc, ptr)) {
1686     case ASCII_q:                                1686     case ASCII_q:
1687       ptr += MINBPC(enc);                        1687       ptr += MINBPC(enc);
1688       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {     1688       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1689         ptr += MINBPC(enc);                      1689         ptr += MINBPC(enc);
1690         if (CHAR_MATCHES(enc, ptr, ASCII_o))     1690         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1691           ptr += MINBPC(enc);                    1691           ptr += MINBPC(enc);
1692           if (CHAR_MATCHES(enc, ptr, ASCII_t)    1692           if (CHAR_MATCHES(enc, ptr, ASCII_t))
1693             return ASCII_QUOT;                   1693             return ASCII_QUOT;
1694         }                                        1694         }
1695       }                                          1695       }
1696       break;                                     1696       break;
1697     case ASCII_a:                                1697     case ASCII_a:
1698       ptr += MINBPC(enc);                        1698       ptr += MINBPC(enc);
1699       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {     1699       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1700         ptr += MINBPC(enc);                      1700         ptr += MINBPC(enc);
1701         if (CHAR_MATCHES(enc, ptr, ASCII_o))     1701         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1702           ptr += MINBPC(enc);                    1702           ptr += MINBPC(enc);
1703           if (CHAR_MATCHES(enc, ptr, ASCII_s)    1703           if (CHAR_MATCHES(enc, ptr, ASCII_s))
1704             return ASCII_APOS;                   1704             return ASCII_APOS;
1705         }                                        1705         }
1706       }                                          1706       }
1707       break;                                     1707       break;
1708     }                                            1708     }
1709   }                                              1709   }
1710   return 0;                                      1710   return 0;
1711 }                                                1711 }
1712                                                  1712 
1713 static int PTRCALL                               1713 static int PTRCALL
1714 PREFIX(nameMatchesAscii)(const ENCODING *enc,    1714 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1715                          const char *end1, co    1715                          const char *end1, const char *ptr2) {
1716   UNUSED_P(enc);                                 1716   UNUSED_P(enc);
1717   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++)     1717   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1718     if (end1 - ptr1 < MINBPC(enc)) {             1718     if (end1 - ptr1 < MINBPC(enc)) {
1719       /* This line cannot be executed.  The i    1719       /* This line cannot be executed.  The incoming data has already
1720        * been tokenized once, so incomplete c    1720        * been tokenized once, so incomplete characters like this have
1721        * already been eliminated from the inp    1721        * already been eliminated from the input.  Retaining the
1722        * paranoia check is still valuable, ho    1722        * paranoia check is still valuable, however.
1723        */                                        1723        */
1724       return 0; /* LCOV_EXCL_LINE */             1724       return 0; /* LCOV_EXCL_LINE */
1725     }                                            1725     }
1726     if (! CHAR_MATCHES(enc, ptr1, *ptr2))        1726     if (! CHAR_MATCHES(enc, ptr1, *ptr2))
1727       return 0;                                  1727       return 0;
1728   }                                              1728   }
1729   return ptr1 == end1;                           1729   return ptr1 == end1;
1730 }                                                1730 }
1731                                                  1731 
1732 static int PTRFASTCALL                           1732 static int PTRFASTCALL
1733 PREFIX(nameLength)(const ENCODING *enc, const    1733 PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
1734   const char *start = ptr;                       1734   const char *start = ptr;
1735   for (;;) {                                     1735   for (;;) {
1736     switch (BYTE_TYPE(enc, ptr)) {               1736     switch (BYTE_TYPE(enc, ptr)) {
1737 #  define LEAD_CASE(n)                           1737 #  define LEAD_CASE(n)                                                         \
1738   case BT_LEAD##n:                               1738   case BT_LEAD##n:                                                             \
1739     ptr += n; /* NOTE: The encoding has alrea    1739     ptr += n; /* NOTE: The encoding has already been validated. */             \
1740     break;                                       1740     break;
1741       LEAD_CASE(2)                               1741       LEAD_CASE(2)
1742       LEAD_CASE(3)                               1742       LEAD_CASE(3)
1743       LEAD_CASE(4)                               1743       LEAD_CASE(4)
1744 #  undef LEAD_CASE                               1744 #  undef LEAD_CASE
1745     case BT_NONASCII:                            1745     case BT_NONASCII:
1746     case BT_NMSTRT:                              1746     case BT_NMSTRT:
1747 #  ifdef XML_NS                                  1747 #  ifdef XML_NS
1748     case BT_COLON:                               1748     case BT_COLON:
1749 #  endif                                         1749 #  endif
1750     case BT_HEX:                                 1750     case BT_HEX:
1751     case BT_DIGIT:                               1751     case BT_DIGIT:
1752     case BT_NAME:                                1752     case BT_NAME:
1753     case BT_MINUS:                               1753     case BT_MINUS:
1754       ptr += MINBPC(enc);                        1754       ptr += MINBPC(enc);
1755       break;                                     1755       break;
1756     default:                                     1756     default:
1757       return (int)(ptr - start);                 1757       return (int)(ptr - start);
1758     }                                            1758     }
1759   }                                              1759   }
1760 }                                                1760 }
1761                                                  1761 
1762 static const char *PTRFASTCALL                   1762 static const char *PTRFASTCALL
1763 PREFIX(skipS)(const ENCODING *enc, const char    1763 PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
1764   for (;;) {                                     1764   for (;;) {
1765     switch (BYTE_TYPE(enc, ptr)) {               1765     switch (BYTE_TYPE(enc, ptr)) {
1766     case BT_LF:                                  1766     case BT_LF:
1767     case BT_CR:                                  1767     case BT_CR:
1768     case BT_S:                                   1768     case BT_S:
1769       ptr += MINBPC(enc);                        1769       ptr += MINBPC(enc);
1770       break;                                     1770       break;
1771     default:                                     1771     default:
1772       return ptr;                                1772       return ptr;
1773     }                                            1773     }
1774   }                                              1774   }
1775 }                                                1775 }
1776                                                  1776 
1777 static void PTRCALL                              1777 static void PTRCALL
1778 PREFIX(updatePosition)(const ENCODING *enc, c    1778 PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
1779                        POSITION *pos) {          1779                        POSITION *pos) {
1780   while (HAS_CHAR(enc, ptr, end)) {              1780   while (HAS_CHAR(enc, ptr, end)) {
1781     switch (BYTE_TYPE(enc, ptr)) {               1781     switch (BYTE_TYPE(enc, ptr)) {
1782 #  define LEAD_CASE(n)                           1782 #  define LEAD_CASE(n)                                                         \
1783   case BT_LEAD##n:                               1783   case BT_LEAD##n:                                                             \
1784     ptr += n; /* NOTE: The encoding has alrea    1784     ptr += n; /* NOTE: The encoding has already been validated. */             \
1785     pos->columnNumber++;                         1785     pos->columnNumber++;                                                       \
1786     break;                                       1786     break;
1787       LEAD_CASE(2)                               1787       LEAD_CASE(2)
1788       LEAD_CASE(3)                               1788       LEAD_CASE(3)
1789       LEAD_CASE(4)                               1789       LEAD_CASE(4)
1790 #  undef LEAD_CASE                               1790 #  undef LEAD_CASE
1791     case BT_LF:                                  1791     case BT_LF:
1792       pos->columnNumber = 0;                     1792       pos->columnNumber = 0;
1793       pos->lineNumber++;                         1793       pos->lineNumber++;
1794       ptr += MINBPC(enc);                        1794       ptr += MINBPC(enc);
1795       break;                                     1795       break;
1796     case BT_CR:                                  1796     case BT_CR:
1797       pos->lineNumber++;                         1797       pos->lineNumber++;
1798       ptr += MINBPC(enc);                        1798       ptr += MINBPC(enc);
1799       if (HAS_CHAR(enc, ptr, end) && BYTE_TYP    1799       if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1800         ptr += MINBPC(enc);                      1800         ptr += MINBPC(enc);
1801       pos->columnNumber = 0;                     1801       pos->columnNumber = 0;
1802       break;                                     1802       break;
1803     default:                                     1803     default:
1804       ptr += MINBPC(enc);                        1804       ptr += MINBPC(enc);
1805       pos->columnNumber++;                       1805       pos->columnNumber++;
1806       break;                                     1806       break;
1807     }                                            1807     }
1808   }                                              1808   }
1809 }                                                1809 }
1810                                                  1810 
1811 #  undef DO_LEAD_CASE                            1811 #  undef DO_LEAD_CASE
1812 #  undef MULTIBYTE_CASES                         1812 #  undef MULTIBYTE_CASES
1813 #  undef INVALID_CASES                           1813 #  undef INVALID_CASES
1814 #  undef CHECK_NAME_CASE                         1814 #  undef CHECK_NAME_CASE
1815 #  undef CHECK_NAME_CASES                        1815 #  undef CHECK_NAME_CASES
1816 #  undef CHECK_NMSTRT_CASE                       1816 #  undef CHECK_NMSTRT_CASE
1817 #  undef CHECK_NMSTRT_CASES                      1817 #  undef CHECK_NMSTRT_CASES
1818                                                  1818 
1819 #endif /* XML_TOK_IMPL_C */                      1819 #endif /* XML_TOK_IMPL_C */
1820                                                  1820