Geant4 Cross Reference |
1 /* This file is included (from xmltok.c, 1-3 t 1 /* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)! 2 __ __ 2 __ __ _ 3 ___\ \/ /_ __ __ _| 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_| 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parse 7 |_| XML parser 8 8 9 Copyright (c) 1997-2000 Thai Open Source So 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coope 10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> 11 Copyright (c) 2002 Fred L. Drake, Jr. 11 Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 12 Copyright (c) 2002-2016 Karl Waclawek <karl 12 Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> 13 Copyright (c) 2016-2022 Sebastian Pipping < 13 Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org> 14 Copyright (c) 2017 Rhodri James <rhodr 14 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 15 Copyright (c) 2018 Benjamin Peterson < 15 Copyright (c) 2018 Benjamin Peterson <benjamin@python.org> 16 Copyright (c) 2018 Anton Maklakov <ant 16 Copyright (c) 2018 Anton Maklakov <antmak.pub@gmail.com> 17 Copyright (c) 2019 David Loffredo <lof 17 Copyright (c) 2019 David Loffredo <loffredo@steptools.com> 18 Copyright (c) 2020 Boris Kolpackov <bo 18 Copyright (c) 2020 Boris Kolpackov <boris@codesynthesis.com> 19 Copyright (c) 2022 Martin Ettl <ettl.m 19 Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com> 20 Licensed under the MIT license: 20 Licensed under the MIT license: 21 21 22 Permission is hereby granted, free of cha 22 Permission is hereby granted, free of charge, to any person obtaining 23 a copy of this software and associat 23 a copy of this software and associated documentation files (the 24 "Software"), to deal in the Software w 24 "Software"), to deal in the Software without restriction, including 25 without limitation the rights to use, c 25 without limitation the rights to use, copy, modify, merge, publish, 26 distribute, sublicense, and/or sell copies 26 distribute, sublicense, and/or sell copies of the Software, and to permit 27 persons to whom the Software is furnish 27 persons to whom the Software is furnished to do so, subject to the 28 following conditions: 28 following conditions: 29 29 30 The above copyright notice and this permis 30 The above copyright notice and this permission notice shall be included 31 in all copies or substantial portions of th 31 in all copies or substantial portions of the Software. 32 32 33 THE SOFTWARE IS PROVIDED "AS IS", WIT 33 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 34 EXPRESS OR IMPLIED, INCLUDING BUT NOT L 34 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 35 MERCHANTABILITY, FITNESS FOR A PARTICULAR P 35 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 36 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HO 36 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 37 DAMAGES OR OTHER LIABILITY, WHETHER IN AN 37 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 38 OTHERWISE, ARISING FROM, OUT OF OR IN CONNE 38 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 39 USE OR OTHER DEALINGS IN THE SOFTWARE. 39 USE OR OTHER DEALINGS IN THE SOFTWARE. 40 */ 40 */ 41 41 42 #ifdef XML_TOK_IMPL_C 42 #ifdef XML_TOK_IMPL_C 43 43 44 # ifndef IS_INVALID_CHAR // i.e. for UTF-16 a 44 # ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined 45 # define IS_INVALID_CHAR(enc, ptr, n) (0) 45 # define IS_INVALID_CHAR(enc, ptr, n) (0) 46 # endif 46 # endif 47 47 48 # define INVALID_LEAD_CASE(n, ptr, nextTokPtr 48 # define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ 49 case BT_LEAD##n: 49 case BT_LEAD##n: \ 50 if (end - ptr < n) 50 if (end - ptr < n) \ 51 return XML_TOK_PARTIAL_CHAR; 51 return XML_TOK_PARTIAL_CHAR; \ 52 if (IS_INVALID_CHAR(enc, ptr, n)) { 52 if (IS_INVALID_CHAR(enc, ptr, n)) { \ 53 *(nextTokPtr) = (ptr); 53 *(nextTokPtr) = (ptr); \ 54 return XML_TOK_INVALID; 54 return XML_TOK_INVALID; \ 55 } 55 } \ 56 ptr += n; 56 ptr += n; \ 57 break; 57 break; 58 58 59 # define INVALID_CASES(ptr, nextTokPtr) 59 # define INVALID_CASES(ptr, nextTokPtr) \ 60 INVALID_LEAD_CASE(2, ptr, nextTokPtr) 60 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ 61 INVALID_LEAD_CASE(3, ptr, nextTokPtr) 61 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ 62 INVALID_LEAD_CASE(4, ptr, nextTokPtr) 62 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ 63 case BT_NONXML: 63 case BT_NONXML: \ 64 case BT_MALFORM: 64 case BT_MALFORM: \ 65 case BT_TRAIL: 65 case BT_TRAIL: \ 66 *(nextTokPtr) = (ptr); 66 *(nextTokPtr) = (ptr); \ 67 return XML_TOK_INVALID; 67 return XML_TOK_INVALID; 68 68 69 # define CHECK_NAME_CASE(n, enc, ptr, end, ne 69 # define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ 70 case BT_LEAD##n: 70 case BT_LEAD##n: \ 71 if (end - ptr < n) 71 if (end - ptr < n) \ 72 return XML_TOK_PARTIAL_CHAR; 72 return XML_TOK_PARTIAL_CHAR; \ 73 if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_N 73 if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \ 74 *nextTokPtr = ptr; 74 *nextTokPtr = ptr; \ 75 return XML_TOK_INVALID; 75 return XML_TOK_INVALID; \ 76 } 76 } \ 77 ptr += n; 77 ptr += n; \ 78 break; 78 break; 79 79 80 # define CHECK_NAME_CASES(enc, ptr, end, next 80 # define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ 81 case BT_NONASCII: 81 case BT_NONASCII: \ 82 if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { 82 if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \ 83 *nextTokPtr = ptr; 83 *nextTokPtr = ptr; \ 84 return XML_TOK_INVALID; 84 return XML_TOK_INVALID; \ 85 } 85 } \ 86 /* fall through */ 86 /* fall through */ \ 87 case BT_NMSTRT: 87 case BT_NMSTRT: \ 88 case BT_HEX: 88 case BT_HEX: \ 89 case BT_DIGIT: 89 case BT_DIGIT: \ 90 case BT_NAME: 90 case BT_NAME: \ 91 case BT_MINUS: 91 case BT_MINUS: \ 92 ptr += MINBPC(enc); 92 ptr += MINBPC(enc); \ 93 break; 93 break; \ 94 CHECK_NAME_CASE(2, enc, ptr, end, nextTokP 94 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ 95 CHECK_NAME_CASE(3, enc, ptr, end, nextTokP 95 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ 96 CHECK_NAME_CASE(4, enc, ptr, end, nextTokP 96 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) 97 97 98 # define CHECK_NMSTRT_CASE(n, enc, ptr, end, 98 # define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ 99 case BT_LEAD##n: 99 case BT_LEAD##n: \ 100 if ((end) - (ptr) < (n)) 100 if ((end) - (ptr) < (n)) \ 101 return XML_TOK_PARTIAL_CHAR; 101 return XML_TOK_PARTIAL_CHAR; \ 102 if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_N 102 if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \ 103 *nextTokPtr = ptr; 103 *nextTokPtr = ptr; \ 104 return XML_TOK_INVALID; 104 return XML_TOK_INVALID; \ 105 } 105 } \ 106 ptr += n; 106 ptr += n; \ 107 break; 107 break; 108 108 109 # define CHECK_NMSTRT_CASES(enc, ptr, end, ne 109 # define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ 110 case BT_NONASCII: 110 case BT_NONASCII: \ 111 if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 111 if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ 112 *nextTokPtr = ptr; 112 *nextTokPtr = ptr; \ 113 return XML_TOK_INVALID; 113 return XML_TOK_INVALID; \ 114 } 114 } \ 115 /* fall through */ 115 /* fall through */ \ 116 case BT_NMSTRT: 116 case BT_NMSTRT: \ 117 case BT_HEX: 117 case BT_HEX: \ 118 ptr += MINBPC(enc); 118 ptr += MINBPC(enc); \ 119 break; 119 break; \ 120 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTo 120 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ 121 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTo 121 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ 122 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTo 122 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) 123 123 124 # ifndef PREFIX 124 # ifndef PREFIX 125 # define PREFIX(ident) ident 125 # define PREFIX(ident) ident 126 # endif 126 # endif 127 127 128 # define HAS_CHARS(enc, ptr, end, count) 128 # define HAS_CHARS(enc, ptr, end, count) \ 129 ((end) - (ptr) >= ((count)*MINBPC(enc))) 129 ((end) - (ptr) >= ((count)*MINBPC(enc))) 130 130 131 # define HAS_CHAR(enc, ptr, end) HAS_CHARS(en 131 # define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1) 132 132 133 # define REQUIRE_CHARS(enc, ptr, end, count) 133 # define REQUIRE_CHARS(enc, ptr, end, count) \ 134 { 134 { \ 135 if (! HAS_CHARS(enc, ptr, end, count)) { 135 if (! HAS_CHARS(enc, ptr, end, count)) { \ 136 return XML_TOK_PARTIAL; 136 return XML_TOK_PARTIAL; \ 137 } 137 } \ 138 } 138 } 139 139 140 # define REQUIRE_CHAR(enc, ptr, end) REQUIRE_ 140 # define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1) 141 141 142 /* ptr points to character following "<!-" */ 142 /* ptr points to character following "<!-" */ 143 143 144 static int PTRCALL 144 static int PTRCALL 145 PREFIX(scanComment)(const ENCODING *enc, const 145 PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, 146 const char **nextTokPtr) { 146 const char **nextTokPtr) { 147 if (HAS_CHAR(enc, ptr, end)) { 147 if (HAS_CHAR(enc, ptr, end)) { 148 if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) 148 if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 149 *nextTokPtr = ptr; 149 *nextTokPtr = ptr; 150 return XML_TOK_INVALID; 150 return XML_TOK_INVALID; 151 } 151 } 152 ptr += MINBPC(enc); 152 ptr += MINBPC(enc); 153 while (HAS_CHAR(enc, ptr, end)) { 153 while (HAS_CHAR(enc, ptr, end)) { 154 switch (BYTE_TYPE(enc, ptr)) { 154 switch (BYTE_TYPE(enc, ptr)) { 155 INVALID_CASES(ptr, nextTokPtr) 155 INVALID_CASES(ptr, nextTokPtr) 156 case BT_MINUS: 156 case BT_MINUS: 157 ptr += MINBPC(enc); 157 ptr += MINBPC(enc); 158 REQUIRE_CHAR(enc, ptr, end); 158 REQUIRE_CHAR(enc, ptr, end); 159 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS 159 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 160 ptr += MINBPC(enc); 160 ptr += MINBPC(enc); 161 REQUIRE_CHAR(enc, ptr, end); 161 REQUIRE_CHAR(enc, ptr, end); 162 if (! CHAR_MATCHES(enc, ptr, ASCII_G 162 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { 163 *nextTokPtr = ptr; 163 *nextTokPtr = ptr; 164 return XML_TOK_INVALID; 164 return XML_TOK_INVALID; 165 } 165 } 166 *nextTokPtr = ptr + MINBPC(enc); 166 *nextTokPtr = ptr + MINBPC(enc); 167 return XML_TOK_COMMENT; 167 return XML_TOK_COMMENT; 168 } 168 } 169 break; 169 break; 170 default: 170 default: 171 ptr += MINBPC(enc); 171 ptr += MINBPC(enc); 172 break; 172 break; 173 } 173 } 174 } 174 } 175 } 175 } 176 return XML_TOK_PARTIAL; 176 return XML_TOK_PARTIAL; 177 } 177 } 178 178 179 /* ptr points to character following "<!" */ 179 /* ptr points to character following "<!" */ 180 180 181 static int PTRCALL 181 static int PTRCALL 182 PREFIX(scanDecl)(const ENCODING *enc, const ch 182 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, 183 const char **nextTokPtr) { 183 const char **nextTokPtr) { 184 REQUIRE_CHAR(enc, ptr, end); 184 REQUIRE_CHAR(enc, ptr, end); 185 switch (BYTE_TYPE(enc, ptr)) { 185 switch (BYTE_TYPE(enc, ptr)) { 186 case BT_MINUS: 186 case BT_MINUS: 187 return PREFIX(scanComment)(enc, ptr + MINB 187 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 188 case BT_LSQB: 188 case BT_LSQB: 189 *nextTokPtr = ptr + MINBPC(enc); 189 *nextTokPtr = ptr + MINBPC(enc); 190 return XML_TOK_COND_SECT_OPEN; 190 return XML_TOK_COND_SECT_OPEN; 191 case BT_NMSTRT: 191 case BT_NMSTRT: 192 case BT_HEX: 192 case BT_HEX: 193 ptr += MINBPC(enc); 193 ptr += MINBPC(enc); 194 break; 194 break; 195 default: 195 default: 196 *nextTokPtr = ptr; 196 *nextTokPtr = ptr; 197 return XML_TOK_INVALID; 197 return XML_TOK_INVALID; 198 } 198 } 199 while (HAS_CHAR(enc, ptr, end)) { 199 while (HAS_CHAR(enc, ptr, end)) { 200 switch (BYTE_TYPE(enc, ptr)) { 200 switch (BYTE_TYPE(enc, ptr)) { 201 case BT_PERCNT: 201 case BT_PERCNT: 202 REQUIRE_CHARS(enc, ptr, end, 2); 202 REQUIRE_CHARS(enc, ptr, end, 2); 203 /* don't allow <!ENTITY% foo "whatever"> 203 /* don't allow <!ENTITY% foo "whatever"> */ 204 switch (BYTE_TYPE(enc, ptr + MINBPC(enc) 204 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { 205 case BT_S: 205 case BT_S: 206 case BT_CR: 206 case BT_CR: 207 case BT_LF: 207 case BT_LF: 208 case BT_PERCNT: 208 case BT_PERCNT: 209 *nextTokPtr = ptr; 209 *nextTokPtr = ptr; 210 return XML_TOK_INVALID; 210 return XML_TOK_INVALID; 211 } 211 } 212 /* fall through */ 212 /* fall through */ 213 case BT_S: 213 case BT_S: 214 case BT_CR: 214 case BT_CR: 215 case BT_LF: 215 case BT_LF: 216 *nextTokPtr = ptr; 216 *nextTokPtr = ptr; 217 return XML_TOK_DECL_OPEN; 217 return XML_TOK_DECL_OPEN; 218 case BT_NMSTRT: 218 case BT_NMSTRT: 219 case BT_HEX: 219 case BT_HEX: 220 ptr += MINBPC(enc); 220 ptr += MINBPC(enc); 221 break; 221 break; 222 default: 222 default: 223 *nextTokPtr = ptr; 223 *nextTokPtr = ptr; 224 return XML_TOK_INVALID; 224 return XML_TOK_INVALID; 225 } 225 } 226 } 226 } 227 return XML_TOK_PARTIAL; 227 return XML_TOK_PARTIAL; 228 } 228 } 229 229 230 static int PTRCALL 230 static int PTRCALL 231 PREFIX(checkPiTarget)(const ENCODING *enc, con 231 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, 232 int *tokPtr) { 232 int *tokPtr) { 233 int upper = 0; 233 int upper = 0; 234 UNUSED_P(enc); 234 UNUSED_P(enc); 235 *tokPtr = XML_TOK_PI; 235 *tokPtr = XML_TOK_PI; 236 if (end - ptr != MINBPC(enc) * 3) 236 if (end - ptr != MINBPC(enc) * 3) 237 return 1; 237 return 1; 238 switch (BYTE_TO_ASCII(enc, ptr)) { 238 switch (BYTE_TO_ASCII(enc, ptr)) { 239 case ASCII_x: 239 case ASCII_x: 240 break; 240 break; 241 case ASCII_X: 241 case ASCII_X: 242 upper = 1; 242 upper = 1; 243 break; 243 break; 244 default: 244 default: 245 return 1; 245 return 1; 246 } 246 } 247 ptr += MINBPC(enc); 247 ptr += MINBPC(enc); 248 switch (BYTE_TO_ASCII(enc, ptr)) { 248 switch (BYTE_TO_ASCII(enc, ptr)) { 249 case ASCII_m: 249 case ASCII_m: 250 break; 250 break; 251 case ASCII_M: 251 case ASCII_M: 252 upper = 1; 252 upper = 1; 253 break; 253 break; 254 default: 254 default: 255 return 1; 255 return 1; 256 } 256 } 257 ptr += MINBPC(enc); 257 ptr += MINBPC(enc); 258 switch (BYTE_TO_ASCII(enc, ptr)) { 258 switch (BYTE_TO_ASCII(enc, ptr)) { 259 case ASCII_l: 259 case ASCII_l: 260 break; 260 break; 261 case ASCII_L: 261 case ASCII_L: 262 upper = 1; 262 upper = 1; 263 break; 263 break; 264 default: 264 default: 265 return 1; 265 return 1; 266 } 266 } 267 if (upper) 267 if (upper) 268 return 0; 268 return 0; 269 *tokPtr = XML_TOK_XML_DECL; 269 *tokPtr = XML_TOK_XML_DECL; 270 return 1; 270 return 1; 271 } 271 } 272 272 273 /* ptr points to character following "<?" */ 273 /* ptr points to character following "<?" */ 274 274 275 static int PTRCALL 275 static int PTRCALL 276 PREFIX(scanPi)(const ENCODING *enc, const char 276 PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, 277 const char **nextTokPtr) { 277 const char **nextTokPtr) { 278 int tok; 278 int tok; 279 const char *target = ptr; 279 const char *target = ptr; 280 REQUIRE_CHAR(enc, ptr, end); 280 REQUIRE_CHAR(enc, ptr, end); 281 switch (BYTE_TYPE(enc, ptr)) { 281 switch (BYTE_TYPE(enc, ptr)) { 282 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP 282 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 283 default: 283 default: 284 *nextTokPtr = ptr; 284 *nextTokPtr = ptr; 285 return XML_TOK_INVALID; 285 return XML_TOK_INVALID; 286 } 286 } 287 while (HAS_CHAR(enc, ptr, end)) { 287 while (HAS_CHAR(enc, ptr, end)) { 288 switch (BYTE_TYPE(enc, ptr)) { 288 switch (BYTE_TYPE(enc, ptr)) { 289 CHECK_NAME_CASES(enc, ptr, end, nextTokP 289 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 290 case BT_S: 290 case BT_S: 291 case BT_CR: 291 case BT_CR: 292 case BT_LF: 292 case BT_LF: 293 if (! PREFIX(checkPiTarget)(enc, target, 293 if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 294 *nextTokPtr = ptr; 294 *nextTokPtr = ptr; 295 return XML_TOK_INVALID; 295 return XML_TOK_INVALID; 296 } 296 } 297 ptr += MINBPC(enc); 297 ptr += MINBPC(enc); 298 while (HAS_CHAR(enc, ptr, end)) { 298 while (HAS_CHAR(enc, ptr, end)) { 299 switch (BYTE_TYPE(enc, ptr)) { 299 switch (BYTE_TYPE(enc, ptr)) { 300 INVALID_CASES(ptr, nextTokPtr) 300 INVALID_CASES(ptr, nextTokPtr) 301 case BT_QUEST: 301 case BT_QUEST: 302 ptr += MINBPC(enc); 302 ptr += MINBPC(enc); 303 REQUIRE_CHAR(enc, ptr, end); 303 REQUIRE_CHAR(enc, ptr, end); 304 if (CHAR_MATCHES(enc, ptr, ASCII_GT) 304 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 305 *nextTokPtr = ptr + MINBPC(enc); 305 *nextTokPtr = ptr + MINBPC(enc); 306 return tok; 306 return tok; 307 } 307 } 308 break; 308 break; 309 default: 309 default: 310 ptr += MINBPC(enc); 310 ptr += MINBPC(enc); 311 break; 311 break; 312 } 312 } 313 } 313 } 314 return XML_TOK_PARTIAL; 314 return XML_TOK_PARTIAL; 315 case BT_QUEST: 315 case BT_QUEST: 316 if (! PREFIX(checkPiTarget)(enc, target, 316 if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 317 *nextTokPtr = ptr; 317 *nextTokPtr = ptr; 318 return XML_TOK_INVALID; 318 return XML_TOK_INVALID; 319 } 319 } 320 ptr += MINBPC(enc); 320 ptr += MINBPC(enc); 321 REQUIRE_CHAR(enc, ptr, end); 321 REQUIRE_CHAR(enc, ptr, end); 322 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 322 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 323 *nextTokPtr = ptr + MINBPC(enc); 323 *nextTokPtr = ptr + MINBPC(enc); 324 return tok; 324 return tok; 325 } 325 } 326 /* fall through */ 326 /* fall through */ 327 default: 327 default: 328 *nextTokPtr = ptr; 328 *nextTokPtr = ptr; 329 return XML_TOK_INVALID; 329 return XML_TOK_INVALID; 330 } 330 } 331 } 331 } 332 return XML_TOK_PARTIAL; 332 return XML_TOK_PARTIAL; 333 } 333 } 334 334 335 static int PTRCALL 335 static int PTRCALL 336 PREFIX(scanCdataSection)(const ENCODING *enc, 336 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, 337 const char **nextTokP 337 const char **nextTokPtr) { 338 static const char CDATA_LSQB[] 338 static const char CDATA_LSQB[] 339 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, A 339 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB}; 340 int i; 340 int i; 341 UNUSED_P(enc); 341 UNUSED_P(enc); 342 /* CDATA[ */ 342 /* CDATA[ */ 343 REQUIRE_CHARS(enc, ptr, end, 6); 343 REQUIRE_CHARS(enc, ptr, end, 6); 344 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) 344 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { 345 if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i] 345 if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { 346 *nextTokPtr = ptr; 346 *nextTokPtr = ptr; 347 return XML_TOK_INVALID; 347 return XML_TOK_INVALID; 348 } 348 } 349 } 349 } 350 *nextTokPtr = ptr; 350 *nextTokPtr = ptr; 351 return XML_TOK_CDATA_SECT_OPEN; 351 return XML_TOK_CDATA_SECT_OPEN; 352 } 352 } 353 353 354 static int PTRCALL 354 static int PTRCALL 355 PREFIX(cdataSectionTok)(const ENCODING *enc, c 355 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, 356 const char **nextTokPt 356 const char **nextTokPtr) { 357 if (ptr >= end) 357 if (ptr >= end) 358 return XML_TOK_NONE; 358 return XML_TOK_NONE; 359 if (MINBPC(enc) > 1) { 359 if (MINBPC(enc) > 1) { 360 size_t n = end - ptr; 360 size_t n = end - ptr; 361 if (n & (MINBPC(enc) - 1)) { 361 if (n & (MINBPC(enc) - 1)) { 362 n &= ~(MINBPC(enc) - 1); 362 n &= ~(MINBPC(enc) - 1); 363 if (n == 0) 363 if (n == 0) 364 return XML_TOK_PARTIAL; 364 return XML_TOK_PARTIAL; 365 end = ptr + n; 365 end = ptr + n; 366 } 366 } 367 } 367 } 368 switch (BYTE_TYPE(enc, ptr)) { 368 switch (BYTE_TYPE(enc, ptr)) { 369 case BT_RSQB: 369 case BT_RSQB: 370 ptr += MINBPC(enc); 370 ptr += MINBPC(enc); 371 REQUIRE_CHAR(enc, ptr, end); 371 REQUIRE_CHAR(enc, ptr, end); 372 if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 372 if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 373 break; 373 break; 374 ptr += MINBPC(enc); 374 ptr += MINBPC(enc); 375 REQUIRE_CHAR(enc, ptr, end); 375 REQUIRE_CHAR(enc, ptr, end); 376 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { 376 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { 377 ptr -= MINBPC(enc); 377 ptr -= MINBPC(enc); 378 break; 378 break; 379 } 379 } 380 *nextTokPtr = ptr + MINBPC(enc); 380 *nextTokPtr = ptr + MINBPC(enc); 381 return XML_TOK_CDATA_SECT_CLOSE; 381 return XML_TOK_CDATA_SECT_CLOSE; 382 case BT_CR: 382 case BT_CR: 383 ptr += MINBPC(enc); 383 ptr += MINBPC(enc); 384 REQUIRE_CHAR(enc, ptr, end); 384 REQUIRE_CHAR(enc, ptr, end); 385 if (BYTE_TYPE(enc, ptr) == BT_LF) 385 if (BYTE_TYPE(enc, ptr) == BT_LF) 386 ptr += MINBPC(enc); 386 ptr += MINBPC(enc); 387 *nextTokPtr = ptr; 387 *nextTokPtr = ptr; 388 return XML_TOK_DATA_NEWLINE; 388 return XML_TOK_DATA_NEWLINE; 389 case BT_LF: 389 case BT_LF: 390 *nextTokPtr = ptr + MINBPC(enc); 390 *nextTokPtr = ptr + MINBPC(enc); 391 return XML_TOK_DATA_NEWLINE; 391 return XML_TOK_DATA_NEWLINE; 392 INVALID_CASES(ptr, nextTokPtr) 392 INVALID_CASES(ptr, nextTokPtr) 393 default: 393 default: 394 ptr += MINBPC(enc); 394 ptr += MINBPC(enc); 395 break; 395 break; 396 } 396 } 397 while (HAS_CHAR(enc, ptr, end)) { 397 while (HAS_CHAR(enc, ptr, end)) { 398 switch (BYTE_TYPE(enc, ptr)) { 398 switch (BYTE_TYPE(enc, ptr)) { 399 # define LEAD_CASE(n) 399 # define LEAD_CASE(n) \ 400 case BT_LEAD##n: 400 case BT_LEAD##n: \ 401 if (end - ptr < n || IS_INVALID_CHAR(enc, 401 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 402 *nextTokPtr = ptr; 402 *nextTokPtr = ptr; \ 403 return XML_TOK_DATA_CHARS; 403 return XML_TOK_DATA_CHARS; \ 404 } 404 } \ 405 ptr += n; 405 ptr += n; \ 406 break; 406 break; 407 LEAD_CASE(2) 407 LEAD_CASE(2) 408 LEAD_CASE(3) 408 LEAD_CASE(3) 409 LEAD_CASE(4) 409 LEAD_CASE(4) 410 # undef LEAD_CASE 410 # undef LEAD_CASE 411 case BT_NONXML: 411 case BT_NONXML: 412 case BT_MALFORM: 412 case BT_MALFORM: 413 case BT_TRAIL: 413 case BT_TRAIL: 414 case BT_CR: 414 case BT_CR: 415 case BT_LF: 415 case BT_LF: 416 case BT_RSQB: 416 case BT_RSQB: 417 *nextTokPtr = ptr; 417 *nextTokPtr = ptr; 418 return XML_TOK_DATA_CHARS; 418 return XML_TOK_DATA_CHARS; 419 default: 419 default: 420 ptr += MINBPC(enc); 420 ptr += MINBPC(enc); 421 break; 421 break; 422 } 422 } 423 } 423 } 424 *nextTokPtr = ptr; 424 *nextTokPtr = ptr; 425 return XML_TOK_DATA_CHARS; 425 return XML_TOK_DATA_CHARS; 426 } 426 } 427 427 428 /* ptr points to character following "</" */ 428 /* ptr points to character following "</" */ 429 429 430 static int PTRCALL 430 static int PTRCALL 431 PREFIX(scanEndTag)(const ENCODING *enc, const 431 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, 432 const char **nextTokPtr) { 432 const char **nextTokPtr) { 433 REQUIRE_CHAR(enc, ptr, end); 433 REQUIRE_CHAR(enc, ptr, end); 434 switch (BYTE_TYPE(enc, ptr)) { 434 switch (BYTE_TYPE(enc, ptr)) { 435 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP 435 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 436 default: 436 default: 437 *nextTokPtr = ptr; 437 *nextTokPtr = ptr; 438 return XML_TOK_INVALID; 438 return XML_TOK_INVALID; 439 } 439 } 440 while (HAS_CHAR(enc, ptr, end)) { 440 while (HAS_CHAR(enc, ptr, end)) { 441 switch (BYTE_TYPE(enc, ptr)) { 441 switch (BYTE_TYPE(enc, ptr)) { 442 CHECK_NAME_CASES(enc, ptr, end, nextTokP 442 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 443 case BT_S: 443 case BT_S: 444 case BT_CR: 444 case BT_CR: 445 case BT_LF: 445 case BT_LF: 446 for (ptr += MINBPC(enc); HAS_CHAR(enc, p 446 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { 447 switch (BYTE_TYPE(enc, ptr)) { 447 switch (BYTE_TYPE(enc, ptr)) { 448 case BT_S: 448 case BT_S: 449 case BT_CR: 449 case BT_CR: 450 case BT_LF: 450 case BT_LF: 451 break; 451 break; 452 case BT_GT: 452 case BT_GT: 453 *nextTokPtr = ptr + MINBPC(enc); 453 *nextTokPtr = ptr + MINBPC(enc); 454 return XML_TOK_END_TAG; 454 return XML_TOK_END_TAG; 455 default: 455 default: 456 *nextTokPtr = ptr; 456 *nextTokPtr = ptr; 457 return XML_TOK_INVALID; 457 return XML_TOK_INVALID; 458 } 458 } 459 } 459 } 460 return XML_TOK_PARTIAL; 460 return XML_TOK_PARTIAL; 461 # ifdef XML_NS 461 # ifdef XML_NS 462 case BT_COLON: 462 case BT_COLON: 463 /* no need to check qname syntax here, 463 /* no need to check qname syntax here, 464 since end-tag must match exactly */ 464 since end-tag must match exactly */ 465 ptr += MINBPC(enc); 465 ptr += MINBPC(enc); 466 break; 466 break; 467 # endif 467 # endif 468 case BT_GT: 468 case BT_GT: 469 *nextTokPtr = ptr + MINBPC(enc); 469 *nextTokPtr = ptr + MINBPC(enc); 470 return XML_TOK_END_TAG; 470 return XML_TOK_END_TAG; 471 default: 471 default: 472 *nextTokPtr = ptr; 472 *nextTokPtr = ptr; 473 return XML_TOK_INVALID; 473 return XML_TOK_INVALID; 474 } 474 } 475 } 475 } 476 return XML_TOK_PARTIAL; 476 return XML_TOK_PARTIAL; 477 } 477 } 478 478 479 /* ptr points to character following "&#X" */ 479 /* ptr points to character following "&#X" */ 480 480 481 static int PTRCALL 481 static int PTRCALL 482 PREFIX(scanHexCharRef)(const ENCODING *enc, co 482 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, 483 const char **nextTokPtr 483 const char **nextTokPtr) { 484 if (HAS_CHAR(enc, ptr, end)) { 484 if (HAS_CHAR(enc, ptr, end)) { 485 switch (BYTE_TYPE(enc, ptr)) { 485 switch (BYTE_TYPE(enc, ptr)) { 486 case BT_DIGIT: 486 case BT_DIGIT: 487 case BT_HEX: 487 case BT_HEX: 488 break; 488 break; 489 default: 489 default: 490 *nextTokPtr = ptr; 490 *nextTokPtr = ptr; 491 return XML_TOK_INVALID; 491 return XML_TOK_INVALID; 492 } 492 } 493 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr 493 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { 494 switch (BYTE_TYPE(enc, ptr)) { 494 switch (BYTE_TYPE(enc, ptr)) { 495 case BT_DIGIT: 495 case BT_DIGIT: 496 case BT_HEX: 496 case BT_HEX: 497 break; 497 break; 498 case BT_SEMI: 498 case BT_SEMI: 499 *nextTokPtr = ptr + MINBPC(enc); 499 *nextTokPtr = ptr + MINBPC(enc); 500 return XML_TOK_CHAR_REF; 500 return XML_TOK_CHAR_REF; 501 default: 501 default: 502 *nextTokPtr = ptr; 502 *nextTokPtr = ptr; 503 return XML_TOK_INVALID; 503 return XML_TOK_INVALID; 504 } 504 } 505 } 505 } 506 } 506 } 507 return XML_TOK_PARTIAL; 507 return XML_TOK_PARTIAL; 508 } 508 } 509 509 510 /* ptr points to character following "&#" */ 510 /* ptr points to character following "&#" */ 511 511 512 static int PTRCALL 512 static int PTRCALL 513 PREFIX(scanCharRef)(const ENCODING *enc, const 513 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, 514 const char **nextTokPtr) { 514 const char **nextTokPtr) { 515 if (HAS_CHAR(enc, ptr, end)) { 515 if (HAS_CHAR(enc, ptr, end)) { 516 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 516 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 517 return PREFIX(scanHexCharRef)(enc, ptr + 517 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 518 switch (BYTE_TYPE(enc, ptr)) { 518 switch (BYTE_TYPE(enc, ptr)) { 519 case BT_DIGIT: 519 case BT_DIGIT: 520 break; 520 break; 521 default: 521 default: 522 *nextTokPtr = ptr; 522 *nextTokPtr = ptr; 523 return XML_TOK_INVALID; 523 return XML_TOK_INVALID; 524 } 524 } 525 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr 525 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { 526 switch (BYTE_TYPE(enc, ptr)) { 526 switch (BYTE_TYPE(enc, ptr)) { 527 case BT_DIGIT: 527 case BT_DIGIT: 528 break; 528 break; 529 case BT_SEMI: 529 case BT_SEMI: 530 *nextTokPtr = ptr + MINBPC(enc); 530 *nextTokPtr = ptr + MINBPC(enc); 531 return XML_TOK_CHAR_REF; 531 return XML_TOK_CHAR_REF; 532 default: 532 default: 533 *nextTokPtr = ptr; 533 *nextTokPtr = ptr; 534 return XML_TOK_INVALID; 534 return XML_TOK_INVALID; 535 } 535 } 536 } 536 } 537 } 537 } 538 return XML_TOK_PARTIAL; 538 return XML_TOK_PARTIAL; 539 } 539 } 540 540 541 /* ptr points to character following "&" */ 541 /* ptr points to character following "&" */ 542 542 543 static int PTRCALL 543 static int PTRCALL 544 PREFIX(scanRef)(const ENCODING *enc, const cha 544 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, 545 const char **nextTokPtr) { 545 const char **nextTokPtr) { 546 REQUIRE_CHAR(enc, ptr, end); 546 REQUIRE_CHAR(enc, ptr, end); 547 switch (BYTE_TYPE(enc, ptr)) { 547 switch (BYTE_TYPE(enc, ptr)) { 548 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP 548 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 549 case BT_NUM: 549 case BT_NUM: 550 return PREFIX(scanCharRef)(enc, ptr + MINB 550 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 551 default: 551 default: 552 *nextTokPtr = ptr; 552 *nextTokPtr = ptr; 553 return XML_TOK_INVALID; 553 return XML_TOK_INVALID; 554 } 554 } 555 while (HAS_CHAR(enc, ptr, end)) { 555 while (HAS_CHAR(enc, ptr, end)) { 556 switch (BYTE_TYPE(enc, ptr)) { 556 switch (BYTE_TYPE(enc, ptr)) { 557 CHECK_NAME_CASES(enc, ptr, end, nextTokP 557 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 558 case BT_SEMI: 558 case BT_SEMI: 559 *nextTokPtr = ptr + MINBPC(enc); 559 *nextTokPtr = ptr + MINBPC(enc); 560 return XML_TOK_ENTITY_REF; 560 return XML_TOK_ENTITY_REF; 561 default: 561 default: 562 *nextTokPtr = ptr; 562 *nextTokPtr = ptr; 563 return XML_TOK_INVALID; 563 return XML_TOK_INVALID; 564 } 564 } 565 } 565 } 566 return XML_TOK_PARTIAL; 566 return XML_TOK_PARTIAL; 567 } 567 } 568 568 569 /* ptr points to character following first cha 569 /* ptr points to character following first character of attribute name */ 570 570 571 static int PTRCALL 571 static int PTRCALL 572 PREFIX(scanAtts)(const ENCODING *enc, const ch 572 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, 573 const char **nextTokPtr) { 573 const char **nextTokPtr) { 574 # ifdef XML_NS 574 # ifdef XML_NS 575 int hadColon = 0; 575 int hadColon = 0; 576 # endif 576 # endif 577 while (HAS_CHAR(enc, ptr, end)) { 577 while (HAS_CHAR(enc, ptr, end)) { 578 switch (BYTE_TYPE(enc, ptr)) { 578 switch (BYTE_TYPE(enc, ptr)) { 579 CHECK_NAME_CASES(enc, ptr, end, nextTokP 579 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 580 # ifdef XML_NS 580 # ifdef XML_NS 581 case BT_COLON: 581 case BT_COLON: 582 if (hadColon) { 582 if (hadColon) { 583 *nextTokPtr = ptr; 583 *nextTokPtr = ptr; 584 return XML_TOK_INVALID; 584 return XML_TOK_INVALID; 585 } 585 } 586 hadColon = 1; 586 hadColon = 1; 587 ptr += MINBPC(enc); 587 ptr += MINBPC(enc); 588 REQUIRE_CHAR(enc, ptr, end); 588 REQUIRE_CHAR(enc, ptr, end); 589 switch (BYTE_TYPE(enc, ptr)) { 589 switch (BYTE_TYPE(enc, ptr)) { 590 CHECK_NMSTRT_CASES(enc, ptr, end, next 590 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 591 default: 591 default: 592 *nextTokPtr = ptr; 592 *nextTokPtr = ptr; 593 return XML_TOK_INVALID; 593 return XML_TOK_INVALID; 594 } 594 } 595 break; 595 break; 596 # endif 596 # endif 597 case BT_S: 597 case BT_S: 598 case BT_CR: 598 case BT_CR: 599 case BT_LF: 599 case BT_LF: 600 for (;;) { 600 for (;;) { 601 int t; 601 int t; 602 602 603 ptr += MINBPC(enc); 603 ptr += MINBPC(enc); 604 REQUIRE_CHAR(enc, ptr, end); 604 REQUIRE_CHAR(enc, ptr, end); 605 t = BYTE_TYPE(enc, ptr); 605 t = BYTE_TYPE(enc, ptr); 606 if (t == BT_EQUALS) 606 if (t == BT_EQUALS) 607 break; 607 break; 608 switch (t) { 608 switch (t) { 609 case BT_S: 609 case BT_S: 610 case BT_LF: 610 case BT_LF: 611 case BT_CR: 611 case BT_CR: 612 break; 612 break; 613 default: 613 default: 614 *nextTokPtr = ptr; 614 *nextTokPtr = ptr; 615 return XML_TOK_INVALID; 615 return XML_TOK_INVALID; 616 } 616 } 617 } 617 } 618 /* fall through */ 618 /* fall through */ 619 case BT_EQUALS: { 619 case BT_EQUALS: { 620 int open; 620 int open; 621 # ifdef XML_NS 621 # ifdef XML_NS 622 hadColon = 0; 622 hadColon = 0; 623 # endif 623 # endif 624 for (;;) { 624 for (;;) { 625 ptr += MINBPC(enc); 625 ptr += MINBPC(enc); 626 REQUIRE_CHAR(enc, ptr, end); 626 REQUIRE_CHAR(enc, ptr, end); 627 open = BYTE_TYPE(enc, ptr); 627 open = BYTE_TYPE(enc, ptr); 628 if (open == BT_QUOT || open == BT_APOS 628 if (open == BT_QUOT || open == BT_APOS) 629 break; 629 break; 630 switch (open) { 630 switch (open) { 631 case BT_S: 631 case BT_S: 632 case BT_LF: 632 case BT_LF: 633 case BT_CR: 633 case BT_CR: 634 break; 634 break; 635 default: 635 default: 636 *nextTokPtr = ptr; 636 *nextTokPtr = ptr; 637 return XML_TOK_INVALID; 637 return XML_TOK_INVALID; 638 } 638 } 639 } 639 } 640 ptr += MINBPC(enc); 640 ptr += MINBPC(enc); 641 /* in attribute value */ 641 /* in attribute value */ 642 for (;;) { 642 for (;;) { 643 int t; 643 int t; 644 REQUIRE_CHAR(enc, ptr, end); 644 REQUIRE_CHAR(enc, ptr, end); 645 t = BYTE_TYPE(enc, ptr); 645 t = BYTE_TYPE(enc, ptr); 646 if (t == open) 646 if (t == open) 647 break; 647 break; 648 switch (t) { 648 switch (t) { 649 INVALID_CASES(ptr, nextTokPtr) 649 INVALID_CASES(ptr, nextTokPtr) 650 case BT_AMP: { 650 case BT_AMP: { 651 int tok = PREFIX(scanRef)(enc, ptr + 651 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); 652 if (tok <= 0) { 652 if (tok <= 0) { 653 if (tok == XML_TOK_INVALID) 653 if (tok == XML_TOK_INVALID) 654 *nextTokPtr = ptr; 654 *nextTokPtr = ptr; 655 return tok; 655 return tok; 656 } 656 } 657 break; 657 break; 658 } 658 } 659 case BT_LT: 659 case BT_LT: 660 *nextTokPtr = ptr; 660 *nextTokPtr = ptr; 661 return XML_TOK_INVALID; 661 return XML_TOK_INVALID; 662 default: 662 default: 663 ptr += MINBPC(enc); 663 ptr += MINBPC(enc); 664 break; 664 break; 665 } 665 } 666 } 666 } 667 ptr += MINBPC(enc); 667 ptr += MINBPC(enc); 668 REQUIRE_CHAR(enc, ptr, end); 668 REQUIRE_CHAR(enc, ptr, end); 669 switch (BYTE_TYPE(enc, ptr)) { 669 switch (BYTE_TYPE(enc, ptr)) { 670 case BT_S: 670 case BT_S: 671 case BT_CR: 671 case BT_CR: 672 case BT_LF: 672 case BT_LF: 673 break; 673 break; 674 case BT_SOL: 674 case BT_SOL: 675 goto sol; 675 goto sol; 676 case BT_GT: 676 case BT_GT: 677 goto gt; 677 goto gt; 678 default: 678 default: 679 *nextTokPtr = ptr; 679 *nextTokPtr = ptr; 680 return XML_TOK_INVALID; 680 return XML_TOK_INVALID; 681 } 681 } 682 /* ptr points to closing quote */ 682 /* ptr points to closing quote */ 683 for (;;) { 683 for (;;) { 684 ptr += MINBPC(enc); 684 ptr += MINBPC(enc); 685 REQUIRE_CHAR(enc, ptr, end); 685 REQUIRE_CHAR(enc, ptr, end); 686 switch (BYTE_TYPE(enc, ptr)) { 686 switch (BYTE_TYPE(enc, ptr)) { 687 CHECK_NMSTRT_CASES(enc, ptr, end, ne 687 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 688 case BT_S: 688 case BT_S: 689 case BT_CR: 689 case BT_CR: 690 case BT_LF: 690 case BT_LF: 691 continue; 691 continue; 692 case BT_GT: 692 case BT_GT: 693 gt: 693 gt: 694 *nextTokPtr = ptr + MINBPC(enc); 694 *nextTokPtr = ptr + MINBPC(enc); 695 return XML_TOK_START_TAG_WITH_ATTS; 695 return XML_TOK_START_TAG_WITH_ATTS; 696 case BT_SOL: 696 case BT_SOL: 697 sol: 697 sol: 698 ptr += MINBPC(enc); 698 ptr += MINBPC(enc); 699 REQUIRE_CHAR(enc, ptr, end); 699 REQUIRE_CHAR(enc, ptr, end); 700 if (! CHAR_MATCHES(enc, ptr, ASCII_G 700 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { 701 *nextTokPtr = ptr; 701 *nextTokPtr = ptr; 702 return XML_TOK_INVALID; 702 return XML_TOK_INVALID; 703 } 703 } 704 *nextTokPtr = ptr + MINBPC(enc); 704 *nextTokPtr = ptr + MINBPC(enc); 705 return XML_TOK_EMPTY_ELEMENT_WITH_AT 705 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; 706 default: 706 default: 707 *nextTokPtr = ptr; 707 *nextTokPtr = ptr; 708 return XML_TOK_INVALID; 708 return XML_TOK_INVALID; 709 } 709 } 710 break; 710 break; 711 } 711 } 712 break; 712 break; 713 } 713 } 714 default: 714 default: 715 *nextTokPtr = ptr; 715 *nextTokPtr = ptr; 716 return XML_TOK_INVALID; 716 return XML_TOK_INVALID; 717 } 717 } 718 } 718 } 719 return XML_TOK_PARTIAL; 719 return XML_TOK_PARTIAL; 720 } 720 } 721 721 722 /* ptr points to character following "<" */ 722 /* ptr points to character following "<" */ 723 723 724 static int PTRCALL 724 static int PTRCALL 725 PREFIX(scanLt)(const ENCODING *enc, const char 725 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, 726 const char **nextTokPtr) { 726 const char **nextTokPtr) { 727 # ifdef XML_NS 727 # ifdef XML_NS 728 int hadColon; 728 int hadColon; 729 # endif 729 # endif 730 REQUIRE_CHAR(enc, ptr, end); 730 REQUIRE_CHAR(enc, ptr, end); 731 switch (BYTE_TYPE(enc, ptr)) { 731 switch (BYTE_TYPE(enc, ptr)) { 732 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP 732 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 733 case BT_EXCL: 733 case BT_EXCL: 734 ptr += MINBPC(enc); 734 ptr += MINBPC(enc); 735 REQUIRE_CHAR(enc, ptr, end); 735 REQUIRE_CHAR(enc, ptr, end); 736 switch (BYTE_TYPE(enc, ptr)) { 736 switch (BYTE_TYPE(enc, ptr)) { 737 case BT_MINUS: 737 case BT_MINUS: 738 return PREFIX(scanComment)(enc, ptr + MI 738 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 739 case BT_LSQB: 739 case BT_LSQB: 740 return PREFIX(scanCdataSection)(enc, ptr 740 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); 741 } 741 } 742 *nextTokPtr = ptr; 742 *nextTokPtr = ptr; 743 return XML_TOK_INVALID; 743 return XML_TOK_INVALID; 744 case BT_QUEST: 744 case BT_QUEST: 745 return PREFIX(scanPi)(enc, ptr + MINBPC(en 745 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 746 case BT_SOL: 746 case BT_SOL: 747 return PREFIX(scanEndTag)(enc, ptr + MINBP 747 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); 748 default: 748 default: 749 *nextTokPtr = ptr; 749 *nextTokPtr = ptr; 750 return XML_TOK_INVALID; 750 return XML_TOK_INVALID; 751 } 751 } 752 # ifdef XML_NS 752 # ifdef XML_NS 753 hadColon = 0; 753 hadColon = 0; 754 # endif 754 # endif 755 /* we have a start-tag */ 755 /* we have a start-tag */ 756 while (HAS_CHAR(enc, ptr, end)) { 756 while (HAS_CHAR(enc, ptr, end)) { 757 switch (BYTE_TYPE(enc, ptr)) { 757 switch (BYTE_TYPE(enc, ptr)) { 758 CHECK_NAME_CASES(enc, ptr, end, nextTokP 758 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 759 # ifdef XML_NS 759 # ifdef XML_NS 760 case BT_COLON: 760 case BT_COLON: 761 if (hadColon) { 761 if (hadColon) { 762 *nextTokPtr = ptr; 762 *nextTokPtr = ptr; 763 return XML_TOK_INVALID; 763 return XML_TOK_INVALID; 764 } 764 } 765 hadColon = 1; 765 hadColon = 1; 766 ptr += MINBPC(enc); 766 ptr += MINBPC(enc); 767 REQUIRE_CHAR(enc, ptr, end); 767 REQUIRE_CHAR(enc, ptr, end); 768 switch (BYTE_TYPE(enc, ptr)) { 768 switch (BYTE_TYPE(enc, ptr)) { 769 CHECK_NMSTRT_CASES(enc, ptr, end, next 769 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 770 default: 770 default: 771 *nextTokPtr = ptr; 771 *nextTokPtr = ptr; 772 return XML_TOK_INVALID; 772 return XML_TOK_INVALID; 773 } 773 } 774 break; 774 break; 775 # endif 775 # endif 776 case BT_S: 776 case BT_S: 777 case BT_CR: 777 case BT_CR: 778 case BT_LF: { 778 case BT_LF: { 779 ptr += MINBPC(enc); 779 ptr += MINBPC(enc); 780 while (HAS_CHAR(enc, ptr, end)) { 780 while (HAS_CHAR(enc, ptr, end)) { 781 switch (BYTE_TYPE(enc, ptr)) { 781 switch (BYTE_TYPE(enc, ptr)) { 782 CHECK_NMSTRT_CASES(enc, ptr, end, ne 782 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 783 case BT_GT: 783 case BT_GT: 784 goto gt; 784 goto gt; 785 case BT_SOL: 785 case BT_SOL: 786 goto sol; 786 goto sol; 787 case BT_S: 787 case BT_S: 788 case BT_CR: 788 case BT_CR: 789 case BT_LF: 789 case BT_LF: 790 ptr += MINBPC(enc); 790 ptr += MINBPC(enc); 791 continue; 791 continue; 792 default: 792 default: 793 *nextTokPtr = ptr; 793 *nextTokPtr = ptr; 794 return XML_TOK_INVALID; 794 return XML_TOK_INVALID; 795 } 795 } 796 return PREFIX(scanAtts)(enc, ptr, end, 796 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); 797 } 797 } 798 return XML_TOK_PARTIAL; 798 return XML_TOK_PARTIAL; 799 } 799 } 800 case BT_GT: 800 case BT_GT: 801 gt: 801 gt: 802 *nextTokPtr = ptr + MINBPC(enc); 802 *nextTokPtr = ptr + MINBPC(enc); 803 return XML_TOK_START_TAG_NO_ATTS; 803 return XML_TOK_START_TAG_NO_ATTS; 804 case BT_SOL: 804 case BT_SOL: 805 sol: 805 sol: 806 ptr += MINBPC(enc); 806 ptr += MINBPC(enc); 807 REQUIRE_CHAR(enc, ptr, end); 807 REQUIRE_CHAR(enc, ptr, end); 808 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) 808 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { 809 *nextTokPtr = ptr; 809 *nextTokPtr = ptr; 810 return XML_TOK_INVALID; 810 return XML_TOK_INVALID; 811 } 811 } 812 *nextTokPtr = ptr + MINBPC(enc); 812 *nextTokPtr = ptr + MINBPC(enc); 813 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 813 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 814 default: 814 default: 815 *nextTokPtr = ptr; 815 *nextTokPtr = ptr; 816 return XML_TOK_INVALID; 816 return XML_TOK_INVALID; 817 } 817 } 818 } 818 } 819 return XML_TOK_PARTIAL; 819 return XML_TOK_PARTIAL; 820 } 820 } 821 821 822 static int PTRCALL 822 static int PTRCALL 823 PREFIX(contentTok)(const ENCODING *enc, const 823 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, 824 const char **nextTokPtr) { 824 const char **nextTokPtr) { 825 if (ptr >= end) 825 if (ptr >= end) 826 return XML_TOK_NONE; 826 return XML_TOK_NONE; 827 if (MINBPC(enc) > 1) { 827 if (MINBPC(enc) > 1) { 828 size_t n = end - ptr; 828 size_t n = end - ptr; 829 if (n & (MINBPC(enc) - 1)) { 829 if (n & (MINBPC(enc) - 1)) { 830 n &= ~(MINBPC(enc) - 1); 830 n &= ~(MINBPC(enc) - 1); 831 if (n == 0) 831 if (n == 0) 832 return XML_TOK_PARTIAL; 832 return XML_TOK_PARTIAL; 833 end = ptr + n; 833 end = ptr + n; 834 } 834 } 835 } 835 } 836 switch (BYTE_TYPE(enc, ptr)) { 836 switch (BYTE_TYPE(enc, ptr)) { 837 case BT_LT: 837 case BT_LT: 838 return PREFIX(scanLt)(enc, ptr + MINBPC(en 838 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); 839 case BT_AMP: 839 case BT_AMP: 840 return PREFIX(scanRef)(enc, ptr + MINBPC(e 840 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 841 case BT_CR: 841 case BT_CR: 842 ptr += MINBPC(enc); 842 ptr += MINBPC(enc); 843 if (! HAS_CHAR(enc, ptr, end)) 843 if (! HAS_CHAR(enc, ptr, end)) 844 return XML_TOK_TRAILING_CR; 844 return XML_TOK_TRAILING_CR; 845 if (BYTE_TYPE(enc, ptr) == BT_LF) 845 if (BYTE_TYPE(enc, ptr) == BT_LF) 846 ptr += MINBPC(enc); 846 ptr += MINBPC(enc); 847 *nextTokPtr = ptr; 847 *nextTokPtr = ptr; 848 return XML_TOK_DATA_NEWLINE; 848 return XML_TOK_DATA_NEWLINE; 849 case BT_LF: 849 case BT_LF: 850 *nextTokPtr = ptr + MINBPC(enc); 850 *nextTokPtr = ptr + MINBPC(enc); 851 return XML_TOK_DATA_NEWLINE; 851 return XML_TOK_DATA_NEWLINE; 852 case BT_RSQB: 852 case BT_RSQB: 853 ptr += MINBPC(enc); 853 ptr += MINBPC(enc); 854 if (! HAS_CHAR(enc, ptr, end)) 854 if (! HAS_CHAR(enc, ptr, end)) 855 return XML_TOK_TRAILING_RSQB; 855 return XML_TOK_TRAILING_RSQB; 856 if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 856 if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 857 break; 857 break; 858 ptr += MINBPC(enc); 858 ptr += MINBPC(enc); 859 if (! HAS_CHAR(enc, ptr, end)) 859 if (! HAS_CHAR(enc, ptr, end)) 860 return XML_TOK_TRAILING_RSQB; 860 return XML_TOK_TRAILING_RSQB; 861 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { 861 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { 862 ptr -= MINBPC(enc); 862 ptr -= MINBPC(enc); 863 break; 863 break; 864 } 864 } 865 *nextTokPtr = ptr; 865 *nextTokPtr = ptr; 866 return XML_TOK_INVALID; 866 return XML_TOK_INVALID; 867 INVALID_CASES(ptr, nextTokPtr) 867 INVALID_CASES(ptr, nextTokPtr) 868 default: 868 default: 869 ptr += MINBPC(enc); 869 ptr += MINBPC(enc); 870 break; 870 break; 871 } 871 } 872 while (HAS_CHAR(enc, ptr, end)) { 872 while (HAS_CHAR(enc, ptr, end)) { 873 switch (BYTE_TYPE(enc, ptr)) { 873 switch (BYTE_TYPE(enc, ptr)) { 874 # define LEAD_CASE(n) 874 # define LEAD_CASE(n) \ 875 case BT_LEAD##n: 875 case BT_LEAD##n: \ 876 if (end - ptr < n || IS_INVALID_CHAR(enc, 876 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 877 *nextTokPtr = ptr; 877 *nextTokPtr = ptr; \ 878 return XML_TOK_DATA_CHARS; 878 return XML_TOK_DATA_CHARS; \ 879 } 879 } \ 880 ptr += n; 880 ptr += n; \ 881 break; 881 break; 882 LEAD_CASE(2) 882 LEAD_CASE(2) 883 LEAD_CASE(3) 883 LEAD_CASE(3) 884 LEAD_CASE(4) 884 LEAD_CASE(4) 885 # undef LEAD_CASE 885 # undef LEAD_CASE 886 case BT_RSQB: 886 case BT_RSQB: 887 if (HAS_CHARS(enc, ptr, end, 2)) { 887 if (HAS_CHARS(enc, ptr, end, 2)) { 888 if (! CHAR_MATCHES(enc, ptr + MINBPC(e 888 if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { 889 ptr += MINBPC(enc); 889 ptr += MINBPC(enc); 890 break; 890 break; 891 } 891 } 892 if (HAS_CHARS(enc, ptr, end, 3)) { 892 if (HAS_CHARS(enc, ptr, end, 3)) { 893 if (! CHAR_MATCHES(enc, ptr + 2 * MI 893 if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) { 894 ptr += MINBPC(enc); 894 ptr += MINBPC(enc); 895 break; 895 break; 896 } 896 } 897 *nextTokPtr = ptr + 2 * MINBPC(enc); 897 *nextTokPtr = ptr + 2 * MINBPC(enc); 898 return XML_TOK_INVALID; 898 return XML_TOK_INVALID; 899 } 899 } 900 } 900 } 901 /* fall through */ 901 /* fall through */ 902 case BT_AMP: 902 case BT_AMP: 903 case BT_LT: 903 case BT_LT: 904 case BT_NONXML: 904 case BT_NONXML: 905 case BT_MALFORM: 905 case BT_MALFORM: 906 case BT_TRAIL: 906 case BT_TRAIL: 907 case BT_CR: 907 case BT_CR: 908 case BT_LF: 908 case BT_LF: 909 *nextTokPtr = ptr; 909 *nextTokPtr = ptr; 910 return XML_TOK_DATA_CHARS; 910 return XML_TOK_DATA_CHARS; 911 default: 911 default: 912 ptr += MINBPC(enc); 912 ptr += MINBPC(enc); 913 break; 913 break; 914 } 914 } 915 } 915 } 916 *nextTokPtr = ptr; 916 *nextTokPtr = ptr; 917 return XML_TOK_DATA_CHARS; 917 return XML_TOK_DATA_CHARS; 918 } 918 } 919 919 920 /* ptr points to character following "%" */ 920 /* ptr points to character following "%" */ 921 921 922 static int PTRCALL 922 static int PTRCALL 923 PREFIX(scanPercent)(const ENCODING *enc, const 923 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, 924 const char **nextTokPtr) { 924 const char **nextTokPtr) { 925 REQUIRE_CHAR(enc, ptr, end); 925 REQUIRE_CHAR(enc, ptr, end); 926 switch (BYTE_TYPE(enc, ptr)) { 926 switch (BYTE_TYPE(enc, ptr)) { 927 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP 927 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 928 case BT_S: 928 case BT_S: 929 case BT_LF: 929 case BT_LF: 930 case BT_CR: 930 case BT_CR: 931 case BT_PERCNT: 931 case BT_PERCNT: 932 *nextTokPtr = ptr; 932 *nextTokPtr = ptr; 933 return XML_TOK_PERCENT; 933 return XML_TOK_PERCENT; 934 default: 934 default: 935 *nextTokPtr = ptr; 935 *nextTokPtr = ptr; 936 return XML_TOK_INVALID; 936 return XML_TOK_INVALID; 937 } 937 } 938 while (HAS_CHAR(enc, ptr, end)) { 938 while (HAS_CHAR(enc, ptr, end)) { 939 switch (BYTE_TYPE(enc, ptr)) { 939 switch (BYTE_TYPE(enc, ptr)) { 940 CHECK_NAME_CASES(enc, ptr, end, nextTokP 940 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 941 case BT_SEMI: 941 case BT_SEMI: 942 *nextTokPtr = ptr + MINBPC(enc); 942 *nextTokPtr = ptr + MINBPC(enc); 943 return XML_TOK_PARAM_ENTITY_REF; 943 return XML_TOK_PARAM_ENTITY_REF; 944 default: 944 default: 945 *nextTokPtr = ptr; 945 *nextTokPtr = ptr; 946 return XML_TOK_INVALID; 946 return XML_TOK_INVALID; 947 } 947 } 948 } 948 } 949 return XML_TOK_PARTIAL; 949 return XML_TOK_PARTIAL; 950 } 950 } 951 951 952 static int PTRCALL 952 static int PTRCALL 953 PREFIX(scanPoundName)(const ENCODING *enc, con 953 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, 954 const char **nextTokPtr) 954 const char **nextTokPtr) { 955 REQUIRE_CHAR(enc, ptr, end); 955 REQUIRE_CHAR(enc, ptr, end); 956 switch (BYTE_TYPE(enc, ptr)) { 956 switch (BYTE_TYPE(enc, ptr)) { 957 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP 957 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 958 default: 958 default: 959 *nextTokPtr = ptr; 959 *nextTokPtr = ptr; 960 return XML_TOK_INVALID; 960 return XML_TOK_INVALID; 961 } 961 } 962 while (HAS_CHAR(enc, ptr, end)) { 962 while (HAS_CHAR(enc, ptr, end)) { 963 switch (BYTE_TYPE(enc, ptr)) { 963 switch (BYTE_TYPE(enc, ptr)) { 964 CHECK_NAME_CASES(enc, ptr, end, nextTokP 964 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 965 case BT_CR: 965 case BT_CR: 966 case BT_LF: 966 case BT_LF: 967 case BT_S: 967 case BT_S: 968 case BT_RPAR: 968 case BT_RPAR: 969 case BT_GT: 969 case BT_GT: 970 case BT_PERCNT: 970 case BT_PERCNT: 971 case BT_VERBAR: 971 case BT_VERBAR: 972 *nextTokPtr = ptr; 972 *nextTokPtr = ptr; 973 return XML_TOK_POUND_NAME; 973 return XML_TOK_POUND_NAME; 974 default: 974 default: 975 *nextTokPtr = ptr; 975 *nextTokPtr = ptr; 976 return XML_TOK_INVALID; 976 return XML_TOK_INVALID; 977 } 977 } 978 } 978 } 979 return -XML_TOK_POUND_NAME; 979 return -XML_TOK_POUND_NAME; 980 } 980 } 981 981 982 static int PTRCALL 982 static int PTRCALL 983 PREFIX(scanLit)(int open, const ENCODING *enc, 983 PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, 984 const char **nextTokPtr) { 984 const char **nextTokPtr) { 985 while (HAS_CHAR(enc, ptr, end)) { 985 while (HAS_CHAR(enc, ptr, end)) { 986 int t = BYTE_TYPE(enc, ptr); 986 int t = BYTE_TYPE(enc, ptr); 987 switch (t) { 987 switch (t) { 988 INVALID_CASES(ptr, nextTokPtr) 988 INVALID_CASES(ptr, nextTokPtr) 989 case BT_QUOT: 989 case BT_QUOT: 990 case BT_APOS: 990 case BT_APOS: 991 ptr += MINBPC(enc); 991 ptr += MINBPC(enc); 992 if (t != open) 992 if (t != open) 993 break; 993 break; 994 if (! HAS_CHAR(enc, ptr, end)) 994 if (! HAS_CHAR(enc, ptr, end)) 995 return -XML_TOK_LITERAL; 995 return -XML_TOK_LITERAL; 996 *nextTokPtr = ptr; 996 *nextTokPtr = ptr; 997 switch (BYTE_TYPE(enc, ptr)) { 997 switch (BYTE_TYPE(enc, ptr)) { 998 case BT_S: 998 case BT_S: 999 case BT_CR: 999 case BT_CR: 1000 case BT_LF: 1000 case BT_LF: 1001 case BT_GT: 1001 case BT_GT: 1002 case BT_PERCNT: 1002 case BT_PERCNT: 1003 case BT_LSQB: 1003 case BT_LSQB: 1004 return XML_TOK_LITERAL; 1004 return XML_TOK_LITERAL; 1005 default: 1005 default: 1006 return XML_TOK_INVALID; 1006 return XML_TOK_INVALID; 1007 } 1007 } 1008 default: 1008 default: 1009 ptr += MINBPC(enc); 1009 ptr += MINBPC(enc); 1010 break; 1010 break; 1011 } 1011 } 1012 } 1012 } 1013 return XML_TOK_PARTIAL; 1013 return XML_TOK_PARTIAL; 1014 } 1014 } 1015 1015 1016 static int PTRCALL 1016 static int PTRCALL 1017 PREFIX(prologTok)(const ENCODING *enc, const 1017 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, 1018 const char **nextTokPtr) { 1018 const char **nextTokPtr) { 1019 int tok; 1019 int tok; 1020 if (ptr >= end) 1020 if (ptr >= end) 1021 return XML_TOK_NONE; 1021 return XML_TOK_NONE; 1022 if (MINBPC(enc) > 1) { 1022 if (MINBPC(enc) > 1) { 1023 size_t n = end - ptr; 1023 size_t n = end - ptr; 1024 if (n & (MINBPC(enc) - 1)) { 1024 if (n & (MINBPC(enc) - 1)) { 1025 n &= ~(MINBPC(enc) - 1); 1025 n &= ~(MINBPC(enc) - 1); 1026 if (n == 0) 1026 if (n == 0) 1027 return XML_TOK_PARTIAL; 1027 return XML_TOK_PARTIAL; 1028 end = ptr + n; 1028 end = ptr + n; 1029 } 1029 } 1030 } 1030 } 1031 switch (BYTE_TYPE(enc, ptr)) { 1031 switch (BYTE_TYPE(enc, ptr)) { 1032 case BT_QUOT: 1032 case BT_QUOT: 1033 return PREFIX(scanLit)(BT_QUOT, enc, ptr 1033 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); 1034 case BT_APOS: 1034 case BT_APOS: 1035 return PREFIX(scanLit)(BT_APOS, enc, ptr 1035 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); 1036 case BT_LT: { 1036 case BT_LT: { 1037 ptr += MINBPC(enc); 1037 ptr += MINBPC(enc); 1038 REQUIRE_CHAR(enc, ptr, end); 1038 REQUIRE_CHAR(enc, ptr, end); 1039 switch (BYTE_TYPE(enc, ptr)) { 1039 switch (BYTE_TYPE(enc, ptr)) { 1040 case BT_EXCL: 1040 case BT_EXCL: 1041 return PREFIX(scanDecl)(enc, ptr + MINB 1041 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1042 case BT_QUEST: 1042 case BT_QUEST: 1043 return PREFIX(scanPi)(enc, ptr + MINBPC 1043 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1044 case BT_NMSTRT: 1044 case BT_NMSTRT: 1045 case BT_HEX: 1045 case BT_HEX: 1046 case BT_NONASCII: 1046 case BT_NONASCII: 1047 case BT_LEAD2: 1047 case BT_LEAD2: 1048 case BT_LEAD3: 1048 case BT_LEAD3: 1049 case BT_LEAD4: 1049 case BT_LEAD4: 1050 *nextTokPtr = ptr - MINBPC(enc); 1050 *nextTokPtr = ptr - MINBPC(enc); 1051 return XML_TOK_INSTANCE_START; 1051 return XML_TOK_INSTANCE_START; 1052 } 1052 } 1053 *nextTokPtr = ptr; 1053 *nextTokPtr = ptr; 1054 return XML_TOK_INVALID; 1054 return XML_TOK_INVALID; 1055 } 1055 } 1056 case BT_CR: 1056 case BT_CR: 1057 if (ptr + MINBPC(enc) == end) { 1057 if (ptr + MINBPC(enc) == end) { 1058 *nextTokPtr = end; 1058 *nextTokPtr = end; 1059 /* indicate that this might be part of 1059 /* indicate that this might be part of a CR/LF pair */ 1060 return -XML_TOK_PROLOG_S; 1060 return -XML_TOK_PROLOG_S; 1061 } 1061 } 1062 /* fall through */ 1062 /* fall through */ 1063 case BT_S: 1063 case BT_S: 1064 case BT_LF: 1064 case BT_LF: 1065 for (;;) { 1065 for (;;) { 1066 ptr += MINBPC(enc); 1066 ptr += MINBPC(enc); 1067 if (! HAS_CHAR(enc, ptr, end)) 1067 if (! HAS_CHAR(enc, ptr, end)) 1068 break; 1068 break; 1069 switch (BYTE_TYPE(enc, ptr)) { 1069 switch (BYTE_TYPE(enc, ptr)) { 1070 case BT_S: 1070 case BT_S: 1071 case BT_LF: 1071 case BT_LF: 1072 break; 1072 break; 1073 case BT_CR: 1073 case BT_CR: 1074 /* don't split CR/LF pair */ 1074 /* don't split CR/LF pair */ 1075 if (ptr + MINBPC(enc) != end) 1075 if (ptr + MINBPC(enc) != end) 1076 break; 1076 break; 1077 /* fall through */ 1077 /* fall through */ 1078 default: 1078 default: 1079 *nextTokPtr = ptr; 1079 *nextTokPtr = ptr; 1080 return XML_TOK_PROLOG_S; 1080 return XML_TOK_PROLOG_S; 1081 } 1081 } 1082 } 1082 } 1083 *nextTokPtr = ptr; 1083 *nextTokPtr = ptr; 1084 return XML_TOK_PROLOG_S; 1084 return XML_TOK_PROLOG_S; 1085 case BT_PERCNT: 1085 case BT_PERCNT: 1086 return PREFIX(scanPercent)(enc, ptr + MIN 1086 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1087 case BT_COMMA: 1087 case BT_COMMA: 1088 *nextTokPtr = ptr + MINBPC(enc); 1088 *nextTokPtr = ptr + MINBPC(enc); 1089 return XML_TOK_COMMA; 1089 return XML_TOK_COMMA; 1090 case BT_LSQB: 1090 case BT_LSQB: 1091 *nextTokPtr = ptr + MINBPC(enc); 1091 *nextTokPtr = ptr + MINBPC(enc); 1092 return XML_TOK_OPEN_BRACKET; 1092 return XML_TOK_OPEN_BRACKET; 1093 case BT_RSQB: 1093 case BT_RSQB: 1094 ptr += MINBPC(enc); 1094 ptr += MINBPC(enc); 1095 if (! HAS_CHAR(enc, ptr, end)) 1095 if (! HAS_CHAR(enc, ptr, end)) 1096 return -XML_TOK_CLOSE_BRACKET; 1096 return -XML_TOK_CLOSE_BRACKET; 1097 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1097 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1098 REQUIRE_CHARS(enc, ptr, end, 2); 1098 REQUIRE_CHARS(enc, ptr, end, 2); 1099 if (CHAR_MATCHES(enc, ptr + MINBPC(enc) 1099 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { 1100 *nextTokPtr = ptr + 2 * MINBPC(enc); 1100 *nextTokPtr = ptr + 2 * MINBPC(enc); 1101 return XML_TOK_COND_SECT_CLOSE; 1101 return XML_TOK_COND_SECT_CLOSE; 1102 } 1102 } 1103 } 1103 } 1104 *nextTokPtr = ptr; 1104 *nextTokPtr = ptr; 1105 return XML_TOK_CLOSE_BRACKET; 1105 return XML_TOK_CLOSE_BRACKET; 1106 case BT_LPAR: 1106 case BT_LPAR: 1107 *nextTokPtr = ptr + MINBPC(enc); 1107 *nextTokPtr = ptr + MINBPC(enc); 1108 return XML_TOK_OPEN_PAREN; 1108 return XML_TOK_OPEN_PAREN; 1109 case BT_RPAR: 1109 case BT_RPAR: 1110 ptr += MINBPC(enc); 1110 ptr += MINBPC(enc); 1111 if (! HAS_CHAR(enc, ptr, end)) 1111 if (! HAS_CHAR(enc, ptr, end)) 1112 return -XML_TOK_CLOSE_PAREN; 1112 return -XML_TOK_CLOSE_PAREN; 1113 switch (BYTE_TYPE(enc, ptr)) { 1113 switch (BYTE_TYPE(enc, ptr)) { 1114 case BT_AST: 1114 case BT_AST: 1115 *nextTokPtr = ptr + MINBPC(enc); 1115 *nextTokPtr = ptr + MINBPC(enc); 1116 return XML_TOK_CLOSE_PAREN_ASTERISK; 1116 return XML_TOK_CLOSE_PAREN_ASTERISK; 1117 case BT_QUEST: 1117 case BT_QUEST: 1118 *nextTokPtr = ptr + MINBPC(enc); 1118 *nextTokPtr = ptr + MINBPC(enc); 1119 return XML_TOK_CLOSE_PAREN_QUESTION; 1119 return XML_TOK_CLOSE_PAREN_QUESTION; 1120 case BT_PLUS: 1120 case BT_PLUS: 1121 *nextTokPtr = ptr + MINBPC(enc); 1121 *nextTokPtr = ptr + MINBPC(enc); 1122 return XML_TOK_CLOSE_PAREN_PLUS; 1122 return XML_TOK_CLOSE_PAREN_PLUS; 1123 case BT_CR: 1123 case BT_CR: 1124 case BT_LF: 1124 case BT_LF: 1125 case BT_S: 1125 case BT_S: 1126 case BT_GT: 1126 case BT_GT: 1127 case BT_COMMA: 1127 case BT_COMMA: 1128 case BT_VERBAR: 1128 case BT_VERBAR: 1129 case BT_RPAR: 1129 case BT_RPAR: 1130 *nextTokPtr = ptr; 1130 *nextTokPtr = ptr; 1131 return XML_TOK_CLOSE_PAREN; 1131 return XML_TOK_CLOSE_PAREN; 1132 } 1132 } 1133 *nextTokPtr = ptr; 1133 *nextTokPtr = ptr; 1134 return XML_TOK_INVALID; 1134 return XML_TOK_INVALID; 1135 case BT_VERBAR: 1135 case BT_VERBAR: 1136 *nextTokPtr = ptr + MINBPC(enc); 1136 *nextTokPtr = ptr + MINBPC(enc); 1137 return XML_TOK_OR; 1137 return XML_TOK_OR; 1138 case BT_GT: 1138 case BT_GT: 1139 *nextTokPtr = ptr + MINBPC(enc); 1139 *nextTokPtr = ptr + MINBPC(enc); 1140 return XML_TOK_DECL_CLOSE; 1140 return XML_TOK_DECL_CLOSE; 1141 case BT_NUM: 1141 case BT_NUM: 1142 return PREFIX(scanPoundName)(enc, ptr + M 1142 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1143 # define LEAD_CASE(n) 1143 # define LEAD_CASE(n) \ 1144 case BT_LEAD##n: 1144 case BT_LEAD##n: \ 1145 if (end - ptr < n) 1145 if (end - ptr < n) \ 1146 return XML_TOK_PARTIAL_CHAR; 1146 return XML_TOK_PARTIAL_CHAR; \ 1147 if (IS_INVALID_CHAR(enc, ptr, n)) { 1147 if (IS_INVALID_CHAR(enc, ptr, n)) { \ 1148 *nextTokPtr = ptr; 1148 *nextTokPtr = ptr; \ 1149 return XML_TOK_INVALID; 1149 return XML_TOK_INVALID; \ 1150 } 1150 } \ 1151 if (IS_NMSTRT_CHAR(enc, ptr, n)) { 1151 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ 1152 ptr += n; 1152 ptr += n; \ 1153 tok = XML_TOK_NAME; 1153 tok = XML_TOK_NAME; \ 1154 break; 1154 break; \ 1155 } 1155 } \ 1156 if (IS_NAME_CHAR(enc, ptr, n)) { 1156 if (IS_NAME_CHAR(enc, ptr, n)) { \ 1157 ptr += n; 1157 ptr += n; \ 1158 tok = XML_TOK_NMTOKEN; 1158 tok = XML_TOK_NMTOKEN; \ 1159 break; 1159 break; \ 1160 } 1160 } \ 1161 *nextTokPtr = ptr; 1161 *nextTokPtr = ptr; \ 1162 return XML_TOK_INVALID; 1162 return XML_TOK_INVALID; 1163 LEAD_CASE(2) 1163 LEAD_CASE(2) 1164 LEAD_CASE(3) 1164 LEAD_CASE(3) 1165 LEAD_CASE(4) 1165 LEAD_CASE(4) 1166 # undef LEAD_CASE 1166 # undef LEAD_CASE 1167 case BT_NMSTRT: 1167 case BT_NMSTRT: 1168 case BT_HEX: 1168 case BT_HEX: 1169 tok = XML_TOK_NAME; 1169 tok = XML_TOK_NAME; 1170 ptr += MINBPC(enc); 1170 ptr += MINBPC(enc); 1171 break; 1171 break; 1172 case BT_DIGIT: 1172 case BT_DIGIT: 1173 case BT_NAME: 1173 case BT_NAME: 1174 case BT_MINUS: 1174 case BT_MINUS: 1175 # ifdef XML_NS 1175 # ifdef XML_NS 1176 case BT_COLON: 1176 case BT_COLON: 1177 # endif 1177 # endif 1178 tok = XML_TOK_NMTOKEN; 1178 tok = XML_TOK_NMTOKEN; 1179 ptr += MINBPC(enc); 1179 ptr += MINBPC(enc); 1180 break; 1180 break; 1181 case BT_NONASCII: 1181 case BT_NONASCII: 1182 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 1182 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 1183 ptr += MINBPC(enc); 1183 ptr += MINBPC(enc); 1184 tok = XML_TOK_NAME; 1184 tok = XML_TOK_NAME; 1185 break; 1185 break; 1186 } 1186 } 1187 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 1187 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 1188 ptr += MINBPC(enc); 1188 ptr += MINBPC(enc); 1189 tok = XML_TOK_NMTOKEN; 1189 tok = XML_TOK_NMTOKEN; 1190 break; 1190 break; 1191 } 1191 } 1192 /* fall through */ 1192 /* fall through */ 1193 default: 1193 default: 1194 *nextTokPtr = ptr; 1194 *nextTokPtr = ptr; 1195 return XML_TOK_INVALID; 1195 return XML_TOK_INVALID; 1196 } 1196 } 1197 while (HAS_CHAR(enc, ptr, end)) { 1197 while (HAS_CHAR(enc, ptr, end)) { 1198 switch (BYTE_TYPE(enc, ptr)) { 1198 switch (BYTE_TYPE(enc, ptr)) { 1199 CHECK_NAME_CASES(enc, ptr, end, nextTok 1199 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1200 case BT_GT: 1200 case BT_GT: 1201 case BT_RPAR: 1201 case BT_RPAR: 1202 case BT_COMMA: 1202 case BT_COMMA: 1203 case BT_VERBAR: 1203 case BT_VERBAR: 1204 case BT_LSQB: 1204 case BT_LSQB: 1205 case BT_PERCNT: 1205 case BT_PERCNT: 1206 case BT_S: 1206 case BT_S: 1207 case BT_CR: 1207 case BT_CR: 1208 case BT_LF: 1208 case BT_LF: 1209 *nextTokPtr = ptr; 1209 *nextTokPtr = ptr; 1210 return tok; 1210 return tok; 1211 # ifdef XML_NS 1211 # ifdef XML_NS 1212 case BT_COLON: 1212 case BT_COLON: 1213 ptr += MINBPC(enc); 1213 ptr += MINBPC(enc); 1214 switch (tok) { 1214 switch (tok) { 1215 case XML_TOK_NAME: 1215 case XML_TOK_NAME: 1216 REQUIRE_CHAR(enc, ptr, end); 1216 REQUIRE_CHAR(enc, ptr, end); 1217 tok = XML_TOK_PREFIXED_NAME; 1217 tok = XML_TOK_PREFIXED_NAME; 1218 switch (BYTE_TYPE(enc, ptr)) { 1218 switch (BYTE_TYPE(enc, ptr)) { 1219 CHECK_NAME_CASES(enc, ptr, end, nex 1219 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1220 default: 1220 default: 1221 tok = XML_TOK_NMTOKEN; 1221 tok = XML_TOK_NMTOKEN; 1222 break; 1222 break; 1223 } 1223 } 1224 break; 1224 break; 1225 case XML_TOK_PREFIXED_NAME: 1225 case XML_TOK_PREFIXED_NAME: 1226 tok = XML_TOK_NMTOKEN; 1226 tok = XML_TOK_NMTOKEN; 1227 break; 1227 break; 1228 } 1228 } 1229 break; 1229 break; 1230 # endif 1230 # endif 1231 case BT_PLUS: 1231 case BT_PLUS: 1232 if (tok == XML_TOK_NMTOKEN) { 1232 if (tok == XML_TOK_NMTOKEN) { 1233 *nextTokPtr = ptr; 1233 *nextTokPtr = ptr; 1234 return XML_TOK_INVALID; 1234 return XML_TOK_INVALID; 1235 } 1235 } 1236 *nextTokPtr = ptr + MINBPC(enc); 1236 *nextTokPtr = ptr + MINBPC(enc); 1237 return XML_TOK_NAME_PLUS; 1237 return XML_TOK_NAME_PLUS; 1238 case BT_AST: 1238 case BT_AST: 1239 if (tok == XML_TOK_NMTOKEN) { 1239 if (tok == XML_TOK_NMTOKEN) { 1240 *nextTokPtr = ptr; 1240 *nextTokPtr = ptr; 1241 return XML_TOK_INVALID; 1241 return XML_TOK_INVALID; 1242 } 1242 } 1243 *nextTokPtr = ptr + MINBPC(enc); 1243 *nextTokPtr = ptr + MINBPC(enc); 1244 return XML_TOK_NAME_ASTERISK; 1244 return XML_TOK_NAME_ASTERISK; 1245 case BT_QUEST: 1245 case BT_QUEST: 1246 if (tok == XML_TOK_NMTOKEN) { 1246 if (tok == XML_TOK_NMTOKEN) { 1247 *nextTokPtr = ptr; 1247 *nextTokPtr = ptr; 1248 return XML_TOK_INVALID; 1248 return XML_TOK_INVALID; 1249 } 1249 } 1250 *nextTokPtr = ptr + MINBPC(enc); 1250 *nextTokPtr = ptr + MINBPC(enc); 1251 return XML_TOK_NAME_QUESTION; 1251 return XML_TOK_NAME_QUESTION; 1252 default: 1252 default: 1253 *nextTokPtr = ptr; 1253 *nextTokPtr = ptr; 1254 return XML_TOK_INVALID; 1254 return XML_TOK_INVALID; 1255 } 1255 } 1256 } 1256 } 1257 return -tok; 1257 return -tok; 1258 } 1258 } 1259 1259 1260 static int PTRCALL 1260 static int PTRCALL 1261 PREFIX(attributeValueTok)(const ENCODING *enc 1261 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, 1262 const char **nextTo 1262 const char **nextTokPtr) { 1263 const char *start; 1263 const char *start; 1264 if (ptr >= end) 1264 if (ptr >= end) 1265 return XML_TOK_NONE; 1265 return XML_TOK_NONE; 1266 else if (! HAS_CHAR(enc, ptr, end)) { 1266 else if (! HAS_CHAR(enc, ptr, end)) { 1267 /* This line cannot be executed. The inc 1267 /* This line cannot be executed. The incoming data has already 1268 * been tokenized once, so incomplete cha 1268 * been tokenized once, so incomplete characters like this have 1269 * already been eliminated from the input 1269 * already been eliminated from the input. Retaining the paranoia 1270 * check is still valuable, however. 1270 * check is still valuable, however. 1271 */ 1271 */ 1272 return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE 1272 return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ 1273 } 1273 } 1274 start = ptr; 1274 start = ptr; 1275 while (HAS_CHAR(enc, ptr, end)) { 1275 while (HAS_CHAR(enc, ptr, end)) { 1276 switch (BYTE_TYPE(enc, ptr)) { 1276 switch (BYTE_TYPE(enc, ptr)) { 1277 # define LEAD_CASE(n) 1277 # define LEAD_CASE(n) \ 1278 case BT_LEAD##n: 1278 case BT_LEAD##n: \ 1279 ptr += n; /* NOTE: The encoding has alrea 1279 ptr += n; /* NOTE: The encoding has already been validated. */ \ 1280 break; 1280 break; 1281 LEAD_CASE(2) 1281 LEAD_CASE(2) 1282 LEAD_CASE(3) 1282 LEAD_CASE(3) 1283 LEAD_CASE(4) 1283 LEAD_CASE(4) 1284 # undef LEAD_CASE 1284 # undef LEAD_CASE 1285 case BT_AMP: 1285 case BT_AMP: 1286 if (ptr == start) 1286 if (ptr == start) 1287 return PREFIX(scanRef)(enc, ptr + MIN 1287 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1288 *nextTokPtr = ptr; 1288 *nextTokPtr = ptr; 1289 return XML_TOK_DATA_CHARS; 1289 return XML_TOK_DATA_CHARS; 1290 case BT_LT: 1290 case BT_LT: 1291 /* this is for inside entity references 1291 /* this is for inside entity references */ 1292 *nextTokPtr = ptr; 1292 *nextTokPtr = ptr; 1293 return XML_TOK_INVALID; 1293 return XML_TOK_INVALID; 1294 case BT_LF: 1294 case BT_LF: 1295 if (ptr == start) { 1295 if (ptr == start) { 1296 *nextTokPtr = ptr + MINBPC(enc); 1296 *nextTokPtr = ptr + MINBPC(enc); 1297 return XML_TOK_DATA_NEWLINE; 1297 return XML_TOK_DATA_NEWLINE; 1298 } 1298 } 1299 *nextTokPtr = ptr; 1299 *nextTokPtr = ptr; 1300 return XML_TOK_DATA_CHARS; 1300 return XML_TOK_DATA_CHARS; 1301 case BT_CR: 1301 case BT_CR: 1302 if (ptr == start) { 1302 if (ptr == start) { 1303 ptr += MINBPC(enc); 1303 ptr += MINBPC(enc); 1304 if (! HAS_CHAR(enc, ptr, end)) 1304 if (! HAS_CHAR(enc, ptr, end)) 1305 return XML_TOK_TRAILING_CR; 1305 return XML_TOK_TRAILING_CR; 1306 if (BYTE_TYPE(enc, ptr) == BT_LF) 1306 if (BYTE_TYPE(enc, ptr) == BT_LF) 1307 ptr += MINBPC(enc); 1307 ptr += MINBPC(enc); 1308 *nextTokPtr = ptr; 1308 *nextTokPtr = ptr; 1309 return XML_TOK_DATA_NEWLINE; 1309 return XML_TOK_DATA_NEWLINE; 1310 } 1310 } 1311 *nextTokPtr = ptr; 1311 *nextTokPtr = ptr; 1312 return XML_TOK_DATA_CHARS; 1312 return XML_TOK_DATA_CHARS; 1313 case BT_S: 1313 case BT_S: 1314 if (ptr == start) { 1314 if (ptr == start) { 1315 *nextTokPtr = ptr + MINBPC(enc); 1315 *nextTokPtr = ptr + MINBPC(enc); 1316 return XML_TOK_ATTRIBUTE_VALUE_S; 1316 return XML_TOK_ATTRIBUTE_VALUE_S; 1317 } 1317 } 1318 *nextTokPtr = ptr; 1318 *nextTokPtr = ptr; 1319 return XML_TOK_DATA_CHARS; 1319 return XML_TOK_DATA_CHARS; 1320 default: 1320 default: 1321 ptr += MINBPC(enc); 1321 ptr += MINBPC(enc); 1322 break; 1322 break; 1323 } 1323 } 1324 } 1324 } 1325 *nextTokPtr = ptr; 1325 *nextTokPtr = ptr; 1326 return XML_TOK_DATA_CHARS; 1326 return XML_TOK_DATA_CHARS; 1327 } 1327 } 1328 1328 1329 static int PTRCALL 1329 static int PTRCALL 1330 PREFIX(entityValueTok)(const ENCODING *enc, c 1330 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, 1331 const char **nextTokPt 1331 const char **nextTokPtr) { 1332 const char *start; 1332 const char *start; 1333 if (ptr >= end) 1333 if (ptr >= end) 1334 return XML_TOK_NONE; 1334 return XML_TOK_NONE; 1335 else if (! HAS_CHAR(enc, ptr, end)) { 1335 else if (! HAS_CHAR(enc, ptr, end)) { 1336 /* This line cannot be executed. The inc 1336 /* This line cannot be executed. The incoming data has already 1337 * been tokenized once, so incomplete cha 1337 * been tokenized once, so incomplete characters like this have 1338 * already been eliminated from the input 1338 * already been eliminated from the input. Retaining the paranoia 1339 * check is still valuable, however. 1339 * check is still valuable, however. 1340 */ 1340 */ 1341 return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE 1341 return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ 1342 } 1342 } 1343 start = ptr; 1343 start = ptr; 1344 while (HAS_CHAR(enc, ptr, end)) { 1344 while (HAS_CHAR(enc, ptr, end)) { 1345 switch (BYTE_TYPE(enc, ptr)) { 1345 switch (BYTE_TYPE(enc, ptr)) { 1346 # define LEAD_CASE(n) 1346 # define LEAD_CASE(n) \ 1347 case BT_LEAD##n: 1347 case BT_LEAD##n: \ 1348 ptr += n; /* NOTE: The encoding has alrea 1348 ptr += n; /* NOTE: The encoding has already been validated. */ \ 1349 break; 1349 break; 1350 LEAD_CASE(2) 1350 LEAD_CASE(2) 1351 LEAD_CASE(3) 1351 LEAD_CASE(3) 1352 LEAD_CASE(4) 1352 LEAD_CASE(4) 1353 # undef LEAD_CASE 1353 # undef LEAD_CASE 1354 case BT_AMP: 1354 case BT_AMP: 1355 if (ptr == start) 1355 if (ptr == start) 1356 return PREFIX(scanRef)(enc, ptr + MIN 1356 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1357 *nextTokPtr = ptr; 1357 *nextTokPtr = ptr; 1358 return XML_TOK_DATA_CHARS; 1358 return XML_TOK_DATA_CHARS; 1359 case BT_PERCNT: 1359 case BT_PERCNT: 1360 if (ptr == start) { 1360 if (ptr == start) { 1361 int tok = PREFIX(scanPercent)(enc, pt 1361 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1362 return (tok == XML_TOK_PERCENT) ? XML 1362 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; 1363 } 1363 } 1364 *nextTokPtr = ptr; 1364 *nextTokPtr = ptr; 1365 return XML_TOK_DATA_CHARS; 1365 return XML_TOK_DATA_CHARS; 1366 case BT_LF: 1366 case BT_LF: 1367 if (ptr == start) { 1367 if (ptr == start) { 1368 *nextTokPtr = ptr + MINBPC(enc); 1368 *nextTokPtr = ptr + MINBPC(enc); 1369 return XML_TOK_DATA_NEWLINE; 1369 return XML_TOK_DATA_NEWLINE; 1370 } 1370 } 1371 *nextTokPtr = ptr; 1371 *nextTokPtr = ptr; 1372 return XML_TOK_DATA_CHARS; 1372 return XML_TOK_DATA_CHARS; 1373 case BT_CR: 1373 case BT_CR: 1374 if (ptr == start) { 1374 if (ptr == start) { 1375 ptr += MINBPC(enc); 1375 ptr += MINBPC(enc); 1376 if (! HAS_CHAR(enc, ptr, end)) 1376 if (! HAS_CHAR(enc, ptr, end)) 1377 return XML_TOK_TRAILING_CR; 1377 return XML_TOK_TRAILING_CR; 1378 if (BYTE_TYPE(enc, ptr) == BT_LF) 1378 if (BYTE_TYPE(enc, ptr) == BT_LF) 1379 ptr += MINBPC(enc); 1379 ptr += MINBPC(enc); 1380 *nextTokPtr = ptr; 1380 *nextTokPtr = ptr; 1381 return XML_TOK_DATA_NEWLINE; 1381 return XML_TOK_DATA_NEWLINE; 1382 } 1382 } 1383 *nextTokPtr = ptr; 1383 *nextTokPtr = ptr; 1384 return XML_TOK_DATA_CHARS; 1384 return XML_TOK_DATA_CHARS; 1385 default: 1385 default: 1386 ptr += MINBPC(enc); 1386 ptr += MINBPC(enc); 1387 break; 1387 break; 1388 } 1388 } 1389 } 1389 } 1390 *nextTokPtr = ptr; 1390 *nextTokPtr = ptr; 1391 return XML_TOK_DATA_CHARS; 1391 return XML_TOK_DATA_CHARS; 1392 } 1392 } 1393 1393 1394 # ifdef XML_DTD 1394 # ifdef XML_DTD 1395 1395 1396 static int PTRCALL 1396 static int PTRCALL 1397 PREFIX(ignoreSectionTok)(const ENCODING *enc, 1397 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, 1398 const char **nextTok 1398 const char **nextTokPtr) { 1399 int level = 0; 1399 int level = 0; 1400 if (MINBPC(enc) > 1) { 1400 if (MINBPC(enc) > 1) { 1401 size_t n = end - ptr; 1401 size_t n = end - ptr; 1402 if (n & (MINBPC(enc) - 1)) { 1402 if (n & (MINBPC(enc) - 1)) { 1403 n &= ~(MINBPC(enc) - 1); 1403 n &= ~(MINBPC(enc) - 1); 1404 end = ptr + n; 1404 end = ptr + n; 1405 } 1405 } 1406 } 1406 } 1407 while (HAS_CHAR(enc, ptr, end)) { 1407 while (HAS_CHAR(enc, ptr, end)) { 1408 switch (BYTE_TYPE(enc, ptr)) { 1408 switch (BYTE_TYPE(enc, ptr)) { 1409 INVALID_CASES(ptr, nextTokPtr) 1409 INVALID_CASES(ptr, nextTokPtr) 1410 case BT_LT: 1410 case BT_LT: 1411 ptr += MINBPC(enc); 1411 ptr += MINBPC(enc); 1412 REQUIRE_CHAR(enc, ptr, end); 1412 REQUIRE_CHAR(enc, ptr, end); 1413 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) 1413 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { 1414 ptr += MINBPC(enc); 1414 ptr += MINBPC(enc); 1415 REQUIRE_CHAR(enc, ptr, end); 1415 REQUIRE_CHAR(enc, ptr, end); 1416 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB 1416 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { 1417 ++level; 1417 ++level; 1418 ptr += MINBPC(enc); 1418 ptr += MINBPC(enc); 1419 } 1419 } 1420 } 1420 } 1421 break; 1421 break; 1422 case BT_RSQB: 1422 case BT_RSQB: 1423 ptr += MINBPC(enc); 1423 ptr += MINBPC(enc); 1424 REQUIRE_CHAR(enc, ptr, end); 1424 REQUIRE_CHAR(enc, ptr, end); 1425 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 1425 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1426 ptr += MINBPC(enc); 1426 ptr += MINBPC(enc); 1427 REQUIRE_CHAR(enc, ptr, end); 1427 REQUIRE_CHAR(enc, ptr, end); 1428 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) 1428 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1429 ptr += MINBPC(enc); 1429 ptr += MINBPC(enc); 1430 if (level == 0) { 1430 if (level == 0) { 1431 *nextTokPtr = ptr; 1431 *nextTokPtr = ptr; 1432 return XML_TOK_IGNORE_SECT; 1432 return XML_TOK_IGNORE_SECT; 1433 } 1433 } 1434 --level; 1434 --level; 1435 } 1435 } 1436 } 1436 } 1437 break; 1437 break; 1438 default: 1438 default: 1439 ptr += MINBPC(enc); 1439 ptr += MINBPC(enc); 1440 break; 1440 break; 1441 } 1441 } 1442 } 1442 } 1443 return XML_TOK_PARTIAL; 1443 return XML_TOK_PARTIAL; 1444 } 1444 } 1445 1445 1446 # endif /* XML_DTD */ 1446 # endif /* XML_DTD */ 1447 1447 1448 static int PTRCALL 1448 static int PTRCALL 1449 PREFIX(isPublicId)(const ENCODING *enc, const 1449 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, 1450 const char **badPtr) { 1450 const char **badPtr) { 1451 ptr += MINBPC(enc); 1451 ptr += MINBPC(enc); 1452 end -= MINBPC(enc); 1452 end -= MINBPC(enc); 1453 for (; HAS_CHAR(enc, ptr, end); ptr += MINB 1453 for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { 1454 switch (BYTE_TYPE(enc, ptr)) { 1454 switch (BYTE_TYPE(enc, ptr)) { 1455 case BT_DIGIT: 1455 case BT_DIGIT: 1456 case BT_HEX: 1456 case BT_HEX: 1457 case BT_MINUS: 1457 case BT_MINUS: 1458 case BT_APOS: 1458 case BT_APOS: 1459 case BT_LPAR: 1459 case BT_LPAR: 1460 case BT_RPAR: 1460 case BT_RPAR: 1461 case BT_PLUS: 1461 case BT_PLUS: 1462 case BT_COMMA: 1462 case BT_COMMA: 1463 case BT_SOL: 1463 case BT_SOL: 1464 case BT_EQUALS: 1464 case BT_EQUALS: 1465 case BT_QUEST: 1465 case BT_QUEST: 1466 case BT_CR: 1466 case BT_CR: 1467 case BT_LF: 1467 case BT_LF: 1468 case BT_SEMI: 1468 case BT_SEMI: 1469 case BT_EXCL: 1469 case BT_EXCL: 1470 case BT_AST: 1470 case BT_AST: 1471 case BT_PERCNT: 1471 case BT_PERCNT: 1472 case BT_NUM: 1472 case BT_NUM: 1473 # ifdef XML_NS 1473 # ifdef XML_NS 1474 case BT_COLON: 1474 case BT_COLON: 1475 # endif 1475 # endif 1476 break; 1476 break; 1477 case BT_S: 1477 case BT_S: 1478 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) 1478 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { 1479 *badPtr = ptr; 1479 *badPtr = ptr; 1480 return 0; 1480 return 0; 1481 } 1481 } 1482 break; 1482 break; 1483 case BT_NAME: 1483 case BT_NAME: 1484 case BT_NMSTRT: 1484 case BT_NMSTRT: 1485 if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f) 1485 if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f)) 1486 break; 1486 break; 1487 /* fall through */ 1487 /* fall through */ 1488 default: 1488 default: 1489 switch (BYTE_TO_ASCII(enc, ptr)) { 1489 switch (BYTE_TO_ASCII(enc, ptr)) { 1490 case 0x24: /* $ */ 1490 case 0x24: /* $ */ 1491 case 0x40: /* @ */ 1491 case 0x40: /* @ */ 1492 break; 1492 break; 1493 default: 1493 default: 1494 *badPtr = ptr; 1494 *badPtr = ptr; 1495 return 0; 1495 return 0; 1496 } 1496 } 1497 break; 1497 break; 1498 } 1498 } 1499 } 1499 } 1500 return 1; 1500 return 1; 1501 } 1501 } 1502 1502 1503 /* This must only be called for a well-formed 1503 /* This must only be called for a well-formed start-tag or empty 1504 element tag. Returns the number of attrib 1504 element tag. Returns the number of attributes. Pointers to the 1505 first attsMax attributes are stored in att 1505 first attsMax attributes are stored in atts. 1506 */ 1506 */ 1507 1507 1508 static int PTRCALL 1508 static int PTRCALL 1509 PREFIX(getAtts)(const ENCODING *enc, const ch 1509 PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, 1510 ATTRIBUTE *atts) { 1510 ATTRIBUTE *atts) { 1511 enum { other, inName, inValue } state = inN 1511 enum { other, inName, inValue } state = inName; 1512 int nAtts = 0; 1512 int nAtts = 0; 1513 int open = 0; /* defined when state == inVa 1513 int open = 0; /* defined when state == inValue; 1514 initialization just to shu 1514 initialization just to shut up compilers */ 1515 1515 1516 for (ptr += MINBPC(enc);; ptr += MINBPC(enc 1516 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { 1517 switch (BYTE_TYPE(enc, ptr)) { 1517 switch (BYTE_TYPE(enc, ptr)) { 1518 # define START_NAME 1518 # define START_NAME \ 1519 if (state == other) { 1519 if (state == other) { \ 1520 if (nAtts < attsMax) { 1520 if (nAtts < attsMax) { \ 1521 atts[nAtts].name = ptr; 1521 atts[nAtts].name = ptr; \ 1522 atts[nAtts].normalized = 1; 1522 atts[nAtts].normalized = 1; \ 1523 } 1523 } \ 1524 state = inName; 1524 state = inName; \ 1525 } 1525 } 1526 # define LEAD_CASE(n) 1526 # define LEAD_CASE(n) \ 1527 case BT_LEAD##n: /* NOTE: The encoding has 1527 case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \ 1528 START_NAME ptr += (n - MINBPC(enc)); 1528 START_NAME ptr += (n - MINBPC(enc)); \ 1529 break; 1529 break; 1530 LEAD_CASE(2) 1530 LEAD_CASE(2) 1531 LEAD_CASE(3) 1531 LEAD_CASE(3) 1532 LEAD_CASE(4) 1532 LEAD_CASE(4) 1533 # undef LEAD_CASE 1533 # undef LEAD_CASE 1534 case BT_NONASCII: 1534 case BT_NONASCII: 1535 case BT_NMSTRT: 1535 case BT_NMSTRT: 1536 case BT_HEX: 1536 case BT_HEX: 1537 START_NAME 1537 START_NAME 1538 break; 1538 break; 1539 # undef START_NAME 1539 # undef START_NAME 1540 case BT_QUOT: 1540 case BT_QUOT: 1541 if (state != inValue) { 1541 if (state != inValue) { 1542 if (nAtts < attsMax) 1542 if (nAtts < attsMax) 1543 atts[nAtts].valuePtr = ptr + MINBPC 1543 atts[nAtts].valuePtr = ptr + MINBPC(enc); 1544 state = inValue; 1544 state = inValue; 1545 open = BT_QUOT; 1545 open = BT_QUOT; 1546 } else if (open == BT_QUOT) { 1546 } else if (open == BT_QUOT) { 1547 state = other; 1547 state = other; 1548 if (nAtts < attsMax) 1548 if (nAtts < attsMax) 1549 atts[nAtts].valueEnd = ptr; 1549 atts[nAtts].valueEnd = ptr; 1550 nAtts++; 1550 nAtts++; 1551 } 1551 } 1552 break; 1552 break; 1553 case BT_APOS: 1553 case BT_APOS: 1554 if (state != inValue) { 1554 if (state != inValue) { 1555 if (nAtts < attsMax) 1555 if (nAtts < attsMax) 1556 atts[nAtts].valuePtr = ptr + MINBPC 1556 atts[nAtts].valuePtr = ptr + MINBPC(enc); 1557 state = inValue; 1557 state = inValue; 1558 open = BT_APOS; 1558 open = BT_APOS; 1559 } else if (open == BT_APOS) { 1559 } else if (open == BT_APOS) { 1560 state = other; 1560 state = other; 1561 if (nAtts < attsMax) 1561 if (nAtts < attsMax) 1562 atts[nAtts].valueEnd = ptr; 1562 atts[nAtts].valueEnd = ptr; 1563 nAtts++; 1563 nAtts++; 1564 } 1564 } 1565 break; 1565 break; 1566 case BT_AMP: 1566 case BT_AMP: 1567 if (nAtts < attsMax) 1567 if (nAtts < attsMax) 1568 atts[nAtts].normalized = 0; 1568 atts[nAtts].normalized = 0; 1569 break; 1569 break; 1570 case BT_S: 1570 case BT_S: 1571 if (state == inName) 1571 if (state == inName) 1572 state = other; 1572 state = other; 1573 else if (state == inValue && nAtts < at 1573 else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized 1574 && (ptr == atts[nAtts].valuePt 1574 && (ptr == atts[nAtts].valuePtr 1575 || BYTE_TO_ASCII(enc, ptr) 1575 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE 1576 || BYTE_TO_ASCII(enc, ptr 1576 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE 1577 || BYTE_TYPE(enc, ptr + MI 1577 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) 1578 atts[nAtts].normalized = 0; 1578 atts[nAtts].normalized = 0; 1579 break; 1579 break; 1580 case BT_CR: 1580 case BT_CR: 1581 case BT_LF: 1581 case BT_LF: 1582 /* This case ensures that the first att 1582 /* This case ensures that the first attribute name is counted 1583 Apart from that we could just change 1583 Apart from that we could just change state on the quote. */ 1584 if (state == inName) 1584 if (state == inName) 1585 state = other; 1585 state = other; 1586 else if (state == inValue && nAtts < at 1586 else if (state == inValue && nAtts < attsMax) 1587 atts[nAtts].normalized = 0; 1587 atts[nAtts].normalized = 0; 1588 break; 1588 break; 1589 case BT_GT: 1589 case BT_GT: 1590 case BT_SOL: 1590 case BT_SOL: 1591 if (state != inValue) 1591 if (state != inValue) 1592 return nAtts; 1592 return nAtts; 1593 break; 1593 break; 1594 default: 1594 default: 1595 break; 1595 break; 1596 } 1596 } 1597 } 1597 } 1598 /* not reached */ 1598 /* not reached */ 1599 } 1599 } 1600 1600 1601 static int PTRFASTCALL 1601 static int PTRFASTCALL 1602 PREFIX(charRefNumber)(const ENCODING *enc, co 1602 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { 1603 int result = 0; 1603 int result = 0; 1604 /* skip &# */ 1604 /* skip &# */ 1605 UNUSED_P(enc); 1605 UNUSED_P(enc); 1606 ptr += 2 * MINBPC(enc); 1606 ptr += 2 * MINBPC(enc); 1607 if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 1607 if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 1608 for (ptr += MINBPC(enc); ! CHAR_MATCHES(e 1608 for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); 1609 ptr += MINBPC(enc)) { 1609 ptr += MINBPC(enc)) { 1610 int c = BYTE_TO_ASCII(enc, ptr); 1610 int c = BYTE_TO_ASCII(enc, ptr); 1611 switch (c) { 1611 switch (c) { 1612 case ASCII_0: 1612 case ASCII_0: 1613 case ASCII_1: 1613 case ASCII_1: 1614 case ASCII_2: 1614 case ASCII_2: 1615 case ASCII_3: 1615 case ASCII_3: 1616 case ASCII_4: 1616 case ASCII_4: 1617 case ASCII_5: 1617 case ASCII_5: 1618 case ASCII_6: 1618 case ASCII_6: 1619 case ASCII_7: 1619 case ASCII_7: 1620 case ASCII_8: 1620 case ASCII_8: 1621 case ASCII_9: 1621 case ASCII_9: 1622 result <<= 4; 1622 result <<= 4; 1623 result |= (c - ASCII_0); 1623 result |= (c - ASCII_0); 1624 break; 1624 break; 1625 case ASCII_A: 1625 case ASCII_A: 1626 case ASCII_B: 1626 case ASCII_B: 1627 case ASCII_C: 1627 case ASCII_C: 1628 case ASCII_D: 1628 case ASCII_D: 1629 case ASCII_E: 1629 case ASCII_E: 1630 case ASCII_F: 1630 case ASCII_F: 1631 result <<= 4; 1631 result <<= 4; 1632 result += 10 + (c - ASCII_A); 1632 result += 10 + (c - ASCII_A); 1633 break; 1633 break; 1634 case ASCII_a: 1634 case ASCII_a: 1635 case ASCII_b: 1635 case ASCII_b: 1636 case ASCII_c: 1636 case ASCII_c: 1637 case ASCII_d: 1637 case ASCII_d: 1638 case ASCII_e: 1638 case ASCII_e: 1639 case ASCII_f: 1639 case ASCII_f: 1640 result <<= 4; 1640 result <<= 4; 1641 result += 10 + (c - ASCII_a); 1641 result += 10 + (c - ASCII_a); 1642 break; 1642 break; 1643 } 1643 } 1644 if (result >= 0x110000) 1644 if (result >= 0x110000) 1645 return -1; 1645 return -1; 1646 } 1646 } 1647 } else { 1647 } else { 1648 for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEM 1648 for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { 1649 int c = BYTE_TO_ASCII(enc, ptr); 1649 int c = BYTE_TO_ASCII(enc, ptr); 1650 result *= 10; 1650 result *= 10; 1651 result += (c - ASCII_0); 1651 result += (c - ASCII_0); 1652 if (result >= 0x110000) 1652 if (result >= 0x110000) 1653 return -1; 1653 return -1; 1654 } 1654 } 1655 } 1655 } 1656 return checkCharRefNumber(result); 1656 return checkCharRefNumber(result); 1657 } 1657 } 1658 1658 1659 static int PTRCALL 1659 static int PTRCALL 1660 PREFIX(predefinedEntityName)(const ENCODING * 1660 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, 1661 const char *end) 1661 const char *end) { 1662 UNUSED_P(enc); 1662 UNUSED_P(enc); 1663 switch ((end - ptr) / MINBPC(enc)) { 1663 switch ((end - ptr) / MINBPC(enc)) { 1664 case 2: 1664 case 2: 1665 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 1665 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { 1666 switch (BYTE_TO_ASCII(enc, ptr)) { 1666 switch (BYTE_TO_ASCII(enc, ptr)) { 1667 case ASCII_l: 1667 case ASCII_l: 1668 return ASCII_LT; 1668 return ASCII_LT; 1669 case ASCII_g: 1669 case ASCII_g: 1670 return ASCII_GT; 1670 return ASCII_GT; 1671 } 1671 } 1672 } 1672 } 1673 break; 1673 break; 1674 case 3: 1674 case 3: 1675 if (CHAR_MATCHES(enc, ptr, ASCII_a)) { 1675 if (CHAR_MATCHES(enc, ptr, ASCII_a)) { 1676 ptr += MINBPC(enc); 1676 ptr += MINBPC(enc); 1677 if (CHAR_MATCHES(enc, ptr, ASCII_m)) { 1677 if (CHAR_MATCHES(enc, ptr, ASCII_m)) { 1678 ptr += MINBPC(enc); 1678 ptr += MINBPC(enc); 1679 if (CHAR_MATCHES(enc, ptr, ASCII_p)) 1679 if (CHAR_MATCHES(enc, ptr, ASCII_p)) 1680 return ASCII_AMP; 1680 return ASCII_AMP; 1681 } 1681 } 1682 } 1682 } 1683 break; 1683 break; 1684 case 4: 1684 case 4: 1685 switch (BYTE_TO_ASCII(enc, ptr)) { 1685 switch (BYTE_TO_ASCII(enc, ptr)) { 1686 case ASCII_q: 1686 case ASCII_q: 1687 ptr += MINBPC(enc); 1687 ptr += MINBPC(enc); 1688 if (CHAR_MATCHES(enc, ptr, ASCII_u)) { 1688 if (CHAR_MATCHES(enc, ptr, ASCII_u)) { 1689 ptr += MINBPC(enc); 1689 ptr += MINBPC(enc); 1690 if (CHAR_MATCHES(enc, ptr, ASCII_o)) 1690 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1691 ptr += MINBPC(enc); 1691 ptr += MINBPC(enc); 1692 if (CHAR_MATCHES(enc, ptr, ASCII_t) 1692 if (CHAR_MATCHES(enc, ptr, ASCII_t)) 1693 return ASCII_QUOT; 1693 return ASCII_QUOT; 1694 } 1694 } 1695 } 1695 } 1696 break; 1696 break; 1697 case ASCII_a: 1697 case ASCII_a: 1698 ptr += MINBPC(enc); 1698 ptr += MINBPC(enc); 1699 if (CHAR_MATCHES(enc, ptr, ASCII_p)) { 1699 if (CHAR_MATCHES(enc, ptr, ASCII_p)) { 1700 ptr += MINBPC(enc); 1700 ptr += MINBPC(enc); 1701 if (CHAR_MATCHES(enc, ptr, ASCII_o)) 1701 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1702 ptr += MINBPC(enc); 1702 ptr += MINBPC(enc); 1703 if (CHAR_MATCHES(enc, ptr, ASCII_s) 1703 if (CHAR_MATCHES(enc, ptr, ASCII_s)) 1704 return ASCII_APOS; 1704 return ASCII_APOS; 1705 } 1705 } 1706 } 1706 } 1707 break; 1707 break; 1708 } 1708 } 1709 } 1709 } 1710 return 0; 1710 return 0; 1711 } 1711 } 1712 1712 1713 static int PTRCALL 1713 static int PTRCALL 1714 PREFIX(nameMatchesAscii)(const ENCODING *enc, 1714 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, 1715 const char *end1, co 1715 const char *end1, const char *ptr2) { 1716 UNUSED_P(enc); 1716 UNUSED_P(enc); 1717 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) 1717 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { 1718 if (end1 - ptr1 < MINBPC(enc)) { 1718 if (end1 - ptr1 < MINBPC(enc)) { 1719 /* This line cannot be executed. The i 1719 /* This line cannot be executed. The incoming data has already 1720 * been tokenized once, so incomplete c 1720 * been tokenized once, so incomplete characters like this have 1721 * already been eliminated from the inp 1721 * already been eliminated from the input. Retaining the 1722 * paranoia check is still valuable, ho 1722 * paranoia check is still valuable, however. 1723 */ 1723 */ 1724 return 0; /* LCOV_EXCL_LINE */ 1724 return 0; /* LCOV_EXCL_LINE */ 1725 } 1725 } 1726 if (! CHAR_MATCHES(enc, ptr1, *ptr2)) 1726 if (! CHAR_MATCHES(enc, ptr1, *ptr2)) 1727 return 0; 1727 return 0; 1728 } 1728 } 1729 return ptr1 == end1; 1729 return ptr1 == end1; 1730 } 1730 } 1731 1731 1732 static int PTRFASTCALL 1732 static int PTRFASTCALL 1733 PREFIX(nameLength)(const ENCODING *enc, const 1733 PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { 1734 const char *start = ptr; 1734 const char *start = ptr; 1735 for (;;) { 1735 for (;;) { 1736 switch (BYTE_TYPE(enc, ptr)) { 1736 switch (BYTE_TYPE(enc, ptr)) { 1737 # define LEAD_CASE(n) 1737 # define LEAD_CASE(n) \ 1738 case BT_LEAD##n: 1738 case BT_LEAD##n: \ 1739 ptr += n; /* NOTE: The encoding has alrea 1739 ptr += n; /* NOTE: The encoding has already been validated. */ \ 1740 break; 1740 break; 1741 LEAD_CASE(2) 1741 LEAD_CASE(2) 1742 LEAD_CASE(3) 1742 LEAD_CASE(3) 1743 LEAD_CASE(4) 1743 LEAD_CASE(4) 1744 # undef LEAD_CASE 1744 # undef LEAD_CASE 1745 case BT_NONASCII: 1745 case BT_NONASCII: 1746 case BT_NMSTRT: 1746 case BT_NMSTRT: 1747 # ifdef XML_NS 1747 # ifdef XML_NS 1748 case BT_COLON: 1748 case BT_COLON: 1749 # endif 1749 # endif 1750 case BT_HEX: 1750 case BT_HEX: 1751 case BT_DIGIT: 1751 case BT_DIGIT: 1752 case BT_NAME: 1752 case BT_NAME: 1753 case BT_MINUS: 1753 case BT_MINUS: 1754 ptr += MINBPC(enc); 1754 ptr += MINBPC(enc); 1755 break; 1755 break; 1756 default: 1756 default: 1757 return (int)(ptr - start); 1757 return (int)(ptr - start); 1758 } 1758 } 1759 } 1759 } 1760 } 1760 } 1761 1761 1762 static const char *PTRFASTCALL 1762 static const char *PTRFASTCALL 1763 PREFIX(skipS)(const ENCODING *enc, const char 1763 PREFIX(skipS)(const ENCODING *enc, const char *ptr) { 1764 for (;;) { 1764 for (;;) { 1765 switch (BYTE_TYPE(enc, ptr)) { 1765 switch (BYTE_TYPE(enc, ptr)) { 1766 case BT_LF: 1766 case BT_LF: 1767 case BT_CR: 1767 case BT_CR: 1768 case BT_S: 1768 case BT_S: 1769 ptr += MINBPC(enc); 1769 ptr += MINBPC(enc); 1770 break; 1770 break; 1771 default: 1771 default: 1772 return ptr; 1772 return ptr; 1773 } 1773 } 1774 } 1774 } 1775 } 1775 } 1776 1776 1777 static void PTRCALL 1777 static void PTRCALL 1778 PREFIX(updatePosition)(const ENCODING *enc, c 1778 PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, 1779 POSITION *pos) { 1779 POSITION *pos) { 1780 while (HAS_CHAR(enc, ptr, end)) { 1780 while (HAS_CHAR(enc, ptr, end)) { 1781 switch (BYTE_TYPE(enc, ptr)) { 1781 switch (BYTE_TYPE(enc, ptr)) { 1782 # define LEAD_CASE(n) 1782 # define LEAD_CASE(n) \ 1783 case BT_LEAD##n: 1783 case BT_LEAD##n: \ 1784 ptr += n; /* NOTE: The encoding has alrea 1784 ptr += n; /* NOTE: The encoding has already been validated. */ \ 1785 pos->columnNumber++; 1785 pos->columnNumber++; \ 1786 break; 1786 break; 1787 LEAD_CASE(2) 1787 LEAD_CASE(2) 1788 LEAD_CASE(3) 1788 LEAD_CASE(3) 1789 LEAD_CASE(4) 1789 LEAD_CASE(4) 1790 # undef LEAD_CASE 1790 # undef LEAD_CASE 1791 case BT_LF: 1791 case BT_LF: 1792 pos->columnNumber = 0; 1792 pos->columnNumber = 0; 1793 pos->lineNumber++; 1793 pos->lineNumber++; 1794 ptr += MINBPC(enc); 1794 ptr += MINBPC(enc); 1795 break; 1795 break; 1796 case BT_CR: 1796 case BT_CR: 1797 pos->lineNumber++; 1797 pos->lineNumber++; 1798 ptr += MINBPC(enc); 1798 ptr += MINBPC(enc); 1799 if (HAS_CHAR(enc, ptr, end) && BYTE_TYP 1799 if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) 1800 ptr += MINBPC(enc); 1800 ptr += MINBPC(enc); 1801 pos->columnNumber = 0; 1801 pos->columnNumber = 0; 1802 break; 1802 break; 1803 default: 1803 default: 1804 ptr += MINBPC(enc); 1804 ptr += MINBPC(enc); 1805 pos->columnNumber++; 1805 pos->columnNumber++; 1806 break; 1806 break; 1807 } 1807 } 1808 } 1808 } 1809 } 1809 } 1810 1810 1811 # undef DO_LEAD_CASE 1811 # undef DO_LEAD_CASE 1812 # undef MULTIBYTE_CASES 1812 # undef MULTIBYTE_CASES 1813 # undef INVALID_CASES 1813 # undef INVALID_CASES 1814 # undef CHECK_NAME_CASE 1814 # undef CHECK_NAME_CASE 1815 # undef CHECK_NAME_CASES 1815 # undef CHECK_NAME_CASES 1816 # undef CHECK_NMSTRT_CASE 1816 # undef CHECK_NMSTRT_CASE 1817 # undef CHECK_NMSTRT_CASES 1817 # undef CHECK_NMSTRT_CASES 1818 1818 1819 #endif /* XML_TOK_IMPL_C */ 1819 #endif /* XML_TOK_IMPL_C */ 1820 1820