Geant4 Cross Reference |
1 /* This file is included (from xmltok.c, 1-3 t << 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 2 __ __ << 2 See the file COPYING for copying permission. 3 ___\ \/ /_ __ __ _| << 4 / _ \\ /| '_ \ / _` | << 5 | __// \| |_) | (_| | << 6 \___/_/\_\ .__/ \__,_| << 7 |_| XML parse << 8 << 9 Copyright (c) 1997-2000 Thai Open Source So << 10 Copyright (c) 2000 Clark Cooper <coope << 11 Copyright (c) 2002 Fred L. Drake, Jr. << 12 Copyright (c) 2002-2016 Karl Waclawek <karl << 13 Copyright (c) 2016-2022 Sebastian Pipping < << 14 Copyright (c) 2017 Rhodri James <rhodr << 15 Copyright (c) 2018 Benjamin Peterson < << 16 Copyright (c) 2018 Anton Maklakov <ant << 17 Copyright (c) 2019 David Loffredo <lof << 18 Copyright (c) 2020 Boris Kolpackov <bo << 19 Copyright (c) 2022 Martin Ettl <ettl.m << 20 Licensed under the MIT license: << 21 << 22 Permission is hereby granted, free of cha << 23 a copy of this software and associat << 24 "Software"), to deal in the Software w << 25 without limitation the rights to use, c << 26 distribute, sublicense, and/or sell copies << 27 persons to whom the Software is furnish << 28 following conditions: << 29 << 30 The above copyright notice and this permis << 31 in all copies or substantial portions of th << 32 << 33 THE SOFTWARE IS PROVIDED "AS IS", WIT << 34 EXPRESS OR IMPLIED, INCLUDING BUT NOT L << 35 MERCHANTABILITY, FITNESS FOR A PARTICULAR P << 36 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HO << 37 DAMAGES OR OTHER LIABILITY, WHETHER IN AN << 38 OTHERWISE, ARISING FROM, OUT OF OR IN CONNE << 39 USE OR OTHER DEALINGS IN THE SOFTWARE. << 40 */ 3 */ 41 4 >> 5 /* This file is included! */ 42 #ifdef XML_TOK_IMPL_C 6 #ifdef XML_TOK_IMPL_C 43 7 44 # ifndef IS_INVALID_CHAR // i.e. for UTF-16 a << 8 #ifndef IS_INVALID_CHAR 45 # define IS_INVALID_CHAR(enc, ptr, n) (0) << 9 #define IS_INVALID_CHAR(enc, ptr, n) (0) 46 # endif << 10 #endif 47 << 11 48 # define INVALID_LEAD_CASE(n, ptr, nextTokPtr << 12 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ 49 case BT_LEAD##n: << 13 case BT_LEAD ## n: \ 50 if (end - ptr < n) << 14 if (end - ptr < n) \ 51 return XML_TOK_PARTIAL_CHAR; << 15 return XML_TOK_PARTIAL_CHAR; \ 52 if (IS_INVALID_CHAR(enc, ptr, n)) { << 16 if (IS_INVALID_CHAR(enc, ptr, n)) { \ 53 *(nextTokPtr) = (ptr); << 17 *(nextTokPtr) = (ptr); \ 54 return XML_TOK_INVALID; << 18 return XML_TOK_INVALID; \ 55 } << 19 } \ 56 ptr += n; << 20 ptr += n; \ 57 break; << 21 break; 58 << 22 59 # define INVALID_CASES(ptr, nextTokPtr) << 23 #define INVALID_CASES(ptr, nextTokPtr) \ 60 INVALID_LEAD_CASE(2, ptr, nextTokPtr) << 24 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ 61 INVALID_LEAD_CASE(3, ptr, nextTokPtr) << 25 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ 62 INVALID_LEAD_CASE(4, ptr, nextTokPtr) << 26 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ 63 case BT_NONXML: << 27 case BT_NONXML: \ 64 case BT_MALFORM: << 28 case BT_MALFORM: \ 65 case BT_TRAIL: << 29 case BT_TRAIL: \ 66 *(nextTokPtr) = (ptr); << 30 *(nextTokPtr) = (ptr); \ 67 return XML_TOK_INVALID; 31 return XML_TOK_INVALID; 68 32 69 # define CHECK_NAME_CASE(n, enc, ptr, end, ne << 33 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ 70 case BT_LEAD##n: << 34 case BT_LEAD ## n: \ 71 if (end - ptr < n) << 35 if (end - ptr < n) \ 72 return XML_TOK_PARTIAL_CHAR; << 36 return XML_TOK_PARTIAL_CHAR; \ 73 if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_N << 37 if (!IS_NAME_CHAR(enc, ptr, n)) { \ 74 *nextTokPtr = ptr; << 38 *nextTokPtr = ptr; \ 75 return XML_TOK_INVALID; << 39 return XML_TOK_INVALID; \ 76 } << 40 } \ 77 ptr += n; << 41 ptr += n; \ 78 break; << 42 break; 79 << 43 80 # define CHECK_NAME_CASES(enc, ptr, end, next << 44 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ 81 case BT_NONASCII: << 45 case BT_NONASCII: \ 82 if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { << 46 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ 83 *nextTokPtr = ptr; << 47 *nextTokPtr = ptr; \ 84 return XML_TOK_INVALID; << 48 return XML_TOK_INVALID; \ 85 } << 49 } \ 86 /* fall through */ << 50 case BT_NMSTRT: \ 87 case BT_NMSTRT: << 51 case BT_HEX: \ 88 case BT_HEX: << 52 case BT_DIGIT: \ 89 case BT_DIGIT: << 53 case BT_NAME: \ 90 case BT_NAME: << 54 case BT_MINUS: \ 91 case BT_MINUS: << 55 ptr += MINBPC(enc); \ 92 ptr += MINBPC(enc); << 56 break; \ 93 break; << 57 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ 94 CHECK_NAME_CASE(2, enc, ptr, end, nextTokP << 58 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ 95 CHECK_NAME_CASE(3, enc, ptr, end, nextTokP << 59 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) 96 CHECK_NAME_CASE(4, enc, ptr, end, nextTokP << 60 97 << 61 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ 98 # define CHECK_NMSTRT_CASE(n, enc, ptr, end, << 62 case BT_LEAD ## n: \ 99 case BT_LEAD##n: << 63 if (end - ptr < n) \ 100 if ((end) - (ptr) < (n)) << 64 return XML_TOK_PARTIAL_CHAR; \ 101 return XML_TOK_PARTIAL_CHAR; << 65 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ 102 if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_N << 66 *nextTokPtr = ptr; \ 103 *nextTokPtr = ptr; << 67 return XML_TOK_INVALID; \ 104 return XML_TOK_INVALID; << 68 } \ 105 } << 69 ptr += n; \ 106 ptr += n; << 70 break; 107 break; << 71 108 << 72 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ 109 # define CHECK_NMSTRT_CASES(enc, ptr, end, ne << 73 case BT_NONASCII: \ 110 case BT_NONASCII: << 74 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ 111 if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { << 75 *nextTokPtr = ptr; \ 112 *nextTokPtr = ptr; << 76 return XML_TOK_INVALID; \ 113 return XML_TOK_INVALID; << 77 } \ 114 } << 78 case BT_NMSTRT: \ 115 /* fall through */ << 79 case BT_HEX: \ 116 case BT_NMSTRT: << 80 ptr += MINBPC(enc); \ 117 case BT_HEX: << 81 break; \ 118 ptr += MINBPC(enc); << 82 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ 119 break; << 83 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ 120 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTo << 84 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) 121 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTo << 85 122 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTo << 86 #ifndef PREFIX 123 << 87 #define PREFIX(ident) ident 124 # ifndef PREFIX << 88 #endif 125 # define PREFIX(ident) ident << 126 # endif << 127 << 128 # define HAS_CHARS(enc, ptr, end, count) << 129 ((end) - (ptr) >= ((count)*MINBPC(enc))) << 130 << 131 # define HAS_CHAR(enc, ptr, end) HAS_CHARS(en << 132 << 133 # define REQUIRE_CHARS(enc, ptr, end, count) << 134 { << 135 if (! HAS_CHARS(enc, ptr, end, count)) { << 136 return XML_TOK_PARTIAL; << 137 } << 138 } << 139 << 140 # define REQUIRE_CHAR(enc, ptr, end) REQUIRE_ << 141 89 142 /* ptr points to character following "<!-" */ 90 /* ptr points to character following "<!-" */ 143 91 144 static int PTRCALL 92 static int PTRCALL 145 PREFIX(scanComment)(const ENCODING *enc, const << 93 PREFIX(scanComment)(const ENCODING *enc, const char *ptr, 146 const char **nextTokPtr) { << 94 const char *end, const char **nextTokPtr) 147 if (HAS_CHAR(enc, ptr, end)) { << 95 { 148 if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) << 96 if (ptr != end) { >> 97 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 149 *nextTokPtr = ptr; 98 *nextTokPtr = ptr; 150 return XML_TOK_INVALID; 99 return XML_TOK_INVALID; 151 } 100 } 152 ptr += MINBPC(enc); 101 ptr += MINBPC(enc); 153 while (HAS_CHAR(enc, ptr, end)) { << 102 while (ptr != end) { 154 switch (BYTE_TYPE(enc, ptr)) { 103 switch (BYTE_TYPE(enc, ptr)) { 155 INVALID_CASES(ptr, nextTokPtr) << 104 INVALID_CASES(ptr, nextTokPtr) 156 case BT_MINUS: 105 case BT_MINUS: 157 ptr += MINBPC(enc); << 106 if ((ptr += MINBPC(enc)) == end) 158 REQUIRE_CHAR(enc, ptr, end); << 107 return XML_TOK_PARTIAL; 159 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS 108 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 160 ptr += MINBPC(enc); << 109 if ((ptr += MINBPC(enc)) == end) 161 REQUIRE_CHAR(enc, ptr, end); << 110 return XML_TOK_PARTIAL; 162 if (! CHAR_MATCHES(enc, ptr, ASCII_G << 111 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 163 *nextTokPtr = ptr; 112 *nextTokPtr = ptr; 164 return XML_TOK_INVALID; 113 return XML_TOK_INVALID; 165 } 114 } 166 *nextTokPtr = ptr + MINBPC(enc); 115 *nextTokPtr = ptr + MINBPC(enc); 167 return XML_TOK_COMMENT; 116 return XML_TOK_COMMENT; 168 } 117 } 169 break; 118 break; 170 default: 119 default: 171 ptr += MINBPC(enc); 120 ptr += MINBPC(enc); 172 break; 121 break; 173 } 122 } 174 } 123 } 175 } 124 } 176 return XML_TOK_PARTIAL; 125 return XML_TOK_PARTIAL; 177 } 126 } 178 127 179 /* ptr points to character following "<!" */ 128 /* ptr points to character following "<!" */ 180 129 181 static int PTRCALL 130 static int PTRCALL 182 PREFIX(scanDecl)(const ENCODING *enc, const ch << 131 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, 183 const char **nextTokPtr) { << 132 const char *end, const char **nextTokPtr) 184 REQUIRE_CHAR(enc, ptr, end); << 133 { >> 134 if (ptr == end) >> 135 return XML_TOK_PARTIAL; 185 switch (BYTE_TYPE(enc, ptr)) { 136 switch (BYTE_TYPE(enc, ptr)) { 186 case BT_MINUS: 137 case BT_MINUS: 187 return PREFIX(scanComment)(enc, ptr + MINB 138 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 188 case BT_LSQB: 139 case BT_LSQB: 189 *nextTokPtr = ptr + MINBPC(enc); 140 *nextTokPtr = ptr + MINBPC(enc); 190 return XML_TOK_COND_SECT_OPEN; 141 return XML_TOK_COND_SECT_OPEN; 191 case BT_NMSTRT: 142 case BT_NMSTRT: 192 case BT_HEX: 143 case BT_HEX: 193 ptr += MINBPC(enc); 144 ptr += MINBPC(enc); 194 break; 145 break; 195 default: 146 default: 196 *nextTokPtr = ptr; 147 *nextTokPtr = ptr; 197 return XML_TOK_INVALID; 148 return XML_TOK_INVALID; 198 } 149 } 199 while (HAS_CHAR(enc, ptr, end)) { << 150 while (ptr != end) { 200 switch (BYTE_TYPE(enc, ptr)) { 151 switch (BYTE_TYPE(enc, ptr)) { 201 case BT_PERCNT: 152 case BT_PERCNT: 202 REQUIRE_CHARS(enc, ptr, end, 2); << 153 if (ptr + MINBPC(enc) == end) >> 154 return XML_TOK_PARTIAL; 203 /* don't allow <!ENTITY% foo "whatever"> 155 /* don't allow <!ENTITY% foo "whatever"> */ 204 switch (BYTE_TYPE(enc, ptr + MINBPC(enc) 156 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { 205 case BT_S: << 157 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: 206 case BT_CR: << 207 case BT_LF: << 208 case BT_PERCNT: << 209 *nextTokPtr = ptr; 158 *nextTokPtr = ptr; 210 return XML_TOK_INVALID; 159 return XML_TOK_INVALID; 211 } 160 } 212 /* fall through */ 161 /* fall through */ 213 case BT_S: << 162 case BT_S: case BT_CR: case BT_LF: 214 case BT_CR: << 215 case BT_LF: << 216 *nextTokPtr = ptr; 163 *nextTokPtr = ptr; 217 return XML_TOK_DECL_OPEN; 164 return XML_TOK_DECL_OPEN; 218 case BT_NMSTRT: 165 case BT_NMSTRT: 219 case BT_HEX: 166 case BT_HEX: 220 ptr += MINBPC(enc); 167 ptr += MINBPC(enc); 221 break; 168 break; 222 default: 169 default: 223 *nextTokPtr = ptr; 170 *nextTokPtr = ptr; 224 return XML_TOK_INVALID; 171 return XML_TOK_INVALID; 225 } 172 } 226 } 173 } 227 return XML_TOK_PARTIAL; 174 return XML_TOK_PARTIAL; 228 } 175 } 229 176 230 static int PTRCALL 177 static int PTRCALL 231 PREFIX(checkPiTarget)(const ENCODING *enc, con << 178 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, 232 int *tokPtr) { << 179 const char *end, int *tokPtr) >> 180 { 233 int upper = 0; 181 int upper = 0; 234 UNUSED_P(enc); << 235 *tokPtr = XML_TOK_PI; 182 *tokPtr = XML_TOK_PI; 236 if (end - ptr != MINBPC(enc) * 3) << 183 if (end - ptr != MINBPC(enc)*3) 237 return 1; 184 return 1; 238 switch (BYTE_TO_ASCII(enc, ptr)) { 185 switch (BYTE_TO_ASCII(enc, ptr)) { 239 case ASCII_x: 186 case ASCII_x: 240 break; 187 break; 241 case ASCII_X: 188 case ASCII_X: 242 upper = 1; 189 upper = 1; 243 break; 190 break; 244 default: 191 default: 245 return 1; 192 return 1; 246 } 193 } 247 ptr += MINBPC(enc); 194 ptr += MINBPC(enc); 248 switch (BYTE_TO_ASCII(enc, ptr)) { 195 switch (BYTE_TO_ASCII(enc, ptr)) { 249 case ASCII_m: 196 case ASCII_m: 250 break; 197 break; 251 case ASCII_M: 198 case ASCII_M: 252 upper = 1; 199 upper = 1; 253 break; 200 break; 254 default: 201 default: 255 return 1; 202 return 1; 256 } 203 } 257 ptr += MINBPC(enc); 204 ptr += MINBPC(enc); 258 switch (BYTE_TO_ASCII(enc, ptr)) { 205 switch (BYTE_TO_ASCII(enc, ptr)) { 259 case ASCII_l: 206 case ASCII_l: 260 break; 207 break; 261 case ASCII_L: 208 case ASCII_L: 262 upper = 1; 209 upper = 1; 263 break; 210 break; 264 default: 211 default: 265 return 1; 212 return 1; 266 } 213 } 267 if (upper) 214 if (upper) 268 return 0; 215 return 0; 269 *tokPtr = XML_TOK_XML_DECL; 216 *tokPtr = XML_TOK_XML_DECL; 270 return 1; 217 return 1; 271 } 218 } 272 219 273 /* ptr points to character following "<?" */ 220 /* ptr points to character following "<?" */ 274 221 275 static int PTRCALL 222 static int PTRCALL 276 PREFIX(scanPi)(const ENCODING *enc, const char << 223 PREFIX(scanPi)(const ENCODING *enc, const char *ptr, 277 const char **nextTokPtr) { << 224 const char *end, const char **nextTokPtr) >> 225 { 278 int tok; 226 int tok; 279 const char *target = ptr; 227 const char *target = ptr; 280 REQUIRE_CHAR(enc, ptr, end); << 228 if (ptr == end) >> 229 return XML_TOK_PARTIAL; 281 switch (BYTE_TYPE(enc, ptr)) { 230 switch (BYTE_TYPE(enc, ptr)) { 282 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP << 231 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 283 default: 232 default: 284 *nextTokPtr = ptr; 233 *nextTokPtr = ptr; 285 return XML_TOK_INVALID; 234 return XML_TOK_INVALID; 286 } 235 } 287 while (HAS_CHAR(enc, ptr, end)) { << 236 while (ptr != end) { 288 switch (BYTE_TYPE(enc, ptr)) { 237 switch (BYTE_TYPE(enc, ptr)) { 289 CHECK_NAME_CASES(enc, ptr, end, nextTokP << 238 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 290 case BT_S: << 239 case BT_S: case BT_CR: case BT_LF: 291 case BT_CR: << 240 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 292 case BT_LF: << 293 if (! PREFIX(checkPiTarget)(enc, target, << 294 *nextTokPtr = ptr; 241 *nextTokPtr = ptr; 295 return XML_TOK_INVALID; 242 return XML_TOK_INVALID; 296 } 243 } 297 ptr += MINBPC(enc); 244 ptr += MINBPC(enc); 298 while (HAS_CHAR(enc, ptr, end)) { << 245 while (ptr != end) { 299 switch (BYTE_TYPE(enc, ptr)) { 246 switch (BYTE_TYPE(enc, ptr)) { 300 INVALID_CASES(ptr, nextTokPtr) << 247 INVALID_CASES(ptr, nextTokPtr) 301 case BT_QUEST: 248 case BT_QUEST: 302 ptr += MINBPC(enc); 249 ptr += MINBPC(enc); 303 REQUIRE_CHAR(enc, ptr, end); << 250 if (ptr == end) >> 251 return XML_TOK_PARTIAL; 304 if (CHAR_MATCHES(enc, ptr, ASCII_GT) 252 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 305 *nextTokPtr = ptr + MINBPC(enc); 253 *nextTokPtr = ptr + MINBPC(enc); 306 return tok; 254 return tok; 307 } 255 } 308 break; 256 break; 309 default: 257 default: 310 ptr += MINBPC(enc); 258 ptr += MINBPC(enc); 311 break; 259 break; 312 } 260 } 313 } 261 } 314 return XML_TOK_PARTIAL; 262 return XML_TOK_PARTIAL; 315 case BT_QUEST: 263 case BT_QUEST: 316 if (! PREFIX(checkPiTarget)(enc, target, << 264 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 317 *nextTokPtr = ptr; 265 *nextTokPtr = ptr; 318 return XML_TOK_INVALID; 266 return XML_TOK_INVALID; 319 } 267 } 320 ptr += MINBPC(enc); 268 ptr += MINBPC(enc); 321 REQUIRE_CHAR(enc, ptr, end); << 269 if (ptr == end) >> 270 return XML_TOK_PARTIAL; 322 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 271 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 323 *nextTokPtr = ptr + MINBPC(enc); 272 *nextTokPtr = ptr + MINBPC(enc); 324 return tok; 273 return tok; 325 } 274 } 326 /* fall through */ 275 /* fall through */ 327 default: 276 default: 328 *nextTokPtr = ptr; 277 *nextTokPtr = ptr; 329 return XML_TOK_INVALID; 278 return XML_TOK_INVALID; 330 } 279 } 331 } 280 } 332 return XML_TOK_PARTIAL; 281 return XML_TOK_PARTIAL; 333 } 282 } 334 283 335 static int PTRCALL 284 static int PTRCALL 336 PREFIX(scanCdataSection)(const ENCODING *enc, << 285 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, 337 const char **nextTokP << 286 const char *end, const char **nextTokPtr) 338 static const char CDATA_LSQB[] << 287 { 339 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, A << 288 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, >> 289 ASCII_T, ASCII_A, ASCII_LSQB }; 340 int i; 290 int i; 341 UNUSED_P(enc); << 342 /* CDATA[ */ 291 /* CDATA[ */ 343 REQUIRE_CHARS(enc, ptr, end, 6); << 292 if (end - ptr < 6 * MINBPC(enc)) >> 293 return XML_TOK_PARTIAL; 344 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) 294 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { 345 if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i] << 295 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { 346 *nextTokPtr = ptr; 296 *nextTokPtr = ptr; 347 return XML_TOK_INVALID; 297 return XML_TOK_INVALID; 348 } 298 } 349 } 299 } 350 *nextTokPtr = ptr; 300 *nextTokPtr = ptr; 351 return XML_TOK_CDATA_SECT_OPEN; 301 return XML_TOK_CDATA_SECT_OPEN; 352 } 302 } 353 303 354 static int PTRCALL 304 static int PTRCALL 355 PREFIX(cdataSectionTok)(const ENCODING *enc, c << 305 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, 356 const char **nextTokPt << 306 const char *end, const char **nextTokPtr) 357 if (ptr >= end) << 307 { >> 308 if (ptr == end) 358 return XML_TOK_NONE; 309 return XML_TOK_NONE; 359 if (MINBPC(enc) > 1) { 310 if (MINBPC(enc) > 1) { 360 size_t n = end - ptr; 311 size_t n = end - ptr; 361 if (n & (MINBPC(enc) - 1)) { 312 if (n & (MINBPC(enc) - 1)) { 362 n &= ~(MINBPC(enc) - 1); 313 n &= ~(MINBPC(enc) - 1); 363 if (n == 0) 314 if (n == 0) 364 return XML_TOK_PARTIAL; 315 return XML_TOK_PARTIAL; 365 end = ptr + n; 316 end = ptr + n; 366 } 317 } 367 } 318 } 368 switch (BYTE_TYPE(enc, ptr)) { 319 switch (BYTE_TYPE(enc, ptr)) { 369 case BT_RSQB: 320 case BT_RSQB: 370 ptr += MINBPC(enc); 321 ptr += MINBPC(enc); 371 REQUIRE_CHAR(enc, ptr, end); << 322 if (ptr == end) 372 if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) << 323 return XML_TOK_PARTIAL; >> 324 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 373 break; 325 break; 374 ptr += MINBPC(enc); 326 ptr += MINBPC(enc); 375 REQUIRE_CHAR(enc, ptr, end); << 327 if (ptr == end) 376 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { << 328 return XML_TOK_PARTIAL; >> 329 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 377 ptr -= MINBPC(enc); 330 ptr -= MINBPC(enc); 378 break; 331 break; 379 } 332 } 380 *nextTokPtr = ptr + MINBPC(enc); 333 *nextTokPtr = ptr + MINBPC(enc); 381 return XML_TOK_CDATA_SECT_CLOSE; 334 return XML_TOK_CDATA_SECT_CLOSE; 382 case BT_CR: 335 case BT_CR: 383 ptr += MINBPC(enc); 336 ptr += MINBPC(enc); 384 REQUIRE_CHAR(enc, ptr, end); << 337 if (ptr == end) >> 338 return XML_TOK_PARTIAL; 385 if (BYTE_TYPE(enc, ptr) == BT_LF) 339 if (BYTE_TYPE(enc, ptr) == BT_LF) 386 ptr += MINBPC(enc); 340 ptr += MINBPC(enc); 387 *nextTokPtr = ptr; 341 *nextTokPtr = ptr; 388 return XML_TOK_DATA_NEWLINE; 342 return XML_TOK_DATA_NEWLINE; 389 case BT_LF: 343 case BT_LF: 390 *nextTokPtr = ptr + MINBPC(enc); 344 *nextTokPtr = ptr + MINBPC(enc); 391 return XML_TOK_DATA_NEWLINE; 345 return XML_TOK_DATA_NEWLINE; 392 INVALID_CASES(ptr, nextTokPtr) << 346 INVALID_CASES(ptr, nextTokPtr) 393 default: 347 default: 394 ptr += MINBPC(enc); 348 ptr += MINBPC(enc); 395 break; 349 break; 396 } 350 } 397 while (HAS_CHAR(enc, ptr, end)) { << 351 while (ptr != end) { 398 switch (BYTE_TYPE(enc, ptr)) { 352 switch (BYTE_TYPE(enc, ptr)) { 399 # define LEAD_CASE(n) << 353 #define LEAD_CASE(n) \ 400 case BT_LEAD##n: << 354 case BT_LEAD ## n: \ 401 if (end - ptr < n || IS_INVALID_CHAR(enc, << 355 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 402 *nextTokPtr = ptr; << 356 *nextTokPtr = ptr; \ 403 return XML_TOK_DATA_CHARS; << 357 return XML_TOK_DATA_CHARS; \ 404 } << 358 } \ 405 ptr += n; << 359 ptr += n; \ 406 break; << 360 break; 407 LEAD_CASE(2) << 361 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 408 LEAD_CASE(3) << 362 #undef LEAD_CASE 409 LEAD_CASE(4) << 410 # undef LEAD_CASE << 411 case BT_NONXML: 363 case BT_NONXML: 412 case BT_MALFORM: 364 case BT_MALFORM: 413 case BT_TRAIL: 365 case BT_TRAIL: 414 case BT_CR: 366 case BT_CR: 415 case BT_LF: 367 case BT_LF: 416 case BT_RSQB: 368 case BT_RSQB: 417 *nextTokPtr = ptr; 369 *nextTokPtr = ptr; 418 return XML_TOK_DATA_CHARS; 370 return XML_TOK_DATA_CHARS; 419 default: 371 default: 420 ptr += MINBPC(enc); 372 ptr += MINBPC(enc); 421 break; 373 break; 422 } 374 } 423 } 375 } 424 *nextTokPtr = ptr; 376 *nextTokPtr = ptr; 425 return XML_TOK_DATA_CHARS; 377 return XML_TOK_DATA_CHARS; 426 } 378 } 427 379 428 /* ptr points to character following "</" */ 380 /* ptr points to character following "</" */ 429 381 430 static int PTRCALL 382 static int PTRCALL 431 PREFIX(scanEndTag)(const ENCODING *enc, const << 383 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, 432 const char **nextTokPtr) { << 384 const char *end, const char **nextTokPtr) 433 REQUIRE_CHAR(enc, ptr, end); << 385 { >> 386 if (ptr == end) >> 387 return XML_TOK_PARTIAL; 434 switch (BYTE_TYPE(enc, ptr)) { 388 switch (BYTE_TYPE(enc, ptr)) { 435 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP << 389 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 436 default: 390 default: 437 *nextTokPtr = ptr; 391 *nextTokPtr = ptr; 438 return XML_TOK_INVALID; 392 return XML_TOK_INVALID; 439 } 393 } 440 while (HAS_CHAR(enc, ptr, end)) { << 394 while (ptr != end) { 441 switch (BYTE_TYPE(enc, ptr)) { 395 switch (BYTE_TYPE(enc, ptr)) { 442 CHECK_NAME_CASES(enc, ptr, end, nextTokP << 396 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 443 case BT_S: << 397 case BT_S: case BT_CR: case BT_LF: 444 case BT_CR: << 398 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 445 case BT_LF: << 446 for (ptr += MINBPC(enc); HAS_CHAR(enc, p << 447 switch (BYTE_TYPE(enc, ptr)) { 399 switch (BYTE_TYPE(enc, ptr)) { 448 case BT_S: << 400 case BT_S: case BT_CR: case BT_LF: 449 case BT_CR: << 450 case BT_LF: << 451 break; 401 break; 452 case BT_GT: 402 case BT_GT: 453 *nextTokPtr = ptr + MINBPC(enc); 403 *nextTokPtr = ptr + MINBPC(enc); 454 return XML_TOK_END_TAG; 404 return XML_TOK_END_TAG; 455 default: 405 default: 456 *nextTokPtr = ptr; 406 *nextTokPtr = ptr; 457 return XML_TOK_INVALID; 407 return XML_TOK_INVALID; 458 } 408 } 459 } 409 } 460 return XML_TOK_PARTIAL; 410 return XML_TOK_PARTIAL; 461 # ifdef XML_NS << 411 #ifdef XML_NS 462 case BT_COLON: 412 case BT_COLON: 463 /* no need to check qname syntax here, 413 /* no need to check qname syntax here, 464 since end-tag must match exactly */ 414 since end-tag must match exactly */ 465 ptr += MINBPC(enc); 415 ptr += MINBPC(enc); 466 break; 416 break; 467 # endif << 417 #endif 468 case BT_GT: 418 case BT_GT: 469 *nextTokPtr = ptr + MINBPC(enc); 419 *nextTokPtr = ptr + MINBPC(enc); 470 return XML_TOK_END_TAG; 420 return XML_TOK_END_TAG; 471 default: 421 default: 472 *nextTokPtr = ptr; 422 *nextTokPtr = ptr; 473 return XML_TOK_INVALID; 423 return XML_TOK_INVALID; 474 } 424 } 475 } 425 } 476 return XML_TOK_PARTIAL; 426 return XML_TOK_PARTIAL; 477 } 427 } 478 428 479 /* ptr points to character following "&#X" */ 429 /* ptr points to character following "&#X" */ 480 430 481 static int PTRCALL 431 static int PTRCALL 482 PREFIX(scanHexCharRef)(const ENCODING *enc, co << 432 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, 483 const char **nextTokPtr << 433 const char *end, const char **nextTokPtr) 484 if (HAS_CHAR(enc, ptr, end)) { << 434 { >> 435 if (ptr != end) { 485 switch (BYTE_TYPE(enc, ptr)) { 436 switch (BYTE_TYPE(enc, ptr)) { 486 case BT_DIGIT: 437 case BT_DIGIT: 487 case BT_HEX: 438 case BT_HEX: 488 break; 439 break; 489 default: 440 default: 490 *nextTokPtr = ptr; 441 *nextTokPtr = ptr; 491 return XML_TOK_INVALID; 442 return XML_TOK_INVALID; 492 } 443 } 493 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr << 444 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 494 switch (BYTE_TYPE(enc, ptr)) { 445 switch (BYTE_TYPE(enc, ptr)) { 495 case BT_DIGIT: 446 case BT_DIGIT: 496 case BT_HEX: 447 case BT_HEX: 497 break; 448 break; 498 case BT_SEMI: 449 case BT_SEMI: 499 *nextTokPtr = ptr + MINBPC(enc); 450 *nextTokPtr = ptr + MINBPC(enc); 500 return XML_TOK_CHAR_REF; 451 return XML_TOK_CHAR_REF; 501 default: 452 default: 502 *nextTokPtr = ptr; 453 *nextTokPtr = ptr; 503 return XML_TOK_INVALID; 454 return XML_TOK_INVALID; 504 } 455 } 505 } 456 } 506 } 457 } 507 return XML_TOK_PARTIAL; 458 return XML_TOK_PARTIAL; 508 } 459 } 509 460 510 /* ptr points to character following "&#" */ 461 /* ptr points to character following "&#" */ 511 462 512 static int PTRCALL 463 static int PTRCALL 513 PREFIX(scanCharRef)(const ENCODING *enc, const << 464 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, 514 const char **nextTokPtr) { << 465 const char *end, const char **nextTokPtr) 515 if (HAS_CHAR(enc, ptr, end)) { << 466 { >> 467 if (ptr != end) { 516 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 468 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 517 return PREFIX(scanHexCharRef)(enc, ptr + 469 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 518 switch (BYTE_TYPE(enc, ptr)) { 470 switch (BYTE_TYPE(enc, ptr)) { 519 case BT_DIGIT: 471 case BT_DIGIT: 520 break; 472 break; 521 default: 473 default: 522 *nextTokPtr = ptr; 474 *nextTokPtr = ptr; 523 return XML_TOK_INVALID; 475 return XML_TOK_INVALID; 524 } 476 } 525 for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr << 477 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 526 switch (BYTE_TYPE(enc, ptr)) { 478 switch (BYTE_TYPE(enc, ptr)) { 527 case BT_DIGIT: 479 case BT_DIGIT: 528 break; 480 break; 529 case BT_SEMI: 481 case BT_SEMI: 530 *nextTokPtr = ptr + MINBPC(enc); 482 *nextTokPtr = ptr + MINBPC(enc); 531 return XML_TOK_CHAR_REF; 483 return XML_TOK_CHAR_REF; 532 default: 484 default: 533 *nextTokPtr = ptr; 485 *nextTokPtr = ptr; 534 return XML_TOK_INVALID; 486 return XML_TOK_INVALID; 535 } 487 } 536 } 488 } 537 } 489 } 538 return XML_TOK_PARTIAL; 490 return XML_TOK_PARTIAL; 539 } 491 } 540 492 541 /* ptr points to character following "&" */ 493 /* ptr points to character following "&" */ 542 494 543 static int PTRCALL 495 static int PTRCALL 544 PREFIX(scanRef)(const ENCODING *enc, const cha 496 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, 545 const char **nextTokPtr) { << 497 const char **nextTokPtr) 546 REQUIRE_CHAR(enc, ptr, end); << 498 { >> 499 if (ptr == end) >> 500 return XML_TOK_PARTIAL; 547 switch (BYTE_TYPE(enc, ptr)) { 501 switch (BYTE_TYPE(enc, ptr)) { 548 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP << 502 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 549 case BT_NUM: 503 case BT_NUM: 550 return PREFIX(scanCharRef)(enc, ptr + MINB 504 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 551 default: 505 default: 552 *nextTokPtr = ptr; 506 *nextTokPtr = ptr; 553 return XML_TOK_INVALID; 507 return XML_TOK_INVALID; 554 } 508 } 555 while (HAS_CHAR(enc, ptr, end)) { << 509 while (ptr != end) { 556 switch (BYTE_TYPE(enc, ptr)) { 510 switch (BYTE_TYPE(enc, ptr)) { 557 CHECK_NAME_CASES(enc, ptr, end, nextTokP << 511 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 558 case BT_SEMI: 512 case BT_SEMI: 559 *nextTokPtr = ptr + MINBPC(enc); 513 *nextTokPtr = ptr + MINBPC(enc); 560 return XML_TOK_ENTITY_REF; 514 return XML_TOK_ENTITY_REF; 561 default: 515 default: 562 *nextTokPtr = ptr; 516 *nextTokPtr = ptr; 563 return XML_TOK_INVALID; 517 return XML_TOK_INVALID; 564 } 518 } 565 } 519 } 566 return XML_TOK_PARTIAL; 520 return XML_TOK_PARTIAL; 567 } 521 } 568 522 569 /* ptr points to character following first cha 523 /* ptr points to character following first character of attribute name */ 570 524 571 static int PTRCALL 525 static int PTRCALL 572 PREFIX(scanAtts)(const ENCODING *enc, const ch 526 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, 573 const char **nextTokPtr) { << 527 const char **nextTokPtr) 574 # ifdef XML_NS << 528 { >> 529 #ifdef XML_NS 575 int hadColon = 0; 530 int hadColon = 0; 576 # endif << 531 #endif 577 while (HAS_CHAR(enc, ptr, end)) { << 532 while (ptr != end) { 578 switch (BYTE_TYPE(enc, ptr)) { 533 switch (BYTE_TYPE(enc, ptr)) { 579 CHECK_NAME_CASES(enc, ptr, end, nextTokP << 534 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 580 # ifdef XML_NS << 535 #ifdef XML_NS 581 case BT_COLON: 536 case BT_COLON: 582 if (hadColon) { 537 if (hadColon) { 583 *nextTokPtr = ptr; 538 *nextTokPtr = ptr; 584 return XML_TOK_INVALID; 539 return XML_TOK_INVALID; 585 } 540 } 586 hadColon = 1; 541 hadColon = 1; 587 ptr += MINBPC(enc); 542 ptr += MINBPC(enc); 588 REQUIRE_CHAR(enc, ptr, end); << 543 if (ptr == end) >> 544 return XML_TOK_PARTIAL; 589 switch (BYTE_TYPE(enc, ptr)) { 545 switch (BYTE_TYPE(enc, ptr)) { 590 CHECK_NMSTRT_CASES(enc, ptr, end, next << 546 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 591 default: 547 default: 592 *nextTokPtr = ptr; 548 *nextTokPtr = ptr; 593 return XML_TOK_INVALID; 549 return XML_TOK_INVALID; 594 } 550 } 595 break; 551 break; 596 # endif << 552 #endif 597 case BT_S: << 553 case BT_S: case BT_CR: case BT_LF: 598 case BT_CR: << 599 case BT_LF: << 600 for (;;) { 554 for (;;) { 601 int t; 555 int t; 602 556 603 ptr += MINBPC(enc); 557 ptr += MINBPC(enc); 604 REQUIRE_CHAR(enc, ptr, end); << 558 if (ptr == end) >> 559 return XML_TOK_PARTIAL; 605 t = BYTE_TYPE(enc, ptr); 560 t = BYTE_TYPE(enc, ptr); 606 if (t == BT_EQUALS) 561 if (t == BT_EQUALS) 607 break; 562 break; 608 switch (t) { 563 switch (t) { 609 case BT_S: 564 case BT_S: 610 case BT_LF: 565 case BT_LF: 611 case BT_CR: 566 case BT_CR: 612 break; 567 break; 613 default: 568 default: 614 *nextTokPtr = ptr; 569 *nextTokPtr = ptr; 615 return XML_TOK_INVALID; 570 return XML_TOK_INVALID; 616 } 571 } 617 } 572 } 618 /* fall through */ << 573 /* fall through */ 619 case BT_EQUALS: { << 574 case BT_EQUALS: 620 int open; << 575 { 621 # ifdef XML_NS << 576 int open; 622 hadColon = 0; << 577 #ifdef XML_NS 623 # endif << 578 hadColon = 0; 624 for (;;) { << 579 #endif 625 ptr += MINBPC(enc); << 580 for (;;) { 626 REQUIRE_CHAR(enc, ptr, end); << 581 ptr += MINBPC(enc); 627 open = BYTE_TYPE(enc, ptr); << 582 if (ptr == end) 628 if (open == BT_QUOT || open == BT_APOS << 583 return XML_TOK_PARTIAL; 629 break; << 584 open = BYTE_TYPE(enc, ptr); 630 switch (open) { << 585 if (open == BT_QUOT || open == BT_APOS) 631 case BT_S: << 586 break; 632 case BT_LF: << 587 switch (open) { 633 case BT_CR: << 588 case BT_S: 634 break; << 589 case BT_LF: 635 default: << 590 case BT_CR: 636 *nextTokPtr = ptr; << 591 break; 637 return XML_TOK_INVALID; << 592 default: >> 593 *nextTokPtr = ptr; >> 594 return XML_TOK_INVALID; >> 595 } 638 } 596 } 639 } << 597 ptr += MINBPC(enc); 640 ptr += MINBPC(enc); << 598 /* in attribute value */ 641 /* in attribute value */ << 599 for (;;) { 642 for (;;) { << 600 int t; 643 int t; << 601 if (ptr == end) 644 REQUIRE_CHAR(enc, ptr, end); << 602 return XML_TOK_PARTIAL; 645 t = BYTE_TYPE(enc, ptr); << 603 t = BYTE_TYPE(enc, ptr); 646 if (t == open) << 604 if (t == open) 647 break; << 605 break; 648 switch (t) { << 606 switch (t) { 649 INVALID_CASES(ptr, nextTokPtr) 607 INVALID_CASES(ptr, nextTokPtr) 650 case BT_AMP: { << 608 case BT_AMP: 651 int tok = PREFIX(scanRef)(enc, ptr + << 609 { 652 if (tok <= 0) { << 610 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); 653 if (tok == XML_TOK_INVALID) << 611 if (tok <= 0) { 654 *nextTokPtr = ptr; << 612 if (tok == XML_TOK_INVALID) 655 return tok; << 613 *nextTokPtr = ptr; >> 614 return tok; >> 615 } >> 616 break; >> 617 } >> 618 case BT_LT: >> 619 *nextTokPtr = ptr; >> 620 return XML_TOK_INVALID; >> 621 default: >> 622 ptr += MINBPC(enc); >> 623 break; 656 } 624 } 657 break; << 658 } 625 } 659 case BT_LT: << 660 *nextTokPtr = ptr; << 661 return XML_TOK_INVALID; << 662 default: << 663 ptr += MINBPC(enc); << 664 break; << 665 } << 666 } << 667 ptr += MINBPC(enc); << 668 REQUIRE_CHAR(enc, ptr, end); << 669 switch (BYTE_TYPE(enc, ptr)) { << 670 case BT_S: << 671 case BT_CR: << 672 case BT_LF: << 673 break; << 674 case BT_SOL: << 675 goto sol; << 676 case BT_GT: << 677 goto gt; << 678 default: << 679 *nextTokPtr = ptr; << 680 return XML_TOK_INVALID; << 681 } << 682 /* ptr points to closing quote */ << 683 for (;;) { << 684 ptr += MINBPC(enc); 626 ptr += MINBPC(enc); 685 REQUIRE_CHAR(enc, ptr, end); << 627 if (ptr == end) >> 628 return XML_TOK_PARTIAL; 686 switch (BYTE_TYPE(enc, ptr)) { 629 switch (BYTE_TYPE(enc, ptr)) { 687 CHECK_NMSTRT_CASES(enc, ptr, end, ne << 688 case BT_S: 630 case BT_S: 689 case BT_CR: 631 case BT_CR: 690 case BT_LF: 632 case BT_LF: 691 continue; << 633 break; 692 case BT_GT: << 693 gt: << 694 *nextTokPtr = ptr + MINBPC(enc); << 695 return XML_TOK_START_TAG_WITH_ATTS; << 696 case BT_SOL: 634 case BT_SOL: 697 sol: << 635 goto sol; >> 636 case BT_GT: >> 637 goto gt; >> 638 default: >> 639 *nextTokPtr = ptr; >> 640 return XML_TOK_INVALID; >> 641 } >> 642 /* ptr points to closing quote */ >> 643 for (;;) { 698 ptr += MINBPC(enc); 644 ptr += MINBPC(enc); 699 REQUIRE_CHAR(enc, ptr, end); << 645 if (ptr == end) 700 if (! CHAR_MATCHES(enc, ptr, ASCII_G << 646 return XML_TOK_PARTIAL; >> 647 switch (BYTE_TYPE(enc, ptr)) { >> 648 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) >> 649 case BT_S: case BT_CR: case BT_LF: >> 650 continue; >> 651 case BT_GT: >> 652 gt: >> 653 *nextTokPtr = ptr + MINBPC(enc); >> 654 return XML_TOK_START_TAG_WITH_ATTS; >> 655 case BT_SOL: >> 656 sol: >> 657 ptr += MINBPC(enc); >> 658 if (ptr == end) >> 659 return XML_TOK_PARTIAL; >> 660 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { >> 661 *nextTokPtr = ptr; >> 662 return XML_TOK_INVALID; >> 663 } >> 664 *nextTokPtr = ptr + MINBPC(enc); >> 665 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; >> 666 default: 701 *nextTokPtr = ptr; 667 *nextTokPtr = ptr; 702 return XML_TOK_INVALID; 668 return XML_TOK_INVALID; 703 } 669 } 704 *nextTokPtr = ptr + MINBPC(enc); << 670 break; 705 return XML_TOK_EMPTY_ELEMENT_WITH_AT << 706 default: << 707 *nextTokPtr = ptr; << 708 return XML_TOK_INVALID; << 709 } 671 } 710 break; 672 break; 711 } 673 } 712 break; << 713 } << 714 default: 674 default: 715 *nextTokPtr = ptr; 675 *nextTokPtr = ptr; 716 return XML_TOK_INVALID; 676 return XML_TOK_INVALID; 717 } 677 } 718 } 678 } 719 return XML_TOK_PARTIAL; 679 return XML_TOK_PARTIAL; 720 } 680 } 721 681 722 /* ptr points to character following "<" */ 682 /* ptr points to character following "<" */ 723 683 724 static int PTRCALL 684 static int PTRCALL 725 PREFIX(scanLt)(const ENCODING *enc, const char 685 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, 726 const char **nextTokPtr) { << 686 const char **nextTokPtr) 727 # ifdef XML_NS << 687 { >> 688 #ifdef XML_NS 728 int hadColon; 689 int hadColon; 729 # endif << 690 #endif 730 REQUIRE_CHAR(enc, ptr, end); << 691 if (ptr == end) >> 692 return XML_TOK_PARTIAL; 731 switch (BYTE_TYPE(enc, ptr)) { 693 switch (BYTE_TYPE(enc, ptr)) { 732 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP << 694 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 733 case BT_EXCL: 695 case BT_EXCL: 734 ptr += MINBPC(enc); << 696 if ((ptr += MINBPC(enc)) == end) 735 REQUIRE_CHAR(enc, ptr, end); << 697 return XML_TOK_PARTIAL; 736 switch (BYTE_TYPE(enc, ptr)) { 698 switch (BYTE_TYPE(enc, ptr)) { 737 case BT_MINUS: 699 case BT_MINUS: 738 return PREFIX(scanComment)(enc, ptr + MI 700 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 739 case BT_LSQB: 701 case BT_LSQB: 740 return PREFIX(scanCdataSection)(enc, ptr << 702 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), >> 703 end, nextTokPtr); 741 } 704 } 742 *nextTokPtr = ptr; 705 *nextTokPtr = ptr; 743 return XML_TOK_INVALID; 706 return XML_TOK_INVALID; 744 case BT_QUEST: 707 case BT_QUEST: 745 return PREFIX(scanPi)(enc, ptr + MINBPC(en 708 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 746 case BT_SOL: 709 case BT_SOL: 747 return PREFIX(scanEndTag)(enc, ptr + MINBP 710 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); 748 default: 711 default: 749 *nextTokPtr = ptr; 712 *nextTokPtr = ptr; 750 return XML_TOK_INVALID; 713 return XML_TOK_INVALID; 751 } 714 } 752 # ifdef XML_NS << 715 #ifdef XML_NS 753 hadColon = 0; 716 hadColon = 0; 754 # endif << 717 #endif 755 /* we have a start-tag */ 718 /* we have a start-tag */ 756 while (HAS_CHAR(enc, ptr, end)) { << 719 while (ptr != end) { 757 switch (BYTE_TYPE(enc, ptr)) { 720 switch (BYTE_TYPE(enc, ptr)) { 758 CHECK_NAME_CASES(enc, ptr, end, nextTokP << 721 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 759 # ifdef XML_NS << 722 #ifdef XML_NS 760 case BT_COLON: 723 case BT_COLON: 761 if (hadColon) { 724 if (hadColon) { 762 *nextTokPtr = ptr; 725 *nextTokPtr = ptr; 763 return XML_TOK_INVALID; 726 return XML_TOK_INVALID; 764 } 727 } 765 hadColon = 1; 728 hadColon = 1; 766 ptr += MINBPC(enc); 729 ptr += MINBPC(enc); 767 REQUIRE_CHAR(enc, ptr, end); << 730 if (ptr == end) >> 731 return XML_TOK_PARTIAL; 768 switch (BYTE_TYPE(enc, ptr)) { 732 switch (BYTE_TYPE(enc, ptr)) { 769 CHECK_NMSTRT_CASES(enc, ptr, end, next << 733 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 770 default: 734 default: 771 *nextTokPtr = ptr; 735 *nextTokPtr = ptr; 772 return XML_TOK_INVALID; 736 return XML_TOK_INVALID; 773 } 737 } 774 break; 738 break; 775 # endif << 739 #endif 776 case BT_S: << 740 case BT_S: case BT_CR: case BT_LF: 777 case BT_CR: << 741 { 778 case BT_LF: { << 742 ptr += MINBPC(enc); 779 ptr += MINBPC(enc); << 743 while (ptr != end) { 780 while (HAS_CHAR(enc, ptr, end)) { << 744 switch (BYTE_TYPE(enc, ptr)) { 781 switch (BYTE_TYPE(enc, ptr)) { << 782 CHECK_NMSTRT_CASES(enc, ptr, end, ne 745 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 783 case BT_GT: << 746 case BT_GT: 784 goto gt; << 747 goto gt; 785 case BT_SOL: << 748 case BT_SOL: 786 goto sol; << 749 goto sol; 787 case BT_S: << 750 case BT_S: case BT_CR: case BT_LF: 788 case BT_CR: << 751 ptr += MINBPC(enc); 789 case BT_LF: << 752 continue; 790 ptr += MINBPC(enc); << 753 default: 791 continue; << 754 *nextTokPtr = ptr; 792 default: << 755 return XML_TOK_INVALID; 793 *nextTokPtr = ptr; << 756 } 794 return XML_TOK_INVALID; << 757 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); 795 } 758 } 796 return PREFIX(scanAtts)(enc, ptr, end, << 759 return XML_TOK_PARTIAL; 797 } 760 } 798 return XML_TOK_PARTIAL; << 799 } << 800 case BT_GT: 761 case BT_GT: 801 gt: 762 gt: 802 *nextTokPtr = ptr + MINBPC(enc); 763 *nextTokPtr = ptr + MINBPC(enc); 803 return XML_TOK_START_TAG_NO_ATTS; 764 return XML_TOK_START_TAG_NO_ATTS; 804 case BT_SOL: 765 case BT_SOL: 805 sol: 766 sol: 806 ptr += MINBPC(enc); 767 ptr += MINBPC(enc); 807 REQUIRE_CHAR(enc, ptr, end); << 768 if (ptr == end) 808 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) << 769 return XML_TOK_PARTIAL; >> 770 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 809 *nextTokPtr = ptr; 771 *nextTokPtr = ptr; 810 return XML_TOK_INVALID; 772 return XML_TOK_INVALID; 811 } 773 } 812 *nextTokPtr = ptr + MINBPC(enc); 774 *nextTokPtr = ptr + MINBPC(enc); 813 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 775 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 814 default: 776 default: 815 *nextTokPtr = ptr; 777 *nextTokPtr = ptr; 816 return XML_TOK_INVALID; 778 return XML_TOK_INVALID; 817 } 779 } 818 } 780 } 819 return XML_TOK_PARTIAL; 781 return XML_TOK_PARTIAL; 820 } 782 } 821 783 822 static int PTRCALL 784 static int PTRCALL 823 PREFIX(contentTok)(const ENCODING *enc, const 785 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, 824 const char **nextTokPtr) { << 786 const char **nextTokPtr) 825 if (ptr >= end) << 787 { >> 788 if (ptr == end) 826 return XML_TOK_NONE; 789 return XML_TOK_NONE; 827 if (MINBPC(enc) > 1) { 790 if (MINBPC(enc) > 1) { 828 size_t n = end - ptr; 791 size_t n = end - ptr; 829 if (n & (MINBPC(enc) - 1)) { 792 if (n & (MINBPC(enc) - 1)) { 830 n &= ~(MINBPC(enc) - 1); 793 n &= ~(MINBPC(enc) - 1); 831 if (n == 0) 794 if (n == 0) 832 return XML_TOK_PARTIAL; 795 return XML_TOK_PARTIAL; 833 end = ptr + n; 796 end = ptr + n; 834 } 797 } 835 } 798 } 836 switch (BYTE_TYPE(enc, ptr)) { 799 switch (BYTE_TYPE(enc, ptr)) { 837 case BT_LT: 800 case BT_LT: 838 return PREFIX(scanLt)(enc, ptr + MINBPC(en 801 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); 839 case BT_AMP: 802 case BT_AMP: 840 return PREFIX(scanRef)(enc, ptr + MINBPC(e 803 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 841 case BT_CR: 804 case BT_CR: 842 ptr += MINBPC(enc); 805 ptr += MINBPC(enc); 843 if (! HAS_CHAR(enc, ptr, end)) << 806 if (ptr == end) 844 return XML_TOK_TRAILING_CR; 807 return XML_TOK_TRAILING_CR; 845 if (BYTE_TYPE(enc, ptr) == BT_LF) 808 if (BYTE_TYPE(enc, ptr) == BT_LF) 846 ptr += MINBPC(enc); 809 ptr += MINBPC(enc); 847 *nextTokPtr = ptr; 810 *nextTokPtr = ptr; 848 return XML_TOK_DATA_NEWLINE; 811 return XML_TOK_DATA_NEWLINE; 849 case BT_LF: 812 case BT_LF: 850 *nextTokPtr = ptr + MINBPC(enc); 813 *nextTokPtr = ptr + MINBPC(enc); 851 return XML_TOK_DATA_NEWLINE; 814 return XML_TOK_DATA_NEWLINE; 852 case BT_RSQB: 815 case BT_RSQB: 853 ptr += MINBPC(enc); 816 ptr += MINBPC(enc); 854 if (! HAS_CHAR(enc, ptr, end)) << 817 if (ptr == end) 855 return XML_TOK_TRAILING_RSQB; 818 return XML_TOK_TRAILING_RSQB; 856 if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB)) << 819 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 857 break; 820 break; 858 ptr += MINBPC(enc); 821 ptr += MINBPC(enc); 859 if (! HAS_CHAR(enc, ptr, end)) << 822 if (ptr == end) 860 return XML_TOK_TRAILING_RSQB; 823 return XML_TOK_TRAILING_RSQB; 861 if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) { << 824 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 862 ptr -= MINBPC(enc); 825 ptr -= MINBPC(enc); 863 break; 826 break; 864 } 827 } 865 *nextTokPtr = ptr; 828 *nextTokPtr = ptr; 866 return XML_TOK_INVALID; 829 return XML_TOK_INVALID; 867 INVALID_CASES(ptr, nextTokPtr) << 830 INVALID_CASES(ptr, nextTokPtr) 868 default: 831 default: 869 ptr += MINBPC(enc); 832 ptr += MINBPC(enc); 870 break; 833 break; 871 } 834 } 872 while (HAS_CHAR(enc, ptr, end)) { << 835 while (ptr != end) { 873 switch (BYTE_TYPE(enc, ptr)) { 836 switch (BYTE_TYPE(enc, ptr)) { 874 # define LEAD_CASE(n) << 837 #define LEAD_CASE(n) \ 875 case BT_LEAD##n: << 838 case BT_LEAD ## n: \ 876 if (end - ptr < n || IS_INVALID_CHAR(enc, << 839 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 877 *nextTokPtr = ptr; << 840 *nextTokPtr = ptr; \ 878 return XML_TOK_DATA_CHARS; << 841 return XML_TOK_DATA_CHARS; \ 879 } << 842 } \ 880 ptr += n; << 843 ptr += n; \ 881 break; << 844 break; 882 LEAD_CASE(2) << 845 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 883 LEAD_CASE(3) << 846 #undef LEAD_CASE 884 LEAD_CASE(4) << 885 # undef LEAD_CASE << 886 case BT_RSQB: 847 case BT_RSQB: 887 if (HAS_CHARS(enc, ptr, end, 2)) { << 848 if (ptr + MINBPC(enc) != end) { 888 if (! CHAR_MATCHES(enc, ptr + MINBPC(e << 849 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { 889 ptr += MINBPC(enc); << 850 ptr += MINBPC(enc); 890 break; << 851 break; 891 } << 852 } 892 if (HAS_CHARS(enc, ptr, end, 3)) { << 853 if (ptr + 2*MINBPC(enc) != end) { 893 if (! CHAR_MATCHES(enc, ptr + 2 * MI << 854 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { 894 ptr += MINBPC(enc); << 855 ptr += MINBPC(enc); 895 break; << 856 break; 896 } << 857 } 897 *nextTokPtr = ptr + 2 * MINBPC(enc); << 858 *nextTokPtr = ptr + 2*MINBPC(enc); 898 return XML_TOK_INVALID; << 859 return XML_TOK_INVALID; 899 } << 860 } 900 } 861 } 901 /* fall through */ 862 /* fall through */ 902 case BT_AMP: 863 case BT_AMP: 903 case BT_LT: 864 case BT_LT: 904 case BT_NONXML: 865 case BT_NONXML: 905 case BT_MALFORM: 866 case BT_MALFORM: 906 case BT_TRAIL: 867 case BT_TRAIL: 907 case BT_CR: 868 case BT_CR: 908 case BT_LF: 869 case BT_LF: 909 *nextTokPtr = ptr; 870 *nextTokPtr = ptr; 910 return XML_TOK_DATA_CHARS; 871 return XML_TOK_DATA_CHARS; 911 default: 872 default: 912 ptr += MINBPC(enc); 873 ptr += MINBPC(enc); 913 break; 874 break; 914 } 875 } 915 } 876 } 916 *nextTokPtr = ptr; 877 *nextTokPtr = ptr; 917 return XML_TOK_DATA_CHARS; 878 return XML_TOK_DATA_CHARS; 918 } 879 } 919 880 920 /* ptr points to character following "%" */ 881 /* ptr points to character following "%" */ 921 882 922 static int PTRCALL 883 static int PTRCALL 923 PREFIX(scanPercent)(const ENCODING *enc, const 884 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, 924 const char **nextTokPtr) { << 885 const char **nextTokPtr) 925 REQUIRE_CHAR(enc, ptr, end); << 886 { >> 887 if (ptr == end) >> 888 return -XML_TOK_PERCENT; 926 switch (BYTE_TYPE(enc, ptr)) { 889 switch (BYTE_TYPE(enc, ptr)) { 927 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP << 890 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 928 case BT_S: << 891 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: 929 case BT_LF: << 930 case BT_CR: << 931 case BT_PERCNT: << 932 *nextTokPtr = ptr; 892 *nextTokPtr = ptr; 933 return XML_TOK_PERCENT; 893 return XML_TOK_PERCENT; 934 default: 894 default: 935 *nextTokPtr = ptr; 895 *nextTokPtr = ptr; 936 return XML_TOK_INVALID; 896 return XML_TOK_INVALID; 937 } 897 } 938 while (HAS_CHAR(enc, ptr, end)) { << 898 while (ptr != end) { 939 switch (BYTE_TYPE(enc, ptr)) { 899 switch (BYTE_TYPE(enc, ptr)) { 940 CHECK_NAME_CASES(enc, ptr, end, nextTokP << 900 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 941 case BT_SEMI: 901 case BT_SEMI: 942 *nextTokPtr = ptr + MINBPC(enc); 902 *nextTokPtr = ptr + MINBPC(enc); 943 return XML_TOK_PARAM_ENTITY_REF; 903 return XML_TOK_PARAM_ENTITY_REF; 944 default: 904 default: 945 *nextTokPtr = ptr; 905 *nextTokPtr = ptr; 946 return XML_TOK_INVALID; 906 return XML_TOK_INVALID; 947 } 907 } 948 } 908 } 949 return XML_TOK_PARTIAL; 909 return XML_TOK_PARTIAL; 950 } 910 } 951 911 952 static int PTRCALL 912 static int PTRCALL 953 PREFIX(scanPoundName)(const ENCODING *enc, con 913 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, 954 const char **nextTokPtr) << 914 const char **nextTokPtr) 955 REQUIRE_CHAR(enc, ptr, end); << 915 { >> 916 if (ptr == end) >> 917 return XML_TOK_PARTIAL; 956 switch (BYTE_TYPE(enc, ptr)) { 918 switch (BYTE_TYPE(enc, ptr)) { 957 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokP << 919 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 958 default: 920 default: 959 *nextTokPtr = ptr; 921 *nextTokPtr = ptr; 960 return XML_TOK_INVALID; 922 return XML_TOK_INVALID; 961 } 923 } 962 while (HAS_CHAR(enc, ptr, end)) { << 924 while (ptr != end) { 963 switch (BYTE_TYPE(enc, ptr)) { 925 switch (BYTE_TYPE(enc, ptr)) { 964 CHECK_NAME_CASES(enc, ptr, end, nextTokP << 926 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 965 case BT_CR: << 927 case BT_CR: case BT_LF: case BT_S: 966 case BT_LF: << 928 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: 967 case BT_S: << 968 case BT_RPAR: << 969 case BT_GT: << 970 case BT_PERCNT: << 971 case BT_VERBAR: << 972 *nextTokPtr = ptr; 929 *nextTokPtr = ptr; 973 return XML_TOK_POUND_NAME; 930 return XML_TOK_POUND_NAME; 974 default: 931 default: 975 *nextTokPtr = ptr; 932 *nextTokPtr = ptr; 976 return XML_TOK_INVALID; 933 return XML_TOK_INVALID; 977 } 934 } 978 } 935 } 979 return -XML_TOK_POUND_NAME; 936 return -XML_TOK_POUND_NAME; 980 } 937 } 981 938 982 static int PTRCALL 939 static int PTRCALL 983 PREFIX(scanLit)(int open, const ENCODING *enc, << 940 PREFIX(scanLit)(int open, const ENCODING *enc, 984 const char **nextTokPtr) { << 941 const char *ptr, const char *end, 985 while (HAS_CHAR(enc, ptr, end)) { << 942 const char **nextTokPtr) >> 943 { >> 944 while (ptr != end) { 986 int t = BYTE_TYPE(enc, ptr); 945 int t = BYTE_TYPE(enc, ptr); 987 switch (t) { 946 switch (t) { 988 INVALID_CASES(ptr, nextTokPtr) << 947 INVALID_CASES(ptr, nextTokPtr) 989 case BT_QUOT: 948 case BT_QUOT: 990 case BT_APOS: 949 case BT_APOS: 991 ptr += MINBPC(enc); 950 ptr += MINBPC(enc); 992 if (t != open) 951 if (t != open) 993 break; 952 break; 994 if (! HAS_CHAR(enc, ptr, end)) << 953 if (ptr == end) 995 return -XML_TOK_LITERAL; 954 return -XML_TOK_LITERAL; 996 *nextTokPtr = ptr; 955 *nextTokPtr = ptr; 997 switch (BYTE_TYPE(enc, ptr)) { 956 switch (BYTE_TYPE(enc, ptr)) { 998 case BT_S: << 957 case BT_S: case BT_CR: case BT_LF: 999 case BT_CR: << 958 case BT_GT: case BT_PERCNT: case BT_LSQB: 1000 case BT_LF: << 1001 case BT_GT: << 1002 case BT_PERCNT: << 1003 case BT_LSQB: << 1004 return XML_TOK_LITERAL; 959 return XML_TOK_LITERAL; 1005 default: 960 default: 1006 return XML_TOK_INVALID; 961 return XML_TOK_INVALID; 1007 } 962 } 1008 default: 963 default: 1009 ptr += MINBPC(enc); 964 ptr += MINBPC(enc); 1010 break; 965 break; 1011 } 966 } 1012 } 967 } 1013 return XML_TOK_PARTIAL; 968 return XML_TOK_PARTIAL; 1014 } 969 } 1015 970 1016 static int PTRCALL 971 static int PTRCALL 1017 PREFIX(prologTok)(const ENCODING *enc, const 972 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, 1018 const char **nextTokPtr) { << 973 const char **nextTokPtr) >> 974 { 1019 int tok; 975 int tok; 1020 if (ptr >= end) << 976 if (ptr == end) 1021 return XML_TOK_NONE; 977 return XML_TOK_NONE; 1022 if (MINBPC(enc) > 1) { 978 if (MINBPC(enc) > 1) { 1023 size_t n = end - ptr; 979 size_t n = end - ptr; 1024 if (n & (MINBPC(enc) - 1)) { 980 if (n & (MINBPC(enc) - 1)) { 1025 n &= ~(MINBPC(enc) - 1); 981 n &= ~(MINBPC(enc) - 1); 1026 if (n == 0) 982 if (n == 0) 1027 return XML_TOK_PARTIAL; 983 return XML_TOK_PARTIAL; 1028 end = ptr + n; 984 end = ptr + n; 1029 } 985 } 1030 } 986 } 1031 switch (BYTE_TYPE(enc, ptr)) { 987 switch (BYTE_TYPE(enc, ptr)) { 1032 case BT_QUOT: 988 case BT_QUOT: 1033 return PREFIX(scanLit)(BT_QUOT, enc, ptr 989 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); 1034 case BT_APOS: 990 case BT_APOS: 1035 return PREFIX(scanLit)(BT_APOS, enc, ptr 991 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); 1036 case BT_LT: { << 992 case BT_LT: 1037 ptr += MINBPC(enc); << 993 { 1038 REQUIRE_CHAR(enc, ptr, end); << 994 ptr += MINBPC(enc); 1039 switch (BYTE_TYPE(enc, ptr)) { << 995 if (ptr == end) 1040 case BT_EXCL: << 996 return XML_TOK_PARTIAL; 1041 return PREFIX(scanDecl)(enc, ptr + MINB << 997 switch (BYTE_TYPE(enc, ptr)) { 1042 case BT_QUEST: << 998 case BT_EXCL: 1043 return PREFIX(scanPi)(enc, ptr + MINBPC << 999 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1044 case BT_NMSTRT: << 1000 case BT_QUEST: 1045 case BT_HEX: << 1001 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1046 case BT_NONASCII: << 1002 case BT_NMSTRT: 1047 case BT_LEAD2: << 1003 case BT_HEX: 1048 case BT_LEAD3: << 1004 case BT_NONASCII: 1049 case BT_LEAD4: << 1005 case BT_LEAD2: 1050 *nextTokPtr = ptr - MINBPC(enc); << 1006 case BT_LEAD3: 1051 return XML_TOK_INSTANCE_START; << 1007 case BT_LEAD4: >> 1008 *nextTokPtr = ptr - MINBPC(enc); >> 1009 return XML_TOK_INSTANCE_START; >> 1010 } >> 1011 *nextTokPtr = ptr; >> 1012 return XML_TOK_INVALID; 1052 } 1013 } 1053 *nextTokPtr = ptr; << 1054 return XML_TOK_INVALID; << 1055 } << 1056 case BT_CR: 1014 case BT_CR: 1057 if (ptr + MINBPC(enc) == end) { 1015 if (ptr + MINBPC(enc) == end) { 1058 *nextTokPtr = end; 1016 *nextTokPtr = end; 1059 /* indicate that this might be part of 1017 /* indicate that this might be part of a CR/LF pair */ 1060 return -XML_TOK_PROLOG_S; 1018 return -XML_TOK_PROLOG_S; 1061 } 1019 } 1062 /* fall through */ 1020 /* fall through */ 1063 case BT_S: << 1021 case BT_S: case BT_LF: 1064 case BT_LF: << 1065 for (;;) { 1022 for (;;) { 1066 ptr += MINBPC(enc); 1023 ptr += MINBPC(enc); 1067 if (! HAS_CHAR(enc, ptr, end)) << 1024 if (ptr == end) 1068 break; 1025 break; 1069 switch (BYTE_TYPE(enc, ptr)) { 1026 switch (BYTE_TYPE(enc, ptr)) { 1070 case BT_S: << 1027 case BT_S: case BT_LF: 1071 case BT_LF: << 1072 break; 1028 break; 1073 case BT_CR: 1029 case BT_CR: 1074 /* don't split CR/LF pair */ 1030 /* don't split CR/LF pair */ 1075 if (ptr + MINBPC(enc) != end) 1031 if (ptr + MINBPC(enc) != end) 1076 break; 1032 break; 1077 /* fall through */ 1033 /* fall through */ 1078 default: 1034 default: 1079 *nextTokPtr = ptr; 1035 *nextTokPtr = ptr; 1080 return XML_TOK_PROLOG_S; 1036 return XML_TOK_PROLOG_S; 1081 } 1037 } 1082 } 1038 } 1083 *nextTokPtr = ptr; 1039 *nextTokPtr = ptr; 1084 return XML_TOK_PROLOG_S; 1040 return XML_TOK_PROLOG_S; 1085 case BT_PERCNT: 1041 case BT_PERCNT: 1086 return PREFIX(scanPercent)(enc, ptr + MIN 1042 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1087 case BT_COMMA: 1043 case BT_COMMA: 1088 *nextTokPtr = ptr + MINBPC(enc); 1044 *nextTokPtr = ptr + MINBPC(enc); 1089 return XML_TOK_COMMA; 1045 return XML_TOK_COMMA; 1090 case BT_LSQB: 1046 case BT_LSQB: 1091 *nextTokPtr = ptr + MINBPC(enc); 1047 *nextTokPtr = ptr + MINBPC(enc); 1092 return XML_TOK_OPEN_BRACKET; 1048 return XML_TOK_OPEN_BRACKET; 1093 case BT_RSQB: 1049 case BT_RSQB: 1094 ptr += MINBPC(enc); 1050 ptr += MINBPC(enc); 1095 if (! HAS_CHAR(enc, ptr, end)) << 1051 if (ptr == end) 1096 return -XML_TOK_CLOSE_BRACKET; 1052 return -XML_TOK_CLOSE_BRACKET; 1097 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1053 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1098 REQUIRE_CHARS(enc, ptr, end, 2); << 1054 if (ptr + MINBPC(enc) == end) >> 1055 return XML_TOK_PARTIAL; 1099 if (CHAR_MATCHES(enc, ptr + MINBPC(enc) 1056 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { 1100 *nextTokPtr = ptr + 2 * MINBPC(enc); << 1057 *nextTokPtr = ptr + 2*MINBPC(enc); 1101 return XML_TOK_COND_SECT_CLOSE; 1058 return XML_TOK_COND_SECT_CLOSE; 1102 } 1059 } 1103 } 1060 } 1104 *nextTokPtr = ptr; 1061 *nextTokPtr = ptr; 1105 return XML_TOK_CLOSE_BRACKET; 1062 return XML_TOK_CLOSE_BRACKET; 1106 case BT_LPAR: 1063 case BT_LPAR: 1107 *nextTokPtr = ptr + MINBPC(enc); 1064 *nextTokPtr = ptr + MINBPC(enc); 1108 return XML_TOK_OPEN_PAREN; 1065 return XML_TOK_OPEN_PAREN; 1109 case BT_RPAR: 1066 case BT_RPAR: 1110 ptr += MINBPC(enc); 1067 ptr += MINBPC(enc); 1111 if (! HAS_CHAR(enc, ptr, end)) << 1068 if (ptr == end) 1112 return -XML_TOK_CLOSE_PAREN; 1069 return -XML_TOK_CLOSE_PAREN; 1113 switch (BYTE_TYPE(enc, ptr)) { 1070 switch (BYTE_TYPE(enc, ptr)) { 1114 case BT_AST: 1071 case BT_AST: 1115 *nextTokPtr = ptr + MINBPC(enc); 1072 *nextTokPtr = ptr + MINBPC(enc); 1116 return XML_TOK_CLOSE_PAREN_ASTERISK; 1073 return XML_TOK_CLOSE_PAREN_ASTERISK; 1117 case BT_QUEST: 1074 case BT_QUEST: 1118 *nextTokPtr = ptr + MINBPC(enc); 1075 *nextTokPtr = ptr + MINBPC(enc); 1119 return XML_TOK_CLOSE_PAREN_QUESTION; 1076 return XML_TOK_CLOSE_PAREN_QUESTION; 1120 case BT_PLUS: 1077 case BT_PLUS: 1121 *nextTokPtr = ptr + MINBPC(enc); 1078 *nextTokPtr = ptr + MINBPC(enc); 1122 return XML_TOK_CLOSE_PAREN_PLUS; 1079 return XML_TOK_CLOSE_PAREN_PLUS; 1123 case BT_CR: << 1080 case BT_CR: case BT_LF: case BT_S: 1124 case BT_LF: << 1081 case BT_GT: case BT_COMMA: case BT_VERBAR: 1125 case BT_S: << 1126 case BT_GT: << 1127 case BT_COMMA: << 1128 case BT_VERBAR: << 1129 case BT_RPAR: 1082 case BT_RPAR: 1130 *nextTokPtr = ptr; 1083 *nextTokPtr = ptr; 1131 return XML_TOK_CLOSE_PAREN; 1084 return XML_TOK_CLOSE_PAREN; 1132 } 1085 } 1133 *nextTokPtr = ptr; 1086 *nextTokPtr = ptr; 1134 return XML_TOK_INVALID; 1087 return XML_TOK_INVALID; 1135 case BT_VERBAR: 1088 case BT_VERBAR: 1136 *nextTokPtr = ptr + MINBPC(enc); 1089 *nextTokPtr = ptr + MINBPC(enc); 1137 return XML_TOK_OR; 1090 return XML_TOK_OR; 1138 case BT_GT: 1091 case BT_GT: 1139 *nextTokPtr = ptr + MINBPC(enc); 1092 *nextTokPtr = ptr + MINBPC(enc); 1140 return XML_TOK_DECL_CLOSE; 1093 return XML_TOK_DECL_CLOSE; 1141 case BT_NUM: 1094 case BT_NUM: 1142 return PREFIX(scanPoundName)(enc, ptr + M 1095 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1143 # define LEAD_CASE(n) << 1096 #define LEAD_CASE(n) \ 1144 case BT_LEAD##n: << 1097 case BT_LEAD ## n: \ 1145 if (end - ptr < n) << 1098 if (end - ptr < n) \ 1146 return XML_TOK_PARTIAL_CHAR; << 1099 return XML_TOK_PARTIAL_CHAR; \ 1147 if (IS_INVALID_CHAR(enc, ptr, n)) { << 1100 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ 1148 *nextTokPtr = ptr; << 1101 ptr += n; \ 1149 return XML_TOK_INVALID; << 1102 tok = XML_TOK_NAME; \ 1150 } << 1103 break; \ 1151 if (IS_NMSTRT_CHAR(enc, ptr, n)) { << 1104 } \ 1152 ptr += n; << 1105 if (IS_NAME_CHAR(enc, ptr, n)) { \ 1153 tok = XML_TOK_NAME; << 1106 ptr += n; \ 1154 break; << 1107 tok = XML_TOK_NMTOKEN; \ 1155 } << 1108 break; \ 1156 if (IS_NAME_CHAR(enc, ptr, n)) { << 1109 } \ 1157 ptr += n; << 1110 *nextTokPtr = ptr; \ 1158 tok = XML_TOK_NMTOKEN; << 1159 break; << 1160 } << 1161 *nextTokPtr = ptr; << 1162 return XML_TOK_INVALID; 1111 return XML_TOK_INVALID; 1163 LEAD_CASE(2) << 1112 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1164 LEAD_CASE(3) << 1113 #undef LEAD_CASE 1165 LEAD_CASE(4) << 1166 # undef LEAD_CASE << 1167 case BT_NMSTRT: 1114 case BT_NMSTRT: 1168 case BT_HEX: 1115 case BT_HEX: 1169 tok = XML_TOK_NAME; 1116 tok = XML_TOK_NAME; 1170 ptr += MINBPC(enc); 1117 ptr += MINBPC(enc); 1171 break; 1118 break; 1172 case BT_DIGIT: 1119 case BT_DIGIT: 1173 case BT_NAME: 1120 case BT_NAME: 1174 case BT_MINUS: 1121 case BT_MINUS: 1175 # ifdef XML_NS << 1122 #ifdef XML_NS 1176 case BT_COLON: 1123 case BT_COLON: 1177 # endif << 1124 #endif 1178 tok = XML_TOK_NMTOKEN; 1125 tok = XML_TOK_NMTOKEN; 1179 ptr += MINBPC(enc); 1126 ptr += MINBPC(enc); 1180 break; 1127 break; 1181 case BT_NONASCII: 1128 case BT_NONASCII: 1182 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 1129 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 1183 ptr += MINBPC(enc); 1130 ptr += MINBPC(enc); 1184 tok = XML_TOK_NAME; 1131 tok = XML_TOK_NAME; 1185 break; 1132 break; 1186 } 1133 } 1187 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 1134 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 1188 ptr += MINBPC(enc); 1135 ptr += MINBPC(enc); 1189 tok = XML_TOK_NMTOKEN; 1136 tok = XML_TOK_NMTOKEN; 1190 break; 1137 break; 1191 } 1138 } 1192 /* fall through */ 1139 /* fall through */ 1193 default: 1140 default: 1194 *nextTokPtr = ptr; 1141 *nextTokPtr = ptr; 1195 return XML_TOK_INVALID; 1142 return XML_TOK_INVALID; 1196 } 1143 } 1197 while (HAS_CHAR(enc, ptr, end)) { << 1144 while (ptr != end) { 1198 switch (BYTE_TYPE(enc, ptr)) { 1145 switch (BYTE_TYPE(enc, ptr)) { 1199 CHECK_NAME_CASES(enc, ptr, end, nextTok << 1146 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1200 case BT_GT: << 1147 case BT_GT: case BT_RPAR: case BT_COMMA: 1201 case BT_RPAR: << 1148 case BT_VERBAR: case BT_LSQB: case BT_PERCNT: 1202 case BT_COMMA: << 1149 case BT_S: case BT_CR: case BT_LF: 1203 case BT_VERBAR: << 1204 case BT_LSQB: << 1205 case BT_PERCNT: << 1206 case BT_S: << 1207 case BT_CR: << 1208 case BT_LF: << 1209 *nextTokPtr = ptr; 1150 *nextTokPtr = ptr; 1210 return tok; 1151 return tok; 1211 # ifdef XML_NS << 1152 #ifdef XML_NS 1212 case BT_COLON: 1153 case BT_COLON: 1213 ptr += MINBPC(enc); 1154 ptr += MINBPC(enc); 1214 switch (tok) { 1155 switch (tok) { 1215 case XML_TOK_NAME: 1156 case XML_TOK_NAME: 1216 REQUIRE_CHAR(enc, ptr, end); << 1157 if (ptr == end) >> 1158 return XML_TOK_PARTIAL; 1217 tok = XML_TOK_PREFIXED_NAME; 1159 tok = XML_TOK_PREFIXED_NAME; 1218 switch (BYTE_TYPE(enc, ptr)) { 1160 switch (BYTE_TYPE(enc, ptr)) { 1219 CHECK_NAME_CASES(enc, ptr, end, nex << 1161 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1220 default: 1162 default: 1221 tok = XML_TOK_NMTOKEN; 1163 tok = XML_TOK_NMTOKEN; 1222 break; 1164 break; 1223 } 1165 } 1224 break; 1166 break; 1225 case XML_TOK_PREFIXED_NAME: 1167 case XML_TOK_PREFIXED_NAME: 1226 tok = XML_TOK_NMTOKEN; 1168 tok = XML_TOK_NMTOKEN; 1227 break; 1169 break; 1228 } 1170 } 1229 break; 1171 break; 1230 # endif << 1172 #endif 1231 case BT_PLUS: 1173 case BT_PLUS: 1232 if (tok == XML_TOK_NMTOKEN) { << 1174 if (tok == XML_TOK_NMTOKEN) { 1233 *nextTokPtr = ptr; 1175 *nextTokPtr = ptr; 1234 return XML_TOK_INVALID; 1176 return XML_TOK_INVALID; 1235 } 1177 } 1236 *nextTokPtr = ptr + MINBPC(enc); 1178 *nextTokPtr = ptr + MINBPC(enc); 1237 return XML_TOK_NAME_PLUS; 1179 return XML_TOK_NAME_PLUS; 1238 case BT_AST: 1180 case BT_AST: 1239 if (tok == XML_TOK_NMTOKEN) { << 1181 if (tok == XML_TOK_NMTOKEN) { 1240 *nextTokPtr = ptr; 1182 *nextTokPtr = ptr; 1241 return XML_TOK_INVALID; 1183 return XML_TOK_INVALID; 1242 } 1184 } 1243 *nextTokPtr = ptr + MINBPC(enc); 1185 *nextTokPtr = ptr + MINBPC(enc); 1244 return XML_TOK_NAME_ASTERISK; 1186 return XML_TOK_NAME_ASTERISK; 1245 case BT_QUEST: 1187 case BT_QUEST: 1246 if (tok == XML_TOK_NMTOKEN) { << 1188 if (tok == XML_TOK_NMTOKEN) { 1247 *nextTokPtr = ptr; 1189 *nextTokPtr = ptr; 1248 return XML_TOK_INVALID; 1190 return XML_TOK_INVALID; 1249 } 1191 } 1250 *nextTokPtr = ptr + MINBPC(enc); 1192 *nextTokPtr = ptr + MINBPC(enc); 1251 return XML_TOK_NAME_QUESTION; 1193 return XML_TOK_NAME_QUESTION; 1252 default: 1194 default: 1253 *nextTokPtr = ptr; 1195 *nextTokPtr = ptr; 1254 return XML_TOK_INVALID; 1196 return XML_TOK_INVALID; 1255 } 1197 } 1256 } 1198 } 1257 return -tok; 1199 return -tok; 1258 } 1200 } 1259 1201 1260 static int PTRCALL 1202 static int PTRCALL 1261 PREFIX(attributeValueTok)(const ENCODING *enc << 1203 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, 1262 const char **nextTo << 1204 const char *end, const char **nextTokPtr) >> 1205 { 1263 const char *start; 1206 const char *start; 1264 if (ptr >= end) << 1207 if (ptr == end) 1265 return XML_TOK_NONE; 1208 return XML_TOK_NONE; 1266 else if (! HAS_CHAR(enc, ptr, end)) { << 1267 /* This line cannot be executed. The inc << 1268 * been tokenized once, so incomplete cha << 1269 * already been eliminated from the input << 1270 * check is still valuable, however. << 1271 */ << 1272 return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE << 1273 } << 1274 start = ptr; 1209 start = ptr; 1275 while (HAS_CHAR(enc, ptr, end)) { << 1210 while (ptr != end) { 1276 switch (BYTE_TYPE(enc, ptr)) { 1211 switch (BYTE_TYPE(enc, ptr)) { 1277 # define LEAD_CASE(n) << 1212 #define LEAD_CASE(n) \ 1278 case BT_LEAD##n: << 1213 case BT_LEAD ## n: ptr += n; break; 1279 ptr += n; /* NOTE: The encoding has alrea << 1214 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1280 break; << 1215 #undef LEAD_CASE 1281 LEAD_CASE(2) << 1282 LEAD_CASE(3) << 1283 LEAD_CASE(4) << 1284 # undef LEAD_CASE << 1285 case BT_AMP: 1216 case BT_AMP: 1286 if (ptr == start) 1217 if (ptr == start) 1287 return PREFIX(scanRef)(enc, ptr + MIN 1218 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1288 *nextTokPtr = ptr; 1219 *nextTokPtr = ptr; 1289 return XML_TOK_DATA_CHARS; 1220 return XML_TOK_DATA_CHARS; 1290 case BT_LT: 1221 case BT_LT: 1291 /* this is for inside entity references 1222 /* this is for inside entity references */ 1292 *nextTokPtr = ptr; 1223 *nextTokPtr = ptr; 1293 return XML_TOK_INVALID; 1224 return XML_TOK_INVALID; 1294 case BT_LF: 1225 case BT_LF: 1295 if (ptr == start) { 1226 if (ptr == start) { 1296 *nextTokPtr = ptr + MINBPC(enc); 1227 *nextTokPtr = ptr + MINBPC(enc); 1297 return XML_TOK_DATA_NEWLINE; 1228 return XML_TOK_DATA_NEWLINE; 1298 } 1229 } 1299 *nextTokPtr = ptr; 1230 *nextTokPtr = ptr; 1300 return XML_TOK_DATA_CHARS; 1231 return XML_TOK_DATA_CHARS; 1301 case BT_CR: 1232 case BT_CR: 1302 if (ptr == start) { 1233 if (ptr == start) { 1303 ptr += MINBPC(enc); 1234 ptr += MINBPC(enc); 1304 if (! HAS_CHAR(enc, ptr, end)) << 1235 if (ptr == end) 1305 return XML_TOK_TRAILING_CR; 1236 return XML_TOK_TRAILING_CR; 1306 if (BYTE_TYPE(enc, ptr) == BT_LF) 1237 if (BYTE_TYPE(enc, ptr) == BT_LF) 1307 ptr += MINBPC(enc); 1238 ptr += MINBPC(enc); 1308 *nextTokPtr = ptr; 1239 *nextTokPtr = ptr; 1309 return XML_TOK_DATA_NEWLINE; 1240 return XML_TOK_DATA_NEWLINE; 1310 } 1241 } 1311 *nextTokPtr = ptr; 1242 *nextTokPtr = ptr; 1312 return XML_TOK_DATA_CHARS; 1243 return XML_TOK_DATA_CHARS; 1313 case BT_S: 1244 case BT_S: 1314 if (ptr == start) { 1245 if (ptr == start) { 1315 *nextTokPtr = ptr + MINBPC(enc); 1246 *nextTokPtr = ptr + MINBPC(enc); 1316 return XML_TOK_ATTRIBUTE_VALUE_S; 1247 return XML_TOK_ATTRIBUTE_VALUE_S; 1317 } 1248 } 1318 *nextTokPtr = ptr; 1249 *nextTokPtr = ptr; 1319 return XML_TOK_DATA_CHARS; 1250 return XML_TOK_DATA_CHARS; 1320 default: 1251 default: 1321 ptr += MINBPC(enc); 1252 ptr += MINBPC(enc); 1322 break; 1253 break; 1323 } 1254 } 1324 } 1255 } 1325 *nextTokPtr = ptr; 1256 *nextTokPtr = ptr; 1326 return XML_TOK_DATA_CHARS; 1257 return XML_TOK_DATA_CHARS; 1327 } 1258 } 1328 1259 1329 static int PTRCALL 1260 static int PTRCALL 1330 PREFIX(entityValueTok)(const ENCODING *enc, c << 1261 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, 1331 const char **nextTokPt << 1262 const char *end, const char **nextTokPtr) >> 1263 { 1332 const char *start; 1264 const char *start; 1333 if (ptr >= end) << 1265 if (ptr == end) 1334 return XML_TOK_NONE; 1266 return XML_TOK_NONE; 1335 else if (! HAS_CHAR(enc, ptr, end)) { << 1336 /* This line cannot be executed. The inc << 1337 * been tokenized once, so incomplete cha << 1338 * already been eliminated from the input << 1339 * check is still valuable, however. << 1340 */ << 1341 return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE << 1342 } << 1343 start = ptr; 1267 start = ptr; 1344 while (HAS_CHAR(enc, ptr, end)) { << 1268 while (ptr != end) { 1345 switch (BYTE_TYPE(enc, ptr)) { 1269 switch (BYTE_TYPE(enc, ptr)) { 1346 # define LEAD_CASE(n) << 1270 #define LEAD_CASE(n) \ 1347 case BT_LEAD##n: << 1271 case BT_LEAD ## n: ptr += n; break; 1348 ptr += n; /* NOTE: The encoding has alrea << 1272 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1349 break; << 1273 #undef LEAD_CASE 1350 LEAD_CASE(2) << 1351 LEAD_CASE(3) << 1352 LEAD_CASE(4) << 1353 # undef LEAD_CASE << 1354 case BT_AMP: 1274 case BT_AMP: 1355 if (ptr == start) 1275 if (ptr == start) 1356 return PREFIX(scanRef)(enc, ptr + MIN 1276 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1357 *nextTokPtr = ptr; 1277 *nextTokPtr = ptr; 1358 return XML_TOK_DATA_CHARS; 1278 return XML_TOK_DATA_CHARS; 1359 case BT_PERCNT: 1279 case BT_PERCNT: 1360 if (ptr == start) { 1280 if (ptr == start) { 1361 int tok = PREFIX(scanPercent)(enc, pt << 1281 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), >> 1282 end, nextTokPtr); 1362 return (tok == XML_TOK_PERCENT) ? XML 1283 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; 1363 } 1284 } 1364 *nextTokPtr = ptr; 1285 *nextTokPtr = ptr; 1365 return XML_TOK_DATA_CHARS; 1286 return XML_TOK_DATA_CHARS; 1366 case BT_LF: 1287 case BT_LF: 1367 if (ptr == start) { 1288 if (ptr == start) { 1368 *nextTokPtr = ptr + MINBPC(enc); 1289 *nextTokPtr = ptr + MINBPC(enc); 1369 return XML_TOK_DATA_NEWLINE; 1290 return XML_TOK_DATA_NEWLINE; 1370 } 1291 } 1371 *nextTokPtr = ptr; 1292 *nextTokPtr = ptr; 1372 return XML_TOK_DATA_CHARS; 1293 return XML_TOK_DATA_CHARS; 1373 case BT_CR: 1294 case BT_CR: 1374 if (ptr == start) { 1295 if (ptr == start) { 1375 ptr += MINBPC(enc); 1296 ptr += MINBPC(enc); 1376 if (! HAS_CHAR(enc, ptr, end)) << 1297 if (ptr == end) 1377 return XML_TOK_TRAILING_CR; 1298 return XML_TOK_TRAILING_CR; 1378 if (BYTE_TYPE(enc, ptr) == BT_LF) 1299 if (BYTE_TYPE(enc, ptr) == BT_LF) 1379 ptr += MINBPC(enc); 1300 ptr += MINBPC(enc); 1380 *nextTokPtr = ptr; 1301 *nextTokPtr = ptr; 1381 return XML_TOK_DATA_NEWLINE; 1302 return XML_TOK_DATA_NEWLINE; 1382 } 1303 } 1383 *nextTokPtr = ptr; 1304 *nextTokPtr = ptr; 1384 return XML_TOK_DATA_CHARS; 1305 return XML_TOK_DATA_CHARS; 1385 default: 1306 default: 1386 ptr += MINBPC(enc); 1307 ptr += MINBPC(enc); 1387 break; 1308 break; 1388 } 1309 } 1389 } 1310 } 1390 *nextTokPtr = ptr; 1311 *nextTokPtr = ptr; 1391 return XML_TOK_DATA_CHARS; 1312 return XML_TOK_DATA_CHARS; 1392 } 1313 } 1393 1314 1394 # ifdef XML_DTD << 1315 #ifdef XML_DTD 1395 1316 1396 static int PTRCALL 1317 static int PTRCALL 1397 PREFIX(ignoreSectionTok)(const ENCODING *enc, << 1318 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, 1398 const char **nextTok << 1319 const char *end, const char **nextTokPtr) >> 1320 { 1399 int level = 0; 1321 int level = 0; 1400 if (MINBPC(enc) > 1) { 1322 if (MINBPC(enc) > 1) { 1401 size_t n = end - ptr; 1323 size_t n = end - ptr; 1402 if (n & (MINBPC(enc) - 1)) { 1324 if (n & (MINBPC(enc) - 1)) { 1403 n &= ~(MINBPC(enc) - 1); 1325 n &= ~(MINBPC(enc) - 1); 1404 end = ptr + n; 1326 end = ptr + n; 1405 } 1327 } 1406 } 1328 } 1407 while (HAS_CHAR(enc, ptr, end)) { << 1329 while (ptr != end) { 1408 switch (BYTE_TYPE(enc, ptr)) { 1330 switch (BYTE_TYPE(enc, ptr)) { 1409 INVALID_CASES(ptr, nextTokPtr) << 1331 INVALID_CASES(ptr, nextTokPtr) 1410 case BT_LT: 1332 case BT_LT: 1411 ptr += MINBPC(enc); << 1333 if ((ptr += MINBPC(enc)) == end) 1412 REQUIRE_CHAR(enc, ptr, end); << 1334 return XML_TOK_PARTIAL; 1413 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) 1335 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { 1414 ptr += MINBPC(enc); << 1336 if ((ptr += MINBPC(enc)) == end) 1415 REQUIRE_CHAR(enc, ptr, end); << 1337 return XML_TOK_PARTIAL; 1416 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB 1338 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { 1417 ++level; 1339 ++level; 1418 ptr += MINBPC(enc); 1340 ptr += MINBPC(enc); 1419 } 1341 } 1420 } 1342 } 1421 break; 1343 break; 1422 case BT_RSQB: 1344 case BT_RSQB: 1423 ptr += MINBPC(enc); << 1345 if ((ptr += MINBPC(enc)) == end) 1424 REQUIRE_CHAR(enc, ptr, end); << 1346 return XML_TOK_PARTIAL; 1425 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 1347 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1426 ptr += MINBPC(enc); << 1348 if ((ptr += MINBPC(enc)) == end) 1427 REQUIRE_CHAR(enc, ptr, end); << 1349 return XML_TOK_PARTIAL; 1428 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) 1350 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1429 ptr += MINBPC(enc); 1351 ptr += MINBPC(enc); 1430 if (level == 0) { 1352 if (level == 0) { 1431 *nextTokPtr = ptr; 1353 *nextTokPtr = ptr; 1432 return XML_TOK_IGNORE_SECT; 1354 return XML_TOK_IGNORE_SECT; 1433 } 1355 } 1434 --level; 1356 --level; 1435 } 1357 } 1436 } 1358 } 1437 break; 1359 break; 1438 default: 1360 default: 1439 ptr += MINBPC(enc); 1361 ptr += MINBPC(enc); 1440 break; 1362 break; 1441 } 1363 } 1442 } 1364 } 1443 return XML_TOK_PARTIAL; 1365 return XML_TOK_PARTIAL; 1444 } 1366 } 1445 1367 1446 # endif /* XML_DTD */ << 1368 #endif /* XML_DTD */ 1447 1369 1448 static int PTRCALL 1370 static int PTRCALL 1449 PREFIX(isPublicId)(const ENCODING *enc, const 1371 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, 1450 const char **badPtr) { << 1372 const char **badPtr) >> 1373 { 1451 ptr += MINBPC(enc); 1374 ptr += MINBPC(enc); 1452 end -= MINBPC(enc); 1375 end -= MINBPC(enc); 1453 for (; HAS_CHAR(enc, ptr, end); ptr += MINB << 1376 for (; ptr != end; ptr += MINBPC(enc)) { 1454 switch (BYTE_TYPE(enc, ptr)) { 1377 switch (BYTE_TYPE(enc, ptr)) { 1455 case BT_DIGIT: 1378 case BT_DIGIT: 1456 case BT_HEX: 1379 case BT_HEX: 1457 case BT_MINUS: 1380 case BT_MINUS: 1458 case BT_APOS: 1381 case BT_APOS: 1459 case BT_LPAR: 1382 case BT_LPAR: 1460 case BT_RPAR: 1383 case BT_RPAR: 1461 case BT_PLUS: 1384 case BT_PLUS: 1462 case BT_COMMA: 1385 case BT_COMMA: 1463 case BT_SOL: 1386 case BT_SOL: 1464 case BT_EQUALS: 1387 case BT_EQUALS: 1465 case BT_QUEST: 1388 case BT_QUEST: 1466 case BT_CR: 1389 case BT_CR: 1467 case BT_LF: 1390 case BT_LF: 1468 case BT_SEMI: 1391 case BT_SEMI: 1469 case BT_EXCL: 1392 case BT_EXCL: 1470 case BT_AST: 1393 case BT_AST: 1471 case BT_PERCNT: 1394 case BT_PERCNT: 1472 case BT_NUM: 1395 case BT_NUM: 1473 # ifdef XML_NS << 1396 #ifdef XML_NS 1474 case BT_COLON: 1397 case BT_COLON: 1475 # endif << 1398 #endif 1476 break; 1399 break; 1477 case BT_S: 1400 case BT_S: 1478 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) 1401 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { 1479 *badPtr = ptr; 1402 *badPtr = ptr; 1480 return 0; 1403 return 0; 1481 } 1404 } 1482 break; 1405 break; 1483 case BT_NAME: 1406 case BT_NAME: 1484 case BT_NMSTRT: 1407 case BT_NMSTRT: 1485 if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f) << 1408 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) 1486 break; 1409 break; 1487 /* fall through */ << 1488 default: 1410 default: 1489 switch (BYTE_TO_ASCII(enc, ptr)) { 1411 switch (BYTE_TO_ASCII(enc, ptr)) { 1490 case 0x24: /* $ */ 1412 case 0x24: /* $ */ 1491 case 0x40: /* @ */ 1413 case 0x40: /* @ */ 1492 break; 1414 break; 1493 default: 1415 default: 1494 *badPtr = ptr; 1416 *badPtr = ptr; 1495 return 0; 1417 return 0; 1496 } 1418 } 1497 break; 1419 break; 1498 } 1420 } 1499 } 1421 } 1500 return 1; 1422 return 1; 1501 } 1423 } 1502 1424 1503 /* This must only be called for a well-formed 1425 /* This must only be called for a well-formed start-tag or empty 1504 element tag. Returns the number of attrib 1426 element tag. Returns the number of attributes. Pointers to the 1505 first attsMax attributes are stored in att 1427 first attsMax attributes are stored in atts. 1506 */ 1428 */ 1507 1429 1508 static int PTRCALL 1430 static int PTRCALL 1509 PREFIX(getAtts)(const ENCODING *enc, const ch << 1431 PREFIX(getAtts)(const ENCODING *enc, const char *ptr, 1510 ATTRIBUTE *atts) { << 1432 int attsMax, ATTRIBUTE *atts) >> 1433 { 1511 enum { other, inName, inValue } state = inN 1434 enum { other, inName, inValue } state = inName; 1512 int nAtts = 0; 1435 int nAtts = 0; 1513 int open = 0; /* defined when state == inVa 1436 int open = 0; /* defined when state == inValue; 1514 initialization just to shu 1437 initialization just to shut up compilers */ 1515 1438 1516 for (ptr += MINBPC(enc);; ptr += MINBPC(enc 1439 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { 1517 switch (BYTE_TYPE(enc, ptr)) { 1440 switch (BYTE_TYPE(enc, ptr)) { 1518 # define START_NAME << 1441 #define START_NAME \ 1519 if (state == other) { << 1442 if (state == other) { \ 1520 if (nAtts < attsMax) { << 1443 if (nAtts < attsMax) { \ 1521 atts[nAtts].name = ptr; << 1444 atts[nAtts].name = ptr; \ 1522 atts[nAtts].normalized = 1; << 1445 atts[nAtts].normalized = 1; \ 1523 } << 1446 } \ 1524 state = inName; << 1447 state = inName; \ 1525 } << 1448 } 1526 # define LEAD_CASE(n) << 1449 #define LEAD_CASE(n) \ 1527 case BT_LEAD##n: /* NOTE: The encoding has << 1450 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; 1528 START_NAME ptr += (n - MINBPC(enc)); << 1451 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1529 break; << 1452 #undef LEAD_CASE 1530 LEAD_CASE(2) << 1531 LEAD_CASE(3) << 1532 LEAD_CASE(4) << 1533 # undef LEAD_CASE << 1534 case BT_NONASCII: 1453 case BT_NONASCII: 1535 case BT_NMSTRT: 1454 case BT_NMSTRT: 1536 case BT_HEX: 1455 case BT_HEX: 1537 START_NAME 1456 START_NAME 1538 break; 1457 break; 1539 # undef START_NAME << 1458 #undef START_NAME 1540 case BT_QUOT: 1459 case BT_QUOT: 1541 if (state != inValue) { 1460 if (state != inValue) { 1542 if (nAtts < attsMax) 1461 if (nAtts < attsMax) 1543 atts[nAtts].valuePtr = ptr + MINBPC 1462 atts[nAtts].valuePtr = ptr + MINBPC(enc); 1544 state = inValue; 1463 state = inValue; 1545 open = BT_QUOT; 1464 open = BT_QUOT; 1546 } else if (open == BT_QUOT) { << 1465 } >> 1466 else if (open == BT_QUOT) { 1547 state = other; 1467 state = other; 1548 if (nAtts < attsMax) 1468 if (nAtts < attsMax) 1549 atts[nAtts].valueEnd = ptr; 1469 atts[nAtts].valueEnd = ptr; 1550 nAtts++; 1470 nAtts++; 1551 } 1471 } 1552 break; 1472 break; 1553 case BT_APOS: 1473 case BT_APOS: 1554 if (state != inValue) { 1474 if (state != inValue) { 1555 if (nAtts < attsMax) 1475 if (nAtts < attsMax) 1556 atts[nAtts].valuePtr = ptr + MINBPC 1476 atts[nAtts].valuePtr = ptr + MINBPC(enc); 1557 state = inValue; 1477 state = inValue; 1558 open = BT_APOS; 1478 open = BT_APOS; 1559 } else if (open == BT_APOS) { << 1479 } >> 1480 else if (open == BT_APOS) { 1560 state = other; 1481 state = other; 1561 if (nAtts < attsMax) 1482 if (nAtts < attsMax) 1562 atts[nAtts].valueEnd = ptr; 1483 atts[nAtts].valueEnd = ptr; 1563 nAtts++; 1484 nAtts++; 1564 } 1485 } 1565 break; 1486 break; 1566 case BT_AMP: 1487 case BT_AMP: 1567 if (nAtts < attsMax) 1488 if (nAtts < attsMax) 1568 atts[nAtts].normalized = 0; 1489 atts[nAtts].normalized = 0; 1569 break; 1490 break; 1570 case BT_S: 1491 case BT_S: 1571 if (state == inName) 1492 if (state == inName) 1572 state = other; 1493 state = other; 1573 else if (state == inValue && nAtts < at << 1494 else if (state == inValue >> 1495 && nAtts < attsMax >> 1496 && atts[nAtts].normalized 1574 && (ptr == atts[nAtts].valuePt 1497 && (ptr == atts[nAtts].valuePtr 1575 || BYTE_TO_ASCII(enc, ptr) 1498 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE 1576 || BYTE_TO_ASCII(enc, ptr 1499 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE 1577 || BYTE_TYPE(enc, ptr + MI 1500 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) 1578 atts[nAtts].normalized = 0; 1501 atts[nAtts].normalized = 0; 1579 break; 1502 break; 1580 case BT_CR: << 1503 case BT_CR: case BT_LF: 1581 case BT_LF: << 1582 /* This case ensures that the first att 1504 /* This case ensures that the first attribute name is counted 1583 Apart from that we could just change 1505 Apart from that we could just change state on the quote. */ 1584 if (state == inName) 1506 if (state == inName) 1585 state = other; 1507 state = other; 1586 else if (state == inValue && nAtts < at 1508 else if (state == inValue && nAtts < attsMax) 1587 atts[nAtts].normalized = 0; 1509 atts[nAtts].normalized = 0; 1588 break; 1510 break; 1589 case BT_GT: 1511 case BT_GT: 1590 case BT_SOL: 1512 case BT_SOL: 1591 if (state != inValue) 1513 if (state != inValue) 1592 return nAtts; 1514 return nAtts; 1593 break; 1515 break; 1594 default: 1516 default: 1595 break; 1517 break; 1596 } 1518 } 1597 } 1519 } 1598 /* not reached */ 1520 /* not reached */ 1599 } 1521 } 1600 1522 1601 static int PTRFASTCALL 1523 static int PTRFASTCALL 1602 PREFIX(charRefNumber)(const ENCODING *enc, co << 1524 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) >> 1525 { 1603 int result = 0; 1526 int result = 0; 1604 /* skip &# */ 1527 /* skip &# */ 1605 UNUSED_P(enc); << 1528 ptr += 2*MINBPC(enc); 1606 ptr += 2 * MINBPC(enc); << 1607 if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 1529 if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 1608 for (ptr += MINBPC(enc); ! CHAR_MATCHES(e << 1530 for (ptr += MINBPC(enc); >> 1531 !CHAR_MATCHES(enc, ptr, ASCII_SEMI); 1609 ptr += MINBPC(enc)) { 1532 ptr += MINBPC(enc)) { 1610 int c = BYTE_TO_ASCII(enc, ptr); 1533 int c = BYTE_TO_ASCII(enc, ptr); 1611 switch (c) { 1534 switch (c) { 1612 case ASCII_0: << 1535 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: 1613 case ASCII_1: << 1536 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: 1614 case ASCII_2: << 1615 case ASCII_3: << 1616 case ASCII_4: << 1617 case ASCII_5: << 1618 case ASCII_6: << 1619 case ASCII_7: << 1620 case ASCII_8: << 1621 case ASCII_9: << 1622 result <<= 4; 1537 result <<= 4; 1623 result |= (c - ASCII_0); 1538 result |= (c - ASCII_0); 1624 break; 1539 break; 1625 case ASCII_A: << 1540 case ASCII_A: case ASCII_B: case ASCII_C: 1626 case ASCII_B: << 1541 case ASCII_D: case ASCII_E: case ASCII_F: 1627 case ASCII_C: << 1628 case ASCII_D: << 1629 case ASCII_E: << 1630 case ASCII_F: << 1631 result <<= 4; 1542 result <<= 4; 1632 result += 10 + (c - ASCII_A); 1543 result += 10 + (c - ASCII_A); 1633 break; 1544 break; 1634 case ASCII_a: << 1545 case ASCII_a: case ASCII_b: case ASCII_c: 1635 case ASCII_b: << 1546 case ASCII_d: case ASCII_e: case ASCII_f: 1636 case ASCII_c: << 1637 case ASCII_d: << 1638 case ASCII_e: << 1639 case ASCII_f: << 1640 result <<= 4; 1547 result <<= 4; 1641 result += 10 + (c - ASCII_a); 1548 result += 10 + (c - ASCII_a); 1642 break; 1549 break; 1643 } 1550 } 1644 if (result >= 0x110000) 1551 if (result >= 0x110000) 1645 return -1; 1552 return -1; 1646 } 1553 } 1647 } else { << 1554 } 1648 for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEM << 1555 else { >> 1556 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { 1649 int c = BYTE_TO_ASCII(enc, ptr); 1557 int c = BYTE_TO_ASCII(enc, ptr); 1650 result *= 10; 1558 result *= 10; 1651 result += (c - ASCII_0); 1559 result += (c - ASCII_0); 1652 if (result >= 0x110000) 1560 if (result >= 0x110000) 1653 return -1; 1561 return -1; 1654 } 1562 } 1655 } 1563 } 1656 return checkCharRefNumber(result); 1564 return checkCharRefNumber(result); 1657 } 1565 } 1658 1566 1659 static int PTRCALL 1567 static int PTRCALL 1660 PREFIX(predefinedEntityName)(const ENCODING * 1568 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, 1661 const char *end) << 1569 const char *end) 1662 UNUSED_P(enc); << 1570 { 1663 switch ((end - ptr) / MINBPC(enc)) { << 1571 switch ((end - ptr)/MINBPC(enc)) { 1664 case 2: 1572 case 2: 1665 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 1573 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { 1666 switch (BYTE_TO_ASCII(enc, ptr)) { 1574 switch (BYTE_TO_ASCII(enc, ptr)) { 1667 case ASCII_l: 1575 case ASCII_l: 1668 return ASCII_LT; 1576 return ASCII_LT; 1669 case ASCII_g: 1577 case ASCII_g: 1670 return ASCII_GT; 1578 return ASCII_GT; 1671 } 1579 } 1672 } 1580 } 1673 break; 1581 break; 1674 case 3: 1582 case 3: 1675 if (CHAR_MATCHES(enc, ptr, ASCII_a)) { 1583 if (CHAR_MATCHES(enc, ptr, ASCII_a)) { 1676 ptr += MINBPC(enc); 1584 ptr += MINBPC(enc); 1677 if (CHAR_MATCHES(enc, ptr, ASCII_m)) { 1585 if (CHAR_MATCHES(enc, ptr, ASCII_m)) { 1678 ptr += MINBPC(enc); 1586 ptr += MINBPC(enc); 1679 if (CHAR_MATCHES(enc, ptr, ASCII_p)) 1587 if (CHAR_MATCHES(enc, ptr, ASCII_p)) 1680 return ASCII_AMP; 1588 return ASCII_AMP; 1681 } 1589 } 1682 } 1590 } 1683 break; 1591 break; 1684 case 4: 1592 case 4: 1685 switch (BYTE_TO_ASCII(enc, ptr)) { 1593 switch (BYTE_TO_ASCII(enc, ptr)) { 1686 case ASCII_q: 1594 case ASCII_q: 1687 ptr += MINBPC(enc); 1595 ptr += MINBPC(enc); 1688 if (CHAR_MATCHES(enc, ptr, ASCII_u)) { 1596 if (CHAR_MATCHES(enc, ptr, ASCII_u)) { 1689 ptr += MINBPC(enc); 1597 ptr += MINBPC(enc); 1690 if (CHAR_MATCHES(enc, ptr, ASCII_o)) 1598 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1691 ptr += MINBPC(enc); 1599 ptr += MINBPC(enc); 1692 if (CHAR_MATCHES(enc, ptr, ASCII_t) 1600 if (CHAR_MATCHES(enc, ptr, ASCII_t)) 1693 return ASCII_QUOT; 1601 return ASCII_QUOT; 1694 } 1602 } 1695 } 1603 } 1696 break; 1604 break; 1697 case ASCII_a: 1605 case ASCII_a: 1698 ptr += MINBPC(enc); 1606 ptr += MINBPC(enc); 1699 if (CHAR_MATCHES(enc, ptr, ASCII_p)) { 1607 if (CHAR_MATCHES(enc, ptr, ASCII_p)) { 1700 ptr += MINBPC(enc); 1608 ptr += MINBPC(enc); 1701 if (CHAR_MATCHES(enc, ptr, ASCII_o)) 1609 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1702 ptr += MINBPC(enc); 1610 ptr += MINBPC(enc); 1703 if (CHAR_MATCHES(enc, ptr, ASCII_s) 1611 if (CHAR_MATCHES(enc, ptr, ASCII_s)) 1704 return ASCII_APOS; 1612 return ASCII_APOS; 1705 } 1613 } 1706 } 1614 } 1707 break; 1615 break; 1708 } 1616 } 1709 } 1617 } 1710 return 0; 1618 return 0; 1711 } 1619 } 1712 1620 1713 static int PTRCALL 1621 static int PTRCALL >> 1622 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) >> 1623 { >> 1624 for (;;) { >> 1625 switch (BYTE_TYPE(enc, ptr1)) { >> 1626 #define LEAD_CASE(n) \ >> 1627 case BT_LEAD ## n: \ >> 1628 if (*ptr1++ != *ptr2++) \ >> 1629 return 0; >> 1630 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) >> 1631 #undef LEAD_CASE >> 1632 /* fall through */ >> 1633 if (*ptr1++ != *ptr2++) >> 1634 return 0; >> 1635 break; >> 1636 case BT_NONASCII: >> 1637 case BT_NMSTRT: >> 1638 #ifdef XML_NS >> 1639 case BT_COLON: >> 1640 #endif >> 1641 case BT_HEX: >> 1642 case BT_DIGIT: >> 1643 case BT_NAME: >> 1644 case BT_MINUS: >> 1645 if (*ptr2++ != *ptr1++) >> 1646 return 0; >> 1647 if (MINBPC(enc) > 1) { >> 1648 if (*ptr2++ != *ptr1++) >> 1649 return 0; >> 1650 if (MINBPC(enc) > 2) { >> 1651 if (*ptr2++ != *ptr1++) >> 1652 return 0; >> 1653 if (MINBPC(enc) > 3) { >> 1654 if (*ptr2++ != *ptr1++) >> 1655 return 0; >> 1656 } >> 1657 } >> 1658 } >> 1659 break; >> 1660 default: >> 1661 if (MINBPC(enc) == 1 && *ptr1 == *ptr2) >> 1662 return 1; >> 1663 switch (BYTE_TYPE(enc, ptr2)) { >> 1664 case BT_LEAD2: >> 1665 case BT_LEAD3: >> 1666 case BT_LEAD4: >> 1667 case BT_NONASCII: >> 1668 case BT_NMSTRT: >> 1669 #ifdef XML_NS >> 1670 case BT_COLON: >> 1671 #endif >> 1672 case BT_HEX: >> 1673 case BT_DIGIT: >> 1674 case BT_NAME: >> 1675 case BT_MINUS: >> 1676 return 0; >> 1677 default: >> 1678 return 1; >> 1679 } >> 1680 } >> 1681 } >> 1682 /* not reached */ >> 1683 } >> 1684 >> 1685 static int PTRCALL 1714 PREFIX(nameMatchesAscii)(const ENCODING *enc, 1686 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, 1715 const char *end1, co << 1687 const char *end1, const char *ptr2) 1716 UNUSED_P(enc); << 1688 { 1717 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) 1689 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { 1718 if (end1 - ptr1 < MINBPC(enc)) { << 1690 if (ptr1 == end1) 1719 /* This line cannot be executed. The i << 1691 return 0; 1720 * been tokenized once, so incomplete c << 1692 if (!CHAR_MATCHES(enc, ptr1, *ptr2)) 1721 * already been eliminated from the inp << 1722 * paranoia check is still valuable, ho << 1723 */ << 1724 return 0; /* LCOV_EXCL_LINE */ << 1725 } << 1726 if (! CHAR_MATCHES(enc, ptr1, *ptr2)) << 1727 return 0; 1693 return 0; 1728 } 1694 } 1729 return ptr1 == end1; 1695 return ptr1 == end1; 1730 } 1696 } 1731 1697 1732 static int PTRFASTCALL 1698 static int PTRFASTCALL 1733 PREFIX(nameLength)(const ENCODING *enc, const << 1699 PREFIX(nameLength)(const ENCODING *enc, const char *ptr) >> 1700 { 1734 const char *start = ptr; 1701 const char *start = ptr; 1735 for (;;) { 1702 for (;;) { 1736 switch (BYTE_TYPE(enc, ptr)) { 1703 switch (BYTE_TYPE(enc, ptr)) { 1737 # define LEAD_CASE(n) << 1704 #define LEAD_CASE(n) \ 1738 case BT_LEAD##n: << 1705 case BT_LEAD ## n: ptr += n; break; 1739 ptr += n; /* NOTE: The encoding has alrea << 1706 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1740 break; << 1707 #undef LEAD_CASE 1741 LEAD_CASE(2) << 1742 LEAD_CASE(3) << 1743 LEAD_CASE(4) << 1744 # undef LEAD_CASE << 1745 case BT_NONASCII: 1708 case BT_NONASCII: 1746 case BT_NMSTRT: 1709 case BT_NMSTRT: 1747 # ifdef XML_NS << 1710 #ifdef XML_NS 1748 case BT_COLON: 1711 case BT_COLON: 1749 # endif << 1712 #endif 1750 case BT_HEX: 1713 case BT_HEX: 1751 case BT_DIGIT: 1714 case BT_DIGIT: 1752 case BT_NAME: 1715 case BT_NAME: 1753 case BT_MINUS: 1716 case BT_MINUS: 1754 ptr += MINBPC(enc); 1717 ptr += MINBPC(enc); 1755 break; 1718 break; 1756 default: 1719 default: 1757 return (int)(ptr - start); 1720 return (int)(ptr - start); 1758 } 1721 } 1759 } 1722 } 1760 } 1723 } 1761 1724 1762 static const char *PTRFASTCALL << 1725 static const char * PTRFASTCALL 1763 PREFIX(skipS)(const ENCODING *enc, const char << 1726 PREFIX(skipS)(const ENCODING *enc, const char *ptr) >> 1727 { 1764 for (;;) { 1728 for (;;) { 1765 switch (BYTE_TYPE(enc, ptr)) { 1729 switch (BYTE_TYPE(enc, ptr)) { 1766 case BT_LF: 1730 case BT_LF: 1767 case BT_CR: 1731 case BT_CR: 1768 case BT_S: 1732 case BT_S: 1769 ptr += MINBPC(enc); 1733 ptr += MINBPC(enc); 1770 break; 1734 break; 1771 default: 1735 default: 1772 return ptr; 1736 return ptr; 1773 } 1737 } 1774 } 1738 } 1775 } 1739 } 1776 1740 1777 static void PTRCALL 1741 static void PTRCALL 1778 PREFIX(updatePosition)(const ENCODING *enc, c << 1742 PREFIX(updatePosition)(const ENCODING *enc, 1779 POSITION *pos) { << 1743 const char *ptr, 1780 while (HAS_CHAR(enc, ptr, end)) { << 1744 const char *end, 1781 switch (BYTE_TYPE(enc, ptr)) { << 1745 POSITION *pos) 1782 # define LEAD_CASE(n) << 1746 { 1783 case BT_LEAD##n: << 1747 while (ptr != end) { 1784 ptr += n; /* NOTE: The encoding has alrea << 1748 switch (BYTE_TYPE(enc, ptr)) { 1785 pos->columnNumber++; << 1749 #define LEAD_CASE(n) \ 1786 break; << 1750 case BT_LEAD ## n: \ 1787 LEAD_CASE(2) << 1751 ptr += n; \ 1788 LEAD_CASE(3) << 1752 break; 1789 LEAD_CASE(4) << 1753 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1790 # undef LEAD_CASE << 1754 #undef LEAD_CASE 1791 case BT_LF: 1755 case BT_LF: 1792 pos->columnNumber = 0; << 1756 pos->columnNumber = (XML_Size)-1; 1793 pos->lineNumber++; 1757 pos->lineNumber++; 1794 ptr += MINBPC(enc); 1758 ptr += MINBPC(enc); 1795 break; 1759 break; 1796 case BT_CR: 1760 case BT_CR: 1797 pos->lineNumber++; 1761 pos->lineNumber++; 1798 ptr += MINBPC(enc); 1762 ptr += MINBPC(enc); 1799 if (HAS_CHAR(enc, ptr, end) && BYTE_TYP << 1763 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) 1800 ptr += MINBPC(enc); 1764 ptr += MINBPC(enc); 1801 pos->columnNumber = 0; << 1765 pos->columnNumber = (XML_Size)-1; 1802 break; 1766 break; 1803 default: 1767 default: 1804 ptr += MINBPC(enc); 1768 ptr += MINBPC(enc); 1805 pos->columnNumber++; << 1806 break; 1769 break; 1807 } 1770 } >> 1771 pos->columnNumber++; 1808 } 1772 } 1809 } 1773 } 1810 1774 1811 # undef DO_LEAD_CASE << 1775 #undef DO_LEAD_CASE 1812 # undef MULTIBYTE_CASES << 1776 #undef MULTIBYTE_CASES 1813 # undef INVALID_CASES << 1777 #undef INVALID_CASES 1814 # undef CHECK_NAME_CASE << 1778 #undef CHECK_NAME_CASE 1815 # undef CHECK_NAME_CASES << 1779 #undef CHECK_NAME_CASES 1816 # undef CHECK_NMSTRT_CASE << 1780 #undef CHECK_NMSTRT_CASE 1817 # undef CHECK_NMSTRT_CASES << 1781 #undef CHECK_NMSTRT_CASES 1818 1782 1819 #endif /* XML_TOK_IMPL_C */ 1783 #endif /* XML_TOK_IMPL_C */ 1820 1784