Tim van der Lippe | 706ec96 | 2021-06-04 13:24:42 +0100 | [diff] [blame^] | 1 | var hasOwnProperty = Object.prototype.hasOwnProperty; |
| 2 | var matchGraph = require('./match-graph'); |
| 3 | var MATCH = matchGraph.MATCH; |
| 4 | var MISMATCH = matchGraph.MISMATCH; |
| 5 | var DISALLOW_EMPTY = matchGraph.DISALLOW_EMPTY; |
| 6 | var TYPE = require('../tokenizer/const').TYPE; |
| 7 | |
| 8 | var STUB = 0; |
| 9 | var TOKEN = 1; |
| 10 | var OPEN_SYNTAX = 2; |
| 11 | var CLOSE_SYNTAX = 3; |
| 12 | |
| 13 | var EXIT_REASON_MATCH = 'Match'; |
| 14 | var EXIT_REASON_MISMATCH = 'Mismatch'; |
| 15 | var EXIT_REASON_ITERATION_LIMIT = 'Maximum iteration number exceeded (please fill an issue on https://github.com/csstree/csstree/issues)'; |
| 16 | |
| 17 | var ITERATION_LIMIT = 15000; |
| 18 | var totalIterationCount = 0; |
| 19 | |
| 20 | function reverseList(list) { |
| 21 | var prev = null; |
| 22 | var next = null; |
| 23 | var item = list; |
| 24 | |
| 25 | while (item !== null) { |
| 26 | next = item.prev; |
| 27 | item.prev = prev; |
| 28 | prev = item; |
| 29 | item = next; |
| 30 | } |
| 31 | |
| 32 | return prev; |
| 33 | } |
| 34 | |
| 35 | function areStringsEqualCaseInsensitive(testStr, referenceStr) { |
| 36 | if (testStr.length !== referenceStr.length) { |
| 37 | return false; |
| 38 | } |
| 39 | |
| 40 | for (var i = 0; i < testStr.length; i++) { |
| 41 | var testCode = testStr.charCodeAt(i); |
| 42 | var referenceCode = referenceStr.charCodeAt(i); |
| 43 | |
| 44 | // testCode.toLowerCase() for U+0041 LATIN CAPITAL LETTER A (A) .. U+005A LATIN CAPITAL LETTER Z (Z). |
| 45 | if (testCode >= 0x0041 && testCode <= 0x005A) { |
| 46 | testCode = testCode | 32; |
| 47 | } |
| 48 | |
| 49 | if (testCode !== referenceCode) { |
| 50 | return false; |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | return true; |
| 55 | } |
| 56 | |
| 57 | function isContextEdgeDelim(token) { |
| 58 | if (token.type !== TYPE.Delim) { |
| 59 | return false; |
| 60 | } |
| 61 | |
| 62 | // Fix matching for unicode-range: U+30??, U+FF00-FF9F |
| 63 | // Probably we need to check out previous match instead |
| 64 | return token.value !== '?'; |
| 65 | } |
| 66 | |
| 67 | function isCommaContextStart(token) { |
| 68 | if (token === null) { |
| 69 | return true; |
| 70 | } |
| 71 | |
| 72 | return ( |
| 73 | token.type === TYPE.Comma || |
| 74 | token.type === TYPE.Function || |
| 75 | token.type === TYPE.LeftParenthesis || |
| 76 | token.type === TYPE.LeftSquareBracket || |
| 77 | token.type === TYPE.LeftCurlyBracket || |
| 78 | isContextEdgeDelim(token) |
| 79 | ); |
| 80 | } |
| 81 | |
| 82 | function isCommaContextEnd(token) { |
| 83 | if (token === null) { |
| 84 | return true; |
| 85 | } |
| 86 | |
| 87 | return ( |
| 88 | token.type === TYPE.RightParenthesis || |
| 89 | token.type === TYPE.RightSquareBracket || |
| 90 | token.type === TYPE.RightCurlyBracket || |
| 91 | token.type === TYPE.Delim |
| 92 | ); |
| 93 | } |
| 94 | |
| 95 | function internalMatch(tokens, state, syntaxes) { |
| 96 | function moveToNextToken() { |
| 97 | do { |
| 98 | tokenIndex++; |
| 99 | token = tokenIndex < tokens.length ? tokens[tokenIndex] : null; |
| 100 | } while (token !== null && (token.type === TYPE.WhiteSpace || token.type === TYPE.Comment)); |
| 101 | } |
| 102 | |
| 103 | function getNextToken(offset) { |
| 104 | var nextIndex = tokenIndex + offset; |
| 105 | |
| 106 | return nextIndex < tokens.length ? tokens[nextIndex] : null; |
| 107 | } |
| 108 | |
| 109 | function stateSnapshotFromSyntax(nextState, prev) { |
| 110 | return { |
| 111 | nextState: nextState, |
| 112 | matchStack: matchStack, |
| 113 | syntaxStack: syntaxStack, |
| 114 | thenStack: thenStack, |
| 115 | tokenIndex: tokenIndex, |
| 116 | prev: prev |
| 117 | }; |
| 118 | } |
| 119 | |
| 120 | function pushThenStack(nextState) { |
| 121 | thenStack = { |
| 122 | nextState: nextState, |
| 123 | matchStack: matchStack, |
| 124 | syntaxStack: syntaxStack, |
| 125 | prev: thenStack |
| 126 | }; |
| 127 | } |
| 128 | |
| 129 | function pushElseStack(nextState) { |
| 130 | elseStack = stateSnapshotFromSyntax(nextState, elseStack); |
| 131 | } |
| 132 | |
| 133 | function addTokenToMatch() { |
| 134 | matchStack = { |
| 135 | type: TOKEN, |
| 136 | syntax: state.syntax, |
| 137 | token: token, |
| 138 | prev: matchStack |
| 139 | }; |
| 140 | |
| 141 | moveToNextToken(); |
| 142 | syntaxStash = null; |
| 143 | |
| 144 | if (tokenIndex > longestMatch) { |
| 145 | longestMatch = tokenIndex; |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | function openSyntax() { |
| 150 | syntaxStack = { |
| 151 | syntax: state.syntax, |
| 152 | opts: state.syntax.opts || (syntaxStack !== null && syntaxStack.opts) || null, |
| 153 | prev: syntaxStack |
| 154 | }; |
| 155 | |
| 156 | matchStack = { |
| 157 | type: OPEN_SYNTAX, |
| 158 | syntax: state.syntax, |
| 159 | token: matchStack.token, |
| 160 | prev: matchStack |
| 161 | }; |
| 162 | } |
| 163 | |
| 164 | function closeSyntax() { |
| 165 | if (matchStack.type === OPEN_SYNTAX) { |
| 166 | matchStack = matchStack.prev; |
| 167 | } else { |
| 168 | matchStack = { |
| 169 | type: CLOSE_SYNTAX, |
| 170 | syntax: syntaxStack.syntax, |
| 171 | token: matchStack.token, |
| 172 | prev: matchStack |
| 173 | }; |
| 174 | } |
| 175 | |
| 176 | syntaxStack = syntaxStack.prev; |
| 177 | } |
| 178 | |
| 179 | var syntaxStack = null; |
| 180 | var thenStack = null; |
| 181 | var elseStack = null; |
| 182 | |
| 183 | // null – stashing allowed, nothing stashed |
| 184 | // false – stashing disabled, nothing stashed |
| 185 | // anithing else – fail stashable syntaxes, some syntax stashed |
| 186 | var syntaxStash = null; |
| 187 | |
| 188 | var iterationCount = 0; // count iterations and prevent infinite loop |
| 189 | var exitReason = null; |
| 190 | |
| 191 | var token = null; |
| 192 | var tokenIndex = -1; |
| 193 | var longestMatch = 0; |
| 194 | var matchStack = { |
| 195 | type: STUB, |
| 196 | syntax: null, |
| 197 | token: null, |
| 198 | prev: null |
| 199 | }; |
| 200 | |
| 201 | moveToNextToken(); |
| 202 | |
| 203 | while (exitReason === null && ++iterationCount < ITERATION_LIMIT) { |
| 204 | // function mapList(list, fn) { |
| 205 | // var result = []; |
| 206 | // while (list) { |
| 207 | // result.unshift(fn(list)); |
| 208 | // list = list.prev; |
| 209 | // } |
| 210 | // return result; |
| 211 | // } |
| 212 | // console.log('--\n', |
| 213 | // '#' + iterationCount, |
| 214 | // require('util').inspect({ |
| 215 | // match: mapList(matchStack, x => x.type === TOKEN ? x.token && x.token.value : x.syntax ? ({ [OPEN_SYNTAX]: '<', [CLOSE_SYNTAX]: '</' }[x.type] || x.type) + '!' + x.syntax.name : null), |
| 216 | // token: token && token.value, |
| 217 | // tokenIndex, |
| 218 | // syntax: syntax.type + (syntax.id ? ' #' + syntax.id : '') |
| 219 | // }, { depth: null }) |
| 220 | // ); |
| 221 | switch (state.type) { |
| 222 | case 'Match': |
| 223 | if (thenStack === null) { |
| 224 | // turn to MISMATCH when some tokens left unmatched |
| 225 | if (token !== null) { |
| 226 | // doesn't mismatch if just one token left and it's an IE hack |
| 227 | if (tokenIndex !== tokens.length - 1 || (token.value !== '\\0' && token.value !== '\\9')) { |
| 228 | state = MISMATCH; |
| 229 | break; |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | // break the main loop, return a result - MATCH |
| 234 | exitReason = EXIT_REASON_MATCH; |
| 235 | break; |
| 236 | } |
| 237 | |
| 238 | // go to next syntax (`then` branch) |
| 239 | state = thenStack.nextState; |
| 240 | |
| 241 | // check match is not empty |
| 242 | if (state === DISALLOW_EMPTY) { |
| 243 | if (thenStack.matchStack === matchStack) { |
| 244 | state = MISMATCH; |
| 245 | break; |
| 246 | } else { |
| 247 | state = MATCH; |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | // close syntax if needed |
| 252 | while (thenStack.syntaxStack !== syntaxStack) { |
| 253 | closeSyntax(); |
| 254 | } |
| 255 | |
| 256 | // pop stack |
| 257 | thenStack = thenStack.prev; |
| 258 | break; |
| 259 | |
| 260 | case 'Mismatch': |
| 261 | // when some syntax is stashed |
| 262 | if (syntaxStash !== null && syntaxStash !== false) { |
| 263 | // there is no else branches or a branch reduce match stack |
| 264 | if (elseStack === null || tokenIndex > elseStack.tokenIndex) { |
| 265 | // restore state from the stash |
| 266 | elseStack = syntaxStash; |
| 267 | syntaxStash = false; // disable stashing |
| 268 | } |
| 269 | } else if (elseStack === null) { |
| 270 | // no else branches -> break the main loop |
| 271 | // return a result - MISMATCH |
| 272 | exitReason = EXIT_REASON_MISMATCH; |
| 273 | break; |
| 274 | } |
| 275 | |
| 276 | // go to next syntax (`else` branch) |
| 277 | state = elseStack.nextState; |
| 278 | |
| 279 | // restore all the rest stack states |
| 280 | thenStack = elseStack.thenStack; |
| 281 | syntaxStack = elseStack.syntaxStack; |
| 282 | matchStack = elseStack.matchStack; |
| 283 | tokenIndex = elseStack.tokenIndex; |
| 284 | token = tokenIndex < tokens.length ? tokens[tokenIndex] : null; |
| 285 | |
| 286 | // pop stack |
| 287 | elseStack = elseStack.prev; |
| 288 | break; |
| 289 | |
| 290 | case 'MatchGraph': |
| 291 | state = state.match; |
| 292 | break; |
| 293 | |
| 294 | case 'If': |
| 295 | // IMPORTANT: else stack push must go first, |
| 296 | // since it stores the state of thenStack before changes |
| 297 | if (state.else !== MISMATCH) { |
| 298 | pushElseStack(state.else); |
| 299 | } |
| 300 | |
| 301 | if (state.then !== MATCH) { |
| 302 | pushThenStack(state.then); |
| 303 | } |
| 304 | |
| 305 | state = state.match; |
| 306 | break; |
| 307 | |
| 308 | case 'MatchOnce': |
| 309 | state = { |
| 310 | type: 'MatchOnceBuffer', |
| 311 | syntax: state, |
| 312 | index: 0, |
| 313 | mask: 0 |
| 314 | }; |
| 315 | break; |
| 316 | |
| 317 | case 'MatchOnceBuffer': |
| 318 | var terms = state.syntax.terms; |
| 319 | |
| 320 | if (state.index === terms.length) { |
| 321 | // no matches at all or it's required all terms to be matched |
| 322 | if (state.mask === 0 || state.syntax.all) { |
| 323 | state = MISMATCH; |
| 324 | break; |
| 325 | } |
| 326 | |
| 327 | // a partial match is ok |
| 328 | state = MATCH; |
| 329 | break; |
| 330 | } |
| 331 | |
| 332 | // all terms are matched |
| 333 | if (state.mask === (1 << terms.length) - 1) { |
| 334 | state = MATCH; |
| 335 | break; |
| 336 | } |
| 337 | |
| 338 | for (; state.index < terms.length; state.index++) { |
| 339 | var matchFlag = 1 << state.index; |
| 340 | |
| 341 | if ((state.mask & matchFlag) === 0) { |
| 342 | // IMPORTANT: else stack push must go first, |
| 343 | // since it stores the state of thenStack before changes |
| 344 | pushElseStack(state); |
| 345 | pushThenStack({ |
| 346 | type: 'AddMatchOnce', |
| 347 | syntax: state.syntax, |
| 348 | mask: state.mask | matchFlag |
| 349 | }); |
| 350 | |
| 351 | // match |
| 352 | state = terms[state.index++]; |
| 353 | break; |
| 354 | } |
| 355 | } |
| 356 | break; |
| 357 | |
| 358 | case 'AddMatchOnce': |
| 359 | state = { |
| 360 | type: 'MatchOnceBuffer', |
| 361 | syntax: state.syntax, |
| 362 | index: 0, |
| 363 | mask: state.mask |
| 364 | }; |
| 365 | break; |
| 366 | |
| 367 | case 'Enum': |
| 368 | if (token !== null) { |
| 369 | var name = token.value.toLowerCase(); |
| 370 | |
| 371 | // drop \0 and \9 hack from keyword name |
| 372 | if (name.indexOf('\\') !== -1) { |
| 373 | name = name.replace(/\\[09].*$/, ''); |
| 374 | } |
| 375 | |
| 376 | if (hasOwnProperty.call(state.map, name)) { |
| 377 | state = state.map[name]; |
| 378 | break; |
| 379 | } |
| 380 | } |
| 381 | |
| 382 | state = MISMATCH; |
| 383 | break; |
| 384 | |
| 385 | case 'Generic': |
| 386 | var opts = syntaxStack !== null ? syntaxStack.opts : null; |
| 387 | var lastTokenIndex = tokenIndex + Math.floor(state.fn(token, getNextToken, opts)); |
| 388 | |
| 389 | if (!isNaN(lastTokenIndex) && lastTokenIndex > tokenIndex) { |
| 390 | while (tokenIndex < lastTokenIndex) { |
| 391 | addTokenToMatch(); |
| 392 | } |
| 393 | |
| 394 | state = MATCH; |
| 395 | } else { |
| 396 | state = MISMATCH; |
| 397 | } |
| 398 | |
| 399 | break; |
| 400 | |
| 401 | case 'Type': |
| 402 | case 'Property': |
| 403 | var syntaxDict = state.type === 'Type' ? 'types' : 'properties'; |
| 404 | var dictSyntax = hasOwnProperty.call(syntaxes, syntaxDict) ? syntaxes[syntaxDict][state.name] : null; |
| 405 | |
| 406 | if (!dictSyntax || !dictSyntax.match) { |
| 407 | throw new Error( |
| 408 | 'Bad syntax reference: ' + |
| 409 | (state.type === 'Type' |
| 410 | ? '<' + state.name + '>' |
| 411 | : '<\'' + state.name + '\'>') |
| 412 | ); |
| 413 | } |
| 414 | |
| 415 | // stash a syntax for types with low priority |
| 416 | if (syntaxStash !== false && token !== null && state.type === 'Type') { |
| 417 | var lowPriorityMatching = |
| 418 | // https://drafts.csswg.org/css-values-4/#custom-idents |
| 419 | // When parsing positionally-ambiguous keywords in a property value, a <custom-ident> production |
| 420 | // can only claim the keyword if no other unfulfilled production can claim it. |
| 421 | (state.name === 'custom-ident' && token.type === TYPE.Ident) || |
| 422 | |
| 423 | // https://drafts.csswg.org/css-values-4/#lengths |
| 424 | // ... if a `0` could be parsed as either a <number> or a <length> in a property (such as line-height), |
| 425 | // it must parse as a <number> |
| 426 | (state.name === 'length' && token.value === '0'); |
| 427 | |
| 428 | if (lowPriorityMatching) { |
| 429 | if (syntaxStash === null) { |
| 430 | syntaxStash = stateSnapshotFromSyntax(state, elseStack); |
| 431 | } |
| 432 | |
| 433 | state = MISMATCH; |
| 434 | break; |
| 435 | } |
| 436 | } |
| 437 | |
| 438 | openSyntax(); |
| 439 | state = dictSyntax.match; |
| 440 | break; |
| 441 | |
| 442 | case 'Keyword': |
| 443 | var name = state.name; |
| 444 | |
| 445 | if (token !== null) { |
| 446 | var keywordName = token.value; |
| 447 | |
| 448 | // drop \0 and \9 hack from keyword name |
| 449 | if (keywordName.indexOf('\\') !== -1) { |
| 450 | keywordName = keywordName.replace(/\\[09].*$/, ''); |
| 451 | } |
| 452 | |
| 453 | if (areStringsEqualCaseInsensitive(keywordName, name)) { |
| 454 | addTokenToMatch(); |
| 455 | state = MATCH; |
| 456 | break; |
| 457 | } |
| 458 | } |
| 459 | |
| 460 | state = MISMATCH; |
| 461 | break; |
| 462 | |
| 463 | case 'AtKeyword': |
| 464 | case 'Function': |
| 465 | if (token !== null && areStringsEqualCaseInsensitive(token.value, state.name)) { |
| 466 | addTokenToMatch(); |
| 467 | state = MATCH; |
| 468 | break; |
| 469 | } |
| 470 | |
| 471 | state = MISMATCH; |
| 472 | break; |
| 473 | |
| 474 | case 'Token': |
| 475 | if (token !== null && token.value === state.value) { |
| 476 | addTokenToMatch(); |
| 477 | state = MATCH; |
| 478 | break; |
| 479 | } |
| 480 | |
| 481 | state = MISMATCH; |
| 482 | break; |
| 483 | |
| 484 | case 'Comma': |
| 485 | if (token !== null && token.type === TYPE.Comma) { |
| 486 | if (isCommaContextStart(matchStack.token)) { |
| 487 | state = MISMATCH; |
| 488 | } else { |
| 489 | addTokenToMatch(); |
| 490 | state = isCommaContextEnd(token) ? MISMATCH : MATCH; |
| 491 | } |
| 492 | } else { |
| 493 | state = isCommaContextStart(matchStack.token) || isCommaContextEnd(token) ? MATCH : MISMATCH; |
| 494 | } |
| 495 | |
| 496 | break; |
| 497 | |
| 498 | case 'String': |
| 499 | var string = ''; |
| 500 | |
| 501 | for (var lastTokenIndex = tokenIndex; lastTokenIndex < tokens.length && string.length < state.value.length; lastTokenIndex++) { |
| 502 | string += tokens[lastTokenIndex].value; |
| 503 | } |
| 504 | |
| 505 | if (areStringsEqualCaseInsensitive(string, state.value)) { |
| 506 | while (tokenIndex < lastTokenIndex) { |
| 507 | addTokenToMatch(); |
| 508 | } |
| 509 | |
| 510 | state = MATCH; |
| 511 | } else { |
| 512 | state = MISMATCH; |
| 513 | } |
| 514 | |
| 515 | break; |
| 516 | |
| 517 | default: |
| 518 | throw new Error('Unknown node type: ' + state.type); |
| 519 | } |
| 520 | } |
| 521 | |
| 522 | totalIterationCount += iterationCount; |
| 523 | |
| 524 | switch (exitReason) { |
| 525 | case null: |
| 526 | console.warn('[csstree-match] BREAK after ' + ITERATION_LIMIT + ' iterations'); |
| 527 | exitReason = EXIT_REASON_ITERATION_LIMIT; |
| 528 | matchStack = null; |
| 529 | break; |
| 530 | |
| 531 | case EXIT_REASON_MATCH: |
| 532 | while (syntaxStack !== null) { |
| 533 | closeSyntax(); |
| 534 | } |
| 535 | break; |
| 536 | |
| 537 | default: |
| 538 | matchStack = null; |
| 539 | } |
| 540 | |
| 541 | return { |
| 542 | tokens: tokens, |
| 543 | reason: exitReason, |
| 544 | iterations: iterationCount, |
| 545 | match: matchStack, |
| 546 | longestMatch: longestMatch |
| 547 | }; |
| 548 | } |
| 549 | |
| 550 | function matchAsList(tokens, matchGraph, syntaxes) { |
| 551 | var matchResult = internalMatch(tokens, matchGraph, syntaxes || {}); |
| 552 | |
| 553 | if (matchResult.match !== null) { |
| 554 | var item = reverseList(matchResult.match).prev; |
| 555 | |
| 556 | matchResult.match = []; |
| 557 | |
| 558 | while (item !== null) { |
| 559 | switch (item.type) { |
| 560 | case STUB: |
| 561 | break; |
| 562 | |
| 563 | case OPEN_SYNTAX: |
| 564 | case CLOSE_SYNTAX: |
| 565 | matchResult.match.push({ |
| 566 | type: item.type, |
| 567 | syntax: item.syntax |
| 568 | }); |
| 569 | break; |
| 570 | |
| 571 | default: |
| 572 | matchResult.match.push({ |
| 573 | token: item.token.value, |
| 574 | node: item.token.node |
| 575 | }); |
| 576 | break; |
| 577 | } |
| 578 | |
| 579 | item = item.prev; |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | return matchResult; |
| 584 | } |
| 585 | |
| 586 | function matchAsTree(tokens, matchGraph, syntaxes) { |
| 587 | var matchResult = internalMatch(tokens, matchGraph, syntaxes || {}); |
| 588 | |
| 589 | if (matchResult.match === null) { |
| 590 | return matchResult; |
| 591 | } |
| 592 | |
| 593 | var item = matchResult.match; |
| 594 | var host = matchResult.match = { |
| 595 | syntax: matchGraph.syntax || null, |
| 596 | match: [] |
| 597 | }; |
| 598 | var hostStack = [host]; |
| 599 | |
| 600 | // revert a list and start with 2nd item since 1st is a stub item |
| 601 | item = reverseList(item).prev; |
| 602 | |
| 603 | // build a tree |
| 604 | while (item !== null) { |
| 605 | switch (item.type) { |
| 606 | case OPEN_SYNTAX: |
| 607 | host.match.push(host = { |
| 608 | syntax: item.syntax, |
| 609 | match: [] |
| 610 | }); |
| 611 | hostStack.push(host); |
| 612 | break; |
| 613 | |
| 614 | case CLOSE_SYNTAX: |
| 615 | hostStack.pop(); |
| 616 | host = hostStack[hostStack.length - 1]; |
| 617 | break; |
| 618 | |
| 619 | default: |
| 620 | host.match.push({ |
| 621 | syntax: item.syntax || null, |
| 622 | token: item.token.value, |
| 623 | node: item.token.node |
| 624 | }); |
| 625 | } |
| 626 | |
| 627 | item = item.prev; |
| 628 | } |
| 629 | |
| 630 | return matchResult; |
| 631 | } |
| 632 | |
| 633 | module.exports = { |
| 634 | matchAsList: matchAsList, |
| 635 | matchAsTree: matchAsTree, |
| 636 | getTotalIterationCount: function() { |
| 637 | return totalIterationCount; |
| 638 | } |
| 639 | }; |