blob: 12b4a5f40671af1288430bf0f826850a1a8c2eb0 [file] [log] [blame]
Yang Guo4fd355c2019-09-19 10:59:03 +02001/*! @author Toru Nagashima <https://github.com/mysticatea> */
2'use strict';
3
4Object.defineProperty(exports, '__esModule', { value: true });
5
6
7
// Frozen, intentionally empty namespace object.
var ast = /*#__PURE__*/ Object.freeze({});
11
// Range tables for non-ASCII identifier characters. Both start out unset and
// are filled in on first use by isLargeIdStart() / isLargeIdContinue().
let largeIdStartRanges;
let largeIdContinueRanges;
/**
 * Tells whether a code point may start an identifier.
 * Values below 0x7b are answered directly (A-Z and a-z only); everything
 * else is delegated to the large range table.
 * @param {number} cp Code point to test.
 * @returns {boolean}
 */
function isIdStart(cp) {
    if (cp < 0x7b) {
        const isUpper = cp >= 0x41 && cp < 0x5b;
        return isUpper || cp >= 0x61;
    }
    return isLargeIdStart(cp);
}
/**
 * Tells whether a code point may continue an identifier.
 * Values below 0x7b are answered directly (0-9, A-Z, `_`, a-z); everything
 * else is looked up in the large start/continue range tables.
 * @param {number} cp Code point to test.
 * @returns {boolean}
 */
function isIdContinue(cp) {
    if (cp < 0x7b) {
        const isDigit = cp >= 0x30 && cp < 0x3a;
        const isUpper = cp >= 0x41 && cp < 0x5b;
        return isDigit || isUpper || cp === 0x5f || cp >= 0x61;
    }
    return isLargeIdStart(cp) || isLargeIdContinue(cp);
}
/**
 * Looks `cp` up in the identifier-start range table, building the table the
 * first time it is needed.
 * @param {number} cp Code point to test.
 * @returns {boolean}
 */
function isLargeIdStart(cp) {
    if (!largeIdStartRanges) {
        largeIdStartRanges = initLargeIdStartRanges();
    }
    return isInRange(cp, largeIdStartRanges);
}
/**
 * Looks `cp` up in the identifier-continue range table, building the table
 * the first time it is needed.
 * @param {number} cp Code point to test.
 * @returns {boolean}
 */
function isLargeIdContinue(cp) {
    if (!largeIdContinueRanges) {
        largeIdContinueRanges = initLargeIdContinueRanges();
    }
    return isInRange(cp, largeIdContinueRanges);
}
49function initLargeIdStartRanges() {
50 return restoreRanges("170 0 11 0 5 0 6 22 2 30 2 457 5 11 15 4 8 0 2 0 130 4 2 1 3 3 2 0 7 0 2 2 2 0 2 19 2 82 2 138 9 165 2 37 3 0 7 40 72 26 5 3 46 42 36 1 2 98 2 0 16 1 8 1 11 2 3 0 17 0 2 29 30 88 12 0 25 32 10 1 5 0 6 21 5 0 10 0 4 0 24 24 8 10 54 20 2 17 61 53 4 0 19 0 8 9 16 15 5 7 3 1 3 21 2 6 2 0 4 3 4 0 17 0 14 1 2 2 15 1 11 0 9 5 5 1 3 21 2 6 2 1 2 1 2 1 32 3 2 0 20 2 17 8 2 2 2 21 2 6 2 1 2 4 4 0 19 0 16 1 24 0 12 7 3 1 3 21 2 6 2 1 2 4 4 0 31 1 2 2 16 0 18 0 2 5 4 2 2 3 4 1 2 0 2 1 4 1 4 2 4 11 23 0 53 7 2 2 2 22 2 15 4 0 27 2 6 1 31 0 5 7 2 2 2 22 2 9 2 4 4 0 33 0 2 1 16 1 18 8 2 2 2 40 3 0 17 0 6 2 9 2 25 5 6 17 4 23 2 8 2 0 3 6 59 47 2 1 13 6 59 1 2 0 2 4 2 23 2 0 2 9 2 1 10 0 3 4 2 0 22 3 33 0 64 7 2 35 28 4 116 42 21 0 17 5 5 3 4 0 4 1 8 2 5 12 13 0 18 37 2 0 6 0 3 42 2 332 2 3 3 6 2 0 2 3 3 40 2 3 3 32 2 3 3 6 2 0 2 3 3 14 2 56 2 3 3 66 38 15 17 85 3 5 4 619 3 16 2 25 6 74 4 10 8 12 2 3 15 17 15 17 15 12 2 2 16 51 36 0 5 0 68 88 8 40 2 0 6 69 11 30 50 29 3 4 12 43 5 25 55 22 10 52 83 0 94 46 18 6 56 29 14 1 11 43 27 35 42 2 11 35 3 8 8 42 3 2 42 3 2 5 2 1 4 0 6 191 65 277 3 5 3 37 3 5 3 7 2 0 2 0 2 0 2 30 3 52 2 6 2 0 4 2 2 6 4 3 3 5 5 12 6 2 2 6 117 0 14 0 17 12 102 0 5 0 3 9 2 0 3 5 7 0 2 0 2 0 2 15 3 3 6 4 5 0 18 40 2680 46 2 46 2 132 7 3 4 1 13 37 2 0 6 0 3 55 8 0 17 22 10 6 2 6 2 6 2 6 2 6 2 6 2 6 2 6 551 2 26 8 8 4 3 4 5 85 5 4 2 89 2 3 6 42 2 93 18 31 49 15 513 6591 65 20988 4 1164 68 45 3 268 4 15 11 1 21 46 17 30 3 79 40 8 3 102 3 52 3 8 43 12 2 2 2 3 2 22 30 51 15 49 63 5 4 0 2 1 12 27 11 22 26 28 8 46 29 0 17 4 2 9 11 4 2 40 24 2 2 7 21 22 4 0 4 49 2 0 4 1 3 4 3 0 2 0 25 2 3 10 8 2 13 5 3 5 3 5 10 6 2 6 2 42 2 13 7 114 30 11171 13 22 5 48 8453 365 3 105 39 6 13 4 6 0 2 9 2 12 2 4 2 0 2 1 2 1 2 107 34 362 19 63 3 53 41 11 117 4 2 134 37 25 7 25 12 88 4 5 3 5 3 5 3 2 36 11 2 25 2 18 2 1 2 14 3 13 35 122 70 52 268 28 4 48 48 31 14 29 6 37 11 29 3 35 5 7 2 4 43 157 19 35 5 35 5 39 9 51 157 310 10 21 11 7 153 5 3 0 2 43 2 1 4 0 3 22 11 
22 10 30 66 18 2 1 11 21 11 25 71 55 7 1 65 0 16 3 2 2 2 28 43 28 4 28 36 7 2 27 28 53 11 21 11 18 14 17 111 72 56 50 14 50 14 35 349 41 7 1 79 28 11 0 9 21 107 20 28 22 13 52 76 44 33 24 27 35 30 0 3 0 9 34 4 0 13 47 15 3 22 0 2 0 36 17 2 24 85 6 2 0 2 3 2 14 2 9 8 46 39 7 3 1 3 21 2 6 2 1 2 4 4 0 19 0 13 4 159 52 19 3 21 2 31 47 21 1 2 0 185 46 42 3 37 47 21 0 60 42 14 0 72 26 230 43 117 63 32 7 3 0 3 7 2 1 2 23 16 0 2 0 95 7 3 38 17 0 2 0 29 0 11 39 8 0 22 0 12 45 20 0 35 56 264 8 2 36 18 0 50 29 113 6 2 1 2 37 22 0 26 5 2 1 2 31 15 0 328 18 190 0 80 921 103 110 18 195 2749 1070 4050 582 8634 568 8 30 114 29 19 47 17 3 32 20 6 18 689 63 129 74 6 0 67 12 65 1 2 0 29 6135 9 1237 43 8 8952 286 50 2 18 3 9 395 2309 106 6 12 4 8 8 9 5991 84 2 70 2 1 3 0 3 1 3 3 2 11 2 0 2 6 2 64 2 3 3 7 2 6 2 27 2 3 2 4 2 0 4 6 2 339 3 24 2 24 2 30 2 24 2 30 2 24 2 30 2 24 2 30 2 24 2 7 2357 44 11 6 17 0 370 43 1301 196 60 67 8 0 1205 3 2 26 2 1 2 0 3 0 2 9 2 3 2 0 2 0 7 0 5 0 2 0 2 0 2 2 2 1 2 0 3 0 2 0 2 0 2 0 2 0 2 1 2 0 3 3 2 6 2 3 2 3 2 0 2 9 2 16 6 2 2 4 2 16 4421 42717 35 4148 12 221 3 5761 15 7472 3104 541 1507 4938");
51}
52function initLargeIdContinueRanges() {
53 return restoreRanges("183 0 585 111 24 0 252 4 266 44 2 0 2 1 2 1 2 0 73 10 49 30 7 0 102 6 3 5 3 1 2 3 3 9 24 0 31 26 92 10 16 9 34 8 10 0 25 3 2 8 2 2 2 4 44 2 120 14 2 32 55 2 2 17 2 6 11 1 3 9 18 2 57 0 2 6 3 1 3 2 10 0 11 1 3 9 15 0 3 2 57 0 2 4 5 1 3 2 4 0 21 11 4 0 12 2 57 0 2 7 2 2 2 2 21 1 3 9 11 5 2 2 57 0 2 6 3 1 3 2 8 2 11 1 3 9 19 0 60 4 4 2 2 3 10 0 15 9 17 4 58 6 2 2 2 3 8 1 12 1 3 9 18 2 57 0 2 6 2 2 2 3 8 1 12 1 3 9 17 3 56 1 2 6 2 2 2 3 10 0 11 1 3 9 18 2 71 0 5 5 2 0 2 7 7 9 3 1 62 0 3 6 13 7 2 9 88 0 3 8 12 5 3 9 63 1 7 9 12 0 2 0 2 0 5 1 50 19 2 1 6 10 2 35 10 0 101 19 2 9 13 3 5 2 2 2 3 6 4 3 14 11 2 14 704 2 10 8 929 2 30 2 30 1 31 1 65 31 10 0 3 9 34 2 3 9 144 0 119 11 5 11 11 9 129 10 61 4 58 9 2 28 3 10 7 9 23 13 2 1 64 4 48 16 12 9 18 8 13 2 31 12 3 9 45 13 49 19 9 9 7 9 119 2 2 20 5 0 7 0 3 2 199 57 2 4 576 1 20 0 124 12 5 0 4 11 3071 2 142 0 97 31 555 5 106 1 30086 9 70 0 5 9 33 1 81 1 273 0 4 0 5 0 24 4 5 0 84 1 51 17 11 9 7 17 14 10 29 7 26 12 45 3 48 13 16 9 12 0 11 9 48 13 13 0 9 1 3 9 34 2 51 0 2 2 3 1 6 1 2 0 42 4 6 1 237 7 2 1 3 9 20261 0 738 15 17 15 4 1 25 2 193 9 38 0 702 0 227 0 150 4 294 9 1368 2 2 1 6 3 41 2 5 0 166 1 574 3 9 9 370 1 154 10 176 2 54 14 32 9 16 3 46 10 54 9 7 2 37 13 2 9 6 1 45 0 13 2 49 13 9 3 2 11 83 11 7 0 161 11 6 9 7 3 56 1 2 6 3 1 3 2 10 0 11 1 3 6 4 4 193 17 10 9 5 0 82 19 13 9 214 6 3 8 28 1 83 16 16 9 82 12 9 9 84 14 5 9 243 14 166 9 71 5 2 1 3 3 2 0 2 1 13 9 120 6 3 6 4 0 29 9 41 6 2 3 9 0 10 10 47 15 406 7 2 7 17 9 57 21 2 13 123 5 4 0 2 1 2 6 2 0 9 9 49 4 2 1 2 4 9 9 330 3 19306 9 135 4 60 6 26 9 1014 0 2 54 8 3 82 0 12 1 19628 1 5319 4 4 5 9 7 3 6 31 3 149 2 1418 49 513 54 5 49 9 0 15 0 23 4 2 14 1361 6 2 16 3 6 2 1 2 4 262 6 10 9 419 13 1495 6 110 6 6 9 4759 9 787719 239");
54}
/**
 * Binary-searches a flat list of inclusive `[min, max]` pairs.
 * @param {number} cp Value to look for.
 * @param {number[]} ranges Flat array laid out as min0, max0, min1, max1, ...
 *     The search assumes the pairs are in ascending order.
 * @returns {boolean} `true` when `cp` lies inside one of the pairs.
 */
function isInRange(cp, ranges) {
    let low = 0;
    let high = (ranges.length / 2) | 0; // number of complete pairs
    while (low < high) {
        const mid = ((low + high) / 2) | 0;
        const lower = ranges[2 * mid];
        const upper = ranges[2 * mid + 1];
        if (cp < lower) {
            high = mid;
            continue;
        }
        if (cp > upper) {
            low = mid + 1;
            continue;
        }
        return true;
    }
    return false;
}
/**
 * Decodes a delta-encoded table: each whitespace-separated decimal token is
 * added to a running total, and the totals form the result.
 *
 * Tokens are split on runs of whitespace and empty tokens are dropped, so an
 * empty string yields `[]` and a stray double space or line break cannot
 * inject a zero delta that would shift the table's entries.
 *
 * @param {string} data Delta-encoded numbers.
 * @returns {number[]} The running totals.
 */
function restoreRanges(data) {
    let last = 0;
    return data
        .split(/\s+/)
        .filter(token => token !== "")
        .map(token => (last += parseInt(token, 10) | 0));
}
Yang Guo4fd355c2019-09-19 10:59:03 +020077
/**
 * Holds three space-separated word lists (one per ECMAScript edition) and
 * converts each into a `Set` the first time it is read.
 */
class DataSet {
    /**
     * @param {string} raw2018 Space-separated names for the `es2018` set.
     * @param {string} raw2019 Space-separated names for the `es2019` set.
     * @param {string} raw2020 Space-separated names for the `es2020` set.
     */
    constructor(raw2018, raw2019, raw2020) {
        this._raw2018 = raw2018;
        this._raw2019 = raw2019;
        this._raw2020 = raw2020;
    }
    /**
     * Splits a raw list into a set. Empty tokens are discarded so that an
     * empty list produces an empty set instead of a set containing "".
     * @param {string} raw Space-separated names.
     * @returns {Set<string>}
     */
    static _toSet(raw) {
        return new Set(raw.split(" ").filter(word => word !== ""));
    }
    /** @returns {Set<string>} Cached set built from the 2018 list. */
    get es2018() {
        if (!this._set2018) {
            this._set2018 = DataSet._toSet(this._raw2018);
        }
        return this._set2018;
    }
    /** @returns {Set<string>} Cached set built from the 2019 list. */
    get es2019() {
        if (!this._set2019) {
            this._set2019 = DataSet._toSet(this._raw2019);
        }
        return this._set2019;
    }
    /** @returns {Set<string>} Cached set built from the 2020 list. */
    get es2020() {
        if (!this._set2020) {
            this._set2020 = DataSet._toSet(this._raw2020);
        }
        return this._set2020;
    }
}
// Property names checked against the general-category value table.
const gcNameSet = new Set([
    "General_Category",
    "gc",
]);
// Property names checked against the script value tables.
const scNameSet = new Set([
    "Script",
    "Script_Extensions",
    "sc",
    "scx",
]);
96const gcValueSets = new DataSet("C Cased_Letter Cc Cf Close_Punctuation Cn Co Combining_Mark Connector_Punctuation Control Cs Currency_Symbol Dash_Punctuation Decimal_Number Enclosing_Mark Final_Punctuation Format Initial_Punctuation L LC Letter Letter_Number Line_Separator Ll Lm Lo Lowercase_Letter Lt Lu M Mark Math_Symbol Mc Me Mn Modifier_Letter Modifier_Symbol N Nd Nl No Nonspacing_Mark Number Open_Punctuation Other Other_Letter Other_Number Other_Punctuation Other_Symbol P Paragraph_Separator Pc Pd Pe Pf Pi Po Private_Use Ps Punctuation S Sc Separator Sk Sm So Space_Separator Spacing_Mark Surrogate Symbol Titlecase_Letter Unassigned Uppercase_Letter Z Zl Zp Zs cntrl digit punct", "", "");
97const scValueSets = new DataSet("Adlam Adlm Aghb Ahom Anatolian_Hieroglyphs Arab Arabic Armenian Armi Armn Avestan Avst Bali Balinese Bamu Bamum Bass Bassa_Vah Batak Batk Beng Bengali Bhaiksuki Bhks Bopo Bopomofo Brah Brahmi Brai Braille Bugi Buginese Buhd Buhid Cakm Canadian_Aboriginal Cans Cari Carian Caucasian_Albanian Chakma Cham Cher Cherokee Common Copt Coptic Cprt Cuneiform Cypriot Cyrillic Cyrl Deseret Deva Devanagari Dsrt Dupl Duployan Egyp Egyptian_Hieroglyphs Elba Elbasan Ethi Ethiopic Geor Georgian Glag Glagolitic Gonm Goth Gothic Gran Grantha Greek Grek Gujarati Gujr Gurmukhi Guru Han Hang Hangul Hani Hano Hanunoo Hatr Hatran Hebr Hebrew Hira Hiragana Hluw Hmng Hung Imperial_Aramaic Inherited Inscriptional_Pahlavi Inscriptional_Parthian Ital Java Javanese Kaithi Kali Kana Kannada Katakana Kayah_Li Khar Kharoshthi Khmer Khmr Khoj Khojki Khudawadi Knda Kthi Lana Lao Laoo Latin Latn Lepc Lepcha Limb Limbu Lina Linb Linear_A Linear_B Lisu Lyci Lycian Lydi Lydian Mahajani Mahj Malayalam Mand Mandaic Mani Manichaean Marc Marchen Masaram_Gondi Meetei_Mayek Mend Mende_Kikakui Merc Mero Meroitic_Cursive Meroitic_Hieroglyphs Miao Mlym Modi Mong Mongolian Mro Mroo Mtei Mult Multani Myanmar Mymr Nabataean Narb Nbat New_Tai_Lue Newa Nko Nkoo Nshu Nushu Ogam Ogham Ol_Chiki Olck Old_Hungarian Old_Italic Old_North_Arabian Old_Permic Old_Persian Old_South_Arabian Old_Turkic Oriya Orkh Orya Osage Osge Osma Osmanya Pahawh_Hmong Palm Palmyrene Pau_Cin_Hau Pauc Perm Phag Phags_Pa Phli Phlp Phnx Phoenician Plrd Prti Psalter_Pahlavi Qaac Qaai Rejang Rjng Runic Runr Samaritan Samr Sarb Saur Saurashtra Sgnw Sharada Shavian Shaw Shrd Sidd Siddham SignWriting Sind Sinh Sinhala Sora Sora_Sompeng Soyo Soyombo Sund Sundanese Sylo Syloti_Nagri Syrc Syriac Tagalog Tagb Tagbanwa Tai_Le Tai_Tham Tai_Viet Takr Takri Tale Talu Tamil Taml Tang Tangut Tavt Telu Telugu Tfng Tglg Thaa Thaana Thai Tibetan Tibt Tifinagh Tirh Tirhuta Ugar Ugaritic Vai Vaii Wara Warang_Citi Xpeo Xsux Yi Yiii 
Zanabazar_Square Zanb Zinh Zyyy", "Dogr Dogra Gong Gunjala_Gondi Hanifi_Rohingya Maka Makasar Medefaidrin Medf Old_Sogdian Rohg Sogd Sogdian Sogo", "Elym Elymaic Hmnp Nand Nandinagari Nyiakeng_Puachue_Hmong Wancho Wcho");
98const binPropertySets = new DataSet("AHex ASCII ASCII_Hex_Digit Alpha Alphabetic Any Assigned Bidi_C Bidi_Control Bidi_M Bidi_Mirrored CI CWCF CWCM CWKCF CWL CWT CWU Case_Ignorable Cased Changes_When_Casefolded Changes_When_Casemapped Changes_When_Lowercased Changes_When_NFKC_Casefolded Changes_When_Titlecased Changes_When_Uppercased DI Dash Default_Ignorable_Code_Point Dep Deprecated Dia Diacritic Emoji Emoji_Component Emoji_Modifier Emoji_Modifier_Base Emoji_Presentation Ext Extender Gr_Base Gr_Ext Grapheme_Base Grapheme_Extend Hex Hex_Digit IDC IDS IDSB IDST IDS_Binary_Operator IDS_Trinary_Operator ID_Continue ID_Start Ideo Ideographic Join_C Join_Control LOE Logical_Order_Exception Lower Lowercase Math NChar Noncharacter_Code_Point Pat_Syn Pat_WS Pattern_Syntax Pattern_White_Space QMark Quotation_Mark RI Radical Regional_Indicator SD STerm Sentence_Terminal Soft_Dotted Term Terminal_Punctuation UIdeo Unified_Ideograph Upper Uppercase VS Variation_Selector White_Space XIDC XIDS XID_Continue XID_Start space", "Extended_Pictographic", "");
/**
 * Checks a `name=value` Unicode property pair against the tables available
 * for the given ECMAScript version.
 * @param {number} version ECMAScript version (e.g. 2018).
 * @param {string} name Property name.
 * @param {string} value Property value.
 * @returns {boolean}
 */
function isValidUnicodeProperty(version, name, value) {
    if (gcNameSet.has(name)) {
        return version >= 2018 && gcValueSets.es2018.has(value);
    }
    if (!scNameSet.has(name)) {
        return false;
    }
    // Script values accumulate: each edition adds to the previous ones.
    if (version >= 2018 && scValueSets.es2018.has(value)) {
        return true;
    }
    if (version >= 2019 && scValueSets.es2019.has(value)) {
        return true;
    }
    return version >= 2020 && scValueSets.es2020.has(value);
}
/**
 * Checks a property given without a value against the 2018 and 2019
 * tables in `binPropertySets`.
 * @param {number} version ECMAScript version (e.g. 2018).
 * @param {string} value Property name.
 * @returns {boolean}
 */
function isValidLoneUnicodeProperty(version, value) {
    if (version >= 2018 && binPropertySets.es2018.has(value)) {
        return true;
    }
    return version >= 2019 && binPropertySets.es2019.has(value);
}
Yang Guo4fd355c2019-09-19 10:59:03 +0200114
// Named code points, listed in ascending numeric order.

// Control characters
const Backspace = 0x08;
const CharacterTabulation = 0x09;
const LineFeed = 0x0a;
const LineTabulation = 0x0b;
const FormFeed = 0x0c;
const CarriageReturn = 0x0d;

// Punctuation below the digits
const ExclamationMark = 0x21;
const DollarSign = 0x24;
const LeftParenthesis = 0x28;
const RightParenthesis = 0x29;
const Asterisk = 0x2a;
const PlusSign = 0x2b;
const Comma = 0x2c;
const HyphenMinus = 0x2d;
const FullStop = 0x2e;
const Solidus = 0x2f;

// Digits
const DigitZero = 0x30;
const DigitOne = 0x31;
const DigitSeven = 0x37;
const DigitNine = 0x39;

// Punctuation between digits and capitals
const Colon = 0x3a;
const LessThanSign = 0x3c;
const EqualsSign = 0x3d;
const GreaterThanSign = 0x3e;
const QuestionMark = 0x3f;

// Capital letters
const LatinCapitalLetterA = 0x41;
const LatinCapitalLetterB = 0x42;
const LatinCapitalLetterD = 0x44;
const LatinCapitalLetterF = 0x46;
const LatinCapitalLetterP = 0x50;
const LatinCapitalLetterS = 0x53;
const LatinCapitalLetterW = 0x57;
const LatinCapitalLetterZ = 0x5a;

// Punctuation between capitals and small letters
const LeftSquareBracket = 0x5b;
const ReverseSolidus = 0x5c;
const RightSquareBracket = 0x5d;
const CircumflexAccent = 0x5e;
const LowLine = 0x5f;

// Small letters
const LatinSmallLetterA = 0x61;
const LatinSmallLetterB = 0x62;
const LatinSmallLetterC = 0x63;
const LatinSmallLetterD = 0x64;
const LatinSmallLetterF = 0x66;
const LatinSmallLetterG = 0x67;
const LatinSmallLetterI = 0x69;
const LatinSmallLetterK = 0x6b;
const LatinSmallLetterM = 0x6d;
const LatinSmallLetterN = 0x6e;
const LatinSmallLetterP = 0x70;
const LatinSmallLetterR = 0x72;
const LatinSmallLetterS = 0x73;
const LatinSmallLetterT = 0x74;
const LatinSmallLetterU = 0x75;
const LatinSmallLetterV = 0x76;
const LatinSmallLetterW = 0x77;
const LatinSmallLetterX = 0x78;
const LatinSmallLetterY = 0x79;
const LatinSmallLetterZ = 0x7a;

// Punctuation above the small letters
const LeftCurlyBracket = 0x7b;
const VerticalLine = 0x7c;
const RightCurlyBracket = 0x7d;

// Non-ASCII code points
const ZeroWidthNonJoiner = 0x200c;
const ZeroWidthJoiner = 0x200d;
const LineSeparator = 0x2028;
const ParagraphSeparator = 0x2029;

// Bounds of the code point space
const MinCodePoint = 0x00;
const MaxCodePoint = 0x10ffff;
/**
 * @param {number} code Code point to test.
 * @returns {boolean} `true` for A-Z or a-z.
 */
function isLatinLetter(code) {
    if (code >= LatinCapitalLetterA && code <= LatinCapitalLetterZ) {
        return true;
    }
    return code >= LatinSmallLetterA && code <= LatinSmallLetterZ;
}
/**
 * @param {number} code Code point to test.
 * @returns {boolean} `true` for 0-9.
 */
function isDecimalDigit(code) {
    return DigitZero <= code && code <= DigitNine;
}
/**
 * @param {number} code Code point to test.
 * @returns {boolean} `true` for 0-7.
 */
function isOctalDigit(code) {
    return DigitZero <= code && code <= DigitSeven;
}
/**
 * @param {number} code Code point to test.
 * @returns {boolean} `true` for 0-9, A-F or a-f.
 */
function isHexDigit(code) {
    if (code >= DigitZero && code <= DigitNine) {
        return true;
    }
    if (code >= LatinCapitalLetterA && code <= LatinCapitalLetterF) {
        return true;
    }
    return code >= LatinSmallLetterA && code <= LatinSmallLetterF;
}
/**
 * @param {number} code Code point to test.
 * @returns {boolean} `true` for LF, CR, U+2028 or U+2029.
 */
function isLineTerminator(code) {
    switch (code) {
        case LineFeed:
        case CarriageReturn:
        case LineSeparator:
        case ParagraphSeparator:
            return true;
        default:
            return false;
    }
}
/**
 * @param {number} code Value to test.
 * @returns {boolean} `true` when `code` lies in [MinCodePoint, MaxCodePoint].
 */
function isValidUnicode(code) {
    return MinCodePoint <= code && code <= MaxCodePoint;
}
/**
 * Converts the code point of a digit character to its numeric value.
 * a-f and A-F map to 10-15; any other input is treated as an offset from
 * the character "0" without further checks.
 * @param {number} code Code point of the digit.
 * @returns {number}
 */
function digitToInt(code) {
    const isLowerHex = code >= LatinSmallLetterA && code <= LatinSmallLetterF;
    if (isLowerHex) {
        return code - LatinSmallLetterA + 10;
    }
    const isUpperHex = code >= LatinCapitalLetterA && code <= LatinCapitalLetterF;
    if (isUpperHex) {
        return code - LatinCapitalLetterA + 10;
    }
    return code - DigitZero;
}
/**
 * @param {number} code Code unit to test.
 * @returns {boolean} `true` when `code` is in 0xD800-0xDBFF.
 */
function isLeadSurrogate(code) {
    return 0xd800 <= code && code <= 0xdbff;
}
/**
 * @param {number} code Code unit to test.
 * @returns {boolean} `true` when `code` is in 0xDC00-0xDFFF.
 */
function isTrailSurrogate(code) {
    return 0xdc00 <= code && code <= 0xdfff;
}
/**
 * Combines a lead/trail surrogate pair into a single code point.
 * @param {number} lead Lead surrogate code unit.
 * @param {number} trail Trail surrogate code unit.
 * @returns {number}
 */
function combineSurrogatePair(lead, trail) {
    const highPart = (lead - 0xd800) * 0x400;
    const lowPart = trail - 0xdc00;
    return highPart + lowPart + 0x10000;
}
Yang Guo4fd355c2019-09-19 10:59:03 +0200224
// Reading strategy that works in UTF-16 code units: every element is one
// unit wide.
const legacyImpl = {
    /** Returns the code unit at `i`, or -1 unless `i < end`. */
    at(s, end, i) {
        if (i < end) {
            return s.charCodeAt(i);
        }
        return -1;
    },
    /** Every element occupies a single index. */
    width(c) {
        return 1;
    },
};
// Reading strategy that works in whole code points: values above 0xFFFF
// take two indices.
const unicodeImpl = {
    /** Returns the code point at `i`, or -1 unless `i < end`. */
    at(s, end, i) {
        if (i < end) {
            return s.codePointAt(i);
        }
        return -1;
    },
    /** Number of string indices the code point `c` occupies. */
    width(c) {
        if (c > 0xffff) {
            return 2;
        }
        return 1;
    },
};
/**
 * Cursor over a slice of a string that keeps the current element and the
 * next three elements (with their widths) cached. -1 marks "past the end".
 */
class Reader {
    constructor() {
        this._impl = legacyImpl;
        this._s = "";
        this._i = 0;
        this._end = 0;
        // Look-ahead window: element values (_cpN) and widths (_wN).
        this._cp1 = -1;
        this._w1 = 1;
        this._cp2 = -1;
        this._w2 = 1;
        this._cp3 = -1;
        this._w3 = 1;
        this._cp4 = -1;
    }
    /** The string being read. */
    get source() {
        return this._s;
    }
    /** Index of the current element. */
    get index() {
        return this._i;
    }
    /** Element at the cursor, or -1. */
    get currentCodePoint() {
        return this._cp1;
    }
    /** Element one step ahead, or -1. */
    get nextCodePoint() {
        return this._cp2;
    }
    /** Element two steps ahead, or -1. */
    get nextCodePoint2() {
        return this._cp3;
    }
    /** Element three steps ahead, or -1. */
    get nextCodePoint3() {
        return this._cp4;
    }
    /**
     * Starts reading `source` from `start` up to (not including) `end`.
     * `uFlag` selects code-point reading instead of code-unit reading.
     */
    reset(source, start, end, uFlag) {
        this._impl = uFlag ? unicodeImpl : legacyImpl;
        this._s = source;
        this._end = end;
        this.rewind(start);
    }
    /** Moves the cursor to `index` and refills the look-ahead window. */
    rewind(index) {
        const impl = this._impl;
        const text = this._s;
        const limit = this._end;
        let offset = index;
        this._i = index;
        this._cp1 = impl.at(text, limit, offset);
        this._w1 = impl.width(this._cp1);
        offset += this._w1;
        this._cp2 = impl.at(text, limit, offset);
        this._w2 = impl.width(this._cp2);
        offset += this._w2;
        this._cp3 = impl.at(text, limit, offset);
        this._w3 = impl.width(this._cp3);
        offset += this._w3;
        this._cp4 = impl.at(text, limit, offset);
    }
    /** Steps forward by one element; does nothing once the end is reached. */
    advance() {
        if (this._cp1 === -1) {
            return;
        }
        const impl = this._impl;
        this._i += this._w1;
        // Shift the window left by one slot...
        this._cp1 = this._cp2;
        this._w1 = this._w2;
        this._cp2 = this._cp3;
        this._w2 = impl.width(this._cp2);
        this._cp3 = this._cp4;
        this._w3 = impl.width(this._cp3);
        // ...and read the new last slot.
        const tail = this._i + this._w1 + this._w2 + this._w3;
        this._cp4 = impl.at(this._s, this._end, tail);
    }
    /** Consumes the current element when it equals `cp`. */
    eat(cp) {
        if (this._cp1 !== cp) {
            return false;
        }
        this.advance();
        return true;
    }
    /** Consumes two elements when they equal `cp1`, `cp2` in order. */
    eat2(cp1, cp2) {
        if (this._cp1 !== cp1 || this._cp2 !== cp2) {
            return false;
        }
        this.advance();
        this.advance();
        return true;
    }
    /** Consumes three elements when they equal `cp1`, `cp2`, `cp3` in order. */
    eat3(cp1, cp2, cp3) {
        if (this._cp1 !== cp1 || this._cp2 !== cp2 || this._cp3 !== cp3) {
            return false;
        }
        this.advance();
        this.advance();
        this.advance();
        return true;
    }
}
328
/**
 * Error thrown for an invalid regular expression. The message has the form
 * `Invalid regular expression: /src/flags: reason`; `index` records where
 * in the source the problem was detected.
 */
class RegExpSyntaxError extends SyntaxError {
    /**
     * @param {string} source Pattern or literal text. When it does not
     *     already start with "/", it is wrapped as `/source/` (plus `u` when
     *     `uFlag` is set). An empty or missing source is left out of the
     *     message entirely.
     * @param {boolean} uFlag Whether the unicode flag applies.
     * @param {number} index Position of the error.
     * @param {string} message Description of the problem.
     */
    constructor(source, uFlag, index, message) {
        // Build the label in a local instead of reassigning the parameter,
        // so a missing source cannot leak "undefined" into the message.
        let label = "";
        if (source) {
            const literal = source.startsWith("/")
                ? source
                : `/${source}/${uFlag ? "u" : ""}`;
            label = `: ${literal}`;
        }
        super(`Invalid regular expression${label}: ${message}`);
        this.index = index;
    }
}
341
/**
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` for one of ^ $ \ . * + ? ( ) [ ] { } |
 */
function isSyntaxCharacter(cp) {
    switch (cp) {
        case CircumflexAccent:
        case DollarSign:
        case ReverseSolidus:
        case FullStop:
        case Asterisk:
        case PlusSign:
        case QuestionMark:
        case LeftParenthesis:
        case RightParenthesis:
        case LeftSquareBracket:
        case RightSquareBracket:
        case LeftCurlyBracket:
        case RightCurlyBracket:
        case VerticalLine:
            return true;
        default:
            return false;
    }
}
/**
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` for an identifier-start code point, `$` or `_`.
 */
function isRegExpIdentifierStart(cp) {
    if (isIdStart(cp)) {
        return true;
    }
    return cp === DollarSign || cp === LowLine;
}
/**
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` for an identifier-continue code point, `$`,
 *     `_`, U+200C or U+200D.
 */
function isRegExpIdentifierPart(cp) {
    if (isIdContinue(cp)) {
        return true;
    }
    switch (cp) {
        case DollarSign:
        case LowLine:
        case ZeroWidthNonJoiner:
        case ZeroWidthJoiner:
            return true;
        default:
            return false;
    }
}
/**
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` for A-Z, a-z or `_`.
 */
function isUnicodePropertyNameCharacter(cp) {
    if (isLatinLetter(cp)) {
        return true;
    }
    return cp === LowLine;
}
/**
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` for a property-name character or a decimal digit.
 */
function isUnicodePropertyValueCharacter(cp) {
    if (isUnicodePropertyNameCharacter(cp)) {
        return true;
    }
    return isDecimalDigit(cp);
}
Yang Guo4fd355c2019-09-19 10:59:03 +0200374class RegExpValidator {
375 constructor(options) {
376 this._reader = new Reader();
377 this._uFlag = false;
378 this._nFlag = false;
379 this._lastIntValue = 0;
380 this._lastMinValue = 0;
381 this._lastMaxValue = 0;
382 this._lastStrValue = "";
383 this._lastKeyValue = "";
384 this._lastValValue = "";
385 this._lastAssertionIsQuantifiable = false;
386 this._numCapturingParens = 0;
387 this._groupNames = new Set();
388 this._backreferenceNames = new Set();
389 this._options = options || {};
390 }
391 validateLiteral(source, start = 0, end = source.length) {
392 this._uFlag = this._nFlag = false;
393 this.reset(source, start, end);
394 this.onLiteralEnter(start);
395 if (this.eat(Solidus) && this.eatRegExpBody() && this.eat(Solidus)) {
396 const flagStart = this.index;
Tim van der Lippe16aca392020-11-13 11:37:13 +0000397 const uFlag = source.includes("u", flagStart);
Yang Guo4fd355c2019-09-19 10:59:03 +0200398 this.validateFlags(source, flagStart, end);
399 this.validatePattern(source, start + 1, flagStart - 1, uFlag);
400 }
401 else if (start >= end) {
402 this.raise("Empty");
403 }
404 else {
405 const c = String.fromCodePoint(this.currentCodePoint);
406 this.raise(`Unexpected character '${c}'`);
407 }
408 this.onLiteralLeave(start, end);
409 }
410 validateFlags(source, start = 0, end = source.length) {
411 const existingFlags = new Set();
412 let global = false;
413 let ignoreCase = false;
414 let multiline = false;
415 let sticky = false;
416 let unicode = false;
417 let dotAll = false;
418 for (let i = start; i < end; ++i) {
419 const flag = source.charCodeAt(i);
420 if (existingFlags.has(flag)) {
421 this.raise(`Duplicated flag '${source[i]}'`);
422 }
423 existingFlags.add(flag);
424 if (flag === LatinSmallLetterG) {
425 global = true;
426 }
427 else if (flag === LatinSmallLetterI) {
428 ignoreCase = true;
429 }
430 else if (flag === LatinSmallLetterM) {
431 multiline = true;
432 }
433 else if (flag === LatinSmallLetterU && this.ecmaVersion >= 2015) {
434 unicode = true;
435 }
436 else if (flag === LatinSmallLetterY && this.ecmaVersion >= 2015) {
437 sticky = true;
438 }
439 else if (flag === LatinSmallLetterS && this.ecmaVersion >= 2018) {
440 dotAll = true;
441 }
442 else {
443 this.raise(`Invalid flag '${source[i]}'`);
444 }
445 }
446 this.onFlags(start, end, global, ignoreCase, multiline, unicode, sticky, dotAll);
447 }
448 validatePattern(source, start = 0, end = source.length, uFlag = false) {
449 this._uFlag = uFlag && this.ecmaVersion >= 2015;
450 this._nFlag = uFlag && this.ecmaVersion >= 2018;
451 this.reset(source, start, end);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000452 this.consumePattern();
Yang Guo4fd355c2019-09-19 10:59:03 +0200453 if (!this._nFlag &&
454 this.ecmaVersion >= 2018 &&
455 this._groupNames.size > 0) {
456 this._nFlag = true;
457 this.rewind(start);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000458 this.consumePattern();
Yang Guo4fd355c2019-09-19 10:59:03 +0200459 }
460 }
461 get strict() {
462 return Boolean(this._options.strict || this._uFlag);
463 }
464 get ecmaVersion() {
Tim van der Lippe16aca392020-11-13 11:37:13 +0000465 return this._options.ecmaVersion || 2020;
Yang Guo4fd355c2019-09-19 10:59:03 +0200466 }
467 onLiteralEnter(start) {
468 if (this._options.onLiteralEnter) {
469 this._options.onLiteralEnter(start);
470 }
471 }
472 onLiteralLeave(start, end) {
473 if (this._options.onLiteralLeave) {
474 this._options.onLiteralLeave(start, end);
475 }
476 }
477 onFlags(start, end, global, ignoreCase, multiline, unicode, sticky, dotAll) {
478 if (this._options.onFlags) {
479 this._options.onFlags(start, end, global, ignoreCase, multiline, unicode, sticky, dotAll);
480 }
481 }
482 onPatternEnter(start) {
483 if (this._options.onPatternEnter) {
484 this._options.onPatternEnter(start);
485 }
486 }
487 onPatternLeave(start, end) {
488 if (this._options.onPatternLeave) {
489 this._options.onPatternLeave(start, end);
490 }
491 }
492 onDisjunctionEnter(start) {
493 if (this._options.onDisjunctionEnter) {
494 this._options.onDisjunctionEnter(start);
495 }
496 }
497 onDisjunctionLeave(start, end) {
498 if (this._options.onDisjunctionLeave) {
499 this._options.onDisjunctionLeave(start, end);
500 }
501 }
502 onAlternativeEnter(start, index) {
503 if (this._options.onAlternativeEnter) {
504 this._options.onAlternativeEnter(start, index);
505 }
506 }
507 onAlternativeLeave(start, end, index) {
508 if (this._options.onAlternativeLeave) {
509 this._options.onAlternativeLeave(start, end, index);
510 }
511 }
512 onGroupEnter(start) {
513 if (this._options.onGroupEnter) {
514 this._options.onGroupEnter(start);
515 }
516 }
517 onGroupLeave(start, end) {
518 if (this._options.onGroupLeave) {
519 this._options.onGroupLeave(start, end);
520 }
521 }
522 onCapturingGroupEnter(start, name) {
523 if (this._options.onCapturingGroupEnter) {
524 this._options.onCapturingGroupEnter(start, name);
525 }
526 }
527 onCapturingGroupLeave(start, end, name) {
528 if (this._options.onCapturingGroupLeave) {
529 this._options.onCapturingGroupLeave(start, end, name);
530 }
531 }
532 onQuantifier(start, end, min, max, greedy) {
533 if (this._options.onQuantifier) {
534 this._options.onQuantifier(start, end, min, max, greedy);
535 }
536 }
537 onLookaroundAssertionEnter(start, kind, negate) {
538 if (this._options.onLookaroundAssertionEnter) {
539 this._options.onLookaroundAssertionEnter(start, kind, negate);
540 }
541 }
542 onLookaroundAssertionLeave(start, end, kind, negate) {
543 if (this._options.onLookaroundAssertionLeave) {
544 this._options.onLookaroundAssertionLeave(start, end, kind, negate);
545 }
546 }
547 onEdgeAssertion(start, end, kind) {
548 if (this._options.onEdgeAssertion) {
549 this._options.onEdgeAssertion(start, end, kind);
550 }
551 }
552 onWordBoundaryAssertion(start, end, kind, negate) {
553 if (this._options.onWordBoundaryAssertion) {
554 this._options.onWordBoundaryAssertion(start, end, kind, negate);
555 }
556 }
557 onAnyCharacterSet(start, end, kind) {
558 if (this._options.onAnyCharacterSet) {
559 this._options.onAnyCharacterSet(start, end, kind);
560 }
561 }
562 onEscapeCharacterSet(start, end, kind, negate) {
563 if (this._options.onEscapeCharacterSet) {
564 this._options.onEscapeCharacterSet(start, end, kind, negate);
565 }
566 }
567 onUnicodePropertyCharacterSet(start, end, kind, key, value, negate) {
568 if (this._options.onUnicodePropertyCharacterSet) {
569 this._options.onUnicodePropertyCharacterSet(start, end, kind, key, value, negate);
570 }
571 }
572 onCharacter(start, end, value) {
573 if (this._options.onCharacter) {
574 this._options.onCharacter(start, end, value);
575 }
576 }
577 onBackreference(start, end, ref) {
578 if (this._options.onBackreference) {
579 this._options.onBackreference(start, end, ref);
580 }
581 }
582 onCharacterClassEnter(start, negate) {
583 if (this._options.onCharacterClassEnter) {
584 this._options.onCharacterClassEnter(start, negate);
585 }
586 }
587 onCharacterClassLeave(start, end, negate) {
588 if (this._options.onCharacterClassLeave) {
589 this._options.onCharacterClassLeave(start, end, negate);
590 }
591 }
592 onCharacterClassRange(start, end, min, max) {
593 if (this._options.onCharacterClassRange) {
594 this._options.onCharacterClassRange(start, end, min, max);
595 }
596 }
597 get source() {
598 return this._reader.source;
599 }
600 get index() {
601 return this._reader.index;
602 }
603 get currentCodePoint() {
604 return this._reader.currentCodePoint;
605 }
606 get nextCodePoint() {
607 return this._reader.nextCodePoint;
608 }
609 get nextCodePoint2() {
610 return this._reader.nextCodePoint2;
611 }
612 get nextCodePoint3() {
613 return this._reader.nextCodePoint3;
614 }
615 reset(source, start, end) {
616 this._reader.reset(source, start, end, this._uFlag);
617 }
618 rewind(index) {
619 this._reader.rewind(index);
620 }
621 advance() {
622 this._reader.advance();
623 }
624 eat(cp) {
625 return this._reader.eat(cp);
626 }
627 eat2(cp1, cp2) {
628 return this._reader.eat2(cp1, cp2);
629 }
630 eat3(cp1, cp2, cp3) {
631 return this._reader.eat3(cp1, cp2, cp3);
632 }
633 raise(message) {
634 throw new RegExpSyntaxError(this.source, this._uFlag, this.index, message);
635 }
636 eatRegExpBody() {
637 const start = this.index;
638 let inClass = false;
639 let escaped = false;
640 for (;;) {
641 const cp = this.currentCodePoint;
642 if (cp === -1 || isLineTerminator(cp)) {
643 const kind = inClass ? "character class" : "regular expression";
644 this.raise(`Unterminated ${kind}`);
645 }
646 if (escaped) {
647 escaped = false;
648 }
649 else if (cp === ReverseSolidus) {
650 escaped = true;
651 }
652 else if (cp === LeftSquareBracket) {
653 inClass = true;
654 }
655 else if (cp === RightSquareBracket) {
656 inClass = false;
657 }
658 else if ((cp === Solidus && !inClass) ||
659 (cp === Asterisk && this.index === start)) {
660 break;
661 }
662 this.advance();
663 }
664 return this.index !== start;
665 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000666 consumePattern() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200667 const start = this.index;
668 this._numCapturingParens = this.countCapturingParens();
669 this._groupNames.clear();
670 this._backreferenceNames.clear();
671 this.onPatternEnter(start);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000672 this.consumeDisjunction();
Yang Guo4fd355c2019-09-19 10:59:03 +0200673 const cp = this.currentCodePoint;
674 if (this.currentCodePoint !== -1) {
675 if (cp === RightParenthesis) {
676 this.raise("Unmatched ')'");
677 }
678 if (cp === ReverseSolidus) {
679 this.raise("\\ at end of pattern");
680 }
681 if (cp === RightSquareBracket || cp === RightCurlyBracket) {
682 this.raise("Lone quantifier brackets");
683 }
684 const c = String.fromCodePoint(cp);
685 this.raise(`Unexpected character '${c}'`);
686 }
687 for (const name of this._backreferenceNames) {
688 if (!this._groupNames.has(name)) {
689 this.raise("Invalid named capture referenced");
690 }
691 }
692 this.onPatternLeave(start, this.index);
693 }
694 countCapturingParens() {
695 const start = this.index;
696 let inClass = false;
697 let escaped = false;
698 let count = 0;
699 let cp = 0;
700 while ((cp = this.currentCodePoint) !== -1) {
701 if (escaped) {
702 escaped = false;
703 }
704 else if (cp === ReverseSolidus) {
705 escaped = true;
706 }
707 else if (cp === LeftSquareBracket) {
708 inClass = true;
709 }
710 else if (cp === RightSquareBracket) {
711 inClass = false;
712 }
713 else if (cp === LeftParenthesis &&
714 !inClass &&
715 (this.nextCodePoint !== QuestionMark ||
716 (this.nextCodePoint2 === LessThanSign &&
717 this.nextCodePoint3 !== EqualsSign &&
718 this.nextCodePoint3 !== ExclamationMark))) {
719 count += 1;
720 }
721 this.advance();
722 }
723 this.rewind(start);
724 return count;
725 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000726 consumeDisjunction() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200727 const start = this.index;
728 let i = 0;
729 this.onDisjunctionEnter(start);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000730 do {
731 this.consumeAlternative(i++);
732 } while (this.eat(VerticalLine));
733 if (this.consumeQuantifier(true)) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200734 this.raise("Nothing to repeat");
735 }
736 if (this.eat(LeftCurlyBracket)) {
737 this.raise("Lone quantifier brackets");
738 }
739 this.onDisjunctionLeave(start, this.index);
740 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000741 consumeAlternative(i) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200742 const start = this.index;
743 this.onAlternativeEnter(start, i);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000744 while (this.currentCodePoint !== -1 && this.consumeTerm()) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200745 }
746 this.onAlternativeLeave(start, this.index, i);
747 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000748 consumeTerm() {
749 if (this._uFlag || this.strict) {
750 return (this.consumeAssertion() ||
751 (this.consumeAtom() && this.consumeOptionalQuantifier()));
Yang Guo4fd355c2019-09-19 10:59:03 +0200752 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000753 return ((this.consumeAssertion() &&
754 (!this._lastAssertionIsQuantifiable ||
755 this.consumeOptionalQuantifier())) ||
756 (this.consumeExtendedAtom() && this.consumeOptionalQuantifier()));
Yang Guo4fd355c2019-09-19 10:59:03 +0200757 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000758 consumeOptionalQuantifier() {
759 this.consumeQuantifier();
760 return true;
761 }
762 consumeAssertion() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200763 const start = this.index;
764 this._lastAssertionIsQuantifiable = false;
765 if (this.eat(CircumflexAccent)) {
766 this.onEdgeAssertion(start, this.index, "start");
767 return true;
768 }
769 if (this.eat(DollarSign)) {
770 this.onEdgeAssertion(start, this.index, "end");
771 return true;
772 }
773 if (this.eat2(ReverseSolidus, LatinCapitalLetterB)) {
774 this.onWordBoundaryAssertion(start, this.index, "word", true);
775 return true;
776 }
777 if (this.eat2(ReverseSolidus, LatinSmallLetterB)) {
778 this.onWordBoundaryAssertion(start, this.index, "word", false);
779 return true;
780 }
781 if (this.eat2(LeftParenthesis, QuestionMark)) {
782 const lookbehind = this.ecmaVersion >= 2018 && this.eat(LessThanSign);
783 let negate = false;
784 if (this.eat(EqualsSign) || (negate = this.eat(ExclamationMark))) {
785 const kind = lookbehind ? "lookbehind" : "lookahead";
786 this.onLookaroundAssertionEnter(start, kind, negate);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000787 this.consumeDisjunction();
Yang Guo4fd355c2019-09-19 10:59:03 +0200788 if (!this.eat(RightParenthesis)) {
789 this.raise("Unterminated group");
790 }
791 this._lastAssertionIsQuantifiable = !lookbehind && !this.strict;
792 this.onLookaroundAssertionLeave(start, this.index, kind, negate);
793 return true;
794 }
795 this.rewind(start);
796 }
797 return false;
798 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000799 consumeQuantifier(noConsume = false) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200800 const start = this.index;
801 let min = 0;
802 let max = 0;
803 let greedy = false;
804 if (this.eat(Asterisk)) {
805 min = 0;
806 max = Number.POSITIVE_INFINITY;
807 }
808 else if (this.eat(PlusSign)) {
809 min = 1;
810 max = Number.POSITIVE_INFINITY;
811 }
812 else if (this.eat(QuestionMark)) {
813 min = 0;
814 max = 1;
815 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000816 else if (this.eatBracedQuantifier(noConsume)) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200817 min = this._lastMinValue;
818 max = this._lastMaxValue;
819 }
820 else {
821 return false;
822 }
823 greedy = !this.eat(QuestionMark);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000824 if (!noConsume) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200825 this.onQuantifier(start, this.index, min, max, greedy);
826 }
827 return true;
828 }
829 eatBracedQuantifier(noError) {
830 const start = this.index;
831 if (this.eat(LeftCurlyBracket)) {
832 this._lastMinValue = 0;
833 this._lastMaxValue = Number.POSITIVE_INFINITY;
834 if (this.eatDecimalDigits()) {
835 this._lastMinValue = this._lastMaxValue = this._lastIntValue;
836 if (this.eat(Comma)) {
837 this._lastMaxValue = this.eatDecimalDigits()
838 ? this._lastIntValue
839 : Number.POSITIVE_INFINITY;
840 }
841 if (this.eat(RightCurlyBracket)) {
842 if (!noError && this._lastMaxValue < this._lastMinValue) {
843 this.raise("numbers out of order in {} quantifier");
844 }
845 return true;
846 }
847 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000848 if (!noError && (this._uFlag || this.strict)) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200849 this.raise("Incomplete quantifier");
850 }
851 this.rewind(start);
852 }
853 return false;
854 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000855 consumeAtom() {
856 return (this.consumePatternCharacter() ||
857 this.consumeDot() ||
858 this.consumeReverseSolidusAtomEscape() ||
859 this.consumeCharacterClass() ||
860 this.consumeUncapturingGroup() ||
861 this.consumeCapturingGroup());
Yang Guo4fd355c2019-09-19 10:59:03 +0200862 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000863 consumeDot() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200864 if (this.eat(FullStop)) {
865 this.onAnyCharacterSet(this.index - 1, this.index, "any");
866 return true;
867 }
868 return false;
869 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000870 consumeReverseSolidusAtomEscape() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200871 const start = this.index;
872 if (this.eat(ReverseSolidus)) {
Tim van der Lippe16aca392020-11-13 11:37:13 +0000873 if (this.consumeAtomEscape()) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200874 return true;
875 }
876 this.rewind(start);
877 }
878 return false;
879 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000880 consumeUncapturingGroup() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200881 const start = this.index;
882 if (this.eat3(LeftParenthesis, QuestionMark, Colon)) {
883 this.onGroupEnter(start);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000884 this.consumeDisjunction();
Yang Guo4fd355c2019-09-19 10:59:03 +0200885 if (!this.eat(RightParenthesis)) {
886 this.raise("Unterminated group");
887 }
888 this.onGroupLeave(start, this.index);
889 return true;
890 }
891 return false;
892 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000893 consumeCapturingGroup() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200894 const start = this.index;
895 if (this.eat(LeftParenthesis)) {
Tim van der Lippe16aca392020-11-13 11:37:13 +0000896 let name = null;
Yang Guo4fd355c2019-09-19 10:59:03 +0200897 if (this.ecmaVersion >= 2018) {
Tim van der Lippe16aca392020-11-13 11:37:13 +0000898 if (this.consumeGroupSpecifier()) {
899 name = this._lastStrValue;
900 }
Yang Guo4fd355c2019-09-19 10:59:03 +0200901 }
902 else if (this.currentCodePoint === QuestionMark) {
903 this.raise("Invalid group");
904 }
Yang Guo4fd355c2019-09-19 10:59:03 +0200905 this.onCapturingGroupEnter(start, name);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000906 this.consumeDisjunction();
Yang Guo4fd355c2019-09-19 10:59:03 +0200907 if (!this.eat(RightParenthesis)) {
908 this.raise("Unterminated group");
909 }
910 this.onCapturingGroupLeave(start, this.index, name);
911 return true;
912 }
913 return false;
914 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000915 consumeExtendedAtom() {
916 return (this.consumeDot() ||
917 this.consumeReverseSolidusAtomEscape() ||
918 this.consumeReverseSolidusFollowedByC() ||
919 this.consumeCharacterClass() ||
920 this.consumeUncapturingGroup() ||
921 this.consumeCapturingGroup() ||
922 this.consumeInvalidBracedQuantifier() ||
923 this.consumeExtendedPatternCharacter());
Yang Guo4fd355c2019-09-19 10:59:03 +0200924 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000925 consumeReverseSolidusFollowedByC() {
926 const start = this.index;
Yang Guo4fd355c2019-09-19 10:59:03 +0200927 if (this.currentCodePoint === ReverseSolidus &&
928 this.nextCodePoint === LatinSmallLetterC) {
929 this._lastIntValue = this.currentCodePoint;
930 this.advance();
Tim van der Lippe16aca392020-11-13 11:37:13 +0000931 this.onCharacter(start, this.index, ReverseSolidus);
Yang Guo4fd355c2019-09-19 10:59:03 +0200932 return true;
933 }
934 return false;
935 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000936 consumeInvalidBracedQuantifier() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200937 if (this.eatBracedQuantifier(true)) {
938 this.raise("Nothing to repeat");
939 }
940 return false;
941 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000942 consumePatternCharacter() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200943 const start = this.index;
944 const cp = this.currentCodePoint;
945 if (cp !== -1 && !isSyntaxCharacter(cp)) {
946 this.advance();
947 this.onCharacter(start, this.index, cp);
948 return true;
949 }
950 return false;
951 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000952 consumeExtendedPatternCharacter() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200953 const start = this.index;
954 const cp = this.currentCodePoint;
955 if (cp !== -1 &&
956 cp !== CircumflexAccent &&
957 cp !== DollarSign &&
958 cp !== ReverseSolidus &&
959 cp !== FullStop &&
960 cp !== Asterisk &&
961 cp !== PlusSign &&
962 cp !== QuestionMark &&
963 cp !== LeftParenthesis &&
964 cp !== RightParenthesis &&
965 cp !== LeftSquareBracket &&
966 cp !== VerticalLine) {
967 this.advance();
968 this.onCharacter(start, this.index, cp);
969 return true;
970 }
971 return false;
972 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000973 consumeGroupSpecifier() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200974 if (this.eat(QuestionMark)) {
975 if (this.eatGroupName()) {
976 if (!this._groupNames.has(this._lastStrValue)) {
977 this._groupNames.add(this._lastStrValue);
Tim van der Lippe16aca392020-11-13 11:37:13 +0000978 return true;
Yang Guo4fd355c2019-09-19 10:59:03 +0200979 }
980 this.raise("Duplicate capture group name");
981 }
982 this.raise("Invalid group");
983 }
Yang Guo4fd355c2019-09-19 10:59:03 +0200984 return false;
985 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000986 consumeAtomEscape() {
987 if (this.consumeBackreference() ||
988 this.consumeCharacterClassEscape() ||
989 this.consumeCharacterEscape() ||
990 (this._nFlag && this.consumeKGroupName())) {
Yang Guo4fd355c2019-09-19 10:59:03 +0200991 return true;
992 }
993 if (this.strict || this._uFlag) {
994 this.raise("Invalid escape");
995 }
996 return false;
997 }
Tim van der Lippe16aca392020-11-13 11:37:13 +0000998 consumeBackreference() {
Yang Guo4fd355c2019-09-19 10:59:03 +0200999 const start = this.index;
1000 if (this.eatDecimalEscape()) {
1001 const n = this._lastIntValue;
1002 if (n <= this._numCapturingParens) {
1003 this.onBackreference(start - 1, this.index, n);
1004 return true;
1005 }
Tim van der Lippe16aca392020-11-13 11:37:13 +00001006 if (this.strict || this._uFlag) {
Yang Guo4fd355c2019-09-19 10:59:03 +02001007 this.raise("Invalid escape");
1008 }
1009 this.rewind(start);
1010 }
1011 return false;
1012 }
Tim van der Lippe16aca392020-11-13 11:37:13 +00001013 consumeCharacterClassEscape() {
Yang Guo4fd355c2019-09-19 10:59:03 +02001014 const start = this.index;
1015 if (this.eat(LatinSmallLetterD)) {
1016 this._lastIntValue = -1;
1017 this.onEscapeCharacterSet(start - 1, this.index, "digit", false);
1018 return true;
1019 }
1020 if (this.eat(LatinCapitalLetterD)) {
1021 this._lastIntValue = -1;
1022 this.onEscapeCharacterSet(start - 1, this.index, "digit", true);
1023 return true;
1024 }
1025 if (this.eat(LatinSmallLetterS)) {
1026 this._lastIntValue = -1;
1027 this.onEscapeCharacterSet(start - 1, this.index, "space", false);
1028 return true;
1029 }
1030 if (this.eat(LatinCapitalLetterS)) {
1031 this._lastIntValue = -1;
1032 this.onEscapeCharacterSet(start - 1, this.index, "space", true);
1033 return true;
1034 }
1035 if (this.eat(LatinSmallLetterW)) {
1036 this._lastIntValue = -1;
1037 this.onEscapeCharacterSet(start - 1, this.index, "word", false);
1038 return true;
1039 }
1040 if (this.eat(LatinCapitalLetterW)) {
1041 this._lastIntValue = -1;
1042 this.onEscapeCharacterSet(start - 1, this.index, "word", true);
1043 return true;
1044 }
1045 let negate = false;
1046 if (this._uFlag &&
1047 this.ecmaVersion >= 2018 &&
1048 (this.eat(LatinSmallLetterP) ||
1049 (negate = this.eat(LatinCapitalLetterP)))) {
1050 this._lastIntValue = -1;
1051 if (this.eat(LeftCurlyBracket) &&
1052 this.eatUnicodePropertyValueExpression() &&
1053 this.eat(RightCurlyBracket)) {
1054 this.onUnicodePropertyCharacterSet(start - 1, this.index, "property", this._lastKeyValue, this._lastValValue || null, negate);
1055 return true;
1056 }
1057 this.raise("Invalid property name");
1058 }
1059 return false;
1060 }
Tim van der Lippe16aca392020-11-13 11:37:13 +00001061 consumeCharacterEscape() {
1062 const start = this.index;
1063 if (this.eatControlEscape() ||
1064 this.eatCControlLetter() ||
1065 this.eatZero() ||
1066 this.eatHexEscapeSequence() ||
1067 this.eatRegExpUnicodeEscapeSequence() ||
1068 (!this.strict &&
1069 !this._uFlag &&
1070 this.eatLegacyOctalEscapeSequence()) ||
1071 this.eatIdentityEscape()) {
1072 this.onCharacter(start - 1, this.index, this._lastIntValue);
1073 return true;
1074 }
1075 return false;
1076 }
1077 consumeKGroupName() {
1078 const start = this.index;
1079 if (this.eat(LatinSmallLetterK)) {
1080 if (this.eatGroupName()) {
1081 const groupName = this._lastStrValue;
1082 this._backreferenceNames.add(groupName);
1083 this.onBackreference(start - 1, this.index, groupName);
1084 return true;
1085 }
1086 this.raise("Invalid named reference");
1087 }
1088 return false;
1089 }
1090 consumeCharacterClass() {
1091 const start = this.index;
1092 if (this.eat(LeftSquareBracket)) {
1093 const negate = this.eat(CircumflexAccent);
1094 this.onCharacterClassEnter(start, negate);
1095 this.consumeClassRanges();
1096 if (!this.eat(RightSquareBracket)) {
1097 this.raise("Unterminated character class");
1098 }
1099 this.onCharacterClassLeave(start, this.index, negate);
1100 return true;
1101 }
1102 return false;
1103 }
1104 consumeClassRanges() {
1105 const strict = this.strict || this._uFlag;
1106 for (;;) {
1107 const rangeStart = this.index;
1108 if (!this.consumeClassAtom()) {
1109 break;
1110 }
1111 const min = this._lastIntValue;
1112 if (!this.eat(HyphenMinus)) {
1113 continue;
1114 }
1115 this.onCharacter(this.index - 1, this.index, HyphenMinus);
1116 if (!this.consumeClassAtom()) {
1117 break;
1118 }
1119 const max = this._lastIntValue;
1120 if (min === -1 || max === -1) {
1121 if (strict) {
1122 this.raise("Invalid character class");
1123 }
1124 continue;
1125 }
1126 if (min > max) {
1127 this.raise("Range out of order in character class");
1128 }
1129 this.onCharacterClassRange(rangeStart, this.index, min, max);
1130 }
1131 }
1132 consumeClassAtom() {
1133 const start = this.index;
1134 const cp = this.currentCodePoint;
1135 if (cp !== -1 && cp !== ReverseSolidus && cp !== RightSquareBracket) {
1136 this.advance();
1137 this._lastIntValue = cp;
1138 this.onCharacter(start, this.index, this._lastIntValue);
1139 return true;
1140 }
1141 if (this.eat(ReverseSolidus)) {
1142 if (this.consumeClassEscape()) {
1143 return true;
1144 }
1145 if (!this.strict && this.currentCodePoint === LatinSmallLetterC) {
1146 this._lastIntValue = ReverseSolidus;
1147 this.onCharacter(start, this.index, this._lastIntValue);
1148 return true;
1149 }
1150 if (this.strict || this._uFlag) {
1151 this.raise("Invalid escape");
1152 }
1153 this.rewind(start);
1154 }
1155 return false;
1156 }
1157 consumeClassEscape() {
1158 const start = this.index;
1159 if (this.eat(LatinSmallLetterB)) {
1160 this._lastIntValue = Backspace;
1161 this.onCharacter(start - 1, this.index, this._lastIntValue);
1162 return true;
1163 }
1164 if (this._uFlag && this.eat(HyphenMinus)) {
1165 this._lastIntValue = HyphenMinus;
1166 this.onCharacter(start - 1, this.index, this._lastIntValue);
1167 return true;
1168 }
1169 let cp = 0;
1170 if (!this.strict &&
1171 !this._uFlag &&
1172 this.currentCodePoint === LatinSmallLetterC &&
1173 (isDecimalDigit((cp = this.nextCodePoint)) || cp === LowLine)) {
1174 this.advance();
1175 this.advance();
1176 this._lastIntValue = cp % 0x20;
1177 this.onCharacter(start - 1, this.index, this._lastIntValue);
1178 return true;
1179 }
1180 return (this.consumeCharacterClassEscape() || this.consumeCharacterEscape());
1181 }
1182 eatGroupName() {
1183 if (this.eat(LessThanSign)) {
1184 if (this.eatRegExpIdentifierName() && this.eat(GreaterThanSign)) {
1185 return true;
1186 }
1187 this.raise("Invalid capture group name");
1188 }
1189 return false;
1190 }
1191 eatRegExpIdentifierName() {
1192 if (this.eatRegExpIdentifierStart()) {
1193 this._lastStrValue = String.fromCodePoint(this._lastIntValue);
1194 while (this.eatRegExpIdentifierPart()) {
1195 this._lastStrValue += String.fromCodePoint(this._lastIntValue);
1196 }
1197 return true;
1198 }
1199 return false;
1200 }
1201 eatRegExpIdentifierStart() {
1202 const start = this.index;
1203 const forceUFlag = !this._uFlag && this.ecmaVersion >= 2020;
1204 let cp = this.currentCodePoint;
1205 this.advance();
1206 if (cp === ReverseSolidus &&
1207 this.eatRegExpUnicodeEscapeSequence(forceUFlag)) {
1208 cp = this._lastIntValue;
1209 }
1210 else if (forceUFlag &&
1211 isLeadSurrogate(cp) &&
1212 isTrailSurrogate(this.currentCodePoint)) {
1213 cp = combineSurrogatePair(cp, this.currentCodePoint);
1214 this.advance();
1215 }
1216 if (isRegExpIdentifierStart(cp)) {
1217 this._lastIntValue = cp;
1218 return true;
1219 }
1220 if (this.index !== start) {
1221 this.rewind(start);
1222 }
1223 return false;
1224 }
1225 eatRegExpIdentifierPart() {
1226 const start = this.index;
1227 const forceUFlag = !this._uFlag && this.ecmaVersion >= 2020;
1228 let cp = this.currentCodePoint;
1229 this.advance();
1230 if (cp === ReverseSolidus &&
1231 this.eatRegExpUnicodeEscapeSequence(forceUFlag)) {
1232 cp = this._lastIntValue;
1233 }
1234 else if (forceUFlag &&
1235 isLeadSurrogate(cp) &&
1236 isTrailSurrogate(this.currentCodePoint)) {
1237 cp = combineSurrogatePair(cp, this.currentCodePoint);
1238 this.advance();
1239 }
1240 if (isRegExpIdentifierPart(cp)) {
1241 this._lastIntValue = cp;
1242 return true;
1243 }
1244 if (this.index !== start) {
1245 this.rewind(start);
1246 }
1247 return false;
1248 }
1249 eatCControlLetter() {
1250 const start = this.index;
1251 if (this.eat(LatinSmallLetterC)) {
1252 if (this.eatControlLetter()) {
1253 return true;
1254 }
1255 this.rewind(start);
1256 }
1257 return false;
1258 }
1259 eatZero() {
1260 if (this.currentCodePoint === DigitZero &&
1261 !isDecimalDigit(this.nextCodePoint)) {
1262 this._lastIntValue = 0;
1263 this.advance();
1264 return true;
1265 }
1266 return false;
1267 }
1268 eatControlEscape() {
1269 if (this.eat(LatinSmallLetterF)) {
1270 this._lastIntValue = FormFeed;
1271 return true;
1272 }
1273 if (this.eat(LatinSmallLetterN)) {
1274 this._lastIntValue = LineFeed;
1275 return true;
1276 }
1277 if (this.eat(LatinSmallLetterR)) {
1278 this._lastIntValue = CarriageReturn;
1279 return true;
1280 }
1281 if (this.eat(LatinSmallLetterT)) {
1282 this._lastIntValue = CharacterTabulation;
1283 return true;
1284 }
1285 if (this.eat(LatinSmallLetterV)) {
1286 this._lastIntValue = LineTabulation;
1287 return true;
1288 }
1289 return false;
1290 }
1291 eatControlLetter() {
1292 const cp = this.currentCodePoint;
1293 if (isLatinLetter(cp)) {
1294 this.advance();
1295 this._lastIntValue = cp % 0x20;
1296 return true;
1297 }
1298 return false;
1299 }
1300 eatRegExpUnicodeEscapeSequence(forceUFlag = false) {
1301 const start = this.index;
1302 const uFlag = forceUFlag || this._uFlag;
1303 if (this.eat(LatinSmallLetterU)) {
1304 if ((uFlag && this.eatRegExpUnicodeSurrogatePairEscape()) ||
1305 this.eatFixedHexDigits(4) ||
1306 (uFlag && this.eatRegExpUnicodeCodePointEscape())) {
1307 return true;
1308 }
1309 if (this.strict || uFlag) {
1310 this.raise("Invalid unicode escape");
1311 }
1312 this.rewind(start);
1313 }
1314 return false;
1315 }
1316 eatRegExpUnicodeSurrogatePairEscape() {
1317 const start = this.index;
1318 if (this.eatFixedHexDigits(4)) {
1319 const lead = this._lastIntValue;
1320 if (isLeadSurrogate(lead) &&
1321 this.eat(ReverseSolidus) &&
1322 this.eat(LatinSmallLetterU) &&
1323 this.eatFixedHexDigits(4)) {
1324 const trail = this._lastIntValue;
1325 if (isTrailSurrogate(trail)) {
1326 this._lastIntValue = combineSurrogatePair(lead, trail);
1327 return true;
1328 }
1329 }
1330 this.rewind(start);
1331 }
1332 return false;
1333 }
1334 eatRegExpUnicodeCodePointEscape() {
1335 const start = this.index;
1336 if (this.eat(LeftCurlyBracket) &&
1337 this.eatHexDigits() &&
1338 this.eat(RightCurlyBracket) &&
1339 isValidUnicode(this._lastIntValue)) {
1340 return true;
1341 }
1342 this.rewind(start);
1343 return false;
1344 }
1345 eatIdentityEscape() {
1346 const cp = this.currentCodePoint;
1347 if (this.isValidIdentityEscape(cp)) {
1348 this._lastIntValue = cp;
1349 this.advance();
1350 return true;
1351 }
1352 return false;
1353 }
1354 isValidIdentityEscape(cp) {
1355 if (cp === -1) {
1356 return false;
1357 }
1358 if (this._uFlag) {
1359 return isSyntaxCharacter(cp) || cp === Solidus;
1360 }
1361 if (this.strict) {
1362 return !isIdContinue(cp);
1363 }
1364 if (this._nFlag) {
1365 return !(cp === LatinSmallLetterC || cp === LatinSmallLetterK);
1366 }
1367 return cp !== LatinSmallLetterC;
1368 }
1369 eatDecimalEscape() {
1370 this._lastIntValue = 0;
1371 let cp = this.currentCodePoint;
1372 if (cp >= DigitOne && cp <= DigitNine) {
1373 do {
1374 this._lastIntValue = 10 * this._lastIntValue + (cp - DigitZero);
1375 this.advance();
1376 } while ((cp = this.currentCodePoint) >= DigitZero &&
1377 cp <= DigitNine);
1378 return true;
1379 }
1380 return false;
1381 }
Yang Guo4fd355c2019-09-19 10:59:03 +02001382 eatUnicodePropertyValueExpression() {
1383 const start = this.index;
1384 if (this.eatUnicodePropertyName() && this.eat(EqualsSign)) {
1385 this._lastKeyValue = this._lastStrValue;
1386 if (this.eatUnicodePropertyValue()) {
1387 this._lastValValue = this._lastStrValue;
Tim van der Lippe16aca392020-11-13 11:37:13 +00001388 if (isValidUnicodeProperty(this.ecmaVersion, this._lastKeyValue, this._lastValValue)) {
Yang Guo4fd355c2019-09-19 10:59:03 +02001389 return true;
1390 }
1391 this.raise("Invalid property name");
1392 }
1393 }
1394 this.rewind(start);
1395 if (this.eatLoneUnicodePropertyNameOrValue()) {
1396 const nameOrValue = this._lastStrValue;
Tim van der Lippe16aca392020-11-13 11:37:13 +00001397 if (isValidUnicodeProperty(this.ecmaVersion, "General_Category", nameOrValue)) {
Yang Guo4fd355c2019-09-19 10:59:03 +02001398 this._lastKeyValue = "General_Category";
1399 this._lastValValue = nameOrValue;
1400 return true;
1401 }
Tim van der Lippe16aca392020-11-13 11:37:13 +00001402 if (isValidLoneUnicodeProperty(this.ecmaVersion, nameOrValue)) {
Yang Guo4fd355c2019-09-19 10:59:03 +02001403 this._lastKeyValue = nameOrValue;
1404 this._lastValValue = "";
1405 return true;
1406 }
1407 this.raise("Invalid property name");
1408 }
1409 return false;
1410 }
1411 eatUnicodePropertyName() {
1412 this._lastStrValue = "";
1413 while (isUnicodePropertyNameCharacter(this.currentCodePoint)) {
1414 this._lastStrValue += String.fromCodePoint(this.currentCodePoint);
1415 this.advance();
1416 }
1417 return this._lastStrValue !== "";
1418 }
1419 eatUnicodePropertyValue() {
1420 this._lastStrValue = "";
1421 while (isUnicodePropertyValueCharacter(this.currentCodePoint)) {
1422 this._lastStrValue += String.fromCodePoint(this.currentCodePoint);
1423 this.advance();
1424 }
1425 return this._lastStrValue !== "";
1426 }
1427 eatLoneUnicodePropertyNameOrValue() {
1428 return this.eatUnicodePropertyValue();
1429 }
Yang Guo4fd355c2019-09-19 10:59:03 +02001430 eatHexEscapeSequence() {
1431 const start = this.index;
1432 if (this.eat(LatinSmallLetterX)) {
1433 if (this.eatFixedHexDigits(2)) {
1434 return true;
1435 }
Tim van der Lippe16aca392020-11-13 11:37:13 +00001436 if (this._uFlag || this.strict) {
Yang Guo4fd355c2019-09-19 10:59:03 +02001437 this.raise("Invalid escape");
1438 }
1439 this.rewind(start);
1440 }
1441 return false;
1442 }
1443 eatDecimalDigits() {
1444 const start = this.index;
1445 this._lastIntValue = 0;
1446 while (isDecimalDigit(this.currentCodePoint)) {
1447 this._lastIntValue =
1448 10 * this._lastIntValue + digitToInt(this.currentCodePoint);
1449 this.advance();
1450 }
1451 return this.index !== start;
1452 }
1453 eatHexDigits() {
1454 const start = this.index;
1455 this._lastIntValue = 0;
1456 while (isHexDigit(this.currentCodePoint)) {
1457 this._lastIntValue =
1458 16 * this._lastIntValue + digitToInt(this.currentCodePoint);
1459 this.advance();
1460 }
1461 return this.index !== start;
1462 }
1463 eatLegacyOctalEscapeSequence() {
1464 if (this.eatOctalDigit()) {
1465 const n1 = this._lastIntValue;
1466 if (this.eatOctalDigit()) {
1467 const n2 = this._lastIntValue;
1468 if (n1 <= 3 && this.eatOctalDigit()) {
1469 this._lastIntValue = n1 * 64 + n2 * 8 + this._lastIntValue;
1470 }
1471 else {
1472 this._lastIntValue = n1 * 8 + n2;
1473 }
1474 }
1475 else {
1476 this._lastIntValue = n1;
1477 }
1478 return true;
1479 }
1480 return false;
1481 }
1482 eatOctalDigit() {
1483 const cp = this.currentCodePoint;
1484 if (isOctalDigit(cp)) {
1485 this.advance();
1486 this._lastIntValue = cp - DigitZero;
1487 return true;
1488 }
1489 this._lastIntValue = 0;
1490 return false;
1491 }
1492 eatFixedHexDigits(length) {
1493 const start = this.index;
1494 this._lastIntValue = 0;
1495 for (let i = 0; i < length; ++i) {
1496 const cp = this.currentCodePoint;
1497 if (!isHexDigit(cp)) {
1498 this.rewind(start);
1499 return false;
1500 }
1501 this._lastIntValue = 16 * this._lastIntValue + digitToInt(cp);
1502 this.advance();
1503 }
1504 return true;
1505 }
1506}
1507
// Placeholder objects for RegExpParserState. They have no `type` property,
// so the `pattern` / `flags` getters throw until a real node replaces them.
// DummyCapturingGroup is the initial `resolved` value of a Backreference
// node until onPatternLeave links it to its capturing group.
const DummyPattern = {};
const DummyFlags = {};
const DummyCapturingGroup = {};
1511class RegExpParserState {
1512 constructor(options) {
1513 this._node = DummyPattern;
1514 this._flags = DummyFlags;
1515 this._backreferences = [];
1516 this._capturingGroups = [];
1517 this.source = "";
1518 this.strict = Boolean(options && options.strict);
Tim van der Lippe16aca392020-11-13 11:37:13 +00001519 this.ecmaVersion = (options && options.ecmaVersion) || 2020;
Yang Guo4fd355c2019-09-19 10:59:03 +02001520 }
1521 get pattern() {
1522 if (this._node.type !== "Pattern") {
1523 throw new Error("UnknownError");
1524 }
1525 return this._node;
1526 }
1527 get flags() {
1528 if (this._flags.type !== "Flags") {
1529 throw new Error("UnknownError");
1530 }
1531 return this._flags;
1532 }
1533 onFlags(start, end, global, ignoreCase, multiline, unicode, sticky, dotAll) {
1534 this._flags = {
1535 type: "Flags",
1536 parent: null,
1537 start,
1538 end,
1539 raw: this.source.slice(start, end),
1540 global,
1541 ignoreCase,
1542 multiline,
1543 unicode,
1544 sticky,
1545 dotAll,
1546 };
1547 }
1548 onPatternEnter(start) {
1549 this._node = {
1550 type: "Pattern",
1551 parent: null,
1552 start,
1553 end: start,
1554 raw: "",
1555 alternatives: [],
1556 };
1557 this._backreferences.length = 0;
1558 this._capturingGroups.length = 0;
1559 }
1560 onPatternLeave(start, end) {
1561 this._node.end = end;
1562 this._node.raw = this.source.slice(start, end);
1563 for (const reference of this._backreferences) {
1564 const ref = reference.ref;
1565 const group = typeof ref === "number"
1566 ? this._capturingGroups[ref - 1]
1567 : this._capturingGroups.find(g => g.name === ref);
1568 reference.resolved = group;
1569 group.references.push(reference);
1570 }
1571 }
1572 onAlternativeEnter(start) {
1573 const parent = this._node;
1574 if (parent.type !== "Assertion" &&
1575 parent.type !== "CapturingGroup" &&
1576 parent.type !== "Group" &&
1577 parent.type !== "Pattern") {
1578 throw new Error("UnknownError");
1579 }
1580 this._node = {
1581 type: "Alternative",
1582 parent,
1583 start,
1584 end: start,
1585 raw: "",
1586 elements: [],
1587 };
1588 parent.alternatives.push(this._node);
1589 }
1590 onAlternativeLeave(start, end) {
1591 const node = this._node;
1592 if (node.type !== "Alternative") {
1593 throw new Error("UnknownError");
1594 }
1595 node.end = end;
1596 node.raw = this.source.slice(start, end);
1597 this._node = node.parent;
1598 }
1599 onGroupEnter(start) {
1600 const parent = this._node;
1601 if (parent.type !== "Alternative") {
1602 throw new Error("UnknownError");
1603 }
1604 this._node = {
1605 type: "Group",
1606 parent,
1607 start,
1608 end: start,
1609 raw: "",
1610 alternatives: [],
1611 };
1612 parent.elements.push(this._node);
1613 }
1614 onGroupLeave(start, end) {
1615 const node = this._node;
1616 if (node.type !== "Group" || node.parent.type !== "Alternative") {
1617 throw new Error("UnknownError");
1618 }
1619 node.end = end;
1620 node.raw = this.source.slice(start, end);
1621 this._node = node.parent;
1622 }
1623 onCapturingGroupEnter(start, name) {
1624 const parent = this._node;
1625 if (parent.type !== "Alternative") {
1626 throw new Error("UnknownError");
1627 }
1628 this._node = {
1629 type: "CapturingGroup",
1630 parent,
1631 start,
1632 end: start,
1633 raw: "",
1634 name,
1635 alternatives: [],
1636 references: [],
1637 };
1638 parent.elements.push(this._node);
1639 this._capturingGroups.push(this._node);
1640 }
1641 onCapturingGroupLeave(start, end) {
1642 const node = this._node;
1643 if (node.type !== "CapturingGroup" ||
1644 node.parent.type !== "Alternative") {
1645 throw new Error("UnknownError");
1646 }
1647 node.end = end;
1648 node.raw = this.source.slice(start, end);
1649 this._node = node.parent;
1650 }
1651 onQuantifier(start, end, min, max, greedy) {
1652 const parent = this._node;
1653 if (parent.type !== "Alternative") {
1654 throw new Error("UnknownError");
1655 }
1656 const element = parent.elements.pop();
1657 if (element == null ||
1658 element.type === "Quantifier" ||
1659 (element.type === "Assertion" && element.kind !== "lookahead")) {
1660 throw new Error("UnknownError");
1661 }
1662 const node = {
1663 type: "Quantifier",
1664 parent,
1665 start: element.start,
1666 end,
1667 raw: this.source.slice(element.start, end),
1668 min,
1669 max,
1670 greedy,
1671 element,
1672 };
1673 parent.elements.push(node);
1674 element.parent = node;
1675 }
1676 onLookaroundAssertionEnter(start, kind, negate) {
1677 const parent = this._node;
1678 if (parent.type !== "Alternative") {
1679 throw new Error("UnknownError");
1680 }
Tim van der Lippe16aca392020-11-13 11:37:13 +00001681 const node = (this._node = {
Yang Guo4fd355c2019-09-19 10:59:03 +02001682 type: "Assertion",
1683 parent,
1684 start,
1685 end: start,
1686 raw: "",
1687 kind,
1688 negate,
1689 alternatives: [],
Tim van der Lippe16aca392020-11-13 11:37:13 +00001690 });
1691 parent.elements.push(node);
Yang Guo4fd355c2019-09-19 10:59:03 +02001692 }
1693 onLookaroundAssertionLeave(start, end) {
1694 const node = this._node;
1695 if (node.type !== "Assertion" || node.parent.type !== "Alternative") {
1696 throw new Error("UnknownError");
1697 }
1698 node.end = end;
1699 node.raw = this.source.slice(start, end);
1700 this._node = node.parent;
1701 }
1702 onEdgeAssertion(start, end, kind) {
1703 const parent = this._node;
1704 if (parent.type !== "Alternative") {
1705 throw new Error("UnknownError");
1706 }
1707 parent.elements.push({
1708 type: "Assertion",
1709 parent,
1710 start,
1711 end,
1712 raw: this.source.slice(start, end),
1713 kind,
1714 });
1715 }
1716 onWordBoundaryAssertion(start, end, kind, negate) {
1717 const parent = this._node;
1718 if (parent.type !== "Alternative") {
1719 throw new Error("UnknownError");
1720 }
1721 parent.elements.push({
1722 type: "Assertion",
1723 parent,
1724 start,
1725 end,
1726 raw: this.source.slice(start, end),
1727 kind,
1728 negate,
1729 });
1730 }
1731 onAnyCharacterSet(start, end, kind) {
1732 const parent = this._node;
1733 if (parent.type !== "Alternative") {
1734 throw new Error("UnknownError");
1735 }
1736 parent.elements.push({
1737 type: "CharacterSet",
1738 parent,
1739 start,
1740 end,
1741 raw: this.source.slice(start, end),
1742 kind,
1743 });
1744 }
1745 onEscapeCharacterSet(start, end, kind, negate) {
1746 const parent = this._node;
1747 if (parent.type !== "Alternative" && parent.type !== "CharacterClass") {
1748 throw new Error("UnknownError");
1749 }
1750 parent.elements.push({
1751 type: "CharacterSet",
1752 parent,
1753 start,
1754 end,
1755 raw: this.source.slice(start, end),
1756 kind,
1757 negate,
1758 });
1759 }
1760 onUnicodePropertyCharacterSet(start, end, kind, key, value, negate) {
1761 const parent = this._node;
1762 if (parent.type !== "Alternative" && parent.type !== "CharacterClass") {
1763 throw new Error("UnknownError");
1764 }
1765 parent.elements.push({
1766 type: "CharacterSet",
1767 parent,
1768 start,
1769 end,
1770 raw: this.source.slice(start, end),
1771 kind,
1772 key,
1773 value,
1774 negate,
1775 });
1776 }
1777 onCharacter(start, end, value) {
1778 const parent = this._node;
1779 if (parent.type !== "Alternative" && parent.type !== "CharacterClass") {
1780 throw new Error("UnknownError");
1781 }
1782 parent.elements.push({
1783 type: "Character",
1784 parent,
1785 start,
1786 end,
1787 raw: this.source.slice(start, end),
1788 value,
1789 });
1790 }
1791 onBackreference(start, end, ref) {
1792 const parent = this._node;
1793 if (parent.type !== "Alternative") {
1794 throw new Error("UnknownError");
1795 }
1796 const node = {
1797 type: "Backreference",
1798 parent,
1799 start,
1800 end,
1801 raw: this.source.slice(start, end),
1802 ref,
1803 resolved: DummyCapturingGroup,
1804 };
1805 parent.elements.push(node);
1806 this._backreferences.push(node);
1807 }
1808 onCharacterClassEnter(start, negate) {
1809 const parent = this._node;
1810 if (parent.type !== "Alternative") {
1811 throw new Error("UnknownError");
1812 }
1813 this._node = {
1814 type: "CharacterClass",
1815 parent,
1816 start,
1817 end: start,
1818 raw: "",
1819 negate,
1820 elements: [],
1821 };
1822 parent.elements.push(this._node);
1823 }
1824 onCharacterClassLeave(start, end) {
1825 const node = this._node;
1826 if (node.type !== "CharacterClass" ||
1827 node.parent.type !== "Alternative") {
1828 throw new Error("UnknownError");
1829 }
1830 node.end = end;
1831 node.raw = this.source.slice(start, end);
1832 this._node = node.parent;
1833 }
1834 onCharacterClassRange(start, end) {
1835 const parent = this._node;
1836 if (parent.type !== "CharacterClass") {
1837 throw new Error("UnknownError");
1838 }
1839 const elements = parent.elements;
1840 const max = elements.pop();
1841 const hyphen = elements.pop();
1842 const min = elements.pop();
1843 if (!min ||
1844 !max ||
1845 !hyphen ||
1846 min.type !== "Character" ||
1847 max.type !== "Character" ||
1848 hyphen.type !== "Character" ||
1849 hyphen.value !== HyphenMinus) {
1850 throw new Error("UnknownError");
1851 }
1852 const node = {
1853 type: "CharacterClassRange",
1854 parent,
1855 start,
1856 end,
1857 raw: this.source.slice(start, end),
1858 min,
1859 max,
1860 };
1861 min.parent = node;
1862 max.parent = node;
1863 elements.push(node);
1864 }
1865}
/**
 * Parser front-end: owns a RegExpParserState and a RegExpValidator built
 * on top of that state, and returns the nodes the state holds after each
 * validation run.
 */
class RegExpParser {
    /**
     * @param {object} [options] Passed unchanged to RegExpParserState.
     */
    constructor(options) {
        const state = new RegExpParserState(options);
        this._state = state;
        this._validator = new RegExpValidator(state);
    }

    /**
     * Parses a regular expression literal and returns a "RegExpLiteral" node.
     *
     * Note that the returned node's `raw` is the whole `source` string as
     * passed in, not the `start`..`end` slice.
     *
     * @param {string} source Text containing the literal.
     * @param {number} [start=0] Start offset handed to the validator.
     * @param {number} [end=source.length] End offset handed to the validator.
     * @returns {object} The literal node; its `pattern` and `flags` children
     *     get their `parent` set to it.
     */
    parseLiteral(source, start = 0, end = source.length) {
        const state = this._state;
        state.source = source;
        this._validator.validateLiteral(source, start, end);
        const { pattern, flags } = state;
        const literal = {
            type: "RegExpLiteral",
            parent: null,
            start,
            end,
            raw: source,
            pattern,
            flags,
        };
        pattern.parent = literal;
        flags.parent = literal;
        return literal;
    }

    /**
     * Parses a flags string and returns the flags node held by the state.
     *
     * @param {string} source Text containing the flags.
     * @param {number} [start=0] Start offset handed to the validator.
     * @param {number} [end=source.length] End offset handed to the validator.
     * @returns {object} `flags` as stored on the parser state.
     */
    parseFlags(source, start = 0, end = source.length) {
        const state = this._state;
        state.source = source;
        this._validator.validateFlags(source, start, end);
        return state.flags;
    }

    /**
     * Parses a pattern and returns the pattern node held by the state.
     *
     * @param {string} source Text containing the pattern.
     * @param {number} [start=0] Start offset handed to the validator.
     * @param {number} [end=source.length] End offset handed to the validator.
     * @param {boolean} [uFlag=false] Forwarded to the validator unchanged.
     * @returns {object} `pattern` as stored on the parser state.
     */
    parsePattern(source, start = 0, end = source.length, uFlag = false) {
        const state = this._state;
        state.source = source;
        this._validator.validatePattern(source, start, end, uFlag);
        return state.pattern;
    }
}
1900
/** Maps each AST node `type` to the RegExpVisitor method that handles it. */
const VISIT_METHOD_BY_TYPE = new Map([
    ["Alternative", "visitAlternative"],
    ["Assertion", "visitAssertion"],
    ["Backreference", "visitBackreference"],
    ["CapturingGroup", "visitCapturingGroup"],
    ["Character", "visitCharacter"],
    ["CharacterClass", "visitCharacterClass"],
    ["CharacterClassRange", "visitCharacterClassRange"],
    ["CharacterSet", "visitCharacterSet"],
    ["Flags", "visitFlags"],
    ["Group", "visitGroup"],
    ["Pattern", "visitPattern"],
    ["Quantifier", "visitQuantifier"],
    ["RegExpLiteral", "visitRegExpLiteral"],
]);

/**
 * Depth-first walker over a RegExp AST.
 *
 * For every node it calls the optional `on<Type>Enter` handler, walks the
 * node's children (if any), then calls the optional `on<Type>Leave` handler.
 * Handlers are invoked as methods of the handlers object.
 */
class RegExpVisitor {
    /**
     * @param {object} handlers Object holding any subset of the
     *     `on<Type>Enter` / `on<Type>Leave` callbacks.
     */
    constructor(handlers) {
        this._handlers = handlers;
    }

    /**
     * Dispatches to the `visit<Type>` method matching `node.type`.
     *
     * @param {object} node AST node to walk.
     * @throws {Error} "Unknown type: ..." when `node.type` is not recognised.
     */
    visit(node) {
        const methodName = VISIT_METHOD_BY_TYPE.get(node.type);
        if (methodName === undefined) {
            throw new Error(`Unknown type: ${node.type}`);
        }
        this[methodName](node);
    }

    /** Walks an "Alternative" and each of its `elements`. */
    visitAlternative(node) {
        const handlers = this._handlers;
        if (handlers.onAlternativeEnter) {
            handlers.onAlternativeEnter(node);
        }
        node.elements.forEach((element) => this.visit(element));
        if (handlers.onAlternativeLeave) {
            handlers.onAlternativeLeave(node);
        }
    }

    /**
     * Walks an "Assertion"; only "lookahead" and "lookbehind" assertions have
     * their `alternatives` descended into.
     */
    visitAssertion(node) {
        const handlers = this._handlers;
        if (handlers.onAssertionEnter) {
            handlers.onAssertionEnter(node);
        }
        const hasAlternatives = node.kind === "lookahead" || node.kind === "lookbehind";
        if (hasAlternatives) {
            node.alternatives.forEach((alternative) => this.visit(alternative));
        }
        if (handlers.onAssertionLeave) {
            handlers.onAssertionLeave(node);
        }
    }

    /** Reports a "Backreference" leaf node. */
    visitBackreference(node) {
        const handlers = this._handlers;
        if (handlers.onBackreferenceEnter) {
            handlers.onBackreferenceEnter(node);
        }
        if (handlers.onBackreferenceLeave) {
            handlers.onBackreferenceLeave(node);
        }
    }

    /** Walks a "CapturingGroup" and each of its `alternatives`. */
    visitCapturingGroup(node) {
        const handlers = this._handlers;
        if (handlers.onCapturingGroupEnter) {
            handlers.onCapturingGroupEnter(node);
        }
        node.alternatives.forEach((alternative) => this.visit(alternative));
        if (handlers.onCapturingGroupLeave) {
            handlers.onCapturingGroupLeave(node);
        }
    }

    /** Reports a "Character" leaf node. */
    visitCharacter(node) {
        const handlers = this._handlers;
        if (handlers.onCharacterEnter) {
            handlers.onCharacterEnter(node);
        }
        if (handlers.onCharacterLeave) {
            handlers.onCharacterLeave(node);
        }
    }

    /** Walks a "CharacterClass" and each of its `elements`. */
    visitCharacterClass(node) {
        const handlers = this._handlers;
        if (handlers.onCharacterClassEnter) {
            handlers.onCharacterClassEnter(node);
        }
        node.elements.forEach((element) => this.visit(element));
        if (handlers.onCharacterClassLeave) {
            handlers.onCharacterClassLeave(node);
        }
    }

    /**
     * Walks a "CharacterClassRange"; `min` and `max` go straight to
     * visitCharacter rather than through the `visit` dispatcher.
     */
    visitCharacterClassRange(node) {
        const handlers = this._handlers;
        if (handlers.onCharacterClassRangeEnter) {
            handlers.onCharacterClassRangeEnter(node);
        }
        this.visitCharacter(node.min);
        this.visitCharacter(node.max);
        if (handlers.onCharacterClassRangeLeave) {
            handlers.onCharacterClassRangeLeave(node);
        }
    }

    /** Reports a "CharacterSet" leaf node. */
    visitCharacterSet(node) {
        const handlers = this._handlers;
        if (handlers.onCharacterSetEnter) {
            handlers.onCharacterSetEnter(node);
        }
        if (handlers.onCharacterSetLeave) {
            handlers.onCharacterSetLeave(node);
        }
    }

    /** Reports a "Flags" leaf node. */
    visitFlags(node) {
        const handlers = this._handlers;
        if (handlers.onFlagsEnter) {
            handlers.onFlagsEnter(node);
        }
        if (handlers.onFlagsLeave) {
            handlers.onFlagsLeave(node);
        }
    }

    /** Walks a "Group" and each of its `alternatives`. */
    visitGroup(node) {
        const handlers = this._handlers;
        if (handlers.onGroupEnter) {
            handlers.onGroupEnter(node);
        }
        node.alternatives.forEach((alternative) => this.visit(alternative));
        if (handlers.onGroupLeave) {
            handlers.onGroupLeave(node);
        }
    }

    /** Walks a "Pattern" and each of its `alternatives`. */
    visitPattern(node) {
        const handlers = this._handlers;
        if (handlers.onPatternEnter) {
            handlers.onPatternEnter(node);
        }
        node.alternatives.forEach((alternative) => this.visit(alternative));
        if (handlers.onPatternLeave) {
            handlers.onPatternLeave(node);
        }
    }

    /** Walks a "Quantifier" and the single `element` it applies to. */
    visitQuantifier(node) {
        const handlers = this._handlers;
        if (handlers.onQuantifierEnter) {
            handlers.onQuantifierEnter(node);
        }
        this.visit(node.element);
        if (handlers.onQuantifierLeave) {
            handlers.onQuantifierLeave(node);
        }
    }

    /** Walks a "RegExpLiteral": its `pattern` first, then its `flags`. */
    visitRegExpLiteral(node) {
        const handlers = this._handlers;
        if (handlers.onRegExpLiteralEnter) {
            handlers.onRegExpLiteralEnter(node);
        }
        this.visitPattern(node.pattern);
        this.visitFlags(node.flags);
        if (handlers.onRegExpLiteralLeave) {
            handlers.onRegExpLiteralLeave(node);
        }
    }
}
2068
/**
 * Convenience wrapper: parses a regular expression literal with a fresh
 * RegExpParser.
 *
 * @param {*} source Literal to parse; converted with `String()` first.
 * @param {object} [options] Passed unchanged to the RegExpParser constructor.
 * @returns {object} Whatever `RegExpParser#parseLiteral` returns.
 */
function parseRegExpLiteral(source, options) {
    const parser = new RegExpParser(options);
    const text = String(source);
    return parser.parseLiteral(text);
}
/**
 * Convenience wrapper: validates a regular expression literal with a fresh
 * RegExpValidator.
 *
 * @param {string} source Literal to validate; passed through as-is.
 * @param {object} [options] Passed unchanged to the RegExpValidator
 *     constructor.
 * @returns {*} Whatever `RegExpValidator#validateLiteral` returns.
 */
function validateRegExpLiteral(source, options) {
    const validator = new RegExpValidator(options);
    return validator.validateLiteral(source);
}
/**
 * Convenience wrapper: walks an AST with a fresh RegExpVisitor.
 *
 * @param {object} node Root node to start from.
 * @param {object} handlers Callbacks passed to the RegExpVisitor constructor.
 * @returns {void}
 */
function visitRegExpAST(node, handlers) {
    const visitor = new RegExpVisitor(handlers);
    visitor.visit(node);
}
2078
2079exports.AST = ast;
2080exports.RegExpParser = RegExpParser;
2081exports.RegExpValidator = RegExpValidator;
2082exports.parseRegExpLiteral = parseRegExpLiteral;
2083exports.validateRegExpLiteral = validateRegExpLiteral;
2084exports.visitRegExpAST = visitRegExpAST;
2085//# sourceMappingURL=index.js.map