Skip to content

Commit ca78cde

Browse files
authoredMar 21, 2022
JS: Added support for new regex syntax (#3399)
1 parent 8e648da commit ca78cde

File tree

4 files changed

+78
-5
lines changed

4 files changed

+78
-5
lines changed
 

‎components/prism-javascript.js

+18-2
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,24 @@ Prism.languages.javascript['class-name'][0].pattern = /(\b(?:class|extends|imple
5353

5454
Prism.languages.insertBefore('javascript', 'keyword', {
5555
'regex': {
56-
// eslint-disable-next-line regexp/no-dupe-characters-character-class
57-
pattern: /((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)\/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/,
56+
pattern: RegExp(
57+
// lookbehind
58+
// eslint-disable-next-line regexp/no-dupe-characters-character-class
59+
/((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)/.source +
60+
// Regex pattern:
61+
// There are 2 regex patterns here. The RegExp set notation proposal added support for nested character
62+
// classes if the `v` flag is present. Unfortunately, nested CCs are both context-free and incompatible
63+
// with the old syntax, so we have to define 2 different regex patterns.
64+
/\//.source +
65+
'(?:' +
66+
/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}/.source +
67+
'|' +
68+
// `v` flag syntax. This supports 3 levels of nested character classes.
69+
/(?:\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.)*\])*\])*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}v[dgimyus]{0,7}/.source +
70+
')' +
71+
// lookahead
72+
/(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/.source
73+
),
5874
lookbehind: true,
5975
greedy: true,
6076
inside: {

‎components/prism-javascript.min.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎prism.js

+18-2
Original file line numberDiff line numberDiff line change
@@ -1623,8 +1623,24 @@ Prism.languages.javascript['class-name'][0].pattern = /(\b(?:class|extends|imple
16231623

16241624
Prism.languages.insertBefore('javascript', 'keyword', {
16251625
'regex': {
1626-
// eslint-disable-next-line regexp/no-dupe-characters-character-class
1627-
pattern: /((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)\/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/,
1626+
pattern: RegExp(
1627+
// lookbehind
1628+
// eslint-disable-next-line regexp/no-dupe-characters-character-class
1629+
/((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)/.source +
1630+
// Regex pattern:
1631+
// There are 2 regex patterns here. The RegExp set notation proposal added support for nested character
1632+
// classes if the `v` flag is present. Unfortunately, nested CCs are both context-free and incompatible
1633+
// with the old syntax, so we have to define 2 different regex patterns.
1634+
/\//.source +
1635+
'(?:' +
1636+
/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}/.source +
1637+
'|' +
1638+
// `v` flag syntax. This supports 3 levels of nested character classes.
1639+
/(?:\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.)*\])*\])*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}v[dgimyus]{0,7}/.source +
1640+
')' +
1641+
// lookahead
1642+
/(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/.source
1643+
),
16281644
lookbehind: true,
16291645
greedy: true,
16301646
inside: {

‎tests/languages/javascript/regex_feature.test

+41
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
/ '1' '2' '3' '4' '5' /
1010
[/foo/]
1111

12+
// RegExp set notation
13+
;/[A--B]/v;
14+
/[A--[0-9]]/v;
15+
/[\p{Decimal_Number}--[0-9]]/v;
16+
/[\p{Script=Khmer}&&[\p{Letter}\p{Mark}\p{Number}]]/v;
17+
1218
let a = /regex/m // comment
1319
let b = condition ? /regex/ : /another one/
1420
return /regex/;
@@ -88,6 +94,41 @@ yield /regex/;
8894
]],
8995
["punctuation", "]"],
9096

97+
["comment", "// RegExp set notation"],
98+
99+
["punctuation", ";"],
100+
["regex", [
101+
["regex-delimiter", "/"],
102+
["regex-source", "[A--B]"],
103+
["regex-delimiter", "/"],
104+
["regex-flags", "v"]
105+
]],
106+
["punctuation", ";"],
107+
108+
["regex", [
109+
["regex-delimiter", "/"],
110+
["regex-source", "[A--[0-9]]"],
111+
["regex-delimiter", "/"],
112+
["regex-flags", "v"]
113+
]],
114+
["punctuation", ";"],
115+
116+
["regex", [
117+
["regex-delimiter", "/"],
118+
["regex-source", "[\\p{Decimal_Number}--[0-9]]"],
119+
["regex-delimiter", "/"],
120+
["regex-flags", "v"]
121+
]],
122+
["punctuation", ";"],
123+
124+
["regex", [
125+
["regex-delimiter", "/"],
126+
["regex-source", "[\\p{Script=Khmer}&&[\\p{Letter}\\p{Mark}\\p{Number}]]"],
127+
["regex-delimiter", "/"],
128+
["regex-flags", "v"]
129+
]],
130+
["punctuation", ";"],
131+
91132
["keyword", "let"],
92133
" a ",
93134
["operator", "="],

0 commit comments

Comments
 (0)