Skip to content

Commit ceca984

Browse files
fuma-namaantfu
andauthoredJan 20, 2025··
feat(transformers): introduce matchAlgorithm option for new matching algorithm (#835)
Co-authored-by: Anthony Fu <github@antfu.me>
1 parent 4ed7fa3 commit ceca984

21 files changed

+448
-105
lines changed
 

‎docs/packages/transformers.md

+38
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,44 @@ const html = await codeToHtml(code, {
4141

4242
Transformers only applies classes and does not come with styles; you can provide your own CSS rules to style them properly.
4343

44+
## Matching Algorithm
45+
46+
We found that the algorithm for matching comments in v1 is sometimes counterintuitive, and we are fixing it progressively. Since v1.29.0, most transformers accept a new `matchAlgorithm` option that lets you toggle between matching algorithms. Right now, the default is `v1`, which is the old algorithm, and `v3` is the new algorithm. When Shiki v3 lands, the default will become `v3`.
47+
48+
```ts
49+
const html = await codeToHtml(code, {
50+
lang: 'ts',
51+
theme: 'nord',
52+
transformers: [
53+
transformerNotationDiff({
54+
matchAlgorithm: 'v3', // [!code hl]
55+
}),
56+
],
57+
})
58+
```
59+
60+
### `matchAlgorithm: 'v1'`
61+
62+
The matching algorithm mostly affects single-line comment matching: in `v1`, the comment line itself is counted as the first line, while in `v3`, counting starts from the line below the comment:
63+
64+
```ts
65+
// [\!code highlight:3]
66+
console.log('highlighted') // [!code hl]
67+
console.log('highlighted') // [!code hl]
68+
console.log('not highlighted')
69+
```
70+
71+
### `matchAlgorithm: 'v3'`
72+
73+
In `v3`, the matching algorithm will start counting from the line below the comment:
74+
75+
```ts
76+
// [\!code highlight:2]
77+
console.log('highlighted') // [!code hl]
78+
console.log('highlighted') // [!code hl]
79+
console.log('not highlighted')
80+
```
81+
4482
## Transformers
4583

4684
### `transformerNotationDiff`

‎packages/transformers/src/index.ts

-1
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,3 @@ export * from './transformers/remove-line-breaks'
1111
export * from './transformers/remove-notation-escape'
1212
export * from './transformers/render-whitespace'
1313
export * from './transformers/style-to-class'
14-
export * from './utils'

‎packages/transformers/src/shared/highlight-word.ts

+5-4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ function getTextContent(element: ElementContent): string {
2424
* @param ignoredElement
2525
* @param index highlight beginning index
2626
* @param len highlight length
27+
* @param className class name to add to highlighted nodes
2728
*/
2829
function highlightRange(
2930
this: ShikiTransformerContext,
@@ -64,14 +65,14 @@ function highlightRange(
6465
}
6566
}
6667

67-
function hasOverlap(range1: [number, number], range2: [ number, number]): boolean {
68+
function hasOverlap(range1: [number, number], range2: [number, number]): boolean {
6869
return (range1[0] <= range2[1]) && (range1[1]) >= range2[0]
6970
}
7071

7172
function separateToken(span: Element, textNode: Text, index: number, len: number): [
72-
before: Element | undefined,
73-
med: Element,
74-
after: Element | undefined,
73+
before: Element | undefined,
74+
med: Element,
75+
after: Element | undefined,
7576
] {
7677
const text = textNode.value
7778

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import type { Element, Text } from 'hast'
2+
import type { ShikiTransformer, ShikiTransformerContext } from 'shiki'
3+
import { parseComments, type ParsedComments, v1ClearEndCommentPrefix } from './parse-comments'
4+
5+
export type MatchAlgorithm = 'v1' | 'v3'
6+
7+
export interface MatchAlgorithmOptions {
8+
/**
9+
* Match algorithm to use
10+
*
11+
* @see https://shiki.style/packages/transformers#matching-algorithm
12+
* @default 'v1'
13+
*/
14+
matchAlgorithm?: MatchAlgorithm
15+
}
16+
17+
/**
 * Create a transformer that looks for a notation inside code comments and
 * reports every occurrence to `onMatch`.
 *
 * The parsed comments are cached on `code.data._shiki_notation` and reused
 * when that property is already set.
 *
 * @param name name of the resulting transformer
 * @param regex pattern applied to the content of each parsed comment
 * @param onMatch called with the transformer context as `this` for each regex
 * match; returning `true` consumes the match (it is removed from the comment),
 * returning `false` leaves the matched text untouched
 * @param matchAlgorithm with `'v1'` the `index` passed to `onMatch` is the
 * index of the line holding the comment; otherwise, when the comment is the
 * only content of its line, it is the index of the following line
 * @returns a transformer exposing a `code` hook
 */
export function createCommentNotationTransformer(
  name: string,
  regex: RegExp,
  onMatch: (
    this: ShikiTransformerContext,
    match: string[],
    line: Element,
    commentNode: Element,
    lines: Element[],
    index: number
  ) => boolean,
  matchAlgorithm: MatchAlgorithm = 'v1',
): ShikiTransformer {
  return {
    name,
    code(code) {
      const lines = code.children.filter(i => i.type === 'element')
      const linesToRemove: Element[] = []

      code.data ??= {} as any
      const data = code.data as {
        _shiki_notation?: ParsedComments
      }

      data._shiki_notation ??= parseComments(lines, ['jsx', 'tsx'].includes(this.options.lang), matchAlgorithm)
      const parsed = data._shiki_notation

      for (const comment of parsed) {
        // content already emptied by an earlier pass
        if (comment.info[1].length === 0)
          continue

        // a JSX-style comment accounts for three tokens (the same three that
        // are spliced out below), any other comment for one
        const isLineCommentOnly = comment.line.children.length === (comment.isJsxStyle ? 3 : 1)
        let lineIdx = lines.indexOf(comment.line)
        if (isLineCommentOnly && matchAlgorithm !== 'v1')
          lineIdx++

        let replaced = false
        comment.info[1] = comment.info[1].replace(regex, (...match) => {
          if (onMatch.call(this, match, comment.line, comment.token, lines, lineIdx)) {
            replaced = true
            return ''
          }

          return match[0]
        })

        if (!replaced)
          continue

        if (matchAlgorithm === 'v1') {
          comment.info[1] = v1ClearEndCommentPrefix(comment.info[1])
        }

        const isEmpty = comment.info[1].trim().length === 0
        // ignore comment node
        if (isEmpty)
          comment.info[1] = ''

        if (isEmpty && isLineCommentOnly) {
          // removal is deferred so `lines` and `code.children` stay stable
          // while the remaining comments are processed
          linesToRemove.push(comment.line)
        }
        else if (isEmpty && comment.isJsxStyle) {
          // drop the comment token together with its two neighbours
          comment.line.children.splice(comment.line.children.indexOf(comment.token) - 1, 3)
        }
        else if (isEmpty) {
          comment.line.children.splice(comment.line.children.indexOf(comment.token), 1)
        }
        else {
          // something is left in the comment: write the remaining text back
          const head = comment.token.children[0]

          if (head.type === 'text') {
            head.value = comment.info.join('')
          }
        }
      }

      for (const line of linesToRemove) {
        const index = code.children.indexOf(line)
        // already detached: `splice(-1, …)` would drop the last child instead
        if (index === -1)
          continue

        // take the line break that follows the removed line with it,
        // otherwise an empty line is left behind in the output
        const next = code.children[index + 1]
        const removeLength = next?.type === 'text' && next.value === '\n' ? 2 : 1
        code.children.splice(index, removeLength)
      }
    },
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import type { Element, ElementContent } from 'hast'
2+
import type { MatchAlgorithm } from './notation-transformer'
3+
4+
export type ParsedComments = {
5+
line: Element
6+
token: Element
7+
info: [prefix: string, content: string, suffix?: string]
8+
isJsxStyle: boolean
9+
}[]
10+
11+
/** Comment wrapped in `<!--` and `-->`. */
const htmlCommentMatcher = /^(<!--)(.+)(-->)$/
/** Comment wrapped in `/*` and `*\/`. */
const blockCommentMatcher = /^(\/\*)(.+)(\*\/)$/
/** Comment introduced by a prefix only, with no closing suffix. */
const lineCommentMatcher = /^(\/\/|["'#]|;{1,2}|%{1,2}|--)(.*)$/
/** Text starting with `*`, as found inside a multi-line comment like this one. */
const commentBodyMatcher = /^(\*)(.+)$/

/**
 * Comment patterns, tried in order. The flag marks formats that can only
 * appear at the end of a line, which lets the matching be skipped for every
 * token that is not the last one.
 */
const matchers: [re: RegExp, endOfLine: boolean][] = [
  [htmlCommentMatcher, false],
  [blockCommentMatcher, false],
  [lineCommentMatcher, true],
  [commentBodyMatcher, true],
]
24+
25+
/**
 * Collect the comment tokens found in the given lines.
 *
 * With `matchAlgorithm` set to `'v1'` every token of a line is inspected;
 * otherwise only the last token is, or the last two when `jsx` is enabled.
 *
 * @param lines line tokens
 * @param jsx enable JSX parsing
 * @param matchAlgorithm matching algorithm
 * @returns one entry per token recognised as a comment, in document order
 */
export function parseComments(
  lines: Element[],
  jsx: boolean,
  matchAlgorithm: MatchAlgorithm,
): ParsedComments {
  const out: ParsedComments = []

  for (const line of lines) {
    const tokens = line.children
    const lastIndex = tokens.length - 1

    let first: number
    if (matchAlgorithm === 'v1') {
      first = 0
    }
    else if (jsx) {
      // one step further for JSX as comment is inside curly brackets
      first = lastIndex - 1
    }
    else {
      first = lastIndex
    }

    for (let i = Math.max(first, 0); i <= lastIndex; i++) {
      const token = tokens[i]
      if (token.type !== 'element')
        continue

      const head = token.children.at(0)
      if (head?.type !== 'text')
        continue

      const isLast = i === lastIndex
      const info = matchToken(head.value, isLast)
      if (!info)
        continue

      // only a token that has a neighbour on both sides can sit between `{` and `}`
      const hasNeighbours = jsx && !isLast && i !== 0

      out.push({
        info,
        line,
        token,
        isJsxStyle: hasNeighbours && isValue(tokens[i - 1], '{') && isValue(tokens[i + 1], '}'),
      })
    }
  }

  return out
}
80+
81+
/**
 * Check whether `element` is an element node whose first child is a text node
 * equal to `value` once surrounding whitespace is trimmed.
 *
 * @param element node to inspect
 * @param value expected trimmed text
 * @returns `false` for non-element nodes, for elements without children and
 * for elements whose first child is not text
 */
function isValue(element: ElementContent, value: string): boolean {
  if (element.type !== 'element')
    return false
  // `children` may be empty; optional chaining turns that into a plain
  // mismatch instead of a TypeError
  const text = element.children[0]
  if (text?.type !== 'text')
    return false

  return text.value.trim() === value
}
90+
91+
/**
 * Run the comment matchers against the text of a token and return the parts
 * of the first one that matches.
 *
 * The matchers do not accept surrounding whitespace, so it is stripped before
 * matching and given back afterwards: the leading amount as spaces in front
 * of the prefix, the trailing amount as spaces after the suffix when the
 * comment format has one.
 *
 * @param text text value of comment node
 * @param isLast whether the token is located at the end of line
 * @returns `[prefix, content, suffix]`, or `undefined` when nothing matches
 */
function matchToken(text: string, isLast: boolean): [prefix: string, content: string, suffix?: string] | undefined {
  const withoutLeading = text.trimStart()
  const core = withoutLeading.trimEnd()
  const leadingCount = text.length - withoutLeading.length
  const trailingCount = withoutLeading.length - core.length

  for (const [matcher, endOfLine] of matchers) {
    // end-of-line formats are only tried on the last token of a line
    const applicable = isLast || !endOfLine
    const hit = applicable ? matcher.exec(core) : null
    if (hit) {
      const prefix = ' '.repeat(leadingCount) + hit[1]
      const suffix = hit[3] ? hit[3] + ' '.repeat(trailingCount) : undefined
      return [prefix, hit[2], suffix]
    }
  }

  return undefined
}
119+
120+
/**
 * Remove an empty comment prefix left at the end of `text`, e.g. `// `.
 *
 * For matchAlgorithm v1.
 *
 * The prefix has to be followed by nothing but whitespace up to the end of
 * the text. The pattern is anchored that way, rather than capturing the rest
 * of the text, so that an earlier prefix-like character (an apostrophe, a
 * quote, `#`, …) in the remaining text cannot shadow the trailing prefix.
 *
 * @param text comment content left after the notation was removed
 * @returns `text` without the dangling prefix and the whitespace after it, or
 * `text` unchanged when it does not end with an empty prefix
 */
export function v1ClearEndCommentPrefix(text: string): string {
  const match = /(?:\/\/|["'#]|;{1,2}|%{1,2}|--)\s*$/.exec(text)

  return match ? text.slice(0, match.index) : text
}

‎packages/transformers/src/transformers/meta-highlight.ts

+8-4
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ export function parseMetaHighlightString(meta: string): number[] | null {
1212
const num = v.split('-').map(v => Number.parseInt(v, 10))
1313
if (num.length === 1)
1414
return [num[0]]
15-
else
16-
return Array.from({ length: num[1] - num[0] + 1 }, (_, i) => i + num[0])
15+
return Array.from({ length: num[1] - num[0] + 1 }, (_, i) => i + num[0])
1716
})
1817
return lines
1918
}
@@ -45,8 +44,13 @@ export function transformerMetaHighlight(
4544
if (!this.options.meta?.__raw) {
4645
return
4746
}
48-
;(this.meta as any)[symbol] ||= parseMetaHighlightString(this.options.meta.__raw)
49-
const lines: number[] = (this.meta as any)[symbol] || []
47+
const meta = this.meta as {
48+
[symbol]: number[] | null
49+
}
50+
51+
meta[symbol] ??= parseMetaHighlightString(this.options.meta.__raw)
52+
const lines: number[] = meta[symbol] ?? []
53+
5054
if (lines.includes(line))
5155
this.addClassToHast(node, className)
5256
return node

‎packages/transformers/src/transformers/notation-diff.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import type { ShikiTransformer } from 'shiki'
2+
import type { MatchAlgorithmOptions } from '../shared/notation-transformer'
23
import { transformerNotationMap } from './notation-map'
34

4-
export interface TransformerNotationDiffOptions {
5+
export interface TransformerNotationDiffOptions extends MatchAlgorithmOptions {
56
/**
67
* Class for added lines
78
*/
@@ -35,6 +36,7 @@ export function transformerNotationDiff(
3536
'--': classLineRemove,
3637
},
3738
classActivePre,
39+
matchAlgorithm: options.matchAlgorithm,
3840
},
3941
'@shikijs/transformers:notation-diff',
4042
)

‎packages/transformers/src/transformers/notation-error-level.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import type { ShikiTransformer } from 'shiki'
2+
import type { MatchAlgorithmOptions } from '../shared/notation-transformer'
23
import { transformerNotationMap } from './notation-map'
34

4-
export interface TransformerNotationErrorLevelOptions {
5+
export interface TransformerNotationErrorLevelOptions extends MatchAlgorithmOptions {
56
classMap?: Record<string, string | string[]>
67
/**
78
* Class added to the <pre> element when the current code has diff
@@ -27,6 +28,7 @@ export function transformerNotationErrorLevel(
2728
{
2829
classMap,
2930
classActivePre,
31+
matchAlgorithm: options.matchAlgorithm,
3032
},
3133
'@shikijs/transformers:notation-error-level',
3234
)

‎packages/transformers/src/transformers/notation-focus.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import type { ShikiTransformer } from 'shiki'
2+
import type { MatchAlgorithmOptions } from '../shared/notation-transformer'
23
import { transformerNotationMap } from './notation-map'
34

4-
export interface TransformerNotationFocusOptions {
5+
export interface TransformerNotationFocusOptions extends MatchAlgorithmOptions {
56
/**
67
* Class for focused lines
78
*/
@@ -29,6 +30,7 @@ export function transformerNotationFocus(
2930
focus: classActiveLine,
3031
},
3132
classActivePre,
33+
matchAlgorithm: options.matchAlgorithm,
3234
},
3335
'@shikijs/transformers:notation-focus',
3436
)

0 commit comments

Comments
 (0)
Please sign in to comment.