Skip to content

Commit f4d6072

Browse files
harlan-zw and Philipp Naderer-Puiu authored Oct 15, 2024
fix: case insensitive directives when merging robots.txt (#150)
Co-authored-by: Philipp Naderer-Puiu <philipp.naderer-puiu@orf.at>
1 parent 12c2087 commit f4d6072

File tree

3 files changed

+78
-8
lines changed

3 files changed

+78
-8
lines changed
 

Diff for: ‎src/runtime/util.ts

+7-7
Original file line number | Diff line number | Diff line change
@@ -34,11 +34,11 @@ export function parseRobotsTxt(s: string): ParsedRobotsTxt {
3434
if (sepIndex === -1)
3535
continue
3636
// get the rule, pop before the first :
37-
const rule = line.substring(0, sepIndex).trim()
37+
const rule = line.substring(0, sepIndex).trim().toLowerCase()
3838
const val = line.substring(sepIndex + 1).trim()
3939

4040
switch (rule) {
41-
case 'User-agent':
41+
case 'user-agent':
4242
if (createNewGroup) {
4343
groups.push({
4444
...currentGroup,
@@ -53,21 +53,21 @@ export function parseRobotsTxt(s: string): ParsedRobotsTxt {
5353
}
5454
currentGroup.userAgent.push(val)
5555
break
56-
case 'Allow':
56+
case 'allow':
5757
currentGroup.allow.push(val)
5858
createNewGroup = true
5959
break
60-
case 'Disallow':
60+
case 'disallow':
6161
currentGroup.disallow.push(val)
6262
createNewGroup = true
6363
break
64-
case 'Sitemap':
64+
case 'sitemap':
6565
sitemaps.push(val)
6666
break
67-
case 'Host':
67+
case 'host':
6868
currentGroup.host = val
6969
break
70-
case 'Clean-param':
70+
case 'clean-param':
7171
if (currentGroup.userAgent.includes('Yandex')) {
7272
currentGroup.cleanParam = currentGroup.cleanParam || []
7373
currentGroup.cleanParam.push(val)

Diff for: ‎test/fixtures/startgroupRobots.txt

+14
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
# see rfc 9309
2+
user-agent: ExampleBot
3+
disallow: /foo
4+
allow: /bar
5+
6+
user-agent: examplebot
7+
disallow: /baz
8+
allow: /boo
9+
10+
user-agent:
11+
disallow: /invalid
12+
13+
user-agent: *
14+
disallow: /star

Diff for: ‎test/unit/robotsTxtParser.test.ts

+57-1
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ describe('robotsTxtParser', () => {
245245
})
246246

247247
it('yandex', async () => {
248-
// read fixture yoastRobots.txt
248+
// read fixture yandex.txt
249249
const robotsTxt = await fsp.readFile('./test/fixtures/yandex.txt', { encoding: 'utf-8' })
250250
expect(parseRobotsTxt(robotsTxt)).toMatchInlineSnapshot(`
251251
{
@@ -281,4 +281,60 @@ describe('robotsTxtParser', () => {
281281
}
282282
`)
283283
})
284+
285+
it('case-insensitive startgroupline', async () => {
286+
// read fixture startgroupRobots.txt
287+
const robotsTxt = await fsp.readFile('./test/fixtures/startgroupRobots.txt', { encoding: 'utf-8' })
288+
expect(parseRobotsTxt(robotsTxt)).toMatchInlineSnapshot(`
289+
{
290+
"groups": [
291+
{
292+
"allow": [
293+
"/bar",
294+
],
295+
"comment": [],
296+
"disallow": [
297+
"/foo",
298+
],
299+
"userAgent": [
300+
"ExampleBot",
301+
],
302+
},
303+
{
304+
"allow": [
305+
"/boo",
306+
],
307+
"comment": [],
308+
"disallow": [
309+
"/baz",
310+
],
311+
"userAgent": [
312+
"examplebot",
313+
],
314+
},
315+
{
316+
"allow": [],
317+
"comment": [],
318+
"disallow": [
319+
"/invalid",
320+
],
321+
"userAgent": [
322+
"",
323+
],
324+
},
325+
{
326+
"allow": [],
327+
"comment": [],
328+
"disallow": [
329+
"/star",
330+
],
331+
"userAgent": [
332+
"*",
333+
],
334+
},
335+
],
336+
"sitemaps": [],
337+
}
338+
`)
339+
})
284340
})

0 commit comments

Comments
 (0)
Please sign in to comment.