Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(core): broken links optimization behaves differently than non-optimized logic #9791

Merged
merged 2 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/docusaurus-types/src/routing.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ export type RouteConfig = {
routes?: RouteConfig[];
/** React router config option: `exact` routes would not match subroutes. */
exact?: boolean;
/**
* React router config option: `strict` routes are sensitive to the presence
* of a trailing slash.
*/
strict?: boolean;
/** Used to sort routes. Higher-priority routes will be placed first. */
priority?: number;
/** Extra props; will be copied to routes.js. */
Expand Down
134 changes: 130 additions & 4 deletions packages/docusaurus/src/server/__tests__/brokenLinks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@ import type {RouteConfig} from '@docusaurus/types';
type Params = Parameters<typeof handleBrokenLinks>[0];

// We don't need all the routes attributes for our tests
type SimpleRoute = {path: string; routes?: SimpleRoute[]};
type SimpleRoute = {
path: string;
routes?: SimpleRoute[];
exact?: boolean;
strict?: boolean;
};

// Conveniently apply defaults to function under test
async function testBrokenLinks(params: {
Expand Down Expand Up @@ -43,6 +48,52 @@ describe('handleBrokenLinks', () => {
});
});

it('accepts valid non-exact link', async () => {
await testBrokenLinks({
routes: [{path: '/page1', exact: false}, {path: '/page2/'}],
collectedLinks: {
'/page1': {
links: [
'/page1',
'/page1/',
'/page2',
'/page2/',
'/page1/subPath',
'/page2/subPath',
],
anchors: [],
},
'/page2/': {
links: [
'/page1',
'/page1/',
'/page2',
'/page2/',
'/page1/subPath',
'/page2/subPath',
],
anchors: [],
},
},
});
});

it('accepts valid non-strict link', async () => {
await testBrokenLinks({
routes: [{path: '/page1', strict: false}, {path: '/page2/'}],
collectedLinks: {
'/page1': {
links: ['/page1', '/page1/', '/page2', '/page2/'],
anchors: [],
},
'/page2/': {
links: ['/page1', '/page1/', '/page2', '/page2/'],
anchors: [],
},
},
});
});

it('accepts valid link to uncollected page', async () => {
await testBrokenLinks({
routes: [{path: '/page1'}, {path: '/page2'}],
Expand Down Expand Up @@ -292,6 +343,76 @@ describe('handleBrokenLinks', () => {
`);
});

it('rejects broken link due to strict matching', async () => {
await expect(() =>
testBrokenLinks({
routes: [
{path: '/page1', strict: true},
{path: '/page2/', strict: true},
],

collectedLinks: {
'/page1': {
links: ['/page1', '/page1/', '/page2', '/page2/'],
anchors: [],
},
'/page2/': {
links: ['/page1', '/page1/', '/page2', '/page2/'],
anchors: [],
},
},
}),
).rejects.toThrowErrorMatchingInlineSnapshot(`
"Docusaurus found broken links!

Please check the pages of your site in the list below, and make sure you don't reference any path that does not exist.
Note: it's possible to ignore broken links with the 'onBrokenLinks' Docusaurus configuration, and let the build pass.

Exhaustive list of all broken links found:
- Broken link on source page path = /page1:
-> linking to /page2
- Broken link on source page path = /page2/:
-> linking to /page2
"
`);
});

it('rejects broken link due to strict exact matching', async () => {
await expect(() =>
testBrokenLinks({
routes: [
{path: '/page1', exact: true, strict: true},
{path: '/page2/', exact: true, strict: true},
],

collectedLinks: {
'/page1': {
links: ['/page1', '/page1/', '/page2', '/page2/'],
anchors: [],
},
'/page2/': {
links: ['/page1', '/page1/', '/page2', '/page2/'],
anchors: [],
},
},
}),
).rejects.toThrowErrorMatchingInlineSnapshot(`
"Docusaurus found broken links!

Please check the pages of your site in the list below, and make sure you don't reference any path that does not exist.
Note: it's possible to ignore broken links with the 'onBrokenLinks' Docusaurus configuration, and let the build pass.

Exhaustive list of all broken links found:
- Broken link on source page path = /page1:
-> linking to /page1/
-> linking to /page2
- Broken link on source page path = /page2/:
-> linking to /page1/
-> linking to /page2
"
`);
});

it('rejects broken link with anchor', async () => {
await expect(() =>
testBrokenLinks({
Expand Down Expand Up @@ -728,6 +849,10 @@ describe('handleBrokenLinks', () => {
const routes: SimpleRoute[] = [
...Array.from<SimpleRoute>({length: scale}).map((_, i) => ({
path: `/page${i}`,
exact: true,
})),
...Array.from<SimpleRoute>({length: scale}).map((_, i) => ({
path: `/page/nonExact/${i}`,
})),
...Array.from<SimpleRoute>({length: scale}).fill({
path: '/pageDynamic/:subpath1',
Expand All @@ -741,6 +866,7 @@ describe('handleBrokenLinks', () => {
links: [
...Array.from<SimpleRoute>({length: scale}).flatMap((_2, j) => [
`/page${j}`,
`/page/nonExact/${j}`,
`/page${j}?age=42`,
`/page${j}#anchor${j}`,
`/page${j}?age=42#anchor${j}`,
Expand Down Expand Up @@ -770,9 +896,9 @@ describe('handleBrokenLinks', () => {
// See https://github.com/facebook/docusaurus/issues/9754
// See https://twitter.com/sebastienlorber/status/1749392773415858587
// We expect no more matchRoutes calls than number of dynamic route links
expect(matchRoutesMock).toHaveBeenCalledTimes(scale);
expect(matchRoutesMock).toHaveBeenCalledTimes(scale * 2);
// We expect matchRoutes to be called with a reduced number of routes
expect(routes).toHaveLength(scale * 2);
expect(matchRoutesMock.mock.calls[0]![0]).toHaveLength(scale);
expect(routes).toHaveLength(scale * 3);
expect(matchRoutesMock.mock.calls[0]![0]).toHaveLength(scale * 2);
});
});
39 changes: 33 additions & 6 deletions packages/docusaurus/src/server/brokenLinks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
import _ from 'lodash';
import logger from '@docusaurus/logger';
import {matchRoutes as reactRouterMatchRoutes} from 'react-router-config';
import {parseURLPath, serializeURLPath, type URLPath} from '@docusaurus/utils';
import {
addTrailingSlash,
parseURLPath,
removeTrailingSlash,
serializeURLPath,
type URLPath,
} from '@docusaurus/utils';
import {getAllFinalRoutes} from './utils';
import type {RouteConfig, ReportingSeverity} from '@docusaurus/types';

Expand Down Expand Up @@ -55,16 +61,37 @@ function createBrokenLinksHelper({
}): BrokenLinksHelper {
const validPathnames = new Set(collectedLinks.keys());

// IMPORTANT: this is an optimization
// See https://github.com/facebook/docusaurus/issues/9754
// Matching against the route array can be expensive
// If the route is already in the valid pathnames,
// we can avoid matching against it as an optimization
const remainingRoutes = routes.filter(
(route) => !validPathnames.has(route.path),
);
// we can avoid matching against it
const remainingRoutes = (function filterRoutes() {
// Goal: unit tests should behave the same with this enabled or disabled
const disableOptimization = false;
if (disableOptimization) {
return routes;
}
// We must consider the "exact" and "strict" match attribute
// We can only infer pre-validated pathnames from a route from exact routes
const [validPathnameRoutes, otherRoutes] = _.partition(
routes,
(route) => route.exact && validPathnames.has(route.path),
);
// If a route is non-strict (non-sensitive to trailing slashes)
// We must pre-validate all possible paths
validPathnameRoutes.forEach((validPathnameRoute) => {
if (!validPathnameRoute.strict) {
validPathnames.add(addTrailingSlash(validPathnameRoute.path));
validPathnames.add(removeTrailingSlash(validPathnameRoute.path));
}
});
return otherRoutes;
})();

function isPathnameMatchingAnyRoute(pathname: string): boolean {
if (matchRoutes(remainingRoutes, pathname).length > 0) {
// IMPORTANT: this is an optimization here
// IMPORTANT: this is an optimization
// See https://github.com/facebook/docusaurus/issues/9754
// Large Docusaurus sites have many routes!
// We try to minimize calls to a possibly expensive matchRoutes function
Expand Down