Skip to content

Commit 5db3a1f

Browse files
author
Wanasit Tanakitrungruang
committed Nov 19, 2023
Fix: Apply lazy loading for RU patterns
1 parent 5ebfcc0 commit 5db3a1f

11 files changed

+116
-145
lines changed
 

‎src/common/parsers/AbstractParserWithWordBoundary.ts

+17-9
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { ParsingComponents, ParsingResult } from "../../results";
33
import { Component } from "../../types";
44

55
/**
6-
*
6+
* A parser that checks for a word boundary and then applies the inner pattern and extraction.
77
*/
88
export abstract class AbstractParserWithWordBoundaryChecking implements Parser {
99
abstract innerPattern(context: ParsingContext): RegExp;
@@ -12,21 +12,29 @@ export abstract class AbstractParserWithWordBoundaryChecking implements Parser {
1212
match: RegExpMatchArray
1313
): ParsingComponents | ParsingResult | { [c in Component]?: number } | null;
1414

15-
private cachedInnerPattern?: RegExp = null;
16-
private cachedPattern?: RegExp = null;
15+
// Override this method if there is a more efficient way to check whether the inner pattern has changed.
16+
innerPatternHasChange(context: ParsingContext, currentInnerPattern: RegExp): boolean {
17+
return this.innerPattern(context) !== currentInnerPattern;
18+
}
1719

1820
patternLeftBoundary(): string {
1921
return `(\\W|^)`;
2022
}
2123

24+
private cachedInnerPattern?: RegExp = null;
25+
private cachedPattern?: RegExp = null;
26+
2227
pattern(context: ParsingContext): RegExp {
23-
const innerPattern = this.innerPattern(context);
24-
if (innerPattern == this.cachedInnerPattern) {
25-
return this.cachedPattern;
28+
if (this.cachedInnerPattern) {
29+
if (!this.innerPatternHasChange(context, this.cachedInnerPattern)) {
30+
return this.cachedPattern;
31+
}
2632
}
27-
28-
this.cachedPattern = new RegExp(`${this.patternLeftBoundary()}${innerPattern.source}`, innerPattern.flags);
29-
this.cachedInnerPattern = innerPattern;
33+
this.cachedInnerPattern = this.innerPattern(context);
34+
this.cachedPattern = new RegExp(
35+
`${this.patternLeftBoundary()}${this.cachedInnerPattern.source}`,
36+
this.cachedInnerPattern.flags
37+
);
3038
return this.cachedPattern;
3139
}
3240

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
2+
import { REGEX_PARTS } from "../constants";
3+
import { ParsingContext } from "../../../chrono";
4+
5+
export abstract class AbstractParserWithLeftBoundaryChecking extends AbstractParserWithWordBoundaryChecking {
6+
abstract innerPatternString(context: ParsingContext): string;
7+
8+
patternLeftBoundary(): string {
9+
return REGEX_PARTS.leftBoundary;
10+
}
11+
12+
innerPattern(context: ParsingContext): RegExp {
13+
return new RegExp(this.innerPatternString(context), REGEX_PARTS.flags);
14+
}
15+
16+
innerPatternHasChange(context: ParsingContext, currentInnerPattern: RegExp): boolean {
17+
return false;
18+
}
19+
}
20+
21+
export abstract class AbstractParserWithLeftRightBoundaryChecking extends AbstractParserWithLeftBoundaryChecking {
22+
innerPattern(context: ParsingContext): RegExp {
23+
return new RegExp(`${this.innerPatternString(context)}${REGEX_PARTS.rightBoundary}`, REGEX_PARTS.flags);
24+
}
25+
}

‎src/locales/ru/parsers/RUCasualDateParser.ts

+4-14
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,11 @@
11
import { ParsingContext } from "../../../chrono";
22
import { ParsingComponents, ParsingResult } from "../../../results";
3-
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
43
import * as references from "../../../common/casualReferences";
5-
import { REGEX_PARTS } from "../constants";
4+
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";
65

7-
const PATTERN = new RegExp(
8-
`(?:с|со)?\\s*(сегодня|вчера|завтра|послезавтра|послепослезавтра|позапозавчера|позавчера)${REGEX_PARTS.rightBoundary}`,
9-
REGEX_PARTS.flags
10-
);
11-
12-
export default class RUCasualDateParser extends AbstractParserWithWordBoundaryChecking {
13-
patternLeftBoundary(): string {
14-
return REGEX_PARTS.leftBoundary;
15-
}
16-
17-
innerPattern(context: ParsingContext): RegExp {
18-
return PATTERN;
6+
export default class RUCasualDateParser extends AbstractParserWithLeftRightBoundaryChecking {
7+
innerPatternString(context: ParsingContext): string {
8+
return `(?:с|со)?\\s*(сегодня|вчера|завтра|послезавтра|послепослезавтра|позапозавчера|позавчера)`;
199
}
2010

2111
innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents | ParsingResult {

‎src/locales/ru/parsers/RUCasualTimeParser.ts

+4-14
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,12 @@
11
import { ParsingContext } from "../../../chrono";
2-
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
32
import * as references from "../../../common/casualReferences";
43
import { assignSimilarDate } from "../../../utils/dayjs";
54
import dayjs from "dayjs";
6-
import { REGEX_PARTS } from "../constants";
5+
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";
76

8-
const PATTERN = new RegExp(
9-
`(сейчас|прошлым\\s*вечером|прошлой\\s*ночью|следующей\\s*ночью|сегодня\\s*ночью|этой\\s*ночью|ночью|этим утром|утром|утра|в\\s*полдень|вечером|вечера|в\\s*полночь)` +
10-
`${REGEX_PARTS.rightBoundary}`,
11-
REGEX_PARTS.flags
12-
);
13-
export default class RUCasualTimeParser extends AbstractParserWithWordBoundaryChecking {
14-
patternLeftBoundary(): string {
15-
return REGEX_PARTS.leftBoundary;
16-
}
17-
18-
innerPattern() {
19-
return PATTERN;
7+
export default class RUCasualTimeParser extends AbstractParserWithLeftRightBoundaryChecking {
8+
innerPatternString(context: ParsingContext): string {
9+
return `(сейчас|прошлым\\s*вечером|прошлой\\s*ночью|следующей\\s*ночью|сегодня\\s*ночью|этой\\s*ночью|ночью|этим утром|утром|утра|в\\s*полдень|вечером|вечера|в\\s*полночь)`;
2010
}
2111

2212
innerExtract(context: ParsingContext, match: RegExpMatchArray) {

‎src/locales/ru/parsers/RUMonthNameLittleEndianParser.ts

+16-26
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,31 @@
11
import { ParsingContext } from "../../../chrono";
22
import { ParsingResult } from "../../../results";
33
import { findYearClosestToRef } from "../../../calculation/years";
4-
import { MONTH_DICTIONARY, REGEX_PARTS } from "../constants";
4+
import { MONTH_DICTIONARY } from "../constants";
55
import { YEAR_PATTERN, parseYear } from "../constants";
66
import { ORDINAL_NUMBER_PATTERN, parseOrdinalNumberPattern } from "../constants";
77
import { matchAnyPattern } from "../../../utils/pattern";
8-
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
9-
10-
// prettier-ignore
11-
const PATTERN = new RegExp(
12-
`(?:с)?\\s*(${ORDINAL_NUMBER_PATTERN})` +
13-
`(?:` +
14-
`\\s{0,3}(?:по|-|–|до)?\\s{0,3}` +
15-
`(${ORDINAL_NUMBER_PATTERN})` +
16-
`)?` +
17-
`(?:-|\\/|\\s{0,3}(?:of)?\\s{0,3})` +
18-
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
19-
`(?:` +
20-
`(?:-|\\/|,?\\s{0,3})` +
21-
`(${YEAR_PATTERN}(?![^\\s]\\d))` +
22-
`)?` +
23-
`${REGEX_PARTS.rightBoundary}`,
24-
REGEX_PARTS.flags
25-
);
8+
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";
269

2710
const DATE_GROUP = 1;
2811
const DATE_TO_GROUP = 2;
2912
const MONTH_NAME_GROUP = 3;
3013
const YEAR_GROUP = 4;
3114

32-
export default class RUMonthNameLittleEndianParser extends AbstractParserWithWordBoundaryChecking {
33-
patternLeftBoundary(): string {
34-
return REGEX_PARTS.leftBoundary;
35-
}
36-
37-
innerPattern(): RegExp {
38-
return PATTERN;
15+
export default class RUMonthNameLittleEndianParser extends AbstractParserWithLeftRightBoundaryChecking {
16+
innerPatternString(context: ParsingContext): string {
17+
// prettier-ignore
18+
return `(?:с)?\\s*(${ORDINAL_NUMBER_PATTERN})` +
19+
`(?:` +
20+
`\\s{0,3}(?:по|-|–|до)?\\s{0,3}` +
21+
`(${ORDINAL_NUMBER_PATTERN})` +
22+
`)?` +
23+
`(?:-|\\/|\\s{0,3}(?:of)?\\s{0,3})` +
24+
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
25+
`(?:` +
26+
`(?:-|\\/|,?\\s{0,3})` +
27+
`(${YEAR_PATTERN}(?![^\\s]\\d))` +
28+
`)?`;
3929
}
4030

4131
innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingResult {

‎src/locales/ru/parsers/RUMonthNameParser.ts

+13-20
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,9 @@
1-
import { FULL_MONTH_NAME_DICTIONARY, MONTH_DICTIONARY, REGEX_PARTS } from "../constants";
1+
import { FULL_MONTH_NAME_DICTIONARY, MONTH_DICTIONARY } from "../constants";
22
import { ParsingContext } from "../../../chrono";
33
import { findYearClosestToRef } from "../../../calculation/years";
44
import { matchAnyPattern } from "../../../utils/pattern";
55
import { YEAR_PATTERN, parseYear } from "../constants";
6-
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
7-
8-
const PATTERN = new RegExp(
9-
`((?:в)\\s*)?` +
10-
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
11-
`\\s*` +
12-
`(?:` +
13-
`[,-]?\\s*(${YEAR_PATTERN})?` +
14-
`)?` +
15-
`(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)`,
16-
REGEX_PARTS.flags
17-
);
6+
import { AbstractParserWithLeftBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";
187

198
const MONTH_NAME_GROUP = 2;
209
const YEAR_GROUP = 3;
@@ -25,13 +14,17 @@ const YEAR_GROUP = 3;
2514
* - Январь 2012
2615
* - Январь
2716
*/
28-
export default class RUMonthNameParser extends AbstractParserWithWordBoundaryChecking {
29-
patternLeftBoundary(): string {
30-
return REGEX_PARTS.leftBoundary;
31-
}
32-
33-
innerPattern(): RegExp {
34-
return PATTERN;
17+
export default class RUMonthNameParser extends AbstractParserWithLeftBoundaryChecking {
18+
innerPatternString(context: ParsingContext): string {
19+
return (
20+
`((?:в)\\s*)?` +
21+
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
22+
`\\s*` +
23+
`(?:` +
24+
`[,-]?\\s*(${YEAR_PATTERN})?` +
25+
`)?` +
26+
`(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)`
27+
);
3528
}
3629

3730
innerExtract(context: ParsingContext, match: RegExpMatchArray) {

‎src/locales/ru/parsers/RURelativeDateFormatParser.ts

+6-14
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,16 @@ import { ParsingComponents } from "../../../results";
44
import dayjs from "dayjs";
55
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
66
import { matchAnyPattern } from "../../../utils/pattern";
7-
8-
const PATTERN = new RegExp(
9-
`(в прошлом|на прошлой|на следующей|в следующем|на этой|в этом)\\s*(${matchAnyPattern(
10-
TIME_UNIT_DICTIONARY
11-
)})(?=\\s*)${REGEX_PARTS.rightBoundary}`,
12-
REGEX_PARTS.flags
13-
);
7+
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";
148

159
const MODIFIER_WORD_GROUP = 1;
1610
const RELATIVE_WORD_GROUP = 2;
1711

18-
export default class RURelativeDateFormatParser extends AbstractParserWithWordBoundaryChecking {
19-
patternLeftBoundary(): string {
20-
return REGEX_PARTS.leftBoundary;
21-
}
22-
23-
innerPattern(): RegExp {
24-
return PATTERN;
12+
export default class RURelativeDateFormatParser extends AbstractParserWithLeftRightBoundaryChecking {
13+
innerPatternString(context: ParsingContext): string {
14+
return `(в прошлом|на прошлой|на следующей|в следующем|на этой|в этом)\\s*(${matchAnyPattern(
15+
TIME_UNIT_DICTIONARY
16+
)})`;
2517
}
2618

2719
innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {

‎src/locales/ru/parsers/RUTimeUnitAgoFormatParser.ts

+5-11
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,12 @@
11
import { ParsingContext } from "../../../chrono";
2-
import { parseTimeUnits, REGEX_PARTS, TIME_UNITS_PATTERN } from "../constants";
2+
import { parseTimeUnits, TIME_UNITS_PATTERN } from "../constants";
33
import { ParsingComponents } from "../../../results";
4-
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
54
import { reverseTimeUnits } from "../../../utils/timeunits";
5+
import { AbstractParserWithLeftBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";
66

7-
const PATTERN = new RegExp(`(${TIME_UNITS_PATTERN})\\s{0,5}назад(?=(?:\\W|$))`, REGEX_PARTS.flags);
8-
9-
export default class RUTimeUnitAgoFormatParser extends AbstractParserWithWordBoundaryChecking {
10-
patternLeftBoundary(): string {
11-
return REGEX_PARTS.leftBoundary;
12-
}
13-
14-
innerPattern(): RegExp {
15-
return PATTERN;
7+
export default class RUTimeUnitAgoFormatParser extends AbstractParserWithLeftBoundaryChecking {
8+
innerPatternString(context: ParsingContext): string {
9+
return `(${TIME_UNITS_PATTERN})\\s{0,5}назад(?=(?:\\W|$))`;
1610
}
1711

1812
innerExtract(context: ParsingContext, match: RegExpMatchArray) {

‎src/locales/ru/parsers/RUTimeUnitCasualRelativeFormatParser.ts

+4-13
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,12 @@
11
import { TIME_UNITS_PATTERN, parseTimeUnits, REGEX_PARTS } from "../constants";
22
import { ParsingContext } from "../../../chrono";
33
import { ParsingComponents } from "../../../results";
4-
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
54
import { reverseTimeUnits } from "../../../utils/timeunits";
5+
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";
66

7-
const PATTERN = new RegExp(
8-
`(эти|последние|прошлые|следующие|после|спустя|через|\\+|-)\\s*(${TIME_UNITS_PATTERN})${REGEX_PARTS.rightBoundary}`,
9-
REGEX_PARTS.flags
10-
);
11-
12-
export default class RUTimeUnitCasualRelativeFormatParser extends AbstractParserWithWordBoundaryChecking {
13-
patternLeftBoundary(): string {
14-
return REGEX_PARTS.leftBoundary;
15-
}
16-
17-
innerPattern(): RegExp {
18-
return PATTERN;
7+
export default class RUTimeUnitCasualRelativeFormatParser extends AbstractParserWithLeftRightBoundaryChecking {
8+
innerPatternString(context: ParsingContext): string {
9+
return `(эти|последние|прошлые|следующие|после|спустя|через|\\+|-)\\s*(${TIME_UNITS_PATTERN})`;
1910
}
2011

2112
innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {

‎src/locales/ru/parsers/RUTimeUnitWithinFormatParser.ts

+10-4
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,23 @@ import { ParsingComponents } from "../../../results";
44
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
55

66
const PATTERN = `(?:(?:около|примерно)\\s*(?:~\\s*)?)?(${TIME_UNITS_PATTERN})${REGEX_PARTS.rightBoundary}`;
7-
const PATTERN_WITH_PREFIX = new RegExp(`(?:в течение|в течении)\\s*${PATTERN}`, REGEX_PARTS.flags);
8-
9-
const PATTERN_WITHOUT_PREFIX = new RegExp(PATTERN, "i");
107

118
export default class RUTimeUnitWithinFormatParser extends AbstractParserWithWordBoundaryChecking {
9+
private readonly patternWithPrefix: RegExp;
10+
private readonly patternWithoutPrefix: RegExp;
11+
12+
constructor() {
13+
super();
14+
this.patternWithPrefix = new RegExp(`(?:в течение|в течении)\\s*${PATTERN}`, REGEX_PARTS.flags);
15+
this.patternWithoutPrefix = new RegExp(PATTERN, REGEX_PARTS.flags);
16+
}
17+
1218
patternLeftBoundary(): string {
1319
return REGEX_PARTS.leftBoundary;
1420
}
1521

1622
innerPattern(context: ParsingContext): RegExp {
17-
return context.option.forwardDate ? PATTERN_WITHOUT_PREFIX : PATTERN_WITH_PREFIX;
23+
return context.option.forwardDate ? this.patternWithoutPrefix : this.patternWithPrefix;
1824
}
1925

2026
innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {

‎src/locales/ru/parsers/RUWeekdayParser.ts

+12-20
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,24 @@
11
import { ParsingContext } from "../../../chrono";
22
import { ParsingComponents } from "../../../results";
3-
import { REGEX_PARTS, WEEKDAY_DICTIONARY } from "../constants";
3+
import { WEEKDAY_DICTIONARY } from "../constants";
44
import { matchAnyPattern } from "../../../utils/pattern";
5-
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
65
import { createParsingComponentsAtWeekday } from "../../../common/calculation/weekdays";
7-
8-
const PATTERN = new RegExp(
9-
`(?:(?:,|\\(|()\\s*)?` +
10-
`(?:в\\s*?)?` +
11-
`(?:(эту|этот|прошлый|прошлую|следующий|следующую|следующего)\\s*)?` +
12-
`(${matchAnyPattern(WEEKDAY_DICTIONARY)})` +
13-
`(?:\\s*(?:,|\\)|)))?` +
14-
`(?:\\s*на\\s*(этой|прошлой|следующей)\\s*неделе)?` +
15-
`${REGEX_PARTS.rightBoundary}`,
16-
REGEX_PARTS.flags
17-
);
6+
import { AbstractParserWithLeftRightBoundaryChecking } from "./AbstractParserWithWordBoundaryChecking";
187

198
const PREFIX_GROUP = 1;
209
const WEEKDAY_GROUP = 2;
2110
const POSTFIX_GROUP = 3;
2211

23-
export default class RUWeekdayParser extends AbstractParserWithWordBoundaryChecking {
24-
innerPattern(): RegExp {
25-
return PATTERN;
26-
}
27-
28-
patternLeftBoundary(): string {
29-
return REGEX_PARTS.leftBoundary;
12+
export default class RUWeekdayParser extends AbstractParserWithLeftRightBoundaryChecking {
13+
innerPatternString(context: ParsingContext): string {
14+
return (
15+
`(?:(?:,|\\(|()\\s*)?` +
16+
`(?:в\\s*?)?` +
17+
`(?:(эту|этот|прошлый|прошлую|следующий|следующую|следующего)\\s*)?` +
18+
`(${matchAnyPattern(WEEKDAY_DICTIONARY)})` +
19+
`(?:\\s*(?:,|\\)|)))?` +
20+
`(?:\\s*на\\s*(этой|прошлой|следующей)\\s*неделе)?`
21+
);
3022
}
3123

3224
innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {

0 commit comments

Comments
 (0)
Please sign in to comment.