Skip to content

Commit

Permalink
timezone allows leading MINUS SIGN (U+2212)
Browse files Browse the repository at this point in the history
The sign of a timezone offset may also be MINUS SIGN (U+2212), as
specified by ISO 8601 and RFC 3339.

This does not apply to the RFC 2822 format or to RFC 8536 transition strings.

Issue #835
  • Loading branch information
jtmoon79 committed May 29, 2023
1 parent 5b9d7d3 commit bfa0835
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 9 deletions.
50 changes: 45 additions & 5 deletions src/format/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,8 @@ fn test_parse() {
check!("a", [lit!("a")]; );
check!("+", [lit!("+")]; );
check!("-", [lit!("-")]; );
check!("−", [lit!("−")]; ); // MINUS SIGN (U+2212)
// a Literal may contain whitespace and match whitespace, but this should not be done
check!(" ", [lit!(" ")]; );
check!("aa", [lit!("a")]; TOO_LONG);
check!("🤠", [lit!("a")]; INVALID);
Expand All @@ -681,6 +683,7 @@ fn test_parse() {
check!("1234", [lit!("1234")]; );
check!("+1234", [lit!("+1234")]; );
check!("-1234", [lit!("-1234")]; );
check!("−1234", [lit!("−1234")]; ); // MINUS SIGN (U+2212)
check!("PST", [lit!("PST")]; );
check!("🤠", [lit!("🤠")]; );
check!("🤠a", [lit!("🤠"), lit!("a")]; );
Expand Down Expand Up @@ -747,6 +750,7 @@ fn test_parse() {
check!("-0042", [num!(Year)]; year: -42);
check!("+0042", [num!(Year)]; year: 42);
check!("-42195", [num!(Year)]; year: -42195);
check!("−42195", [num!(Year)]; INVALID); // MINUS SIGN (U+2212)
check!("+42195", [num!(Year)]; year: 42195);
check!(" -42195", [num!(Year)]; INVALID);
check!(" +42195", [num!(Year)]; INVALID);
Expand All @@ -757,6 +761,7 @@ fn test_parse() {
check!(" - 42", [num!(Year)]; INVALID);
check!(" + 42", [num!(Year)]; INVALID);
check!(" -42195", [sp!(" "), num!(Year)]; year: -42195);
check!(" −42195", [sp!(" "), num!(Year)]; INVALID); // MINUS SIGN (U+2212)
check!(" +42195", [sp!(" "), num!(Year)]; year: 42195);
check!(" - 42", [sp!(" "), num!(Year)]; INVALID);
check!(" + 42", [sp!(" "), num!(Year)]; INVALID);
Expand All @@ -767,6 +772,7 @@ fn test_parse() {
check!("345", [num!(Ordinal)]; ordinal: 345);
check!("+345", [num!(Ordinal)]; INVALID);
check!("-345", [num!(Ordinal)]; INVALID);
check!("−345", [num!(Ordinal)]; INVALID); // MINUS SIGN (U+2212)
check!(" 345", [num!(Ordinal)]; INVALID);
check!("345 ", [num!(Ordinal)]; TOO_LONG);
check!(" 345", [sp!(" "), num!(Ordinal)]; ordinal: 345);
Expand Down Expand Up @@ -956,6 +962,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffset)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffset)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34:", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffset)]; TOO_LONG);
Expand All @@ -975,12 +982,14 @@ fn test_parse() {
check!("+1234:567", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("-00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("−00:00", [fix!(TimezoneOffset)]; offset: 0); // MINUS SIGN (U+2212)
check!("+00:01", [fix!(TimezoneOffset)]; offset: 60);
check!("-00:01", [fix!(TimezoneOffset)]; offset: -60);
check!("+00:30", [fix!(TimezoneOffset)]; offset: 1_800);
check!("-00:30", [fix!(TimezoneOffset)]; offset: -1_800);
check!("+24:00", [fix!(TimezoneOffset)]; offset: 86_400);
check!("-24:00", [fix!(TimezoneOffset)]; offset: -86_400);
check!("−24:00", [fix!(TimezoneOffset)]; offset: -86_400); // MINUS SIGN (U+2212)
check!("+99:59", [fix!(TimezoneOffset)]; offset: 359_940);
check!("-99:59", [fix!(TimezoneOffset)]; offset: -359_940);
check!("+00:60", [fix!(TimezoneOffset)]; OUT_OF_RANGE);
Expand All @@ -990,6 +999,7 @@ fn test_parse() {
check!("+12 34 ", [fix!(TimezoneOffset)]; INVALID);
check!(" +12:34", [fix!(TimezoneOffset)]; offset: 45_240);
check!(" -12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!(" −12:34", [fix!(TimezoneOffset)]; offset: -45_240); // MINUS SIGN (U+2212)
check!(" +12:34", [fix!(TimezoneOffset)]; INVALID);
check!(" -12:34", [fix!(TimezoneOffset)]; INVALID);
check!("\t -12:34", [fix!(TimezoneOffset)]; INVALID);
Expand All @@ -1012,13 +1022,16 @@ fn test_parse() {
check!("X12:34", [fix!(TimezoneOffset)]; INVALID);
check!("Z+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("X+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("X−12:34", [fix!(TimezoneOffset)]; INVALID); // MINUS SIGN (U+2212)
check!("🤠+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("+12:34🤠", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:🤠34", [fix!(TimezoneOffset)]; INVALID);
check!("+1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: 45_240);
check!("-1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240);
check!("−1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: 45_240);
check!("-12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240);
check!("−12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240); // MINUS SIGN (U+2212)
check!("🤠+12:34", [lit!("🤠"), fix!(TimezoneOffset)]; offset: 45_240);
check!("Z", [fix!(TimezoneOffset)]; INVALID);
check!("A", [fix!(TimezoneOffset)]; INVALID);
Expand Down Expand Up @@ -1047,6 +1060,7 @@ fn test_parse() {
check!("+123", [fix!(TimezoneOffsetColon)]; TOO_SHORT);
check!("+1234", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("-1234", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("−1234", [fix!(TimezoneOffsetColon)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12345", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+123456", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+1234567", [fix!(TimezoneOffsetColon)]; TOO_LONG);
Expand All @@ -1063,6 +1077,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffsetColon)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffsetColon)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34:", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffsetColon)]; TOO_LONG);
Expand All @@ -1071,6 +1086,8 @@ fn test_parse() {
check!("+12:34:56:78", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+12:3456", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+1234:56", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("−12:34", [fix!(TimezoneOffsetColon)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("−12 : 34", [fix!(TimezoneOffsetColon)]; INVALID); // MINUS SIGN (U+2212)
check!("+12 :34", [fix!(TimezoneOffsetColon)]; INVALID);
check!("+12: 34", [fix!(TimezoneOffsetColon)]; INVALID);
check!("+12 34", [fix!(TimezoneOffsetColon)]; INVALID);
Expand Down Expand Up @@ -1131,6 +1148,7 @@ fn test_parse() {
check!("+123", [fix!(TimezoneOffsetZ)]; TOO_SHORT);
check!("+1234", [fix!(TimezoneOffsetZ)]; offset: 45_240);
check!("-1234", [fix!(TimezoneOffsetZ)]; offset: -45_240);
check!("−1234", [fix!(TimezoneOffsetZ)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12345", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+123456", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+1234567", [fix!(TimezoneOffsetZ)]; TOO_LONG);
Expand All @@ -1147,6 +1165,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffsetZ)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffsetZ)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffsetZ)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffsetZ)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34:", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffsetZ)]; TOO_LONG);
Expand Down Expand Up @@ -1217,6 +1236,7 @@ fn test_parse() {
check!("+123", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT);
check!("+1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!("-1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("−1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12345", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+123456", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+1234567", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
Expand All @@ -1233,6 +1253,7 @@ fn test_parse() {
check!("+12:3", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT);
check!("+12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!("-12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("−12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34:", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+12:34:5", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+12:34:56", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
Expand Down Expand Up @@ -1261,6 +1282,7 @@ fn test_parse() {
check!("+12:34 ", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!(" +12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!(" -12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!(" −12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12345", [internal_fix!(TimezoneOffsetPermissive), num!(Day)]; offset: 45_240, day: 5);
check!("+12:345", [internal_fix!(TimezoneOffsetPermissive), num!(Day)]; offset: 45_240, day: 5);
check!("+12:34:", [internal_fix!(TimezoneOffsetPermissive), lit!(":")]; offset: 45_240);
Expand Down Expand Up @@ -1307,6 +1329,16 @@ fn test_parse() {
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: 32400);
check!("2015-02-04T14:37:05-09:00",
[num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"),
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: -32400);
check!("2015-02-04T14:37:05−09:00", // timezone offset using MINUS SIGN (U+2212)
[num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"),
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: -32400);
check!("20150204143705567",
[num!(Year), num!(Month), num!(Day),
num!(Hour), num!(Minute), num!(Second), internal_fix!(Nanosecond3NoDot)];
Expand Down Expand Up @@ -1549,15 +1581,21 @@ fn test_rfc3339() {
// Test data - (input, Ok(expected result after parse and format) or Err(error code))
let testdates = [
("2015-01-20T17:35:20-08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case
("2015-01-20T17:35:20−08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case with MINUS SIGN (U+2212)
("1944-06-06T04:04:00Z", Ok("1944-06-06T04:04:00+00:00")), // D-day
("2001-09-11T09:45:00-08:00", Ok("2001-09-11T09:45:00-08:00")),
("2015-01-20T17:35:20.001-08:00", Ok("2015-01-20T17:35:20.001-08:00")),
("2015-01-20T17:35:20.001−08:00", Ok("2015-01-20T17:35:20.001-08:00")), // with MINUS SIGN (U+2212)
("2015-01-20T17:35:20.000031-08:00", Ok("2015-01-20T17:35:20.000031-08:00")),
("2015-01-20T17:35:20.000000004-08:00", Ok("2015-01-20T17:35:20.000000004-08:00")),
("2015-01-20T17:35:20.000000004−08:00", Ok("2015-01-20T17:35:20.000000004-08:00")), // with MINUS SIGN (U+2212)
("2015-01-20T17:35:20.000000000452-08:00", Ok("2015-01-20T17:35:20-08:00")), // too small
("2015-01-20T17:35:20.000000000452−08:00", Ok("2015-01-20T17:35:20-08:00")), // too small with MINUS SIGN (U+2212)
("2015-01-20 17:35:20.001-08:00", Err(INVALID)), // missing separator 'T'
("2015/01/20T17:35:20.001-08:00", Err(INVALID)), // wrong separator char YMD
("2015-01-20T17-35-20.001-08:00", Err(INVALID)), // wrong separator char HMS
("-01-20T17:35:20-08:00", Err(INVALID)), // missing year
("99-01-20T17:35:20-08:00", Err(INVALID)), // bad year format
("99999-01-20T17:35:20-08:00", Err(INVALID)), // bad year value
("-2000-01-20T17:35:20-08:00", Err(INVALID)), // bad year value
("2015-02-30T17:35:20-08:00", Err(OUT_OF_RANGE)), // bad day of month value
Expand Down Expand Up @@ -1586,11 +1624,13 @@ fn test_rfc3339() {
("2015-01-20T17:35:20-08:ZZ", Err(INVALID)), // bad offset minutes
("2015-01-20T17:35:20.001-08 : 00", Err(INVALID)), // bad offset separator
("2015-01-20T17:35:20-08:00:00", Err(TOO_LONG)), // bad offset format
("2015-01-20T17:35:20-08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20-08", Err(TOO_SHORT)), // bad offset format
("2015-01-20T", Err(TOO_SHORT)), // missing HMS
("2015-01-20T00:00:1", Err(TOO_SHORT)), // missing complete S
("2015-01-20T00:00:1-08:00", Err(INVALID)), // missing complete S
("2015-01-20T17:35:20+08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20-08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20−08:", Err(TOO_SHORT)), // bad offset format with MINUS SIGN (U+2212)
("2015-01-20T17:35:20-08", Err(TOO_SHORT)), // bad offset format
("2015-01-20T", Err(TOO_SHORT)), // missing HMS
("2015-01-20T00:00:1", Err(TOO_SHORT)), // missing complete S
("2015-01-20T00:00:1-08:00", Err(INVALID)), // missing complete S
];

fn rfc3339_to_datetime(date: &str) -> ParseResult<DateTime<FixedOffset>> {
Expand Down
32 changes: 29 additions & 3 deletions src/format/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,28 @@ pub(super) fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str
where
F: FnMut(&str) -> ParseResult<&str>,
{
timezone_offset_internal(s, consume_colon, false)
timezone_offset_internal(s, consume_colon, false, true)
}

/// Parse a timezone from `s` and return the offset in seconds.
///
/// The `consume_colon` function is used to parse a mandatory or optional `:`
/// separator between hours offset and minutes offset.
///
/// The `allow_missing_minutes` flag allows the timezone minutes offset to be
/// missing from `s`.
///
/// The `allow_tz_minus_sign` flag allows the timezone offset negative character
/// to also be `−` MINUS SIGN (U+2212) in addition to the typical
/// ASCII-compatible `-` HYPHEN-MINUS (U+2D).
/// This is part of [RFC 3339 & ISO 8601].
///
/// [RFC 3339 & ISO 8601]: https://en.wikipedia.org/w/index.php?title=ISO_8601&oldid=1114309368#Time_offsets_from_UTC
fn timezone_offset_internal<F>(
mut s: &str,
mut consume_colon: F,
allow_missing_minutes: bool,
allow_tz_minus_sign: bool,
) -> ParseResult<(&str, i32)>
where
F: FnMut(&str) -> ParseResult<&str>,
Expand All @@ -264,15 +279,26 @@ where
}
let negative = match s.chars().next() {
Some('+') => {
// PLUS SIGN (U+2B)
s = &s['+'.len_utf8()..];

false
}
Some('-') => {
// HYPHEN-MINUS (U+2D)
s = &s['-'.len_utf8()..];

true
}
Some('−') => {
// MINUS SIGN (U+2212)
if !allow_tz_minus_sign {
return Err(INVALID);
}
s = &s['−'.len_utf8()..];

true
}
Some(_) => return Err(INVALID),
None => return Err(TOO_SHORT),
};
Expand Down Expand Up @@ -341,7 +367,7 @@ where
{
match s.as_bytes().first() {
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
_ => timezone_offset_internal(s, colon, true),
_ => timezone_offset_internal(s, colon, true, true),
}
}

Expand Down Expand Up @@ -379,7 +405,7 @@ pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)>
Ok((s, None))
}
} else {
let (s_, offset) = timezone_offset(s, |s| Ok(s))?;
let (s_, offset) = timezone_offset_internal(s, |s| Ok(s), false, false)?;
Ok((s_, Some(offset)))
}
}
Expand Down
20 changes: 19 additions & 1 deletion src/offset/local/tz_info/timezone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,12 @@ struct TimeZoneName {

impl TimeZoneName {
/// Construct a time zone name
///
/// Note: Converts `−` MINUS SIGN (U+2212) to `-` HYPHEN-MINUS (U+2D).
/// Multi-byte MINUS SIGN is allowed in [ISO 8601 / RFC 3339]. But
/// working with single-byte HYPHEN-MINUS is easier and more common.
///
/// [ISO 8601 / RFC 3339]: https://en.wikipedia.org/w/index.php?title=ISO_8601&oldid=1114309368#Time_offsets_from_UTC
fn new(input: &[u8]) -> Result<Self, Error> {
let s = match str::from_utf8(input) {
Ok(s) => s,
Expand All @@ -504,14 +510,21 @@ impl TimeZoneName {
for (i, c) in s.chars().enumerate() {
match c {
'0'..='9' | 'A'..='Z' | 'a'..='z'
// ISO 8601 / RFC 3339 proscribes use of `+` (U+2B) PLUS SIGN
// ISO 8601 / RFC 3339 prescribes use of `+` PLUS SIGN (U+2B)
// in timezone
| '+'
// ISO 8601 / RFC 3339 allows use of `-` HYPHEN-MINUS (U+2D)
// in timezone
| '-' => {
bytes[i + 1] = c as u8;
}
// ISO 8601 / RFC 3339 recommends the use of
// `−` MINUS SIGN (U+2212) in timezone.
// But replace with single-byte `-` HYPHEN-MINUS (U+2D) for
// easier byte <-> char conversions later on.
| '−' => {
bytes[i + 1] = b'-';
}
_ => return Err(Error::LocalTimeType("invalid characters in time zone name")),
}
copied += 1;
Expand Down Expand Up @@ -767,8 +780,10 @@ mod tests {
"1",
"+",
"-",
"−", // MINUS SIGN (U+2212)
"12",
"--",
"−−", // MINUS SIGN (U+2212)
"AB",
"ab",
"12345678",
Expand Down Expand Up @@ -799,9 +814,12 @@ mod tests {
("+1234", "+1234"),
("+1234", "+1234"),
("-1234", "-1234"),
("−1234", "-1234"), // MINUS SIGN (U+2212) to HYPHEN-MINUS (U+002D)
// Ok nonsense
("+++", "+++"),
("-----", "-----"),
("−−−", "---"), // MINUS SIGN (U+2212) to HYPHEN-MINUS (U+002D)
("−−−−−−−", "-------"), // MINUS SIGN (U+2212) to HYPHEN-MINUS (U+002D)
] {
eprintln!("TimeZoneName::new({:?})", input_);
let output = TimeZoneName::new(input_.as_bytes());
Expand Down

0 comments on commit bfa0835

Please sign in to comment.