Skip to content

Commit

Permalink
[FIX] Restrict end of regex to prevent matches with non-date strings (#136)
Browse files Browse the repository at this point in the history

* Restrict end of regex to prevent matches with non-date strings

* Add hour and minute to regex to further increase robustness

* Bump version 0.5.3
  • Loading branch information
joweich committed Apr 30, 2024
1 parent c311cda commit 12ed8a0
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion chatminer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#
__version__ = "0.5.2"
__version__ = "0.5.3"
2 changes: 1 addition & 1 deletion chatminer/chatparsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def __init__(self, filepath: str):

def _read_raw_messages_from_file(self):
def _is_new_message(line: str):
regex = r"^[\u200e]?\[?(\d{1,4})([./,-])\d{1,2}\2\d{2,4}([, ])"
regex = r"^[\u200e]?\[?(\d{1,4})([./,-])\d{1,2}\2\d{2,4}(?:\s|,\s)(0?\d|1\d|2[0-4]):([0-5]?\d)"
return re.match(regex, line)

with self._file.open(encoding="utf-8") as f:
Expand Down

0 comments on commit 12ed8a0

Please sign in to comment.