Skip to content

Commit

Permalink
BUG: np.loadtxt cannot load text file with quoted fields separated by…
Browse files Browse the repository at this point in the history
… whitespace (#22906)

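Fix `np.loadtxt` failing to parse quoted fields when `delimiter=None` (whitespace-delimited input) is combined with a quote character: after a quoted field ended, the tokenizer always resumed in plain unquoted mode instead of returning to whitespace-delimiter mode.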

Closes gh-22899
  • Loading branch information
dmbelov authored and charris committed Jan 8, 2023
1 parent c341fcd commit d50106b
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
3 changes: 2 additions & 1 deletion numpy/core/src/multiarray/textreading/tokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,8 @@ tokenizer_core(tokenizer_state *ts, parser_config *const config)
}
else {
/* continue parsing as if unquoted */
ts->state = TOKENIZE_UNQUOTED;
/* Set to TOKENIZE_UNQUOTED or TOKENIZE_UNQUOTED_WHITESPACE */
ts->state = ts->unquoted_state;
}
break;

Expand Down
8 changes: 8 additions & 0 deletions numpy/lib/npyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,6 +1303,14 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
array([('alpha, #42', 10.), ('beta, #64', 2.)],
dtype=[('label', '<U12'), ('value', '<f8')])
Quoted fields can be separated by multiple whitespace characters:
>>> s = StringIO('"alpha, #42" 10.0\n"beta, #64" 2.0\n')
>>> dtype = np.dtype([("label", "U12"), ("value", float)])
>>> np.loadtxt(s, dtype=dtype, delimiter=None, quotechar='"')
array([('alpha, #42', 10.), ('beta, #64', 2.)],
dtype=[('label', '<U12'), ('value', '<f8')])
Two consecutive quote characters within a quoted field are treated as a
single escaped character:
Expand Down
14 changes: 14 additions & 0 deletions numpy/lib/tests/test_loadtxt.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,20 @@ def test_quoted_field(q):
assert_array_equal(res, expected)


@pytest.mark.parametrize("q", ('"', "'", "`"))
def test_quoted_field_with_whitepace_delimiter(q):
    """Check that quoted fields load when ``delimiter=None`` is used.

    Each row holds a label wrapped in the quote character ``q`` followed by
    a float. The labels contain a comma and a space, so they only come back
    intact if the quoting is honoured while the columns are split with
    ``delimiter=None``.
    """
    record_dtype = np.dtype([('f0', 'U8'), ('f1', np.float64)])
    rows = [("alpha, x", 2.5), ("beta, y", 4.5), ("gamma, z", 5.0)]
    expected = np.array(rows, dtype=record_dtype)

    stream = StringIO(
        f"{q}alpha, x{q} 2.5\n{q}beta, y{q} 4.5\n{q}gamma, z{q} 5.0\n"
    )
    loaded = np.loadtxt(
        stream, dtype=record_dtype, delimiter=None, quotechar=q
    )

    assert_array_equal(loaded, expected)


def test_quote_support_default():
"""Support for quoted fields is disabled by default."""
txt = StringIO('"lat,long", 45, 30\n')
Expand Down

0 comments on commit d50106b

Please sign in to comment.