Skip to content

Commit

Permalink
Merge pull request #22968 from charris/backport-22906
Browse files (browse the repository at this point in the history)
BUG: np.loadtxt cannot load text file with quoted fields separated by whitespace
  • Loading branch information
charris committed Jan 9, 2023
2 parents 4b53673 + d50106b commit 847c538
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
3 changes: 2 additions & 1 deletion numpy/core/src/multiarray/textreading/tokenize.cpp
Expand Up @@ -223,7 +223,8 @@ tokenizer_core(tokenizer_state *ts, parser_config *const config)
}
else {
/* continue parsing as if unquoted */
ts->state = TOKENIZE_UNQUOTED;
/* Set to TOKENIZE_UNQUOTED or TOKENIZE_UNQUOTED_WHITESPACE */
ts->state = ts->unquoted_state;
}
break;

Expand Down
8 changes: 8 additions & 0 deletions numpy/lib/npyio.py
Expand Up @@ -1303,6 +1303,14 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
array([('alpha, #42', 10.), ('beta, #64', 2.)],
dtype=[('label', '<U12'), ('value', '<f8')])
Quoted fields can be separated by multiple whitespace characters:
>>> s = StringIO('"alpha, #42" 10.0\n"beta, #64" 2.0\n')
>>> dtype = np.dtype([("label", "U12"), ("value", float)])
>>> np.loadtxt(s, dtype=dtype, delimiter=None, quotechar='"')
array([('alpha, #42', 10.), ('beta, #64', 2.)],
dtype=[('label', '<U12'), ('value', '<f8')])
Two consecutive quote characters within a quoted field are treated as a
single escaped character:
Expand Down
14 changes: 14 additions & 0 deletions numpy/lib/tests/test_loadtxt.py
Expand Up @@ -534,6 +534,20 @@ def test_quoted_field(q):
assert_array_equal(res, expected)


@pytest.mark.parametrize("q", ('"', "'", "`"))
def test_quoted_field_with_whitepace_delimiter(q):
    """Quoted fields load correctly when columns are split on whitespace.

    Every input row is a label wrapped in the quote character ``q``
    (the label itself contains a comma and a space) followed by a float.
    Reading with ``delimiter=None`` and ``quotechar=q`` must yield the
    label as one field and the float as the other.
    """
    record_dtype = np.dtype([('f0', 'U8'), ('f1', np.float64)])
    rows = [("alpha, x", 2.5), ("beta, y", 4.5), ("gamma, z", 5.0)]
    expected = np.array(rows, dtype=record_dtype)

    stream = StringIO(
        f"{q}alpha, x{q} 2.5\n{q}beta, y{q} 4.5\n{q}gamma, z{q} 5.0\n"
    )
    loaded = np.loadtxt(
        stream, dtype=record_dtype, delimiter=None, quotechar=q
    )
    assert_array_equal(loaded, expected)


def test_quote_support_default():
"""Support for quoted fields is disabled by default."""
txt = StringIO('"lat,long", 45, 30\n')
Expand Down

0 comments on commit 847c538

Please sign in to comment.