Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve multiline string handling #1879

Merged
merged 18 commits into from Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -32,6 +32,7 @@
string lambda values are now wrapped in parentheses (#3440)
- Exclude string type annotations from improved string processing; fix crash when the
return type annotation is stringified and spans across multiple lines (#3462)
- Improve handling of multiline strings by changing line split behavior (#1879)

### Configuration

Expand Down
48 changes: 48 additions & 0 deletions docs/the_black_code_style/future_style.md
Expand Up @@ -152,3 +152,51 @@ with open("bla.txt") as f, open("x"):
async def main():
await asyncio.sleep(1)
```

### Improved multiline string handling

_Black_ is smarter when formatting multiline strings, especially in function arguments,
to avoid introducing extra line breaks. Previously, it would always consider multiline
strings as not fitting on a single line. With this new feature, _Black_ looks at the
context around the multiline string to decide if it should be inlined or split to a
separate line. For example, when a multiline string is passed to a function, _Black_
will only split the multiline string if a line is too long or if multiple arguments are
being passed.

For example, _Black_ will reformat

```python
textwrap.dedent(
"""\
This is a
multiline string
"""
)
```

to:

```python
textwrap.dedent("""\
This is a
multiline string
""")
```

And:

```python
MULTILINE = """
foobar
""".replace(
"\n", ""
)
```

to:

```python
MULTILINE = """
foobar
""".replace("\n", "")
```
43 changes: 19 additions & 24 deletions src/black/linegen.py
Expand Up @@ -2,7 +2,7 @@
Generating lines of code.
"""
import sys
from dataclasses import dataclass
from dataclasses import dataclass, replace
from enum import Enum, auto
from functools import partial, wraps
from typing import Collection, Iterator, List, Optional, Set, Union, cast
Expand Down Expand Up @@ -492,7 +492,7 @@ def transform_line(
and not line.should_split_rhs
and not line.magic_trailing_comma
and (
is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
is_line_short_enough(line, mode=mode, line_str=line_str)
or line.contains_unsplittable_type_ignore()
)
and not (line.inside_brackets and line.contains_standalone_comments())
Expand All @@ -516,24 +516,20 @@ def _rhs(
bracket pair instead.
"""
for omit in generate_trailers_to_omit(line, mode.line_length):
lines = list(
right_hand_split(line, mode.line_length, features, omit=omit)
)
lines = list(right_hand_split(line, mode, features, omit=omit))
# Note: this check is only able to figure out if the first line of the
# *current* transformation fits in the line length. This is true only
# for simple cases. All others require running more transforms via
# `transform_line()`. This check doesn't know if those would succeed.
if is_line_short_enough(lines[0], line_length=mode.line_length):
if is_line_short_enough(lines[0], mode=mode):
yield from lines
return

# All splits failed, best effort split with no omits.
# This mostly happens to multiline strings that are by definition
# reported as not fitting a single line, as well as lines that contain
# trailing commas (those have to be exploded).
yield from right_hand_split(
line, line_length=mode.line_length, features=features
)
yield from right_hand_split(line, mode, features=features)

# HACK: nested functions (like _rhs) compiled by mypyc don't retain their
# __name__ attribute which is needed in `run_transformer` further down.
Expand Down Expand Up @@ -649,7 +645,7 @@ class _RHSResult:

def right_hand_split(
line: Line,
line_length: int,
mode: Mode,
features: Collection[Feature] = (),
omit: Collection[LeafID] = (),
) -> Iterator[Line]:
Expand All @@ -663,7 +659,7 @@ def right_hand_split(
"""
rhs_result = _first_right_hand_split(line, omit=omit)
yield from _maybe_split_omitting_optional_parens(
rhs_result, line, line_length, features=features, omit=omit
rhs_result, line, mode, features=features, omit=omit
)


Expand Down Expand Up @@ -718,7 +714,7 @@ def _first_right_hand_split(
def _maybe_split_omitting_optional_parens(
rhs: _RHSResult,
line: Line,
line_length: int,
mode: Mode,
features: Collection[Feature] = (),
omit: Collection[LeafID] = (),
) -> Iterator[Line]:
Expand All @@ -736,7 +732,7 @@ def _maybe_split_omitting_optional_parens(
# there are no standalone comments in the body
and not rhs.body.contains_standalone_comments(0)
# and we can actually remove the parens
and can_omit_invisible_parens(rhs.body, line_length)
and can_omit_invisible_parens(rhs.body, mode.line_length)
):
omit = {id(rhs.closing_bracket), *omit}
try:
Expand All @@ -751,23 +747,24 @@ def _maybe_split_omitting_optional_parens(
and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
# the left side of assignment is short enough (the -1 is for the ending
# optional paren)
and is_line_short_enough(rhs.head, line_length=line_length - 1)
and is_line_short_enough(
rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
)
# the left side of assignment won't explode further because of magic
# trailing comma
and rhs.head.magic_trailing_comma is None
# the split by omitting optional parens isn't preferred by some other
# reason
and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length)
and not _prefer_split_rhs_oop(rhs_oop, mode)
):
yield from _maybe_split_omitting_optional_parens(
rhs_oop, line, line_length, features=features, omit=omit
rhs_oop, line, mode, features=features, omit=omit
)
return

except CannotSplit as e:
if not (
can_be_split(rhs.body)
or is_line_short_enough(rhs.body, line_length=line_length)
can_be_split(rhs.body) or is_line_short_enough(rhs.body, mode=mode)
):
raise CannotSplit(
"Splitting failed, body is still too long and can't be split."
Expand All @@ -791,7 +788,7 @@ def _maybe_split_omitting_optional_parens(
yield result


def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
def _prefer_split_rhs_oop(rhs_oop: _RHSResult, mode: Mode) -> bool:
"""
Returns whether we should prefer the result from a split omitting optional parens.
"""
Expand All @@ -811,7 +808,7 @@ def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
# the first line still contains the `=`)
any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves)
# the first line is short enough
and is_line_short_enough(rhs_oop.head, line_length=line_length)
and is_line_short_enough(rhs_oop.head, mode=mode)
)
# contains unsplittable type ignore
or rhs_oop.head.contains_unsplittable_type_ignore()
Expand Down Expand Up @@ -1428,7 +1425,7 @@ def run_transformer(
or line.contains_multiline_strings()
or result[0].contains_uncollapsable_type_comments()
or result[0].contains_unsplittable_type_ignore()
or is_line_short_enough(result[0], line_length=mode.line_length)
or is_line_short_enough(result[0], mode=mode)
# If any leaves have no parents (which _can_ occur since
# `transform(line)` potentially destroys the line's underlying node
# structure), then we can't proceed. Doing so would cause the below
Expand All @@ -1443,8 +1440,6 @@ def run_transformer(
second_opinion = run_transformer(
line_copy, transform, mode, features_fop, line_str=line_str
)
if all(
is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
):
if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion):
result = second_opinion
return result
86 changes: 78 additions & 8 deletions src/black/lines.py
@@ -1,4 +1,5 @@
import itertools
import math
import sys
from dataclasses import dataclass, field
from typing import (
Expand All @@ -10,6 +11,7 @@
Sequence,
Tuple,
TypeVar,
Union,
cast,
)

Expand Down Expand Up @@ -37,6 +39,7 @@
T = TypeVar("T")
Index = int
LeafID = int
LN = Union[Leaf, Node]


@dataclass
Expand Down Expand Up @@ -711,18 +714,85 @@ def append_leaves(
new_line.append(comment_leaf, preformatted=True)


def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
"""Return True if `line` is no longer than `line_length`.

def is_line_short_enough( # noqa: C901
line: Line, *, mode: Mode, line_str: str = ""
) -> bool:
"""For non-multiline strings, return True if `line` is no longer than `line_length`.
For multiline strings, looks at the context around `line` to determine
if it should be inlined or split up.
Uses the provided `line_str` rendering, if any, otherwise computes a new one.
"""
if not line_str:
line_str = line_to_string(line)
return (
len(line_str) <= line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)

if Preview.multiline_string_handling not in mode:
return (
len(line_str) <= mode.line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)

if line.contains_standalone_comments():
return False
if "\n" not in line_str:
# No multiline strings (MLS) present
return len(line_str) <= mode.line_length
else:
felix-hilden marked this conversation as resolved.
Show resolved Hide resolved
first, *_, last = line_str.split("\n")
if len(first) > mode.line_length or len(last) > mode.line_length:
return False

commas: List[int] = [] # tracks number of commas per depth level
multiline_string: Optional[Leaf] = None
multiline_string_contexts: List[LN] = []

max_level_to_update = math.inf
for i, leaf in enumerate(line.leaves):
if max_level_to_update == math.inf:
had_comma: Optional[int] = None
if leaf.bracket_depth + 1 > len(commas):
commas.append(0)
elif leaf.bracket_depth + 1 < len(commas):
had_comma = commas.pop()
if (
had_comma is not None
and multiline_string is not None
and multiline_string.bracket_depth == leaf.bracket_depth + 1
):
# Have left the level with the MLS, stop tracking commas
max_level_to_update = leaf.bracket_depth
if had_comma > 0:
# MLS was in parens with at least one comma - force split
return False

if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA:
# Ignore non-nested trailing comma
# directly after MLS/MLS-containing expression
ignore_ctxs: List[Optional[LN]] = [None]
ignore_ctxs += multiline_string_contexts
if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1):
commas[leaf.bracket_depth] += 1
if max_level_to_update != math.inf:
max_level_to_update = min(max_level_to_update, leaf.bracket_depth)

if is_multiline_string(leaf):
if len(multiline_string_contexts) > 0:
# >1 multiline string cannot fit on a single line - force split
return False
multiline_string = leaf
ctx: LN = leaf
while str(ctx) in line_str:
multiline_string_contexts.append(ctx)
if ctx.parent is None:
break
ctx = ctx.parent

# May not have a triple-quoted multiline string at all,
# in case of a regular string with embedded newlines and line continuations
if len(multiline_string_contexts) == 0:
return True

return all(val == 0 for val in commas)


def can_be_split(line: Line) -> bool:
Expand Down
1 change: 1 addition & 0 deletions src/black/mode.py
Expand Up @@ -153,6 +153,7 @@ class Preview(Enum):
empty_lines_before_class_or_def_with_leading_comments = auto()
handle_trailing_commas_in_head = auto()
long_docstring_quotes_on_newline = auto()
multiline_string_handling = auto()
normalize_docstring_quotes_and_prefixes_properly = auto()
one_element_subscript = auto()
prefer_splitting_right_hand_side_of_assignments = auto()
Expand Down