Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve multiline string handling #1879

Merged
merged 18 commits into from Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -17,6 +17,7 @@
- Enforce empty lines before classes and functions with sticky leading comments (#3302)
- Implicitly concatenated strings used as function args are now wrapped inside
parentheses (#3307)
- Improve handling of multiline strings by changing line split behavior (#1879)

### Configuration

Expand Down
48 changes: 48 additions & 0 deletions docs/the_black_code_style/future_style.md
Expand Up @@ -152,3 +152,51 @@ with open("bla.txt") as f, open("x"):
async def main():
await asyncio.sleep(1)
```

### Improved multiline string handling

_Black_ is smarter when formatting multiline strings, especially in function arguments,
to avoid introducing extra line breaks. Previously, it would always consider multiline
strings as not fitting on a single line. With this new feature, _Black_ looks at the
context around the multiline string to decide if it should be inlined or split to a
separate line. For example, when a multiline string is passed to a function, _Black_
will only split the multiline string if a line is too long or if multiple arguments are
being passed.

For example, _Black_ will reformat

```python
textwrap.dedent(
"""\
This is a
multiline string
"""
)
```

to:

```python
textwrap.dedent("""\
This is a
multiline string
""")
```

And:

```python
MULTILINE = """
foobar
""".replace(
"\n", ""
)
```

to:

```python
MULTILINE = """
foobar
""".replace("\n", "")
```
29 changes: 10 additions & 19 deletions src/black/linegen.py
Expand Up @@ -468,7 +468,7 @@ def transform_line(
and not line.should_split_rhs
and not line.magic_trailing_comma
and (
is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
is_line_short_enough(line, mode=mode, line_str=line_str)
or line.contains_unsplittable_type_ignore()
)
and not (line.inside_brackets and line.contains_standalone_comments())
Expand All @@ -492,24 +492,20 @@ def _rhs(
bracket pair instead.
"""
for omit in generate_trailers_to_omit(line, mode.line_length):
lines = list(
right_hand_split(line, mode.line_length, features, omit=omit)
)
lines = list(right_hand_split(line, mode, features, omit=omit))
# Note: this check is only able to figure out if the first line of the
# *current* transformation fits in the line length. This is true only
# for simple cases. All others require running more transforms via
# `transform_line()`. This check doesn't know if those would succeed.
if is_line_short_enough(lines[0], line_length=mode.line_length):
if is_line_short_enough(lines[0], mode=mode):
yield from lines
return

# All splits failed, best effort split with no omits.
# This mostly happens to multiline strings that are by definition
# reported as not fitting a single line, as well as lines that contain
# trailing commas (those have to be exploded).
yield from right_hand_split(
line, line_length=mode.line_length, features=features
)
yield from right_hand_split(line, mode, features=features)

# HACK: nested functions (like _rhs) compiled by mypyc don't retain their
# __name__ attribute which is needed in `run_transformer` further down.
Expand Down Expand Up @@ -602,7 +598,7 @@ def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator

def right_hand_split(
line: Line,
line_length: int,
mode: Mode,
features: Collection[Feature] = (),
omit: Collection[LeafID] = (),
) -> Iterator[Line]:
Expand Down Expand Up @@ -657,18 +653,15 @@ def right_hand_split(
# there are no standalone comments in the body
and not body.contains_standalone_comments(0)
# and we can actually remove the parens
and can_omit_invisible_parens(body, line_length)
and can_omit_invisible_parens(body, mode.line_length)
):
omit = {id(closing_bracket), *omit}
try:
yield from right_hand_split(line, line_length, features=features, omit=omit)
yield from right_hand_split(line, mode, features=features, omit=omit)
return

except CannotSplit as e:
if not (
can_be_split(body)
or is_line_short_enough(body, line_length=line_length)
):
if not (can_be_split(body) or is_line_short_enough(body, mode=mode)):
raise CannotSplit(
"Splitting failed, body is still too long and can't be split."
) from e
Expand Down Expand Up @@ -1272,7 +1265,7 @@ def run_transformer(
or line.contains_multiline_strings()
or result[0].contains_uncollapsable_type_comments()
or result[0].contains_unsplittable_type_ignore()
or is_line_short_enough(result[0], line_length=mode.line_length)
or is_line_short_enough(result[0], mode=mode)
# If any leaves have no parents (which _can_ occur since
# `transform(line)` potentially destroys the line's underlying node
# structure), then we can't proceed. Doing so would cause the below
Expand All @@ -1287,8 +1280,6 @@ def run_transformer(
second_opinion = run_transformer(
line_copy, transform, mode, features_fop, line_str=line_str
)
if all(
is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
):
if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion):
result = second_opinion
return result
86 changes: 78 additions & 8 deletions src/black/lines.py
@@ -1,4 +1,5 @@
import itertools
import math
import sys
from dataclasses import dataclass, field
from typing import (
Expand All @@ -10,6 +11,7 @@
Sequence,
Tuple,
TypeVar,
Union,
cast,
)

Expand Down Expand Up @@ -37,6 +39,7 @@
T = TypeVar("T")
Index = int
LeafID = int
LN = Union[Leaf, Node]


@dataclass
Expand Down Expand Up @@ -708,18 +711,85 @@ def append_leaves(
new_line.append(comment_leaf, preformatted=True)


def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
"""Return True if `line` is no longer than `line_length`.

def is_line_short_enough( # noqa: C901
line: Line, *, mode: Mode, line_str: str = ""
) -> bool:
"""For non-multiline strings, return True if `line` is no longer than `line_length`.
For multiline strings, looks at the context around `line` to determine
if it should be inlined or split up.
Uses the provided `line_str` rendering, if any, otherwise computes a new one.
"""
if not line_str:
line_str = line_to_string(line)
return (
len(line_str) <= line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)

if Preview.multiline_string_handling not in mode:
return (
len(line_str) <= mode.line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)

if line.contains_standalone_comments():
return False
if "\n" not in line_str:
# No multiline strings (MLS) present
return len(line_str) <= mode.line_length
else:
felix-hilden marked this conversation as resolved.
Show resolved Hide resolved
first, *_, last = line_str.split("\n")
if len(first) > mode.line_length or len(last) > mode.line_length:
return False

commas: List[int] = [] # tracks number of commas per depth level
multiline_string: Optional[Leaf] = None
multiline_string_contexts: List[LN] = []

max_level_to_update = math.inf
for i, leaf in enumerate(line.leaves):
if max_level_to_update == math.inf:
had_comma: Optional[int] = None
if leaf.bracket_depth + 1 > len(commas):
commas.append(0)
elif leaf.bracket_depth + 1 < len(commas):
had_comma = commas.pop()
if (
had_comma is not None
and multiline_string is not None
and multiline_string.bracket_depth == leaf.bracket_depth + 1
):
# Have left the level with the MLS, stop tracking commas
max_level_to_update = leaf.bracket_depth
if had_comma > 0:
# MLS was in parens with at least one comma - force split
return False

if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA:
# Ignore non-nested trailing comma
# directly after MLS/MLS-containing expression
ignore_ctxs: List[Optional[LN]] = [None]
ignore_ctxs += multiline_string_contexts
if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1):
commas[leaf.bracket_depth] += 1
if max_level_to_update != math.inf:
max_level_to_update = min(max_level_to_update, leaf.bracket_depth)

if is_multiline_string(leaf):
if len(multiline_string_contexts) > 0:
# >1 multiline string cannot fit on a single line - force split
return False
multiline_string = leaf
ctx: LN = leaf
while str(ctx) in line_str:
multiline_string_contexts.append(ctx)
if ctx.parent is None:
break
ctx = ctx.parent

# May not have a triple-quoted multiline string at all,
# in case of a regular string with embedded newlines and line continuations
if len(multiline_string_contexts) == 0:
return True

return all(val == 0 for val in commas)


def can_be_split(line: Line) -> bool:
Expand Down
1 change: 1 addition & 0 deletions src/black/mode.py
Expand Up @@ -152,6 +152,7 @@ class Preview(Enum):
annotation_parens = auto()
empty_lines_before_class_or_def_with_leading_comments = auto()
long_docstring_quotes_on_newline = auto()
multiline_string_handling = auto()
normalize_docstring_quotes_and_prefixes_properly = auto()
one_element_subscript = auto()
remove_block_trailing_newline = auto()
Expand Down