Skip to content

Commit

Permalink
Add intenal flag for per-line type checking peformance (#14173)
Browse files Browse the repository at this point in the history
This should help with the investigation of tricky performance
regressions like #13821.

I tried to implement in such a way that it will give minimal impact when
not used (since I am touching a hot method).
  • Loading branch information
ilevkivskyi committed Nov 23, 2022
1 parent 07139ef commit 04d44c1
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 16 deletions.
30 changes: 24 additions & 6 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from __future__ import annotations

import collections
import contextlib
import errno
import gc
Expand Down Expand Up @@ -278,6 +279,8 @@ def _build(
TypeState.reset_all_subtype_caches()
if options.timing_stats is not None:
dump_timing_stats(options.timing_stats, graph)
if options.line_checking_stats is not None:
dump_line_checking_stats(options.line_checking_stats, graph)
return BuildResult(manager, graph)
finally:
t0 = time.time()
Expand Down Expand Up @@ -1889,6 +1892,10 @@ class State:
# Cumulative time spent on this file, in microseconds (for profiling stats)
time_spent_us: int = 0

# Per-line type-checking time (cumulative time spent type-checking expressions
# on a given source code line).
per_line_checking_time_ns: dict[int, int]

def __init__(
self,
id: str | None,
Expand Down Expand Up @@ -1956,6 +1963,7 @@ def __init__(
source = ""
self.source = source
self.add_ancestors()
self.per_line_checking_time_ns = collections.defaultdict(int)
t0 = time.time()
self.meta = validate_meta(self.meta, self.id, self.path, self.ignore_all, manager)
self.manager.add_stats(validate_meta_time=time.time() - t0)
Expand Down Expand Up @@ -2320,6 +2328,7 @@ def type_checker(self) -> TypeChecker:
self.tree,
self.xpath,
manager.plugin,
self.per_line_checking_time_ns,
)
return self._type_checker

Expand Down Expand Up @@ -2945,13 +2954,22 @@ def dumps(self) -> str:


def dump_timing_stats(path: str, graph: Graph) -> None:
"""
Dump timing stats for each file in the given graph
"""
"""Dump timing stats for each file in the given graph."""
with open(path, "w") as f:
for k in sorted(graph.keys()):
v = graph[k]
f.write(f"{v.id} {v.time_spent_us}\n")
for id in sorted(graph):
f.write(f"{id} {graph[id].time_spent_us}\n")


def dump_line_checking_stats(path: str, graph: Graph) -> None:
"""Dump per-line expression type checking stats."""
with open(path, "w") as f:
for id in sorted(graph):
if not graph[id].per_line_checking_time_ns:
continue
f.write(f"{id}:\n")
for line in sorted(graph[id].per_line_checking_time_ns):
line_time = graph[id].per_line_checking_time_ns[line]
f.write(f"{line:>5} {line_time/1000:8.1f}\n")


def dump_graph(graph: Graph, stdout: TextIO | None = None) -> None:
Expand Down
5 changes: 4 additions & 1 deletion mypy/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ def __init__(
tree: MypyFile,
path: str,
plugin: Plugin,
per_line_checking_time_ns: dict[int, int],
) -> None:
"""Construct a type checker.
Expand All @@ -376,7 +377,9 @@ def __init__(
self.path = path
self.msg = MessageBuilder(errors, modules)
self.plugin = plugin
self.expr_checker = mypy.checkexpr.ExpressionChecker(self, self.msg, self.plugin)
self.expr_checker = mypy.checkexpr.ExpressionChecker(
self, self.msg, self.plugin, per_line_checking_time_ns
)
self.pattern_checker = PatternChecker(self, self.msg, self.plugin)
self.tscope = Scope()
self.scope = CheckerScope(tree)
Expand Down
29 changes: 26 additions & 3 deletions mypy/checkexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import itertools
import time
from contextlib import contextmanager
from typing import Callable, ClassVar, Iterator, List, Optional, Sequence, cast
from typing_extensions import Final, TypeAlias as _TypeAlias, overload
Expand Down Expand Up @@ -263,11 +264,22 @@ class ExpressionChecker(ExpressionVisitor[Type]):
strfrm_checker: StringFormatterChecker
plugin: Plugin

def __init__(self, chk: mypy.checker.TypeChecker, msg: MessageBuilder, plugin: Plugin) -> None:
def __init__(
self,
chk: mypy.checker.TypeChecker,
msg: MessageBuilder,
plugin: Plugin,
per_line_checking_time_ns: dict[int, int],
) -> None:
"""Construct an expression type checker."""
self.chk = chk
self.msg = msg
self.plugin = plugin
self.per_line_checking_time_ns = per_line_checking_time_ns
self.collect_line_checking_stats = self.chk.options.line_checking_stats is not None
# Are we already visiting some expression? This is used to avoid double counting
# time for nested expressions.
self.in_expression = False
self.type_context = [None]

# Temporary overrides for expression types. This is currently
Expand Down Expand Up @@ -4727,7 +4739,14 @@ def accept(
applies only to this expression and not any subexpressions.
"""
if node in self.type_overrides:
# This branch is very fast, there is no point timing it.
return self.type_overrides[node]
# We don't use context manager here to get most precise data (and avoid overhead).
record_time = False
if self.collect_line_checking_stats and not self.in_expression:
t0 = time.perf_counter_ns()
self.in_expression = True
record_time = True
self.type_context.append(type_context)
old_is_callee = self.is_callee
self.is_callee = is_callee
Expand Down Expand Up @@ -4762,9 +4781,13 @@ def accept(
self.msg.disallowed_any_type(typ, node)

if not self.chk.in_checked_function() or self.chk.current_node_deferred:
return AnyType(TypeOfAny.unannotated)
result: Type = AnyType(TypeOfAny.unannotated)
else:
return typ
result = typ
if record_time:
self.per_line_checking_time_ns[node.line] += time.perf_counter_ns() - t0
self.in_expression = False
return result

def named_type(self, name: str) -> Instance:
"""Return an instance type with type given by the name and no type
Expand Down
8 changes: 7 additions & 1 deletion mypy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,8 +1082,14 @@ def add_invertible_flag(
"--inferstats", action="store_true", dest="dump_inference_stats", help=argparse.SUPPRESS
)
parser.add_argument("--dump-build-stats", action="store_true", help=argparse.SUPPRESS)
# dump timing stats for each processed file into the given output file
# Dump timing stats for each processed file into the given output file
parser.add_argument("--timing-stats", dest="timing_stats", help=argparse.SUPPRESS)
# Dump per line type checking timing stats for each processed file into the given
# output file. Only total time spent in each top level expression will be shown.
# Times are show in microseconds.
parser.add_argument(
"--line-checking-stats", dest="line_checking_stats", help=argparse.SUPPRESS
)
# --debug-cache will disable any cache-related compressions/optimizations,
# which will make the cache writing process output pretty-printed JSON (which
# is easier to debug).
Expand Down
1 change: 1 addition & 0 deletions mypy/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ def __init__(self) -> None:
self.enable_incomplete_features = False # deprecated
self.enable_incomplete_feature: list[str] = []
self.timing_stats: str | None = None
self.line_checking_stats: str | None = None

# -- test options --
# Stop after the semantic analysis phase
Expand Down
8 changes: 3 additions & 5 deletions mypy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,13 +807,11 @@ def unnamed_function(name: str | None) -> bool:
return name is not None and name == "_"


# TODO: replace with uses of perf_counter_ns when support for py3.6 is dropped
# (or when mypy properly handles alternate definitions based on python version check
time_ref = time.perf_counter
time_ref = time.perf_counter_ns


def time_spent_us(t0: float) -> int:
return int((time.perf_counter() - t0) * 1e6)
def time_spent_us(t0: int) -> int:
return int((time.perf_counter_ns() - t0) / 1000)


def plural_s(s: int | Sized) -> str:
Expand Down

0 comments on commit 04d44c1

Please sign in to comment.