Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Generate more precise width table using wcwidth
- Loading branch information
Showing
4 changed files
with
575 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
"""Generates a width table for Unicode characters. | ||
This script generates a width table for Unicode characters that are not | ||
narrow (width 1). The table is written to src/black/_width_table.py (note | ||
that although this file is generated, it is checked into Git) and is used | ||
by the char_width() function in src/black/strings.py. | ||
You should run this script when you upgrade wcwidth, which is expected to | ||
happen when a new Unicode version is released. The generated table contains | ||
the version of wcwidth and Unicode that it was generated for. | ||
In order to run this script, you need to install the latest version of wcwidth. | ||
You can do this by running: | ||
pip install -U wcwidth | ||
""" | ||
import sys | ||
from os.path import basename, dirname, join | ||
from typing import Iterable, Tuple | ||
|
||
import wcwidth | ||
|
||
|
||
def make_width_table() -> Iterable[Tuple[int, int, int]]:
    """Yield ``(first, last, width)`` runs for every non-narrow code point.

    Every code point from 0 through ``sys.maxunicode`` is passed to
    ``wcwidth.wcwidth``. Code points reported with width 1 are skipped;
    the rest are merged into maximal runs of adjacent code points that
    share the same reported width. Both ends of a run are inclusive, and
    runs are produced in ascending code point order.
    """
    # A negative ``first`` means no run is open yet. -2 is only a
    # placeholder; it is overwritten before it is ever compared or yielded.
    first = -1
    last = -1
    run_width = -2
    for cp in range(sys.maxunicode + 1):
        w = wcwidth.wcwidth(chr(cp))
        if w == 1:
            continue
        extends_run = first >= 0 and w == run_width and cp == last + 1
        if not extends_run:
            # Close the previous run (if any) before opening a new one here.
            if first >= 0:
                yield (first, last, run_width)
            first = cp
            run_width = w
        last = cp
    # Flush the run that was still open when the scan finished.
    if first >= 0:
        yield (first, last, run_width)
|
||
|
||
def main() -> None:
    """Regenerate ``src/black/_width_table.py`` from the installed wcwidth.

    The output path is resolved relative to this script's directory. The
    file is overwritten with a header comment recording the script name,
    the wcwidth version and the last entry of ``wcwidth.list_versions()``,
    followed by a ``WIDTH_TABLE`` list literal holding one
    ``(first, last, width)`` tuple per run yielded by ``make_width_table()``.
    """
    table_path = join(dirname(__file__), "..", "src", "black", "_width_table.py")
    # The generated module is checked into Git, so pin the encoding rather
    # than inheriting whatever the local platform default happens to be.
    with open(table_path, "w", encoding="utf-8") as f:
        # The template starts at column 0 on purpose: its text is emitted
        # verbatim, so any leading whitespace here would end up in the
        # generated module.
        f.write(
            f"""# Generated by {basename(__file__)}
# wcwidth {wcwidth.__version__}
# Unicode {wcwidth.list_versions()[-1]}
import sys
from typing import List, Tuple

if sys.version_info < (3, 8):
    from typing_extensions import Final
else:
    from typing import Final

WIDTH_TABLE: Final[List[Tuple[int, int, int]]] = [
"""
        )
        for triple in make_width_table():
            f.write(f"    {triple!r},\n")
        f.write("]\n")
|
||
|
||
# Regenerate the table only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Oops, something went wrong.