Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[varStore] Improve optimize algorithm #3124

Merged
merged 8 commits into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
79 changes: 73 additions & 6 deletions Lib/fontTools/varLib/varStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ def interpolateFromDeltas(self, varDataIndex, deltas):
def VarStore_subset_varidxes(
self, varIdxes, optimize=True, retainFirstMap=False, advIdxes=set()
):

# Sort out used varIdxes by major/minor.
used = {}
for varIdx in varIdxes:
Expand Down Expand Up @@ -407,7 +406,7 @@ def _popcount(n):
def _characteristic_overhead(chars):
"""Returns overhead in bytes of encoding this characteristic
as a VarData."""
c = 6
c = 4 + 6 # 4 bytes for LOffset, 6 bytes for VarData header
while chars:
if chars & 0b1111:
c += 2
Expand All @@ -423,6 +422,8 @@ def _find_yourself_best_new_encoding(self, done_by_width):
else:
new_encoding = None
self.best_new_encoding = new_encoding
if new_encoding:
break


class _EncodingDict(dict):
Expand Down Expand Up @@ -468,6 +469,68 @@ def _row_characteristics(row):
def VarStore_optimize(self, use_NO_VARIATION_INDEX=True):
"""Optimize storage. Returns mapping from old VarIdxes to new ones."""

# Overview:
#
# For each VarData row, we first extend it with zeroes to have
# one column per region in VarRegionList. We then group the
# rows into _Encoding objects, by their "characteristic" bitmap.
# The characteristic bitmap is a binary number representing how
# many bytes each column of the data takes up to encode. Each
# column is encoded in four bits. For example, if a column has
# only values in the range -128..127, it would only have a single
# bit set in the characteristic bitmap for that column. If it has
# values in the range -32768..32767, it would have two bits set.
# The number of ones in the characteristic bitmap is the "width"
# of the encoding.
#
# Each encoding as such has a number of "active" (ie. non-zero)
# columns. The overhead of encoding the characteristic bitmap
# is 10 bytes, plus 2 bytes per active column.
#
# When an encoding is merged into another one, if the characteristic
# of the old encoding is a subset of the new one, then the overhead
# of the old encoding is completely eliminated. However, each row
# now would require more bytes to encode, to the tune of one byte
# per characteristic bit that is active in the new encoding but not
# in the old one. The number of bits that can be added to an encoding
# while still beneficial to merge it into another encoding is called
# the "room" for that encoding.
#
# The "gain" of an encoding is the maximum number of bytes we can
# save by merging it into another encoding. The "gain" of merging
# two encodings is how many bytes we save by doing so.
#
# High-level algorithm:
#
# - Each encoding has a minimal way to encode it. However, because
# of the overhead of encoding the characteristic bitmap, it may
# be beneficial to merge two encodings together, if there is
# gain in doing so. As such, we need to search for the best
# such successive merges.
#
# Algorithm:
#
# - For any encoding that has zero gain, encode it as is and put
# it in the "done" list. Put the remaining encodings into the
# "todo" list.
# - For each encoding in the todo list, find the encoding in the
# done list that has the highest gain when merged into it; call
# this the "best new encoding".
# - Sort todo list by encoding room.
# - While todo list is not empty:
# - Pop the first item from todo list, as current item.
# - For each encoding in the todo list, try combining it
# with the current item. Calculate total gain as the gain of
# this combined encoding minus the gain of combining each of
# the two items with their best new encoding, if any.
# - If the total gain is positive and better than any previously
# remembered match, remember this as new match.
# - If a match was found, combine the two items and put them
# back in the todo list. Otherwise, if the current item's
# best new encoding is not None, combine current item with
# its best new encoding. Otherwise encode the current item
# by itself and put it in the done list.

# TODO
# Check that no two VarRegions are the same; if they are, fold them.

Expand All @@ -483,7 +546,6 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True):
regionIndices = data.VarRegionIndex

for minor, item in enumerate(data.Item):

row = list(zeroes)
for regionIdx, v in zip(regionIndices, item):
row[regionIdx] += v
Expand Down Expand Up @@ -553,14 +615,19 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True):
)
separate_gain = this_gain + other_gain

if combined_gain > separate_gain:
if combined_gain - separate_gain > best_gain:
best_idx = i
best_gain = combined_gain - separate_gain

if best_idx is None:
# Encoding is decided as is
done_by_width[encoding.width].append(encoding)
if encoding.best_new_encoding is None:
# Encoding is decided as is
done_by_width[encoding.width].append(encoding)
else:
# Merge with its best new encoding
encoding.best_new_encoding.extend(encoding.items)
else:
# Combine the two encodings
other_encoding = todo[best_idx]
combined_chars = other_encoding.chars | encoding.chars
combined_encoding = _Encoding(combined_chars)
Expand Down
93 changes: 93 additions & 0 deletions Tests/varLib/varStore_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import pytest
from io import StringIO
from fontTools.misc.xmlWriter import XMLWriter
from fontTools.varLib.models import VariationModel
from fontTools.varLib.varStore import OnlineVarStoreBuilder, VarStoreInstancer
from fontTools.ttLib import TTFont, newTable
Expand Down Expand Up @@ -80,3 +82,94 @@ def buildAxis(axisTag):
axis = Axis()
axis.axisTag = axisTag
return axis


@pytest.mark.parametrize(
"numRegions, varData, expectedNumVarData, expectedBytes",
[
(
5,
[
[10, 10, 0, 0, 20],
{3: 300},
],
1,
156,
),
(
5,
[
[10, 10, 0, 0, 20],
[10, 11, 0, 0, 20],
[10, 12, 0, 0, 20],
[10, 13, 0, 0, 20],
{3: 300},
],
1,
175,
),
(
5,
[
[10, 11, 0, 0, 20],
[10, 300, 0, 0, 20],
[10, 301, 0, 0, 20],
[10, 302, 0, 0, 20],
[10, 303, 0, 0, 20],
[10, 304, 0, 0, 20],
],
1,
180,
),
(
5,
[
[0, 11, 12, 0, 20],
[0, 13, 12, 0, 20],
[0, 14, 12, 0, 20],
[0, 15, 12, 0, 20],
[0, 16, 12, 0, 20],
[10, 300, 0, 0, 20],
[10, 301, 0, 0, 20],
[10, 302, 0, 0, 20],
[10, 303, 0, 0, 20],
[10, 304, 0, 0, 20],
],
2,
206,
),
],
)
def test_optimize(numRegions, varData, expectedNumVarData, expectedBytes):
locations = [{i: i / 16384.0} for i in range(numRegions)]
axisTags = sorted({k for loc in locations for k in loc})

model = VariationModel(locations)
builder = OnlineVarStoreBuilder(axisTags)
builder.setModel(model)

for data in varData:
if type(data) is dict:
newData = [0] * numRegions
for k, v in data.items():
newData[k] = v
data = newData

builder.storeMasters(data)

varStore = builder.finish()
mapping = varStore.optimize()

assert len(varStore.VarData) == expectedNumVarData

dummyFont = TTFont()

writer = XMLWriter(StringIO())
varStore.toXML(writer, dummyFont)
xml = writer.file.getvalue()

writer = OTTableWriter()
varStore.compile(writer, dummyFont)
data = writer.getAllData()

assert len(data) == expectedBytes, xml