Skip to content

Commit

Permalink
Merge pull request #3124 from fonttools/varStore-optimize-fix
Browse files Browse the repository at this point in the history
[varStore] Improve optimize algorithm
  • Loading branch information
behdad committed May 24, 2023
2 parents 55003d8 + abe2a37 commit 22c76c4
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 6 deletions.
79 changes: 73 additions & 6 deletions Lib/fontTools/varLib/varStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ def interpolateFromDeltas(self, varDataIndex, deltas):
def VarStore_subset_varidxes(
self, varIdxes, optimize=True, retainFirstMap=False, advIdxes=set()
):

# Sort out used varIdxes by major/minor.
used = {}
for varIdx in varIdxes:
Expand Down Expand Up @@ -407,7 +406,7 @@ def _popcount(n):
def _characteristic_overhead(chars):
"""Returns overhead in bytes of encoding this characteristic
as a VarData."""
c = 6
c = 4 + 6 # 4 bytes for LOffset, 6 bytes for VarData header
while chars:
if chars & 0b1111:
c += 2
Expand All @@ -423,6 +422,8 @@ def _find_yourself_best_new_encoding(self, done_by_width):
else:
new_encoding = None
self.best_new_encoding = new_encoding
if new_encoding:
break


class _EncodingDict(dict):
Expand Down Expand Up @@ -468,6 +469,68 @@ def _row_characteristics(row):
def VarStore_optimize(self, use_NO_VARIATION_INDEX=True):
"""Optimize storage. Returns mapping from old VarIdxes to new ones."""

# Overview:
#
# For each VarData row, we first extend it with zeroes to have
# one column per region in VarRegionList. We then group the
# rows into _Encoding objects, by their "characteristic" bitmap.
# The characteristic bitmap is a binary number representing how
# many bytes each column of the data takes up to encode. Each
# column is encoded in four bits. For example, if a column has
# only values in the range -128..127, it would only have a single
# bit set in the characteristic bitmap for that column. If it has
# values in the range -32768..32767, it would have two bits set.
# The number of ones in the characteristic bitmap is the "width"
# of the encoding.
#
# Each encoding as such has a number of "active" (ie. non-zero)
# columns. The overhead of encoding the characteristic bitmap
# is 10 bytes, plus 2 bytes per active column.
#
# When an encoding is merged into another one, if the characteristic
# of the old encoding is a subset of the new one, then the overhead
# of the old encoding is completely eliminated. However, each row
# now would require more bytes to encode, to the tune of one byte
# per characteristic bit that is active in the new encoding but not
# in the old one. The number of bits that can be added to an encoding
# while still beneficial to merge it into another encoding is called
# the "room" for that encoding.
#
# The "gain" of an encoding is the maximum number of bytes we can
# save by merging it into another encoding. The "gain" of merging
# two encodings is how many bytes we save by doing so.
#
# High-level algorithm:
#
# - Each encoding has a minimal way to encode it. However, because
# of the overhead of encoding the characteristic bitmap, it may
# be beneficial to merge two encodings together, if there is
# gain in doing so. As such, we need to search for the best
# such successive merges.
#
# Algorithm:
#
# - For any encoding that has zero gain, encode it as is and put
# it in the "done" list. Put the remaining encodings into the
# "todo" list.
# - For each encoding in the todo list, find the encoding in the
# done list that has the highest gain when merged into it; call
# this the "best new encoding".
# - Sort todo list by encoding room.
# - While todo list is not empty:
# - Pop the first item from todo list, as current item.
#   - For each encoding in the todo list, try combining it
# with the current item. Calculate total gain as the gain of
# this combined encoding minus the gain of combining each of
# the two items with their best new encoding, if any.
# - If the total gain is positive and better than any previously
# remembered match, remember this as new match.
# - If a match was found, combine the two items and put them
# back in the todo list. Otherwise, if the current item's
# best new encoding is not None, combine current item with
# its best new encoding. Otherwise encode the current item
# by itself and put it in the done list.

# TODO
# Check that no two VarRegions are the same; if they are, fold them.

Expand All @@ -483,7 +546,6 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True):
regionIndices = data.VarRegionIndex

for minor, item in enumerate(data.Item):

row = list(zeroes)
for regionIdx, v in zip(regionIndices, item):
row[regionIdx] += v
Expand Down Expand Up @@ -553,14 +615,19 @@ def VarStore_optimize(self, use_NO_VARIATION_INDEX=True):
)
separate_gain = this_gain + other_gain

if combined_gain > separate_gain:
if combined_gain - separate_gain > best_gain:
best_idx = i
best_gain = combined_gain - separate_gain

if best_idx is None:
# Encoding is decided as is
done_by_width[encoding.width].append(encoding)
if encoding.best_new_encoding is None:
# Encoding is decided as is
done_by_width[encoding.width].append(encoding)
else:
# Merge with its best new encoding
encoding.best_new_encoding.extend(encoding.items)
else:
# Combine the two encodings
other_encoding = todo[best_idx]
combined_chars = other_encoding.chars | encoding.chars
combined_encoding = _Encoding(combined_chars)
Expand Down
93 changes: 93 additions & 0 deletions Tests/varLib/varStore_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import pytest
from io import StringIO
from fontTools.misc.xmlWriter import XMLWriter
from fontTools.varLib.models import VariationModel
from fontTools.varLib.varStore import OnlineVarStoreBuilder, VarStoreInstancer
from fontTools.ttLib import TTFont, newTable
Expand Down Expand Up @@ -80,3 +82,94 @@ def buildAxis(axisTag):
axis = Axis()
axis.axisTag = axisTag
return axis


@pytest.mark.parametrize(
    "numRegions, varData, expectedNumVarData, expectedBytes",
    [
        (
            5,
            [
                [10, 10, 0, 0, 20],
                {3: 300},
            ],
            1,
            156,
        ),
        (
            5,
            [
                [10, 10, 0, 0, 20],
                [10, 11, 0, 0, 20],
                [10, 12, 0, 0, 20],
                [10, 13, 0, 0, 20],
                {3: 300},
            ],
            1,
            175,
        ),
        (
            5,
            [
                [10, 11, 0, 0, 20],
                [10, 300, 0, 0, 20],
                [10, 301, 0, 0, 20],
                [10, 302, 0, 0, 20],
                [10, 303, 0, 0, 20],
                [10, 304, 0, 0, 20],
            ],
            1,
            180,
        ),
        (
            5,
            [
                [0, 11, 12, 0, 20],
                [0, 13, 12, 0, 20],
                [0, 14, 12, 0, 20],
                [0, 15, 12, 0, 20],
                [0, 16, 12, 0, 20],
                [10, 300, 0, 0, 20],
                [10, 301, 0, 0, 20],
                [10, 302, 0, 0, 20],
                [10, 303, 0, 0, 20],
                [10, 304, 0, 0, 20],
            ],
            2,
            206,
        ),
    ],
)
def test_optimize(numRegions, varData, expectedNumVarData, expectedBytes):
    """Build a VarStore from master deltas, optimize it, and verify both
    the number of resulting VarData subtables and the exact compiled size.

    Each entry in ``varData`` is either a dense list of per-region deltas,
    or a sparse ``{regionIndex: delta}`` dict that is expanded to a dense
    row before being stored.
    """
    # One single-axis location per region; axis values are arbitrary F2Dot14s.
    locations = [{i: i / 16384.0} for i in range(numRegions)]
    axisTags = sorted({k for loc in locations for k in loc})

    model = VariationModel(locations)
    builder = OnlineVarStoreBuilder(axisTags)
    builder.setModel(model)

    for data in varData:
        if isinstance(data, dict):
            # Expand sparse {regionIndex: delta} spec into a dense row.
            newData = [0] * numRegions
            for k, v in data.items():
                newData[k] = v
            data = newData

        builder.storeMasters(data)

    varStore = builder.finish()
    varStore.optimize()

    assert len(varStore.VarData) == expectedNumVarData

    dummyFont = TTFont()

    # Capture the XML dump so a size mismatch below reports the store layout.
    writer = XMLWriter(StringIO())
    varStore.toXML(writer, dummyFont)
    xml = writer.file.getvalue()

    writer = OTTableWriter()
    varStore.compile(writer, dummyFont)
    data = writer.getAllData()

    assert len(data) == expectedBytes, xml

0 comments on commit 22c76c4

Please sign in to comment.