Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: foliojs/linebreak
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v0.3.0
Choose a base ref
...
head repository: foliojs/linebreak
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: v1.0.0
Choose a head ref
  • 5 commits
  • 12 files changed
  • 3 contributors

Commits on Sep 8, 2016

  1. Copy the full SHA
    0967d53 View commit details

Commits on Sep 9, 2016

  1. Merge pull request #2 from martinburch/patch-1

    last variable must be updated within the loop
    devongovett authored Sep 9, 2016
    Copy the full SHA
    23aa5e8 View commit details

Commits on Jun 17, 2019

  1. Convert to ES6 (#13)

    blikblum authored and devongovett committed Jun 17, 2019
    Copy the full SHA
    390b039 View commit details
  2. Copy the full SHA
    97f1851 View commit details
  3. 1.0.0

    devongovett committed Jun 17, 2019
    Copy the full SHA
    22794cc View commit details
Showing with 317 additions and 277 deletions.
  1. +3 −6 package.json
  2. +4 −2 readme.md
  3. +0 −43 src/classes.coffee
  4. +44 −0 src/classes.js
  5. +0 −42 src/generate_data.coffee
  6. +48 −0 src/generate_data.js
  7. +0 −101 src/linebreaker.coffee
  8. +130 −0 src/linebreaker.js
  9. +36 −35 src/{pairs.coffee → pairs.js}
  10. +0 −47 test/index.coffee
  11. +52 −0 test/index.js
  12. +0 −1 test/mocha.opts
9 changes: 3 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "linebreak",
"version": "0.3.0",
"version": "1.0.0",
"description": "An implementation of the Unicode Line Breaking Algorithm (UAX #14)",
"repository": {
"type": "git",
@@ -23,13 +23,10 @@
"unicode-trie": "^0.3.0"
},
"devDependencies": {
"coffee-script": "^1.7.1",
"mocha": "*"
"mocha": "^6.0.2"
},
"scripts": {
"prepublish": "coffee -c src/",
"postpublish": "rm -rf src/*.js",
"test": "node_modules/mocha/bin/mocha"
"test": "mocha"
},
"main": "src/linebreaker",
"browserify": {
6 changes: 4 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@ An implementation of the Unicode Line Breaking Algorithm (UAX #14)
> The selection of actual line break positions from the set of break opportunities is not covered by the Unicode Line Breaking Algorithm,
> but is in the domain of higher level software with knowledge of the available width and the display size of the text.
This is a JavaScript/CoffeeScript implementation of the
This is a JavaScript implementation of the
[Unicode Line Breaking Algorithm](http://www.unicode.org/reports/tr14/#SampleCode) for Node.js
(and browsers I guess). It is used by [PDFKit](http://github.com/devongovett/pdfkit/) for
line wrapping text in PDF documents, but since the algorithm knows nothing about the actual
@@ -38,6 +38,8 @@ while (bk = breaker.nextBreak()) {
if (bk.required) {
console.log('\n\n');
}

last = bk.position;
}
```

@@ -58,4 +60,4 @@ contributing or fixing bugs, these things might be of interest.

## License

MIT
MIT
43 changes: 0 additions & 43 deletions src/classes.coffee

This file was deleted.

44 changes: 44 additions & 0 deletions src/classes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Numeric identifiers for the Unicode line break classes.
// Every class is exported under its two-letter abbreviation.
Object.assign(exports, {
  // The following break classes are handled by the pair table
  OP: 0, // Opening punctuation
  CL: 1, // Closing punctuation
  CP: 2, // Closing parenthesis
  QU: 3, // Ambiguous quotation
  GL: 4, // Glue
  NS: 5, // Non-starters
  EX: 6, // Exclamation/Interrogation
  SY: 7, // Symbols allowing break after
  IS: 8, // Infix separator
  PR: 9, // Prefix
  PO: 10, // Postfix
  NU: 11, // Numeric
  AL: 12, // Alphabetic
  HL: 13, // Hebrew Letter
  ID: 14, // Ideographic
  IN: 15, // Inseparable characters
  HY: 16, // Hyphen
  BA: 17, // Break after
  BB: 18, // Break before
  B2: 19, // Break on either side (but not pair)
  ZW: 20, // Zero-width space
  CM: 21, // Combining marks
  WJ: 22, // Word joiner
  H2: 23, // Hangul LV
  H3: 24, // Hangul LVT
  JL: 25, // Hangul L Jamo
  JV: 26, // Hangul V Jamo
  JT: 27, // Hangul T Jamo
  RI: 28, // Regional Indicator

  // The following break classes are not handled by the pair table
  AI: 29, // Ambiguous (Alphabetic or Ideograph)
  BK: 30, // Break (mandatory)
  CB: 31, // Contingent break
  CJ: 32, // Conditional Japanese Starter
  CR: 33, // Carriage return
  LF: 34, // Line feed
  NL: 35, // Next line
  SA: 36, // South-East Asian
  SG: 37, // Surrogates
  SP: 38, // Space
  XX: 39, // Unknown
});
42 changes: 0 additions & 42 deletions src/generate_data.coffee

This file was deleted.

48 changes: 48 additions & 0 deletions src/generate_data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
const fs = require('fs');
const request = require('request');
const classes = require('./classes');
const UnicodeTrieBuilder = require('unicode-trie/builder');

// Downloads the Unicode LineBreak.txt data file, collapses consecutive entries
// that share a line break class into single ranges, and writes the resulting
// trie to `classes.trie` next to this script.
request('http://www.unicode.org/Public/7.0.0/ucd/LineBreak.txt', function(err, res, data) {
  // Report the real cause of a failed download instead of crashing on
  // `data.match` with an unrelated TypeError.
  if (err) {
    throw err;
  }

  // Each match looks like "0041..005A;AL" (a range) or "000A;LF" (one code point).
  const matches = data.match(/^[0-9A-F]+(\.\.[0-9A-F]+)?;[A-Z][A-Z0-9]/gm);
  if (matches == null) {
    throw new Error('No line break entries found in LineBreak.txt');
  }

  // The range currently being accumulated (hex strings) and its class name.
  let start = null;
  let end = null;
  let type = null;
  const trie = new UnicodeTrieBuilder(classes.XX);

  // collect entries in the linebreaking table into ranges
  // to keep things smaller.
  for (const match of matches) {
    const fields = match.split(/;|\.\./);
    const rangeStart = fields[0];

    // Three fields mean "start..end;class"; two mean "codepoint;class".
    const isRange = fields.length === 3;
    const rangeEnd = isRange ? fields[1] : rangeStart;
    const rangeType = isRange ? fields[2] : fields[1];

    // The class changed: flush the accumulated range before starting a new one.
    if ((type != null) && (rangeType !== type)) {
      trie.setRange(parseInt(start, 16), parseInt(end, 16), classes[type], true);
      type = null;
    }

    if (type == null) {
      start = rangeStart;
      type = rangeType;
    }

    // Extending `end` without checking adjacency means a merged range also
    // covers any gap between consecutive entries of the same class.
    end = rangeEnd;
  }

  // Flush the final range.
  trie.setRange(parseInt(start, 16), parseInt(end, 16), classes[type], true);

  // write the trie to a file. This is done synchronously because
  // `fs.writeFile` without a callback throws on current Node.js versions.
  fs.writeFileSync(__dirname + '/classes.trie', trie.toBuffer());
});
101 changes: 0 additions & 101 deletions src/linebreaker.coffee

This file was deleted.

130 changes: 130 additions & 0 deletions src/linebreaker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
const UnicodeTrie = require('unicode-trie');
const fs = require('fs');
const base64 = require('base64-js');
// Line break class constants used below. Each name is bound exactly once
// (the previous destructuring listed SP, BK, LF and NL twice).
const {AI, AL, BA, BK, CB, CJ, CR, ID, LF, NL, NS, SA, SG, SP, WJ, XX} = require('./classes');
const {DI_BRK, IN_BRK, CI_BRK, CP_BRK, PR_BRK, pairTable} = require('./pairs');

// Load the serialized class trie: the file is read as a base64 string and
// decoded into a byte array for UnicodeTrie.
// NOTE(review): the literal readFileSync call is presumably kept in this exact
// form so a browserify transform can inline the file; confirm before changing.
const data = base64.toByteArray(fs.readFileSync(__dirname + '/classes.trie', 'base64'));
const classTrie = new UnicodeTrie(data);

// Resolves classes that have no row of their own in the pair lookup to the
// class used in their place. Any other class is returned unchanged.
const mapClass = function(c) {
  // AI, SA, SG and XX are all treated as AL.
  if (c === AI || c === SA || c === SG || c === XX) {
    return AL;
  }

  // CJ is treated as NS.
  if (c === CJ) {
    return NS;
  }

  return c;
};

// Adjusts the class of a character that starts a new run: LineBreaker applies
// it to the first character of the string and to the character that follows a
// required break. Classes not listed here are returned unchanged.
const mapFirst = function(c) {
  // LF and NL become BK.
  if (c === LF || c === NL) {
    return BK;
  }

  // CB becomes BA.
  if (c === CB) {
    return BA;
  }

  // SP becomes WJ.
  if (c === SP) {
    return WJ;
  }

  return c;
};

/**
 * A single break opportunity reported by LineBreaker.
 *
 * `position` is the index in the string at which the break occurs.
 * `required` is true when the break is mandatory; it defaults to false.
 */
class Break {
  constructor(position, required = false) {
    Object.assign(this, { position, required });
  }
}

/**
 * Scans a string and reports its line break opportunities one at a time.
 *
 * Call `nextBreak()` repeatedly: each call returns a `Break` whose `position`
 * is a UTF-16 index into the string and whose `required` flag marks mandatory
 * breaks. It returns `null` once the end of the string has been reported.
 */
class LineBreaker {
  constructor(string) {
    Object.assign(this, {
      string,
      pos: 0, // index of the next UTF-16 code unit to read
      lastPos: 0, // index at which the most recently read character starts
      curClass: null, // class on the left-hand side of the pair lookup
      nextClass: null, // class of the most recently read character
    });
  }

  /**
   * Reads the code point at the current position and advances past it.
   * A high surrogate followed by a low surrogate is combined into a single
   * supplementary code point; anything else is returned as one code unit.
   */
  nextCodePoint() {
    const lead = this.string.charCodeAt(this.pos);
    this.pos += 1;
    const trail = this.string.charCodeAt(this.pos);

    const isHighSurrogate = lead >= 0xd800 && lead <= 0xdbff;
    const isLowSurrogate = trail >= 0xdc00 && trail <= 0xdfff;
    if (!(isHighSurrogate && isLowSurrogate)) {
      return lead;
    }

    // Consume the low surrogate as well.
    this.pos += 1;
    return ((lead - 0xd800) * 0x400) + (trail - 0xdc00) + 0x10000;
  }

  /** Reads the next code point and returns its class after `mapClass`. */
  nextCharClass() {
    const codePoint = this.nextCodePoint();
    return mapClass(classTrie.get(codePoint));
  }

  /**
   * Returns the next break opportunity as a `Break`, or `null` when every
   * break, including the final one at the end of the string, has been returned.
   */
  nextBreak() {
    // get the first char if we're at the beginning of the string
    if (this.curClass == null) {
      this.curClass = mapFirst(this.nextCharClass());
    }

    while (this.pos < this.string.length) {
      this.lastPos = this.pos;
      const previousClass = this.nextClass;
      this.nextClass = this.nextCharClass();

      // explicit newline: after BK, or after a CR that is not followed by LF
      const afterMandatory = this.curClass === BK;
      const afterLoneCR = (this.curClass === CR) && (this.nextClass !== LF);
      if (afterMandatory || afterLoneCR) {
        this.curClass = mapFirst(mapClass(this.nextClass));
        return new Break(this.lastPos, true);
      }

      // handle classes not handled by the pair table
      let carried;
      if (this.nextClass === SP) {
        // Spaces keep the current left-hand class.
        carried = this.curClass;
      } else if (this.nextClass === BK || this.nextClass === LF || this.nextClass === NL) {
        carried = BK;
      } else if (this.nextClass === CR) {
        carried = CR;
      } else if (this.nextClass === CB) {
        carried = BA;
      }

      if (carried != null) {
        this.curClass = carried;
        if (this.nextClass === CB) {
          return new Break(this.lastPos);
        }
        continue;
      }

      // if not handled already, use the pair table
      const action = pairTable[this.curClass][this.nextClass];
      const afterSpace = previousClass === SP;
      let shouldBreak = false;

      if (action === DI_BRK) {
        // Direct break
        shouldBreak = true;
      } else if (action === IN_BRK) {
        // possible indirect break: only when a space precedes
        shouldBreak = afterSpace;
      } else if (action === CI_BRK) {
        // Without a preceding space the left-hand class stays as it is.
        if (!afterSpace) { continue; }
        shouldBreak = true;
      } else if (action === CP_BRK) {
        // prohibited for combining marks; never breaks, and the left-hand
        // class only advances when a space precedes
        if (!afterSpace) { continue; }
      }

      this.curClass = this.nextClass;
      if (shouldBreak) {
        return new Break(this.lastPos);
      }
    }

    // The loop only exits once the input is exhausted, so for string input
    // this guard always holds.
    if (this.pos >= this.string.length) {
      if (this.lastPos >= this.string.length) {
        return null;
      }

      // Report the end of the string once as a final, non-required break.
      this.lastPos = this.string.length;
      return new Break(this.string.length);
    }
  }
}

module.exports = LineBreaker;
71 changes: 36 additions & 35 deletions src/pairs.coffee → src/pairs.js
47 changes: 0 additions & 47 deletions test/index.coffee

This file was deleted.

52 changes: 52 additions & 0 deletions test/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
const fs = require('fs');
const punycode = require('punycode');
const LineBreaker = require('../');
const assert = require('assert');

// Registers one mocha test per data line of LineBreakTest.txt. In that file
// "÷" marks a position where a break is expected and "×" a position where it
// is not; the text after "#" on each line is a comment.
describe('unicode line break tests', function() {
  // these tests are weird, possibly incorrect or just tailored differently. we skip them.
  // Entries are zero-based indexes into `lines`. A Set keeps the per-line lookup O(1).
  const skip = new Set([812, 814, 848, 850, 864, 866, 900, 902, 956, 958, 1068, 1070, 1072, 1074, 1224, 1226,
    1228, 1230, 1760, 1762, 2932, 2934, 4100, 4101, 4102, 4103, 4340, 4342, 4496, 4498, 4568, 4570,
    4704, 4706, 4707, 4708, 4710, 4711, 4712, 4714, 4715, 4716, 4718, 4719, 4722, 4723, 4726, 4727,
    4730, 4731, 4734, 4735, 4736, 4738, 4739, 4742, 4743, 4746, 4747, 4748, 4750, 4751, 4752, 4754,
    4755, 4756, 4758, 4759, 4760, 4762, 4763, 4764, 4766, 4767, 4768, 4770, 4771, 4772, 4774, 4775,
    4778, 4779, 4780, 4782, 4783, 4784, 4786, 4787, 4788, 4790, 4791, 4794, 4795, 4798, 4799, 4800,
    4802, 4803, 4804, 4806, 4807, 4808, 4810, 4811, 4812, 4814, 4815, 4816, 4818, 4819, 4820, 4822,
    4823, 4826, 4827, 4830, 4831, 4834, 4835, 4838, 4839, 4840, 4842, 4843, 4844, 4846, 4847, 4848,
    4850, 4851, 4852, 4854, 4855, 4856, 4858, 4859, 4960, 4962, 5036, 5038, 6126, 6135, 6140, 6225,
    6226, 6227, 6228, 6229, 6230, 6232, 6233, 6234, 6235, 6236, 6332]);

  const data = fs.readFileSync(__dirname + '/LineBreakTest.txt', 'utf8');
  const lines = data.split('\n');

  lines.forEach(function(line, i) {
    // Ignore blank lines and full-line comments.
    if (!line || /^#/.test(line)) { return; }

    const [cols, comment] = line.split('#');

    // Register skipped cases before doing any work, so a known-bad case is
    // never run through the breaker while the suite is being collected.
    if (skip.has(i)) {
      it.skip(cols, function() {});
      return;
    }

    // Rebuild the input string from every code point on the line.
    const codePoints = cols.split(/\s*[×÷]\s*/).slice(1, -1).map(c => parseInt(c, 16));
    const str = punycode.ucs2.encode(codePoints);

    // Slice the string at each reported break position.
    const breaker = new LineBreaker(str);
    const breaks = [];
    let last = 0;
    let bk;
    while ((bk = breaker.nextBreak())) {
      breaks.push(str.slice(last, bk.position));
      last = bk.position;
    }

    // Expected segments: split on "÷", then decode the "×"-joined code points.
    const expected = cols.split(/\s*÷\s*/).slice(0, -1).map(function(segment) {
      let codes = segment.split(/\s*×\s*/);
      if (codes[0] === '') { codes.shift(); }
      codes = codes.map(c => parseInt(c, 16));
      return punycode.ucs2.encode(codes);
    });

    it(cols, () => assert.deepStrictEqual(
      breaks,
      expected,
      `${i} ${JSON.stringify(breaks)} != ${JSON.stringify(expected)} #${comment}`
    ));
  });
});
1 change: 0 additions & 1 deletion test/mocha.opts
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
--compilers coffee:coffee-script/register
--reporter landing