Skip to content

Commit

Permalink
fix #546: Support complex entity value
Browse files Browse the repository at this point in the history
  • Loading branch information
amitguptagwl committed Feb 26, 2023
1 parent a874ce7 commit a4bdced
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 71 deletions.
46 changes: 46 additions & 0 deletions spec/entities_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -532,4 +532,50 @@ describe("XMLParser External Entites", function() {

expect(result).toEqual(expected);
});

// Regression spec for #546: an internal entity whose replacement text contains
// markup (tags and quoted attributes) must be read in full and substituted
// into the element text as-is.
// Registered with `it` (not the focused `fit`) so the remaining specs of the
// suite are still executed.
it("should support entites with tags in content", function() {
    // The DOCTYPE internal subset declares one entity, `Smile`, whose
    // double-quoted value spans several lines and uses single-quoted attributes.
    const xmlData = `
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" [
<!ENTITY Smile "
<rect x='.5' y='.5' width='29' height='39' fill='black' stroke='red'/>
<g transform='translate(0, 5)'>
<circle cx='15' cy='15' r='10' fill='yellow'/>
<circle cx='12' cy='12' r='1.5' fill='black'/>
<circle cx='17' cy='12' r='1.5' fill='black'/>
<path d='M 10 19 L 15 23 20 19' stroke='black' stroke-width='2'/></g>"
>
]>
<svg width="850px" height="700px" version="1.1" xmlns="http://www.w3.org/2000/svg">
<g transform="matrix(16,0,0,16,0,0)">&Smile;</g></svg> `;

    // `&Smile;` is expected to be replaced by the raw entity value, which ends
    // up as the text content of <g>; attributes carry no prefix because
    // `attributeNamePrefix` is "".
    const expected = {
        "?xml": {
            "version": "1.0",
            "encoding": "utf-8"
        },
        "svg": {
            "g": {
                "#text": " \n \t<rect x='.5' y='.5' width='29' height='39' fill='black' stroke='red'/>\n\t\t<g transform='translate(0, 5)'> \n\t\t\t<circle cx='15' cy='15' r='10' fill='yellow'/>\n\t\t\t<circle cx='12' cy='12' r='1.5' fill='black'/>\n\t\t\t<circle cx='17' cy='12' r='1.5' fill='black'/>\n\t\t\t<path d='M 10 19 L 15 23 20 19' stroke='black' stroke-width='2'/></g>",
                "transform": "matrix(16,0,0,16,0,0)"
            },
            "width": "850px",
            "height": "700px",
            "version": "1.1",
            "xmlns": "http://www.w3.org/2000/svg"
        }
    };

    const options = {
        attributeNamePrefix: "",
        ignoreAttributes: false,
        processEntities: true,
    };
    const parser = new XMLParser(options);
    const result = parser.parse(xmlData);

    expect(result).toEqual(expected);
});
});
167 changes: 96 additions & 71 deletions src/xmlparser/DocTypeReader.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,80 +11,34 @@ function readDocType(xmlData, i){
{
i = i+9;
let angleBracketsCount = 1;
let hasBody = false, entity = false, comment = false;
let hasBody = false, comment = false;
let exp = "";
for(;i<xmlData.length;i++){
if (xmlData[i] === '<' && !comment) {
if( hasBody &&
xmlData[i+1] === '!' &&
xmlData[i+2] === 'E' &&
xmlData[i+3] === 'N' &&
xmlData[i+4] === 'T' &&
xmlData[i+5] === 'I' &&
xmlData[i+6] === 'T' &&
xmlData[i+7] === 'Y'
){
i += 7;
entity = true;
}else if( hasBody &&
xmlData[i+1] === '!' &&
xmlData[i+2] === 'E' &&
xmlData[i+3] === 'L' &&
xmlData[i+4] === 'E' &&
xmlData[i+5] === 'M' &&
xmlData[i+6] === 'E' &&
xmlData[i+7] === 'N' &&
xmlData[i+8] === 'T'
){
//Not supported
i += 8;
}else if( hasBody &&
xmlData[i+1] === '!' &&
xmlData[i+2] === 'A' &&
xmlData[i+3] === 'T' &&
xmlData[i+4] === 'T' &&
xmlData[i+5] === 'L' &&
xmlData[i+6] === 'I' &&
xmlData[i+7] === 'S' &&
xmlData[i+8] === 'T'
){
//Not supported
i += 8;
}else if( hasBody &&
xmlData[i+1] === '!' &&
xmlData[i+2] === 'N' &&
xmlData[i+3] === 'O' &&
xmlData[i+4] === 'T' &&
xmlData[i+5] === 'A' &&
xmlData[i+6] === 'T' &&
xmlData[i+7] === 'I' &&
xmlData[i+8] === 'O' &&
xmlData[i+9] === 'N'
){
//Not supported
i += 9;
}else if( //comment
xmlData[i+1] === '!' &&
xmlData[i+2] === '-' &&
xmlData[i+3] === '-'
){
comment = true;
}else{
throw new Error("Invalid DOCTYPE");
if (xmlData[i] === '<' && !comment) { //Determine the tag type
if( hasBody && isEntity(xmlData, i)){
i += 7;
[entityName, val,i] = readEntityExp(xmlData,i+1);
if(val.indexOf("&") === -1) //Parameter entities are not supported
entities[ entityName ] = {
regx : RegExp( `&${entityName};`,"g"),
val: val
};
}
else if( hasBody && isElement(xmlData, i)) i += 8;//Not supported
else if( hasBody && isAttlist(xmlData, i)) i += 8;//Not supported
else if( hasBody && isNotation(xmlData, i)) i += 9;//Not supported
else if( isComment) comment = true;
else throw new Error("Invalid DOCTYPE");

angleBracketsCount++;
exp = "";
} else if (xmlData[i] === '>') {
} else if (xmlData[i] === '>') { //Read tag content
if(comment){
if( xmlData[i - 1] === "-" && xmlData[i - 2] === "-"){
comment = false;
angleBracketsCount--;
}
}else{
if(entity) {
parseEntityExp(exp, entities);
entity = false;
}
angleBracketsCount--;
}
if (angleBracketsCount === 0) {
Expand All @@ -105,14 +59,85 @@ function readDocType(xmlData, i){
return {entities, i};
}

const entityRegex = RegExp("^\\s([a-zA-z0-0]+)[ \t](['\"])([^&]+)\\2");
function parseEntityExp(exp, entities){
const match = entityRegex.exec(exp);
if(match){
entities[ match[1] ] = {
regx : RegExp( `&${match[1]};`,"g"),
val: match[3]
};
/**
 * Reads the name and the quoted replacement text of an entity declaration.
 *
 * Supported form (internal entity):
 *     <!ENTITY entityname "replacement text">
 * Not supported:
 *     <!ENTITY ext SYSTEM "http://normal-website.com" >   (external entity)
 *     <!ENTITY entityname "&anotherElement;">             (left to the caller to reject)
 *
 * The name is everything between `i` and the first quote character (`'` or
 * `"`), trimmed. The value is everything between that quote and the next
 * occurrence of the same quote character, so the other quote kind and markup
 * may appear inside the value.
 *
 * @param {string} xmlData - XML source text.
 * @param {number} i - Index at which to start reading (the visible caller
 *   passes the position just past the `<!ENTITY` keyword).
 * @returns {[string, string, number]} `[entityName, val, index]`, where
 *   `index` is the position of the closing quote. If the closing quote is
 *   missing the value runs to the end of the input and `index` is
 *   `xmlData.length`; if no opening quote is found at all the value is empty.
 * @throws {Error} When the trimmed name still contains whitespace, which means
 *   more than a bare name precedes the quoted value.
 */
function readEntityExp(xmlData,i){
    const inputLength = xmlData.length;

    // Entity name: scan up to the opening quote of the value.
    let quoteIndex = i;
    while (quoteIndex < inputLength && xmlData[quoteIndex] !== "'" && xmlData[quoteIndex] !== '"') {
        quoteIndex++;
    }
    const entityName = xmlData.slice(i, quoteIndex).trim();

    // Any whitespace left inside the name (space, tab, line break) means extra
    // tokens such as `SYSTEM`/`PUBLIC` or a `%` marker precede the value.
    // Checking for a plain space only would let tab- or newline-separated
    // declarations through and register an entity under a bogus name.
    if (/\s/.test(entityName)) throw new Error("External entites are not supported");

    // Entity value: runs until the same quote character that opened it.
    const quoteChar = xmlData[quoteIndex];
    const valueStart = quoteIndex + 1;
    let valueEnd = valueStart;
    while (valueEnd < inputLength && xmlData[valueEnd] !== quoteChar) {
        valueEnd++;
    }
    return [entityName, xmlData.slice(valueStart, valueEnd), valueEnd];
}

/**
 * Tells whether the characters following index `i` spell a comment opener,
 * i.e. `!--` (as in `<!--`). The character at `i` itself is not inspected.
 *
 * @param {string} xmlData - XML source text.
 * @param {number} i - Index of the candidate `<`.
 * @returns {boolean} `true` when `xmlData[i+1..i+3]` is `!--`.
 */
function isComment(xmlData, i){
    const marker = "!--";
    for (let offset = 1; offset <= marker.length; offset++) {
        if (xmlData[i + offset] !== marker[offset - 1]) return false;
    }
    return true;
}
/**
 * Tells whether the characters following index `i` spell `!ENTITY`
 * (as in `<!ENTITY`). The match is case-sensitive and the character at `i`
 * itself is not inspected.
 *
 * @param {string} xmlData - XML source text.
 * @param {number} i - Index of the candidate `<`.
 * @returns {boolean} `true` when `xmlData[i+1..i+7]` is `!ENTITY`.
 */
function isEntity(xmlData, i){
    return [..."!ENTITY"].every((expected, k) => xmlData[i + (k + 1)] === expected);
}
/**
 * Tells whether the characters following index `i` spell `!ELEMENT`
 * (as in `<!ELEMENT`). The match is case-sensitive and the character at `i`
 * itself is not inspected.
 *
 * @param {string} xmlData - XML source text.
 * @param {number} i - Index of the candidate `<`.
 * @returns {boolean} `true` when `xmlData[i+1..i+8]` is `!ELEMENT`.
 */
function isElement(xmlData, i){
    const keyword = "!ELEMENT";
    let matched = 0;
    while (matched < keyword.length && xmlData[i + (matched + 1)] === keyword[matched]) {
        matched++;
    }
    return matched === keyword.length;
}

/**
 * Tells whether the characters following index `i` spell `!ATTLIST`
 * (as in `<!ATTLIST`). The match is case-sensitive and the character at `i`
 * itself is not inspected.
 *
 * @param {string} xmlData - XML source text.
 * @param {number} i - Index of the candidate `<`.
 * @returns {boolean} `true` when `xmlData[i+1..i+8]` is `!ATTLIST`.
 */
function isAttlist(xmlData, i){
    const keyword = "!ATTLIST";
    for (let offset = 1; offset <= keyword.length; offset++) {
        const matches = xmlData[i + offset] === keyword[offset - 1];
        if (!matches) return false;
    }
    return true;
}
/**
 * Tells whether the characters following index `i` spell `!NOTATION`
 * (as in `<!NOTATION`). The match is case-sensitive and the character at `i`
 * itself is not inspected.
 *
 * @param {string} xmlData - XML source text.
 * @param {number} i - Index of the candidate `<`.
 * @returns {boolean} `true` when `xmlData[i+1..i+9]` is `!NOTATION`.
 */
function isNotation(xmlData, i){
    const keyword = "!NOTATION";
    const mismatch = [...keyword].some((expected, k) => xmlData[i + (k + 1)] !== expected);
    return !mismatch;
}

module.exports = readDocType;

0 comments on commit a4bdced

Please sign in to comment.