diff --git a/apps/web-clipper/lib/JSDOMParser.js b/apps/web-clipper/lib/JSDOMParser.js deleted file mode 100644 index 7bfa2acf5..000000000 --- a/apps/web-clipper/lib/JSDOMParser.js +++ /dev/null @@ -1,1196 +0,0 @@ -/*eslint-env es6:false*/ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this file, - * You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/** - * This is a relatively lightweight DOMParser that is safe to use in a web - * worker. This is far from a complete DOM implementation; however, it should - * contain the minimal set of functionality necessary for Readability.js. - * - * Aside from not implementing the full DOM API, there are other quirks to be - * aware of when using the JSDOMParser: - * - * 1) Properly formed HTML/XML must be used. This means you should be extra - * careful when using this parser on anything received directly from an - * XMLHttpRequest. Providing a serialized string from an XMLSerializer, - * however, should be safe (since the browser's XMLSerializer should - * generate valid HTML/XML). Therefore, if parsing a document from an XHR, - * the recommended approach is to do the XHR in the main thread, use - * XMLSerializer.serializeToString() on the responseXML, and pass the - * resulting string to the worker. - * - * 2) Live NodeLists are not supported. DOM methods and properties such as - * getElementsByTagName() and childNodes return standard arrays. If you - * want these lists to be updated when nodes are removed or added to the - * document, you must take care to manually update them yourself. - */ -(function (global) { - - // XML only defines these and the numeric ones: - - var entityTable = { - "lt": "<", - "gt": ">", - "amp": "&", - "quot": '"', - "apos": "'", - }; - - var reverseEntityTable = { - "<": "<", - ">": ">", - "&": "&", - '"': """, - "'": "'", - }; - - function encodeTextContentHTML(s) { - return s.replace(/[&<>]/g, function(x) { - return reverseEntityTable[x]; - }); - } - - function encodeHTML(s) { - return s.replace(/[&<>'"]/g, function(x) { - return reverseEntityTable[x]; - }); - } - - function decodeHTML(str) { - return str.replace(/&(quot|amp|apos|lt|gt);/g, function(match, tag) { - return entityTable[tag]; - }).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(match, hex, numStr) { - var num = parseInt(hex || numStr, hex ? 16 : 10); // read num - return String.fromCharCode(num); - }); - } - - // When a style is set in JS, map it to the corresponding CSS attribute - var styleMap = { - "alignmentBaseline": "alignment-baseline", - "background": "background", - "backgroundAttachment": "background-attachment", - "backgroundClip": "background-clip", - "backgroundColor": "background-color", - "backgroundImage": "background-image", - "backgroundOrigin": "background-origin", - "backgroundPosition": "background-position", - "backgroundPositionX": "background-position-x", - "backgroundPositionY": "background-position-y", - "backgroundRepeat": "background-repeat", - "backgroundRepeatX": "background-repeat-x", - "backgroundRepeatY": "background-repeat-y", - "backgroundSize": "background-size", - "baselineShift": "baseline-shift", - "border": "border", - "borderBottom": "border-bottom", - "borderBottomColor": "border-bottom-color", - "borderBottomLeftRadius": "border-bottom-left-radius", - "borderBottomRightRadius": "border-bottom-right-radius", - "borderBottomStyle": "border-bottom-style", - "borderBottomWidth": "border-bottom-width", - "borderCollapse": "border-collapse", - "borderColor": "border-color", - "borderImage": "border-image", - "borderImageOutset": "border-image-outset", - "borderImageRepeat": "border-image-repeat", - "borderImageSlice": "border-image-slice", - "borderImageSource": "border-image-source", - "borderImageWidth": "border-image-width", - "borderLeft": "border-left", - "borderLeftColor": "border-left-color", - "borderLeftStyle": "border-left-style", - "borderLeftWidth": "border-left-width", - "borderRadius": "border-radius", - "borderRight": "border-right", - "borderRightColor": "border-right-color", - "borderRightStyle": "border-right-style", - "borderRightWidth": "border-right-width", - "borderSpacing": "border-spacing", - "borderStyle": "border-style", - "borderTop": "border-top", - "borderTopColor": "border-top-color", - "borderTopLeftRadius": "border-top-left-radius", - "borderTopRightRadius": "border-top-right-radius", - "borderTopStyle": "border-top-style", - "borderTopWidth": "border-top-width", - "borderWidth": "border-width", - "bottom": "bottom", - "boxShadow": "box-shadow", - "boxSizing": "box-sizing", - "captionSide": "caption-side", - "clear": "clear", - "clip": "clip", - "clipPath": "clip-path", - "clipRule": "clip-rule", - "color": "color", - "colorInterpolation": "color-interpolation", - "colorInterpolationFilters": "color-interpolation-filters", - "colorProfile": "color-profile", - "colorRendering": "color-rendering", - "content": "content", - "counterIncrement": "counter-increment", - "counterReset": "counter-reset", - "cursor": "cursor", - "direction": "direction", - "display": "display", - "dominantBaseline": "dominant-baseline", - "emptyCells": "empty-cells", - "enableBackground": "enable-background", - "fill": "fill", - "fillOpacity": "fill-opacity", - "fillRule": "fill-rule", - "filter": "filter", - "cssFloat": "float", - "floodColor": "flood-color", - "floodOpacity": "flood-opacity", - "font": "font", - "fontFamily": "font-family", - "fontSize": "font-size", - "fontStretch": "font-stretch", - "fontStyle": "font-style", - "fontVariant": "font-variant", - "fontWeight": "font-weight", - "glyphOrientationHorizontal": "glyph-orientation-horizontal", - "glyphOrientationVertical": "glyph-orientation-vertical", - "height": "height", - "imageRendering": "image-rendering", - "kerning": "kerning", - "left": "left", - "letterSpacing": "letter-spacing", - "lightingColor": "lighting-color", - "lineHeight": "line-height", - "listStyle": "list-style", - "listStyleImage": "list-style-image", - "listStylePosition": "list-style-position", - "listStyleType": "list-style-type", - "margin": "margin", - "marginBottom": "margin-bottom", - "marginLeft": "margin-left", - "marginRight": "margin-right", - "marginTop": "margin-top", - "marker": "marker", - "markerEnd": "marker-end", - "markerMid": "marker-mid", - "markerStart": "marker-start", - "mask": "mask", - "maxHeight": "max-height", - "maxWidth": "max-width", - "minHeight": "min-height", - "minWidth": "min-width", - "opacity": "opacity", - "orphans": "orphans", - "outline": "outline", - "outlineColor": "outline-color", - "outlineOffset": "outline-offset", - "outlineStyle": "outline-style", - "outlineWidth": "outline-width", - "overflow": "overflow", - "overflowX": "overflow-x", - "overflowY": "overflow-y", - "padding": "padding", - "paddingBottom": "padding-bottom", - "paddingLeft": "padding-left", - "paddingRight": "padding-right", - "paddingTop": "padding-top", - "page": "page", - "pageBreakAfter": "page-break-after", - "pageBreakBefore": "page-break-before", - "pageBreakInside": "page-break-inside", - "pointerEvents": "pointer-events", - "position": "position", - "quotes": "quotes", - "resize": "resize", - "right": "right", - "shapeRendering": "shape-rendering", - "size": "size", - "speak": "speak", - "src": "src", - "stopColor": "stop-color", - "stopOpacity": "stop-opacity", - "stroke": "stroke", - "strokeDasharray": "stroke-dasharray", - "strokeDashoffset": "stroke-dashoffset", - "strokeLinecap": "stroke-linecap", - "strokeLinejoin": "stroke-linejoin", - "strokeMiterlimit": "stroke-miterlimit", - "strokeOpacity": "stroke-opacity", - "strokeWidth": "stroke-width", - "tableLayout": "table-layout", - "textAlign": "text-align", - "textAnchor": "text-anchor", - "textDecoration": "text-decoration", - "textIndent": "text-indent", - "textLineThrough": "text-line-through", - "textLineThroughColor": "text-line-through-color", - "textLineThroughMode": "text-line-through-mode", - "textLineThroughStyle": "text-line-through-style", - "textLineThroughWidth": "text-line-through-width", - "textOverflow": "text-overflow", - "textOverline": "text-overline", - "textOverlineColor": "text-overline-color", - "textOverlineMode": "text-overline-mode", - "textOverlineStyle": "text-overline-style", - "textOverlineWidth": "text-overline-width", - "textRendering": "text-rendering", - "textShadow": "text-shadow", - "textTransform": "text-transform", - "textUnderline": "text-underline", - "textUnderlineColor": "text-underline-color", - "textUnderlineMode": "text-underline-mode", - "textUnderlineStyle": "text-underline-style", - "textUnderlineWidth": "text-underline-width", - "top": "top", - "unicodeBidi": "unicode-bidi", - "unicodeRange": "unicode-range", - "vectorEffect": "vector-effect", - "verticalAlign": "vertical-align", - "visibility": "visibility", - "whiteSpace": "white-space", - "widows": "widows", - "width": "width", - "wordBreak": "word-break", - "wordSpacing": "word-spacing", - "wordWrap": "word-wrap", - "writingMode": "writing-mode", - "zIndex": "z-index", - "zoom": "zoom", - }; - - // Elements that can be self-closing - var voidElems = { - "area": true, - "base": true, - "br": true, - "col": true, - "command": true, - "embed": true, - "hr": true, - "img": true, - "input": true, - "link": true, - "meta": true, - "param": true, - "source": true, - "wbr": true - }; - - var whitespace = [" ", "\t", "\n", "\r"]; - - // See https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType - var nodeTypes = { - ELEMENT_NODE: 1, - ATTRIBUTE_NODE: 2, - TEXT_NODE: 3, - CDATA_SECTION_NODE: 4, - ENTITY_REFERENCE_NODE: 5, - ENTITY_NODE: 6, - PROCESSING_INSTRUCTION_NODE: 7, - COMMENT_NODE: 8, - DOCUMENT_NODE: 9, - DOCUMENT_TYPE_NODE: 10, - DOCUMENT_FRAGMENT_NODE: 11, - NOTATION_NODE: 12 - }; - - function getElementsByTagName(tag) { - tag = tag.toUpperCase(); - var elems = []; - var allTags = (tag === "*"); - function getElems(node) { - var length = node.children.length; - for (var i = 0; i < length; i++) { - var child = node.children[i]; - if (allTags || (child.tagName === tag)) - elems.push(child); - getElems(child); - } - } - getElems(this); - elems._isLiveNodeList = true; - return elems; - } - - var Node = function () {}; - - Node.prototype = { - attributes: null, - childNodes: null, - localName: null, - nodeName: null, - parentNode: null, - textContent: null, - nextSibling: null, - previousSibling: null, - - get firstChild() { - return this.childNodes[0] || null; - }, - - get firstElementChild() { - return this.children[0] || null; - }, - - get lastChild() { - return this.childNodes[this.childNodes.length - 1] || null; - }, - - get lastElementChild() { - return this.children[this.children.length - 1] || null; - }, - - appendChild: function (child) { - if (child.parentNode) { - child.parentNode.removeChild(child); - } - - var last = this.lastChild; - if (last) - last.nextSibling = child; - child.previousSibling = last; - - if (child.nodeType === Node.ELEMENT_NODE) { - child.previousElementSibling = this.children[this.children.length - 1] || null; - this.children.push(child); - child.previousElementSibling && (child.previousElementSibling.nextElementSibling = child); - } - this.childNodes.push(child); - child.parentNode = this; - }, - - removeChild: function (child) { - var childNodes = this.childNodes; - var childIndex = childNodes.indexOf(child); - if (childIndex === -1) { - throw "removeChild: node not found"; - } else { - child.parentNode = null; - var prev = child.previousSibling; - var next = child.nextSibling; - if (prev) - prev.nextSibling = next; - if (next) - next.previousSibling = prev; - - if (child.nodeType === Node.ELEMENT_NODE) { - prev = child.previousElementSibling; - next = child.nextElementSibling; - if (prev) - prev.nextElementSibling = next; - if (next) - next.previousElementSibling = prev; - this.children.splice(this.children.indexOf(child), 1); - } - - child.previousSibling = child.nextSibling = null; - child.previousElementSibling = child.nextElementSibling = null; - - return childNodes.splice(childIndex, 1)[0]; - } - }, - - replaceChild: function (newNode, oldNode) { - var childNodes = this.childNodes; - var childIndex = childNodes.indexOf(oldNode); - if (childIndex === -1) { - throw "replaceChild: node not found"; - } else { - // This will take care of updating the new node if it was somewhere else before: - if (newNode.parentNode) - newNode.parentNode.removeChild(newNode); - - childNodes[childIndex] = newNode; - - // update the new node's sibling properties, and its new siblings' sibling properties - newNode.nextSibling = oldNode.nextSibling; - newNode.previousSibling = oldNode.previousSibling; - if (newNode.nextSibling) - newNode.nextSibling.previousSibling = newNode; - if (newNode.previousSibling) - newNode.previousSibling.nextSibling = newNode; - - newNode.parentNode = this; - - // Now deal with elements before we clear out those values for the old node, - // because it can help us take shortcuts here: - if (newNode.nodeType === Node.ELEMENT_NODE) { - if (oldNode.nodeType === Node.ELEMENT_NODE) { - // Both were elements, which makes this easier, we just swap things out: - newNode.previousElementSibling = oldNode.previousElementSibling; - newNode.nextElementSibling = oldNode.nextElementSibling; - if (newNode.previousElementSibling) - newNode.previousElementSibling.nextElementSibling = newNode; - if (newNode.nextElementSibling) - newNode.nextElementSibling.previousElementSibling = newNode; - this.children[this.children.indexOf(oldNode)] = newNode; - } else { - // Hard way: - newNode.previousElementSibling = (function() { - for (var i = childIndex - 1; i >= 0; i--) { - if (childNodes[i].nodeType === Node.ELEMENT_NODE) - return childNodes[i]; - } - return null; - })(); - if (newNode.previousElementSibling) { - newNode.nextElementSibling = newNode.previousElementSibling.nextElementSibling; - } else { - newNode.nextElementSibling = (function() { - for (var i = childIndex + 1; i < childNodes.length; i++) { - if (childNodes[i].nodeType === Node.ELEMENT_NODE) - return childNodes[i]; - } - return null; - })(); - } - if (newNode.previousElementSibling) - newNode.previousElementSibling.nextElementSibling = newNode; - if (newNode.nextElementSibling) - newNode.nextElementSibling.previousElementSibling = newNode; - - if (newNode.nextElementSibling) - this.children.splice(this.children.indexOf(newNode.nextElementSibling), 0, newNode); - else - this.children.push(newNode); - } - } else if (oldNode.nodeType === Node.ELEMENT_NODE) { - // new node is not an element node. - // if the old one was, update its element siblings: - if (oldNode.previousElementSibling) - oldNode.previousElementSibling.nextElementSibling = oldNode.nextElementSibling; - if (oldNode.nextElementSibling) - oldNode.nextElementSibling.previousElementSibling = oldNode.previousElementSibling; - this.children.splice(this.children.indexOf(oldNode), 1); - - // If the old node wasn't an element, neither the new nor the old node was an element, - // and the children array and its members shouldn't need any updating. - } - - - oldNode.parentNode = null; - oldNode.previousSibling = null; - oldNode.nextSibling = null; - if (oldNode.nodeType === Node.ELEMENT_NODE) { - oldNode.previousElementSibling = null; - oldNode.nextElementSibling = null; - } - return oldNode; - } - }, - - __JSDOMParser__: true, - }; - - for (var nodeType in nodeTypes) { - Node[nodeType] = Node.prototype[nodeType] = nodeTypes[nodeType]; - } - - var Attribute = function (name, value) { - this.name = name; - this._value = value; - }; - - Attribute.prototype = { - get value() { - return this._value; - }, - setValue: function(newValue) { - this._value = newValue; - }, - getEncodedValue: function() { - return encodeHTML(this._value); - }, - }; - - var Comment = function () { - this.childNodes = []; - }; - - Comment.prototype = { - __proto__: Node.prototype, - - nodeName: "#comment", - nodeType: Node.COMMENT_NODE - }; - - var Text = function () { - this.childNodes = []; - }; - - Text.prototype = { - __proto__: Node.prototype, - - nodeName: "#text", - nodeType: Node.TEXT_NODE, - get textContent() { - if (typeof this._textContent === "undefined") { - this._textContent = decodeHTML(this._innerHTML || ""); - } - return this._textContent; - }, - get innerHTML() { - if (typeof this._innerHTML === "undefined") { - this._innerHTML = encodeTextContentHTML(this._textContent || ""); - } - return this._innerHTML; - }, - - set innerHTML(newHTML) { - this._innerHTML = newHTML; - delete this._textContent; - }, - set textContent(newText) { - this._textContent = newText; - delete this._innerHTML; - }, - }; - - var Document = function (url) { - this.documentURI = url; - this.styleSheets = []; - this.childNodes = []; - this.children = []; - }; - - Document.prototype = { - __proto__: Node.prototype, - - nodeName: "#document", - nodeType: Node.DOCUMENT_NODE, - title: "", - - getElementsByTagName: getElementsByTagName, - - getElementById: function (id) { - function getElem(node) { - var length = node.children.length; - if (node.id === id) - return node; - for (var i = 0; i < length; i++) { - var el = getElem(node.children[i]); - if (el) - return el; - } - return null; - } - return getElem(this); - }, - - createElement: function (tag) { - var node = new Element(tag); - return node; - }, - - createTextNode: function (text) { - var node = new Text(); - node.textContent = text; - return node; - }, - - get baseURI() { - if (!this.hasOwnProperty("_baseURI")) { - this._baseURI = this.documentURI; - var baseElements = this.getElementsByTagName("base"); - var href = baseElements[0] && baseElements[0].getAttribute("href"); - if (href) { - try { - this._baseURI = (new URL(href, this._baseURI)).href; - } catch (ex) {/* Just fall back to documentURI */} - } - } - return this._baseURI; - }, - }; - - var Element = function (tag) { - // We use this to find the closing tag. - this._matchingTag = tag; - // We're explicitly a non-namespace aware parser, we just pretend it's all HTML. - var lastColonIndex = tag.lastIndexOf(":"); - if (lastColonIndex != -1) { - tag = tag.substring(lastColonIndex + 1); - } - this.attributes = []; - this.childNodes = []; - this.children = []; - this.nextElementSibling = this.previousElementSibling = null; - this.localName = tag.toLowerCase(); - this.tagName = tag.toUpperCase(); - this.style = new Style(this); - }; - - Element.prototype = { - __proto__: Node.prototype, - - nodeType: Node.ELEMENT_NODE, - - getElementsByTagName: getElementsByTagName, - - get className() { - return this.getAttribute("class") || ""; - }, - - set className(str) { - this.setAttribute("class", str); - }, - - get id() { - return this.getAttribute("id") || ""; - }, - - set id(str) { - this.setAttribute("id", str); - }, - - get href() { - return this.getAttribute("href") || ""; - }, - - set href(str) { - this.setAttribute("href", str); - }, - - get src() { - return this.getAttribute("src") || ""; - }, - - set src(str) { - this.setAttribute("src", str); - }, - - get srcset() { - return this.getAttribute("srcset") || ""; - }, - - set srcset(str) { - this.setAttribute("srcset", str); - }, - - get nodeName() { - return this.tagName; - }, - - get innerHTML() { - function getHTML(node) { - var i = 0; - for (i = 0; i < node.childNodes.length; i++) { - var child = node.childNodes[i]; - if (child.localName) { - arr.push("<" + child.localName); - - // serialize attribute list - for (var j = 0; j < child.attributes.length; j++) { - var attr = child.attributes[j]; - // the attribute value will be HTML escaped. - var val = attr.getEncodedValue(); - var quote = (val.indexOf('"') === -1 ? '"' : "'"); - arr.push(" " + attr.name + "=" + quote + val + quote); - } - - if (child.localName in voidElems && !child.childNodes.length) { - // if this is a self-closing element, end it here - arr.push("/>"); - } else { - // otherwise, add its children - arr.push(">"); - getHTML(child); - arr.push(""); - } - } else { - // This is a text node, so asking for innerHTML won't recurse. - arr.push(child.innerHTML); - } - } - } - - // Using Array.join() avoids the overhead from lazy string concatenation. - var arr = []; - getHTML(this); - return arr.join(""); - }, - - set innerHTML(html) { - var parser = new JSDOMParser(); - var node = parser.parse(html); - var i; - for (i = this.childNodes.length; --i >= 0;) { - this.childNodes[i].parentNode = null; - } - this.childNodes = node.childNodes; - this.children = node.children; - for (i = this.childNodes.length; --i >= 0;) { - this.childNodes[i].parentNode = this; - } - }, - - set textContent(text) { - // clear parentNodes for existing children - for (var i = this.childNodes.length; --i >= 0;) { - this.childNodes[i].parentNode = null; - } - - var node = new Text(); - this.childNodes = [ node ]; - this.children = []; - node.textContent = text; - node.parentNode = this; - }, - - get textContent() { - function getText(node) { - var nodes = node.childNodes; - for (var i = 0; i < nodes.length; i++) { - var child = nodes[i]; - if (child.nodeType === 3) { - text.push(child.textContent); - } else { - getText(child); - } - } - } - - // Using Array.join() avoids the overhead from lazy string concatenation. - // See http://blog.cdleary.com/2012/01/string-representation-in-spidermonkey/#ropes - var text = []; - getText(this); - return text.join(""); - }, - - getAttribute: function (name) { - for (var i = this.attributes.length; --i >= 0;) { - var attr = this.attributes[i]; - if (attr.name === name) { - return attr.value; - } - } - return undefined; - }, - - setAttribute: function (name, value) { - for (var i = this.attributes.length; --i >= 0;) { - var attr = this.attributes[i]; - if (attr.name === name) { - attr.setValue(value); - return; - } - } - this.attributes.push(new Attribute(name, value)); - }, - - removeAttribute: function (name) { - for (var i = this.attributes.length; --i >= 0;) { - var attr = this.attributes[i]; - if (attr.name === name) { - this.attributes.splice(i, 1); - break; - } - } - }, - - hasAttribute: function (name) { - return this.attributes.some(function (attr) { - return attr.name == name; - }); - }, - }; - - var Style = function (node) { - this.node = node; - }; - - // getStyle() and setStyle() use the style attribute string directly. This - // won't be very efficient if there are a lot of style manipulations, but - // it's the easiest way to make sure the style attribute string and the JS - // style property stay in sync. Readability.js doesn't do many style - // manipulations, so this should be okay. - Style.prototype = { - getStyle: function (styleName) { - var attr = this.node.getAttribute("style"); - if (!attr) - return undefined; - - var styles = attr.split(";"); - for (var i = 0; i < styles.length; i++) { - var style = styles[i].split(":"); - var name = style[0].trim(); - if (name === styleName) - return style[1].trim(); - } - - return undefined; - }, - - setStyle: function (styleName, styleValue) { - var value = this.node.getAttribute("style") || ""; - var index = 0; - do { - var next = value.indexOf(";", index) + 1; - var length = next - index - 1; - var style = (length > 0 ? value.substr(index, length) : value.substr(index)); - if (style.substr(0, style.indexOf(":")).trim() === styleName) { - value = value.substr(0, index).trim() + (next ? " " + value.substr(next).trim() : ""); - break; - } - index = next; - } while (index); - - value += " " + styleName + ": " + styleValue + ";"; - this.node.setAttribute("style", value.trim()); - } - }; - - // For each item in styleMap, define a getter and setter on the style - // property. - for (var jsName in styleMap) { - (function (cssName) { - Style.prototype.__defineGetter__(jsName, function () { - return this.getStyle(cssName); - }); - Style.prototype.__defineSetter__(jsName, function (value) { - this.setStyle(cssName, value); - }); - })(styleMap[jsName]); - } - - var JSDOMParser = function () { - this.currentChar = 0; - - // In makeElementNode() we build up many strings one char at a time. Using - // += for this results in lots of short-lived intermediate strings. It's - // better to build an array of single-char strings and then join() them - // together at the end. And reusing a single array (i.e. |this.strBuf|) - // over and over for this purpose uses less memory than using a new array - // for each string. - this.strBuf = []; - - // Similarly, we reuse this array to return the two arguments from - // makeElementNode(), which saves us from having to allocate a new array - // every time. - this.retPair = []; - - this.errorState = ""; - }; - - JSDOMParser.prototype = { - error: function(m) { - if (typeof dump !== "undefined") { - dump("JSDOMParser error: " + m + "\n"); - } else if (typeof console !== "undefined") { - console.log("JSDOMParser error: " + m + "\n"); - } - this.errorState += m + "\n"; - }, - - /** - * Look at the next character without advancing the index. - */ - peekNext: function () { - return this.html[this.currentChar]; - }, - - /** - * Get the next character and advance the index. - */ - nextChar: function () { - return this.html[this.currentChar++]; - }, - - /** - * Called after a quote character is read. This finds the next quote - * character and returns the text string in between. - */ - readString: function (quote) { - var str; - var n = this.html.indexOf(quote, this.currentChar); - if (n === -1) { - this.currentChar = this.html.length; - str = null; - } else { - str = this.html.substring(this.currentChar, n); - this.currentChar = n + 1; - } - - return str; - }, - - /** - * Called when parsing a node. This finds the next name/value attribute - * pair and adds the result to the attributes list. - */ - readAttribute: function (node) { - var name = ""; - - var n = this.html.indexOf("=", this.currentChar); - if (n === -1) { - this.currentChar = this.html.length; - } else { - // Read until a '=' character is hit; this will be the attribute key - name = this.html.substring(this.currentChar, n); - this.currentChar = n + 1; - } - - if (!name) - return; - - // After a '=', we should see a '"' for the attribute value - var c = this.nextChar(); - if (c !== '"' && c !== "'") { - this.error("Error reading attribute " + name + ", expecting '\"'"); - return; - } - - // Read the attribute value (and consume the matching quote) - var value = this.readString(c); - - node.attributes.push(new Attribute(name, decodeHTML(value))); - - return; - }, - - /** - * Parses and returns an Element node. This is called after a '<' has been - * read. - * - * @returns an array; the first index of the array is the parsed node; - * the second index is a boolean indicating whether this is a void - * Element - */ - makeElementNode: function (retPair) { - var c = this.nextChar(); - - // Read the Element tag name - var strBuf = this.strBuf; - strBuf.length = 0; - while (whitespace.indexOf(c) == -1 && c !== ">" && c !== "/") { - if (c === undefined) - return false; - strBuf.push(c); - c = this.nextChar(); - } - var tag = strBuf.join(""); - - if (!tag) - return false; - - var node = new Element(tag); - - // Read Element attributes - while (c !== "/" && c !== ">") { - if (c === undefined) - return false; - while (whitespace.indexOf(this.html[this.currentChar++]) != -1) { - // Advance cursor to first non-whitespace char. - } - this.currentChar--; - c = this.nextChar(); - if (c !== "/" && c !== ">") { - --this.currentChar; - this.readAttribute(node); - } - } - - // If this is a self-closing tag, read '/>' - var closed = false; - if (c === "/") { - closed = true; - c = this.nextChar(); - if (c !== ">") { - this.error("expected '>' to close " + tag); - return false; - } - } - - retPair[0] = node; - retPair[1] = closed; - return true; - }, - - /** - * If the current input matches this string, advance the input index; - * otherwise, do nothing. - * - * @returns whether input matched string - */ - match: function (str) { - var strlen = str.length; - if (this.html.substr(this.currentChar, strlen).toLowerCase() === str.toLowerCase()) { - this.currentChar += strlen; - return true; - } - return false; - }, - - /** - * Searches the input until a string is found and discards all input up to - * and including the matched string. - */ - discardTo: function (str) { - var index = this.html.indexOf(str, this.currentChar) + str.length; - if (index === -1) - this.currentChar = this.html.length; - this.currentChar = index; - }, - - /** - * Reads child nodes for the given node. - */ - readChildren: function (node) { - var child; - while ((child = this.readNode())) { - // Don't keep Comment nodes - if (child.nodeType !== 8) { - node.appendChild(child); - } - } - }, - - discardNextComment: function() { - if (this.match("--")) { - this.discardTo("-->"); - } else { - var c = this.nextChar(); - while (c !== ">") { - if (c === undefined) - return null; - if (c === '"' || c === "'") - this.readString(c); - c = this.nextChar(); - } - } - return new Comment(); - }, - - - /** - * Reads the next child node from the input. If we're reading a closing - * tag, or if we've reached the end of input, return null. - * - * @returns the node - */ - readNode: function () { - var c = this.nextChar(); - - if (c === undefined) - return null; - - // Read any text as Text node - var textNode; - if (c !== "<") { - --this.currentChar; - textNode = new Text(); - var n = this.html.indexOf("<", this.currentChar); - if (n === -1) { - textNode.innerHTML = this.html.substring(this.currentChar, this.html.length); - this.currentChar = this.html.length; - } else { - textNode.innerHTML = this.html.substring(this.currentChar, n); - this.currentChar = n; - } - return textNode; - } - - if (this.match("![CDATA[")) { - var endChar = this.html.indexOf("]]>", this.currentChar); - if (endChar === -1) { - this.error("unclosed CDATA section"); - return null; - } - textNode = new Text(); - textNode.textContent = this.html.substring(this.currentChar, endChar); - this.currentChar = endChar + ("]]>").length; - return textNode; - } - - c = this.peekNext(); - - // Read Comment node. Normally, Comment nodes know their inner - // textContent, but we don't really care about Comment nodes (we throw - // them away in readChildren()). So just returning an empty Comment node - // here is sufficient. - if (c === "!" || c === "?") { - // We're still before the ! or ? that is starting this comment: - this.currentChar++; - return this.discardNextComment(); - } - - // If we're reading a closing tag, return null. This means we've reached - // the end of this set of child nodes. - if (c === "/") { - --this.currentChar; - return null; - } - - // Otherwise, we're looking at an Element node - var result = this.makeElementNode(this.retPair); - if (!result) - return null; - - var node = this.retPair[0]; - var closed = this.retPair[1]; - var localName = node.localName; - - // If this isn't a void Element, read its child nodes - if (!closed) { - this.readChildren(node); - var closingTag = ""; - if (!this.match(closingTag)) { - this.error("expected '" + closingTag + "' and got " + this.html.substr(this.currentChar, closingTag.length)); - return null; - } - } - - // Only use the first title, because SVG might have other - // title elements which we don't care about (medium.com - // does this, at least). - if (localName === "title" && !this.doc.title) { - this.doc.title = node.textContent.trim(); - } else if (localName === "head") { - this.doc.head = node; - } else if (localName === "body") { - this.doc.body = node; - } else if (localName === "html") { - this.doc.documentElement = node; - } - - return node; - }, - - /** - * Parses an HTML string and returns a JS implementation of the Document. - */ - parse: function (html, url) { - this.html = html; - var doc = this.doc = new Document(url); - this.readChildren(doc); - - // If this is an HTML document, remove root-level children except for the - // node - if (doc.documentElement) { - for (var i = doc.childNodes.length; --i >= 0;) { - var child = doc.childNodes[i]; - if (child !== doc.documentElement) { - doc.removeChild(child); - } - } - } - - return doc; - } - }; - - // Attach the standard DOM types to the global scope - global.Node = Node; - global.Comment = Comment; - global.Document = Document; - global.Element = Element; - global.Text = Text; - - // Attach JSDOMParser to the global scope - global.JSDOMParser = JSDOMParser; - -})(this); - -if (typeof module === "object") { - module.exports = this.JSDOMParser; -} diff --git a/apps/web-clipper/lib/Readability-readerable.js b/apps/web-clipper/lib/Readability-readerable.js deleted file mode 100644 index 64be5e15e..000000000 --- a/apps/web-clipper/lib/Readability-readerable.js +++ /dev/null @@ -1,108 +0,0 @@ -/* eslint-env es6:false */ -/* - * Copyright (c) 2010 Arc90 Inc - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This code is heavily based on Arc90's readability.js (1.7.1) script - * available at: http://code.google.com/p/arc90labs-readability - */ - -var REGEXPS = { - // NOTE: These two regular expressions are duplicated in - // Readability.js. Please keep both copies in sync. - unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i, - okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i, -}; - -function isNodeVisible(node) { - // Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes. - return (!node.style || node.style.display != "none") - && !node.hasAttribute("hidden") - //check for "fallback-image" so that wikimedia math images are displayed - && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true" || (node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1)); -} - -/** - * Decides whether or not the document is reader-able without parsing the whole thing. - * @param {Object} options Configuration object. - * @param {number} [options.minContentLength=140] The minimum node content length used to decide if the document is readerable. - * @param {number} [options.minScore=20] The minumum cumulated 'score' used to determine if the document is readerable. - * @param {Function} [options.visibilityChecker=isNodeVisible] The function used to determine if a node is visible. - * @return {boolean} Whether or not we suspect Readability.parse() will suceeed at returning an article object. - */ -function isProbablyReaderable(doc, options = {}) { - // For backward compatibility reasons 'options' can either be a configuration object or the function used - // to determine if a node is visible. - if (typeof options == "function") { - options = { visibilityChecker: options }; - } - - var defaultOptions = { minScore: 20, minContentLength: 140, visibilityChecker: isNodeVisible }; - options = Object.assign(defaultOptions, options); - - var nodes = doc.querySelectorAll("p, pre, article"); - - // Get
nodes which have
node(s) and append them into the `nodes` variable. - // Some articles' DOM structures might look like - //
- // Sentences
- //
- // Sentences
- //
- var brNodes = doc.querySelectorAll("div > br"); - if (brNodes.length) { - var set = new Set(nodes); - [].forEach.call(brNodes, function (node) { - set.add(node.parentNode); - }); - nodes = Array.from(set); - } - - var score = 0; - // This is a little cheeky, we use the accumulator 'score' to decide what to return from - // this callback: - return [].some.call(nodes, function (node) { - if (!options.visibilityChecker(node)) { - return false; - } - - var matchString = node.className + " " + node.id; - if (REGEXPS.unlikelyCandidates.test(matchString) && - !REGEXPS.okMaybeItsACandidate.test(matchString)) { - return false; - } - - if (node.matches("li p")) { - return false; - } - - var textContentLength = node.textContent.trim().length; - if (textContentLength < options.minContentLength) { - return false; - } - - score += Math.sqrt(textContentLength - options.minContentLength); - - if (score > options.minScore) { - return true; - } - return false; - }); -} - -if (typeof module === "object") { - module.exports = isProbablyReaderable; -}