mirror of
				https://github.com/zadam/trilium.git
				synced 2025-10-31 19:49:01 +01:00 
			
		
		
		
	Readability.js update
This commit is contained in:
		
							parent
							
								
									338f01be01
								
							
						
					
					
						commit
						d3539ec9d7
					
				
							
								
								
									
										2
									
								
								.idea/vcs.xml
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										2
									
								
								.idea/vcs.xml
									
									
									
										generated
									
									
									
								
							| @ -1,6 +1,6 @@ | |||||||
| <?xml version="1.0" encoding="UTF-8"?> | <?xml version="1.0" encoding="UTF-8"?> | ||||||
| <project version="4"> | <project version="4"> | ||||||
|   <component name="VcsDirectoryMappings"> |   <component name="VcsDirectoryMappings"> | ||||||
|     <mapping directory="$PROJECT_DIR$" vcs="Git" /> |     <mapping directory="$PROJECT_DIR$/../.." vcs="Git" /> | ||||||
|   </component> |   </component> | ||||||
| </project> | </project> | ||||||
| @ -1316,6 +1316,101 @@ Readability.prototype = { | |||||||
|     return metadata; |     return metadata; | ||||||
|   }, |   }, | ||||||
| 
 | 
 | ||||||
|  |   /** | ||||||
|  |    * Check whether a node is an image, or wraps exactly one image and nothing else, | ||||||
|  |    * whether as a direct child or as a deeper descendant. | ||||||
|  |    * | ||||||
|  |    * A node whose tagName is "IMG" qualifies immediately. Any other node qualifies | ||||||
|  |    * only if it has exactly one element child and its trimmed textContent is empty; | ||||||
|  |    * the same check is then applied recursively to that only child. | ||||||
|  |    * | ||||||
|  |    * @param {Element} node The node to inspect. | ||||||
|  |    * @return {boolean} true if node is, or wraps only, a single image. | ||||||
|  |    **/ | ||||||
|  |   _isSingleImage: function(node) { | ||||||
|  |     if (node.tagName === "IMG") { | ||||||
|  |       return true; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (node.children.length !== 1 || node.textContent.trim() !== "") { | ||||||
|  |       return false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return this._isSingleImage(node.children[0]); | ||||||
|  |   }, | ||||||
|  | 
 | ||||||
|  |   /** | ||||||
|  |    * Find all <noscript> elements whose previous element sibling is an image (or an | ||||||
|  |    * element wrapping a single image) and which themselves contain only one <img> | ||||||
|  |    * element. Replace that previous sibling with the image from inside the <noscript> | ||||||
|  |    * tag. This improves the quality of the images we use on some sites (e.g. Medium). | ||||||
|  |    * | ||||||
|  |    * Before that, every <img> that has none of the src, srcset, data-src or data-srcset | ||||||
|  |    * attributes and no attribute value that looks like an image file name is removed. | ||||||
|  |    * | ||||||
|  |    * Non-empty src, srcset and image-looking attribute values of the replaced image are | ||||||
|  |    * copied onto the new image; when the new image already has an attribute of that name | ||||||
|  |    * with a different value, the old value is stored under a "data-old-" prefixed name. | ||||||
|  |    * | ||||||
|  |    * NOTE(review): this method leaves the <noscript> element itself in the document; | ||||||
|  |    * presumably a later cleanup step removes it - confirm against the caller. | ||||||
|  |    * | ||||||
|  |    * @param doc The document to process; it must provide getElementsByTagName and | ||||||
|  |    *            createElement. | ||||||
|  |    **/ | ||||||
|  |   _unwrapNoscriptImages: function(doc) { | ||||||
|  |     // Find every img that has no source and no attribute that might contain an image, and remove it.
 | ||||||
|  |     // This is done to prevent a placeholder img from being replaced by the img from noscript in the next step.
 | ||||||
|  |     var imgs = Array.from(doc.getElementsByTagName("img")); | ||||||
|  |     this._forEachNode(imgs, function(img) { | ||||||
|  |       for (var i = 0; i < img.attributes.length; i++) { | ||||||
|  |         var attr = img.attributes[i]; | ||||||
|  |         switch (attr.name) { | ||||||
|  |           case "src": | ||||||
|  |           case "srcset": | ||||||
|  |           case "data-src": | ||||||
|  |           case "data-srcset": | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (/\.(jpg|jpeg|png|webp)/i.test(attr.value)) { | ||||||
|  |           return; | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       img.parentNode.removeChild(img); | ||||||
|  |     }); | ||||||
|  | 
 | ||||||
|  |     // Next, find each noscript and try to extract its image.
 | ||||||
|  |     var noscripts = Array.from(doc.getElementsByTagName("noscript")); | ||||||
|  |     this._forEachNode(noscripts, function(noscript) { | ||||||
|  |       // Parse the content of the noscript and make sure it contains only a single image.
 | ||||||
|  |       var tmp = doc.createElement("div"); | ||||||
|  |       tmp.innerHTML = noscript.innerHTML; | ||||||
|  |       if (!this._isSingleImage(tmp)) { | ||||||
|  |         return; | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       // If the noscript has a previous sibling element and it contains only an image,
 | ||||||
|  |       // replace it with the noscript content. However, we also keep the old
 | ||||||
|  |       // attributes that might contain an image.
 | ||||||
|  |       var prevElement = noscript.previousElementSibling; | ||||||
|  |       if (prevElement && this._isSingleImage(prevElement)) { | ||||||
|  |         var prevImg = prevElement; | ||||||
|  |         if (prevImg.tagName !== "IMG") { | ||||||
|  |           prevImg = prevElement.getElementsByTagName("img")[0]; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         var newImg = tmp.getElementsByTagName("img")[0]; | ||||||
|  |         for (var i = 0; i < prevImg.attributes.length; i++) { | ||||||
|  |           var attr = prevImg.attributes[i]; | ||||||
|  |           if (attr.value === "") { | ||||||
|  |             continue; | ||||||
|  |           } | ||||||
|  | 
 | ||||||
|  |           if (attr.name === "src" || attr.name === "srcset" || /\.(jpg|jpeg|png|webp)/i.test(attr.value)) { | ||||||
|  |             if (newImg.getAttribute(attr.name) === attr.value) { | ||||||
|  |               continue; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             var attrName = attr.name; | ||||||
|  |             if (newImg.hasAttribute(attrName)) { | ||||||
|  |               attrName = "data-old-" + attrName; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             newImg.setAttribute(attrName, attr.value); | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         noscript.parentNode.replaceChild(tmp.firstElementChild, prevElement); | ||||||
|  |       } | ||||||
|  |     }); | ||||||
|  |   }, | ||||||
|  | 
 | ||||||
|   /** |   /** | ||||||
|    * Removes script tags from the document. |    * Removes script tags from the document. | ||||||
|    * |    * | ||||||
| @ -1828,6 +1923,9 @@ Readability.prototype = { | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     // Unwrap image from noscript
 | ||||||
|  |     this._unwrapNoscriptImages(this._doc); | ||||||
|  | 
 | ||||||
|     // Remove script tags from the document.
 |     // Remove script tags from the document.
 | ||||||
|     this._removeScripts(this._doc); |     this._removeScripts(this._doc); | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 zadam
						zadam