🏠 Home 

SingleFile Pure - Pure html downloader

将当前网页保存为一个纯文本的.html网页文件,不保存二进制


Install this script?
  1. // ==UserScript==
  2. // @name SingleFile Pure - Pure html downloader
  3. // @name:zh SingleFile Pure - 保存纯HTML
  4. // @namespace https://gist.github.com/KnIfER
  5. // @version 3
  6. // @description 将当前网页保存为一个纯文本的.html网页文件,不保存二进制
  7. // @description:en Save webpages into one pure html file, without binary data.
  8. // @author PY-DNG
  9. // @license MIT
  10. // @grant GM_registerMenuCommand
  11. // @grant GM_unregisterMenuCommand
  12. // @grant unsafeWindow
  13. // @icon 
  14. // @match *://*/*
  15. // @include *
  16. // @noframes
  17. // ==/UserScript==
  18. // based on @PY-DNG https://greasyfork.org/zh-CN/scripts/419798-singlefile-单文件保存网页
  19. (function() {
  20. 'use strict';
  /**
   * Write a message to the console behind a highlighted "SingleFile" tag.
   * @param {...*} args Values forwarded to console.log after the tag.
   */
  function debug(...args) {
    const label = "%c SingleFile: ";
    const labelCss = "color:#333!important;background:#0FF;";
    console.log(label, labelCss, ...args);
  }
  // Bootstrap: pick the window object to work with and set up hot-reload state.
  // `win` is window.unsafeWindow when that exists, otherwise the plain window.
  // `bank` is a state object stored on win._sfpr_bank; it is undefined the
  // first time this script runs against a given window.
  var win = window.unsafeWindow || window, doc=document, d=doc
  , bank=win._sfpr_bank;
  // Menu (un)registration defaults to `debug` (log only); the real GM_* functions
  // are only wired in below when no bank exists yet.
  var rM=debug, rMd=debug, err=console.error;
  if(!bank) {
    // First run: create the shared bank and use the real menu API.
    bank = win._sfpr_bank = {};
    rM = GM_registerMenuCommand;
    rMd = GM_unregisterMenuCommand;
  } else try{
    // A previous instance left a bank behind: ask it to remove its listeners.
    bank.unreg();
  } catch(e){debug(e)}
  // Publish this instance's cleanup hook; `unregs` collects the undo callbacks
  // that addEvent() pushes and uninstall() runs.
  bank.unreg = uninstall;var unregs = [];
  /**
   * Hot-reload cleanup: run every undo callback collected in `unregs`, in
   * registration order.
   * @returns {number} Always 1.
   */
  function uninstall() {
    for (const undo of unregs) {
      undo();
    }
    return 1;
  }
  /**
   * Attach an event listener and remember how to detach it again.
   * @param {string} a Event type.
   * @param {Function} b Listener.
   * @param {*} c Third argument passed to add/removeEventListener.
   * @param {EventTarget} [d] Target; falls back to `win` when falsy.
   */
  function addEvent(a, b, c, d) {
    const target = d ? d : win;
    target.addEventListener(a, b, c);
    // Queue the matching removal so uninstall() can undo this registration.
    unregs.push(() => {
      target.removeEventListener(a, b, c);
    });
  }
  // UI strings, keyed by language code ('zh', 'en').
  const MSG = {
    zh: {
      SavePage: '保存纯网页',
      Saving: '保存中……',
    },
    en: {
      SavePage: 'Save pure webpage',
      Saving: 'Saving, please wait……',
    },
  };
  // Event and menu wiring.
  // evtSt / evtEd are the "before capture" / "after capture" event names; the
  // second is derived from the first by swapping 'before' for 'after'.
  var btn, evtSt="single-file-on-before-capture-request", evtEd=evtSt.replace('before', 'after');
  // Before a capture: detach an existing #sf-pure element from the page and keep
  // a reference to it in `btn`.
  addEvent(evtSt, (e) => {
    btn = doc.getElementById('sf-pure');
    if(btn) btn.remove();
  });
  // After a capture: put the detached element back and clear the reference.
  addEvent(evtEd, (e) => {
    if(btn) {
      doc.body.append(btn);
      btn = 0;
    }
  });
  // Reduce navigator.language to the part before the first '-' (e.g. 'zh-CN' -> 'zh').
  var t = navigator.language;
  if(t.includes('-')) t = t.slice(0, t.indexOf('-'));
  // GUI
  // GT is the string table for the detected language, falling back to English.
  // fnMenu is assigned inside the rM(...) call so the same handler can be reused
  // by the event and keyboard triggers below.
  var GT=MSG[t]||MSG['en'], fnMenu, menu = rM(GT.SavePage, fnMenu = function() {
    Generate_Single_File({
      onfinish: (FinalHTML) => {
        var title = doc.title;
        // The file name template contains no '{Time}', so the second replace()
        // leaves the name as '<title>.html'.
        saveTextToFile(FinalHTML, '{Title}.html'.replace('{Title}', title).replace('{Time}', getTime('-', '-')));
        dispatchEvent(new CustomEvent(evtEd));
        // Unregister and re-register the menu entry after each save.
        rMd(menu);
        menu = rM(GT.SavePage, fnMenu);
      }
    });
  });
  // Custom event trigger: dispatching "single-file-pure-save" starts a save.
  addEvent("single-file-pure-save", (e) => {
    fnMenu();
    stop(e);
  });
  // Keyboard trigger (Ctrl+Alt+S), skipped when win.saveAsTaken is truthy.
  if(!win.saveAsTaken)
  addEvent('keydown', (e) => {
    if(e.key=='s' && e.altKey && e.ctrlKey) {
      fnMenu();
      stop(e);
    }
  });
  /**
   * Build an HTML snapshot of the current page and pass it to the caller.
   *
   * The live document is cloned; the clone is stripped of scripts, inline event
   * handlers, modulepreload links and CSP meta tags; URLs in links, images,
   * srcsets, inline background images and style text are made absolute; canvas
   * bitmaps and form values are recorded so a small embedded script can restore
   * them when the saved file is opened; finally an info button is appended.
   *
   * @param {{onfinish: function(string): void}} details
   *        `details.onfinish` receives the serialized `<html>` element once all
   *        pending asynchronous tasks have completed.
   */
  function Generate_Single_File(details) {
    debug('Generate started...');
    if (!bank.init) {
      dispatchEvent(new CustomEvent("single-file-user-script-init"));
      bank.init = 1;
    }
    // Announce the capture before cloning so listeners can tidy the live page
    // (this script's own listener detaches the #sf-pure button).
    dispatchEvent(new CustomEvent(evtSt));

    // All modifications happen on a deep clone; the live page stays untouched.
    const dom = doc.cloneNode(true);

    // Small DOM helpers bound to the clone.
    const $ = (...a) => dom.querySelector(...a);
    const $_ = (...a) => dom.querySelectorAll(...a);
    const $C = (...a) => dom.createElement(...a);
    const $A = (parent, child) => parent.appendChild(child);
    const $R = (e) => (e.parentElement ? e.parentElement.removeChild(e) : null);

    // True for strings that start with '/' or 'http'.
    function ishttp(s) {
      // !/^[^\/:]*:/.test(s)
      if (s) return s.startsWith('/') || s.startsWith('http');
    }

    // Per-element properties that cannot be expressed in markup, keyed by the
    // element's CSS path; serialized into the saved page and re-applied on load.
    const ElmProps = {
      props: {},
      cssMap: new Map(),
      // Memoized cssPath() lookup.
      getCssPath(elm) {
        if (!this.cssMap.has(elm)) this.cssMap.set(elm, cssPath(elm));
        return this.cssMap.get(elm);
      },
      add(elm, type, value) {
        const path = this.getCssPath(elm);
        (this.props[path] || (this.props[path] = [])).push({type: type, value: value});
      },
    };

    // Build the floating info button that links back to the original page.
    function about() {
      var t = $C('A');
      t.id = 'sf-pure';
      t.style = 'position:fixed;right:16px;top:16px;width:24px;height:24px;color:#2d2d2d;background-color:#737373;border:2px solid;border-color:#eee;border-radius:16px;z-index:2147483647;opacity:0.7;display:flex;justify-content:center;align-items:center;';
      t.innerHTML = '<svg style=\'width:65%;height:65%;margin-left:1px;\' xmlns="http://www.w3.org/2000/svg"viewBox="0 0 64 64"width="64"height="64"><style>.p{fill:#f0f0f0}</style><path class="p"d="M30 3A3 3 0 1130 21 3 3 0 1130 3ZM16 25 23 29 23 58 16 63 46 63 39 58 39 25Z"/></svg>';
      // The save timestamp travels in `name`; the embedded script turns it into a tooltip.
      t.name = Date.now() + '';
      t.title = doc.title;
      t.href = location.ohref || location.href;
      t.target = 'blank';
      return t;
    }

    const AM = new AsyncManager();
    AM.onfinish = function() {
      // Add applyProps script
      var script = $C('script');
      script.innerHTML = "window.addEventListener('load', function(){" +
        // show info button
        "setTimeout(function(){var btn=document.getElementById('sf-pure');btn.title=new Date(parseInt(btn.name))+'\\n\\n'+btn.title;location.ohref=btn.href;btn.oncontextmenu=function(e){btn.style.display='none';e.preventDefault()}}, 800);"
        +
        // {FUNC}
        "(function(c){var fs={Canvas_DataUrl:function(a,b){var e=new Image(),v=a.getContext('2d');e.onload=function(){v.drawImage(e,0,0)};e.src=b},Input_Value:function(a,b){a.value=b}};for(var i=0,arr=Object.entries(c),t,el;i<arr.length;i++){try{t=arr[i];if(el=document.querySelector(t[0]))for(var p of t[1])fs[p.type](el,p.value)}catch(e){console.error(e)}}}"
        + ")(" +
        // {PROPS} -- '<' is escaped so a captured value containing "</script>"
        // cannot terminate the embedded script in the saved file.
        JSON.stringify(ElmProps.props).replace(/</g, '\\u003c')
        + ")})";
      $A(dom.head, script);
      $A(dom.body, about());
      // Generate html
      var FinalHTML = dom.querySelector('html').outerHTML;
      debug('Generation Complete.', FinalHTML.length);
      details.onfinish(FinalHTML);
    };

    /**
     * Rewrite every http/root-relative url(...) in a style text to an absolute
     * URL and invoke `callback(style, ...args)` with the updated text after each
     * replacement. With an empty style the callback is invoked once right away;
     * with no url(...) at all it is not invoked.
     */
    function resolveStyle(style, callback, args=[]) {
      const argvs = [style].concat(args);
      if (!style) {
        return callback.apply(null, argvs);
      }
      const re = /url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)/;
      const rg = /url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)/g;
      const replace = (durl, urlexp) => {
        // Replace style text
        const durlexp = 'url("' + durl + '")';
        style = style.replaceAll(urlexp, durlexp);
        argvs[0] = style;
        callback.apply(null, argvs);
        AM.finish();
      };
      const all = style.match(rg);
      if (!all) {return;}
      for (const urlexp of all) {
        // Check url
        if (urlexp.length > 3999) {continue;}
        const osrc = urlexp.match(re)[1];
        // Linked stylesheets resolve relative to their own (original) URL.
        const baseurl = args instanceof HTMLLinkElement && args.ohref ? args.ohref : location.href;
        if (!ishttp(osrc)) {continue;}
        const src = fullUrl(osrc, baseurl);
        // Request
        AM.add();
        requestDataURL(src, replace, [urlexp].concat(args));
      }
    }

    /**
     * For a <link> whose href is a data: URL, decode the stylesheet, resolve the
     * URLs inside it and write the result back as a new data: URL.
     * Links with any other kind of href are left alone.
     */
    function resolveStyleLinked(link) {
      const durl = link.href;
      if ((durl || '')[0] !== 'd') {return;} // not /^data:/.test()
      let blob;
      try {
        blob = dataURLToBlob(durl);
      } catch (e) {
        // A malformed data: URL must not abort the whole capture.
        debug(e);
        return;
      }
      const reader = new FileReader();
      reader.onload = () => {
        resolveStyle(reader.result, (style, link) => {
          const blob = new Blob([style], {type: "text/css"});
          AM.add();
          blobToDataURL(blob, function(durl, link) {
            //debug('style elm=', link.id, link)
            link.href = durl;
            AM.finish();
          }, link);
        }, link);
        AM.finish();
      };
      AM.add();
      reader.readAsText(blob);
    }

    // debug('Setting charset');
    if (doc.characterSet !== 'UTF-8') {
      const meta = $('meta[http-equiv="Content-Type"][content*="charset"]');
      meta && (meta.content = meta.content.replace(/charset\s*=\s*[^;\s]*/i, 'charset=UTF-8'));
    }

    // debug('strip scripts');
    for (const node of $_('script')) {
      $R(node);
    }

    // debug('strip inline scripts');
    const ISKeys = ['onabort', 'onerror', 'onresize', 'onscroll', 'onunload', 'oncancel', 'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'onclose', 'oncuechange', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter', 'ondragexit', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'ondurationchange', 'onemptied', 'onended', 'onerror', 'onfocus', 'oninput', 'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onloadeddata', 'onloadedmetadata', 'onloadstart', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onpause', 'onplay', 'onplaying', 'onprogress', 'onratechange', 'onreset', 'onresize', 'onscroll', 'onseeked', 'onseeking', 'onselect', 'onshow', 'onstalled', 'onsubmit', 'onsuspend', 'ontimeupdate', 'ontoggle', 'onvolumechange', 'onwaiting', 'onbegin', 'onend', 'onrepeat'];
    for (const node of $_('*')) {
      for (const key of ISKeys) {
        node.removeAttribute(key);
        node[key] = undefined;
      }
    }

    // debug('strip preload scripts');
    for (const node of $_('link[rel*=modulepreload]')) {
      $R(node);
    }

    // debug('strip meta headers');
    for (const node of $_('meta[http-equiv="Content-Security-Policy"]')) {
      $R(node);
    }

    //debug('Resolve style urls');
    for (const node of $_('link[rel*=stylesheet][href]')) {
      resolveStyleLinked(node);
    }
    for (const elm of $_('style')) {
      //debug('style elm=', elm.id, elm)
      resolveStyle(elm.innerText, (style, elm) => (elm.innerHTML = style), elm);
    }

    //debug('Resolve links');
    // Only links whose rel includes one of `accepts` and that token is not in `excludes`.
    const accepts = ['icon', 'apple-touch-icon', 'apple-touch-startup-image', 'prefetch', 'preload', 'prerender', 'manifest', 'stylesheet'];
    const excludes = ['alternate'];
    for (const link of $_('link[href]')) {
      // Only for http[s] links
      if (!link.href) {continue;}
      if (!ishttp(link.href)) {continue;}
      const rels = link.rel.split(' ');
      const deal = rels.some((rel) => accepts.includes(rel) && !excludes.includes(rel));
      if (!deal) {continue;}
      // Save original href to link.ohref
      link.ohref = link.href;
      AM.add();
      requestDataURL(link.href, function(durl, link) {
        link.href = durl;
        // Deal style if links to a stylesheet
        if (rels.includes('stylesheet')) {
          resolveStyleLinked(link);
        }
        AM.finish();
      }, link);
    }

    // Pin anchors to absolute URLs. Non-http schemes (mailto:, javascript:, tel:, ...)
    // are skipped so they are never treated as relative paths.
    for (const anchor of Array.from(dom.links)) {
      if (ishttp(anchor.href)) {
        anchor.href = fullUrl(anchor.href);
      }
    }

    //debug('Resolve image src');
    for (const img of $_('img[src], source[src]')) {
      // Get full src
      if (!img.src) {continue;}
      if (!ishttp(img.src)) {continue;}
      img.src = fullUrl(img.src);
    }

    //debug('Resolve image srcset');
    for (const img of $_('img[srcset], source[srcset]')) {
      if (!img.srcset) {continue;}
      const list = img.srcset.split(',');
      for (let i = 0; i < list.length; i++) {
        const srcitem = list[i].trim();
        if (!srcitem || srcitem.length > 3999) {continue;}
        const parts = srcitem.split(/\s+/);
        if (!ishttp(parts[0])) {continue;}
        // Keep each candidate as a string ("<absolute-url> <descriptor>") so the
        // join below yields a valid srcset.
        list[i] = [fullUrl(parts[0])].concat(parts.slice(1)).join(' ');
      }
      img.srcset = list.join(',');
    }

    //debug('Resolve canvas');
    for (const canvas of $_('canvas')) {
      try {
        // A cloned canvas carries no bitmap, so the snapshot is taken from the
        // live canvas found at the same CSS path.
        const live = doc.querySelector(ElmProps.getCssPath(canvas));
        if (live) {
          ElmProps.add(canvas, 'Canvas_DataUrl', img2url(live));
        }
      } catch (e) {
        // Best effort: e.g. reading the canvas back may be refused by the browser.
        debug(e);
      }
    }

    debug('Resolve styles', dom);
    // <style> elements with no text may still carry rules in their live sheet;
    // copy those rules into the clone as text.
    for (const styleEl of $_('style')) {
      try {
        if (styleEl.firstChild) {continue;}
        const live = doc.querySelector(ElmProps.getCssPath(styleEl));
        debug('cssRulesX', styleEl, live);
        const cssRules = live?.sheet?.cssRules;
        if (!cssRules?.length) {continue;}
        let text = '';
        for (let i = 0; i < cssRules.length; i++) {
          text += cssRules[i].cssText;
          text += '\n';
        }
        //debug('cssRules', text);
        styleEl.innerHTML = text;
      } catch (e) {
        debug(e);
      }
    }

    //debug('Resolve background-images');
    const urlReg = /^\s*url\(\s*['"]?([^\(\)'"]+)['"]?\s*\)\s*$/;
    for (const elm of $_('*')) {
      const bg = elm.style.backgroundImage;
      if (bg && bg.length < 3999 // CONST.Number.MaxUrlLength
          && bg.lastIndexOf('data:', 10) == -1) { // not /^data:/.test(url)
        const m = bg.match(urlReg);
        if (m) { // Get full image url
          elm.style.backgroundImage = 'url(' + fullUrl(m[1]) + ')';
        }
      }
    }

    //debug('Resolve input/textarea/progress values');
    // NOTE(review): this records the current value of every input, including
    // password fields, into the saved file -- confirm that is intended.
    for (const field of $_('input,textarea,progress')) {
      let live = null;
      try {
        // Query origin element's value
        live = doc.querySelector(ElmProps.getCssPath(field));
      } catch (e) {
        debug(e);
      }
      // Add to property map (skip when the live element cannot be located)
      if (live && live.value) {
        ElmProps.add(field, 'Input_Value', live.value);
      }
    }

    // Get favicon.ico if no icon found
    debug('Resolve favicon.ico');
    if (!$('link[rel*=icon]')) {
      const host = getHost();
      // getHost() yields null for non-http(s) pages; skip instead of writing "nullfavicon.ico".
      if (host) {
        const icon = $C('link');
        icon.rel = 'icon';
        icon.href = host + 'favicon.ico';
        $A(dom.head, icon);
      }
    }

    // Start generating the finish event
    debug('Waiting for async tasks to be finished');
    AM.finishEvent = true;
  }
  /**
   * Resolve a possibly relative URL to an absolute one.
   *
   * Uses the URL parser, so protocol-relative ("//host/x"), root-relative
   * ("/x"), path-relative ("x", "../x") and already-absolute URLs of any scheme
   * (http:, mailto:, data:, ...) are all handled correctly.
   *
   * @param {string} url URL to resolve; falsy values are returned unchanged.
   * @param {string} [baseurl] Base to resolve against; defaults to location.href.
   * @returns {string} The absolute URL, or `url` unchanged if it cannot be parsed.
   */
  function fullUrl(url, baseurl) {
    if (!url) {
      return url;
    }
    try {
      return new URL(url, baseurl || location.href).href;
    } catch (e) {
      // Unparseable input: hand it back untouched rather than inventing a URL.
      return url;
    }
  }
  /**
   * Build a CSS selector path ("html > body > div:nth-of-type(2)") for an element.
   *
   * Walks up from `el`; each step contributes the lower-cased tag name plus
   * `:nth-of-type(n)` when the element is not the first of its type among its
   * siblings. The walk stops early at the first ancestor (or `el` itself) that
   * has an id, which is emitted as `tag#id`.
   *
   * @param {Element} el Element to describe.
   * @returns {string|undefined} Selector path, or undefined when `el` is not an Element.
   */
  function cssPath(el) {
    if (!(el instanceof Element)) return;
    // Ids may contain characters that are not valid in a selector (".", ":",
    // leading digits, ...); escape them so querySelector() does not throw.
    const escapeId = (typeof CSS !== 'undefined' && typeof CSS.escape === 'function')
      ? (id) => CSS.escape(id)
      : (id) => id;
    const path = [];
    // `el &&` guards detached subtrees whose root has no parentNode.
    while (el && el.nodeType === Node.ELEMENT_NODE) {
      let selector = el.nodeName.toLowerCase();
      // Require a string: on a <form>, `.id` can be shadowed by a control named "id".
      if (typeof el.id === 'string' && el.id) {
        path.unshift(selector + '#' + escapeId(el.id));
        break;
      }
      let nth = 1;
      for (let sib = el.previousElementSibling; sib; sib = sib.previousElementSibling) {
        if (sib.nodeName.toLowerCase() == selector) nth++;
      }
      if (nth != 1) selector += ":nth-of-type(" + nth + ")";
      path.unshift(selector);
      el = el.parentNode;
    }
    return path.join(" > ");
  }
  /**
   * Hand `url` straight to `callback`, followed by the extra arguments.
   * No request is made here: the callback receives the URL itself.
   * Anything thrown while doing so is reported through `err` instead of propagating.
   *
   * @param {string} url Passed to the callback as its first argument.
   * @param {Function} callback Invoked as callback(url, ...args).
   * @param {*} [args=[]] Extra argument(s); an array is spread, any other value
   *        is passed as one additional argument.
   */
  function requestDataURL(url, callback, args=[]) {
    try {
      //debug('requestDataURL::', url, args);
      // concat (not spread) so that a non-array `args` becomes a single argument
      const callArgs = [url].concat(args);
      callback.call(null, ...callArgs);
    } catch (failure) {
      err(failure);
    }
  }
  /**
   * Read a Blob as a data: URL and pass it to `callback`.
   *
   * @param {Blob} blob Blob to encode.
   * @param {Function} callback Invoked as callback(dataUrl, ...args) once reading completes.
   * @param {*} [args=[]] Extra argument(s); an array is spread, any other value
   *        is passed as one additional argument.
   */
  function blobToDataURL(blob, callback, args=[]) {
    const reader = new FileReader();
    reader.onload = function () {
      // concat (not spread) so that a non-array `args` becomes a single argument
      callback.apply(null, [reader.result].concat(args));
    };
    // Surface read failures instead of failing silently.
    reader.onerror = function () {
      console.error(reader.error);
    };
    reader.readAsDataURL(blob);
  }
  /**
   * Convert a data: URL into a Blob.
   *
   * Handles both base64 payloads ("data:text/css;base64,....") and plain,
   * percent-encoded payloads ("data:text/css,body%7B%7D"), with or without
   * media-type parameters. Only the first comma separates header from payload,
   * so commas inside a plain payload are preserved.
   *
   * @param {string} dataurl The data: URL.
   * @returns {Blob} Blob whose type is the media type without parameters.
   * @throws {TypeError} When `dataurl` is not a data: URL.
   */
  function dataURLToBlob(dataurl) {
    const comma = dataurl.indexOf(',');
    if (!dataurl.startsWith('data:') || comma === -1) {
      throw new TypeError('Not a data: URL');
    }
    const params = dataurl.slice('data:'.length, comma).split(';');
    const mime = params[0];
    const payload = dataurl.slice(comma + 1);
    const isBase64 = params.slice(1).some((p) => p.trim().toLowerCase() === 'base64');

    if (isBase64) {
      // atob() yields one char per byte; copy the char codes into a byte array.
      const bytes = Uint8Array.from(atob(payload), (ch) => ch.charCodeAt(0));
      return new Blob([bytes], { type: mime });
    }

    let text;
    try {
      text = decodeURIComponent(payload);
    } catch (e) {
      // Malformed percent-escapes: keep the payload verbatim.
      text = payload;
    }
    return new Blob([text], { type: mime });
  }
  /**
   * Counter for in-flight tasks that fires `onfinish` when the work is done.
   *
   * - `taskCount`: number of tasks currently pending.
   * - `finishEvent`: while truthy, reaching zero pending tasks calls `onfinish`;
   *   setting it to a truthy value while nothing is pending calls `onfinish` at once.
   * - `add()`: register one task, returns the new count.
   * - `finish()`: complete one task, returns the count after any `onfinish` call.
   * - `onfinish`: optional callback assigned by the user of the instance.
   */
  function AsyncManager() {
    const self = this;
    // Backing store for the `finishEvent` accessor.
    let armed = false;

    this.taskCount = 0;

    Object.defineProperty(this, 'finishEvent', {
      configurable: true,
      enumerable: true,
      get() {
        return armed;
      },
      set(b) {
        armed = b;
        if (b && self.taskCount === 0 && self.onfinish) {
          self.onfinish();
        }
      },
    });

    this.add = function () {
      ++self.taskCount;
      return self.taskCount;
    };

    this.finish = function () {
      --self.taskCount;
      if (self.taskCount === 0 && self.finishEvent && self.onfinish) {
        self.onfinish();
      }
      return self.taskCount;
    };
  }
  /**
   * Draw `img` onto a fresh canvas of the same width/height and return the
   * canvas content as a data: URL.
   * @param {*} img Drawable source with `width` and `height` (image, canvas, ...).
   * @returns {string} Result of canvas.toDataURL().
   */
  function img2url(img) {
    const surface = doc.createElement('canvas');
    const ctx = surface.getContext('2d');
    surface.width = img.width;
    surface.height = img.height;
    ctx.drawImage(img, 0, 0);
    return surface.toDataURL();
  }
  /**
   * Format the current local time, e.g. "1970-01-01 00:00:00".
   * A falsy `dateSep` omits the date part; a falsy `timeSep` omits the time
   * part; the two parts are joined by a single space when both are present.
   * @param {string} [dateSep='-'] Separator between year, month and day.
   * @param {string} [timeSep=':'] Separator between hours, minutes and seconds.
   * @returns {string} The formatted timestamp ('' when both separators are falsy).
   */
  function getTime(dateSep='-', timeSep=':') {
    const now = new Date();
    const sections = [];
    if (dateSep) {
      sections.push(f0(now.getFullYear(), 4) + dateSep + f0((now.getMonth() + 1), 2) + dateSep + f0(now.getDate(), 2));
    }
    if (timeSep) {
      sections.push(f0(now.getHours(), 2) + timeSep + f0(now.getMinutes(), 2) + timeSep + f0(now.getSeconds(), 2));
    }
    return sections.join(' ');
  }
  /**
   * Left-pad a value with zeros.
   * @param {*} number Value to format (converted with String()).
   * @param {number} ln Minimum length of the result.
   * @returns {string} The value as text, prefixed with '0' until it is at least `ln` long.
   */
  function f0(number, ln) {
    let padded = String(number);
    while (padded.length < ln) {
      padded = '0' + padded;
    }
    return padded;
  }
  /**
   * Stop an event from propagating and cancel its default action.
   * Failures (e.g. `e` is not an event) are logged via debug() and otherwise ignored.
   * @param {Event} e Event to cancel.
   */
  function stop(e) {
    try {
      for (const method of ['stopPropagation', 'preventDefault']) {
        e[method]();
      }
    } catch (failure) {
      debug(failure);
    }
  }
  /**
   * Offer `text` to the user as a file download.
   *
   * Creates a temporary object URL for the text, clicks a detached <a download>
   * pointing at it, and releases the object URL afterwards so the blob does not
   * stay alive for the lifetime of the page.
   *
   * @param {string} text File content.
   * @param {string} name Suggested file name.
   */
  function saveTextToFile(text, name) {
    const blob = new Blob([text], {type: "text/plain;charset=utf-8"});
    const url = URL.createObjectURL(blob);
    const a = doc.createElement('a');
    a.href = url;
    a.download = name;
    a.click();
    // Revoke later rather than immediately: the browser may still be reading
    // the blob when click() returns.
    setTimeout(() => URL.revokeObjectURL(url), 10000);
  }
  /**
   * Get the scheme-and-host prefix of a URL, including the leading
   * "http(s)://" and the trailing "/", e.g. "https://example.com/".
   *
   * @param {string} [url=location.href] URL to inspect.
   * @returns {string|null} The host prefix, or null when `url` contains no
   *          "http(s)://host/" part.
   */
  function getHost(url=location.href) {
    // Match against the argument (not location.href) so callers can pass any URL.
    const match = String(url).match(/https?:\/\/[^\/]+\//);
    return match ? match[0] : match;
  }
  487. })();