markdown downloader
// ==UserScript== // @name Markdown Grabber // @namespace http://tampermonkey.net/ // @version 1.0 // @description markdown downloader // @author 5ec1cff // @match *://*/* // @license AGPL // @grant unsafeWindow // @grant GM_registerMenuCommand // @grant GM_xmlhttpRequest // @connect * // ==/UserScript== // 2021.12.24 Fri: 修正 // 2022.03.15 Tue: 增加下载图片支持(默认启用) // 2023.03.01 Wed: 支持 xz.aliyun.cn ;下载图片附代 Referer (function () { 'use strict'; if (window.top !== window) return; // 阻止在 iframe 启用 const downloadPics = true; const picMap = new Map(); const console = unsafeWindow.console.context(); function getPictureKey(url) { return url; } function getPicture(url) { const key = getPictureKey(url); if (picMap.get(url) == null) { picMap.set(url, new Promise((rs, rj) => { if (!downloadPics || !url?.startsWith("http")) { rs([key, url]); return; } GM_xmlhttpRequest({ url: url, headers: { Referer: location.href }, responseType: "blob", onload(r) { const fr = new FileReader(); fr.onloadend = () => { console.log('load done:', url); rs([key, fr.r###lt]); } fr.onerror = (e) => { rj(e); } fr.readAsDataURL(r.response); }, onerror(e) { rj(e); }, onabort(e) { rj(e); } }) }) ); } return key; } function parseSimpleStyle(e) { let r = ''; switch (e.tagName.toLowerCase()) { case 'b': case 'strong': r += `**${parseSingleLine(e)}**`; break; case 'i': case 'em': r += `*${parseSingleLine(e)}*`; break; case 's': case 'strike': r += `~~${parseSingleLine(e)}~~`; break; case 'a': { if (e.href) { r += `[${parseSingleLine(e)}](${e.getAttribute('href')})`; } break; } case 'code': r += `\`${e.innerText}\``; break; case 'img': r += `\n![][${getPicture(e.src)}]\n`; break; default: r += parseSingleLine(e); } return r; } function parseSingleLine(element) { if (element instanceof Text) return element.data.trim(); let r = ''; if (element instanceof HTMLElement) { for (let e of element.childNodes) { if (e instanceof Text) r += e.data; if (!(e instanceof HTMLElement)) continue; r += parseSimpleStyle(e); } } return r.trim(); } function isSingleLine(node) { return !node.querySelector('p,ul,ol,br'); } function parseNode(element) { let lines = [], singleLine = null; if (element instanceof HTMLElement) { for (let e of element.childNodes) { if (!(e instanceof HTMLElement) && !(e instanceof Text)) continue; let tagName; if (e instanceof Text) { tagName = 'TEXT'; } else { tagName = e.tagName.toLowerCase(); } switch (tagName) { case 'TEXT': case 'a': case 'b': case 'strong': case 'i': case 'em': case 's': case 'strike': case 'a': case 'code': { if (singleLine == null) singleLine = ''; if (tagName == 'TEXT') { singleLine += e.data.trim(); } else { singleLine += parseSimpleStyle(e); } continue; } default: if (singleLine != null) { lines.push(singleLine); singleLine = null; } } switch (tagName) { // ignores case 'button': case 'style': case 'header': case 'script': continue; case 'p': lines.push(parseSingleLine(e) + '\n'); break; case 'br': lines.push('\n'); break; case 'ul': case 'ol': { lines.push(''); let is_order = tagName == 'ol', j = 1; for (let item of e.childNodes) { let pref = is_order ? `${j}. ` : `- `; if (item instanceof HTMLLIElement) { if (!isSingleLine(item)) { let item_lines = parseNode(item); for (let i = 0; i < item_lines.length; i++) { const l = item_lines[i].trim() if (l) { lines.push(`${i==0?pref:' '}${item_lines[i]}`); } } } else { lines.push(`${pref}${parseSingleLine(item)}`); } j++; } } lines.push(''); break; } case 'pre': { // debugger lines.push('```'); lines.push(...(e.querySelector('code') || e).innerText.trim().split('\n')); lines.push('```'); break; } case 'blockquote': { lines.push(''); let item_lines = parseNode(e); for (let i = 0; i < item_lines.length; i++) { lines.push(`> ${item_lines[i]}`); } lines.push(''); break; } case 'table': { lines.push(''); let head = e.querySelector('thead'); if (!head) { console.warn('unknown table!'); // resolve body as normal tag let body; if (body = e.querySelector('tbody')) { lines.push(...parseNode(body)); } continue; } let head_line = '|', sep_line = '|'; for (let h of head.querySelectorAll('th')) { head_line += `${parseSingleLine(h)}|`; sep_line += `--|` } lines.push(head_line); lines.push(sep_line); let body = e.querySelector('tbody'); for (let b of body.querySelectorAll('tr')) { let line = '|'; for (let d of b.querySelectorAll('td')) { line += `${parseSingleLine(d)}|`; } lines.push(line); } lines.push(''); break; } case 'hr': lines.push('\n---\n'); break; case 'img': lines.push(`\n![][${getPicture(e.src)}]\n`); break; case 'figure': { if (e.classList.contains('highlight')) { let lang = e.classList[1] || ''; let code = e.querySelector('td.code pre'); if (code != null) { lines.push('```' + lang); lines.push(...code.innerText.trim().split('\n')); lines.push('```'); break; } } // fallthrough } case 'td': { if (e.classList.contains('gutter')) continue; // fallthrough } default: { let r; if (r = tagName.match(/h(\d+)/)) { lines.push(`\n${'#'.repeat(Number(r[1]))} ${parseSingleLine(e)} \n`); } else { lines.splice(lines.length, 0, ...parseNode(e)); } } } } if (singleLine != null) lines.push(singleLine); } return lines; } function findArticle() { let article = document.body.querySelector('article'); if (article) return article; article = document.body.querySelector('div.markdown-body,div.mod-content'); if (article) return article; let maxChild = 0, node = null; for (let n of document.querySelectorAll('h1')) { if (n.parentNode && n.parentNode.childElementCount >= maxChild) { node = n.parentNode; } } return node; } async function html2MD() { let article = findArticle(); let title = document.querySelector('h1'); let r = ''; if (title) { r += `# ${parseSingleLine(title)}`; } else { r += `# ${document.title}`; } r += `\n${location.href}\n\n`; r += await nodeToMD(article); return r; } async function nodeToMD(node) { picMap.clear(); let r = ''; let lines = parseNode(node); for (let l of lines) { r += `${l}\n`; } let pics = await Promise.race([ Promise.all(picMap.values()), new Promise((_, rj) => { console.log("waiting 10s for downloading pictures...", picMap.size); setTimeout(() => { rj('time out!'); }, 10000) }) ]); r += '\n'; for (let [key, url] of pics) { r += `[${key}]:${url}\n`; } return r; } unsafeWindow.md = nodeToMD; // unsafeWindow.__xhr = GM_xmlhttpRequest; // unsafeWindow._getpic = getPicture async function onClick() { let url = URL.createObjectURL(new Blob([await html2MD()], { type: 'text/plain' })); let a = document.createElement('a'); a.download = `${document.title}.md`; a.href = url; document.body.append(a); a.click(); a.remove(); } GM_registerMenuCommand('下载 Markdown', () => { onClick(); }) })();