Greasy Fork is available in English.
字幕格式是 "中文 \n 英语"(\n 是换行符的意思)
// ==UserScript== // @name Youtube 双语字幕下载 v14 (中文+任选的一门双语,比如英语) (v13 开始自动字幕不再支持双语,自动字幕只能下载翻译后的中文) // @include https://*youtube.com/* // @author Cheng Zheng // @version 14 // @copyright Zheng Cheng // @grant GM_xmlhttpRequest // @grant unsafeWindow // @description 字幕格式是 "中文 \n 英语"(\n 是换行符的意思) // @license MIT // @namespace https://greasyfork.org/users/5711 // @require https://code.jquery.com/jquery-1.12.4.min.js // @require https://cdn.jsdelivr.net/npm/[email protected]/dist/vue.js // ==/UserScript== /* 使用提示: 如果本脚本不能使用,有一定概率是因为 jQuery 的 CDN 在你的网络环境下无法载入, 就是文件顶部的这一句有问题: @require https://code.jquery.com/jquery-1.12.4.min.js 解决办法:去网上随便找一个 jQuery 的 CDN 地址,替换掉这个,比如 https://cdn.bootcdn.net/ajax/libs/jquery/1.12.4/jquery.js https://cdn.staticfile.org/jquery/1.12.4/jquery.min.js 作者联系方式: QQ 1003211008 邮件 [email protected] Github@1c7 本脚本使用场景: Tampermonkey (Chrome 上的一款插件) (意思是需要安装在 Tampermonkey 里来运行) 解决什么问题: 下载中外双语的字幕,格式是 中文 \n 外语, \n 是换行符的意思。 术语说明: auto 自动字幕 closed 完整字幕 (或者叫人工字幕也可以) 原理说明: 对于"完整字幕", Youtube 返回的时间轴完全一致,因此只需要结合在一起即可,相对比较简单。 对于"自动字幕",中文是一个个句子,英文是一个个单词,格式不同,时间轴也不同 因此,会基于中文的句子时间(时间轴),把英文放进去 特别感谢: ytian(tianyebj):解决英文字幕匹配错误的问题 (https://github.com/1c7/Youtube-Auto-Subtitle-Download/pull/11) 备忘: 如果要把字符串保存下来, 使用: downloadString(srt_string, "text/plain", file_name); 用于测试的视频: https://www.youtube.com/watch?v=JfBZfnkg1uM 版本更新日志 2022-12-23 把 v13 升级到 v14,因为 Youtube 又更新 UI 了。 2021-12-21 把 v12 升级到 v13 有多名用户分别在微博,微信,邮件,greasyfork, 四个渠道向我反馈了无法使用问题。 此时版本是 v12 测试视频1 https://www.youtube.com/watch?v=OWaFPsVa3ig&t=16s (有1个字幕,英语(自动生成)) 绿色下拉菜单里选择 option "中文 + 英语 (自动生成)" 的确没有任何反应,无法下载。 实测发现: 英文字幕长度 284行,中文字幕长度 162行,没法一一对应,所以出错了。 测试视频2 https://www.youtube.com/watch?v=3RkhZgRNC1k (有2个字幕,英语(美国),英语(自动生成)) 如果是中文+英语(美国) 那么两个都是885行,可以正常下载 但是 中文+英语(自动生成),英语是937行,中文是471行。 结论: 如果是中文+完整字幕,那么长度是一一对应的。没问题 如果是中文+自动生成字幕,那么长度不一样,就会有问题。 解决办法: 如果是自动字幕,只下载中文。 完整字幕不需要做额外修改,保留现在这样就行,可以正常工作。 */ ; (function () { 'use strict' // Config var NO_SUBTITLE = '无字幕' var HAVE_SUBTITLE = '下载双语字幕 (中文+外语)' const NEW_LINE = '\n' const BUTTON_ID = 'youtube-dual-lang-downloader-by-1c7-last-update-2020-12-3' const anchor_element = "#above-the-fold #title"; // Config var HASH_BUTTON_ID = `#${BUTTON_ID}` // initialize var first_load = true // indicate if first load this webpage or not var youtube_playerResponse_1c7 = null // for auto subtitle unsafeWindow.caption_array = [] // store all subtitle // trigger when first load // $(document).ready(function () { // console.log('开始执行??'); // start() // }) // trigger when loading new page // (actually this would also trigger when first loading, that's not what we want, that's why we need to use firsr_load === false) // (new Material design version would trigger this "yt-navigate-finish" event. old version would not.) var body = document.getElementsByTagName('body')[0] body.addEventListener('yt-navigate-finish', function (event) { if (current_page_is_video_page() === false) { return } youtube_playerResponse_1c7 = event.detail.response.playerResponse // for auto subtitle unsafeWindow.caption_array = [] // clean up (important, otherwise would have more and more item and cause error) // if use click to another page, init again to get correct subtitle if (first_load === false) { remove_subtitle_download_button() init() } }) // trigger when loading new page // (old version would trigger "spfdone" event. new Material design version not sure yet.) window.addEventListener('spfdone', function (e) { if (current_page_is_video_page()) { remove_subtitle_download_button() var checkExist = setInterval(function () { if ($('#watch7-headline').length) { init() clearInterval(checkExist) } }, 330) } }) // return true / false // Detect [new version UI(material design)] OR [old version UI] // I tested this, accurated. function new_material_design_version() { var old_title_element = document.getElementById('watch7-headline') if (old_title_element) { return false } else { return true } } // return true / false function current_page_is_video_page() { return get_video_id() !== null } // return string like "RW1ChiWyiZQ", from "https://www.youtube.com/watch?v=RW1ChiWyiZQ" // or null function get_video_id() { return getURLParameter('v') } //https://stackoverflow.com/questions/11582512/how-to-get-url-parameters-with-javascript/11582513#11582513 function getURLParameter(name) { return ( decodeURIComponent( (new RegExp('[?|&]' + name + '=' + '([^&;]+?)(&|#|;|$)').exec( location.search ) || [null, ''])[1].replace(/\+/g, '%20') ) || null ) } function remove_subtitle_download_button() { $(HASH_BUTTON_ID).remove() } // 把 console.log 包装一层, 方便"开启"/"关闭" // 这样可以在代码里遗留很多 console.log,实际运行时"关闭"掉不输出, 调试时"开启" // function logging(...args) { // if(typeof(console) !== 'undefined') { // console.log(...args); // } // } function inject_our_script() { var div = document.createElement('div'), select = document.createElement('select'), option = document.createElement('option'); div.setAttribute( 'style', `display: table; margin-top:4px; border: 1px solid rgb(0, 183, 90); cursor: pointer; color: rgb(255, 255, 255); border-top-left-radius: 3px; border-top-right-radius: 3px; border-bottom-right-radius: 3px; border-bottom-left-radius: 3px; background-color: #00B75A; ` ) div.id = BUTTON_ID div.title = 'Youtube Subtitle Downloader' // display when cursor hover select.id = 'captions_selector' select.disabled = true select.setAttribute( 'style', `display:block; border: 1px solid rgb(0, 183, 90); cursor: pointer; color: rgb(255, 255, 255); background-color: #00B75A; padding: 4px; ` ) option.textContent = 'Loading...' option.selected = true select.appendChild(option) // 下拉菜单里,选择一项后触发下载 select.addEventListener( 'change', function () { download_subtitle(this) }, false ) div.appendChild(select) // put <select> into <div> // Put the div into page: new material design var title_element = document.querySelectorAll( '.title.style-scope.ytd-video-primary-info-renderer' ) if (title_element) { $(title_element[0]).after(div) } // 把按钮加到页面上。 document.querySelector(anchor_element).appendChild(div) load_language_list(select) // <a> element is for download var a = document.createElement('a') a.style.cssText = 'display:none;' a.setAttribute('id', 'ForSubtitleDownload') var body = document.getElementsByTagName('body')[0] body.appendChild(a) } // Trigger when user select <option> async function download_subtitle(selector) { console.log('进入download_subtitle') // if user select first <option> // we just return, do nothing. if (selector.selectedIndex == 0) { return } // 核心概念 // 对于完整字幕而言,英文和中文的时间轴是一致的,只需要一行行的 match 即可 // 但是对于自动字幕就不是这样了,"自动字幕的英文"只能拿到一个个单词的开始时间和结束时间 // "自动字幕的中文"只能拿到一个个句子 // 现在的做法是,先拿到中文,处理成 SRT 格式, // 然后去拿英文,然后把英文的每个词,拿去和中文的每个句子的开始时间和结束时间进行对比 // 如果"英文单词的开始时间"在"中文句子的开始-结束时间"区间内,那么认为这个英文单词属于这一句中文 // 2021-8-11 更新 // 自动字幕的改了,和完整字幕一样了。 var caption = caption_array[selector.selectedIndex - 1] // because first <option> is for display, so index-1 var lang_code = caption.lang_code var lang_name = caption.lang_name // 初始化2个变量 var origin_url = null var translated_url = null // if user choose auto subtitle // 如果用户选的是自动字幕 if (caption.lang_code == 'AUTO') { origin_url = get_auto_subtitle_xml_url() translated_url = origin_url + '&tlang=zh-Hans' var translated_xml = await get(translated_url) var translated_srt = parse_youtube_XML_to_object_list(translated_xml) var srt_string = object_array_to_SRT_string(translated_srt) var title = get_file_name(lang_name) downloadString(srt_string, 'text/plain', title) // after download, select first <option> selector.options[0].selected = true return // 别忘了 return } // 如果用户选的是完整字幕 origin_url = await get_closed_subtitle_url(lang_code) translated_url = origin_url + '&tlang=zh-Hans' var original_xml = await get(origin_url) var translated_xml = await get(translated_url) // 根据时间轴融合这俩 var original_srt = parse_youtube_XML_to_object_list(original_xml) var translated_srt = parse_youtube_XML_to_object_list(translated_xml) var dual_language_srt = merge_srt(original_srt, translated_srt) var srt_string = object_array_to_SRT_string(dual_language_srt) var title = get_file_name(lang_name) downloadString(srt_string, 'text/plain', title) // after download, select first <option> selector.options[0].selected = true } // 把两个语言的 srt 数组组合起来, // 比如把英文和中文的组合起来。 // 这个函数假设两个数组的长度是一模一样的。如果不一样就会出错,比如 srt_A 是 284 个,srt_B 是 164 个元素。 function merge_srt(srt_A, srt_B) { var dual_language_srt = [] for (let index = 0; index < srt_A.length; index++) { const element_A = srt_A[index] const element_B = srt_B[index] var text = element_B.text + NEW_LINE + element_A.text // 中文 \n 英文 var item = { startTime: element_A.startTime, endTime: element_A.endTime, text: text, } dual_language_srt.push(item) } return dual_language_srt } // Return something like: "(English)How Did Python Become A Data Science Powerhouse?.srt" function get_file_name(x) { return `(${x})${get_title()}.srt` } // Detect if "auto subtitle" and "closed subtitle" exist // And add <option> into <select> // 加载语言列表 function load_language_list(select) { // auto var auto_subtitle_exist = false // 自动字幕是否存在(默认 false) // closed var closed_subtitle_exist = false // get auto subtitle var auto_subtitle_url = get_auto_subtitle_xml_url() if (auto_subtitle_url != false) { auto_subtitle_exist = true } // if there are "closed" subtitle? var captionTracks = get_captionTracks() if ( captionTracks != undefined && typeof captionTracks === 'object' && captionTracks.length > 0 ) { closed_subtitle_exist = true } // if no subtitle at all, just say no and stop if (auto_subtitle_exist == false && closed_subtitle_exist == false) { select.options[0].textContent = NO_SUBTITLE disable_download_button() return false } // if at least one type of subtitle exist select.options[0].textContent = HAVE_SUBTITLE select.disabled = false var option = null // for <option> var caption_info = null // for our custom object // 自动字幕 if (auto_subtitle_exist) { var auto_sub_name = get_auto_subtitle_name() var lang_name = `${auto_sub_name} 翻译的中文` caption_info = { lang_code: 'AUTO', // later we use this to know if it's auto subtitle lang_name: lang_name, // for display only } caption_array.push(caption_info) option = document.createElement('option') option.textContent = caption_info.lang_name select.appendChild(option) } // if closed_subtitle_exist if (closed_subtitle_exist) { for (var i = 0, il = captionTracks.length; i < il; i++) { var caption = captionTracks[i] if (caption.kind == 'asr') { continue } let lang_code = caption.languageCode let lang_translated = caption.name.simpleText var lang_name = `中文 + ${lang_code_to_local_name( lang_code, lang_translated )}` caption_info = { lang_code: lang_code, // for AJAX request lang_name: lang_name, // display to user } caption_array.push(caption_info) // 注意这里是加到 caption_array, 一个全局变量, 待会要靠它来下载 option = document.createElement('option') option.textContent = caption_info.lang_name select.appendChild(option) } } } // 禁用下载按钮 function disable_download_button() { $(HASH_BUTTON_ID) .css('border', '#95a5a6') .css('cursor', 'not-allowed') .css('background-color', '#95a5a6') $('#captions_selector') .css('border', '#95a5a6') .css('cursor', 'not-allowed') .css('background-color', '#95a5a6') if (new_material_design_version()) { $(HASH_BUTTON_ID).css('padding', '6px') } else { $(HASH_BUTTON_ID).css('padding', '5px') } } // 处理时间. 比如 start="671.33" start="37.64" start="12" start="23.029" // 处理成 srt 时间, 比如 00:00:00,090 00:00:08,460 00:10:29,350 function process_time(s) { s = s.toFixed(3) // 超棒的函数, 不论是整数还是小数都给弄成3位小数形式 // 举个柚子: // 671.33 -> 671.330 // 671 -> 671.000 // 注意函数会四舍五入. 具体读文档 var array = s.split('.') // 把开始时间根据句号分割 // 671.330 会分割成数组: [671, 330] var Hour = 0 var Minute = 0 var Second = array[0] // 671 var MilliSecond = array[1] // 330 // 先声明下变量, 待会把这几个拼好就行了 // 我们来处理秒数. 把"分钟"和"小时"除出来 if (Second >= 60) { Minute = Math.floor(Second / 60) Second = Second - Minute * 60 // 把 秒 拆成 分钟和秒, 比如121秒, 拆成2分钟1秒 Hour = Math.floor(Minute / 60) Minute = Minute - Hour * 60 // 把 分钟 拆成 小时和分钟, 比如700分钟, 拆成11小时40分钟 } // 分钟,如果位数不够两位就变成两位,下面两个if语句的作用也是一样。 if (Minute < 10) { Minute = '0' + Minute } // 小时 if (Hour < 10) { Hour = '0' + Hour } // 秒 if (Second < 10) { Second = '0' + Second } return Hour + ':' + Minute + ':' + Second + ',' + MilliSecond } // Copy from: https://gist.github.com/danallison/3ec9d5314788b337b682 // Thanks! https://github.com/danallison // Work in Chrome 66 // Test passed: 2018-5-19 function downloadString(text, fileType, fileName) { var blob = new Blob([text], { type: fileType, }) var a = document.createElement('a') a.download = fileName a.href = URL.createObjectURL(blob) a.dataset.downloadurl = [fileType, a.download, a.href].join(':') a.style.display = 'none' document.body.appendChild(a) a.click() document.body.removeChild(a) setTimeout(function () { URL.revokeObjectURL(a.href) }, 1500) } // https://css-tricks.com/snippets/javascript/unescape-html-in-js/ // turn HTML entity back to text, example: " should be " function htmlDecode(input) { var e = document.createElement('div') e.class = 'dummy-element-for-tampermonkey-Youtube-cn-other-subtitle-script-to-decode-html-entity-2021-8-11' e.innerHTML = input return e.childNodes.length === 0 ? '' : e.childNodes[0].nodeValue } // 获得自动字幕的地址 // return URL or null; // later we can send a AJAX and get XML subtitle // 例子输出: https://www.youtube.com/api/timedtext?v=JfBZfnkg1uM&asr_langs=de,en,es,fr,it,ja,ko,nl,pt,ru&caps=asr&exp=xftt,xctw&xorp=true&xoaf=5&hl=zh-CN&ip=0.0.0.0&ipbits=0&expire=1628691971&sparams=ip,ipbits,expire,v,asr_langs,caps,exp,xorp,xoaf&signature=55984444BD75E34DB9FE809058CCF7DE5B1AB3B5.193DC32A1E0183D8D627D229C9C111E174FF56FF&key=yt8&kind=asr&lang=en /* 如果直接访问这个地址,里面的格式是 XML,比如 <transcript> <text start="0.589" dur="6.121">hello in this video I would like to</text> <text start="3.6" dur="5.88">share what I've learned about setting up</text> <text start="6.71" dur="5.08">shadows and shadow casting and shadow</text> <text start="9.48" dur="5.6">occlusion and stuff like that in a</text> </transcript> */ function get_auto_subtitle_xml_url() { try { var captionTracks = get_captionTracks() for (var index in captionTracks) { var caption = captionTracks[index] if (typeof caption.kind === 'string' && caption.kind == 'asr') { return captionTracks[index].baseUrl } // ASR – A caption track generated using automatic speech recognition. // https://developers.google.com/youtube/v3/docs/captions } } catch (error) { return false } } // Input: lang_code like 'en' // Output: URL (String) async function get_closed_subtitle_url(lang_code) { try { var captionTracks = get_captionTracks() for (var index in captionTracks) { var caption = captionTracks[index] if (caption.languageCode === lang_code && caption.kind != 'asr') { var url = captionTracks[index].baseUrl return url } } } catch (error) { console.log(error) return false } } // 把 ajax 请求简单 wrap 一下方便使用 // Usage: var r###lt = await get(url) function get(url) { return $.ajax({ url: url, type: 'get', success: function (r) { return r }, fail: function (error) { return error }, }) } // Input: XML (provide by Youtube) // Output: Array of object // each object look like: /* { startTime: "", endTime: "", text: "" } */ // it's intermediate representation for SRT function parse_youtube_XML_to_object_list(youtube_xml_string) { if ( youtube_xml_string === '' || youtube_xml_string === undefined || youtube_xml_string === null ) { return false } var r###lt_array = [] var text_nodes = youtube_xml_string.getElementsByTagName('text') var len = text_nodes.length for (var i = 0; i < len; i++) { var text = text_nodes[i].textContent.toString() text = text.replace(/(<([^>]+)>)/gi, '') // remove all html tag. text = htmlDecode(text) var attr_start = text_nodes[i].getAttribute('start') // 开始时间 var attr_dur = text_nodes[i].getAttribute('dur') var start = parseFloat(attr_start) var dur = parseFloat(attr_dur) var end = start + dur // 结束时间 // 如果下一行的"开始时间", 在当前行的"结束时间"之后,那么代表这一行和下一行没有串行。屏幕上不会同时显示两行字幕。 // 如果下一行的"开始时间",早于当前行的"结束时间",那么我们把当前行的结束时间,改为下一行的开始时间 var next_line_index = i + 1 if (next_line_index < len) { var next_line = text_nodes[next_line_index] var next_line_attr_start = next_line.getAttribute('start') var next_line_start = parseFloat(next_line_attr_start) if (end > next_line_start) { end = next_line_start } } var start_time = process_time(parseFloat(start)) var end_time = process_time(parseFloat(end)) var item = { startTime: start_time, endTime: end_time, text: text, } r###lt_array.push(item) } return r###lt_array } /* Input: [ {startTime: "", endTime: "", text: ""}, {...}, {...} ] Output: SRT */ function object_array_to_SRT_string(object_array) { var r###lt = '' var BOM = '\uFEFF' r###lt = BOM + r###lt // store final SRT r###lt for (var i = 0; i < object_array.length; i++) { var item = object_array[i] var index = i + 1 var start_time = item.startTime var end_time = item.endTime var text = item.text var new_line = NEW_LINE r###lt = r###lt + index + new_line r###lt = r###lt + start_time r###lt = r###lt + ' --> ' r###lt = r###lt + end_time + new_line r###lt = r###lt + text + new_line + new_line } return r###lt } // return "English (auto-generated)" or a default name; function get_auto_subtitle_name() { try { var captionTracks = get_captionTracks() for (var index in captionTracks) { var caption = captionTracks[index] if (typeof caption.kind === 'string' && caption.kind == 'asr') { return captionTracks[index].name.simpleText } } return 'Auto Subtitle' } catch (error) { return 'Auto Subtitle' } } // return player_response // or return null function get_youtube_data() { return document.getElementsByTagName('ytd-app')[0].data.playerResponse } function get_captionTracks() { let data = get_youtube_data() var captionTracks = data?.captions?.playerCaptionsTracklistRenderer?.captionTracks return captionTracks } // Input a language code, output that language name in current locale // 如果当前语言是中文简体, Input: "de" Output: 德语 // if current locale is English(US), Input: "de" Output: "Germany" function lang_code_to_local_name(languageCode, fallback_name) { try { var captionTracks = get_captionTracks() for (var i in captionTracks) { var caption = captionTracks[i] if (caption.languageCode === languageCode) { let simpleText = captionTracks[i].name.simpleText if (simpleText) { return simpleText } else { return fallback_name } } } } catch (error) { return fallback_name } } // 获取当前视频的标题 (String), 比如 "How Does the Earth Move? Crash Course Geography #5" // https://www.youtube.com/watch?v=ljjLV-5Sa98 // 获取视频标题 function get_title() { // 方法1:先尝试拿到标题 var title_element = document.querySelector( 'h1.title.style-scope.ytd-video-primary-info-renderer' ) if (title_element != null) { var title = title_element.innerText // 能拿到就返回 if (title != undefined && title != null && title != '') { return title } } // 方法2:如果方法1失效用这个 return ytplayer.config.args.title // 这个会 delay, 如果页面跳转了,这个获得的标题还是旧的 } const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms)) // 等待一个元素存在 // https://stackoverflow.com/questions/5525071/how-to-wait-until-an-element-exists function waitForElm(selector) { return new Promise((resolve) => { if (document.querySelector(selector)) { return resolve(document.querySelector(selector)) } const observer = new MutationObserver((mutations) => { if (document.querySelector(selector)) { resolve(document.querySelector(selector)) observer.disconnect() } }) observer.observe(document.body, { childList: true, subtree: true, }) }) } function init() { // console.log('进入 init'); inject_our_script() first_load = false } async function main() { // console.log("[开始工作] Youtube 双语字幕下载"); await waitForElm(anchor_element) init() } setTimeout(main, 2000); })()