字幕格式是 "中文 \n 英语"(\n 是换行符的意思)
// ==UserScript==// @name Youtube 双语字幕下载 v14 (中文+任选的一门双语,比如英语) (v13 开始自动字幕不再支持双语,自动字幕只能下载翻译后的中文)// @include https://*youtube.com/*// @author Cheng Zheng// @version 14// @copyright Zheng Cheng// @grant GM_xmlhttpRequest// @grant unsafeWindow// @description 字幕格式是 "中文 \n 英语"(\n 是换行符的意思)// @license MIT// @namespace https://greasyfork.org/users/5711// @require https://code.jquery.com/jquery-1.12.4.min.js// @require https://cdn.jsdelivr.net/npm/[email protected]/dist/vue.js// ==/UserScript==/*使用提示:如果本脚本不能使用,有一定概率是因为 jQuery 的 CDN 在你的网络环境下无法载入,就是文件顶部的这一句有问题:@require https://code.jquery.com/jquery-1.12.4.min.js解决办法:去网上随便找一个 jQuery 的 CDN 地址,替换掉这个,比如https://cdn.bootcdn.net/ajax/libs/jquery/1.12.4/jquery.jshttps://cdn.staticfile.org/jquery/1.12.4/jquery.min.js作者联系方式:QQ 1003211008邮件 [email protected]Github@1c7本脚本使用场景:Tampermonkey (Chrome 上的一款插件)(意思是需要安装在 Tampermonkey 里来运行)解决什么问题:下载中外双语的字幕,格式是 中文 \n 外语, \n 是换行符的意思。术语说明:auto 自动字幕closed 完整字幕 (或者叫人工字幕也可以)原理说明:对于"完整字幕", Youtube 返回的时间轴完全一致,因此只需要结合在一起即可,相对比较简单。对于"自动字幕",中文是一个个句子,英文是一个个单词,格式不同,时间轴也不同因此,会基于中文的句子时间(时间轴),把英文放进去特别感谢:ytian(tianyebj):解决英文字幕匹配错误的问题 (https://github.com/1c7/Youtube-Auto-Subtitle-Download/pull/11)备忘:如果要把字符串保存下来, 使用:downloadString(srt_string, "text/plain", file_name);用于测试的视频:https://www.youtube.com/watch?v=JfBZfnkg1uM版本更新日志2022-12-23 把 v13 升级到 v14,因为 Youtube 又更新 UI 了。2021-12-21 把 v12 升级到 v13有多名用户分别在微博,微信,邮件,greasyfork, 四个渠道向我反馈了无法使用问题。此时版本是 v12测试视频1https://www.youtube.com/watch?v=OWaFPsVa3ig&t=16s (有1个字幕,英语(自动生成))绿色下拉菜单里选择 option "中文 + 英语 (自动生成)" 的确没有任何反应,无法下载。实测发现: 英文字幕长度 284行,中文字幕长度 162行,没法一一对应,所以出错了。测试视频2https://www.youtube.com/watch?v=3RkhZgRNC1k (有2个字幕,英语(美国),英语(自动生成))如果是中文+英语(美国) 那么两个都是885行,可以正常下载但是 中文+英语(自动生成),英语是937行,中文是471行。结论:如果是中文+完整字幕,那么长度是一一对应的。没问题如果是中文+自动生成字幕,那么长度不一样,就会有问题。解决办法:如果是自动字幕,只下载中文。完整字幕不需要做额外修改,保留现在这样就行,可以正常工作。*/;(function () {'use strict'// Configvar NO_SUBTITLE = '无字幕'var HAVE_SUBTITLE = '下载双语字幕 (中文+外语)'const NEW_LINE = '\n'const BUTTON_ID = 'youtube-dual-lang-downloader-by-1c7-last-update-2020-12-3'const anchor_element = "#above-the-fold #title";// Configvar HASH_BUTTON_ID = `#${BUTTON_ID}`// initializevar first_load = true // indicate if first load this webpage or notvar youtube_playerResponse_1c7 = null // for auto subtitleunsafeWindow.caption_array = [] // store all subtitle// trigger when first load// $(document).ready(function () {// console.log('开始执行??');// start()// })// trigger when loading new page// (actually this would also trigger when first loading, that's not what we want, that's why we need to use firsr_load === false)// (new Material design version would trigger this "yt-navigate-finish" event. old version would not.)var body = document.getElementsByTagName('body')[0]body.addEventListener('yt-navigate-finish', function (event) {if (current_page_is_video_page() === false) {return}youtube_playerResponse_1c7 = event.detail.response.playerResponse // for auto subtitleunsafeWindow.caption_array = [] // clean up (important, otherwise would have more and more item and cause error)// if use click to another page, init again to get correct subtitleif (first_load === false) {remove_subtitle_download_button()init()}})// trigger when loading new page// (old version would trigger "spfdone" event. new Material design version not sure yet.)window.addEventListener('spfdone', function (e) {if (current_page_is_video_page()) {remove_subtitle_download_button()var checkExist = setInterval(function () {if ($('#watch7-headline').length) {init()clearInterval(checkExist)}}, 330)}})// return true / false// Detect [new version UI(material design)] OR [old version UI]// I tested this, accurated.function new_material_design_version() {var old_title_element = document.getElementById('watch7-headline')if (old_title_element) {return false} else {return true}}// return true / falsefunction current_page_is_video_page() {return get_video_id() !== null}// return string like "RW1ChiWyiZQ", from "https://www.youtube.com/watch?v=RW1ChiWyiZQ"// or nullfunction get_video_id() {return getURLParameter('v')}//https://stackoverflow.com/questions/11582512/how-to-get-url-parameters-with-javascript/11582513#11582513function getURLParameter(name) {return (decodeURIComponent((new RegExp('[?|&]' + name + '=' + '([^&;]+?)(&|#|;|$)').exec(location.search) || [null, ''])[1].replace(/\+/g, '%20')) || null)}function remove_subtitle_download_button() {$(HASH_BUTTON_ID).remove()}// 把 console.log 包装一层, 方便"开启"/"关闭"// 这样可以在代码里遗留很多 console.log,实际运行时"关闭"掉不输出, 调试时"开启"// function logging(...args) {// if(typeof(console) !== 'undefined') {// console.log(...args);// }// }function inject_our_script() {var div = document.createElement('div'),select = document.createElement('select'),option = document.createElement('option');div.setAttribute('style',`display: table;margin-top:4px;border: 1px solid rgb(0, 183, 90);cursor: pointer; color: rgb(255, 255, 255);border-top-left-radius: 3px;border-top-right-radius: 3px;border-bottom-right-radius: 3px;border-bottom-left-radius: 3px;background-color: #00B75A;`)div.id = BUTTON_IDdiv.title = 'Youtube Subtitle Downloader' // display when cursor hoverselect.id = 'captions_selector'select.disabled = trueselect.setAttribute('style',`display:block;border: 1px solid rgb(0, 183, 90);cursor: pointer;color: rgb(255, 255, 255);background-color: #00B75A;padding: 4px;`)option.textContent = 'Loading...'option.selected = trueselect.appendChild(option)// 下拉菜单里,选择一项后触发下载select.addEventListener('change',function () {download_subtitle(this)},false)div.appendChild(select) // put <select> into <div>// Put the div into page: new material designvar title_element = document.querySelectorAll('.title.style-scope.ytd-video-primary-info-renderer')if (title_element) {$(title_element[0]).after(div)}// 把按钮加到页面上。document.querySelector(anchor_element).appendChild(div)load_language_list(select)// <a> element is for downloadvar a = document.createElement('a')a.style.cssText = 'display:none;'a.setAttribute('id', 'ForSubtitleDownload')var body = document.getElementsByTagName('body')[0]body.appendChild(a)}// Trigger when user select <option>async function download_subtitle(selector) {console.log('进入download_subtitle')// if user select first <option>// we just return, do nothing.if (selector.selectedIndex == 0) {return}// 核心概念// 对于完整字幕而言,英文和中文的时间轴是一致的,只需要一行行的 match 即可// 但是对于自动字幕就不是这样了,"自动字幕的英文"只能拿到一个个单词的开始时间和结束时间// "自动字幕的中文"只能拿到一个个句子// 现在的做法是,先拿到中文,处理成 SRT 格式,// 然后去拿英文,然后把英文的每个词,拿去和中文的每个句子的开始时间和结束时间进行对比// 如果"英文单词的开始时间"在"中文句子的开始-结束时间"区间内,那么认为这个英文单词属于这一句中文// 2021-8-11 更新// 自动字幕的改了,和完整字幕一样了。var caption = caption_array[selector.selectedIndex - 1] // because first <option> is for display, so index-1var lang_code = caption.lang_codevar lang_name = caption.lang_name// 初始化2个变量var origin_url = nullvar translated_url = null// if user choose auto subtitle// 如果用户选的是自动字幕if (caption.lang_code == 'AUTO') {origin_url = get_auto_subtitle_xml_url()translated_url = origin_url + '&tlang=zh-Hans'var translated_xml = await get(translated_url)var translated_srt = parse_youtube_XML_to_object_list(translated_xml)var srt_string = object_array_to_SRT_string(translated_srt)var title = get_file_name(lang_name)downloadString(srt_string, 'text/plain', title)// after download, select first <option>selector.options[0].selected = truereturn // 别忘了 return}// 如果用户选的是完整字幕origin_url = await get_closed_subtitle_url(lang_code)translated_url = origin_url + '&tlang=zh-Hans'var original_xml = await get(origin_url)var translated_xml = await get(translated_url)// 根据时间轴融合这俩var original_srt = parse_youtube_XML_to_object_list(original_xml)var translated_srt = parse_youtube_XML_to_object_list(translated_xml)var dual_language_srt = merge_srt(original_srt, translated_srt)var srt_string = object_array_to_SRT_string(dual_language_srt)var title = get_file_name(lang_name)downloadString(srt_string, 'text/plain', title)// after download, select first <option>selector.options[0].selected = true}// 把两个语言的 srt 数组组合起来,// 比如把英文和中文的组合起来。// 这个函数假设两个数组的长度是一模一样的。如果不一样就会出错,比如 srt_A 是 284 个,srt_B 是 164 个元素。function merge_srt(srt_A, srt_B) {var dual_language_srt = []for (let index = 0; index < srt_A.length; index++) {const element_A = srt_A[index]const element_B = srt_B[index]var text = element_B.text + NEW_LINE + element_A.text // 中文 \n 英文var item = {startTime: element_A.startTime,endTime: element_A.endTime,text: text,}dual_language_srt.push(item)}return dual_language_srt}// Return something like: "(English)How Did Python Become A Data Science Powerhouse?.srt"function get_file_name(x) {return `(${x})${get_title()}.srt`}// Detect if "auto subtitle" and "closed subtitle" exist// And add <option> into <select>// 加载语言列表function load_language_list(select) {// autovar auto_subtitle_exist = false // 自动字幕是否存在(默认 false)// closedvar closed_subtitle_exist = false// get auto subtitlevar auto_subtitle_url = get_auto_subtitle_xml_url()if (auto_subtitle_url != false) {auto_subtitle_exist = true}// if there are "closed" subtitle?var captionTracks = get_captionTracks()if (captionTracks != undefined &&typeof captionTracks === 'object' &&captionTracks.length > 0) {closed_subtitle_exist = true}// if no subtitle at all, just say no and stopif (auto_subtitle_exist == false && closed_subtitle_exist == false) {select.options[0].textContent = NO_SUBTITLEdisable_download_button()return false}// if at least one type of subtitle existselect.options[0].textContent = HAVE_SUBTITLEselect.disabled = falsevar option = null // for <option>var caption_info = null // for our custom object// 自动字幕if (auto_subtitle_exist) {var auto_sub_name = get_auto_subtitle_name()var lang_name = `${auto_sub_name} 翻译的中文`caption_info = {lang_code: 'AUTO', // later we use this to know if it's auto subtitlelang_name: lang_name, // for display only}caption_array.push(caption_info)option = document.createElement('option')option.textContent = caption_info.lang_nameselect.appendChild(option)}// if closed_subtitle_existif (closed_subtitle_exist) {for (var i = 0, il = captionTracks.length; i < il; i++) {var caption = captionTracks[i]if (caption.kind == 'asr') {continue}let lang_code = caption.languageCodelet lang_translated = caption.name.simpleTextvar lang_name = `中文 + ${lang_code_to_local_name(lang_code,lang_translated)}`caption_info = {lang_code: lang_code, // for AJAX requestlang_name: lang_name, // display to user}caption_array.push(caption_info)// 注意这里是加到 caption_array, 一个全局变量, 待会要靠它来下载option = document.createElement('option')option.textContent = caption_info.lang_nameselect.appendChild(option)}}}// 禁用下载按钮function disable_download_button() {$(HASH_BUTTON_ID).css('border', '#95a5a6').css('cursor', 'not-allowed').css('background-color', '#95a5a6')$('#captions_selector').css('border', '#95a5a6').css('cursor', 'not-allowed').css('background-color', '#95a5a6')if (new_material_design_version()) {$(HASH_BUTTON_ID).css('padding', '6px')} else {$(HASH_BUTTON_ID).css('padding', '5px')}}// 处理时间. 比如 start="671.33" start="37.64" start="12" start="23.029"// 处理成 srt 时间, 比如 00:00:00,090 00:00:08,460 00:10:29,350function process_time(s) {s = s.toFixed(3)// 超棒的函数, 不论是整数还是小数都给弄成3位小数形式// 举个柚子:// 671.33 -> 671.330// 671 -> 671.000// 注意函数会四舍五入. 具体读文档var array = s.split('.')// 把开始时间根据句号分割// 671.330 会分割成数组: [671, 330]var Hour = 0var Minute = 0var Second = array[0] // 671var MilliSecond = array[1] // 330// 先声明下变量, 待会把这几个拼好就行了// 我们来处理秒数. 把"分钟"和"小时"除出来if (Second >= 60) {Minute = Math.floor(Second / 60)Second = Second - Minute * 60// 把 秒 拆成 分钟和秒, 比如121秒, 拆成2分钟1秒Hour = Math.floor(Minute / 60)Minute = Minute - Hour * 60// 把 分钟 拆成 小时和分钟, 比如700分钟, 拆成11小时40分钟}// 分钟,如果位数不够两位就变成两位,下面两个if语句的作用也是一样。if (Minute < 10) {Minute = '0' + Minute}// 小时if (Hour < 10) {Hour = '0' + Hour}// 秒if (Second < 10) {Second = '0' + Second}return Hour + ':' + Minute + ':' + Second + ',' + MilliSecond}// Copy from: https://gist.github.com/danallison/3ec9d5314788b337b682// Thanks! https://github.com/danallison// Work in Chrome 66// Test passed: 2018-5-19function downloadString(text, fileType, fileName) {var blob = new Blob([text], {type: fileType,})var a = document.createElement('a')a.download = fileNamea.href = URL.createObjectURL(blob)a.dataset.downloadurl = [fileType, a.download, a.href].join(':')a.style.display = 'none'document.body.appendChild(a)a.click()document.body.removeChild(a)setTimeout(function () {URL.revokeObjectURL(a.href)}, 1500)}// https://css-tricks.com/snippets/javascript/unescape-html-in-js/// turn HTML entity back to text, example: " should be "function htmlDecode(input) {var e = document.createElement('div')e.class ='dummy-element-for-tampermonkey-Youtube-cn-other-subtitle-script-to-decode-html-entity-2021-8-11'e.innerHTML = inputreturn e.childNodes.length === 0 ? '' : e.childNodes[0].nodeValue}// 获得自动字幕的地址// return URL or null;// later we can send a AJAX and get XML subtitle// 例子输出: https://www.youtube.com/api/timedtext?v=JfBZfnkg1uM&asr_langs=de,en,es,fr,it,ja,ko,nl,pt,ru&caps=asr&exp=xftt,xctw&xorp=true&xoaf=5&hl=zh-CN&ip=0.0.0.0&ipbits=0&expire=1628691971&sparams=ip,ipbits,expire,v,asr_langs,caps,exp,xorp,xoaf&signature=55984444BD75E34DB9FE809058CCF7DE5B1AB3B5.193DC32A1E0183D8D627D229C9C111E174FF56FF&key=yt8&kind=asr&lang=en/*如果直接访问这个地址,里面的格式是 XML,比如<transcript><text start="0.589" dur="6.121">hello in this video I would like to</text><text start="3.6" dur="5.88">share what I've learned about setting up</text><text start="6.71" dur="5.08">shadows and shadow casting and shadow</text><text start="9.48" dur="5.6">occlusion and stuff like that in a</text></transcript>*/function get_auto_subtitle_xml_url() {try {var captionTracks = get_captionTracks()for (var index in captionTracks) {var caption = captionTracks[index]if (typeof caption.kind === 'string' && caption.kind == 'asr') {return captionTracks[index].baseUrl}// ASR – A caption track generated using automatic speech recognition.// https://developers.google.com/youtube/v3/docs/captions}} catch (error) {return false}}// Input: lang_code like 'en'// Output: URL (String)async function get_closed_subtitle_url(lang_code) {try {var captionTracks = get_captionTracks()for (var index in captionTracks) {var caption = captionTracks[index]if (caption.languageCode === lang_code && caption.kind != 'asr') {var url = captionTracks[index].baseUrlreturn url}}} catch (error) {console.log(error)return false}}// 把 ajax 请求简单 wrap 一下方便使用// Usage: var r###lt = await get(url)function get(url) {return $.ajax({url: url,type: 'get',success: function (r) {return r},fail: function (error) {return error},})}// Input: XML (provide by Youtube)// Output: Array of object// each object look like:/*{startTime: "",endTime: "",text: ""}*/// it's intermediate representation for SRTfunction parse_youtube_XML_to_object_list(youtube_xml_string) {if (youtube_xml_string === '' ||youtube_xml_string === undefined ||youtube_xml_string === null) {return false}var r###lt_array = []var text_nodes = youtube_xml_string.getElementsByTagName('text')var len = text_nodes.lengthfor (var i = 0; i < len; i++) {var text = text_nodes[i].textContent.toString()text = text.replace(/(<([^>]+)>)/gi, '') // remove all html tag.text = htmlDecode(text)var attr_start = text_nodes[i].getAttribute('start') // 开始时间var attr_dur = text_nodes[i].getAttribute('dur')var start = parseFloat(attr_start)var dur = parseFloat(attr_dur)var end = start + dur // 结束时间// 如果下一行的"开始时间", 在当前行的"结束时间"之后,那么代表这一行和下一行没有串行。屏幕上不会同时显示两行字幕。// 如果下一行的"开始时间",早于当前行的"结束时间",那么我们把当前行的结束时间,改为下一行的开始时间var next_line_index = i + 1if (next_line_index < len) {var next_line = text_nodes[next_line_index]var next_line_attr_start = next_line.getAttribute('start')var next_line_start = parseFloat(next_line_attr_start)if (end > next_line_start) {end = next_line_start}}var start_time = process_time(parseFloat(start))var end_time = process_time(parseFloat(end))var item = {startTime: start_time,endTime: end_time,text: text,}r###lt_array.push(item)}return r###lt_array}/*Input: [ {startTime: "", endTime: "", text: ""}, {...}, {...} ]Output: SRT*/function object_array_to_SRT_string(object_array) {var r###lt = ''var BOM = '\uFEFF'r###lt = BOM + r###lt // store final SRT r###ltfor (var i = 0; i < object_array.length; i++) {var item = object_array[i]var index = i + 1var start_time = item.startTimevar end_time = item.endTimevar text = item.textvar new_line = NEW_LINEr###lt = r###lt + index + new_liner###lt = r###lt + start_timer###lt = r###lt + ' --> 'r###lt = r###lt + end_time + new_liner###lt = r###lt + text + new_line + new_line}return r###lt}// return "English (auto-generated)" or a default name;function get_auto_subtitle_name() {try {var captionTracks = get_captionTracks()for (var index in captionTracks) {var caption = captionTracks[index]if (typeof caption.kind === 'string' && caption.kind == 'asr') {return captionTracks[index].name.simpleText}}return 'Auto Subtitle'} catch (error) {return 'Auto Subtitle'}}// return player_response// or return nullfunction get_youtube_data() {return document.getElementsByTagName('ytd-app')[0].data.playerResponse}function get_captionTracks() {let data = get_youtube_data()var captionTracks =data?.captions?.playerCaptionsTracklistRenderer?.captionTracksreturn captionTracks}// Input a language code, output that language name in current locale// 如果当前语言是中文简体, Input: "de" Output: 德语// if current locale is English(US), Input: "de" Output: "Germany"function lang_code_to_local_name(languageCode, fallback_name) {try {var captionTracks = get_captionTracks()for (var i in captionTracks) {var caption = captionTracks[i]if (caption.languageCode === languageCode) {let simpleText = captionTracks[i].name.simpleTextif (simpleText) {return simpleText} else {return fallback_name}}}} catch (error) {return fallback_name}}// 获取当前视频的标题 (String), 比如 "How Does the Earth Move? Crash Course Geography #5"// https://www.youtube.com/watch?v=ljjLV-5Sa98// 获取视频标题function get_title() {// 方法1:先尝试拿到标题var title_element = document.querySelector('h1.title.style-scope.ytd-video-primary-info-renderer')if (title_element != null) {var title = title_element.innerText// 能拿到就返回if (title != undefined && title != null && title != '') {return title}}// 方法2:如果方法1失效用这个return ytplayer.config.args.title // 这个会 delay, 如果页面跳转了,这个获得的标题还是旧的}const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms))// 等待一个元素存在// https://stackoverflow.com/questions/5525071/how-to-wait-until-an-element-existsfunction waitForElm(selector) {return new Promise((resolve) => {if (document.querySelector(selector)) {return resolve(document.querySelector(selector))}const observer = new MutationObserver((mutations) => {if (document.querySelector(selector)) {resolve(document.querySelector(selector))observer.disconnect()}})observer.observe(document.body, {childList: true,subtree: true,})})}function init() {// console.log('进入 init');inject_our_script()first_load = false}async function main() {// console.log("[开始工作] Youtube 双语字幕下载");await waitForElm(anchor_element)init()}setTimeout(main, 2000);})()