返回首頁 

Youtube 下载自动字幕 (字词级) v6

(下载 .json 文件)字词级字幕仅适用于自动字幕(也就是机器用语音转文字识别出来的字幕)(完整字幕没有字词级的)下载字词级的意义是方便分句。可下载两种格式:原版 (&fmt=json3 从 Youtube 获取的原样返回) 和简化版 {startTime: "开始时间(毫秒)", endTime: "结束时间(毫秒)", text: "文字"}。 json 格式不可配合视频直接播放,需要其他软件进行进一步处理(把词拼成句子,转成 srt 格式)


Install this script?
Author's suggested script

You may also like Youtube Subtitle Downloader v36.


Install this script
// ==UserScript==// @name           Youtube 下载自动字幕 (字词级) v6// @include        https://*youtube.com/*// @author         Cheng Zheng// @require        https://code.jquery.com/jquery-1.12.4.min.js// @version        6// @grant GM_xmlhttpRequest// @namespace https://greasyfork.org/users/5711// @description   (下载 .json 文件)字词级字幕仅适用于自动字幕(也就是机器用语音转文字识别出来的字幕)(完整字幕没有字词级的)下载字词级的意义是方便分句。可下载两种格式:原版 (&fmt=json3 从 Youtube 获取的原样返回) 和简化版 {startTime: "开始时间(毫秒)", endTime: "结束时间(毫秒)", text: "文字"}。 json 格式不可配合视频直接播放,需要其他软件进行进一步处理(把词拼成句子,转成 srt 格式)// @license  MIT// ==/UserScript==(function () {// 可配置项var NO_SUBTITLE = '无自动字幕';var HAVE_SUBTITLE = '下载自动字幕 (字词级)';var TEXT_LOADING = '加载中...';const BUTTON_ID = 'youtube-download-word-level-subtitle-last-update-2021-2-21'// 可配置项var HASH_BUTTON_ID = `#${BUTTON_ID}`// 初始化var first_load = true; // indicate if first load this webpage or notvar youtube_playerResponse_1c7 = null; // for auto subtitleunsafeWindow.caption_array = []; // store all subtitle$(document).ready(function () {make_sure_it_load_properly_before_continue();});async function wait_until_element_exists(element_identifier) {var retry_count = 0;var RETRY_LIMIT = 30;return new Promise(function (resolve, reject) {var intervalID = setInterval(function () {try {var element = document.querySelector(element_identifier);if (element != null) {resolve(true);} else {retry_count = retry_count + 1;// console.log(`重试次数 ${retry_count}`);if (retry_count > RETRY_LIMIT) {clearInterval(intervalID);reject(false);}}} catch (error) {reject(false);}}, 330);});}async function make_sure_it_load_properly_before_continue() {var id = new_Youtube_2022_UI_element_identifier();var r###lt = await wait_until_element_exists(id);if (r###lt) {init_UI();}}// trigger when loading new page// (actually this would also trigger when first loading, that's not what we want, that's why we need to use firsr_load === false)// (new Material design version would trigger this "yt-navigate-finish" event. old version would not.)var body = document.getElementsByTagName("body")[0];body.addEventListener("yt-navigate-finish", function (event) {if (current_page_is_video_page() === false) {return;}youtube_playerResponse_1c7 = event.detail.response.playerResponse; // for auto subtitleunsafeWindow.caption_array = []; // clean up (important, otherwise would have more and more item and cause error)// if use click to another page, init again to get correct subtitleif (first_load === false) {remove_subtitle_download_button();init_UI();}});// trigger when loading new page// (old version would trigger "spfdone" event. new Material design version not sure yet.)window.addEventListener("spfdone", function (e) {if (current_page_is_video_page()) {remove_subtitle_download_button();var checkExist = setInterval(function () {if ($('#watch7-headline').length) {init_UI();clearInterval(checkExist);}}, 330);}});// return true / false// Detect [new version UI(material design)] OR [old version UI]// I tested this, accurated.function new_material_design_version() {var old_title_element = document.getElementById('watch7-headline');if (old_title_element) {return false;} else {return true;}}// return true / falsefunction current_page_is_video_page() {return get_url_video_id() !== null;}// return string like "RW1ChiWyiZQ",  from "https://www.youtube.com/watch?v=RW1ChiWyiZQ"// or nullfunction get_url_video_id() {return getURLParameter('v');}//https://stackoverflow.com/questions/11582512/how-to-get-url-parameters-with-javascript/11582513#11582513function getURLParameter(name) {return decodeURIComponent((new RegExp('[?|&]' + name + '=' + '([^&;]+?)(&|#|;|$)').exec(location.search) || [null, ''])[1].replace(/\+/g, '%20')) || null;}function remove_subtitle_download_button() {$(HASH_BUTTON_ID).remove();}// 初始化function init_UI() {var html_element = get_main_UI_element();var old_anchor_element = document.getElementById("watch7-headline");if (old_anchor_element != null) {old_anchor_element.appendChild(html_element);}var anchor = document.querySelector('#above-the-fold')if (anchor) {anchor.appendChild(html_element);}first_load = false;}function get_main_UI_element() {var div = document.createElement('div'),select = document.createElement('select'),option = document.createElement('option');var css_div = `display: table;margin-top:4px;border: 1px solid rgb(0, 183, 90);cursor: pointer; color: rgb(255, 255, 255);border-top-left-radius: 3px;border-top-right-radius: 3px;border-bottom-right-radius: 3px;border-bottom-left-radius: 3px;background-color: #00B75A;`;div.setAttribute('style', css_div);div.id = BUTTON_ID;select.id = 'captions_selector';select.disabled = true;let css_select = `display:block;border: 1px solid rgb(0, 183, 90);cursor: pointer;color: rgb(255, 255, 255);background-color: #00B75A;padding: 4px;`;select.setAttribute('style', css_select);option.textContent = TEXT_LOADING;option.selected = true;select.appendChild(option);// 下拉菜单里,选择一项后触发下载select.addEventListener('change', function () {download_subtitle(this);}, false);div.appendChild(select); // put <select> into <div>load_language_list(select);// <a> element is for downloadvar a = document.createElement('a');a.style.cssText = 'display:none;';a.setAttribute("id", "ForSubtitleDownload");var body = document.getElementsByTagName('body')[0];body.appendChild(a);return div;}// trigger when user select <option>async function download_subtitle(selector) {// if user select first <option>, we just return, do nothing.if (selector.selectedIndex == 0) {return;}var caption = caption_array[selector.selectedIndex - 1];// because first <option> is for display, so index - 1var r###lt = null;var filename = null; // 保存文件名// if user choose auto subtitleif (caption.lang_code == 'AUTO-original') {r###lt = await get_auto_subtitle();filename = get_file_name(`原版 JSON-${get_auto_subtitle_name()}`);downloadString(JSON.stringify(r###lt), "text/plain", filename);}if (caption.lang_code == 'AUTO-simplify') {r###lt = await get_auto_subtitle();filename = get_file_name(`简化版 JSON-${get_auto_subtitle_name()}`);let json = parse_youtube_XML_to_JSON(r###lt);downloadString(JSON.stringify(json), "text/plain", filename);}// After download, select first <option>selector.options[0].selected = true;}function get_file_name(x) {var suffix = 'json'var method_3 = `(${x})${get_title()}_video_id_${get_video_id()}.${suffix}`;return method_3}// detect if "auto subtitle" and "closed subtitle" exist// and add <option> into <select>function load_language_list(select) {var auto_subtitle_exist = false;// get auto subtitlevar auto_subtitle_url = get_auto_subtitle_xml_url();if (auto_subtitle_url != false) {auto_subtitle_exist = true;}// if no subtitle at all, just say no and stopif (auto_subtitle_exist == false) {select.options[0].textContent = NO_SUBTITLE;disable_download_button();return false;}// if at least one type of subtitle existselect.options[0].textContent = HAVE_SUBTITLE;select.disabled = false;var option = null; // for <option>var caption_info = null; // for our custom object// if auto subtitle existif (auto_subtitle_exist) {caption_info = {lang_code: 'AUTO-original',lang_name: `${get_auto_subtitle_name()} (原版)`,};caption_array.push(caption_info);option = document.createElement('option');option.textContent = caption_info.lang_name;select.appendChild(option);caption_info = {lang_code: 'AUTO-simplify',lang_name: `${get_auto_subtitle_name()} (简化版)`,};caption_array.push(caption_info);option = document.createElement('option');option.textContent = caption_info.lang_name;select.appendChild(option);}}function disable_download_button() {$(HASH_BUTTON_ID).css('border', '#95a5a6').css('cursor', 'not-allowed').css('background-color', '#95a5a6');$('#captions_selector').css('border', '#95a5a6').css('cursor', 'not-allowed').css('background-color', '#95a5a6');if (new_material_design_version()) {$(HASH_BUTTON_ID).css('padding', '6px');} else {$(HASH_BUTTON_ID).css('padding', '5px');}}// 处理时间. 比如 start="671.33"  start="37.64"  start="12" start="23.029"// 处理成 srt 时间, 比如 00:00:00,090    00:00:08,460    00:10:29,350function process_time(s) {s = s.toFixed(3);// 超棒的函数, 不论是整数还是小数都给弄成3位小数形式// 举个柚子:// 671.33 -> 671.330// 671 -> 671.000// 注意函数会四舍五入. 具体读文档var array = s.split('.');// 把开始时间根据句号分割// 671.330 会分割成数组: [671, 330]var Hour = 0;var Minute = 0;var Second = array[0]; // 671var MilliSecond = array[1]; // 330// 先声明下变量, 待会把这几个拼好就行了// 我们来处理秒数.  把"分钟"和"小时"除出来if (Second >= 60) {Minute = Math.floor(Second / 60);Second = Second - Minute * 60;// 把 秒 拆成 分钟和秒, 比如121秒, 拆成2分钟1秒Hour = Math.floor(Minute / 60);Minute = Minute - Hour * 60;// 把 分钟 拆成 小时和分钟, 比如700分钟, 拆成11小时40分钟}// 分钟,如果位数不够两位就变成两位,下面两个if语句的作用也是一样。if (Minute < 10) {Minute = '0' + Minute;}// 小时if (Hour < 10) {Hour = '0' + Hour;}// 秒if (Second < 10) {Second = '0' + Second;}return Hour + ':' + Minute + ':' + Second + ',' + MilliSecond;}// copy from: https://gist.github.com/danallison/3ec9d5314788b337b682// Thanks! https://github.com/danallison// work in Chrome 66// test passed: 2018-5-19function downloadString(text, fileType, fileName) {var blob = new Blob([text], {type: fileType});var a = document.createElement('a');a.download = fileName;a.href = URL.createObjectURL(blob);a.dataset.downloadurl = [fileType, a.download, a.href].join(':');a.style.display = "none";document.body.appendChild(a);a.click();document.body.removeChild(a);setTimeout(function () {URL.revokeObjectURL(a.href);}, 1500);}// https://css-tricks.com/snippets/javascript/unescape-html-in-js/// turn HTML entity back to text, example: &quot; should be "function htmlDecode(input) {var e = document.createElement('div');e.class = 'dummy-element-for-tampermonkey-Youtube-Subtitle-Downloader-script-to-decode-html-entity';e.innerHTML = input;return e.childNodes.length === 0 ? "" : e.childNodes[0].nodeValue;}// return URL or null;// later we can send a AJAX and get XML subtitlefunction get_auto_subtitle_xml_url() {try {var captionTracks = get_captionTracks()for (var index in captionTracks) {var caption = captionTracks[index];if (caption.kind === 'asr') {return captionTracks[index].baseUrl;}// ASR – A caption track generated using automatic speech recognition.// https://developers.google.com/youtube/v3/docs/captions}return false;} catch (error) {return false;}}function get_auto_subtitle_json_url() {return `${get_auto_subtitle_xml_url()}&fmt=json3`}async function get_auto_subtitle() {var url = get_auto_subtitle_json_url();if (url == false) {return false;}var r###lt = await get(url)return r###lt}function parse_youtube_XML_to_JSON(json) {var final_r###lt = [];// var template_example = {//   startTime: null,//   endTime: null,//   text: null// }var events = json.eventsfor (var i = 0; i < events.length; i++) {var event = events[i];// 对于内容(segs)为空的,直接跳过if (event.segs == undefined) {continue}// aAppend 就是只有一个 \nif (event.aAppend != undefined) {continue}var startTime = nullvar endTime = event.tStartMs + event.dDurationMs;var text = null;var segs = event.segsfor (var j = 0; j < segs.length; j++) {var seg = segs[j];if (seg.tOffsetMs) {startTime = event.tStartMs + seg.tOffsetMs} else {startTime = event.tStartMs}text = seg.utf8;var one = {startTime: startTime,endTime: endTime,text: text,}final_r###lt.push(one);}}return final_r###lt;}// return "English (auto-generated)" or a default name;function get_auto_subtitle_name() {try {var captionTracks = get_captionTracks();for (var index in captionTracks) {var caption = captionTracks[index];if (typeof caption.kind === 'string' && caption.kind == 'asr') {return captionTracks[index].name.simpleText;}}return 'Auto Subtitle';} catch (error) {return 'Auto Subtitle';}}function get_youtube_data(){return document.getElementsByTagName("ytd-app")[0].data.playerResponse}function get_captionTracks() {let data = get_youtube_data();var captionTracks = data?.captions?.playerCaptionsTracklistRenderer?.captionTracksreturn captionTracks}function get_title() {return ytplayer.config.args.title;}function get_video_id() {return ytplayer.config.args.video_id;}// Usage: var r###lt = await get(url)function get(url) {return $.ajax({url: url,type: 'get',success: function (r) {return r},fail: function (error) {return error}});}// 我们用这个元素判断是不是 2022 年新 UI 。// return Element;function new_Youtube_2022_UI_element() {return document.querySelector(new_Youtube_2022_UI_element_identifier());}function new_Youtube_2022_UI_element_identifier() {var document_querySelector = "#owner.item.style-scope.ytd-watch-metadata";return document_querySelector;}})();