🏠 Home 

Bangumi-To Romaji Title

Retrieve the Romaji title of the subject and display it in the infobox


Install this script?
  1. // ==UserScript==
  2. // @name Bangumi-To Romaji Title
  3. // @name:zh-CN 班固米-获取条目罗马字标题
  4. // @version 0.3.5
  5. // @description Retrieve the Romaji title of the subject and display it in the infobox
  6. // @author weiduhuo
  7. // @namespace https://github.com/weiduhuo/scripts
  8. // @match *://bgm.tv/subject/*
  9. // @match *://bangumi.tv/subject/*
  10. // @match *://chii.in/subject/*
  11. // @grant none
  12. // @license MIT
  13. // @description:zh-CN 基于MyAnimeList的非官方API Jikan,获取条目的罗马字标题,并呈现于infobox
  14. // ==/UserScript==
  15. (function () {
  16. 'use strict';
  // Name used as the prefix of every console message emitted by this script.
  const SCRIPT_NAME = '班固米-罗马名获取组件';
  /** Upper limit on the number of results returned by a single query */
  const QueryLimit = 10;
  /** Minimum relevance score for a result to be accepted (inclusive) */
  const minRelThr = 2.5;
  /** Relevance score below which a second, broader query is attempted (exclusive) */
  const retryRelThr = 7.5;
  /** Enumeration of enable states */
  const EnableState = {
  /** All subjects */
  ALL_ENABLED: 'allEnabled',
  /** Chinese and Japanese subjects only */
  ONLY_CJ: 'onlyChinese&Japanese',
  /** Japanese subjects only */
  ONLY_JAPANESE: 'onlyJapanese',
  };
  /** Active enable state */
  let enableState = EnableState.ONLY_CJ;
  /** Supported subject types */
  const SubjectType = ['anime'];
  /**
   * Candidate region tags. The position of each list (1-based) is the value
   * returned by includeTargetTag, and lines up with the numbers in Region.
   * NOTE(review): the first list holds '##' placeholders that look mangled;
   * presumably it should contain the tags of Chinese works — confirm against upstream.
   */
  const RegionTags = [
  ['##', '##'],
  ['日本', '日本动画'],
  ];
  /** Region codes, plus a reverse lookup from code to key name */
  const Region = {
  cn: 1,
  jp: 2,
  // Returns the key whose value equals `value`, or the string 'null' when none does.
  parse(value) {
  for (const [k, v] of Object.entries(this)) if (value === v) return k;
  return 'null';
  }
  };
  /** Media type mapping, BGM to MAL */
  const PlatformMap = {
  'anime': {
  'TV': 'tv', // ['tv','tv_special'] are told apart later via subTags
  '剧场版': 'movie',
  'OVA': 'ova',
  'WEB': '', // ['ona','music','special','cm','pv',...] is one-to-many, but the Jikan API does not accept multiple values, so it is left empty and all types are searched
  // 'music' is told apart later via subTags
  '动态漫画': '',
  },
  };
  /** Matches kana */
  const KanaRe = /[\p{sc=Hiragana}\p{sc=Katakana}]/u;
  /** Matches Han characters and kana */
  const ZnJpRe = /[\p{sc=Hiragana}\p{sc=Katakana}\u30FC\u31F0-\u31FF\uFF61-\uFF9F\p{sc=Han}]/u;
  /** Matches text made only of Latin letters and symbols (currently disabled) */
  // const OnlyLatinRe = /^[\s\u0020-\u00FF\u2000-\u206F\u2150-\u218F\u25A0-\u26FF\u3000-\u301E\uFE30-\uFF65\uFFE0-\uFFEF]+$/;
  // The ranges above cover, in order: whitespace, Basic Latin and its supplement, general punctuation, number forms, geometric shapes and miscellaneous symbols, common CJK punctuation, CJK compatibility forms
  /** Matches title prefixes */
  const PrefTitleRe = /^(((劇場版?)?総集|短)編|(映画|劇場|同人)版?)\s*|.?(Official )?Music Video.?/i;
  /** Matches title suffixes */
  const SuffTitleRe = /\s[^\s]*(版)$/;
  // const SuffTitleRe = /\s[^\s]*([\d\u2150-\u218F][^\s]*|版)$/;
  /** Matches the phrases a title is split into */
  const PhrasesRe = /(((劇場版?)?総集|短)編|(映画|劇場|同人)版?)|(\d+)|([a-z]{2,}|[之的]|(?:(?![之的])[\p{sc=Han}])+|[\p{sc=Hiragana}\u30FC]+|[\p{sc=Katakana}\u30FC\u31F0-\u31FF\uFF61-\uFF9F]+)/ug;
  // Note: \p{scx=Han} would also match the '『』' symbols
  /** Phrases that are dropped after a title has been split */
  const PhrasesFilterRe = /^(映画|アニメ|第|st|nd|rd|th|season|章)$/;
  /** Matches punctuation and symbols inside a title */
  const PunctRe = /[\u2000-\u206F\u25A0-\u26FF\u3000-\u301E\uFE30-\uFF65\uFFE0-\uFFEF]/g;
  /**
   * Script entry point.
   *
   * Determines the subject's region, inserts placeholder title rows into the
   * infobox, then fills them either from sessionStorage or from up to two
   * Jikan API queries (a narrow one first, a broader one when the relevance
   * score of the first stays below `retryRelThr`).
   */
  async function main() {
    const subType = getSubjectType();
    if (!SubjectType.includes(subType)) return;
    // Decide from the subject's region whether the feature should run at all.
    const infobox = document.querySelector('#infobox');
    const rawTitle = getSubjectTitle();
    const isLatinTitle = !ZnJpRe.test(rawTitle); // true when the title has no Han/kana characters
    const subTags = getSubjectTags();
    let region = includeTargetTag(subTags, ...RegionTags);
    if (!region) {
      // Fall back to the title, character names and summary (public tags do not cover every subject).
      if (KanaRe.test(rawTitle) || charNameHasKana() || KanaRe.test(getSubjectSummary())) {
        region = Region.jp;
      } else {
        if (enableState === EnableState.ALL_ENABLED) addTitle(infobox, region, isLatinTitle, rawTitle);
        return;
      }
    }
    if (region === Region.cn && enableState === EnableState.ONLY_JAPANESE) return;
    // Insert the placeholder rows that will be filled in later.
    const titleLis = addTitle(infobox, region, isLatinTitle);
    // Try the data cached in sessionStorage first.
    const [, id] = getSubjectId();
    const key = `subtitle-${id}`;
    const cached = sessionStorage.getItem(key);
    if (cached) {
      const data = JSON.parse(cached);
      updateTitle(titleLis, [data.romaji, data.english]);
      console.log(`${SCRIPT_NAME}:`, {
        'relScore': data.relScore,
        'romaji': data.romaji,
        'english': data.english,
      });
      if (data.url) console.log(`${SCRIPT_NAME}:`, data.url); // logged on its own so the URL stays clickable
      return;
    }
    // First pass over the page data (only what the API query needs).
    let platform = getPlatform(subType);
    if (includeTargetTag(subTags, ['MV'])) {
      platform = 'music';
    }
    if (platform === 'tv' && includeTargetTag(subTags, ['OVA', 'SP', 'TVSP'], ['MV']) === 1) {
      platform = 'tv_special';
    }
    const tips = infobox.querySelectorAll('span.tip');
    // Fix: pass the region through. It was omitted before, so getStartDate
    // never took its "prefer the Japanese date" branch.
    const startDate = getStartDate(infobox, tips, region);
    // Parsed lazily inside handlerQuery, while the request is in flight.
    let episodes;
    let notFirstPart;
    // State shared between handlerQuery and packData.
    let subs;
    let relScore;
    let titles;
    let url;
    let phraseSet;
    // Works around the Jikan API weighting the first characters too heavily.
    let mainTitle = rawTitle.replace(PrefTitleRe, '');
    // Works around the Jikan API weighting symbols such as corner brackets too heavily.
    mainTitle = mainTitle.replace(PunctRe, ' ');
    // Conservative lower bound: year precision only, moved back by one year.
    const queryStartDate = startDate ? `${startDate.year - 1}-01-01` : '';
    await handlerQuery(platform, queryStartDate);
    const data_1 = packData();
    if (relScore >= minRelThr) {
      updateTitle(titleLis, titles);
      sessionStorage.setItem(key, JSON.stringify(data_1));
    }
    if (relScore >= retryRelThr) return;
    // Relevance is low: widen the search and try once more.
    console.log(`${SCRIPT_NAME}:相关度较低,扩大搜索范围,再次尝试`);
    const preCl = relScore;
    mainTitle = mainTitle.replace(SuffTitleRe, ''); // drop the suffix matched by SuffTitleRe, keeping only the main title
    await handlerQuery();
    // The search range was widened, so the relevance score is lowered.
    if (platform && relScore) relScore -= 0.5;
    const data_2 = packData();
    if (relScore >= minRelThr && relScore > preCl) {
      updateTitle(titleLis, titles);
      sessionStorage.setItem(key, JSON.stringify(data_2));
    } else if (relScore < minRelThr && preCl < minRelThr) {
      updateTitle(titleLis, ['NULL', 'NULL']);
    }

    /** Logs the current result and returns it in the shape stored in sessionStorage. */
    function packData() {
      const data = {
        'relScore': relScore / 10,
        'romaji': titles[0],
        'english': titles[1],
      };
      console.log(`${SCRIPT_NAME}:`, data);
      if (url) console.log(`${SCRIPT_NAME}:`, url);
      data.url = url;
      return data;
    }

    /**
     * Runs one query and stores its outcome in relScore / titles / url.
     * @param {string} _platform media type filter, '' for none
     * @param {string} _startDate lower bound for the start date, '' for none
     */
    async function handlerQuery(_platform = '', _startDate = '') {
      const promise = querySubject(mainTitle, subType, _platform, _startDate);
      // Parse the page while the request is pending instead of idling on the API.
      episodes ??= getEpisodes(infobox, tips);
      // Check whether the first listed episode is the opening one, since BGM
      // and MyAnimeList may merge subjects differently.
      notFirstPart ??= !isFirstPart();
      phraseSet = getPhraseSet(mainTitle);
      if (!titles) {
        // Only logged before the first result exists.
        console.log(`${SCRIPT_NAME}:`, {
          'region': Region.parse(region),
          'platform': platform,
          'episodes': episodes,
          'startDate': startDate,
          'phraseSet': phraseSet,
        });
      }
      subs = await promise;
      if (Array.isArray(subs)) {
        [relScore, titles, url] = searchSubject(subs, phraseSet, isLatinTitle, startDate, episodes);
      } else {
        // querySubject returned a status string; show it in both rows.
        [relScore, titles, url] = [0, [subs, subs], null];
      }
      // For subjects that are not a first part, trust the start-date match less.
      if (notFirstPart) relScore *= 0.75;
      titles = titles.map((title) => title.replace(/\s\((TV|OVA)\)/, '')); // strip the suffix
    }
  }
  /**
   * Queries the Jikan API for subjects matching the original title.
   * @param {string} title original title
   * @param {string} subType subject type (used as the API path segment)
   * @param {string} platform media type; omitted from the query when empty
   * @param {string} startDate lower bound for the start date, 'Y-m-d'; omitted when empty
   * @param {number} limit number of results to request
   * @returns {Promise<string | Array<Object>>} the result list, 'NULL' when the
   * response holds no results, or 'ERROR' when the request fails or the server
   * answers with a non-2xx status
   */
  async function querySubject(title, subType, platform, startDate, limit = QueryLimit) {
    const url = new URL(`https://api.jikan.moe/v4/${subType}`);
    url.searchParams.set('limit', limit);
    url.searchParams.set('q', title);
    if (platform) url.searchParams.set('type', platform);
    if (startDate) url.searchParams.set('start_date', startDate);
    try {
      let response;
      console.time(`Jikan API`);
      try {
        response = await fetch(url);
      } finally {
        // Always close the timer, even when fetch rejects.
        console.timeEnd(`Jikan API`);
      }
      // An error status (rate limit, server error, ...) is a failure, not an empty result.
      if (!response.ok) throw new Error(`HTTP ${response.status}`);
      const data = await response.json();
      const subs = data.data;
      if (!subs || subs.length === 0) return 'NULL';
      return subs;
    } catch (error) {
      console.error('Jikan API请求失败:', error);
      return 'ERROR';
    }
  }
  /**
   * Scores every candidate returned by the API and picks the best one.
   * @param {Array<Object>} subs candidates returned by the API
   * @param {Set<string>} phraseSet phrases of the original title
   * @param {boolean} isLatinTitle whether the original title has no Han/kana characters
   * @param {{year: number, month: number, day: number} | null} startDate start date
   * @param {number | null} episodes episode count
   * @returns {[number, [Object], string]} [`relScore`, [`romaji`, `english`], `url`]
   * - `relScore` relevance on a 10-point scale: up to 6 points for how well the
   *   title phrases match, up to 4 points for the start date and episode count
   */
  function searchSubject(subs, phraseSet, isLatinTitle, startDate, episodes) {
    const tmpSubs = [];
    console.groupCollapsed(`${SCRIPT_NAME}:详情`);
    subs.forEach((sub, index) => {
      let similarity;
      // Jaccard similarity between the title phrases.
      if (!isLatinTitle) {
        similarity = jaccardSimilarity(phraseSet, sub.title_japanese);
      } else {
        // A purely Latin search term is compared with both the romaji and the English title.
        similarity = Math.max(
          jaccardSimilarity(phraseSet, sub.title),
          jaccardSimilarity(phraseSet, sub.title_english)
        );
      }
      sub.relScore = similarity * 6;
      // The earlier substring-counting score was replaced because it could not
      // tell '日常 Eテレ版' (2012-1) apart among ['日常' 2011-4, '男子高校生の日常' 2012-1].
      if (sub.relScore) tmpSubs.push(sub);
      sub.index = index;
      console.log({
        'index': index,
        'simScore': sub.relScore,
        'type': sub.type,
        'startDate': sub.aired.from ? sub.aired.from.split('T')[0] : null,
        'episodes': sub.episodes,
        'japanese': sub.title_japanese,
        'romanji': sub.title,
        'english': sub.title_english,
        'url': sub.url,
      });
    });
    // Keep only candidates with a non-zero title score, unless that leaves nothing.
    if (tmpSubs.length) {
      subs = tmpSubs;
      subs.sort((a, b) => b.relScore - a.relScore);
    }
    if (!startDate) {
      return getResult(1, subs);
    }
    const sameYearSubs = subs.filter(sub => sub.aired.prop.from.year === startDate.year);
    if (sameYearSubs.length === 0) {
      return getResult(0, subs);
    }
    const sameMonthSubs = sameYearSubs.filter(sub => startDate.month && sub.aired.prop.from.month === startDate.month);
    if (sameMonthSubs.length === 0) {
      return getResult(2, sameYearSubs);
    }
    if (sameMonthSubs.length === 1) {
      return getResult(4, sameMonthSubs);
    }
    if (!episodes) {
      return getResult(3, sameMonthSubs);
    }
    // Several candidates share the start month: favour the closest episode count.
    let minDiff = Infinity, index = 0;
    sameMonthSubs.forEach((sub, _index) => {
      const diff = Math.abs(episodes - sub.episodes);
      if (diff < minDiff) {
        minDiff = diff;
        index = _index;
      }
    });
    sameMonthSubs[index].relScore += 2;
    return getResult(2, sameMonthSubs);

    /**
     * Adds `offset` to every candidate in `_subs`, re-sorts all candidates and
     * returns the top one. (The name was mangled into an invalid identifier
     * in this copy of the file and is restored here.)
     */
    function getResult(offset, _subs) {
      _subs.forEach(sub => { sub.relScore += offset });
      subs.sort((a, b) => b.relScore - a.relScore);
      console.log('sortedByRelScore:', subs.map(sub => [sub.index, sub.relScore]));
      const sub = subs[0];
      console.log('result:', sub.index);
      console.groupEnd();
      sub.title_english ??= 'NULL';
      return [sub.relScore, [sub.title, sub.title_english], sub.url];
    }
  }
  /**
   * Jaccard similarity between a phrase set and the phrases of a title.
   * @param {Set<string>} set1 phrase set of the reference title
   * @param {string | null} str2 title to compare with; split with getPhraseSet
   * @returns {number} |intersection| / |union|, or 0 when both sets are empty
   */
  function jaccardSimilarity(set1, str2) {
    const set2 = getPhraseSet(str2);
    const union = new Set([...set1, ...set2]);
    // Two empty sets used to yield 0 / 0 = NaN, which then spread into the scores.
    if (union.size === 0) return 0;
    const intersection = new Set([...set1].filter(x => set2.has(x)));
    return intersection.size / union.size;
  }
  /**
   * Splits a title into its set of lower-cased phrases.
   * @param {string | null} title title to split
   * @returns {Set<string>} phrases matched by PhrasesRe, minus those matched
   * by PhrasesFilterRe; empty for an empty title or when nothing matches
   */
  function getPhraseSet(title) {
    const found = title ? title.toLowerCase().match(PhrasesRe) : null;
    const kept = new Set();
    for (const phrase of found ?? []) {
      if (!PhrasesFilterRe.test(phrase)) kept.add(phrase);
    }
    return kept;
  }
  /**
   * Reads the subject type from the highlighted navigation entry.
   * @returns {string} second '/'-separated segment of that entry's href
   */
  function getSubjectType() {
    const focusedEntry = document.querySelector('#navMenuNeue .focus');
    const href = focusedEntry.getAttribute('href');
    const [, type] = href.split('/');
    return type;
  }
  /**
   * Reads the subject's title from the page header.
   * @returns {string} trimmed text of the header link
   */
  function getSubjectTitle() {
    const titleLink = document.querySelector('#headerSubject > h1 > a');
    const text = titleLink.textContent;
    return text.trim();
  }
  /**
   * Collects the tag elements of the subject.
   * @returns {NodeListOf<Element>} spans inside the tag section
   */
  function getSubjectTags() {
    const tagSelector = '.subject_tag_section > .inner span';
    const tagSpans = document.querySelectorAll(tagSelector);
    return tagSpans;
  }
  /**
   * Reads the subject's summary text.
   * @returns {string} text of the summary element, or '' when the page has no
   * such element (this used to throw a TypeError and abort the script)
   */
  function getSubjectSummary() {
    return document.querySelector('#subject_summary')?.textContent ?? '';
  }
  /**
   * Extracts the page type and numeric id from the current path.
   * @returns {[string | null, string | null]} [type, id]; both null when the
   * path does not end in '/<digits>'
   */
  function getSubjectId() {
    const parts = /^\/(.+)\/(\d+)$/.exec(window.location.pathname);
    if (parts === null) return [null, null];
    return [parts[1], parts[2]];
  }
  /**
   * Finds which group of target tags the subject's tags hit first.
   * Tags are checked in page order; for each tag the groups are checked in
   * argument order.
   * @param {NodeListOf<Element>} subTags tag elements of the subject
   * @param {...Array<string>} targetTypeTags one list of tag names per kind
   * @returns {number} 1-based position of the matching list, 0 when nothing matches
   */
  function includeTargetTag(subTags, ...targetTypeTags) {
    for (const element of subTags) {
      const label = element.textContent.trim();
      const hit = targetTypeTags.findIndex((names) => names.includes(label));
      if (hit !== -1) return hit + 1;
    }
    return 0;
  }
  /**
   * Checks whether any character name listed on the page contains kana.
   * @returns {boolean} true as soon as one name matches KanaRe
   */
  function charNameHasKana() {
    const nameNodes = document.querySelectorAll('#browserItemList strong');
    return Array.from(nameNodes).some((node) => KanaRe.test(node.innerText));
  }
  /**
   * Maps the media type shown next to the title to its MAL counterpart.
   * @param {string} subType subject type, used as the key into PlatformMap
   * @returns {string} mapped type, or '' when the page shows none or it is not in the map
   */
  function getPlatform(subType) {
    const smallTag = document.querySelector('#headerSubject > h1 > small.grey');
    if (!smallTag) return '';
    const label = smallTag.innerText.trim();
    const mapping = PlatformMap[subType];
    if (!(label in mapping)) return '';
    return mapping[label];
  }
  /**
   * Tells whether the episode list starts at the opening episode.
   * @returns {boolean} true when the first listed episode is labelled '00' or
   * '01', or when the page has no episode list
   */
  function isFirstPart() {
    const leadingEp = document.querySelector('#subject_detail > .subject_prg > .prg_list > li:first-child');
    if (!leadingEp) return true;
    const label = leadingEp.innerText.trim();
    return label === '00' || label === '01';
  }
  /**
   * Reads the episode count from the infobox.
   * Only the leading tips are scanned; the search gives up once it moves past
   * index 10.
   * @param {HTMLElement} infobox infobox element
   * @param {NodeListOf<HTMLElement>} tips label elements inside the infobox
   * @returns {number | null} first number in the episode row, or null when the
   * row is missing or holds no digits
   */
  function getEpisodes(infobox, tips) {
    const maxScan = 10;
    let node = null;
    let position = 0;
    for (const tip of tips) {
      if (position > maxScan) return null;
      if (tip.innerText.trim() === '话数:') {
        node = tip;
        break;
      }
      position += 1;
    }
    if (node === null) return null;
    // Climb up to the infobox's direct child, i.e. the whole row.
    while (node.parentElement !== infobox) {
      node = node.parentElement;
    }
    const digits = /(\d+)/.exec(node.textContent);
    return digits ? Number(digits[1]) : null;
  }
  /**
   * Reads the subject's start date from the infobox.
   * @param {HTMLElement} infobox infobox element
   * @param {NodeListOf<HTMLElement>} tips label elements inside the infobox
   * @param {number} region region code; for Region.jp a date marked as
   * Japanese is preferred
   * @returns {{year: number, month: number, day: number} | null} parsed date
   * (month/day are NaN when absent), or null when no date row or year is found
   */
  function getStartDate(infobox, tips, region) {
    // Capture group number doubles as priority: 1 > 2 > 3, which is right most of the time.
    const labelRe = /(开始|(?:放送|播出)(?:开始|日期))|([上公]映(?!许可))|(发售)/;
    let chosen = null;
    let bestRank = 10;
    let rank;
    for (const tip of tips) {
      const hit = tip.innerText.match(labelRe);
      if (!hit) continue;
      const groupPos = hit.slice(1, 4).findIndex(Boolean);
      if (groupPos !== -1) rank = groupPos + 1;
      if (rank < bestRank) {
        // Only a strictly higher priority may replace the current choice.
        chosen = tip;
        bestRank = rank;
      }
      if (rank === 1) break;
    }
    if (!chosen) return null;
    // Climb up to the infobox's direct child, i.e. the whole row.
    while (chosen.parentElement !== infobox) {
      chosen = chosen.parentElement;
    }
    const rowText = chosen.textContent;
    const patterns = [];
    if (region === Region.jp) {
      // Try the date marked as Japanese first.
      patterns.push(/日本[^))]*(\d{4})[-/年]?(\d{1,2})?[-/月]?(\d{1,2})?[-/日]?/);
      patterns.push(/(\d{4})[-/年]?(\d{1,2})?[-/月]?(\d{1,2})?[-/日]?[\s((]+日本/);
    }
    patterns.push(/(\d{4})[-/年]?(\d{1,2})?[-/月]?(\d{1,2})?[-/日]?/);
    let parts = null;
    for (const pattern of patterns) {
      parts = rowText.match(pattern);
      if (parts) break;
    }
    if (!parts) return null;
    const [, year, month, day] = parts;
    return { year: Number(year), month: Number(month), day: Number(day) };
  }
  /**
   * Inserts the title rows into the infobox, after the first row when that row
   * is the Chinese-name row and before it otherwise.
   *
   * The rows are built from DOM nodes, never from an HTML string, so a title
   * containing markup characters is shown literally and the value always sits
   * in `childNodes[1]`, which is where updateTitle writes.
   * @param {HTMLElement} infobox infobox element
   * @param {number} region region code; when falsy only one row is added
   * @param {boolean} isLatinTitle whether the original title has no Han/kana characters
   * @param {string} [title='···'] text shown in the rows
   * @returns {[HTMLElement]} [romajiLi, englishLi] when `region` is set, [romajiLi] otherwise
   */
  function addTitle(infobox, region, isLatinTitle, title = '···') {
    /** Builds one `<li>` holding a label span followed by a text node. */
    const buildRow = (label) => {
      const li = document.createElement('li');
      const tip = document.createElement('span');
      tip.className = 'tip';
      tip.setAttribute('style', 'user-select: none');
      tip.textContent = `${label}: `;
      li.append(tip, document.createTextNode(title));
      return li;
    };
    // Default label; also covers a region that is neither jp nor cn, which
    // used to leave the label undefined.
    let romajiTip = '索引名';
    if (region && !isLatinTitle) {
      if (region === Region.jp) {
        romajiTip = '罗马名';
      } else if (region === Region.cn) {
        romajiTip = '拼音名';
      }
    }
    const romajiLi = buildRow(romajiTip);
    const firstLi = infobox.children[0];
    const tip = firstLi.querySelector('span.tip');
    const ref = tip && tip.innerText.trim() === '中文名:' ? firstLi.nextSibling : firstLi;
    infobox.insertBefore(romajiLi, ref);
    if (!region) return [romajiLi];
    const englishLi = buildRow('英文名');
    englishLi.className = 'folded';
    infobox.insertBefore(englishLi, ref);
    return [romajiLi, englishLi];
  }
  /**
   * Writes each title into the value node of the row at the same position.
   * @param {[HTMLElement]} lis rows returned by addTitle
   * @param {[string]} titles texts to show, in row order
   */
  function updateTitle(lis, titles) {
    for (const [position, row] of lis.entries()) {
      const valueNode = row.childNodes[1];
      valueNode.textContent = titles[position];
    }
  }
  488. main();
  489. })();