🏠 Home 

MusicBrainz: Archive.org importer

Import audio files and collections into Musicbrainz. Also supports scanning bookmarks and search results for MusicBrainz relations.

// ==UserScript==
// @name        MusicBrainz: Archive.org importer
// @namespace   http://www.jens-bertram.net/userscripts/import-internetarchive
// @description Import audio files and collections into Musicbrainz. Also supports scanning bookmarks and search results for MusicBrainz relations.
// @icon        http://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Internet_Archive_logo_and_wordmark.png/240px-Internet_Archive_logo_and_wordmark.png
// @supportURL  https://github.com/JensBee/userscripts
// @license     MIT
// @version     0.4.7beta
// @grant       none
// @require     https://code.jquery.com/jquery-2.1.1.min.js
// @require     https://greasyfork.org/scripts/5140-musicbrainz-function-library/code/MusicBrainz%20function%20library.js?version=21997
// @include     *://archive.org/details/*
// @include     *://archive.org/bookmarks.php
// @include     *//archive.org/search.php*
// ==/UserScript==
var mbz = mbz || {};
mbz.archive_org_importer = {
// https://archive.org/about/faqs.php#Audio
audioFormats: [
* Check file type for audio format. Filters out most (but not all) other
* file types.
* @formatStr file format name
isAudioFile: function(formatStr) {
formatStr = formatStr.toLowerCase();
for (var format in this.audioFormats) {
if (formatStr.contains(format)) {
return true;
return false;
* Functions to parse a list of links for MusicBrainz relations.
mbz.archive_org_importer.linkCheck = {
btn: MBZ.Html.getMbzButton('Check link relations',
'Check entries being linked from MusicBrainz.'),
* Link scanner status values
links: {
found: null,
checked: 0,
matched: 0
* RegEx to strip off current base-url.
re: new RegExp('^'+window.location.origin),
* Scan status elements.
status: {
base: $('<span>'),
current: $('<span>'),
matched: $('<span>')
* Start scanner.
* @params[links] jQuery object with target links
* @params[controlEl] jQuery element to append controls to
scan: function(params) {
this.links.found = params.links;
if (this.links.found.length > 0 && params.controlEl) {
var self = this;
this.status.base.append('&nbsp;Checked: ')
.append('&nbsp;Matches: ')
this.btn.click(function () {
self.btn.prop("disabled", true);
var urls = [];
$.each(self.links.found, function(idx, link) {
urls: MBZ.Util.expandProtocols(urls),
cb: self.rel.attach,
cbInc: self.rel.inc,
cbDone: self.rel.done,
scope: self
* Callback handlers for relation parsing.
rel: {
* Relation was found, data is attached.
attach: function(data) {
if (!data._res) {
var res = data._res.replace(this.re, '');
var self = this;
$.each(self.links.found, function(idx, link) {
var link = $(link);
if (link.attr('href') == res) {
MBZ.Release.insertMBLink(data, link);
* All relations have been resolved.
done: function() {
this.status.base.html('&nbsp;' + this.links.checked
+ ' links checked with ' + this.links.matched + ' matches.');
this.btn.text('Check done');
* A relation was checked.
inc: function() {
* Functions to import a single release.
mbz.archive_org_importer.Release = function() {
this.btn = MBZ.Html.getMbzButton('Import',
'Import this release to MusicBrainz');
this.dEl = $('<div id="mbzDialog">').hide(); // dialog elements
this.mbLinkTarget = null;
this.importRunning = false;
this.importInitialized = false;
// release data object finally passed on to MusicBrainz.
this.release = new MBZ.Release();
this.tracks = new mbz.archive_org_importer.Release.Tracks();
var self = this;
var submitted = false;
* Initialize release parsing.
function init() {
var playerJSON = this.tracks.getPlayerJSON();
if (playerJSON.length == 0) {
console.error('Player JSON data not found. Disabling MusicBrainz import.');
var cEl = $('<div id="mbzControls">'); // control elements
var url = MBZ.Util.rmTrSlash($(location).attr('href'));
var urlJSON = url + '&output=json';
var trackData = $.parseJSON(playerJSON);
var pageJSON = null; // page data as JSON object
this.btn.click(function () {
if (submitted) {
if (!self.importInitialized) {
self.btn.prop("disabled", true);
self.btn.text("Initializing import");
// prepare source data
$.getJSON(urlJSON, function (data) {
pageJSON = data;
self.tracks.parseSources.call(self, data);
}).fail(function(jqxhr, textStatus, error) {
var err = textStatus + ', ' + error;
console.error("Request (" + urlJSON + ") failed: " + err);
self.btn.prop("disabled", true);
// *** static data
idx: 0,
fmt: 'Digital Media'
self.release.setNote('Imported from The Internet Archive (' + url + ')');
// *** parsed data from release JSON object
self.parseJSON.urls(self.release, pageJSON);
self.parseJSON.artists(self.release, pageJSON);
self.parseJSON.title(self.release, pageJSON);
self.parseJSON.labels(self.release, pageJSON);
self.parseJSON.release(self.release, pageJSON);
self.parseJSON.annotation(self.release, pageJSON);
// submit
self.release.submitRelease(function() {
submitted = true;
self.btn.prop("disabled", false);
self.btn.text("Submit again");
self.mbLinkTarget = self.btn;
urls: MBZ.Util.expandProtocol(url),
cb: MBZ.Release.insertMBLink,
scope: self
mbz.archive_org_importer.Release.prototype = {
* Callback function. Called when all sources are parsed.
enableImport: function() {
this.importInitialized = true;
if (this.tracks.validSources > 1) {
this.btn.text("Start import");
this.btn.prop("disabled", false);
} else {
* Parse JSON response for a release.
mbz.archive_org_importer.Release.prototype.parseJSON = {
annotation: function (release, data) {
if (data.metadata.notes) {
artists: function (release, data) {
if (data.metadata.creator) {
$.each(data.metadata.creator, function (idx, val) {
labels: function (release, data) {
if (data.metadata.collection) {
$.each(data.metadata.collection, function (idx, val) {
name: val,
catNo: data.metadata.identifier[0]
release: function (releaseObj, data) {
var dates = data.metadata.date || data.metadata.publicdate;
if (dates) {
$.each(dates, function (idx, val) {
var date = val.match(/([0-9]{4})-([0-9]{2})-([0-9]{2}).*/);
if (date && date.length == 4) {
y: date[1],
m: date[2],
d: date[3],
urls: function (release, data) {
var url = $(location).attr('href');
release.addUrl(url, '75');
release.addUrl(url, '85');
if (data.creativecommons && data.creativecommons.license_url) {
release.addUrl(data.creativecommons.license_url, '301');
title: function (release, data) {
if (data.metadata.title) {
* First parse track list from player JSON data. The provided information
* may not be complete, so gather the parsed data in a local array.
tracksFromPlayer: function(data) {
if (data.length > 0) {
var self = this;
$.each(data, function(idx, val) {
var duration = MBZ.Util.hmsToSeconds(val.duration);
duration = Math.round(parseFloat(duration) * 1000); // sec to msec
if (isNaN(duration)) {
duration = null;
// get source file name
var file = val.sources[0].file;
if (file) {
med: 0,
tit: val.title.replace(/^[0-9]+\.\s/,''),
idx: idx,
dur: duration,
file: MBZ.Util.getLastPathSegment(file)
} else {
console.log("Could not parse file name from player JSON.");
tracksFromPage: function(data) {
if (data && data.files) {
var self = this;
$.each(data.files, function(file, val){
if (mbz.archive_org_importer.isAudioFile(val.format)) {
var fileName = file.replace(/^\//, ''); // remove leading slash
var duration = MBZ.Util.hmsToSeconds(val.duration);
duration = Math.round(parseFloat(duration) * 1000); // sec to msec
if (isNaN(duration)) {
duration = null;
med: 0,
tit: val.title,
dur: duration,
file: fileName
* Handle track sources and the displaying of those data.
mbz.archive_org_importer.Release.Tracks = function() {
* Target element to display track source contents.
var contentHtml = $('<div>');
* Store parsed track data objects to allow multiple data editing passes.
var tracks = {};
* Track data sources available.
var sources = [];
* Track source to use.
var selectedSource = null;
* Number of unique valid sources.
var validSources = 0;
* Add all available track sources to a user dialog.
function addSources(show) {
var sourceSelect = $('<select>');
sourceSelect.on('change', function(){
selectedSource = this.value;
// add sources
$.each(sources, function(idx, source) {
if (!source.dupe && source.files && source.files.length > 0) {
var sourceTitle = '';
if (source.type == 'player') {
sourceTitle = 'Web Player';
} else {
sourceTitle = 'Playlist (' + source.name + ')';
sourceSelect.append('<option value="' + idx + '">'
+ sourceTitle + '</option>');
// add elements
sourceSelect.before('Found multiple track listings with different items.'
+ '<br/>Please select a track data source to import: ');
* Commit currently selected tracks source to be included in MusicBrainz
* submission.
this.commit = function(release) {
$.each(sources[selectedSource].files, function(idx, val) {
tracks[val].idx = idx; // reset track number
* Check which track sources are available. Called on page loading.
this.detectSources = function() {
// internal player data
var playerJSON = this.getPlayerJSON();
if (playerJSON.length > 0) {
type: 'player',
name: 'web-player',
data: $.parseJSON(playerJSON)
// playlists
$('#ff0 a').each(function(idx, item){
var url = $(item).attr('href');
if (url.endsWith('.m3u')) {
type: 'playlist',
name: MBZ.Util.getLastPathSegment(decodeURIComponent(url)),
url: url
if (sources.length > 0) {
// default to first entry
selectedSource = 0;
* Parse track data from all available sources. Called, when import is
* initialized.
* @pageData page data as JSON object
this.parseSources = function(pageData) {
var self = this;
var sourceParsedCount = 0;
function incParsedCount() {
// increase parsed sources counter
if (++sourceParsedCount == sources.length) {
if (validSources > 1) {
// all data parsed, proceed with import
function getTrackList(source) {
if (source.files && source.files.length > 0) {
// looks like data is already set
source.files = [];
if (source.type == 'player') {
$.each(source.data, function(idx, val) {
var file = val.sources[0].file;
if (file) {
// done
} else if (source.type == 'playlist') {
// needed, since we get redirected to a different subdomain
var url = 'https://cors-anywhere.herokuapp.com/archive.org:443'
+ source.url;
$.get(url, function(data) {
//source.data = data;
var files = data.split('\n');
$.each(files, function(idx, file) {
file = MBZ.Util.getLastPathSegment(file.trim());
if (file.length > 0) {
}, 'text').fail(function(jqxhr, textStatus, error) {
var err = textStatus + ', ' + error;
console.error("Request (" + url + ") failed: " + err);
}).always(function() {
// done
// First try to parse data from the internal player as a basis. This data
// may be incomplete (cropped track names) so add it first and overwrite it
// later with more complete data from the page's JSON.
$.each(sources, function(idx, val) {
var source = sources[idx];
if (source.type == 'player') {
// parse some track data from the player
self.parseJSON.tracksFromPlayer.call(self, source.data);
// try to get missing data from page's JSON object
if (pageData.files) {
self.parseJSON.tracksFromPage.call(self, pageData);
// since track data is available, parse the track list for each source
$.each(sources, function(idx, val) {
* Initialize and show the source's track data dialog. Also called, to update
* on track source data select change.
this.showSources = function() {
var self = this;
var trackTable = $('<table id="mbzImportTrackTable">'
+ '<thead>'
+ '<tr>'
+ '<td>#</td><td>Title</td><td>Length</td>'
+ '</tr></thead></table>');
var trackList = $('<tbody>');
function(idx, val) {
if (tracks[val]) {
var duration = data[val].dur;
duration = (duration ? MBZ.Util.msToHms(duration) : '&mdash;');
+ '<td>' + (idx + 1) + '</td>'
+ '<td>' + tracks[val].tit + '</td>'
+ '<td>' + duration + '</td>'
+ '</tr>'));
} else {
console.warn('No data for file "' + val + '" found.');
* Remove duplicated sources which have the same track lists.
function squashSources() {
// go through all source's files
for (var i=0; i<sources.length; i++) {
var src = sources[i];
if (!src.dupe) {
var a = src.files;
if (!a || a.length == 0) {
src.dupe = true;
console.warn("Remove source '" + src.name + "' no files found.");
} else if ((i + 1) < sources.length) {
for (var j=i + 1; j<sources.length; j++) {
var b = sources[j];
if (!b.dupe) {
if (mbz.archive_org_importer.Release.Tracks
.compareSourceFiles(a, b.files)) {
b.dupe = true;
// count valid sources
$.each(sources, function(idx, val) {
if (!val.dupe && val.files.length > 0) {
* Update track metadata with new values. If a value is already set, it will
* get overwritten with the new one.
/**
 * Merge a track data object into the stored track data, keyed by file name.
 *
 * If no entry exists for `data.file` yet, `data` itself is stored. Otherwise
 * every field of `data` that holds a valid value replaces the stored one;
 * fields without a valid value leave the stored entry untouched.
 *
 * @param {Object} data track data with the fields file, med, tit, idx, dur
 */
this.updateData = function(data) {
  var isValid = mbz.archive_org_importer.Release.Tracks.isValidTrackData;
  var known = tracks[data.file];

  if (!known) {
    // first time this file is seen: keep the object as it is
    tracks[data.file] = data;
    return;
  }

  // overwrite field by field, but only with values that carry information
  var fields = ['med', 'tit', 'idx', 'dur'];
  for (var i = 0; i < fields.length; i++) {
    var key = fields[i];
    if (isValid(data[key])) {
      // titles are stored without surrounding whitespace
      known[key] = (key === 'tit') ? data[key].trim() : data[key];
    }
  }
};
* Check if some track data is valid (i.e. not empty or undefined).
/**
 * Tell whether a single track data value carries usable information.
 *
 * @param {*} dataEntry value to check
 * @returns {boolean} false for undefined, null and strings that are empty
 *   or consist of whitespace only; true for every other value
 */
mbz.archive_org_importer.Release.Tracks.isValidTrackData = function (dataEntry) {
  // missing values are never valid
  if (typeof dataEntry === 'undefined' || dataEntry == null) {
    return false;
  }
  // strings need at least one non-whitespace character; any other type is
  // accepted as it is
  return typeof dataEntry !== 'string' || dataEntry.trim().length > 0;
};
* Compare files for two sources.
/**
 * Compare the file lists of two sources element by element.
 *
 * @param {Array} a file list of the first source
 * @param {Array} b file list of the second source
 * @returns {boolean} true if both lists have the same length and hold equal
 *   entries at every position; false otherwise, including when either list
 *   is missing
 */
mbz.archive_org_importer.Release.Tracks.compareSourceFiles = function(a, b) {
  // A source whose file list was never filled in cannot equal another one.
  // Without this guard, reading .length of undefined would throw.
  if (!a || !b) {
    return false;
  }
  if (a.length !== b.length) {
    return false;
  }
  for (var i = 0; i < a.length; i++) {
    if (a[i] !== b[i]) {
      return false;
    }
  }
  return true;
};
mbz.archive_org_importer.Release.Tracks.prototype = {
* Get player JSON data as string.
* @return player JSON data or empty string, if nothing was found
getPlayerJSON: function() {
var pJSON = $('#midcol > script').text().trim()
if (pJSON && pJSON[1]) {
return pJSON[1];
return "";
mbz.archive_org_importer.init = function() {
var pageType = window.location.pathname.split('/');
if (pageType.length >= 2) {
pageType = pageType[1].toLowerCase()
} else {
if (pageType == 'details' && $('body').hasClass('Audio')) {
// import a release
'#mbzImportTrackTable {margin-top:0.5em;margin-left:0.5em;}'
+ '#mbzImportTrackTable thead {'
+ 'font-weight:bold;'
+ 'background-color:rgba(115,108,174,0.5);'
+ '}'
+ '#mbzImportTrackTable tbody td:nth-child(1) {'
+ 'border-right:1px solid #666;'
+ 'padding-right:0.15em;'
+ '}'
+ '#mbzImportTrackTable tbody tr:nth-child(odd) {'
+ 'background-color:rgba(0,0,0,0.1);'
+ '}'
+ '#mbzImportTrackTable tbody td:nth-child(2) {'
+ 'padding-left:0.3em;'
+ '}'
+ '#mbzImportTrackTable tbody td:nth-child(3) {'
+ 'padding-left:0.3em;'
+ 'font-family:courier,monospace;'
+ 'text-align:right;'
+ '}'
new mbz.archive_org_importer.Release();
} else if (pageType == 'bookmarks.php') {
// check all bookmarks for MusicBrainz relations
var links = $('.box>table>tbody a').filter(function(idx) {
// no way to check type for audio here
return $(this).attr('href').startsWith('/details/');
var control = $('<div id="mbzControls">');
if (links.length > 0) {
links: links,
controlEl: control
} else if (pageType == 'search.php') {
var links = [];
// check audio links for MusicBrainz relations
var audioItems = $('.numberCell>img[alt="[audio]"]').filter(function(idx) {
// get the first linked audio item..
var el = $(this).parent().next().children('a')[0];
if (el) {
el = $(el);
if (el.attr('href').startsWith('/details/')) {
// ..and extract its url
var control = $('<div>');
var col = $('<td colspan="2">');
var row = $('<tr>').append(col);
if (links.length > 0) {
links: links,
controlEl: control