A plugin to use tango.info with MusicBrainz Picard
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

360 lines
14 KiB

# -*- coding: utf-8 -*-
# Plugin metadata constants: display name, authors, HTML description,
# plugin version, the API versions this plugin declares support for, and
# the license under which it is distributed.
PLUGIN_NAME = "Tango.info Adapter"
PLUGIN_AUTHOR = "Felix Elsner, Sambhav Kothari, Philipp Wolfer"
PLUGIN_DESCRIPTION = """
<p>Load genre, date and vocalist tags for latin dance music
from <a href="https://tango.info">tango.info</a>.</p>
"""
PLUGIN_VERSION = "0.2.0"
PLUGIN_API_VERSIONS = ["2.6", "2.7"]
PLUGIN_LICENSE = "GPL-2.0"
PLUGIN_LICENSE_URL = "https://www.gnu.org/licenses/gpl-2.0.html"
import re
from functools import partial
from picard import log
from picard.util import LockableObject
from picard.metadata import register_track_metadata_processor
# Patterns used to scrape a tango.info album page. None of them is compiled
# with re.DOTALL, so '.' never crosses a newline: each matched element has
# to sit on a single line of the response.
#
# Captures everything between the "Track"/"Tracks" <h2> heading and the
# first following </table>.
table_regex = re.compile(
r'<h2><a href="\/tracks?">Tracks?<\/a><\/h2>(?!<\/table>)(.+?)<\/table>'
) # Match the 'tracks'/'track' <table>
# Captures the inner content of a bare <tr> (no attributes allowed).
tr_regex = re.compile(r"<tr>((?!</tr>).+?)</tr>") # Match <tr> elements
# Captures the inner content of a <td>, with or without attributes.
td_regex = re.compile(r"<td[^>]*>((?!</td>).+?)</td>") # Match <td> elements
# Captures the "<digits>-<1-2 digits>-<1-2 digits>" part following "TINT:".
tint_regex = re.compile(r"TINT:([0-9]+-[0-9]{1,2}-[0-9]{1,2})")
# Host and port passed to the web service for every album page request.
TANGO_INFO_HOST = "tango.info"
TANGO_INFO_PORT = 443
# FIXME Remove all this
# This is all just a hack to get around server issues at tango.info
# The server will reply with an empty body if the Host header is set to
# "tango.info:443", but just "tango.info" is fine.
# Picard 2.8+ will by default not append the port to the Host header and thus
# not need this workaround
from picard import PICARD_VERSION
from picard.version import Version
# Picard releases before 2.8 append the port to the Host header, which makes
# tango.info reply with an empty body; only those releases need the wrapper.
host_workaround_needed = PICARD_VERSION < Version(2, 8, 0, 'dev', 1)
if host_workaround_needed:
    log.warning("%s: Picard version is older than 2.8, using workaround for "
                "tango.info Host header issue", PLUGIN_NAME)

    from picard.webservice import WSGetRequest
    from PyQt5.QtCore import QUrl

    class WrappedWSGetRequest(WSGetRequest):  # noqa
        """GET request whose URL has its explicit port removed."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            portless_url = QUrl(self.url().url())
            # -1 is QUrl's marker for "use the default port"
            portless_url.setPort(-1)
            self.setUrl(portless_url)
class TangoInfoTagger:
    """Fill in genre, date and vocalist tags from tango.info album pages.

    Album pages are looked up by barcode, scraped once and kept in
    ``albumpage_cache``. While a page is being downloaded, the tracks waiting
    for it are parked in ``albumpage_queue`` so that only one request per
    barcode is in flight.
    """

    # Tags copied from a scraped tango.info track row into metadata objects
    _FIELDS = ("genre", "date", "vocal")

    class TangoInfoScrapeQueue(LockableObject):
        """Mapping of name -> list of pending values, guarded by the
        read/write lock methods inherited from ``LockableObject``."""

        def __init__(self):
            LockableObject.__init__(self)
            self.queue = {}

        def __contains__(self, name):
            return name in self.queue

        def __iter__(self):
            return iter(self.queue)

        def __getitem__(self, name):
            """Return the entry stored under *name*, or None if absent."""
            self.lock_for_read()
            try:
                return self.queue.get(name)
            finally:
                # Release the lock even if the lookup raises
                self.unlock()

        def __setitem__(self, name, value):
            self.lock_for_write()
            try:
                self.queue[name] = value
            finally:
                self.unlock()

        def append(self, name, value):
            """Add *value* to the list stored under *name*.

            :return: True if *name* was not queued before (a new list was
                     created), False if *value* joined an existing list.
            """
            self.lock_for_write()
            try:
                if name in self.queue:
                    self.queue[name].append(value)
                    return False
                self.queue[name] = [value]
                return True
            finally:
                self.unlock()

        def pop(self, name):
            """Remove and return the list stored under *name* (None if absent)."""
            self.lock_for_write()
            try:
                return self.queue.pop(name, None)
            finally:
                self.unlock()

    def __init__(self):
        # barcode -> scraped album data (dict keyed by normalized TINT), or
        # None when the lookup for that barcode yielded nothing
        self.albumpage_cache = {}
        # barcode -> list of (track, album, tint) waiting for the download
        self.albumpage_queue = self.TangoInfoScrapeQueue()

    def add_tangoinfo_data(self, album, track_metadata, track, release):
        """Track metadata processor entry point.

        Reads the barcode, disc number and track number from
        *track_metadata*, builds the matching TINT and either applies cached
        tango.info data or queues a download of the album page.

        :param album: album object the track belongs to
        :param track_metadata: metadata to read from and write the tags to
        :param track: not used by this method
        :param release: not used by this method
        """
        # Look for BARCODE or barcode tag
        if track_metadata.get("barcode"):
            barcode = str(track_metadata["barcode"])
        elif track_metadata.get("BARCODE"):
            barcode = str(track_metadata["BARCODE"])
        else:
            # Abort if no barcode in track_metadata
            return

        # https://tango.info/wiki/TINT = <TINP>-<Side#>-<Track#>
        # https://tango.info/wiki/TINP - "Tango Info Number for a Product" is a
        # 14-digit numeric tango.info code used by tango.info and others.
        # Example: TINT:00743216335725-1-5
        # This plugin normalizes TINTs by stripping all zeros from the left
        tint = "%s-%s-%s" % (
            barcode.lstrip("0"),
            str(track_metadata.get("discnumber", "1")),
            str(track_metadata.get("tracknumber")),
        )

        if barcode not in self.albumpage_cache:
            self.website_add_track(album, album._new_tracks[-1], barcode, tint)
            return

        # A failed lookup is cached as None, so guard before calling .get()
        albumdata = self.albumpage_cache[barcode]
        trackdata = albumdata.get(tint) if albumdata else None
        if not trackdata:
            log.debug(
                "%s: No information on tango.info for barcode %s",
                PLUGIN_NAME, barcode)
            return
        self._copy_fields(trackdata, track_metadata)

    @classmethod
    def _copy_fields(cls, trackdata, metadata):
        """Copy every non-empty tag of *trackdata* into *metadata*."""
        for field in cls._FIELDS:
            value = trackdata.get(field)
            # Do not overwrite with empty data
            if value:
                metadata[field] = value

    def website_add_track(self, album, track, barcode, tint, zeros=0):
        """Queue *track* for *barcode* and start a download if none is pending.

        :param album: album object whose pending-request counter is raised
        :param track: track object to update once the page has been parsed
        :param barcode: barcode as found in the metadata (not zero-padded)
        :param tint: normalized TINT identifying the track on the page
        :param zeros: Number of zeros that have been prepended to the barcode
        :return: result of queueing the web request, or None when a request
                 for this barcode is already pending
        """
        self.album_add_request(album)
        if not self.albumpage_queue.append(barcode, (track, album, tint)):
            # Another track already triggered the download for this barcode
            return None

        # Barcode, zero-padded as needed
        path = "/" + ("0" * zeros) + barcode
        log.debug("%s: Downloading from tango.info: track %s, album %s "
                  "with TINT %s from %s",
                  PLUGIN_NAME, str(track), str(album), tint,
                  "https://%s%s" % (TANGO_INFO_HOST, path))

        # website_process() receives barcode and zeros ahead of the reply data
        handler = partial(self.website_process, barcode, zeros)
        request_options = {
            "priority": False,
            "important": False,
            "parse_response_type": None,
            "queryargs": None,
        }
        ws = album.tagger.webservice

        # FIXME: Remove this
        if host_workaround_needed:
            # Build the request ourselves so the port-less URL wrapper is used
            request = WrappedWSGetRequest(
                TANGO_INFO_HOST, TANGO_INFO_PORT, path, handler,
                **request_options
            )
            return ws.add_request(request)

        return ws.get(
            TANGO_INFO_HOST, TANGO_INFO_PORT, path, handler,
            **request_options
        )

    def website_process(self, barcode, zeros, response_bytes, reply, error):
        """Handle the reply for an album page request.

        On success the scraped tags are written to every queued track and its
        files. If the page holds no data, the request is retried with one more
        zero prepended to the barcode, until two zeros have been tried.

        barcode: barcode the request was made for (without zero padding)
        zeros: number of zeros that were prepended for this request
        response_bytes: PyQt5.QtCore.QByteArray, equals reply.readAll()
        reply: PyQt5.QtNetwork.QNetworkReply
        error: PyQt5.QtNetwork.QNetworkReply.NetworkError (optional)
        """
        if error:
            log.warning("%s: Network error retrieving info for barcode %s",
                        PLUGIN_NAME, barcode)
            # pop() yields None when nothing is queued; treat that as empty
            for _track, album, _tint in self.albumpage_queue.pop(barcode) or ():
                self.album_remove_request(album)
            return

        # Decode QByteArray into unicode. Undecodable bytes are replaced so a
        # malformed page cannot raise here and leave the albums stuck with a
        # pending request.
        response_decoded = response_bytes.data().decode(
            'utf-8', errors='replace')
        tangoinfo_albumdata = self.extract_data(barcode, response_decoded)
        self.albumpage_cache[barcode] = tangoinfo_albumdata
        track_triple = self.albumpage_queue.pop(barcode) or []

        if tangoinfo_albumdata:
            if zeros:
                log.debug("%s: tango.info does not seem to have data for "
                          "barcode %s. "
                          "However, retrying with barcode %s (i.e. the same "
                          "with %s prepended) was successful. "
                          "This most likely means either MusicBrainz or "
                          "tango.info has stored a wrong barcode for this "
                          "release. You might want to investigate this "
                          "discrepancy and report it.",
                          PLUGIN_NAME, barcode, ("0" * zeros) + barcode,
                          ("a zero" if zeros == 1 else "%d zeros" % zeros))
            for track, album, tint in track_triple:
                trackdata = tangoinfo_albumdata.get(tint)
                if trackdata:
                    # Write track metadata, then the metadata of its files
                    self._copy_fields(trackdata, track.metadata)
                    for linked_file in track.iterfiles():
                        self._copy_fields(trackdata, linked_file.metadata)
                self.album_remove_request(album)
            return

        if zeros >= 2:
            log.debug("%s: Could not load album with barcode %s even with "
                      "zero prepended(%s). This most likely means "
                      "tango.info does not have a release for this "
                      "barcode (or MusicBrainz has a wrong barcode)",
                      PLUGIN_NAME, barcode, ("0" * zeros) + barcode)
            for _track, album, _tint in track_triple:
                self.album_remove_request(album)
            return

        log.debug("%s: Retrying with 0-padded barcode for barcode %s",
                  PLUGIN_NAME, barcode)
        for track, album, tint in track_triple:
            # Try again with zero-prepended barcode, but at most two times.
            # The new request is registered before the old one is released.
            self.website_add_track(
                album, track, barcode, tint, zeros=(zeros + 1)
            )
            self.album_remove_request(album)

    def album_add_request(self, album):
        """Increment the pending-request counter of *album*."""
        album._requests += 1

    def album_remove_request(self, album):
        """Decrement the pending-request counter of *album* and call its
        ``_finalize_loading(None)``."""
        album._requests -= 1
        album._finalize_loading(None)

    @staticmethod
    def _first_tag_text(cell):
        """Return the text directly following the first tag in *cell*.

        For ``<a href="/genre.tango">tango</a>`` this is ``tango``. A cell
        without any tag is returned unchanged instead of raising IndexError.
        """
        parts = re.split("<|>", cell)
        return parts[2] if len(parts) > 2 else cell

    def extract_data(self, barcode, response):
        """Scrape per-track tags from the HTML of a tango.info album page.

        :param barcode: barcode the page was requested for (used for logging)
        :param response: decoded HTML of the page
        :return: dict mapping normalized TINT -> dict with the optional keys
                 'genre', 'date' and 'vocal'; None if nothing was extracted
        """
        # Check whether we have a concealed 404 and get the homepage
        if "<title>Contents - tango.info</title>" in response:
            log.debug("%s: No album with barcode %s on tango.info",
                      PLUGIN_NAME, barcode)
            return None

        table_match = table_regex.search(response)
        if not table_match:
            log.warning("%s: Could not extract table from album webpage - "
                        "regex failed or page structure changed", PLUGIN_NAME)
            return None
        table = table_match.group(1)

        page_structure_warned = False  # Ratelimit warnings
        albuminfo = {}
        # Iterate over the content inside of <tr> elements
        for tr_match in tr_regex.finditer(table):
            # Content inside of <td> elements
            trackinfo = [m.group(1) for m in td_regex.finditer(tr_match.group(1))]
            # Example of expected structure:
            # <tr>
            #  <td class="side_num">1</td>
            #  <td class="track_num">6</td>
            #  <td><a href="/T0370182390">Ese sos vos</a></td>
            #  <td><a href="/genre.tango">tango</a></td>
            #  <td><a href="/RicarTantu">Ricardo Tanturi</a></td>
            #  <td><a href="/AlberDeluc">Alberto Castillo</a></td>
            #  <td><a class="date" href="/1941-12-23">1941-12-23</a></td>
            #  <td>02:38</td>
            #  <td><a href="/00743216335725-1-6"
            #         title="TINT:00743216335725-1-6">info</a><br /></td>
            # </tr>

            # Sanity checks
            if not trackinfo:
                # Row without <td> cells, e.g. a <th> table header
                continue
            if len(trackinfo) < 9:
                if not page_structure_warned:  # Only warn once per page
                    log.warning("%s: Table '<tr>' structure on webpage "
                                "unexpected for barcode %s",
                                PLUGIN_NAME, barcode)
                    page_structure_warned = True
                # Bail out early
                continue

            # Get tango.info TINT, e.g.
            # <a href="/00743216335725-1-4" title="TINT:00743216335725-1-4">
            tint_match = tint_regex.search(trackinfo[8])
            if not tint_match:
                # This really shouldn't happen
                log.warning("%s: No TINT found on webpage for barcode %s",
                            PLUGIN_NAME, barcode)
                continue
            # Normalize TINT by stripping all leading slashes and zeros
            tint = tint_match.group(1).lstrip("/").lstrip("0")
            trackdata = albuminfo[tint] = {}

            # Get genre, e.g.
            # <a href="/genre.tango">tango</a>
            if trackinfo[3] != "-":
                trackdata['genre'] = self._first_tag_text(trackinfo[3]).title()

            # Get date, e.g.
            # <a class="date" href="/1941-08-14">1941-08-14</a>
            if trackinfo[6] != "-":
                trackdata['date'] = self._first_tag_text(trackinfo[6])

            # Get singers, e.g.
            # <a href="/AlberDeluc">Alberto Castillo</a>
            if trackinfo[5] != "-":
                # Catch and strip <a> tags
                trackdata['vocal'] = re.sub("<[^>]*>", "", trackinfo[5])

        return albuminfo or None
# Single module-level tagger instance, so its album page cache and download
# queue are shared by every call of the registered processor.
tagger = TangoInfoTagger()
register_track_metadata_processor(tagger.add_tangoinfo_data)