Browse Source

Rework handling of missing data

master
Felix 6 months ago
parent
commit
ee5242f634
  1. 35
      tangoinfo.py

35
tangoinfo.py

@ -201,9 +201,9 @@ class TangoInfoTagger:
return
# Decode QByteArray into unicode
resonse_decoded = response_bytes.data().decode('utf-8')
response_decoded = response_bytes.data().decode('utf-8')
tangoinfo_albumdata = self.extract_data(barcode, resonse_decoded)
tangoinfo_albumdata = self.extract_data(barcode, response_decoded)
self.albumpage_cache[barcode] = tangoinfo_albumdata
track_triple = self.albumpage_queue.pop(barcode)
@ -286,9 +286,10 @@ class TangoInfoTagger:
# Content inside of <tr> elements
trcontent = [match.groups()[0] for match in tr_regex.finditer(table)]
albuminfo = dict(genre=None, date=None, vocal=None)
page_structure_warned = False # Rate-limit warnings
albuminfo = {}
for tr in trcontent:
# Content inside of <td> elements
trackinfo = [m.groups()[0] for m in td_regex.finditer(tr)]
@ -320,33 +321,37 @@ class TangoInfoTagger:
# Bail out early
continue
# Get tango.info TINT, e.g.
# <a href="/00743216335725-1-4" title="TINT:00743216335725-1-4">
tint = re.findall(tint_regex, trackinfo[8])
if tint:
# Normalize TINT by stripping all leading slashes and zeros
tint = tint[0].lstrip("/").lstrip("0")
albuminfo[tint] = {}
else:
# This really shouldn't happen
log.warning("%s: No TINT found on webpage for barcode %s",
PLUGIN_NAME, barcode)
continue
# Get genre, e.g.
# <a href="/genre.tango">tango</a>
if trackinfo[3] != "-":
genre = re.split("<|>", trackinfo[3])[2].title()
albuminfo[tint]['genre'] = genre
# Get date, e.g.
# <a class="date" href="/1941-08-14">1941-08-14</a>
if trackinfo[6] != "-":
date = re.split("<|>", trackinfo[6])[2]
albuminfo[tint]['date'] = date
# Get singers, e.g.
# <a href="/AlberDeluc">Alberto Castillo</a>
if trackinfo[5] != "-":
# Catch and strip <a> tags
vocal = re.sub("<[^>]*>", "", trackinfo[5])
# Get tango.info TINT, e.g.
# <a href="/00743216335725-1-4" title="TINT:00743216335725-1-4">
tint = re.findall(tint_regex, trackinfo[8])
if tint:
# Normalize TINT by stripping all leading slashes and zeros
tint = tint[0].lstrip("/").lstrip("0")
albuminfo[tint] = dict(genre=genre, date=date, vocal=vocal)
else:
# This really shouldn't happen
log.warning("%s: No TINT found on webpage for barcode %s",
PLUGIN_NAME, barcode)
albuminfo[tint]['vocal'] = vocal
return albuminfo or None

Loading…
Cancel
Save