This is the first patch I've ever submitted, so please be forgiving. I changed the order so that HTML unescaping is done before the lyrics extraction instead of after it. That way we can be sure to find "<lyrics>" and not "&lt;lyrics&gt;" or some mixed notation such as "&lt;lyrics>".
I also updated the url to lyrics.wikia.com, it gets redirected there anyways. --- a/sonata/info.py 2011-06-30 00:51:46.000000000 +0200 +++ b/sonata/info.py 2011-06-30 01:13:05.000000000 +0200 @@ -350,7 +350,7 @@ def lyricwiki_editlink(self, songinfo): artist, title = [self.lyricwiki_format(mpdh.get(songinfo, key)) for key in ('artist', 'title')] - return "http://lyricwiki.org/index.php?title=%s:%s&action=edit" % (artist, title) + return "http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (artist, title) def get_lyrics_thread(self, search_artist, search_title, filename_artist, filename_title, song_dir): filename_artist = misc.strip_all_slashes(filename_artist) @@ -385,17 +385,18 @@ else: # Use default filename: filename = self.target_lyrics_filename(filename_artist, filename_title, song_dir) - # Fetch lyrics from lyricwiki.org + # Fetch lyrics from lyrics.wikia.com (formerly lyricwiki.org) gobject.idle_add(self.info_show_lyrics, _("Fetching lyrics..."), filename_artist, filename_title) try: - lyricpage = urllib.urlopen("http://lyricwiki.org/index.php?title=%s:%s&action=edit" % (self.lyricwiki_format(search_artist), self.lyricwiki_format(search_title))).read() + lyricpage = urllib.urlopen("http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (self.lyricwiki_format(search_artist), self.lyricwiki_format(search_title))).read() content = re.split("<textarea[^>]*>", lyricpage)[1].split("</textarea>")[0] if content.startswith("#REDIRECT [["): - addr = "http://lyricwiki.org/index.php?title=%s&action=edit" % urllib.quote(content.split("[[")[1].split("]]")[0]) + addr = "http://lyrics.wikia.com/index.php?title=%s&action=edit" % urllib.quote(content.split("[[")[1].split("]]")[0]) content = urllib.urlopen(addr).read() - lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0] - if lyrics.strip() != "<!-- PUT LYRICS HERE (and delete this entire line) -->": - lyrics = misc.unescape_html(lyrics) + # To avoid problems with mixed escaped/unescaped characters 
unescape before extracting lyrics (fixes bug #631375) + content = misc.unescape_html(content) + lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0] + if lyrics.strip() != "<!-- PUT LYRICS HERE (and delete this entire line) -->": lyrics = misc.wiki_to_html(lyrics) lyrics = lyrics.decode("utf-8") # Save lyrics to file:
--- a/sonata/info.py 2011-06-30 00:51:46.000000000 +0200 +++ b/sonata/info.py 2011-06-30 01:13:05.000000000 +0200 @@ -350,7 +350,7 @@ def lyricwiki_editlink(self, songinfo): artist, title = [self.lyricwiki_format(mpdh.get(songinfo, key)) for key in ('artist', 'title')] - return "http://lyricwiki.org/index.php?title=%s:%s&action=edit" % (artist, title) + return "http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (artist, title) def get_lyrics_thread(self, search_artist, search_title, filename_artist, filename_title, song_dir): filename_artist = misc.strip_all_slashes(filename_artist) @@ -385,17 +385,18 @@ else: # Use default filename: filename = self.target_lyrics_filename(filename_artist, filename_title, song_dir) - # Fetch lyrics from lyricwiki.org + # Fetch lyrics from lyrics.wikia.com (formerly lyricwiki.org) gobject.idle_add(self.info_show_lyrics, _("Fetching lyrics..."), filename_artist, filename_title) try: - lyricpage = urllib.urlopen("http://lyricwiki.org/index.php?title=%s:%s&action=edit" % (self.lyricwiki_format(search_artist), self.lyricwiki_format(search_title))).read() + lyricpage = urllib.urlopen("http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (self.lyricwiki_format(search_artist), self.lyricwiki_format(search_title))).read() content = re.split("<textarea[^>]*>", lyricpage)[1].split("</textarea>")[0] if content.startswith("#REDIRECT [["): - addr = "http://lyricwiki.org/index.php?title=%s&action=edit" % urllib.quote(content.split("[[")[1].split("]]")[0]) + addr = "http://lyrics.wikia.com/index.php?title=%s&action=edit" % urllib.quote(content.split("[[")[1].split("]]")[0]) content = urllib.urlopen(addr).read() - lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0] - if lyrics.strip() != "<!-- PUT LYRICS HERE (and delete this entire line) -->": - lyrics = misc.unescape_html(lyrics) + # To avoid problems with mixed escaped/unescaped characters unescape before extracting lyrics (fixes bug #631375) + content = 
misc.unescape_html(content) + lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0] + if lyrics.strip() != "<!-- PUT LYRICS HERE (and delete this entire line) -->": lyrics = misc.wiki_to_html(lyrics) lyrics = lyrics.decode("utf-8") # Save lyrics to file: