Bug#631375: sonata: fetching lyrics fails, patch attached

Stefan Fleischmann Wed, 29 Jun 2011 16:39:16 -0700

This is the first patch I've ever submitted, so please be forgiving.

I changed the order so that HTML unescaping is done before the lyrics are 
extracted instead of after.
That way we can be sure to match "<lyrics>" rather than "&lt;lyrics>" or some 
other mix of escaped and unescaped notation.


I also updated the URL to lyrics.wikia.com, since lyricwiki.org gets redirected there anyway.


--- a/sonata/info.py    2011-06-30 00:51:46.000000000 +0200
+++ b/sonata/info.py    2011-06-30 01:13:05.000000000 +0200
@@ -350,7 +350,7 @@
        def lyricwiki_editlink(self, songinfo):
                artist, title = [self.lyricwiki_format(mpdh.get(songinfo, key))
                                 for key in ('artist', 'title')]
-               return "http://lyricwiki.org/index.php?title=%s:%s&action=edit"; 
% (artist, title)
+               return 
"http://lyrics.wikia.com/index.php?title=%s:%s&action=edit"; % (artist, title)
 
        def get_lyrics_thread(self, search_artist, search_title, 
filename_artist, filename_title, song_dir):
                filename_artist = misc.strip_all_slashes(filename_artist)
@@ -385,17 +385,18 @@
                else:
                        # Use default filename:
                        filename = self.target_lyrics_filename(filename_artist, 
filename_title, song_dir)
-                       # Fetch lyrics from lyricwiki.org
+                       # Fetch lyrics from lyrics.wikia.com (formerly 
lyricwiki.org)
                        gobject.idle_add(self.info_show_lyrics, _("Fetching 
lyrics..."), filename_artist, filename_title)
                        try:
-                               lyricpage = 
urllib.urlopen("http://lyricwiki.org/index.php?title=%s:%s&action=edit"; % 
(self.lyricwiki_format(search_artist), 
self.lyricwiki_format(search_title))).read()
+                               lyricpage = 
urllib.urlopen("http://lyrics.wikia.com/index.php?title=%s:%s&action=edit"; % 
(self.lyricwiki_format(search_artist), 
self.lyricwiki_format(search_title))).read()
                                content = re.split("<textarea[^>]*>", 
lyricpage)[1].split("</textarea>")[0]
                                if content.startswith("#REDIRECT [["):
-                                       addr = 
"http://lyricwiki.org/index.php?title=%s&action=edit"; % 
urllib.quote(content.split("[[")[1].split("]]")[0])
+                                       addr = 
"http://lyrics.wikia.com/index.php?title=%s&action=edit"; % 
urllib.quote(content.split("[[")[1].split("]]")[0])
                                        content = urllib.urlopen(addr).read()
-                               lyrics = 
content.split("&lt;lyrics&gt;")[1].split("&lt;/lyrics&gt;")[0]
-                               if lyrics.strip() != "&lt;!-- PUT LYRICS HERE 
(and delete this entire line) --&gt;":
-                                       lyrics = misc.unescape_html(lyrics)
+                               # To avoid problems with mixed 
escaped/unescaped characters unescape before extracting lyrics (fixes bug 
#631375)
+                               content = misc.unescape_html(content)
+                               lyrics = 
content.split("<lyrics>")[1].split("</lyrics>")[0]
+                               if lyrics.strip() != "<!-- PUT LYRICS HERE (and 
delete this entire line) -->":
                                        lyrics = misc.wiki_to_html(lyrics)
                                        lyrics = lyrics.decode("utf-8")
 # Save lyrics to file:

--- a/sonata/info.py	2011-06-30 00:51:46.000000000 +0200
+++ b/sonata/info.py	2011-06-30 01:13:05.000000000 +0200
@@ -350,7 +350,7 @@
 	def lyricwiki_editlink(self, songinfo):
 		artist, title = [self.lyricwiki_format(mpdh.get(songinfo, key))
 				 for key in ('artist', 'title')]
-		return "http://lyricwiki.org/index.php?title=%s:%s&action=edit" % (artist, title)
+		return "http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (artist, title)
 
 	def get_lyrics_thread(self, search_artist, search_title, filename_artist, filename_title, song_dir):
 		filename_artist = misc.strip_all_slashes(filename_artist)
@@ -385,17 +385,18 @@
 		else:
 			# Use default filename:
 			filename = self.target_lyrics_filename(filename_artist, filename_title, song_dir)
-			# Fetch lyrics from lyricwiki.org
+			# Fetch lyrics from lyrics.wikia.com (formerly lyricwiki.org)
 			gobject.idle_add(self.info_show_lyrics, _("Fetching lyrics..."), filename_artist, filename_title)
 			try:
-				lyricpage = urllib.urlopen("http://lyricwiki.org/index.php?title=%s:%s&action=edit" % (self.lyricwiki_format(search_artist), self.lyricwiki_format(search_title))).read()
+				lyricpage = urllib.urlopen("http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (self.lyricwiki_format(search_artist), self.lyricwiki_format(search_title))).read()
 				content = re.split("<textarea[^>]*>", lyricpage)[1].split("</textarea>")[0]
 				if content.startswith("#REDIRECT [["):
-					addr = "http://lyricwiki.org/index.php?title=%s&action=edit" % urllib.quote(content.split("[[")[1].split("]]")[0])
+					addr = "http://lyrics.wikia.com/index.php?title=%s&action=edit" % urllib.quote(content.split("[[")[1].split("]]")[0])
 					content = urllib.urlopen(addr).read()
-				lyrics = content.split("&lt;lyrics&gt;")[1].split("&lt;/lyrics&gt;")[0]
-				if lyrics.strip() != "&lt;!-- PUT LYRICS HERE (and delete this entire line) --&gt;":
-					lyrics = misc.unescape_html(lyrics)
+				# To avoid problems with mixed escaped/unescaped characters unescape before extracting lyrics (fixes bug #631375)
+				content = misc.unescape_html(content)
+				lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0]
+				if lyrics.strip() != "<!-- PUT LYRICS HERE (and delete this entire line) -->":
 					lyrics = misc.wiki_to_html(lyrics)
 					lyrics = lyrics.decode("utf-8")
 # Save lyrics to file:

Bug#631375: sonata: fetching lyrics fails, patch attached

Reply via email to