diff options
author | bendikro <bendikro@gmail.com> | 2012-11-25 13:01:12 +0100 |
---|---|---|
committer | bendikro <bendikro@gmail.com> | 2012-11-25 20:33:00 +0100 |
commit | 60f196ff933795980e62d579da6713df7e76dc1f (patch) | |
tree | b447a1857937dd4dea5199b08e5fb3fba8223604 | |
parent | ffb902ba0644f3c45bb80cd46818d2d1e628e3fb (diff) | |
download | deluge-60f196ff93.tar.gz deluge-60f196ff93.tar.bz2 deluge-60f196ff93.zip |
Changed decode_string to always return unicode.
-rw-r--r-- | deluge/common.py | 33 | ||||
-rw-r--r-- | deluge/ui/common.py | 10 |
2 files changed, 29 insertions, 14 deletions
```diff
diff --git a/deluge/common.py b/deluge/common.py
index ea615df20..9a91531ae 100644
--- a/deluge/common.py
+++ b/deluge/common.py
@@ -608,22 +608,34 @@ def xml_encode(string):
 
 def decode_string(s, encoding="utf8"):
     """
-    Decodes a string and re-encodes it in utf8. If it cannot decode using
-    `:param:encoding` then it will try to detect the string encoding and
-    decode it.
+    Decodes a string and return unicode. If it cannot decode using
+    `:param:encoding` then it will try latin1, and if that fails,
+    try to detect the string encoding. If that fails, decode with
+    ignore.
 
     :param s: string to decode
     :type s: string
     :keyword encoding: the encoding to use in the decoding
     :type encoding: string
+    :returns: s converted to unicode
+    :rtype: unicode
 
     """
+    if not s:
+        return u''
+    elif isinstance(s, unicode):
+        return s
 
-    try:
-        s = s.decode(encoding).encode("utf8", "ignore")
-    except UnicodeDecodeError:
-        s = s.decode(chardet.detect(s)["encoding"], "ignore").encode("utf8", "ignore")
-    return s
+    encodings = [(encoding, 'strict'), ("utf8", 'strict'),
+                 ("iso-8859-1", 'strict'),
+                 (chardet.detect(s)["encoding"], 'strict'),
+                 (chardet.detect(s)["encoding"], 'ignore')]
+    for i in range(len(encodings)):
+        try:
+            return s.decode(encodings[i][0], encodings[i][1])
+        except UnicodeDecodeError:
+            pass
+    return u''
 
 def utf8_encoded(s):
     """
@@ -636,7 +648,10 @@ def utf8_encoded(s):
 
     """
     if isinstance(s, str):
-        s = decode_string(s)
+        try:
+            s = decode_string(s).encode("utf8")
+        except UnicodeEncodeError:
+            log.warn("Error when encoding to utf8: %s" % s)
     elif isinstance(s, unicode):
         s = s.encode("utf8", "ignore")
     return s
diff --git a/deluge/ui/common.py b/deluge/ui/common.py
index 3b754cef7..000a85064 100644
--- a/deluge/ui/common.py
+++ b/deluge/ui/common.py
@@ -51,7 +51,7 @@ except ImportError:
     from sha import sha
 
 from deluge import bencode
-from deluge.common import decode_string, path_join
+from deluge.common import utf8_encoded, path_join
 import deluge.configmanager
 
 log = logging.getLogger(__name__)
@@ -88,9 +88,9 @@ class TorrentInfo(object):
         # Check if 'name.utf-8' is in the torrent and if not try to decode the string
         # using the encoding found.
         if "name.utf-8" in self.__m_metadata["info"]:
-            self.__m_name = decode_string(self.__m_metadata["info"]["name.utf-8"])
+            self.__m_name = utf8_encoded(self.__m_metadata["info"]["name.utf-8"])
         else:
-            self.__m_name = decode_string(self.__m_metadata["info"]["name"], self.encoding)
+            self.__m_name = utf8_encoded(self.__m_metadata["info"]["name"], self.encoding)
 
         # Get list of files from torrent info
         paths = {}
@@ -104,7 +104,7 @@ class TorrentInfo(object):
                 if "path.utf-8" in f:
                     path = os.path.join(prefix, *f["path.utf-8"])
                 else:
-                    path = decode_string(os.path.join(prefix, decode_string(os.path.join(*f["path"]), self.encoding)), self.encoding)
+                    path = utf8_encoded(os.path.join(prefix, utf8_encoded(os.path.join(*f["path"]), self.encoding)), self.encoding)
                 f["index"] = index
                 paths[path] = f
 
@@ -160,7 +160,7 @@ class TorrentInfo(object):
                 if "path.utf-8" in f:
                     path = os.path.join(prefix, *f["path.utf-8"])
                 else:
-                    path = decode_string(os.path.join(prefix, decode_string(os.path.join(*f["path"]), self.encoding)), self.encoding)
+                    path = utf8_encoded(os.path.join(prefix, utf8_encoded(os.path.join(*f["path"]), self.encoding)), self.encoding)
                 self.__m_files.append({
                     'path': path,
                     'size': f["length"],
```