summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCalum Lind <calumlind+deluge@gmail.com>2021-02-20 19:39:07 +0000
committerCalum Lind <calumlind+deluge@gmail.com>2021-02-20 21:18:32 +0000
commit4d970754a4a700ba45409e11275b89e6523d87ce (patch)
tree09d49caf9e8f749bf1c52ab053e6f6f64fa4b04d
parentf331b6c754dc23797b1ebccda1e38dd048ed0468 (diff)
downloaddeluge-4d970754a4a700ba45409e11275b89e6523d87ce.tar.gz
deluge-4d970754a4a700ba45409e11275b89e6523d87ce.tar.bz2
deluge-4d970754a4a700ba45409e11275b89e6523d87ce.zip
[#3440] Fix httpdownloader reencoding torrent file downloads
Torrent downloads from rutracker responds with the header: Content-Type: application/x-bittorrent; charset=Windows-1251 The problem is that httpdownloader was using the charset to re-encode the downloaded file, corrupting the binary torrent file download. Fixed by only re-encoding text content types, since it is very rare that non-text content types would actually have a non-utf8 codeset and if there is a requirement we would need to determine it on a type by type basis.
-rw-r--r--deluge/httpdownloader.py9
-rw-r--r--deluge/tests/test_httpdownloader.py55
2 files changed, 55 insertions, 9 deletions
diff --git a/deluge/httpdownloader.py b/deluge/httpdownloader.py
index b4acd0784..f0fe09ec0 100644
--- a/deluge/httpdownloader.py
+++ b/deluge/httpdownloader.py
@@ -151,9 +151,12 @@ class HTTPDownloaderAgent(object):
self.filename = new_file_name
- cont_type = headers.getRawHeaders(b'content-type')[0].decode()
- params = cgi.parse_header(cont_type)[1]
- encoding = params.get('charset', None)
+ cont_type_header = headers.getRawHeaders(b'content-type')[0].decode()
+ cont_type, params = cgi.parse_header(cont_type_header)
+ # Only re-ecode text content types.
+ encoding = None
+ if cont_type.startswith('text/'):
+ encoding = params.get('charset', None)
response.deliverBody(
BodyHandler(response.request, finished, body_length, self, encoding)
)
diff --git a/deluge/tests/test_httpdownloader.py b/deluge/tests/test_httpdownloader.py
index a503e46de..2a52744e1 100644
--- a/deluge/tests/test_httpdownloader.py
+++ b/deluge/tests/test_httpdownloader.py
@@ -9,6 +9,7 @@ from __future__ import unicode_literals
import tempfile
from email.utils import formatdate
+from io import open
from twisted.internet import reactor
from twisted.internet.error import CannotListenError
@@ -47,9 +48,30 @@ class RenameResource(Resource):
class AttachmentResource(Resource):
def render(self, request):
- request.setHeader(b'Content-Type', b'text/plain')
+ content_type = b'text/plain'
+ charset = request.getHeader(b'content-charset')
+ if charset:
+ content_type += b'; charset=' + charset
+ request.setHeader(b'Content-Type', content_type)
request.setHeader(b'Content-Disposition', b'attachment')
- return b'Attachement with no filename set'
+ append = request.getHeader(b'content-append') or b''
+ content = 'Attachment with no filename set{}'.format(append.decode('utf8'))
+ return (
+ content.encode(charset.decode('utf8'))
+ if charset
+ else content.encode('utf8')
+ )
+
+
+class TorrentResource(Resource):
+ def render(self, request):
+ content_type = b'application/x-bittorrent'
+ charset = request.getHeader(b'content-charset')
+ if charset:
+ content_type += b'; charset=' + charset
+ request.setHeader(b'Content-Type', content_type)
+ request.setHeader(b'Content-Disposition', b'attachment; filename=test.torrent')
+ return 'Binary attachment ignore charset 世丕且\n'.encode('utf8')
class CookieResource(Resource):
@@ -101,6 +123,7 @@ class TopLevelResource(Resource):
self.putChild(b'redirect', self.redirect_rsrc)
self.putChild(b'rename', RenameResource())
self.putChild(b'attachment', AttachmentResource())
+ self.putChild(b'torrent', TorrentResource())
self.putChild(b'partial', PartialDownloadResource())
def getChild(self, path, request): # NOQA: N802
@@ -110,7 +133,7 @@ class TopLevelResource(Resource):
return Resource.getChild(self, path, request)
def render(self, request):
- if request.getHeader('If-Modified-Since'):
+ if request.getHeader(b'If-Modified-Since'):
request.setResponseCode(NOT_MODIFIED)
return b'<h1>Deluge HTTP Downloader tests webserver here</h1>'
@@ -139,7 +162,7 @@ class DownloadFileTestCase(unittest.TestCase):
return self.webserver.stopListening()
def assertContains(self, filename, contents): # NOQA
- with open(filename) as _file:
+ with open(filename, 'r', encoding='utf8') as _file:
try:
self.assertEqual(_file.read(), contents)
except Exception as ex:
@@ -147,7 +170,7 @@ class DownloadFileTestCase(unittest.TestCase):
return filename
def assertNotContains(self, filename, contents, file_mode=''): # NOQA
- with open(filename, file_mode) as _file:
+ with open(filename, 'r', encoding='utf8') as _file:
try:
self.assertNotEqual(_file.read(), contents)
except Exception as ex:
@@ -212,7 +235,7 @@ class DownloadFileTestCase(unittest.TestCase):
url = self.get_url('attachment')
d = download_file(url, fname('original'))
d.addCallback(self.assertEqual, fname('original'))
- d.addCallback(self.assertContains, 'Attachement with no filename set')
+ d.addCallback(self.assertContains, 'Attachment with no filename set')
return d
def test_download_with_rename_prevented(self):
@@ -264,3 +287,23 @@ class DownloadFileTestCase(unittest.TestCase):
d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure)
return d
+
+ def test_download_text_reencode_charset(self):
+ """Re-encode as UTF-8 specified charset for text content-type header"""
+ url = self.get_url('attachment')
+ filepath = fname('test.txt')
+ headers = {'content-charset': 'Windows-1251', 'content-append': 'бвгде'}
+ d = download_file(url, filepath, headers=headers)
+ d.addCallback(self.assertEqual, filepath)
+ d.addCallback(self.assertContains, 'Attachment with no filename setбвгде')
+ return d
+
+ def test_download_binary_ignore_charset(self):
+ """Ignore charset for binary content-type header e.g. torrent files"""
+ url = self.get_url('torrent')
+ headers = {'content-charset': 'Windows-1251'}
+ filepath = fname('test.torrent')
+ d = download_file(url, fname('test.torrent'), headers=headers)
+ d.addCallback(self.assertEqual, filepath)
+ d.addCallback(self.assertContains, 'Binary attachment ignore charset 世丕且\n')
+ return d