commit: d42179d8d444cc35698e06d2d7520e6f7b383f17
Author: Alex Legler <alex <AT> a3li <DOT> li>
AuthorDate: Mon Feb 23 23:54:59 2015 +0000
Commit: Alex Legler <a3li <AT> gentoo <DOT> org>
CommitDate: Mon Feb 23 23:55:20 2015 +0000
URL: http://sources.gentoo.org/gitweb/?p=proj/ag.git;a=commit;h=d42179d8
Fall back to charlock_holmes; also add a stub for extracting the encoding
from the message, for later use
---
lib/rendering.rb | 22 ++++++++++++++++------
lib/utils.rb | 4 ++--
2 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/lib/rendering.rb b/lib/rendering.rb
index 3e77414..7649fcf 100644
--- a/lib/rendering.rb
+++ b/lib/rendering.rb
@@ -8,33 +8,43 @@ module Ag::Rendering
content_type = mime_split(mail.parts.first.content_type)
if content_type == 'text/plain' or content_type == 'text/html'
- to_content(content_type, mail.parts.first.decoded)
+ to_content(content_type, mail.parts.first.decoded,
get_encoding(mail.parts.first))
else
# Nested multipart?
if mail.parts.first.multipart?
content_type =
mime_split(mail.parts.first.parts.first.content_type)
if content_type == 'text/plain' or content_type == 'text/html'
- to_content(content_type, mail.parts.first.parts.first.decoded)
+ to_content(content_type, mail.parts.first.parts.first.decoded,
get_encoding(mail.parts.first.parts.first))
else
raise "Cannot find body: #{mail.message_id}"
end
# Specialty: Gnus/Emacs signed emails with no explicit multipart type
elsif mime_split(mail.content_type) == 'multipart/signed'
- to_content('text/plain', mail.parts.first.decoded)
+ to_content('text/plain', mail.parts.first.decoded,
get_encoding(mail.parts.first))
end
end
else
# No Content-Type, assume plain text (git-send-email)
if mail.content_type == nil
- to_content('text/plain', mail.body.decoded)
+ to_content('text/plain', mail.body.decoded, get_encoding(mail))
else
- to_content(mime_split(mail.content_type), mail.body.decoded)
+ to_content(mime_split(mail.content_type), mail.body.decoded,
get_encoding(mail))
end
end
end
- def self.to_content(content_type, content)
+ def self.get_encoding(part)
+ if part.content_type_parameters
+ part.content_type_parameters['charset']
+ else
+ nil
+ end
+ end
+
+ def self.to_content(content_type, content, charset = nil)
+ #content = content.force_encoding(charset) if charset
+
if content_type == 'text/plain'
escaped_content = CGI::escapeHTML(content)
escaped_content.lines.map do |line|
diff --git a/lib/utils.rb b/lib/utils.rb
index f8d546e..38349e0 100644
--- a/lib/utils.rb
+++ b/lib/utils.rb
@@ -8,13 +8,13 @@ module Ag
module Utils
module_function
- def fix_encoding(str)
+ def fix_encoding2(str)
s = str.encode('UTF-8', 'UTF-8', invalid: :replace, replace: '')
s = s.unpack('C*').pack('U*') unless s.valid_encoding?
s
end
- def fix_encoding_old(str, fail_hard = false)
+ def fix_encoding(str, fail_hard = false)
detection = CharlockHolmes::EncodingDetector.detect(str)
CharlockHolmes::Converter.convert(str, detection[:encoding], 'UTF-8')
rescue => e