The 3GPP Timed Text (TTXT / tx3g / mov_text) specification counts each
multibyte UTF-8 character as a single character, whereas ffmpeg currently
counts bytes. This patch adds a check for UTF-8 continuation bytes so that:
1. continuation bytes are not counted as characters during decoding
2. style boxes never split a multibyte character
Fixes trac #6021 (decoding part).
---
libavcodec/movtextdec.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
index 6de1500..2c7a204 100644
--- a/libavcodec/movtextdec.c
+++ b/libavcodec/movtextdec.c
@@ -342,6 +342,7 @@ static int text_to_ass(AVBPrint *buf, const char *text,
const char *text_end,
}
while (text < text_end) {
+ if ((*text & 0xC0) != 0x80) { /* Boxes never split multibyte chars */
if (m->box_flags & STYL_BOX) {
for (i = 0; i < m->style_entries; i++) {
if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
@@ -387,6 +388,8 @@ static int text_to_ass(AVBPrint *buf, const char *text,
const char *text_end,
}
}
}
+ text_pos++;
+ }
switch (*text) {
case '\r':
@@ -399,7 +402,6 @@ static int text_to_ass(AVBPrint *buf, const char *text,
const char *text_end,
break;
}
text++;
- text_pos++;
}
return 0;
--
1.9.5 (Apple Git-50.3)
_______________________________________________
ffmpeg-devel mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel