From 1bb63b08bd1137c8054c58a3c719d57ddae263b1 Mon Sep 17 00:00:00 2001
From: Moritz Bunkus
Date: Wed, 20 Jun 2018 20:56:19 +0200
Subject: [PATCH] SRT textsubs: try UTF-8 if no encoding was specified with
 fallback to system's encoding

Part of the implementation of #2246.
---
 NEWS.md                   |  3 +++
 src/output/p_textsubs.cpp | 12 +++++++++---
 src/output/p_textsubs.h   |  1 +
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 9e3dad496..067b5df71 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,6 +6,9 @@
   complete `gettext`. Together with an additional fix to how translation files
   are located, MKVToolNix can now use all interface languages on macOS, too. Fixes
   #2110, #2307, #2323.
+* mkvmerge: SRT text subtitles: for files for which no encoding has been
+  specified, mkvmerge will try UTF-8 first before falling back to the system's
+  default encoding. Part of the implementation of #2246.
 
 ## Bug fixes
 
diff --git a/src/output/p_textsubs.cpp b/src/output/p_textsubs.cpp
index f413de2c5..988e15e41 100644
--- a/src/output/p_textsubs.cpp
+++ b/src/output/p_textsubs.cpp
@@ -18,6 +18,7 @@
 #include "common/codec.h"
 #include "common/strings/editing.h"
 #include "common/strings/parsing.h"
+#include "common/strings/utf8.h"
 #include "merge/connection_checks.h"
 #include "merge/output_control.h"
 #include "merge/packet_extensions.h"
@@ -32,8 +33,10 @@ textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *p_reader,
   : generic_packetizer_c(p_reader, p_ti)
   , m_codec_id{codec_id}
 {
-  if (recode)
-    m_cc_utf8 = charset_converter_c::init(m_ti.m_sub_charset);
+  if (recode) {
+    m_cc_utf8  = charset_converter_c::init(m_ti.m_sub_charset);
+    m_try_utf8 = m_ti.m_sub_charset.empty();
+  }
 
   set_track_type(track_subtitle);
   if (m_codec_id == MKV_S_TEXTUSF)
@@ -88,7 +91,10 @@ textsubs_packetizer_c::process(packet_cptr packet) {
   auto subs = std::string{reinterpret_cast<char *>(packet->data->get_buffer()), packet->data->get_size()};
   subs      = chomp(normalize_line_endings(subs, m_line_ending_style));
 
-  if (m_cc_utf8)
+  if (m_try_utf8 && !mtx::utf8::is_valid(subs))
+    m_try_utf8 = false;
+
+  if (!m_try_utf8 && m_cc_utf8)
     subs = m_cc_utf8->utf8(subs);
 
   packet->data = memory_c::borrow(subs);
diff --git a/src/output/p_textsubs.h b/src/output/p_textsubs.h
index 51ef508a5..f28294b38 100644
--- a/src/output/p_textsubs.h
+++ b/src/output/p_textsubs.h
@@ -23,6 +23,7 @@ protected:
   unsigned int m_packetno{};
   boost::optional<unsigned int> m_force_rerender_track_headers_on_packetno;
   charset_converter_cptr m_cc_utf8;
+  bool m_try_utf8{};
   std::string m_codec_id;
   line_ending_style_e m_line_ending_style{line_ending_style_e::cr_lf};
 