SRT textsubs: try UTF-8 first if no encoding was specified, falling back to the system's encoding

Part of the implementation of #2246.
This commit is contained in:
Moritz Bunkus 2018-06-20 20:56:19 +02:00
parent 5ff7aed0b9
commit 1bb63b08bd
No known key found for this signature in database
GPG Key ID: 74AF00ADF2E32C85
3 changed files with 13 additions and 3 deletions

View File

@@ -6,6 +6,9 @@
complete `gettext`. Together with an additional fix to how translation files
are located, MKVToolNix can now use all interface languages on macOS,
too. Fixes #2110, #2307, #2323.
* mkvmerge: SRT text subtitles: if no encoding has been specified for a file,
mkvmerge will try UTF-8 first before falling back to the system's default
encoding. Part of the implementation of #2246.
## Bug fixes

View File

@@ -18,6 +18,7 @@
#include "common/codec.h"
#include "common/strings/editing.h"
#include "common/strings/parsing.h"
#include "common/strings/utf8.h"
#include "merge/connection_checks.h"
#include "merge/output_control.h"
#include "merge/packet_extensions.h"
@@ -32,8 +33,10 @@ textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *p_reader,
: generic_packetizer_c(p_reader, p_ti)
, m_codec_id{codec_id}
{
if (recode)
m_cc_utf8 = charset_converter_c::init(m_ti.m_sub_charset);
if (recode) {
m_cc_utf8 = charset_converter_c::init(m_ti.m_sub_charset);
m_try_utf8 = m_ti.m_sub_charset.empty();
}
set_track_type(track_subtitle);
if (m_codec_id == MKV_S_TEXTUSF)
@@ -88,7 +91,10 @@ textsubs_packetizer_c::process(packet_cptr packet) {
auto subs = std::string{reinterpret_cast<char *>(packet->data->get_buffer()), packet->data->get_size()};
subs = chomp(normalize_line_endings(subs, m_line_ending_style));
if (m_cc_utf8)
if (m_try_utf8 && !mtx::utf8::is_valid(subs))
m_try_utf8 = false;
if (!m_try_utf8 && m_cc_utf8)
subs = m_cc_utf8->utf8(subs);
packet->data = memory_c::borrow(subs);

View File

@@ -23,6 +23,7 @@ protected:
unsigned int m_packetno{};
boost::optional<unsigned int> m_force_rerender_track_headers_on_packetno;
charset_converter_cptr m_cc_utf8;
bool m_try_utf8{};
std::string m_codec_id;
line_ending_style_e m_line_ending_style{line_ending_style_e::cr_lf};