mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-29 06:15:24 +00:00
SRT textsubs: try UTF-8 if no encoding was specified, with fallback to the system's encoding
Part of the implementation of #2246.
This commit is contained in:
parent
5ff7aed0b9
commit
1bb63b08bd
3
NEWS.md
3
NEWS.md
@ -6,6 +6,9 @@
|
||||
complete `gettext`. Together with an additional fix to how translation files
|
||||
are located, MKVToolNix can now use all interface languages on macOS,
|
||||
too. Fixes #2110, #2307, #2323.
|
||||
* mkvmerge: SRT text subtitles: for files for which no encoding has been
|
||||
specified, mkvmerge will try UTF-8 first and only then fall back to the system's
|
||||
default encoding. Part of the implementation of #2246.
|
||||
|
||||
## Bug fixes
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "common/codec.h"
|
||||
#include "common/strings/editing.h"
|
||||
#include "common/strings/parsing.h"
|
||||
#include "common/strings/utf8.h"
|
||||
#include "merge/connection_checks.h"
|
||||
#include "merge/output_control.h"
|
||||
#include "merge/packet_extensions.h"
|
||||
@ -32,8 +33,10 @@ textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *p_reader,
|
||||
: generic_packetizer_c(p_reader, p_ti)
|
||||
, m_codec_id{codec_id}
|
||||
{
|
||||
if (recode)
|
||||
m_cc_utf8 = charset_converter_c::init(m_ti.m_sub_charset);
|
||||
if (recode) {
|
||||
m_cc_utf8 = charset_converter_c::init(m_ti.m_sub_charset);
|
||||
m_try_utf8 = m_ti.m_sub_charset.empty();
|
||||
}
|
||||
|
||||
set_track_type(track_subtitle);
|
||||
if (m_codec_id == MKV_S_TEXTUSF)
|
||||
@ -88,7 +91,10 @@ textsubs_packetizer_c::process(packet_cptr packet) {
|
||||
auto subs = std::string{reinterpret_cast<char *>(packet->data->get_buffer()), packet->data->get_size()};
|
||||
subs = chomp(normalize_line_endings(subs, m_line_ending_style));
|
||||
|
||||
if (m_cc_utf8)
|
||||
if (m_try_utf8 && !mtx::utf8::is_valid(subs))
|
||||
m_try_utf8 = false;
|
||||
|
||||
if (!m_try_utf8 && m_cc_utf8)
|
||||
subs = m_cc_utf8->utf8(subs);
|
||||
|
||||
packet->data = memory_c::borrow(subs);
|
||||
|
@ -23,6 +23,7 @@ protected:
|
||||
unsigned int m_packetno{};
|
||||
boost::optional<unsigned int> m_force_rerender_track_headers_on_packetno;
|
||||
charset_converter_cptr m_cc_utf8;
|
||||
bool m_try_utf8{};
|
||||
std::string m_codec_id;
|
||||
line_ending_style_e m_line_ending_style{line_ending_style_e::cr_lf};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user