SRT, SSA readers: ignore --sub-charset for files containing a BOM

Fixes #1620.
This commit is contained in:
Moritz Bunkus 2016-03-09 20:15:15 +01:00
parent cddbc18ea1
commit 8ca1160de0
6 changed files with 22 additions and 8 deletions

View File

@ -1,3 +1,9 @@
2016-03-09 Moritz Bunkus <moritz@bunkus.org>
* mkvmerge: bug fix: the --sub-charset option is now ignored for
text subtitle files that start with a byte-order mark (BOM),
bringing the behavior in line with the documentation. Fixes #1620.
2016-03-07 Moritz Bunkus <moritz@bunkus.org>
* mkvextract: new feature: implemented the extraction of Big

View File

@ -48,9 +48,9 @@ ssa_reader_c::read_headers() {
if (!ssa_reader_c::probe_file(text_in.get(), 0))
throw mtx::input::invalid_format_x();
charset_converter_cptr cc_utf8 = mtx::includes(m_ti.m_sub_charsets, 0) ? charset_converter_c::init(m_ti.m_sub_charsets[ 0])
charset_converter_cptr cc_utf8 = text_in->get_byte_order() != BO_NONE ? charset_converter_c::init("UTF-8")
: mtx::includes(m_ti.m_sub_charsets, 0) ? charset_converter_c::init(m_ti.m_sub_charsets[ 0])
: mtx::includes(m_ti.m_sub_charsets, -1) ? charset_converter_c::init(m_ti.m_sub_charsets[-1])
: text_in->get_byte_order() != BO_NONE ? charset_converter_c::init("UTF-8")
: g_cc_local_utf8;
m_ti.m_id = 0;

View File

@ -36,11 +36,10 @@ textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *p_reader,
bool is_utf8)
: generic_packetizer_c(p_reader, p_ti)
, m_packetno{}
, m_codec_id(codec_id)
, m_recode(recode)
, m_codec_id{codec_id}
{
if (m_recode)
m_cc_utf8 = charset_converter_c::init((m_ti.m_sub_charset != "") || !is_utf8 ? m_ti.m_sub_charset : "UTF-8");
if (recode && !is_utf8 && !m_ti.m_sub_charset.empty())
m_cc_utf8 = charset_converter_c::init(m_ti.m_sub_charset);
set_track_type(track_subtitle);
if (m_codec_id == MKV_S_TEXTUSF)
@ -93,7 +92,7 @@ textsubs_packetizer_c::process(packet_cptr packet) {
subs = boost::regex_replace(subs, s_re_remove_trailing_nl, "", boost::match_default | boost::match_single_line);
subs = boost::regex_replace(subs, s_re_translate_nl, "\r\n", boost::match_default | boost::match_single_line);
if (m_recode)
if (m_cc_utf8)
subs = m_cc_utf8->utf8(subs);
packet->data = memory_cptr(new memory_c((unsigned char *)subs.c_str(), subs.length(), false));

View File

@ -24,7 +24,6 @@ private:
boost::optional<unsigned int> m_force_rerender_track_headers_on_packetno;
charset_converter_cptr m_cc_utf8;
std::string m_codec_id;
bool m_recode;
public:
textsubs_packetizer_c(generic_reader_c *p_reader, track_info_c &p_ti, const char *codec_id, bool recode, bool is_utf8);

View File

@ -382,3 +382,4 @@ T_533chapter_generation_interval:63486951fe0717eec1e93cb8fadaed92-5a57214bb210f5
T_534chapter_generation_when_appending_audio_only:000e4bfced4fae128abbb741545768c8-39028cc1508e5a37f879b3a459f3dbd2-fb20e8f516702d6a0fb9299120e6b508+e66dbfdf351112c62aa65faccdce8ee1+c4dc3cd790ee8397d0a6bdca84091981+3d716d93d0178aeabc66111b4dc10d9a+4fe4b6a15803e0c8d400ce07b4a9d48e+8b5ccf0e1b9fcba119d40727b9b7e8e4+f3f355cb0549efabdd9954f4448fc48d+ok-e668981666602602e2ed5a1b8d47f6db:passed:20160302-130929:0.645667954
T_535chapter_generation_interval_audio_only:4231b50be18c4320584d7e18c611431d-7149e4b581f601145db038035aba3ec4-7b37e30bfede450fec2a5e7b1e982b35+bb66d81871625190fbd7b15b02f6ca57+1285f17cbdb1259606bd7174e993b3f1+c0877adb7bf88212aae2212ab819cf57+78fcd2002d1a48752c5a8e4730cc2158+b2a259375cb03683b7fe6ffe95a53e21+f3f355cb0549efabdd9954f4448fc48d+ok-8f16b1baaedec4b22df0f566b39a105e:passed:20160302-131002:0.64531153
T_536extract_big_endian_pcm:8e57291db3e924e9bb45acb306426a0a:passed:20160307-190156:0.015845204
T_537srt_bom_precedence_over_sub_charset:9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-32eaa074a254eab81b90bd97be50c425:passed:20160309-180444:0.036282259

View File

@ -0,0 +1,9 @@
#!/usr/bin/ruby -w
# T_537srt_bom_precedence_over_sub_charset
#
# Test case for issue #1620: for SRT files, a byte-order mark (BOM) should
# take precedence over a character set given with --sub-charset.
# NOTE(review): `describe` and `test_merge` are provided by the test
# framework, which is not visible here; presumably `describe` sets the
# test's description and `test_merge` runs mkvmerge with the given
# arguments and records a checksum of the result -- confirm.
describe "mkvmerge / SRT: BOMs have precedence over --sub-charset"
# The file whose name indicates a UTF-8 BOM, merged once with an explicit
# ISO-8859-15 charset for track 0 and once without any charset option.
test_merge "--sub-charset 0:ISO-8859-15 data/subtitles/srt/vde-utf-8-bom.srt"
test_merge " data/subtitles/srt/vde-utf-8-bom.srt"
# The same two invocations for the file without "bom" in its name
# (presumably the same subtitles without a BOM -- verify against the test data).
test_merge "--sub-charset 0:ISO-8859-15 data/subtitles/srt/vde.srt"
test_merge " data/subtitles/srt/vde.srt"