mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2025-01-12 13:14:38 +00:00
SRT, SSA readers: ignore --sub-charset for files containing a BOM
Fixes #1620.
This commit is contained in:
parent
cddbc18ea1
commit
8ca1160de0
@ -1,3 +1,9 @@
|
|||||||
|
2016-03-09 Moritz Bunkus <moritz@bunkus.org>
|
||||||
|
|
||||||
|
* mkvmerge: bug fix: the --sub-charset option is now ignored for
|
||||||
|
text subtitle files that start with a byte-order mark (BOM),
|
||||||
|
bringing the behavior in line with the documentation. Fixes #1620.
|
||||||
|
|
||||||
2016-03-07 Moritz Bunkus <moritz@bunkus.org>
|
2016-03-07 Moritz Bunkus <moritz@bunkus.org>
|
||||||
|
|
||||||
* mkvextract: new feature: implemented the extraction of Big
|
* mkvextract: new feature: implemented the extraction of Big
|
||||||
|
@ -48,9 +48,9 @@ ssa_reader_c::read_headers() {
|
|||||||
if (!ssa_reader_c::probe_file(text_in.get(), 0))
|
if (!ssa_reader_c::probe_file(text_in.get(), 0))
|
||||||
throw mtx::input::invalid_format_x();
|
throw mtx::input::invalid_format_x();
|
||||||
|
|
||||||
charset_converter_cptr cc_utf8 = mtx::includes(m_ti.m_sub_charsets, 0) ? charset_converter_c::init(m_ti.m_sub_charsets[ 0])
|
charset_converter_cptr cc_utf8 = text_in->get_byte_order() != BO_NONE ? charset_converter_c::init("UTF-8")
|
||||||
|
: mtx::includes(m_ti.m_sub_charsets, 0) ? charset_converter_c::init(m_ti.m_sub_charsets[ 0])
|
||||||
: mtx::includes(m_ti.m_sub_charsets, -1) ? charset_converter_c::init(m_ti.m_sub_charsets[-1])
|
: mtx::includes(m_ti.m_sub_charsets, -1) ? charset_converter_c::init(m_ti.m_sub_charsets[-1])
|
||||||
: text_in->get_byte_order() != BO_NONE ? charset_converter_c::init("UTF-8")
|
|
||||||
: g_cc_local_utf8;
|
: g_cc_local_utf8;
|
||||||
|
|
||||||
m_ti.m_id = 0;
|
m_ti.m_id = 0;
|
||||||
|
@ -36,11 +36,10 @@ textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *p_reader,
|
|||||||
bool is_utf8)
|
bool is_utf8)
|
||||||
: generic_packetizer_c(p_reader, p_ti)
|
: generic_packetizer_c(p_reader, p_ti)
|
||||||
, m_packetno{}
|
, m_packetno{}
|
||||||
, m_codec_id(codec_id)
|
, m_codec_id{codec_id}
|
||||||
, m_recode(recode)
|
|
||||||
{
|
{
|
||||||
if (m_recode)
|
if (recode && !is_utf8 && !m_ti.m_sub_charset.empty())
|
||||||
m_cc_utf8 = charset_converter_c::init((m_ti.m_sub_charset != "") || !is_utf8 ? m_ti.m_sub_charset : "UTF-8");
|
m_cc_utf8 = charset_converter_c::init(m_ti.m_sub_charset);
|
||||||
|
|
||||||
set_track_type(track_subtitle);
|
set_track_type(track_subtitle);
|
||||||
if (m_codec_id == MKV_S_TEXTUSF)
|
if (m_codec_id == MKV_S_TEXTUSF)
|
||||||
@ -93,7 +92,7 @@ textsubs_packetizer_c::process(packet_cptr packet) {
|
|||||||
subs = boost::regex_replace(subs, s_re_remove_trailing_nl, "", boost::match_default | boost::match_single_line);
|
subs = boost::regex_replace(subs, s_re_remove_trailing_nl, "", boost::match_default | boost::match_single_line);
|
||||||
subs = boost::regex_replace(subs, s_re_translate_nl, "\r\n", boost::match_default | boost::match_single_line);
|
subs = boost::regex_replace(subs, s_re_translate_nl, "\r\n", boost::match_default | boost::match_single_line);
|
||||||
|
|
||||||
if (m_recode)
|
if (m_cc_utf8)
|
||||||
subs = m_cc_utf8->utf8(subs);
|
subs = m_cc_utf8->utf8(subs);
|
||||||
|
|
||||||
packet->data = memory_cptr(new memory_c((unsigned char *)subs.c_str(), subs.length(), false));
|
packet->data = memory_cptr(new memory_c((unsigned char *)subs.c_str(), subs.length(), false));
|
||||||
|
@ -24,7 +24,6 @@ private:
|
|||||||
boost::optional<unsigned int> m_force_rerender_track_headers_on_packetno;
|
boost::optional<unsigned int> m_force_rerender_track_headers_on_packetno;
|
||||||
charset_converter_cptr m_cc_utf8;
|
charset_converter_cptr m_cc_utf8;
|
||||||
std::string m_codec_id;
|
std::string m_codec_id;
|
||||||
bool m_recode;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
textsubs_packetizer_c(generic_reader_c *p_reader, track_info_c &p_ti, const char *codec_id, bool recode, bool is_utf8);
|
textsubs_packetizer_c(generic_reader_c *p_reader, track_info_c &p_ti, const char *codec_id, bool recode, bool is_utf8);
|
||||||
|
@ -382,3 +382,4 @@ T_533chapter_generation_interval:63486951fe0717eec1e93cb8fadaed92-5a57214bb210f5
|
|||||||
T_534chapter_generation_when_appending_audio_only:000e4bfced4fae128abbb741545768c8-39028cc1508e5a37f879b3a459f3dbd2-fb20e8f516702d6a0fb9299120e6b508+e66dbfdf351112c62aa65faccdce8ee1+c4dc3cd790ee8397d0a6bdca84091981+3d716d93d0178aeabc66111b4dc10d9a+4fe4b6a15803e0c8d400ce07b4a9d48e+8b5ccf0e1b9fcba119d40727b9b7e8e4+f3f355cb0549efabdd9954f4448fc48d+ok-e668981666602602e2ed5a1b8d47f6db:passed:20160302-130929:0.645667954
|
T_534chapter_generation_when_appending_audio_only:000e4bfced4fae128abbb741545768c8-39028cc1508e5a37f879b3a459f3dbd2-fb20e8f516702d6a0fb9299120e6b508+e66dbfdf351112c62aa65faccdce8ee1+c4dc3cd790ee8397d0a6bdca84091981+3d716d93d0178aeabc66111b4dc10d9a+4fe4b6a15803e0c8d400ce07b4a9d48e+8b5ccf0e1b9fcba119d40727b9b7e8e4+f3f355cb0549efabdd9954f4448fc48d+ok-e668981666602602e2ed5a1b8d47f6db:passed:20160302-130929:0.645667954
|
||||||
T_535chapter_generation_interval_audio_only:4231b50be18c4320584d7e18c611431d-7149e4b581f601145db038035aba3ec4-7b37e30bfede450fec2a5e7b1e982b35+bb66d81871625190fbd7b15b02f6ca57+1285f17cbdb1259606bd7174e993b3f1+c0877adb7bf88212aae2212ab819cf57+78fcd2002d1a48752c5a8e4730cc2158+b2a259375cb03683b7fe6ffe95a53e21+f3f355cb0549efabdd9954f4448fc48d+ok-8f16b1baaedec4b22df0f566b39a105e:passed:20160302-131002:0.64531153
|
T_535chapter_generation_interval_audio_only:4231b50be18c4320584d7e18c611431d-7149e4b581f601145db038035aba3ec4-7b37e30bfede450fec2a5e7b1e982b35+bb66d81871625190fbd7b15b02f6ca57+1285f17cbdb1259606bd7174e993b3f1+c0877adb7bf88212aae2212ab819cf57+78fcd2002d1a48752c5a8e4730cc2158+b2a259375cb03683b7fe6ffe95a53e21+f3f355cb0549efabdd9954f4448fc48d+ok-8f16b1baaedec4b22df0f566b39a105e:passed:20160302-131002:0.64531153
|
||||||
T_536extract_big_endian_pcm:8e57291db3e924e9bb45acb306426a0a:passed:20160307-190156:0.015845204
|
T_536extract_big_endian_pcm:8e57291db3e924e9bb45acb306426a0a:passed:20160307-190156:0.015845204
|
||||||
|
T_537srt_bom_precedence_over_sub_charset:9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-32eaa074a254eab81b90bd97be50c425:passed:20160309-180444:0.036282259
|
||||||
|
9
tests/test-537srt_bom_precedence_over_sub_charset.rb
Executable file
9
tests/test-537srt_bom_precedence_over_sub_charset.rb
Executable file
@ -0,0 +1,9 @@
|
|||||||
|
#!/usr/bin/ruby -w
|
||||||
|
|
||||||
|
# T_537srt_bom_precedence_over_sub_charset
|
||||||
|
describe "mkvmerge / SRT: BOMs have precedence over --sub-charset"
|
||||||
|
|
||||||
|
test_merge "--sub-charset 0:ISO-8859-15 data/subtitles/srt/vde-utf-8-bom.srt"
|
||||||
|
test_merge " data/subtitles/srt/vde-utf-8-bom.srt"
|
||||||
|
test_merge "--sub-charset 0:ISO-8859-15 data/subtitles/srt/vde.srt"
|
||||||
|
test_merge " data/subtitles/srt/vde.srt"
|
Loading…
Reference in New Issue
Block a user