From 91debcd8eae0dce4e96708618b7634da882fba28 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Thu, 21 Jun 2018 19:51:15 +0200 Subject: [PATCH] Matroska/MPEG TS readers: allow recoding text subtitles via `--sub-charset` --- NEWS.md | 3 +++ src/input/r_matroska.cpp | 3 ++- src/input/r_mpeg_ts.cpp | 5 +++-- tests/results.txt | 1 + tests/test-647recode_textsubs_from_matroska.rb | 9 +++++++++ 5 files changed, 18 insertions(+), 3 deletions(-) create mode 100755 tests/test-647recode_textsubs_from_matroska.rb diff --git a/NEWS.md b/NEWS.md index 09132d757..ef929ea1f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,9 @@ * mkvmerge: SRT/ASS/SSA/WebVTT text subtitles: a warning is now emitted if invalid 8-bit characters are encountered outside valid multi-byte UTF-8 sequences. Part of the implementation of #2246. +* mkvmerge: Matroska & MPEG transport stream readers: the encoding of text + subtitles read from Matroska files and MPEG transport streams can now be + changed with the `--sub-charset` parameter. ## Bug fixes diff --git a/src/input/r_matroska.cpp b/src/input/r_matroska.cpp index 90ea453d2..122b2c62a 100644 --- a/src/input/r_matroska.cpp +++ b/src/input/r_matroska.cpp @@ -2007,7 +2007,8 @@ kax_reader_c::create_subtitle_packetizer(kax_track_t *t, } else if (balg::starts_with(t->codec_id, "S_TEXT") || (t->codec_id == "S_SSA") || (t->codec_id == "S_ASS")) { std::string new_codec_id = ((t->codec_id == "S_SSA") || (t->codec_id == "S_ASS")) ? 
"S_TEXT/"s + std::string(&t->codec_id[2]) : t->codec_id; - set_track_packetizer(t, new textsubs_packetizer_c(this, nti, new_codec_id.c_str())); + auto recoding_requested = mtx::includes(m_ti.m_sub_charsets, t->tnum) || mtx::includes(m_ti.m_sub_charsets, t->tnum); + set_track_packetizer(t, new textsubs_packetizer_c(this, nti, new_codec_id.c_str(), recoding_requested)); show_packetizer_info(t->tnum, t->ptzr_ptr); t->sub_type = 't'; diff --git a/src/input/r_mpeg_ts.cpp b/src/input/r_mpeg_ts.cpp index bc7d9d46e..06871f6c2 100644 --- a/src/input/r_mpeg_ts.cpp +++ b/src/input/r_mpeg_ts.cpp @@ -2343,9 +2343,10 @@ reader_c::create_hdmv_textst_subtitles_packetizer(track_ptr const &track) { void reader_c::create_srt_subtitles_packetizer(track_ptr const &track) { - track->ptzr = add_packetizer(new textsubs_packetizer_c(this, m_ti, MKV_S_TEXTUTF8)); + auto recoding_requested = mtx::includes(m_ti.m_sub_charsets, track->m_id) || mtx::includes(m_ti.m_sub_charsets, track->m_id); + track->ptzr = add_packetizer(new textsubs_packetizer_c(this, m_ti, MKV_S_TEXTUTF8, recoding_requested)); - auto &converter = dynamic_cast(*track->converter); + auto &converter = dynamic_cast(*track->converter); converter.demux_page(*track->m_ttx_wanted_page, PTZR(track->ptzr)); converter.override_encoding(*track->m_ttx_wanted_page, track->language); diff --git a/tests/results.txt b/tests/results.txt index e310081e7..b5260e26d 100644 --- a/tests/results.txt +++ b/tests/results.txt @@ -492,3 +492,4 @@ T_643mpeg_ts_bad_utf8_in_service_names:f967b2bf3fb4265ec723f14eb667bb9a:passed:2 T_644mp3_with_discard_padding:e68074afaedbb820210e31e3cf62febc:passed:20180619-214051:0.038730751 T_645ogg_opus_first_timestamp_negative:4b513ca85e1cf12b378eedd07c8b4ffa-78778f6067b903cc326bf0f998f9790b:passed:20180620-185732:0.925863056 T_646ogg_opus_gap_page_missing:c3c8ff65984de89ce37c2f03520ae83f:passed:20180621-154830:0.009743698 
+T_647recode_textsubs_from_matroska:2e63dc90381d8f5191b852aac6cc3b05-b297cba0182c465fa537abd668f1e6a0:passed:20180621-194122:0.024017386 diff --git a/tests/test-647recode_textsubs_from_matroska.rb b/tests/test-647recode_textsubs_from_matroska.rb new file mode 100755 index 000000000..b7cefc49f --- /dev/null +++ b/tests/test-647recode_textsubs_from_matroska.rb @@ -0,0 +1,9 @@ +#!/usr/bin/ruby -w + +# T_647recode_textsubs_from_matroska +describe "mkvmerge / recode text subtitles read from Matroska files" + +file = "data/mkv/attachments.mkv" + +test_merge file, :args => "--no-attachments", :exit_code => :warning +test_merge file, :args => "--no-attachments --sub-charset 0:iso-8859-1"