diff --git a/ChangeLog b/ChangeLog index 622ca8803..c1f79e84a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2016-03-16 Moritz Bunkus + + * mkvmerge: bug fix: fixed two issues in the conversion of + teletext subtitles to SRT subtitles: 1. the handling of streams in + which consecutive teletext packets contain the same content and + 2. the calculation of a packet's duration for certain + situations. Fixes #1623. + 2016-03-14 Moritz Bunkus * MKVToolNix GUI: merge tool (playlist selection dialog) diff --git a/src/input/teletext_to_srt_packet_converter.cpp b/src/input/teletext_to_srt_packet_converter.cpp index e3207957d..b9c70cdc0 100644 --- a/src/input/teletext_to_srt_packet_converter.cpp +++ b/src/input/teletext_to_srt_packet_converter.cpp @@ -13,6 +13,7 @@ #include "common/common_pch.h" +#include "common/at_scope_exit.h" #include "common/strings/formatting.h" #include "input/teletext_to_srt_packet_converter.h" #include "merge/generic_packetizer.h" @@ -109,7 +110,6 @@ teletext_to_srt_packet_converter_c::teletext_to_srt_packet_converter_c(generic_p , m_pos{} , m_data_length{} , m_buf{} - , m_previous_timecode{-1} , m_page_re1{" *\\n[ \\n]+", boost::regex::perl} , m_page_re2{" +", boost::regex::perl} , m_page_re3{"^[ \\n]+|[ \\n]+$", boost::regex::perl} @@ -213,6 +213,66 @@ teletext_to_srt_packet_converter_c::decode_line(unsigned char const *buffer, } } +void +teletext_to_srt_packet_converter_c::process_single_row(int row_number, + timestamp_c const &packet_timestamp) { + remove_parity(&m_buf[m_pos + 6], m_data_length + 2 - 6); + decode_line(&m_buf[m_pos + 6], row_number); + + m_page_changed = true; + if (!m_current_timestamp.valid()) + m_current_timestamp = packet_timestamp; +} + +void +teletext_to_srt_packet_converter_c::decode_page_data(unsigned char ttx_header_magazine) { + unsigned char ttx_packet_0_header[4]; + unham(&m_buf[m_pos + 6], ttx_packet_0_header, 8); + + if (ttx_packet_0_header[0] != 0xff) { + m_ttx_page_data.page = ttx_to_page(ttx_packet_0_header[0]); + m_ttx_page_data.page += 100 * ttx_header_magazine; + } + + m_ttx_page_data.subpage = ((ttx_packet_0_header[2] << 8) | (ttx_packet_0_header[1])) & 0x3f7f; + m_ttx_page_data.subpage = ttx_to_page(m_ttx_page_data.subpage); + + m_ttx_page_data.flags = (ttx_packet_0_header[1] & 0x80) + | ((ttx_packet_0_header[3] << 4) & 0x10) + | ((ttx_packet_0_header[3] << 2) & 0x08) + | ((ttx_packet_0_header[3] >> 0) & 0x04) + | ((ttx_packet_0_header[3] >> 1) & 0x02) + | ((ttx_packet_0_header[3] >> 4) & 0x01); + + m_ttx_page_data.erase_flag = !!(m_ttx_page_data.flags & 0x80); + m_ttx_page_data.national_set = (m_ttx_page_data.flags >> 21) & 0x07; + + mxdebug_if(m_debug, + boost::format(" ttx page %1% subpage %2% erase? %3% national set %4% changed? %5%\n") + % m_ttx_page_data.page % m_ttx_page_data.subpage % m_ttx_page_data.erase_flag % m_ttx_page_data.national_set % m_page_changed); +} + +void +teletext_to_srt_packet_converter_c::deliver_queued_content(timestamp_c const &packet_timestamp) { + if (!m_page_changed && !m_ttx_page_data.erase_flag) + return; + + if ( !m_queued_timestamp.valid() + || m_queued_content.empty() + || (!m_ttx_page_data.erase_flag && (m_queued_content == m_current_content))) + return; + + auto duration = ((m_current_timestamp.valid() ? m_current_timestamp : packet_timestamp) - m_queued_timestamp).abs(); + auto new_packet = std::make_shared(memory_c::clone(m_queued_content), m_queued_timestamp.to_ns(), duration.to_ns()); + + mxdebug_if(m_debug, boost::format(" delivering packet at %1% duration %2% content %3%\n") % format_timestamp(m_queued_timestamp) % format_timestamp(new_packet->duration) % m_queued_content); + + m_ptzr->process(new_packet); + + m_queued_timestamp.reset(); + m_queued_content.clear(); +} + void teletext_to_srt_packet_converter_c::process_ttx_packet(packet_cptr const &packet) { auto data_unit_id = m_buf[m_pos]; // 0x02 = teletext, 0x03 = subtitling, 0xff = stuffing @@ -239,60 +299,55 @@ teletext_to_srt_packet_converter_c::process_ttx_packet(packet_cptr const &packet mxdebug_if(m_debug, boost::format(" m_pos %1% packet_id/row_number %2%\n") % m_pos % static_cast(row_number)); if ((row_number > 0) && (row_number <= TTX_PAGE_ROW_SIZE)) { - remove_parity(&m_buf[m_pos + 6], m_data_length + 2 - 6); - decode_line(&m_buf[m_pos + 6], row_number); - + process_single_row(row_number, timestamp_c::ns(packet->timecode)); return; } if (row_number != 0) return; - unsigned char ttx_packet_0_header[4]; - unham(&m_buf[m_pos + 6], ttx_packet_0_header, 8); + at_scope_exit_c cleanup([&] { + m_ttx_page_data.reset(); + m_page_changed = false; + m_current_timestamp.reset(); + }); - if (ttx_packet_0_header[0] != 0xff) { - m_ttx_page_data.page = ttx_to_page(ttx_packet_0_header[0]); - m_ttx_page_data.page += 100 * ttx_header_magazine; + decode_page_data(ttx_header_magazine); + + if (m_ttx_page_data.erase_flag) + deliver_queued_content(timestamp_c::ns(packet->timecode)); + + if ( m_wanted_page + && ( (m_wanted_page->first != m_ttx_page_data.page) + || (m_wanted_page->second != m_ttx_page_data.subpage))) { + return; } - m_ttx_page_data.subpage = ((ttx_packet_0_header[2] << 8) | (ttx_packet_0_header[1])) & 0x3f7f; - m_ttx_page_data.subpage = ttx_to_page(m_ttx_page_data.subpage); - - m_ttx_page_data.flags = (ttx_packet_0_header[1] & 0x80) - | ((ttx_packet_0_header[3] << 4) & 0x10) - | ((ttx_packet_0_header[3] << 2) & 0x08) - | ((ttx_packet_0_header[3] >> 0) & 0x04) - | ((ttx_packet_0_header[3] >> 1) & 0x02) - | ((ttx_packet_0_header[3] >> 4) & 0x01); - - m_ttx_page_data.erase_flag = !!(m_ttx_page_data.flags & 0x80); - m_ttx_page_data.national_set = (m_ttx_page_data.flags >> 21) & 0x07; - - mxdebug_if(m_debug, boost::format(" ttx page %1% subpage %2% erase? %3% national set %4%\n") % m_ttx_page_data.page % m_ttx_page_data.subpage % m_ttx_page_data.erase_flag % m_ttx_page_data.national_set); - - auto current_content = page_to_string(); + m_current_content = page_to_string(); mxdebug_if(m_debug, - boost::format(" case !packet_id. page content before clearing it at timecode %2% (prev %3%): %1% PREV content: %4%\n") - % current_content % format_timestamp(packet->timecode) % format_timestamp(m_previous_timecode) % m_previous_content); + boost::format(" page complete; sub page %5%; current content at timecode %2% (queued %3%): %1% PREV content: %4%\n") + % m_current_content % format_timestamp(packet->timecode) % format_timestamp(m_queued_timestamp) % m_queued_content % m_ttx_page_data.subpage); - if (!m_previous_content.empty()) { - m_previous_timecode = std::max(m_previous_timecode, 0); - auto new_packet = std::make_shared(memory_c::clone(m_previous_content), m_previous_timecode, std::abs(packet->timecode - m_previous_timecode)); - - mxdebug_if(m_debug, boost::format(" WILL DELIVER at %1% duration %2% content %3%\n") % format_timestamp(m_previous_timecode) % format_timestamp(new_packet->duration) % m_previous_content); - - m_ptzr->process(new_packet); - - m_previous_timecode = -1; - // packet->timecode = -1; + if (!m_wanted_page && !m_current_content.empty()) { + m_wanted_page.reset({ m_ttx_page_data.page, m_ttx_page_data.subpage }); + mxdebug_if(m_debug, boost::format(" First page/subpage with content is %1%/%2%.\n") % m_wanted_page->first % m_wanted_page->second); } - if (!current_content.empty() && (m_previous_timecode == -1)) - m_previous_timecode = packet->timecode; + if (!m_wanted_page) { + return; + } - m_ttx_page_data.reset(); - m_previous_content = current_content; + deliver_queued_content(timestamp_c::ns(packet->timecode)); + + if (m_current_content.empty()) + return; + + m_queued_content = m_current_content; + if (!m_queued_timestamp.valid()) + // m_queued_timestamp = timestamp_c::ns(packet->timecode); + m_queued_timestamp = m_current_timestamp.valid() ? m_current_timestamp : timestamp_c::ns(packet->timecode); + + mxdebug_if(m_debug, boost::format(" queueing page content at %1% content %2%\n") % format_timestamp(m_queued_timestamp) % m_queued_content); } std::string @@ -341,8 +396,5 @@ teletext_to_srt_packet_converter_c::convert(packet_cptr const &packet) { m_pos += 2 + m_data_length; } - if ((-1 == m_previous_timecode) && (-1 != packet->timecode) && !page_to_string().empty()) - m_previous_timecode = packet->timecode; - return true; } diff --git a/src/input/teletext_to_srt_packet_converter.h b/src/input/teletext_to_srt_packet_converter.h index 3d1b65f02..f3d2383ed 100644 --- a/src/input/teletext_to_srt_packet_converter.h +++ b/src/input/teletext_to_srt_packet_converter.h @@ -17,6 +17,7 @@ #include "common/common_pch.h" #include "common/debugging.h" +#include "common/timestamp.h" #include "input/packet_converter.h" #define TTX_PAGE_ROW_SIZE 24 @@ -39,8 +40,10 @@ public: protected: size_t m_in_size, m_pos, m_data_length; unsigned char *m_buf; - int64_t m_previous_timecode; - std::string m_previous_content; + timestamp_c m_queued_timestamp, m_current_timestamp; + std::string m_queued_content, m_current_content; + boost::optional> m_wanted_page; + bool m_page_changed{}; ttx_page_data_t m_ttx_page_data; @@ -58,6 +61,9 @@ protected: void process_ttx_packet(packet_cptr const &packet); std::string page_to_string() const; void decode_line(unsigned char const *buffer, unsigned int row); + void process_single_row(int row_number, timestamp_c const &packet_timestamp); + void decode_page_data(unsigned char ttx_header_magazine); + void deliver_queued_content(timestamp_c const &packet_timestamp); protected: static int ttx_to_page(int ttx); diff --git a/tests/results.txt b/tests/results.txt index 6a039cae9..ce10b6de3 100644 --- a/tests/results.txt +++ b/tests/results.txt @@ -383,3 +383,4 @@ T_534chapter_generation_when_appending_audio_only:000e4bfced4fae128abbb741545768 T_535chapter_generation_interval_audio_only:4231b50be18c4320584d7e18c611431d-7149e4b581f601145db038035aba3ec4-7b37e30bfede450fec2a5e7b1e982b35+bb66d81871625190fbd7b15b02f6ca57+1285f17cbdb1259606bd7174e993b3f1+c0877adb7bf88212aae2212ab819cf57+78fcd2002d1a48752c5a8e4730cc2158+b2a259375cb03683b7fe6ffe95a53e21+f3f355cb0549efabdd9954f4448fc48d+ok-8f16b1baaedec4b22df0f566b39a105e:passed:20160302-131002:0.64531153 T_536extract_big_endian_pcm:8e57291db3e924e9bb45acb306426a0a:passed:20160307-190156:0.015845204 T_537srt_bom_precedence_over_sub_charset:9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-32eaa074a254eab81b90bd97be50c425:passed:20160309-180444:0.036282259 +T_538teletext_many_packets_same_content:409dc709d9530f7fe85066af8fc07b7c:passed:20160316-143840:0.650010976 diff --git a/tests/test-538teletext_many_packets_same_content.rb b/tests/test-538teletext_many_packets_same_content.rb new file mode 100755 index 000000000..6a7ab9621 --- /dev/null +++ b/tests/test-538teletext_many_packets_same_content.rb @@ -0,0 +1,5 @@ +#!/usr/bin/ruby -w + +# T_538teletext_many_packets_same_content +describe "mkvmerge / MPEG TS with teletext subtitles with many packets having the same content" +test_merge "data/ts/teletext_subs_many_packets_same_content.ts", args: "--no-audio --no-video"