mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-24 11:54:01 +00:00
teletext subtitles: collapse multiple entries with same content into single entry
The sample file in question contains many teletext packets that show the same content. The old code would convert those to separate Matroska frames. Additionally each frame's duration was calculated from the wrong timestamp. A teletext page can be drawn over the space of several packets with different timestamps. Therefore the timestamp of the first packet rendering a line of a teletext page must be used as the start timestamp, and the timestamp of the first packet rendering a line of the following teletext page as its end. The old code was using a wrong packet for the end, though: the timestamp of packet containing the end-of-page block. Fixes #1623.
This commit is contained in:
parent
c3ee7d3d90
commit
6e2a738e82
@ -1,3 +1,11 @@
|
||||
2016-03-16 Moritz Bunkus <moritz@bunkus.org>
|
||||
|
||||
* mkvmerge: bug fix: fixed two issues in the conversion of
|
||||
teletext subtitles to SRT subtitles: 1. the handling of streams in
|
||||
which consecutive teletext packets contain the same content and
|
||||
2. the calculation of a packet's duration for certain
|
||||
situations. Fixes #1623.
|
||||
|
||||
2016-03-14 Moritz Bunkus <moritz@bunkus.org>
|
||||
|
||||
* MKVToolNix GUI: merge tool (playlist selection dialog)
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "common/common_pch.h"
|
||||
|
||||
#include "common/at_scope_exit.h"
|
||||
#include "common/strings/formatting.h"
|
||||
#include "input/teletext_to_srt_packet_converter.h"
|
||||
#include "merge/generic_packetizer.h"
|
||||
@ -109,7 +110,6 @@ teletext_to_srt_packet_converter_c::teletext_to_srt_packet_converter_c(generic_p
|
||||
, m_pos{}
|
||||
, m_data_length{}
|
||||
, m_buf{}
|
||||
, m_previous_timecode{-1}
|
||||
, m_page_re1{" *\\n[ \\n]+", boost::regex::perl}
|
||||
, m_page_re2{" +", boost::regex::perl}
|
||||
, m_page_re3{"^[ \\n]+|[ \\n]+$", boost::regex::perl}
|
||||
@ -213,6 +213,66 @@ teletext_to_srt_packet_converter_c::decode_line(unsigned char const *buffer,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
teletext_to_srt_packet_converter_c::process_single_row(int row_number,
|
||||
timestamp_c const &packet_timestamp) {
|
||||
remove_parity(&m_buf[m_pos + 6], m_data_length + 2 - 6);
|
||||
decode_line(&m_buf[m_pos + 6], row_number);
|
||||
|
||||
m_page_changed = true;
|
||||
if (!m_current_timestamp.valid())
|
||||
m_current_timestamp = packet_timestamp;
|
||||
}
|
||||
|
||||
void
|
||||
teletext_to_srt_packet_converter_c::decode_page_data(unsigned char ttx_header_magazine) {
|
||||
unsigned char ttx_packet_0_header[4];
|
||||
unham(&m_buf[m_pos + 6], ttx_packet_0_header, 8);
|
||||
|
||||
if (ttx_packet_0_header[0] != 0xff) {
|
||||
m_ttx_page_data.page = ttx_to_page(ttx_packet_0_header[0]);
|
||||
m_ttx_page_data.page += 100 * ttx_header_magazine;
|
||||
}
|
||||
|
||||
m_ttx_page_data.subpage = ((ttx_packet_0_header[2] << 8) | (ttx_packet_0_header[1])) & 0x3f7f;
|
||||
m_ttx_page_data.subpage = ttx_to_page(m_ttx_page_data.subpage);
|
||||
|
||||
m_ttx_page_data.flags = (ttx_packet_0_header[1] & 0x80)
|
||||
| ((ttx_packet_0_header[3] << 4) & 0x10)
|
||||
| ((ttx_packet_0_header[3] << 2) & 0x08)
|
||||
| ((ttx_packet_0_header[3] >> 0) & 0x04)
|
||||
| ((ttx_packet_0_header[3] >> 1) & 0x02)
|
||||
| ((ttx_packet_0_header[3] >> 4) & 0x01);
|
||||
|
||||
m_ttx_page_data.erase_flag = !!(m_ttx_page_data.flags & 0x80);
|
||||
m_ttx_page_data.national_set = (m_ttx_page_data.flags >> 21) & 0x07;
|
||||
|
||||
mxdebug_if(m_debug,
|
||||
boost::format(" ttx page %1% subpage %2% erase? %3% national set %4% changed? %5%\n")
|
||||
% m_ttx_page_data.page % m_ttx_page_data.subpage % m_ttx_page_data.erase_flag % m_ttx_page_data.national_set % m_page_changed);
|
||||
}
|
||||
|
||||
void
|
||||
teletext_to_srt_packet_converter_c::deliver_queued_content(timestamp_c const &packet_timestamp) {
|
||||
if (!m_page_changed && !m_ttx_page_data.erase_flag)
|
||||
return;
|
||||
|
||||
if ( !m_queued_timestamp.valid()
|
||||
|| m_queued_content.empty()
|
||||
|| (!m_ttx_page_data.erase_flag && (m_queued_content == m_current_content)))
|
||||
return;
|
||||
|
||||
auto duration = ((m_current_timestamp.valid() ? m_current_timestamp : packet_timestamp) - m_queued_timestamp).abs();
|
||||
auto new_packet = std::make_shared<packet_t>(memory_c::clone(m_queued_content), m_queued_timestamp.to_ns(), duration.to_ns());
|
||||
|
||||
mxdebug_if(m_debug, boost::format(" delivering packet at %1% duration %2% content %3%\n") % format_timestamp(m_queued_timestamp) % format_timestamp(new_packet->duration) % m_queued_content);
|
||||
|
||||
m_ptzr->process(new_packet);
|
||||
|
||||
m_queued_timestamp.reset();
|
||||
m_queued_content.clear();
|
||||
}
|
||||
|
||||
void
|
||||
teletext_to_srt_packet_converter_c::process_ttx_packet(packet_cptr const &packet) {
|
||||
auto data_unit_id = m_buf[m_pos]; // 0x02 = teletext, 0x03 = subtitling, 0xff = stuffing
|
||||
@ -239,60 +299,55 @@ teletext_to_srt_packet_converter_c::process_ttx_packet(packet_cptr const &packet
|
||||
mxdebug_if(m_debug, boost::format(" m_pos %1% packet_id/row_number %2%\n") % m_pos % static_cast<unsigned int>(row_number));
|
||||
|
||||
if ((row_number > 0) && (row_number <= TTX_PAGE_ROW_SIZE)) {
|
||||
remove_parity(&m_buf[m_pos + 6], m_data_length + 2 - 6);
|
||||
decode_line(&m_buf[m_pos + 6], row_number);
|
||||
|
||||
process_single_row(row_number, timestamp_c::ns(packet->timecode));
|
||||
return;
|
||||
}
|
||||
|
||||
if (row_number != 0)
|
||||
return;
|
||||
|
||||
unsigned char ttx_packet_0_header[4];
|
||||
unham(&m_buf[m_pos + 6], ttx_packet_0_header, 8);
|
||||
at_scope_exit_c cleanup([&] {
|
||||
m_ttx_page_data.reset();
|
||||
m_page_changed = false;
|
||||
m_current_timestamp.reset();
|
||||
});
|
||||
|
||||
if (ttx_packet_0_header[0] != 0xff) {
|
||||
m_ttx_page_data.page = ttx_to_page(ttx_packet_0_header[0]);
|
||||
m_ttx_page_data.page += 100 * ttx_header_magazine;
|
||||
decode_page_data(ttx_header_magazine);
|
||||
|
||||
if (m_ttx_page_data.erase_flag)
|
||||
deliver_queued_content(timestamp_c::ns(packet->timecode));
|
||||
|
||||
if ( m_wanted_page
|
||||
&& ( (m_wanted_page->first != m_ttx_page_data.page)
|
||||
|| (m_wanted_page->second != m_ttx_page_data.subpage))) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_ttx_page_data.subpage = ((ttx_packet_0_header[2] << 8) | (ttx_packet_0_header[1])) & 0x3f7f;
|
||||
m_ttx_page_data.subpage = ttx_to_page(m_ttx_page_data.subpage);
|
||||
|
||||
m_ttx_page_data.flags = (ttx_packet_0_header[1] & 0x80)
|
||||
| ((ttx_packet_0_header[3] << 4) & 0x10)
|
||||
| ((ttx_packet_0_header[3] << 2) & 0x08)
|
||||
| ((ttx_packet_0_header[3] >> 0) & 0x04)
|
||||
| ((ttx_packet_0_header[3] >> 1) & 0x02)
|
||||
| ((ttx_packet_0_header[3] >> 4) & 0x01);
|
||||
|
||||
m_ttx_page_data.erase_flag = !!(m_ttx_page_data.flags & 0x80);
|
||||
m_ttx_page_data.national_set = (m_ttx_page_data.flags >> 21) & 0x07;
|
||||
|
||||
mxdebug_if(m_debug, boost::format(" ttx page %1% subpage %2% erase? %3% national set %4%\n") % m_ttx_page_data.page % m_ttx_page_data.subpage % m_ttx_page_data.erase_flag % m_ttx_page_data.national_set);
|
||||
|
||||
auto current_content = page_to_string();
|
||||
m_current_content = page_to_string();
|
||||
mxdebug_if(m_debug,
|
||||
boost::format(" case !packet_id. page content before clearing it at timecode %2% (prev %3%): %1% PREV content: %4%\n")
|
||||
% current_content % format_timestamp(packet->timecode) % format_timestamp(m_previous_timecode) % m_previous_content);
|
||||
boost::format(" page complete; sub page %5%; current content at timecode %2% (queued %3%): %1% PREV content: %4%\n")
|
||||
% m_current_content % format_timestamp(packet->timecode) % format_timestamp(m_queued_timestamp) % m_queued_content % m_ttx_page_data.subpage);
|
||||
|
||||
if (!m_previous_content.empty()) {
|
||||
m_previous_timecode = std::max<int64_t>(m_previous_timecode, 0);
|
||||
auto new_packet = std::make_shared<packet_t>(memory_c::clone(m_previous_content), m_previous_timecode, std::abs(packet->timecode - m_previous_timecode));
|
||||
|
||||
mxdebug_if(m_debug, boost::format(" WILL DELIVER at %1% duration %2% content %3%\n") % format_timestamp(m_previous_timecode) % format_timestamp(new_packet->duration) % m_previous_content);
|
||||
|
||||
m_ptzr->process(new_packet);
|
||||
|
||||
m_previous_timecode = -1;
|
||||
// packet->timecode = -1;
|
||||
if (!m_wanted_page && !m_current_content.empty()) {
|
||||
m_wanted_page.reset({ m_ttx_page_data.page, m_ttx_page_data.subpage });
|
||||
mxdebug_if(m_debug, boost::format(" First page/subpage with content is %1%/%2%.\n") % m_wanted_page->first % m_wanted_page->second);
|
||||
}
|
||||
|
||||
if (!current_content.empty() && (m_previous_timecode == -1))
|
||||
m_previous_timecode = packet->timecode;
|
||||
if (!m_wanted_page) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_ttx_page_data.reset();
|
||||
m_previous_content = current_content;
|
||||
deliver_queued_content(timestamp_c::ns(packet->timecode));
|
||||
|
||||
if (m_current_content.empty())
|
||||
return;
|
||||
|
||||
m_queued_content = m_current_content;
|
||||
if (!m_queued_timestamp.valid())
|
||||
// m_queued_timestamp = timestamp_c::ns(packet->timecode);
|
||||
m_queued_timestamp = m_current_timestamp.valid() ? m_current_timestamp : timestamp_c::ns(packet->timecode);
|
||||
|
||||
mxdebug_if(m_debug, boost::format(" queueing page content at %1% content %2%\n") % format_timestamp(m_queued_timestamp) % m_queued_content);
|
||||
}
|
||||
|
||||
std::string
|
||||
@ -341,8 +396,5 @@ teletext_to_srt_packet_converter_c::convert(packet_cptr const &packet) {
|
||||
m_pos += 2 + m_data_length;
|
||||
}
|
||||
|
||||
if ((-1 == m_previous_timecode) && (-1 != packet->timecode) && !page_to_string().empty())
|
||||
m_previous_timecode = packet->timecode;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "common/common_pch.h"
|
||||
|
||||
#include "common/debugging.h"
|
||||
#include "common/timestamp.h"
|
||||
#include "input/packet_converter.h"
|
||||
|
||||
#define TTX_PAGE_ROW_SIZE 24
|
||||
@ -39,8 +40,10 @@ public:
|
||||
protected:
|
||||
size_t m_in_size, m_pos, m_data_length;
|
||||
unsigned char *m_buf;
|
||||
int64_t m_previous_timecode;
|
||||
std::string m_previous_content;
|
||||
timestamp_c m_queued_timestamp, m_current_timestamp;
|
||||
std::string m_queued_content, m_current_content;
|
||||
boost::optional<std::pair<int, int>> m_wanted_page;
|
||||
bool m_page_changed{};
|
||||
|
||||
ttx_page_data_t m_ttx_page_data;
|
||||
|
||||
@ -58,6 +61,9 @@ protected:
|
||||
void process_ttx_packet(packet_cptr const &packet);
|
||||
std::string page_to_string() const;
|
||||
void decode_line(unsigned char const *buffer, unsigned int row);
|
||||
void process_single_row(int row_number, timestamp_c const &packet_timestamp);
|
||||
void decode_page_data(unsigned char ttx_header_magazine);
|
||||
void deliver_queued_content(timestamp_c const &packet_timestamp);
|
||||
|
||||
protected:
|
||||
static int ttx_to_page(int ttx);
|
||||
|
@ -383,3 +383,4 @@ T_534chapter_generation_when_appending_audio_only:000e4bfced4fae128abbb741545768
|
||||
T_535chapter_generation_interval_audio_only:4231b50be18c4320584d7e18c611431d-7149e4b581f601145db038035aba3ec4-7b37e30bfede450fec2a5e7b1e982b35+bb66d81871625190fbd7b15b02f6ca57+1285f17cbdb1259606bd7174e993b3f1+c0877adb7bf88212aae2212ab819cf57+78fcd2002d1a48752c5a8e4730cc2158+b2a259375cb03683b7fe6ffe95a53e21+f3f355cb0549efabdd9954f4448fc48d+ok-8f16b1baaedec4b22df0f566b39a105e:passed:20160302-131002:0.64531153
|
||||
T_536extract_big_endian_pcm:8e57291db3e924e9bb45acb306426a0a:passed:20160307-190156:0.015845204
|
||||
T_537srt_bom_precedence_over_sub_charset:9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-32eaa074a254eab81b90bd97be50c425:passed:20160309-180444:0.036282259
|
||||
T_538teletext_many_packets_same_content:409dc709d9530f7fe85066af8fc07b7c:passed:20160316-143840:0.650010976
|
||||
|
5
tests/test-538teletext_many_packets_same_content.rb
Executable file
5
tests/test-538teletext_many_packets_same_content.rb
Executable file
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/ruby -w
|
||||
|
||||
# T_538teletext_many_packets_same_content
|
||||
describe "mkvmerge / MPEG TS with teletext subtitles with many packets having the same content"
|
||||
test_merge "data/ts/teletext_subs_many_packets_same_content.ts", args: "--no-audio --no-video"
|
Loading…
Reference in New Issue
Block a user