From 971decd51546274d150e1f01d6e66ec9f4703e36 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sat, 9 Jul 2016 17:24:29 +0200 Subject: [PATCH] MPEG PS, TS: make probe size depend on source file size Instead of using a fixed maximum both readers now use a limit that's based on a percentage of the source file's size. There's still a hard-coded lower limit of 5 MB, though. This is in preparation of allowing the user to change the probe range by specifying a percentage of the source file size. With that the user can solve situations where certain tracks start very late in the file without impacting performance for every user (e.g. by making mkvmerge always scan the whole file). See #1734. --- src/input/r_mpeg_ps.cpp | 38 ++++++++++++++++-------------------- src/input/r_mpeg_ps.h | 2 ++ src/input/r_mpeg_ts.cpp | 11 ++++++----- src/input/r_mpeg_ts.h | 1 + src/merge/generic_reader.cpp | 23 ++++++++++++++++++++++ src/merge/generic_reader.h | 5 +++++ 6 files changed, 54 insertions(+), 26 deletions(-) diff --git a/src/input/r_mpeg_ps.cpp b/src/input/r_mpeg_ps.cpp index f2e138edb..33b2993da 100644 --- a/src/input/r_mpeg_ps.cpp +++ b/src/input/r_mpeg_ps.cpp @@ -37,21 +37,15 @@ #include "output/p_truehd.h" #include "output/p_vc1.h" -#define PS_PROBE_SIZE 10 * 1024 * 1024 - int mpeg_ps_reader_c::probe_file(mm_io_c *in, uint64_t) { try { - memory_c af_buf((unsigned char *)safemalloc(PS_PROBE_SIZE), 0, true); - unsigned char *buf = af_buf.get_buffer(); - int num_read; + unsigned char buf[4]; in->setFilePointer(0, seek_beginning); - num_read = in->read(buf, PS_PROBE_SIZE); - if (4 > num_read) + if (in->read(buf, 4) != 4) return 0; - in->setFilePointer(0, seek_beginning); if (get_uint32_be(buf) != MPEGVIDEO_PACKET_START_CODE) return 0; @@ -67,6 +61,7 @@ mpeg_ps_reader_c::mpeg_ps_reader_c(const track_info_c &ti, const mm_io_cptr &in) : generic_reader_c(ti, in) , file_done(false) + , m_probe_range{} , m_debug_timecodes{"mpeg_ps|mpeg_ps_timecodes"} { } @@ -82,6 +77,7 @@ mpeg_ps_reader_c::read_headers() { } m_size = m_in->get_size(); + m_probe_range = calculate_probe_range(m_size, 5 * 1024 * 1024); uint32_t header = m_in->read_uint32_be(); bool done = m_in->eof(); version = -1; @@ -156,7 +152,7 @@ mpeg_ps_reader_c::read_headers() { break; } - done |= m_in->eof() || (m_in->getFilePointer() >= PS_PROBE_SIZE); + done |= m_in->eof() || (m_in->getFilePointer() >= m_probe_range); } // while (!done) } catch (...) { @@ -492,7 +488,7 @@ mpeg_ps_reader_c::new_stream_v_avc_or_mpeg_1_2(mpeg_ps_id_t id, int pos = 0; while (4 > buffer.get_size()) { - if (!find_next_packet_for_id(id, PS_PROBE_SIZE)) + if (!find_next_packet_for_id(id, m_probe_range)) throw false; auto packet = parse_packet(id); @@ -565,7 +561,7 @@ mpeg_ps_reader_c::new_stream_v_avc_or_mpeg_1_2(mpeg_ps_id_t id, } } - if (!find_next_packet_for_id(id, PS_PROBE_SIZE)) + if (!find_next_packet_for_id(id, m_probe_range)) break; auto packet = parse_packet(id); @@ -608,8 +604,8 @@ mpeg_ps_reader_c::new_stream_v_mpeg_1_2(mpeg_ps_id_t id, while ( (MPV_PARSER_STATE_EOS != state) && (MPV_PARSER_STATE_ERROR != state) - && (PS_PROBE_SIZE >= m_in->getFilePointer())) { - if (find_next_packet_for_id(id, PS_PROBE_SIZE)) { + && (m_probe_range >= m_in->getFilePointer())) { + if (find_next_packet_for_id(id, m_probe_range)) { auto packet = parse_packet(id); if (!packet) break; @@ -702,8 +698,8 @@ mpeg_ps_reader_c::new_stream_v_avc(mpeg_ps_id_t id, parser.add_bytes(buf, length); - while (!parser.headers_parsed() && (PS_PROBE_SIZE >= m_in->getFilePointer())) { - if (!find_next_packet_for_id(id, PS_PROBE_SIZE)) + while (!parser.headers_parsed() && (m_probe_range >= m_in->getFilePointer())) { + if (!find_next_packet_for_id(id, m_probe_range)) break; auto packet = parse_packet(id); @@ -736,8 +732,8 @@ mpeg_ps_reader_c::new_stream_v_vc1(mpeg_ps_id_t id, parser.add_bytes(buf, length); - while (!parser.is_sequence_header_available() && (PS_PROBE_SIZE >= m_in->getFilePointer())) { - if (!find_next_packet_for_id(id, PS_PROBE_SIZE)) + while (!parser.is_sequence_header_available() && (m_probe_range >= m_in->getFilePointer())) { + if (!find_next_packet_for_id(id, m_probe_range)) break; auto packet = parse_packet(id); @@ -803,8 +799,8 @@ mpeg_ps_reader_c::new_stream_a_dts(mpeg_ps_id_t id, buffer.add(buf, length); - while ((-1 == mtx::dts::find_header(buffer.get_buffer(), buffer.get_size(), track->dts_header, false)) && (PS_PROBE_SIZE >= m_in->getFilePointer())) { - if (!find_next_packet_for_id(id, PS_PROBE_SIZE)) + while ((-1 == mtx::dts::find_header(buffer.get_buffer(), buffer.get_size(), track->dts_header, false)) && (m_probe_range >= m_in->getFilePointer())) { + if (!find_next_packet_for_id(id, m_probe_range)) throw false; auto packet = parse_packet(id); @@ -846,10 +842,10 @@ mpeg_ps_reader_c::new_stream_a_truehd(mpeg_ps_id_t id, return; } - if (PS_PROBE_SIZE < m_in->getFilePointer()) + if (m_probe_range < m_in->getFilePointer()) throw false; - if (!find_next_packet_for_id(id, PS_PROBE_SIZE)) + if (!find_next_packet_for_id(id, m_probe_range)) throw false; auto packet = parse_packet(id); diff --git a/src/input/r_mpeg_ps.h b/src/input/r_mpeg_ps.h index 2ac43562d..8521f9be7 100644 --- a/src/input/r_mpeg_ps.h +++ b/src/input/r_mpeg_ps.h @@ -204,6 +204,8 @@ private: std::vector tracks; std::map m_ptzr_to_track_map; + uint64_t m_probe_range; + debugging_option_c m_debug_timecodes; public: diff --git a/src/input/r_mpeg_ts.cpp b/src/input/r_mpeg_ts.cpp index 38ce17b89..192af82cc 100644 --- a/src/input/r_mpeg_ts.cpp +++ b/src/input/r_mpeg_ts.cpp @@ -53,7 +53,6 @@ #define TS_CONSECUTIVE_PACKETS 16 #define TS_PROBE_SIZE (2 * TS_CONSECUTIVE_PACKETS * 204) -#define TS_PIDS_DETECT_SIZE (10 * 1024 * 1024) #define TS_PACKET_SIZE 188 #define TS_MAX_PACKET_SIZE 204 @@ -723,6 +722,7 @@ mpeg_ts_reader_c::mpeg_ts_reader_c(const track_info_c &ti, , m_global_timestamp_offset{} , m_stream_timestamp{timestamp_c::ns(0)} , m_state{ps_probing} + , m_probe_range{} , file_done{} , m_packet_sent_to_packetizer{} , m_dont_use_audio_pts{ "mpeg_ts|mpeg_ts_dont_use_audio_pts"} @@ -747,9 +747,10 @@ mpeg_ts_reader_c::mpeg_ts_reader_c(const track_info_c &ti, void mpeg_ts_reader_c::read_headers() { try { - size_t size_to_probe = std::min(m_size, static_cast(TS_PIDS_DETECT_SIZE)); - + m_probe_range = calculate_probe_range(m_in->get_size(), 5 * 1024 * 1024); + size_t size_to_probe = std::min(m_size, m_probe_range); m_detected_packet_size = detect_packet_size(m_in.get(), size_to_probe); + m_in->setFilePointer(0); mxdebug_if(m_debug_headers, boost::format("read_headers: Starting to build PID list. (packet size: %1%)\n") % m_detected_packet_size); @@ -844,12 +845,12 @@ mpeg_ts_reader_c::determine_global_timestamp_offset() { m_in->setFilePointer(0); m_in->clear_eof(); - mxdebug_if(m_debug_headers, boost::format("determine_global_timestamp_offset: determining global timestamp offset from the first %1% bytes\n") % TS_PIDS_DETECT_SIZE); + mxdebug_if(m_debug_headers, boost::format("determine_global_timestamp_offset: determining global timestamp offset from the first %1% bytes\n") % m_probe_range); try { unsigned char buf[TS_MAX_PACKET_SIZE]; // maximum TS packet size + 1 - while (m_in->getFilePointer() < TS_PIDS_DETECT_SIZE) { + while (m_in->getFilePointer() < m_probe_range) { if (m_in->read(buf, m_detected_packet_size) != static_cast(m_detected_packet_size)) break; diff --git a/src/input/r_mpeg_ts.h b/src/input/r_mpeg_ts.h index e66653a32..a016cb30f 100644 --- a/src/input/r_mpeg_ts.h +++ b/src/input/r_mpeg_ts.h @@ -395,6 +395,7 @@ protected: timestamp_c m_global_timestamp_offset, m_stream_timestamp, m_last_non_subtitle_timestamp; processing_state_e m_state; + uint64_t m_probe_range; bool file_done, m_packet_sent_to_packetizer; diff --git a/src/merge/generic_reader.cpp b/src/merge/generic_reader.cpp index c079c558e..950ac31c4 100644 --- a/src/merge/generic_reader.cpp +++ b/src/merge/generic_reader.cpp @@ -41,6 +41,7 @@ generic_reader_c::generic_reader_c(const track_info_c &ti, , m_num_audio_tracks{} , m_num_subtitle_tracks{} , m_reference_timecode_tolerance{} + , m_probe_range_percentage{boost::rational{3u, 10u}} // 0.3% { add_all_requested_track_ids(*this, m_ti.m_atracks.m_items); add_all_requested_track_ids(*this, m_ti.m_vtracks.m_items); @@ -510,3 +511,25 @@ generic_reader_c::get_underlying_input() actual_in = static_cast(actual_in)->get_proxied(); return actual_in; } + +void +generic_reader_c::set_probe_range_percentage(boost::rational const &probe_range_percentage) { + m_probe_range_percentage = probe_range_percentage; +} + +int64_t +generic_reader_c::calculate_probe_range(uint64_t file_size, + uint64_t fixed_minimum) + const { + static debugging_option_c s_debug{"probe_range"}; + + auto factor = boost::rational{1u, 100u} * m_probe_range_percentage; + auto probe_range = boost::rational_cast(factor * file_size);;;; + auto to_use = std::max(fixed_minimum, probe_range); + + mxdebug_if(s_debug, + boost::format("calculate_probe_range: calculated %1% based on file size %2% fixed minimum %3% percentage %4%/%5% percentage of size %6%\n") + % to_use % file_size % fixed_minimum % m_probe_range_percentage.numerator() % m_probe_range_percentage.denominator() % probe_range); + + return to_use; +} diff --git a/src/merge/generic_reader.h b/src/merge/generic_reader.h index 5dc35d8c6..8208cae22 100644 --- a/src/merge/generic_reader.h +++ b/src/merge/generic_reader.h @@ -68,6 +68,8 @@ protected: timestamp_c m_restricted_timecodes_min, m_restricted_timecodes_max; + boost::rational m_probe_range_percentage; + public: generic_reader_c(const track_info_c &ti, const mm_io_cptr &in); virtual ~generic_reader_c(); @@ -124,6 +126,9 @@ public: virtual void display_identification_results(); + virtual void set_probe_range_percentage(boost::rational const &probe_range_percentage); + virtual int64_t calculate_probe_range(uint64_t file_size, uint64_t fixed_minimum) const; + protected: virtual bool demuxing_requested(char type, int64_t id, std::string const &language = "");