From dfee7a2226a3fb47385fcf664d9c9af431e8636d Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sun, 29 Nov 2015 11:03:51 +0100 Subject: [PATCH] Matroska reader: only analyze last 5 MB of a file if no seek head is found --- ChangeLog | 9 +++++++++ src/common/kax_analyzer.cpp | 14 ++++++++++++++ src/common/kax_analyzer.h | 2 ++ src/input/r_matroska.cpp | 12 +++++------- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 349efe59a..8b3d68367 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2015-11-29 Moritz Bunkus + + * mkvmerge: bug fix: the change to do a deeper file analysis if no + seek head was found was causing huge increases in file type + detection time as popular tools like x264 don't write seek + heads. The way elements at the end are searched has been changed + to only scan the last 5 MB of the file instead of iterating over + every level 1 element from the beginning of the file. + 2015-11-28 Moritz Bunkus * Released v8.6.0. diff --git a/src/common/kax_analyzer.cpp b/src/common/kax_analyzer.cpp index 34165e815..0c462974c 100644 --- a/src/common/kax_analyzer.cpp +++ b/src/common/kax_analyzer.cpp @@ -216,6 +216,12 @@ kax_analyzer_c::set_throw_on_error(bool throw_on_error) { return *this; } +kax_analyzer_c & +kax_analyzer_c::set_parser_start_position(uint64_t position) { + m_parser_start_position.reset(position); + return *this; +} + bool kax_analyzer_c::process() { try { @@ -278,6 +284,14 @@ kax_analyzer_c::process_internal() { auto segment_end = m_segment->IsFiniteSize() ? m_segment->GetElementPosition() + m_segment->HeadSize() + m_segment->GetSize() : m_file->get_size(); EbmlElement *l1 = nullptr; + // In certain situations the caller doesn't want to have to pay the + // price for full analysis. Then it can configure the parser to + // start parsing at a certain offset. EbmlStream::FindNextElement() + // should take care of re-syncing to a known level 1 element. 
But + // take care not to start before the segment's data start position. + if (m_parser_start_position) + m_file->setFilePointer(std::max(*m_parser_start_position, m_segment->GetElementPosition() + m_segment->HeadSize())); + // We've got our segment, so let's find all level 1 elements. while (m_file->getFilePointer() < segment_end) { if (!l1) diff --git a/src/common/kax_analyzer.h b/src/common/kax_analyzer.h index b8c7e9077..fb8e0a7af 100644 --- a/src/common/kax_analyzer.h +++ b/src/common/kax_analyzer.h @@ -102,6 +102,7 @@ private: parse_mode_e m_parse_mode{parse_mode_full}; open_mode m_open_mode{MODE_WRITE}; bool m_throw_on_error{}; + boost::optional m_parser_start_position; public: // Static functions static bool probe(std::string file_name); @@ -130,6 +131,7 @@ public: virtual kax_analyzer_c &set_parse_mode(parse_mode_e parse_mode); virtual kax_analyzer_c &set_open_mode(open_mode mode); virtual kax_analyzer_c &set_throw_on_error(bool throw_on_error); + virtual kax_analyzer_c &set_parser_start_position(uint64_t position); virtual bool process(); diff --git a/src/input/r_matroska.cpp b/src/input/r_matroska.cpp index cf6a6e8e0..7d253f3b2 100644 --- a/src/input/r_matroska.cpp +++ b/src/input/r_matroska.cpp @@ -1205,15 +1205,13 @@ kax_reader_c::read_headers() { void kax_reader_c::find_level1_elements_via_analyzer() { - if (!debugging_c::requested("kax_reader_deep_scan")) - return; - try { - auto analyzer = std::make_shared(m_in.get()); - auto ok = analyzer - ->set_parse_mode(kax_analyzer_c::parse_mode_fast) + auto start_pos = m_in->get_size() - std::min(m_in->get_size(), 5 * 1024 * 1024); + auto analyzer = std::make_shared(m_in.get()); + auto ok = analyzer + ->set_parse_mode(kax_analyzer_c::parse_mode_full) .set_open_mode(MODE_READ) - .set_throw_on_error(true) + .set_parser_start_position(start_pos) .process(); if (!ok)