From 226c0b5927826fe52c5bb58616cd1e5265b14c36 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Tue, 30 Jul 2024 16:13:26 +0200 Subject: [PATCH] AAC: LOAS/LATM: defer parsing LATM frames until first AudioSpecificConfig is found fixes #3727 --- NEWS.md | 7 ++ src/common/aac.cpp | 81 +++++++++++++++++-- src/common/aac.h | 3 +- tests/results.txt | 1 + ...0767aac_latm_audio_specific_config_late.rb | 5 ++ 5 files changed, 89 insertions(+), 8 deletions(-) create mode 100755 tests/test-0767aac_latm_audio_specific_config_late.rb diff --git a/NEWS.md b/NEWS.md index 0119a9218..7419a850b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # Version ? +## Bug fixes + +* mkvmerge: AAC parser: LOAS/LATM streams: if the first `AudioSpecificConfig` + element is not found within the first LOAS/LATM frame, `mkvmerge` will no + longer discard the frames before it but defer parsing them until after it + has found the `AudioSpecificConfig` element. Fixes #3727. + ## Build system changes * The bundled `fmt` library was updated to v10.2.1. 
diff --git a/src/common/aac.cpp b/src/common/aac.cpp index 72bcd8f6b..60d250a78 100644 --- a/src/common/aac.cpp +++ b/src/common/aac.cpp @@ -710,32 +710,96 @@ parser_c::decode_loas_latm_header(uint8_t const *buffer, std::pair parser_c::decode_header(uint8_t const *buffer, size_t buffer_size) { - if (adif_multiplex == m_multiplex_type) - return { failure, 0 }; - if (adts_multiplex == m_multiplex_type) return decode_adts_header(buffer, buffer_size); if (loas_latm_multiplex == m_multiplex_type) return decode_loas_latm_header(buffer, buffer_size); - auto result = decode_adts_header(buffer, buffer_size); + return { failure, 0 }; +} + +bool +parser_c::determine_multiplex_type(uint8_t const *new_buffer, + std::size_t new_buffer_size) { + at_scope_exit_c cleanup{[this]() { + m_copy_data = true; + + if (m_multiplex_type != unknown_multiplex) + m_multiplex_type_detection_buffer.clear(); + }}; + + m_multiplex_type_detection_buffer.add(new_buffer, new_buffer_size); + + m_copy_data = false; + + auto buffer = m_multiplex_type_detection_buffer.get_buffer(); + auto buffer_size = m_multiplex_type_detection_buffer.get_size(); + auto result = decode_adts_header(buffer, buffer_size); + if (result.first == success) { + mxdebug_if(m_debug, fmt::format("determine_multiplex_type: successfully detected ADTS at the start\n")); m_multiplex_type = adts_multiplex; - return result; + return true; } result = decode_loas_latm_header(buffer, buffer_size); + if (result.first == success) { + mxdebug_if(m_debug, fmt::format("determine_multiplex_type: successfully detected LOAS LATM at the start\n")); m_multiplex_type = loas_latm_multiplex; - return result; + return true; } - return result; + std::vector> multiplexes_to_try{ { loas_latm_multiplex, 0u }, { adts_multiplex, 0u } }; /* the non-LATM candidate is scanned with decode_adts_header() below, so it must be labelled ADTS; decode_header() rejects every other type */ + + for (auto &multiplex : multiplexes_to_try) { + auto position = 0u; + + while (position < buffer_size) { + auto remaining_bytes = buffer_size - position; + result = multiplex.first == loas_latm_multiplex ? 
decode_loas_latm_header(&buffer[position], remaining_bytes) : decode_adts_header(&buffer[position], remaining_bytes); + + if (result.first == need_more_data) + break; + + auto num_bytes = std::max(std::min(result.second, remaining_bytes), 1); + position += num_bytes; + + if (result.first == success) + ++multiplex.second; + } + } + + auto latm_config_parsed = m_latm_parser.config_parsed(); + + std::optional winner; + + for (unsigned int idx = 0; idx < multiplexes_to_try.size(); ++idx) { + mxdebug_if(m_debug, fmt::format("determine_multiplex_type: idx {0} type {1} size {2}\n", idx, static_cast(multiplexes_to_try[idx].first), multiplexes_to_try[idx].second)); + + auto num_frames = multiplexes_to_try[idx].second; + + if ((num_frames > 0) && (!winner || (num_frames > multiplexes_to_try[*winner].second))) + winner = idx; + } + + if (winner.has_value()) { + m_multiplex_type = multiplexes_to_try[*winner].first; + mxdebug_if(m_debug, fmt::format("determine_multiplex_type: we have a winner: {0} LATM configuration parsed: {1}\n", static_cast(m_multiplex_type), latm_config_parsed)); + return true; + } + + mxdebug_if(m_debug, fmt::format("determine_multiplex_type: no winner\n")); + + return false; } void parser_c::push_frame(frame_c &frame) { + if (m_multiplex_type == unknown_multiplex) + return; + if (!m_provided_timestamps.empty()) { frame.m_timestamp = m_provided_timestamps.front(); m_provided_timestamps.pop_front(); @@ -763,6 +827,9 @@ parser_c::parse() { auto buffer_size = m_fixed_buffer ? 
m_fixed_buffer_size : m_buffer.get_size(); auto position = 0u; + if ((m_multiplex_type == unknown_multiplex) && !determine_multiplex_type(buffer, buffer_size)) + return; + while (position < buffer_size) { auto remaining_bytes = buffer_size - position; auto result = decode_header(&buffer[position], remaining_bytes); diff --git a/src/common/aac.h b/src/common/aac.h index e68a3da93..b3c7ead46 100644 --- a/src/common/aac.h +++ b/src/common/aac.h @@ -169,7 +169,7 @@ protected: protected: std::deque m_frames; std::deque m_provided_timestamps; - mtx::bytes::buffer_c m_buffer; + mtx::bytes::buffer_c m_buffer, m_multiplex_type_detection_buffer; uint8_t const *m_fixed_buffer; size_t m_fixed_buffer_size; uint64_t m_parsed_stream_position, m_total_stream_position; @@ -216,6 +216,7 @@ protected: std::pair decode_header(uint8_t const *buffer, size_t buffer_size); std::pair decode_adts_header(uint8_t const *buffer, size_t buffer_size); std::pair decode_loas_latm_header(uint8_t const *buffer, size_t buffer_size); + bool determine_multiplex_type(uint8_t const *buffer, std::size_t buffer_size); void push_frame(frame_c &frame); }; using parser_cptr = std::shared_ptr; diff --git a/tests/results.txt b/tests/results.txt index 0e36ae8c6..9d4bdf33b 100644 --- a/tests/results.txt +++ b/tests/results.txt @@ -611,3 +611,4 @@ T_0763vp9_alpha_channel_data:ac329c9d810d9b19fb3ffcde6f12af2a-OK:passed:20231203 T_0764ui_locale_be_BY:a44c54eadfb4c8fbdc104b75aa1de1c1-72b98d331b58a0f95e10159fca191b52:passed:20240120-191944:0.043782405 T_0765ffmpeg_metadata_chapters:f16630c4019413c98b75b959a5697391-6b2b843310e80367b5fe5aaa8a5d51c4:passed:20240310-145016:0.047790171 T_0766ui_locale_nb_NO:6e0054bcf8d381306adc9d4d212d1f6a-5a0be94aab291615f8ebd47f887e6eba:passed:20240422-215240:0.044197325 +T_0767aac_latm_audio_specific_config_late:6ef4bed121ea8c3bcb94d9e460d5c67a:passed:20240730-160739:0.022664154 diff --git a/tests/test-0767aac_latm_audio_specific_config_late.rb 
b/tests/test-0767aac_latm_audio_specific_config_late.rb new file mode 100755 index 000000000..07ca95aa4 --- /dev/null +++ b/tests/test-0767aac_latm_audio_specific_config_late.rb @@ -0,0 +1,5 @@ +#!/usr/bin/ruby -w + +# T_767aac_latm_audio_specific_config_late +describe "mkvmerge / AAC LOAS/LATM stream, AudioSpecificConfig comes in later frame" +test_merge "data/aac/aac_lc_latm_problem.aac"