AAC common: implement ADTS-to-raw parser & framing packet converter

This is groundwork for adding a LOAS/LATM parser on top of it.
2025-01-17 15:42:10 +00:00 · 2014-12-15 21:42:29 +01:00 · 2014-12-15 21:42:29 +01:00 · 6603d22ea3
commit 6603d22ea3
parent fb3e6de77f
4 changed files with 414 additions and 4 deletions
--- a/src/common/aac.cpp
+++ b/src/common/aac.cpp
@ -13,11 +13,7 @@

 #include "common/common_pch.h"

-#include <stdio.h>
-#include <string.h>
-
 #include "common/aac.h"
-#include "common/bit_cursor.h"
 #include "common/codec.h"
 #include "common/strings/formatting.h"

@ -25,6 +21,286 @@ const int g_aac_sampling_freq[16] = {96000, 88200, 64000, 48000, 44100, 32000,
                                     24000, 22050, 16000, 12000, 11025,  8000,
                                      7350,     0,     0,     0}; // filling

+namespace aac {
+
+// Constructs a frame with every field cleared; init() does the work.
+frame_c::frame_c() {
+  this->init();
+}
+
+// Resets the frame: all numeric fields become zero, the frame is
+// flagged as invalid and both the timecode and the payload are reset.
+void
+frame_c::init() {
+  // Values read from or derived from the header.
+  m_id = m_profile = m_sample_rate = m_bit_rate = m_channels = 0;
+  m_frame_size = m_header_bit_size = m_header_byte_size = m_data_byte_size = 0;
+
+  // Position, garbage and validity bookkeeping.
+  m_stream_position = 0;
+  m_garbage_size    = 0;
+  m_valid           = false;
+
+  m_timecode.reset();
+  m_data.reset();
+}
+
+// Decodes the ADTS header found at the start of "buffer" into this
+// frame.
+//
+// The frame is re-initialized first. Returns true (and sets m_valid)
+// if a header was decoded. Returns false if the sync word or the layer
+// do not match or if the signalled frame size is not larger than the
+// header. An end-of-file exception raised while reading is swallowed
+// and also results in false.
+bool
+frame_c::decode_adts_header(unsigned char const *buffer,
+                            size_t buffer_size) {
+  try {
+    init();
+
+    bit_reader_c reader{buffer, static_cast<unsigned int>(buffer_size)};
+
+    auto const sync_word = reader.get_bits(12);
+    if (0xfff != sync_word)               // ADTS sync word
+      return false;
+
+    m_id = reader.get_bit();              // ID: 0 = MPEG-4, 1 = MPEG-2
+
+    auto const layer = reader.get_bits(2);
+    if (0 != layer)                       // layer must be 0
+      return false;
+
+    bool const has_crc   = !reader.get_bit(); // protection_absent == 0 means a CRC follows
+    m_profile            = reader.get_bits(2);
+    int const rate_index = reader.get_bits(4);
+    reader.skip_bits(1);                  // private
+    m_channels           = reader.get_bits(3);
+    reader.skip_bits(4);                  // original/copy, home, copyright_id_bit, copyright_id_start
+
+    m_frame_size         = reader.get_bits(13);
+    m_header_bit_size    = has_crc ? 56 + 16 : 56;
+
+    // The frame size covers the header, so it must exceed the header's size.
+    if ((m_frame_size * 8) <= m_header_bit_size)
+      return false;
+
+    reader.skip_bits(11);                 // adts_buffer_fullness
+    reader.skip_bits(2);                  // no_raw_blocks_in_frame
+    if (has_crc)
+      reader.skip_bits(16);
+
+    m_sample_rate      = g_aac_sampling_freq[rate_index];
+    m_bit_rate         = 1024;
+    m_header_byte_size = (m_header_bit_size + 7) / 8;
+    m_data_byte_size   = m_frame_size - m_header_byte_size;
+    m_valid            = 0 != m_header_bit_size;
+
+  } catch (mtx::mm_io::end_of_file_x &) {
+    // Ran out of data while reading: m_valid is still false from init().
+  }
+
+  return m_valid;
+}
+
+// Convenience overload: searches the memory block "buffer" points to.
+int
+frame_c::find_in(memory_cptr const &buffer) {
+  auto const data = buffer->get_buffer();
+  auto const size = buffer->get_size();
+
+  return find_in(data, size);
+}
+
+// Scans "buffer" byte by byte for the first offset at which an ADTS
+// header decodes successfully. Returns that offset or -1 if there is
+// none. After a successful search this frame holds the decoded header.
+int
+frame_c::find_in(unsigned char const *buffer,
+                 size_t buffer_size) {
+  size_t offset = 0;
+
+  while (offset < buffer_size) {
+    if (decode_adts_header(buffer + offset, buffer_size - offset))
+      return offset;
+
+    ++offset;
+  }
+
+  return -1;
+}
+
+// Formats the frame's properties as text. The short form lists
+// position, size, ID and profile; the verbose form additionally lists
+// garbage size, sample rate, bit rate and channel count.
+std::string
+frame_c::to_string(bool verbose)
+  const {
+  if (verbose) {
+    boost::format formatter("position %1% size %2% garbage %3% ID %4% profile %5% sample rate %6% bit rate %7% channels %8%");
+    formatter % m_stream_position % m_frame_size % m_garbage_size % m_id % m_profile % m_sample_rate % m_bit_rate % m_channels;
+
+    return formatter.str();
+  }
+
+  boost::format formatter("position %1% size %2% ID %3% profile %4%");
+  formatter % m_stream_position % m_frame_size % m_id % m_profile;
+
+  return formatter.str();
+}
+
+// ------------------------------------------------------------
+
+// Creates a parser with all counters at zero and an undetermined
+// multiplex type.
+parser_c::parser_c()
+  : m_parsed_stream_position{0}
+  , m_total_stream_position{0}
+  , m_garbage_size{0}
+  , m_multiplex_type{unknown_multiplex}
+{
+}
+
+// Queues a timecode. parse() hands queued timecodes out in the order
+// they were added, one per extracted frame.
+void
+parser_c::add_timecode(timecode_c const &timecode) {
+  m_provided_timecodes.emplace_back(timecode);
+}
+
+// Convenience overload: feeds the content of the memory block "mem"
+// points to into the parser.
+void
+parser_c::add_bytes(memory_cptr const &mem) {
+  auto const data = mem->get_buffer();
+  auto const size = mem->get_size();
+
+  add_bytes(data, size);
+}
+
+// Appends "size" bytes from "buffer" to the internal buffer, advances
+// the total stream position accordingly and immediately parses the
+// buffered data for complete frames.
+void
+parser_c::add_bytes(unsigned char *const buffer,
+                    size_t size) {
+  m_buffer.add(buffer, size);
+  m_total_stream_position += size;
+  parse();
+}
+
+// Currently does nothing; add_bytes() already parses everything it can.
+void
+parser_c::flush() {
+  // no-op
+}
+
+// Returns the number of parsed frames waiting to be fetched with
+// get_frame().
+size_t
+parser_c::frames_available()
+  const {
+  return m_frames.size();
+}
+
+// Removes the oldest queued frame and returns a copy of it.
+//
+// The queue must not be empty; check frames_available() first.
+frame_c
+parser_c::get_frame() {
+  frame_c oldest = m_frames.front();
+  m_frames.pop_front();
+
+  return oldest;
+}
+
+// Returns the total number of bytes handed to add_bytes() so far.
+uint64_t
+parser_c::get_total_stream_position()
+  const {
+  return m_total_stream_position;
+}
+
+// Returns the number of bytes parse() has consumed (removed from the
+// internal buffer) so far.
+uint64_t
+parser_c::get_parsed_stream_position()
+  const {
+  return m_parsed_stream_position;
+}
+
+// Tries to decode a frame header at the start of "buffer" into
+// "frame". Only ADTS headers are attempted. The first successful
+// decode marks the stream as ADTS. Returns whether a header was
+// decoded.
+bool
+parser_c::decode_header(unsigned char const *buffer,
+                        size_t buffer_size,
+                        frame_c &frame) {
+  auto const decoded = frame.decode_adts_header(buffer, buffer_size);
+
+  // Remember the multiplex type once it has been detected.
+  if (decoded && (adts_multiplex != m_multiplex_type))
+    m_multiplex_type = adts_multiplex;
+
+  return decoded;
+}
+
+// Extracts as many complete frames as possible from the internal
+// buffer and appends them to the frame queue.
+//
+// Bytes at which no header can be decoded are counted as garbage and
+// skipped. If a header is found but its frame is not fully buffered
+// yet, parsing stops at that header. Everything processed up to that
+// point is removed from the buffer afterwards.
+void
+parser_c::parse() {
+  auto const data      = m_buffer.get_buffer();
+  auto const available = m_buffer.get_size();
+  auto offset          = 0u;
+
+  // More than eight bytes must remain before a header decode is attempted.
+  while ((offset + 8) < available) {
+    frame_c frame;
+
+    auto const header_found = decode_header(&data[offset], available - offset, frame);
+    if (!header_found) {
+      // Not a frame start: count the byte as garbage preceding the next frame.
+      ++m_garbage_size;
+      ++offset;
+      continue;
+    }
+
+    auto const frame_end = offset + frame.m_frame_size;
+    if (frame_end > available)
+      break;                    // frame not fully buffered yet
+
+    // Garbage counted since the previous frame is attributed to this one.
+    frame.m_garbage_size    = m_garbage_size;
+    frame.m_stream_position = m_parsed_stream_position + offset;
+    frame.m_data            = memory_c::clone(&data[offset + frame.m_header_byte_size], frame.m_data_byte_size);
+
+    if (!m_provided_timecodes.empty()) {
+      frame.m_timecode = m_provided_timecodes.front();
+      m_provided_timecodes.pop_front();
+    }
+
+    m_garbage_size = 0;
+    offset         = frame_end;
+
+    m_frames.push_back(frame);
+  }
+
+  m_buffer.remove(offset);
+  m_parsed_stream_position += offset;
+}
+
+// Searches "buffer" for the first position at which
+// "num_required_headers" ADTS frames follow each other back to back
+// and all share the first frame's ID, profile, channel count and
+// sample rate.
+//
+// Returns the offset of the first frame of such a run or -1 if the
+// buffer does not contain one. Progress is logged if the debugging
+// option "aac_consecutive_frames" is enabled.
+int
+parser_c::find_consecutive_frames(unsigned char const *buffer,
+                                  size_t buffer_size,
+                                  size_t num_required_headers) {
+  static auto s_debug = debugging_option_c{"aac_consecutive_frames"};
+  size_t base = 0;
+
+  do {
+    mxdebug_if(s_debug, boost::format("Starting search for %2% headers with base %1%, buffer size %3%\n") % base % num_required_headers % buffer_size);
+
+    size_t position = base;
+
+    // Locate the next candidate for the first frame of a run.
+    frame_c first_frame;
+    while (((position + 8) < buffer_size) && !first_frame.decode_adts_header(&buffer[position], buffer_size - position))
+      ++position;
+
+    mxdebug_if(s_debug, boost::format("First frame at %1% valid %2%\n") % position % first_frame.m_valid);
+
+    if (!first_frame.m_valid)
+      return -1;
+
+    size_t offset            = position + first_frame.m_frame_size;
+    size_t num_headers_found = 1;
+
+    // Follow the chain of frame sizes and verify each following header.
+    while (   (num_headers_found < num_required_headers)
+           && (offset            < buffer_size)) {
+
+      frame_c current_frame;
+      if (!current_frame.decode_adts_header(&buffer[offset], buffer_size - offset))
+        break;
+
+      if (8 > current_frame.m_frame_size) {
+        mxdebug_if(s_debug, boost::format("Current frame at %1% has invalid size %2%\n") % offset % current_frame.m_frame_size);
+        break;
+      }
+
+      // A mismatch in any single property means the frame does not
+      // belong to the same stream as the first frame.
+      if (   (current_frame.m_id          != first_frame.m_id)
+          || (current_frame.m_profile     != first_frame.m_profile)
+          || (current_frame.m_channels    != first_frame.m_channels)
+          || (current_frame.m_sample_rate != first_frame.m_sample_rate)) {
+        mxdebug_if(s_debug,
+                   boost::format("Current frame at %9% differs from first frame. (first/current) ID: %1%/%2% profile: %3%/%4% channels: %5%/%6% sample rate: %7%/%8%\n")
+                   % first_frame.m_id          % current_frame.m_id
+                   % first_frame.m_profile     % current_frame.m_profile
+                   % first_frame.m_channels    % current_frame.m_channels
+                   % first_frame.m_sample_rate % current_frame.m_sample_rate
+                   % offset);
+        break;
+      }
+
+      mxdebug_if(s_debug, boost::format("Current frame at %1% equals first frame, found %2%\n") % offset % (num_headers_found + 1));
+
+      ++num_headers_found;
+      offset += current_frame.m_frame_size;
+    }
+
+    if (num_headers_found == num_required_headers) {
+      mxdebug_if(s_debug, boost::format("Found required number of headers at %1%\n") % position);
+      return position;
+    }
+
+    // Restart the search two bytes after the rejected candidate.
+    base = position + 2;
+  } while (base < buffer_size);
+
+  return -1;
+}
+
+}
+
 aac_header_c::aac_header_c()
  : object_type{}
  , extension_object_type{}
--- a/src/common/aac.h
+++ b/src/common/aac.h
@ -17,6 +17,8 @@
 #include "common/common_pch.h"

 #include "common/bit_cursor.h"
+#include "common/byte_buffer.h"
+#include "common/timecode.h"

 #define AAC_ID_MPEG4 0
 #define AAC_ID_MPEG2 1
@ -63,6 +65,65 @@ protected:

 bool operator ==(const aac_header_c &h1, const aac_header_c &h2);

+namespace aac {
+
+// One AAC frame located in an ADTS stream: the values decoded from its
+// header plus bookkeeping about where it was found.
+class frame_c {
+public:
+  unsigned int m_id;               // ID bit from the header: 0 = MPEG-4, 1 = MPEG-2
+  unsigned int m_profile;          // two-bit profile field from the header
+  unsigned int m_sample_rate;      // looked up via the header's sampling frequency index
+  unsigned int m_bit_rate;         // set to the fixed value 1024 by decode_adts_header()
+  unsigned int m_channels;         // three-bit channel field from the header
+  unsigned int m_frame_size;       // size of the whole frame in bytes, header included
+  unsigned int m_header_bit_size;  // header size in bits
+  unsigned int m_header_byte_size; // header size in bytes, rounded up
+  unsigned int m_data_byte_size;   // frame size minus header size in bytes
+  uint64_t m_stream_position;      // set by the parser: where the frame starts in the stream
+  size_t m_garbage_size;           // set by the parser: bytes skipped before this frame
+  timecode_c m_timecode;           // set by the parser if a timecode was provided
+  bool m_valid;                    // true if a header was decoded successfully
+  memory_cptr m_data;              // set by the parser: copy of the data following the header
+
+public:
+  frame_c();
+
+  void init();
+  bool decode_adts_header(unsigned char const *buffer, size_t buffer_size);
+
+  std::string to_string(bool verbose = false) const;
+
+  int find_in(memory_cptr const &buffer);
+  int find_in(unsigned char const *buffer, size_t buffer_size);
+};
+
+// Splits a stream of bytes into AAC frames. Data is fed in with
+// add_bytes(); complete frames are fetched with get_frame() while
+// frames_available() is non-zero.
+class parser_c {
+public:
+  enum multiplex_type_e {
+      unknown_multiplex = 0
+    , adts_multiplex
+    , adif_multiplex
+  };
+
+protected:
+  std::deque<frame_c> m_frames;                // parsed frames not fetched yet
+  std::deque<timecode_c> m_provided_timecodes; // timecodes waiting to be assigned to frames
+  byte_buffer_c m_buffer;                      // bytes added but not consumed yet
+  uint64_t m_parsed_stream_position;           // number of bytes consumed so far
+  uint64_t m_total_stream_position;            // number of bytes added so far
+  size_t m_garbage_size;                       // bytes skipped since the last frame
+  multiplex_type_e m_multiplex_type;           // detected multiplex type
+
+public:
+  parser_c();
+
+  void add_timecode(timecode_c const &timecode);
+  void add_bytes(memory_cptr const &mem);
+  void add_bytes(unsigned char *const buffer, size_t size);
+  void flush();
+
+  size_t frames_available() const;
+  frame_c get_frame();
+
+  uint64_t get_parsed_stream_position() const;
+  uint64_t get_total_stream_position() const;
+
+  int find_consecutive_frames(unsigned char const *buffer, size_t buffer_size, size_t num_required_headers);
+
+protected:
+  void parse();
+  bool decode_header(unsigned char const *buffer, size_t buffer_size, frame_c &frame);
+};
+using parser_cptr = std::shared_ptr<parser_c>;
+
+}
+
 bool parse_aac_adif_header(const unsigned char *buf, int size, aac_header_c *aac_header);
 int find_aac_header(const unsigned char *buf, int size, aac_header_c *aac_header, bool emphasis_present);
 int find_consecutive_aac_headers(const unsigned char *buf, int size, int num);
--- a/src/input/aac_framing_packet_converter.cpp
+++ b/src/input/aac_framing_packet_converter.cpp
@ -0,0 +1,40 @@
+/*
+   mkvmerge -- utility for splicing together matroska files
+   from component media subtypes
+
+   Distributed under the GPL v2
+   see the file COPYING for details
+   or visit http://www.gnu.org/copyleft/gpl.html
+
+   AAC framing type converter
+
+   Written by Moritz Bunkus <moritz@bunkus.org>.
+*/
+
+#include "common/common_pch.h"
+
+#include "common/strings/formatting.h"
+#include "merge/pr_generic.h"
+#include "input/aac_framing_packet_converter.h"
+
+// Creates a converter that passes "ptzr" on to the packet_converter_c
+// base class; the AAC parser member is default-constructed.
+aac_framing_packet_converter_c::aac_framing_packet_converter_c(generic_packetizer_c *ptzr)
+  : packet_converter_c{ptzr}
+{
+}
+
+// Feeds the packet's data (and its timecode, if it has one) into the
+// AAC parser and passes every frame the parser has completed on to
+// the packetizer as a packet of its own. Always returns true.
+bool
+aac_framing_packet_converter_c::convert(packet_cptr const &packet) {
+  if (packet->has_timecode())
+    m_parser.add_timecode(timecode_c::ns(packet->timecode));
+
+  m_parser.add_bytes(packet->data);
+
+  while (0 != m_parser.frames_available()) {
+    auto parsed_frame = m_parser.get_frame();
+    // Frames without a timecode are passed on with the timecode -1.
+    auto raw_packet   = std::make_shared<packet_t>(parsed_frame.m_data, parsed_frame.m_timecode.to_ns(-1));
+
+    m_ptzr->process(raw_packet);
+  }
+
+  return true;
+}
--- a/src/input/aac_framing_packet_converter.h
+++ b/src/input/aac_framing_packet_converter.h
@ -0,0 +1,33 @@
+/*
+   mkvmerge -- utility for splicing together matroska files
+   from component media subtypes
+
+   Distributed under the GPL v2
+   see the file COPYING for details
+   or visit http://www.gnu.org/copyleft/gpl.html
+
+   class definitions for the AAC framing type converter
+
+   Written by Moritz Bunkus <moritz@bunkus.org>.
+*/
+
+#ifndef MTX_INPUT_AAC_FRAMING_PACKET_CONVERTER_H
+#define MTX_INPUT_AAC_FRAMING_PACKET_CONVERTER_H
+
+#include "common/common_pch.h"
+
+#include "common/aac.h"
+#include "input/packet_converter.h"
+
+// Packet converter that runs incoming packets through an AAC parser
+// and passes the parsed frames on to the packetizer, one packet per
+// frame.
+class aac_framing_packet_converter_c: public packet_converter_c {
+protected:
+  aac::parser_c m_parser; // collects the incoming bytes and splits them into frames
+
+public:
+  aac_framing_packet_converter_c(generic_packetizer_c *ptzr);
+  virtual ~aac_framing_packet_converter_c() {}
+
+  virtual bool convert(packet_cptr const &packet);
+};
+
+#endif  // MTX_INPUT_AAC_FRAMING_PACKET_CONVERTER_H