From b3ac860ad458b07fae4c2d247e4d2d874b1c3449 Mon Sep 17 00:00:00 2001
From: Moritz Bunkus <moritz@bunkus.org>
Date: Thu, 24 Feb 2005 09:16:23 +0000
Subject: [PATCH] Reworked the code for native MPEG4 B frames completely. Still
 not working (playback) :( (Recommit because I f'cked up the repository
 database)

---
 src/common/mpeg4_common.cpp |   7 +-
 src/common/mpeg4_common.h   |  12 +-
 src/output/p_video.cpp      | 224 +++++++++++++++++-------------------
 src/output/p_video.h        |  10 +-
 4 files changed, 124 insertions(+), 129 deletions(-)

diff --git a/src/common/mpeg4_common.cpp b/src/common/mpeg4_common.cpp
index 0265c4953..036b9a932 100644
--- a/src/common/mpeg4_common.cpp
+++ b/src/common/mpeg4_common.cpp
@@ -162,8 +162,8 @@ mpeg4_p2_find_frame_types(const unsigned char *buffer,
           first_frame = false;
           frame.pos = first_frame_start;
         }
-        frame.type = frame_type == 0 ? 'I' : frame_type == 1 ? 'P' :
-          frame_type == 2 ? 'B' : 'S';
+        frame.type = 0 == frame_type ? FRAME_TYPE_I :
+          2 == frame_type ? FRAME_TYPE_B : FRAME_TYPE_P;
 
       } else if (first_frame &&
                  ((MPEGVIDEO_VOS_START_CODE == marker) ||
@@ -187,7 +187,8 @@ mpeg4_p2_find_frame_types(const unsigned char *buffer,
   if (2 <= verbose) {
     mxverb(2, "mpeg4_frames:   summary: found %d frames ", frames.size());
     for (fit = frames.begin(); fit < frames.end(); fit++)
-      mxverb(2, "'%c' (%d at %d) ", fit->type, fit->size, fit->pos);
+      mxverb(2, "'%c' (%d at %d) ", FRAME_TYPE_TO_CHAR(fit->type), fit->size,
+             fit->pos);
     mxverb(2, "\n");
   }
 
diff --git a/src/common/mpeg4_common.h b/src/common/mpeg4_common.h
index 9984f8eb6..caa1cf58b 100644
--- a/src/common/mpeg4_common.h
+++ b/src/common/mpeg4_common.h
@@ -87,6 +87,14 @@ enum mpeg_video_type_e {
   MPEG_VIDEO_V4_LAYER_10
 };
 
+enum frame_type_e {
+  FRAME_TYPE_I,  // shown as 'I'; default type of a new video_frame_t
+  FRAME_TYPE_P,  // shown as 'P'; any coding type other than 0/2 maps here
+  FRAME_TYPE_B   // shown as 'B' (fallback branch of FRAME_TYPE_TO_CHAR)
+};
+#define FRAME_TYPE_TO_CHAR(t) \
+  (FRAME_TYPE_I == (t) ? 'I' : FRAME_TYPE_P == (t) ? 'P' : 'B')
+
 /** Pointers to MPEG4 video frames and their data
 
    MPEG4 video can be stored in a "packed" format, e.g. in AVI. This means
@@ -105,7 +113,7 @@ struct video_frame_t {
   /** The position of the frame in the original buffer. */
   int pos;
   /** The frame type: \c 'I', \c 'P' or \c 'B'. */
-  char type;
+  frame_type_e type;
   /** Private data. */
   unsigned char *priv;
   /** The timecode of the frame in \c ns. */
@@ -122,7 +130,7 @@ struct video_frame_t {
   int64_t fref;
 
   video_frame_t():
-    data(NULL), size(0), pos(0), type('?'), priv(NULL),
+    data(NULL), size(0), pos(0), type(FRAME_TYPE_I), priv(NULL),
     timecode(0), duration(0), bref(0), fref(0) {};
 };
 
diff --git a/src/output/p_video.cpp b/src/output/p_video.cpp
index 3d8e900fe..309f0109e 100644
--- a/src/output/p_video.cpp
+++ b/src/output/p_video.cpp
@@ -295,8 +295,6 @@ mpeg1_2_video_packetizer_c::create_private_data() {
 
 // ----------------------------------------------------------------
 
-
-
 mpeg4_p2_video_packetizer_c::
 mpeg4_p2_video_packetizer_c(generic_reader_c *_reader,
                             double _fps,
@@ -307,7 +305,7 @@ mpeg4_p2_video_packetizer_c(generic_reader_c *_reader,
   video_packetizer_c(_reader, MKV_V_MPEG4_ASP, _fps, _width, _height, _ti),
   timecodes_generated(0),
   aspect_ratio_extracted(false), input_is_native(_input_is_native),
-  output_is_native(hack_engaged(ENGAGE_NATIVE_MPEG4)), csum(0) {
+  output_is_native(hack_engaged(ENGAGE_NATIVE_MPEG4)), last_i_p_frame(-1) {
 
   if (input_is_native && !output_is_native)
     mxerror("mkvmerge does not support muxing from native MPEG-4 to "
@@ -327,10 +325,6 @@ mpeg4_p2_video_packetizer_c(generic_reader_c *_reader,
   }
 }
 
-mpeg4_p2_video_packetizer_c::~mpeg4_p2_video_packetizer_c() {
-  mxinfo("\nCSUM: %lld\n", csum);
-}
-
 int
 mpeg4_p2_video_packetizer_c::process(memory_c &mem,
                                      int64_t old_timecode,
@@ -357,7 +351,7 @@ mpeg4_p2_video_packetizer_c::process_non_native(memory_c &mem,
                                                 int64_t bref,
                                                 int64_t fref) {
   vector<video_frame_t> frames;
-  vector<video_frame_t>::iterator new_frame;
+  vector<video_frame_t>::iterator frame;
 
   if (NULL == ti->private_data) {
     uint32_t pos, size;
@@ -372,77 +366,40 @@ mpeg4_p2_video_packetizer_c::process_non_native(memory_c &mem,
 
   mpeg4_p2_find_frame_types(mem.data, mem.size, frames);
 
-  foreach(new_frame, frames) {
-    if ((1 == frames.size()) && (new_frame->size == mem.size) &&
-        (0 == new_frame->pos))
-      new_frame->data = mem.grab();
-    else
-      new_frame->data = (unsigned char *)safememdup(&mem.data[new_frame->pos],
-                                                    new_frame->size);
-    available_frames.push_back(*new_frame);
-  }
-
+  // Add a timecode and a duration if they've been given.
   if (-1 != old_timecode)
     available_timecodes.push_back(old_timecode);
+  else if (0.0 == fps)
+    mxerror("Cannot convert non-native MPEG4 video frames into native ones "
+            "if the source container provides neither timecodes nor a "
+            "number of frames per second.\n");
   if (-1 != old_duration)
     available_durations.push_back(old_duration);
 
-  while (!available_frames.empty()) {
-    int64_t timecode;
+  foreach(frame, frames) {
+    // Maybe we can flush queued frames now. But only if we don't have
+    // a B frame.
+    if (FRAME_TYPE_B != frame->type)
+      flush_frames_maybe(frame->type);
 
-    if ((-1 == old_timecode) && (0 == available_timecodes.size())) {
+    // Add a timecode and a duration for each frame if none have been
+    // given and we have a fixed number of FPS.
+    if (-1 == old_timecode) {
       available_timecodes.push_back((int64_t)(timecodes_generated *
                                               1000000000.0 / fps));
       ++timecodes_generated;
     }
-
-    if (0 == available_timecodes.size())
-      break;
-
-    timecode = available_timecodes[0];
-    available_timecodes.pop_front();
-    video_frame_t &frame = available_frames[0];
-
-    if ((-1 == old_duration) && (0 == available_durations.size()))
+    if (-1 == old_duration)
       available_durations.push_back((int64_t)(1000000000.0 / fps));
-    if (0 == available_durations.size())
-      frame.duration = -1;
-    else {
-      frame.duration = available_durations[0];
-      available_durations.pop_front();
-    }
-    
-    if (('I' == frame.type) ||
-        (('B' != frame.type) && ('?' != fref_frame.type)))
-      flush_frames(frame.type);
 
-    frames_output++;
-    frame.timecode = timecode;
-
-    if (frame.type == 'I') {
-      frame.bref = -1;
-      frame.fref = -1;
-      if (bref_frame.type == '?') {
-        bref_frame = frame;
-        memory_c mem(frame.data, frame.size, true);
-        add_packet(mem, frame.timecode, frame.duration);
-      } else
-        fref_frame = frame;
-
-    } else if (frame.type != 'B') {
-      frames_output--;
-      if (bref_frame.type == '?')
-        mxerror("video_packetizer: Found a P frame but no I frame. This "
-                "should not have happened. Either this is a bug in mkvmerge "
-                "or the video stream is damaged.\n");
-      frame.bref = bref_frame.timecode;
-      frame.fref = -1;
-      fref_frame = frame;
-
-    } else
-      queued_frames.push_back(frame);
-
-    available_frames.pop_front();
+    // Copy the data. If there's only one frame in this packet then
+    // we might save a memcpy.
+    if ((1 == frames.size()) && (frame->size == mem.size))
+      frame->data = mem.grab();
+    else
+      frame->data = (unsigned char *)safememdup(&mem.data[frame->pos],
+                                                frame->size);
+    queued_frames.push_back(*frame);
   }
 
   return FILE_STATUS_MOREDATA;
@@ -459,69 +416,100 @@ mpeg4_p2_video_packetizer_c::process_native(memory_c &mem,
 }
 
 void
-mpeg4_p2_video_packetizer_c::flush_frames(char next_frame,
-                                          bool flush_all) {
-  uint32_t i;
+mpeg4_p2_video_packetizer_c::flush_frames_maybe(frame_type_e next_frame) {
+  int i, num_bframes;
 
-  if (bref_frame.type == '?') {
-    if (fref_frame.type != '?')
-      die("video_packetizer: bref_frame.type == '?' but fref_frame.type != "
-          "'?'. This should not have happened.\n");
-    if (queued_frames.size() > 0) {
-      mxwarn("video_packetizer: No I frame found but B frames queued. This "
-             "indicates a broken video stream.\n");
-      for (i = 0; i < queued_frames.size(); i++)
-        safefree(queued_frames[i].data);
-      queued_frames.clear();
-    }
+  if (0 == queued_frames.size())
     return;
+  // Count queued B frames; they decide whether 'next_frame' forces a flush.
+  num_bframes = 0;
+  for (i = 0; i < queued_frames.size(); ++i)
+    if (FRAME_TYPE_B == queued_frames[i].type)
+      ++num_bframes;
+  // Flush before an I frame, if B frames wait, or if the queue starts with P.
+  if ((FRAME_TYPE_I == next_frame) ||
+      (num_bframes > 0) || (FRAME_TYPE_P == queued_frames[0].type))
+    flush_frames();
+}
+
+// Assign timecode, duration and references to all queued frames and emit them.
+void
+mpeg4_p2_video_packetizer_c::flush_frames() {
+  uint32_t i, num_bframes, b_offset;
+  int64_t b_bref, b_fref = -1;  // -1 until an I/P frame in the queue is seen
+
+  if ((available_timecodes.size() < queued_frames.size()) ||
+      (available_durations.size() < queued_frames.size())) {
+    int64_t timecode;
+
+    if (available_timecodes.empty())
+      timecode = 0;
+    else
+      timecode = available_timecodes[0];
+
+    mxerror("Invalid/unsupported sequence of MPEG4 video frames regarding "
+            "B frames. If your video plays normally around timecode "
+            FMT_TIMECODE " then this is a bug in mkvmerge and you should "
+            "contact the author Moritz Bunkus <moritz@bunkus.org>.\n",
+            ARG_TIMECODE_NS(timecode));
   }
 
-  if (fref_frame.type == '?') {
-    if (queued_frames.size() != 0) {
-      mxwarn("video_packetizer: B frames queued but only one reference frame "
-             "found. This indicates a broken video stream, or the frames are "
-             "placed in display order which is not supported.\n");
-      for (i = 0; i < queued_frames.size(); i++)
-        safefree(queued_frames[i].data);
-      queued_frames.clear();
-    }
-    if (flush_all) {
-      memory_c mem(bref_frame.data, bref_frame.size, false);
-      add_packet(mem, bref_frame.duration, false, bref_frame.bref,
-                 bref_frame.fref);
-      bref_frame.type = '?';
-    }
-    return;
+  if ((2 <= queued_frames.size()) && (FRAME_TYPE_B != queued_frames[1].type))
+    b_offset = 1;  // 2nd queued frame is no B frame: B timecodes start at [1]
+  else
+    b_offset = 0;
+
+  num_bframes = 0;
+  b_bref = last_i_p_frame;
+  for (i = 0; i < queued_frames.size(); ++i) {
+    if (FRAME_TYPE_I == queued_frames[i].type) {
+      queued_frames[i].timecode = available_timecodes[0];
+      queued_frames[i].duration = available_durations[0];
+      queued_frames[i].bref = -1;
+      queued_frames[i].fref = -1;
+      if (-1 == last_i_p_frame) {
+        last_i_p_frame = queued_frames[i].timecode;
+        b_bref = queued_frames[i].timecode;
+      }
+      b_fref = queued_frames[i].timecode;
+
+    } else if (FRAME_TYPE_P == queued_frames[i].type) {
+      queued_frames[i].timecode =  // a P frame takes the batch's last slot
+        available_timecodes[queued_frames.size() - 1];
+      queued_frames[i].duration =
+        available_durations[queued_frames.size() - 1];
+      queued_frames[i].bref = last_i_p_frame;
+      last_i_p_frame = queued_frames[i].timecode;
+      b_fref = last_i_p_frame;
+      queued_frames[i].fref = -1;
+
+    } else {
+      queued_frames[i].timecode = available_timecodes[num_bframes + b_offset];
+      queued_frames[i].duration = available_durations[num_bframes + b_offset];
+      queued_frames[i].bref = b_bref;
+      queued_frames[i].fref = b_fref;
+      ++num_bframes;
+    }
   }
 
-  if (fref_frame.type != '?') {
-    if ((fref_frame.type == 'P') || (fref_frame.type == 'S'))
-      frames_output++;
-    fref_frame.timecode = (int64_t)(fref_frame.timecode +
-                                    queued_frames.size() * 1000000000 / fps);
-    memory_c fref_mem(fref_frame.data, fref_frame.size, false);
-    add_packet(fref_mem, fref_frame.timecode,
-               fref_frame.duration, false, fref_frame.bref, fref_frame.fref);
-
-    for (i = 0; i < queued_frames.size(); i++) {
-      memory_c mem(queued_frames[i].data, queued_frames[i].size, false);
-      add_packet(mem, queued_frames[i].timecode, queued_frames[i].duration,
-                 false, bref_frame.timecode, fref_frame.timecode);
-    }
-    queued_frames.clear();
-
-    bref_frame = fref_frame;
-    fref_frame.type = '?';
+  for (i = 0; i < queued_frames.size(); ++i) {
+    memory_c mem(queued_frames[i].data, queued_frames[i].size, true);
+    add_packet(mem, queued_frames[i].timecode, queued_frames[i].duration,
+               false, queued_frames[i].bref, queued_frames[i].fref);
   }
 
-  if (flush_all || ((next_frame == 'I') && (bref_frame.type == 'P')))
-    bref_frame.type = '?';
+  available_timecodes.erase(available_timecodes.begin(),
+                            available_timecodes.begin() +
+                            queued_frames.size());
+  available_durations.erase(available_durations.begin(),
+                            available_durations.begin() +
+                            queued_frames.size());
+  queued_frames.clear();
 }
 
 void
 mpeg4_p2_video_packetizer_c::flush() {
-  flush_frames(true);
+  flush_frames();
 }
 
 void
diff --git a/src/output/p_video.h b/src/output/p_video.h
index 8bda53c9c..88980a60d 100644
--- a/src/output/p_video.h
+++ b/src/output/p_video.h
@@ -79,18 +79,15 @@ protected:
 
 class mpeg4_p2_video_packetizer_c: public video_packetizer_c {
 protected:
-  deque<video_frame_t> available_frames, queued_frames;
+  deque<video_frame_t> queued_frames;
   deque<int64_t> available_timecodes, available_durations;
-  int64_t timecodes_generated;
-  video_frame_t bref_frame, fref_frame;
+  int64_t timecodes_generated, last_i_p_frame;
   bool aspect_ratio_extracted, input_is_native, output_is_native;
-  int64_t csum;
 
 public:
   mpeg4_p2_video_packetizer_c(generic_reader_c *_reader,
                               double _fps, int _width, int _height,
                               bool _input_is_native, track_info_c *_ti);
-  virtual ~mpeg4_p2_video_packetizer_c();
 
   virtual int process(memory_c &mem, int64_t old_timecode = -1,
                       int64_t duration = -1, int64_t bref = VFT_IFRAME,
@@ -104,7 +101,8 @@ protected:
   virtual int process_non_native(memory_c &mem, int64_t old_timecode,
                                  int64_t old_duration, int64_t bref,
                                  int64_t fref);
-  virtual void flush_frames(char next_frame = '?', bool flush_all = false);
+  virtual void flush_frames_maybe(frame_type_e next_frame);
+  virtual void flush_frames();
   virtual void extract_aspect_ratio(const unsigned char *buffer, int size);
 };