Use Boost-based regular expressions for the simple chapter parser and extend it to handle more than 100 chapters. Fix for bug 320.

2025-02-26 08:22:31 +00:00 · 2008-12-17 18:00:44 +00:00 · 2008-12-17 18:00:44 +00:00 · 7227888f22
commit 7227888f22
parent e80b3654e0
3 changed files with 38 additions and 48 deletions
--- a/4
+++ b/4
@ -1,5 +1,9 @@
 2008-12-17  Moritz Bunkus  <moritz@bunkus.org>

+	* mkvmerge: bug fix: The parser for the simple chapter
+	format (CHAPTERxx=...) can now handle more than 100 chapters. Fix
+	for bug 320.
+
 	* mmg: bug fix: The commands "Save command line" and "Create
 	option file" did not save mmg's current state but the state it was
 	in when the command "Show command line" was last used or when mmg
--- a/Makefile.in
+++ b/Makefile.in
@ -449,7 +449,8 @@ mkvmerge_LDADD = -lmtxinput -lmtxoutput \
 #

 mkvinfo_SOURCES = src/info/mkvinfo.cpp src/info/console_ui.cpp
-mkvinfo_LDADD = -lmtxcommon $(MAGIC_LIBS) -lmatroska -lebml
+mkvinfo_LDADD = -lmtxcommon $(MAGIC_LIBS) -lmatroska -lebml \
+		$(BOOST_REGEX_LIB)

 ifeq (yes,$(USE_QT))
 mkvinfo_SOURCES += src/info/qt_ui.cpp src/info/qt_ui.moc.cpp \
@ -495,7 +496,7 @@ mmg_OBJECTS := $(patsubst %.cpp,%.o,$(mmg_SOURCES)) \
 mmg_DEPENDENCIES += $(DEP_COMMON)
 mmg_LDADD = -lmtxcommon $(MAGIC_LIBS) -lmatroska -lebml -lexpat $(ICONV_LIBS) \
 		$(WXWIDGETS_LIBS) $(LIBINTL_LIBS) $(MINGW_GUIAPP) \
-		$(LIBRPCRT)
+		$(BOOST_REGEX_LIB) $(LIBRPCRT)

 mkvmerge: src/mkvmerge@EXEEXT@

--- a/src/common/chapters.cpp
+++ b/src/common/chapters.cpp
@ -13,11 +13,14 @@
   \author Written by Moritz Bunkus <moritz@bunkus.org>.
 */

+#include "os.h"
+
 #include <ctype.h>
 #include <stdarg.h>

 #include <cassert>
 #include <string>
+#include <boost/regex.hpp>

 #include <matroska/KaxChapters.h>

@ -33,36 +36,9 @@ string default_chapter_language;
 /** The default country for all chapter entries that don't have their own. */
 string default_chapter_country;

-/** Is the current char an equal sign? */
-#define isequal(s) (*(s) == '=')
-/** Is the current char a colon, comman or a dot? */
-#define isseparator(s) ((':' == *(s)) || (',' == *(s)) || ('.' == *(s)))
-/** Do we have two consecutive digits? */
-#define istwodigits(s) (isdigit(*(s)) && isdigit(*(s + 1)))
-/** Do we have three consecutive digits? */
-#define isthreedigits(s) (istwodigits(s) && isdigit(*(s + 2)))
-/** Does \c s point to the string "CHAPTER"? */
-#define ischapter(s) (!strncmp("CHAPTER", (s), 7))
-/** Does \c s point to the string "NAME"? */
-#define isname(s) (!strncmp("NAME", (s), 4))
-/** Does \c s point to a valid OGM style chapter timecode entry? */
-#define ischapterline(s) ((strlen(s) == 22) && \
-                          ischapter(s) && \
-                          istwodigits(s + 7) && \
-                          isequal(s + 9) && \
-                          istwodigits(s + 10) && \
-                          isseparator(s + 12) && \
-                          istwodigits(s + 13) && \
-                          isseparator(s + 15) && \
-                          istwodigits(s + 16) && \
-                          isseparator(s + 18) && \
-                          isthreedigits(s + 19))
-/** Does \c s point to a valid OGM style chapter name entry? */
-#define ischapternameline(s) ((strlen(s) >= 14) && \
-                          ischapter(s) && \
-                          istwodigits(s + 7) && \
-                          isname(s + 9) && \
-                          isequal(s + 13))
+#define SIMCHAP_RE_TIMECODE_LINE "^\\s*CHAPTER\\d+\\s*=\\s*(\\d+)\\s*:\\s*(\\d+)\\s*:\\s*(\\d+)\\s*[\\.,]\\s*(\\d+)"
+#define SIMCHAP_RE_TIMECODE      "^\\s*CHAPTER\\d+\\s*=(.*)"
+#define SIMCHAP_RE_NAME_LINE     "^\\s*CHAPTER\\d+NAME\\s*=(.*)"

 /** \brief Throw a special chapter parser exception.

@ -93,6 +69,10 @@ chapter_error(const boost::format &format) {
 */
 bool
 probe_simple_chapters(mm_text_io_c *in) {
+  boost::regex timecode_line_re(SIMCHAP_RE_TIMECODE_LINE, boost::regex::perl);
+  boost::regex name_line_re(    SIMCHAP_RE_NAME_LINE,     boost::regex::perl);
+  boost::match_results<string::const_iterator> matches;
+
  string line;

  assert(in != NULL);
@ -100,21 +80,18 @@ probe_simple_chapters(mm_text_io_c *in) {
  in->setFilePointer(0);
  while (in->getline2(line)) {
    strip(line);
-    if (line.length() == 0)
+    if (line.empty())
      continue;

-    if (!ischapterline(line.c_str()))
+    if (!boost::regex_search(line, timecode_line_re))
      return false;

    while (in->getline2(line)) {
      strip(line);
-      if (line.length() == 0)
+      if (line.empty())
        continue;

-      if (!ischapternameline(line.c_str()))
-        return false;
-
-      return true;
+      return boost::regex_search(line, name_line_re);
    }

    return false;
@ -206,6 +183,11 @@ parse_simple_chapters(mm_text_io_c *in,
  } else
    use_language = language;

+  boost::regex timecode_line_re(SIMCHAP_RE_TIMECODE_LINE, boost::regex::perl);
+  boost::regex timecode_re(     SIMCHAP_RE_TIMECODE,      boost::regex::perl);
+  boost::regex name_line_re(    SIMCHAP_RE_NAME_LINE,     boost::regex::perl);
+  boost::match_results<string::const_iterator> matches;
+
  try {
    while (in->getline2(line)) {
      strip(line);
@ -213,12 +195,12 @@ parse_simple_chapters(mm_text_io_c *in,
        continue;

      if (mode == 0) {
-        if (!ischapterline(line.c_str()))
+        if (!boost::regex_match(line, matches, timecode_line_re))
          chapter_error(boost::format(Y("'%1%' is not a CHAPTERxx=... line.")) % line);
-        parse_int(line.substr(10, 2), hour);
-        parse_int(line.substr(13, 2), minute);
-        parse_int(line.substr(16, 2), second);
-        parse_int(line.substr(19, 3), msecs);
+        parse_int(matches[1].str(), hour);
+        parse_int(matches[2].str(), minute);
+        parse_int(matches[3].str(), second);
+        parse_int(matches[4].str(), msecs);
        if (minute > 59)
          chapter_error(boost::format(Y("Invalid minute: %1%")) % minute);
        if (second > 59)
@ -226,13 +208,16 @@ parse_simple_chapters(mm_text_io_c *in,
        start = msecs + second * 1000 + minute * 1000 * 60 +
          hour * 1000 * 60 * 60;
        mode = 1;
-        s_timecode = line.substr(10);
+
+        if (!boost::regex_match(line, matches, timecode_re))
+          chapter_error(boost::format(Y("'%1%' is not a CHAPTERxx=... line.")) % line);
+        s_timecode = matches[1].str();

      } else {
-        if (!ischapternameline(line.c_str()))
+        if (!boost::regex_match(line, matches, name_line_re))
          chapter_error(boost::format(Y("'%1%' is not a CHAPTERxxNAME=... line.")) % line);
-        name = line.substr(14);
-        if (name == "")
+        name = matches[1].str();
+        if (name.empty())
          name = s_timecode;
        mode = 0;