From b54d9923d670d2136c12a0aff91dc9c089b60eb4 Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sun, 18 May 2014 15:58:59 +0200 Subject: [PATCH] mkvmerge: probe text subs after unambiguous binary formats --- ChangeLog | 6 +++ src/merge/output_control.cpp | 67 ++++++++++++++----------- tests/results.txt | 1 + tests/test-428mkv_misdetected_as_ass.rb | 5 ++ 4 files changed, 50 insertions(+), 29 deletions(-) create mode 100755 tests/test-428mkv_misdetected_as_ass.rb diff --git a/ChangeLog b/ChangeLog index f1a4010b3..64aa7f6d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2014-05-18 Moritz Bunkus + * mkvmerge: bug fix: Changed the file type detection order + again. The text subtitle formats are now probed after those binary + formats that can be detected quickly and unambiguously. This + avoids some mis-detection of e.g. Matroska files as ASS text + subtitles if they do contain such a track. + * mmg: enhancement: The chapter editor will only use fast-mode parsing when loading chapters from Matroska files. 
diff --git a/src/merge/output_control.cpp b/src/merge/output_control.cpp index 4daea89e6..0f36149e8 100644 --- a/src/merge/output_control.cpp +++ b/src/merge/output_control.cpp @@ -349,6 +349,36 @@ open_playlist_file(filelist_t &file, return true; } +static file_type_e +detect_text_file_formats(filelist_t const &file) { + auto text_io = mm_text_io_cptr{}; + try { + text_io = std::make_shared<mm_text_io_c>(new mm_file_io_c(file.name)); + auto text_size = text_io->get_size(); + + if (srt_reader_c::probe_file(text_io.get(), text_size)) + return FILE_TYPE_SRT; + else if (ssa_reader_c::probe_file(text_io.get(), text_size)) + return FILE_TYPE_SSA; + else if (vobsub_reader_c::probe_file(text_io.get(), text_size)) + return FILE_TYPE_VOBSUB; + else if (usf_reader_c::probe_file(text_io.get(), text_size)) + return FILE_TYPE_USF; + + // Unsupported text subtitle formats + else if (microdvd_reader_c::probe_file(text_io.get(), text_size)) + return FILE_TYPE_MICRODVD; + + } catch (mtx::mm_io::exception &ex) { + mxerror(boost::format(Y("The file '%1%' could not be opened for reading: %2%.\n")) % file.name % ex); + + } catch (...) { + mxerror(boost::format(Y("The source file '%1%' could not be opened successfully, or retrieving its size by seeking to the end did not work.\n")) % file.name); + } + + return FILE_TYPE_IS_UNKNOWN; +} + /** \brief Probe the file type Opens the input file and calls the \c probe_file function for each known @@ -366,35 +396,6 @@ get_file_type_internal(filelist_t &file) { file_type_e type = FILE_TYPE_IS_UNKNOWN; - // All text file types (subtitles). 
- auto text_io = mm_text_io_cptr{}; - try { - text_io = std::make_shared<mm_text_io_c>(new mm_file_io_c(file.name)); - auto text_size = text_io->get_size(); - - if (srt_reader_c::probe_file(text_io.get(), text_size)) - type = FILE_TYPE_SRT; - else if (ssa_reader_c::probe_file(text_io.get(), text_size)) - type = FILE_TYPE_SSA; - else if (vobsub_reader_c::probe_file(text_io.get(), text_size)) - type = FILE_TYPE_VOBSUB; - else if (usf_reader_c::probe_file(text_io.get(), text_size)) - type = FILE_TYPE_USF; - - // Unsupported text subtitle formats - else if (microdvd_reader_c::probe_file(text_io.get(), text_size)) - type = FILE_TYPE_MICRODVD; - - if (type != FILE_TYPE_IS_UNKNOWN) - return std::make_pair(type, text_size); - - } catch (mtx::mm_io::exception &ex) { - mxerror(boost::format(Y("The file '%1%' could not be opened for reading: %2%.\n")) % file.name % ex); - - } catch (...) { - mxerror(boost::format(Y("The source file '%1%' could not be opened successfully, or retrieving its size by seeking to the end did not work.\n")) % file.name); - } - // File types that can be detected unambiguously but are not supported if (aac_adif_reader_c::probe_file(io, size)) type = FILE_TYPE_AAC; @@ -406,6 +407,7 @@ get_file_type_internal(filelist_t &file) { type = FILE_TYPE_FLV; else if (hdsub_reader_c::probe_file(io, size)) type = FILE_TYPE_HDSUB; + // File types that can be detected unambiguously else if (avi_reader_c::probe_file(io, size)) type = FILE_TYPE_AVI; @@ -435,6 +437,13 @@ get_file_type_internal(filelist_t &file) { type = FILE_TYPE_COREAUDIO; else if (dirac_es_reader_c::probe_file(io, size)) type = FILE_TYPE_DIRAC; + + // All text file types (subtitles). 
+ else + type = detect_text_file_formats(file); + + if (FILE_TYPE_IS_UNKNOWN != type) + ; // intentional fall-through // File types that are misdetected sometimes and that aren't supported else if (dv_reader_c::probe_file(io, size)) type = FILE_TYPE_DV; diff --git a/tests/results.txt b/tests/results.txt index 02990606f..c083dc2d7 100644 --- a/tests/results.txt +++ b/tests/results.txt @@ -273,3 +273,4 @@ T_424avc_recover_point_sei_before_second_field:b66042e704ced1b55a5283d4b09876de: T_425mpeg_ts_timestamp_outlier:d9c6a4a1c7e815fed80a038128899586:passed:20140305-203603:2.509694471 T_426extract_write_bom_only_once:a9255d40de93e2731aaead0a746e582f-a9255d40de93e2731aaead0a746e582f:passed:20140310-195606:0.0 T_427ui_locale_pt_BR:8719aedc77a0435129c79e3a061642bf-344b51e9ae6fe2d8ce60fef18ee0e7d1:passed:20140418-103113:0.143370167 +T_428mkv_misdetected_as_ass:e93bf556dd2814f52c44e523ae5b8721:passed:20140518-155446:0.033341203 diff --git a/tests/test-428mkv_misdetected_as_ass.rb b/tests/test-428mkv_misdetected_as_ass.rb new file mode 100755 index 000000000..33d226bd8 --- /dev/null +++ b/tests/test-428mkv_misdetected_as_ass.rb @@ -0,0 +1,5 @@ +#!/usr/bin/ruby -w + +# T_428mkv_misdetected_as_ass +describe "mkvmerge / Matroska file mis-detected as ASS subtitles" +test_identify "data/mkv/detected-as-ass.mkv"