From 5e6d91ccc3fba9d92fbcf40fe87b3ed7278d6ddd Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Sun, 9 Jul 2017 21:35:51 +0200 Subject: [PATCH] XML: replace XML declaration's `encoding` attribute with `UTF-8` Old pugixml versions didn't recode to UTF-8 themselves. Therefore mkvmerge contains code to determine the encoding from the XML declaration's `encoding` attribute and to recode the content to UTF-8 before passing it to pugixml. The old `encoding` attribute was left untouched. Newer pugixml versions do recode to UTF-8. In order to prevent that (and therefore recoding twice), the XML declaration's `encoding` attribute must be set to `UTF-8`. --- src/common/xml/xml.cpp | 12 +++++++++--- tests/results.txt | 8 ++++---- tests/test-355chapters.rb | 2 +- tests/test-407empty_tag_and_chapter_files.rb | 6 +++--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/common/xml/xml.cpp b/src/common/xml/xml.cpp index e5b0fa300..31fdad213 100644 --- a/src/common/xml/xml.cpp +++ b/src/common/xml/xml.cpp @@ -74,9 +74,15 @@ load_file(std::string const &file_name, "encoding \\s* = \\s*" // encoding attribute "\" ( [^\"]+ ) \"", // attribute value boost::regex::perl | boost::regex::mod_x | boost::regex::icase); - boost::match_results matches; - if (boost::regex_search(content, matches, encoding_re)) - content = charset_converter_c::init(matches[1].str())->utf8(content); + + boost::smatch matches; + if (boost::regex_search(content, matches, encoding_re)) { + // Extract the old encoding, replace the string with "UTF-8" so + // that pugixml doesn't recode, and recode to UTF-8. 
+ auto encoding = matches[1].str(); + content.replace(matches[1].first, matches[1].second, "UTF-8"); + content = charset_converter_c::init(encoding)->utf8(content); + } } std::stringstream scontent(content); diff --git a/tests/results.txt b/tests/results.txt index 77710ccc9..8a50b740e 100644 --- a/tests/results.txt +++ b/tests/results.txt @@ -200,10 +200,10 @@ T_352timecode_scale_auto_libmatroska_assert:3ee28c25aa8d8efd03259e94e276d2aa:pas T_353ac3-from-ts-with-missing-tcs-with-non-zero-first-tc::new:20120312-134345:0.0 T_353ac3_from_ts_with_missing_tcs_with_non_zero_first_tc:d22c8fa039e1f511de0b64f20cb35ce4:passed:20120312-134456:1.302275752 T_354h264_60000_1001i_def_duration_60000_1000:2f410ba8c137f1cc436f409183c89660:passed:20120314-090846:0.349677199 -T_355chapters:f3ee6cec38579be51e58d8e2ee4c3e46-4ed50851c2bc40094512201d9174f5ac-fdfebfa48bbd5fc21088827b0ad8f616-87a60c81c05fb0a153a2e041485ae2cb-245c2ac9cc2605bd97dbbe220e992720-ca4eafeb2a30375e4a931019df164b36-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok:passed:20120324-121601:0.346666482 -T_356tags:3174001fdd4879cd0e206ec3036ad9d3-144ae344a5bd298039d9204cd8db4d10-adfb8c5a2aa5c4b181d00b52f9244a2e-f6526cfaaef01627c52ee2ba25f03255-fdfebfa48bbd5fc21088827b0ad8f616-df66ac315e716f046903602cf395bf0f-d6292e0c55458f39c9d1aa7e962896ab-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok:passed:20120324-121752:0.430471559 +T_355chapters:f3ee6cec38579be51e58d8e2ee4c3e46-4ed50851c2bc40094512201d9174f5ac-87a60c81c05fb0a153a2e041485ae2cb-245c2ac9cc2605bd97dbbe220e992720-ca4eafeb2a30375e4a931019df164b36-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok:passed:20120324-121601:0.346666482 +T_356tags:3174001fdd4879cd0e206ec3036ad9d3-144ae344a5bd298039d9204cd8db4d10-adfb8c5a2aa5c4b181d00b52f9244a2e-f6526cfaaef01627c52ee2ba25f03255-df66ac315e716f046903602cf395bf0f-d6292e0c55458f39c9d1aa7e962896ab-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok-ok:passed:20120324-121752:0.430471559 
T_357segment_info:c734542adcdeca270db3b6e41fd85ffc-61d4730547bcd79e9a692caa4c214a84-ok-ok-ok-ok-ok-ok-ok:passed:20120324-122844:0.227807646 -T_358usf:3b773812a7b028cece3b0c555ab1667a:passed:20120329-142144:0.051754089 +T_358usf:57ae0e1a12bca75de4277b8052cbdd95:passed:20120329-142144:0.051754089 T_359split_parts:be3881a6d90a7d2011c1e74f4ad31664+409aad9a66b8bc221692983eafa21d2f+ok-c62446819af26a351963b51fb7b1d0f6+ok-fd0291d21750277fd9627779207b0292+ok-497448adf872f4050f5e34d9aacf5fa0+4f25451d573042cfdff4809a28638e93+782db07154db9054ff9f0ef3bf1235c6+f129ab41e298b6358fc0197c92c3da5f+ea7605c6da77b5181202d0806191d635+4e8b657d7e371b84a7d89232e5c9fb3f+d8a3056038a13acc3caa11fa8581d085+ok-585946cee0f3e37ab27d7fb09d894081+ok-bbb672ffe01a2f1028aae61551429d8d+ok:passed:20120331-133448:2.321768368 T_360X_chapters_hex_format:87a60c81c05fb0a153a2e041485ae2cb-3853793b0d88fc10efadb146ca948833:passed:20120404-152038:0.047282116 T_361file_concatenation:VTS_01_2.VOB+VTS_01_3.VOB-5150304df7cc54a8a7ce123fb2139915-5150304df7cc54a8a7ce123fb2139915-18d827a751d6a222faa88f6653217eaa-18d827a751d6a222faa88f6653217eaa-18d827a751d6a222faa88f6653217eaa-16d653d956e995f4703b76b1b49b216a-7a9aca3ab76152ba1208edc1f0658ae3-812c9a65bd6f6e0cea9a4bfd55d4e60d-812c9a65bd6f6e0cea9a4bfd55d4e60d-812c9a65bd6f6e0cea9a4bfd55d4e60d-812c9a65bd6f6e0cea9a4bfd55d4e60d-174af56dfde3a6a981c5a1330cdeec9e-35725dd002d6504fdaf74aa10374ba8b-d5c0b1e44d4264a735728989b341766d-d5c0b1e44d4264a735728989b341766d:passed:20120406-144928:18.646532342 @@ -252,7 +252,7 @@ T_403opus_remux_final:f0bed02ce77c7500626d1fa853180d1c:passed:20130705-135811:0. 
T_404opus_extraction:0aba264a50870d5cd62d8d12543898bd:passed:20130915-201931:0.050758351 T_405packet_ordering_and_default_duration:4e777a2b2516d47230e04010dc6d2c21:passed:20130916-211719:0.258475566 T_406ogm_chapters_ansi_encoded:27fff755c51440ca09cce5c3ff9a885a-9671aceb413291015ba250c8b88cce63-8ce95f8788df9adcff26d86a43ded89f:passed:20131002-230255:0.275447568 -T_407empty_tag_and_chapter_files:fdfebfa48bbd5fc21088827b0ad8f616-fdfebfa48bbd5fc21088827b0ad8f616-fdfebfa48bbd5fc21088827b0ad8f616:passed:20131018-202312:0.131204626 +T_407empty_tag_and_chapter_files:error-error-error:passed:20131018-202312:0.131204626 T_408utf_encodings_with_bom:9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c:passed:20131019-155216:0.215939391 T_409mux_vp9:a0ec1c19b50d2222712bd1046a89b917-6dc39ff738ebc616901de3831fbf3a46:passed:20131019-195820:0.071892447 T_410extract_vp9:b6135380fa07f827384ad1004015d79c:passed:20131019-200643:0.033861429 diff --git a/tests/test-355chapters.rb b/tests/test-355chapters.rb index f35ff36e9..211ced224 100755 --- a/tests/test-355chapters.rb +++ b/tests/test-355chapters.rb @@ -14,7 +14,7 @@ end # Invalid files: invalid.sort.each do |chapters| test chapters do - messages, exit_code = merge("#{source} --chapters #{chapters}", :exit_code => 2) + messages, _ = merge("#{source} --chapters #{chapters}", :exit_code => :error) messages.detect { |line| /The\s+XML\s+chapter\s+file.*contains\s+an\s+error/i.match line } ? 
:ok : :bad end end diff --git a/tests/test-407empty_tag_and_chapter_files.rb b/tests/test-407empty_tag_and_chapter_files.rb index c84bc4e5f..9a7132927 100755 --- a/tests/test-407empty_tag_and_chapter_files.rb +++ b/tests/test-407empty_tag_and_chapter_files.rb @@ -5,6 +5,6 @@ describe "mkvmerge / empty tag and chapter files" file = "data/subtitles/srt/ven.srt" -test_merge file, :args => "--global-tags data/text/tags-empty.xml" -test_merge file, :args => "--tags 0:data/text/tags-empty.xml" -test_merge file, :args => "--chapters data/text/chapters-empty.xml" +test_merge file, :args => "--global-tags data/text/tags-invalid-empty.xml", :exit_code => :error +test_merge file, :args => "--tags 0:data/text/tags-invalid-empty.xml", :exit_code => :error +test_merge file, :args => "--chapters data/text/chapters-invalid-empty.xml", :exit_code => :error