mirror of
https://gitlab.com/mbunkus/mkvtoolnix.git
synced 2024-12-25 04:11:44 +00:00
mm_text_io_c: fix seeking in UTF-16/-32 encoded files
The `getline` function tries to handle different line ending styles (carriage returns, new lines, a mix of both). To do so it has to probe one more character after having found one of them. If that probed character is not a line ending character, the previous position has to be restored — and the code doing so wrongly assumed that each character is only one byte long, which is not the case in UTF-16/-32 encoded files. Fixes #2160.
This commit is contained in:
parent
0f1514dab5
commit
5934eddc75
3
NEWS.md
3
NEWS.md
@ -19,6 +19,9 @@
|
||||
* mkvmerge, GUI's multiplexer: MIME types: added the `font` top-level media
|
||||
types from RFC 8081. This means that the following new MIME types for fonts
|
||||
can be used: `font/ttf`, `font/otf`, `font/woff` and `font/woff2`.
|
||||
* mkvmerge: fixed reading text files encoded in UTF-16 or UTF-32 that have
|
||||
different forms of line endings (new lines, carriage returns or a mix of
|
||||
both). Fixes #2160.
|
||||
|
||||
|
||||
# Version 18.0.0 "Apricity" 2017-11-18
|
||||
|
@ -1086,13 +1086,15 @@ mm_text_io_c::getline(boost::optional<std::size_t> max_chars) {
|
||||
while (1) {
|
||||
memset(utf8char, 0, 9);
|
||||
|
||||
int len = read_next_char(utf8char);
|
||||
auto previous_pos = getFilePointer();
|
||||
auto len = read_next_char(utf8char);
|
||||
|
||||
if (0 == len)
|
||||
return s;
|
||||
|
||||
if ((1 == len) && (utf8char[0] == '\r')) {
|
||||
if (previous_was_carriage_return && !m_uses_newlines) {
|
||||
setFilePointer(-1, seek_current);
|
||||
setFilePointer(previous_pos);
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -1104,7 +1106,7 @@ mm_text_io_c::getline(boost::optional<std::size_t> max_chars) {
|
||||
return s;
|
||||
|
||||
if (previous_was_carriage_return) {
|
||||
setFilePointer(-len, seek_current);
|
||||
setFilePointer(previous_pos);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -468,3 +468,4 @@ T_619ac_3_misdetected_as_mpeg_ps_and_encrypted:795e9be4c1601e9853378a1fee1bfd01:
|
||||
T_620ac3_incomplete_frame_with_timestamp_from_matroska:b2fa8c28c5a45d40460905464e3a3d5f:passed:20171014-153427:0.397688103
|
||||
T_621propedit_remove_date:fdfebfa48bbd5fc21088827b0ad8f616-ok:passed:20171101-180348:0.062479826
|
||||
T_622aac_adts_8_channels_no_pce:76a81307fdd14e0c033ea8e9b42a2b78-ok:passed:20171117-190136:0.053130324
|
||||
T_623text_files_utf16le_different_line_endings:ed339cd48ef4350f1e1e52eb49af3543:passed:20171201-143509:0.014821508
|
||||
|
5
tests/test-623text_files_utf16le_different_line_endings.rb
Executable file
5
tests/test-623text_files_utf16le_different_line_endings.rb
Executable file
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/ruby -w
|
||||
|
||||
# T_623text_files_utf16le_different_line_endings
|
||||
describe "mkvmerge / text files encoded in UTF-16LE with different line ending styles"
|
||||
test_merge "data/subtitles/srt/utf16le_different_line_ending_styles.srt"
|
Loading…
Reference in New Issue
Block a user