diff --git a/ChangeLog b/ChangeLog index 317f660f1..85db0cd41 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2005-07-08 Moritz Bunkus + + * mkvmerge: bug fix: Fixed support for USF subtitles stored in + UTF-16 and UTF-32. Added support for USF subtitles stored in UTF-8 + without a BOM. + 2005-07-01 Moritz Bunkus * Released v1.5.0. diff --git a/src/common/mm_io.h b/src/common/mm_io.h index 6dd4f31e6..32fe74cee 100644 --- a/src/common/mm_io.h +++ b/src/common/mm_io.h @@ -235,6 +235,9 @@ public: virtual string getline(); virtual int read_next_char(char *buffer); virtual byte_order_e get_byte_order(); + virtual void set_byte_order(byte_order_e new_byte_order) { + byte_order = new_byte_order; + } }; class MTX_DLL_API mm_stdio_c: public mm_io_c { diff --git a/src/common/xml_element_parser.cpp b/src/common/xml_element_parser.cpp index fb09b3b47..c824bf006 100644 --- a/src/common/xml_element_parser.cpp +++ b/src/common/xml_element_parser.cpp @@ -640,6 +640,8 @@ xml_parser_c::parse_one_xml_line() { if (!m_xml_source->getline2(line)) return false; + handle_xml_encoding(line); + line += "\n"; if ((XML_Parse(m_xml_parser, line.c_str(), line.length(), m_xml_source->eof()) == 0) && @@ -664,3 +666,71 @@ xml_parser_c::throw_error(const xml_parser_error_c &error) { m_saved_parser_error = error; longjmp(m_parser_error_jmp_buf, 1); } + +void +xml_parser_c::handle_xml_encoding(string &line) { + int pos; + string new_line; + + if ((XMLP_STATE_AFTER_HEADER == m_xml_parser_state) || + (BO_NONE == m_xml_source->get_byte_order())) + return; + + pos = 0; + + if (XMLP_STATE_INITIAL == m_xml_parser_state) { + pos = line.find("<?xml"); + if (0 > pos) + return; + m_xml_parser_state = XMLP_STATE_ATTRIBUTE_NAME; + pos += 5; + new_line = line.substr(0, pos); + } + + while ((line.length() > pos) && + (XMLP_STATE_AFTER_HEADER != m_xml_parser_state)) { + char cur_char = line[pos]; + ++pos; + + if (XMLP_STATE_ATTRIBUTE_NAME == m_xml_parser_state) { + if (('?' 
== cur_char) && (line.length() > pos) && + ('>' == line[pos])) { + new_line += "?>" + line.substr(pos + 1, line.length() - pos - 1); + m_xml_parser_state = XMLP_STATE_AFTER_HEADER; + + } else if ('"' == cur_char) + m_xml_parser_state = XMLP_STATE_ATTRIBUTE_VALUE; + + else if ((' ' != cur_char) && ('=' != cur_char)) + m_xml_attribute_name += cur_char; + + } else { + // XMLP_STATE_ATTRIBUTE_VALUE + if ('"' == cur_char) { + m_xml_parser_state = XMLP_STATE_ATTRIBUTE_NAME; + strip(m_xml_attribute_name); + strip(m_xml_attribute_value); + if (m_xml_attribute_name == "encoding") { + m_xml_attribute_value = downcase(m_xml_attribute_value); + if ((m_xml_source->get_byte_order() == BO_NONE) && + ((m_xml_attribute_value == "utf-8") || + (m_xml_attribute_value == "utf8"))) + m_xml_source->set_byte_order(BO_UTF8); + + else if (starts_with_case(m_xml_attribute_value, "utf")) + m_xml_attribute_value = "UTF-8"; + } + + new_line += " " + m_xml_attribute_name + "=\"" + + m_xml_attribute_value + "\""; + m_xml_attribute_name = ""; + m_xml_attribute_value = ""; + + } else + m_xml_attribute_value += cur_char; + } + } + + line = new_line; +} + diff --git a/src/common/xml_element_parser.h b/src/common/xml_element_parser.h index 91df6f050..2bf2d572f 100644 --- a/src/common/xml_element_parser.h +++ b/src/common/xml_element_parser.h @@ -60,11 +60,13 @@ class MTX_DLL_API xml_parser_c { private: jmp_buf m_parser_error_jmp_buf; xml_parser_error_c m_saved_parser_error; + string m_xml_attribute_name, m_xml_attribute_value; protected: enum state_t { XMLP_STATE_INITIAL, - XMLP_STATE_IN_HEADER, + XMLP_STATE_ATTRIBUTE_NAME, + XMLP_STATE_ATTRIBUTE_VALUE, XMLP_STATE_AFTER_HEADER }; @@ -91,6 +93,9 @@ public: virtual bool parse_one_xml_line(); virtual void throw_error(const xml_parser_error_c &error); + +private: + void handle_xml_encoding(string &line); }; typedef struct {