Fixed support for USF subtitles stored in UTF-16 and UTF-32. Added support for USF subtitles stored in UTF-8 without a BOM. Both work by parsing the "<?xml ... encoding="..." ...?>" line and adjusting the "encoding" attribute if necessary.

This commit is contained in:
Moritz Bunkus 2005-07-08 12:12:51 +00:00
parent 84f7683e07
commit c8426104bc
4 changed files with 85 additions and 1 deletions

View File

@ -1,3 +1,9 @@
2005-07-08 Moritz Bunkus <moritz@bunkus.org>
* mkvmerge: bug fix: Fixed support for USF subtitles stored in
UTF-16 and UTF-32. Added support for USF subtitles stored in UTF-8
without a BOM.
2005-07-01 Moritz Bunkus <moritz@bunkus.org>
* Released v1.5.0.

View File

@ -235,6 +235,9 @@ public:
virtual string getline();
virtual int read_next_char(char *buffer);
virtual byte_order_e get_byte_order();
// Overrides the byte order stored in this object with the value
// supplied by the caller.
virtual void set_byte_order(byte_order_e new_byte_order) { this->byte_order = new_byte_order; }
};
class MTX_DLL_API mm_stdio_c: public mm_io_c {

View File

@ -640,6 +640,8 @@ xml_parser_c::parse_one_xml_line() {
if (!m_xml_source->getline2(line))
return false;
handle_xml_encoding(line);
line += "\n";
if ((XML_Parse(m_xml_parser, line.c_str(), line.length(),
m_xml_source->eof()) == 0) &&
@ -664,3 +666,71 @@ xml_parser_c::throw_error(const xml_parser_error_c &error) {
m_saved_parser_error = error;
longjmp(m_parser_error_jmp_buf, 1);
}
// Scans the XML declaration ("<?xml ... ?>") contained in "line" and
// rewrites it in place.
//
// The declaration may span several calls: the parser state and any
// partially collected attribute name/value are kept in member variables
// (m_xml_parser_state, m_xml_attribute_name, m_xml_attribute_value).
//
// For each completed attribute the function re-emits it as
// ' name="value"'. If the attribute is "encoding" and its value starts
// with "utf" the value is replaced by "UTF-8" -- presumably because the
// text source already hands out lines converted to UTF-8; confirm
// against m_xml_source's implementation.
//
// "line" is left untouched if the declaration has already been fully
// processed, if the source reports BO_NONE as its byte order, or if the
// declaration start has not been seen yet and is not on this line.
//
// NOTE(review): only double quotes are recognized as attribute value
// delimiters; single-quoted values end up in the attribute name buffer.
void
xml_parser_c::handle_xml_encoding(string &line) {
  if ((XMLP_STATE_AFTER_HEADER == m_xml_parser_state) ||
      (BO_NONE == m_xml_source->get_byte_order()))
    return;

  // Use the string's own index type so that the "not found" result of
  // find() can be compared against npos instead of relying on a
  // narrowing conversion to int.
  string::size_type pos = 0;
  string new_line;

  if (XMLP_STATE_INITIAL == m_xml_parser_state) {
    pos = line.find("<?xml");
    if (string::npos == pos)
      return;

    m_xml_parser_state = XMLP_STATE_ATTRIBUTE_NAME;
    // Skip the five characters of "<?xml" and keep everything up to and
    // including them unchanged.
    pos += 5;
    new_line = line.substr(0, pos);
  }

  while ((line.length() > pos) &&
         (XMLP_STATE_AFTER_HEADER != m_xml_parser_state)) {
    const char cur_char = line[pos];
    ++pos;

    if (XMLP_STATE_ATTRIBUTE_NAME == m_xml_parser_state) {
      if (('?' == cur_char) && (line.length() > pos) &&
          ('>' == line[pos])) {
        // End of the declaration: copy the remainder of the line
        // (everything after the '>') verbatim.
        new_line += "?>";
        new_line += line.substr(pos + 1);
        m_xml_parser_state = XMLP_STATE_AFTER_HEADER;

      } else if ('"' == cur_char)
        m_xml_parser_state = XMLP_STATE_ATTRIBUTE_VALUE;

      else if ((' ' != cur_char) && ('=' != cur_char))
        m_xml_attribute_name += cur_char;

    } else {
      // XMLP_STATE_ATTRIBUTE_VALUE
      if ('"' == cur_char) {
        m_xml_parser_state = XMLP_STATE_ATTRIBUTE_NAME;
        strip(m_xml_attribute_name);
        strip(m_xml_attribute_value);

        if (m_xml_attribute_name == "encoding") {
          m_xml_attribute_value = downcase(m_xml_attribute_value);

          // NOTE(review): this function returns early when the byte
          // order is BO_NONE, so the first condition looks unreachable
          // unless get_byte_order() can change between the two calls
          // -- confirm before relying on it.
          if ((m_xml_source->get_byte_order() == BO_NONE) &&
              ((m_xml_attribute_value == "utf-8") ||
               (m_xml_attribute_value == "utf8")))
            m_xml_source->set_byte_order(BO_UTF8);

          else if (starts_with_case(m_xml_attribute_value, "utf"))
            m_xml_attribute_value = "UTF-8";
        }

        new_line += " " + m_xml_attribute_name + "=\"" +
          m_xml_attribute_value + "\"";

        m_xml_attribute_name = "";
        m_xml_attribute_value = "";

      } else
        m_xml_attribute_value += cur_char;
    }
  }

  line = new_line;
}

View File

@ -60,11 +60,13 @@ class MTX_DLL_API xml_parser_c {
private:
jmp_buf m_parser_error_jmp_buf;
xml_parser_error_c m_saved_parser_error;
string m_xml_attribute_name, m_xml_attribute_value;
protected:
// States of the small state machine that handle_xml_encoding() uses to
// walk through the "<?xml ... ?>" declaration.
enum state_t {
// The start of the declaration ("<?xml") has not been found yet.
XMLP_STATE_INITIAL,
// NOTE(review): not referenced by handle_xml_encoding(); presumably
// kept for other users of this enum -- confirm.
XMLP_STATE_IN_HEADER,
// Inside the declaration, collecting characters of an attribute name.
XMLP_STATE_ATTRIBUTE_NAME,
// Between the opening and closing double quote of an attribute value.
XMLP_STATE_ATTRIBUTE_VALUE,
// The closing "?>" has been seen; no further rewriting takes place.
XMLP_STATE_AFTER_HEADER
};
@ -91,6 +93,9 @@ public:
virtual bool parse_one_xml_line();
virtual void throw_error(const xml_parser_error_c &error);
private:
void handle_xml_encoding(string &line);
};
typedef struct {