diff --git a/doc/matroska-tags.xml b/doc/matroska-tags.xml index a7ff1f78d..13f465c73 100644 --- a/doc/matroska-tags.xml +++ b/doc/matroska-tags.xml @@ -19,7 +19,8 @@ Subject Bibliography - Language + + ger QUJDREVG Encoder EncodeSettings @@ -131,7 +132,7 @@
Address
URL Email - Language + eng @@ -142,7 +143,7 @@ <Address>Address2</Address> <URL>URL2</URL> <Email>Email2</Email> - <Language>Language2</Language> + <Language>fre</Language> @@ -150,7 +151,7 @@ - + diff --git a/src/iso639.cpp b/src/iso639.cpp index 599279c27..3d26f51da 100644 --- a/src/iso639.cpp +++ b/src/iso639.cpp @@ -535,7 +535,7 @@ iso639_language_t iso639_languages[] = {"Zuni", "zun", NULL}, {NULL, NULL, NULL}}; -char *get_iso639_english_name(char *iso639_2_code) { +char *get_iso639_english_name(const char *iso639_2_code) { int i; i = 0; @@ -548,7 +548,7 @@ char *get_iso639_english_name(char *iso639_2_code) { return NULL; } -int is_valid_iso639_2_code(char *iso639_2_code) { +int is_valid_iso639_2_code(const char *iso639_2_code) { int i; i = 0; diff --git a/src/iso639.h b/src/iso639.h index cf1b51bcf..7cc3edc74 100644 --- a/src/iso639.h +++ b/src/iso639.h @@ -27,8 +27,8 @@ typedef struct { extern iso639_language_t iso639_language_list[]; -int is_valid_iso639_2_code(char *iso639_2_code); -char *get_iso639_english_name(char *iso639_2_code); +int is_valid_iso639_2_code(const char *iso639_2_code); +char *get_iso639_english_name(const char *iso639_2_code); void list_iso639_languages(); #endif // __ISO639_H diff --git a/src/tagparser.h b/src/tagparser.h index cbdc6b227..d62338f89 100644 --- a/src/tagparser.h +++ b/src/tagparser.h @@ -115,8 +115,9 @@ using namespace libmatroska; #define E_MultiComment 77 #define E_CommentName 78 #define E_Comments 79 +#define E_CommentLanguage 81 -// MAX: 80 +// MAX: 81 typedef struct { XML_Parser parser; @@ -190,20 +191,7 @@ void perror(parser_data_t *pdata, const char *fmt, ...); "allowed under <%s>.", name, \ parent_name.c_str()); -void start_element(void *user_data, const char *name, - const char **atts); -void start_level1(parser_data_t *pdata, const char *name); -void start_level2(parser_data_t *pdata, const char *name); -void start_level3(parser_data_t *pdata, const char *name); -void start_level4(parser_data_t *pdata, const char *name); -void start_level5(parser_data_t *pdata, const char *name); - void end_element(void *user_data, const char *name); -void end_level1(parser_data_t *pdata, const char *name); -void end_level2(parser_data_t *pdata, const char *name); -void end_level3(parser_data_t *pdata, const char *name); -void end_level4(parser_data_t *pdata, const char *name); -void end_level5(parser_data_t *pdata, const char *name); void parse_xml_tags(const char *name, KaxTags *tags); diff --git a/src/tagparser_end.cpp b/src/tagparser_end.cpp index 1d0f60906..0a02b2bb2 100644 --- a/src/tagparser_end.cpp +++ b/src/tagparser_end.cpp @@ -34,6 +34,7 @@ #include "base64.h" #include "common.h" +#include "iso639.h" #include "mm_io.h" #include "tagparser.h" @@ -41,8 +42,8 @@ using namespace std; using namespace libebml; using namespace libmatroska; -void el_get_uint(parser_data_t *pdata, EbmlElement *el, - uint64_t min_value = 0) { +static void el_get_uint(parser_data_t *pdata, EbmlElement *el, + uint64_t min_value = 0) { int64 value; strip(*pdata->bin); @@ -56,8 +57,8 @@ void el_get_uint(parser_data_t *pdata, EbmlElement *el, *(static_cast(el)) = value; } -void el_get_sint(parser_data_t *pdata, EbmlElement *el, - int64_t min_value = -9223372036854775807LL-1) { +static void el_get_sint(parser_data_t *pdata, EbmlElement *el, + int64_t min_value = -9223372036854775807LL-1) { int64 value; strip(*pdata->bin); @@ -71,8 +72,8 @@ void el_get_sint(parser_data_t *pdata, EbmlElement *el, *(static_cast(el)) = value; } -void el_get_float(parser_data_t *pdata, EbmlElement *el, - float min_value = (float)1.40129846432481707e-45) { +static void el_get_float(parser_data_t *pdata, EbmlElement *el, + float min_value = (float)1.40129846432481707e-45) { char *endptr; float value; @@ -87,15 +88,21 @@ void el_get_float(parser_data_t *pdata, EbmlElement *el, *(static_cast(el)) = value; } -void el_get_string(parser_data_t *pdata, EbmlElement *el) { +static void el_get_string(parser_data_t *pdata, EbmlElement *el, + bool check_language = false) { strip(*pdata->bin); if (pdata->bin->length() == 0) perror(pdata, "Expected a string but found only whitespaces."); + if (check_language && !is_valid_iso639_2_code(pdata->bin->c_str())) + perror(pdata, "'%s' is not a valid ISO639-2 language code. See the " + "output of 'mkvmerge --list-languages' for a list of all " + "valid language codes.", pdata->bin->c_str()); + *(static_cast(el)) = pdata->bin->c_str(); } -void el_get_utf8string(parser_data_t *pdata, EbmlElement *el) { +static void el_get_utf8string(parser_data_t *pdata, EbmlElement *el) { strip(*pdata->bin); if (pdata->bin->length() == 0) perror(pdata, "Expected a string but found only whitespaces."); @@ -104,7 +111,7 @@ void el_get_utf8string(parser_data_t *pdata, EbmlElement *el) { cstrutf8_to_UTFstring(pdata->bin->c_str()); } -void el_get_binary(parser_data_t *pdata, EbmlElement *el) { +static void el_get_binary(parser_data_t *pdata, EbmlElement *el) { int64_t result; binary *buffer; mm_io_c *io; @@ -146,7 +153,7 @@ void el_get_binary(parser_data_t *pdata, EbmlElement *el) { // ISO 8601 format: 2003-07-17T19:50:53+0200 // 012345678901234567890123 // 1 2 -void el_get_date(parser_data_t *pdata, EbmlElement *el) { +static void el_get_date(parser_data_t *pdata, EbmlElement *el) { const char *errmsg = "Expected a date in ISO 8601 format but found '%s'. " "The ISO 8601 date format looks like this: YYYY-MM-DDTHH:MM:SS:-TZTZ, " "e.g. 2003-07-17T19:50:52+0200. The time zone (TZ) may also be negative."; @@ -223,7 +230,27 @@ void el_get_date(parser_data_t *pdata, EbmlElement *el) { (static_cast(el))->SetEpochDate(tme); } -void end_level1(parser_data_t *pdata, const char *) { +static bool is_multicomment(parser_data_t *pdata, const char *name) { + int parent; + + parent = (*pdata->parents)[pdata->parents->size() - 2]; + + if (parent != E_MultiComment) + return false; + + if (!strcmp(name, "Name")) + el_get_string(pdata, &GetChild(*pdata->m_comment)); + else if (!strcmp(name, "Comments")) + el_get_utf8string(pdata, &GetChild + (*pdata->m_comment)); + else if (!strcmp(name, "Language")) + el_get_string(pdata, &GetChild + (*pdata->m_comment), true); + + return true; +} + +static void end_level1(parser_data_t *pdata, const char *) { // Can only be "Tag" pdata->targets = NULL; pdata->general = NULL; @@ -239,7 +266,7 @@ void end_level1(parser_data_t *pdata, const char *) { pdata->m_comment = NULL; } -void end_level2(parser_data_t *pdata, const char *name) { +static void end_level2(parser_data_t *pdata, const char *name) { if (!strcmp(name, "Targets")) { pdata->track_uid = NULL; pdata->chapter_uid = NULL; @@ -290,10 +317,13 @@ void end_level2(parser_data_t *pdata, const char *name) { } } -void end_level3(parser_data_t *pdata, const char *name) { +static void end_level3(parser_data_t *pdata, const char *name) { string parent_name; int parent; + if (is_multicomment(pdata, name)) + return; + parent_name = (*pdata->parent_names)[pdata->parent_names->size() - 2]; parent = (*pdata->parents)[pdata->parents->size() - 2]; @@ -309,7 +339,7 @@ void end_level3(parser_data_t *pdata, const char *name) { else if (!strcmp(name, "Bibliography")) el_get_utf8string(pdata, &GetChild(*pdata->general)); else if (!strcmp(name, "Language")) - el_get_string(pdata, &GetChild(*pdata->general)); + el_get_string(pdata, &GetChild(*pdata->general), true); else if (!strcmp(name, "Rating")) el_get_binary(pdata, &GetChild(*pdata->general)); else if (!strcmp(name, "Encoder")) @@ -428,10 +458,13 @@ void end_level3(parser_data_t *pdata, const char *name) { die("Unknown parent: level 3, %d", parent); } -void end_level4(parser_data_t *pdata, const char *name) { +static void end_level4(parser_data_t *pdata, const char *name) { string parent_name; int parent; + if (is_multicomment(pdata, name)) + return; + parent_name = (*pdata->parent_names)[pdata->parent_names->size() - 2]; parent = (*pdata->parents)[pdata->parents->size() - 2]; @@ -522,16 +555,19 @@ void end_level4(parser_data_t *pdata, const char *name) { el_get_string(pdata, pdata->t_email); else if (!strcmp(name, "Language")) el_get_string(pdata, &GetChild - (*pdata->title)); + (*pdata->title), true); } else die("Unknown parent: level 4, %d", parent); } -void end_level5(parser_data_t *pdata, const char *name) { +static void end_level5(parser_data_t *pdata, const char *name) { string parent_name; int parent; + if (is_multicomment(pdata, name)) + return; + parent_name = (*pdata->parent_names)[pdata->parent_names->size() - 2]; parent = (*pdata->parents)[pdata->parents->size() - 2]; @@ -550,6 +586,13 @@ void end_level5(parser_data_t *pdata, const char *name) { die("Unknown parent: level 4, %d", parent); } +static void end_level6(parser_data_t *pdata, const char *name) { + if (is_multicomment(pdata, name)) + return; + + die("tagparser_end: Unknown element. This should not have happened."); +} + void end_element(void *user_data, const char *name) { parser_data_t *pdata; @@ -558,7 +601,9 @@ void end_element(void *user_data, const char *name) { if (pdata->data_allowed && (pdata->bin == NULL)) perror(pdata, "Element <%s> does not contain any data.", name); - if (pdata->depth == 2) + if (pdata->depth == 1) + ; // Nothing to do here! + else if (pdata->depth == 2) end_level1(pdata, name); else if (pdata->depth == 3) end_level2(pdata, name); @@ -568,6 +613,10 @@ void end_element(void *user_data, const char *name) { end_level4(pdata, name); else if (pdata->depth == 6) end_level5(pdata, name); + else if (pdata->depth == 7) + end_level6(pdata, name); + else + die("tagparser_end: depth > 7: %d", pdata->depth); if (pdata->bin != NULL) { delete pdata->bin; diff --git a/src/tagparser_start.cpp b/src/tagparser_start.cpp index 85ce20297..4d7b4f52b 100644 --- a/src/tagparser_start.cpp +++ b/src/tagparser_start.cpp @@ -77,7 +77,53 @@ template Type &GetNextEmptyChild(EbmlMaster &master, return *(static_cast(e)); } -void start_level1(parser_data_t *pdata, const char *name) { +static bool is_multicomment(parser_data_t *pdata, const char *name, + EbmlElement *parent_elt) { + string parent_name; + int parent; + + parent_name = (*pdata->parent_names)[pdata->parent_names->size() - 2]; + parent = (*pdata->parents)[pdata->parents->size() - 1]; + + if (((parent == E_MultiComment) || (parent == E_CommentName) || + (parent == E_Comments) || (parent == E_CommentLanguage)) && + !strcmp(name, "MultiComment")) + perror_nochild(); + + if ((parent != E_MultiComment) && !strcmp(name, "MultiComment")) { + if (pdata->m_comment == NULL) + pdata->m_comment = + &GetEmptyChild(*((EbmlMaster *)parent_elt)); + else + pdata->m_comment = + &GetNextEmptyChild(*((EbmlMaster *)parent_elt), + *pdata->m_comment); + pdata->parents->push_back(E_MultiComment); + + return true; + + } else if (parent == E_MultiComment) { + pdata->data_allowed = true; + + if (!strcmp(name, "Name")) { + check_instances(pdata->m_comment, KaxTagMultiCommentName); + pdata->parents->push_back(E_CommentName); + } else if (!strcmp(name, "Comments")) { + check_instances(pdata->m_comment, KaxTagMultiCommentComments); + pdata->parents->push_back(E_Comments); + } else if (!strcmp(name, "Language")) { + check_instances(pdata->m_comment, KaxTagMultiCommentLanguage); + pdata->parents->push_back(E_CommentLanguage); + } else + perror_nochild(); + + return true; + } + + return false; +} + +static void start_level1(parser_data_t *pdata, const char *name) { string parent_name; parent_name = (*pdata->parent_names)[pdata->parent_names->size() - 2]; @@ -94,9 +140,12 @@ void start_level1(parser_data_t *pdata, const char *name) { pdata->parents->push_back(E_Tag); } -void start_level2(parser_data_t *pdata, const char *name) { +static void start_level2(parser_data_t *pdata, const char *name) { string parent_name; + if (is_multicomment(pdata, name, pdata->tag)) + return; + parent_name = (*pdata->parent_names)[pdata->parent_names->size() - 2]; if (!strcmp(name, "Targets")) { @@ -188,7 +237,7 @@ void start_level2(parser_data_t *pdata, const char *name) { perror_nochild(); } -void start_level3(parser_data_t *pdata, const char *name) { +static void start_level3(parser_data_t *pdata, const char *name) { string parent_name; int parent; @@ -196,6 +245,9 @@ void start_level3(parser_data_t *pdata, const char *name) { parent = (*pdata->parents)[pdata->parents->size() - 1]; if (parent == E_Targets) { + if (is_multicomment(pdata, name, pdata->targets)) + return; + pdata->data_allowed = true; if (!strcmp(name, "TrackUID")) { @@ -219,6 +271,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_General) { + if (is_multicomment(pdata, name, pdata->general)) + return; + pdata->data_allowed = true; if (!strcmp(name, "Subject")) { @@ -287,6 +342,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_Genres) { + if (is_multicomment(pdata, name, pdata->genres)) + return; + pdata->data_allowed = true; if (!strcmp(name, "AudioGenre")) { @@ -312,6 +370,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_AudioSpecific) { + if (is_multicomment(pdata, name, pdata->audio_specific)) + return; + pdata->data_allowed = true; if (!strcmp(name, "AudioEncryption")) { @@ -348,6 +409,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_ImageSpecific) { + if (is_multicomment(pdata, name, pdata->image_specific)) + return; + pdata->data_allowed = true; if (!strcmp(name, "CaptureDPI")) { @@ -372,6 +436,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_MultiCommercial) { + if (is_multicomment(pdata, name, pdata->m_commercial)) + return; + if (!strcmp(name, "Commercial")) { if (pdata->commercial == NULL) pdata->commercial = @@ -385,6 +452,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_MultiDate) { + if (is_multicomment(pdata, name, pdata->m_date)) + return; + if (!strcmp(name, "Date")) { if (pdata->date == NULL) pdata->date = &GetEmptyChild(*pdata->m_date); @@ -396,6 +466,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_MultiEntity) { + if (is_multicomment(pdata, name, pdata->m_entity)) + return; + if (!strcmp(name, "Entity")) { if (pdata->entity == NULL) pdata->entity = &GetEmptyChild(*pdata->m_entity); @@ -407,6 +480,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_MultiIdentifier) { + if (is_multicomment(pdata, name, pdata->m_identifier)) + return; + if (!strcmp(name, "Identifier")) { if (pdata->identifier == NULL) pdata->identifier = @@ -420,6 +496,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_MultiLegal) { + if (is_multicomment(pdata, name, pdata->m_legal)) + return; + if (!strcmp(name, "Legal")) { if (pdata->legal == NULL) pdata->legal = &GetEmptyChild(*pdata->m_legal); @@ -431,6 +510,9 @@ void start_level3(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_MultiTitle) { + if (is_multicomment(pdata, name, pdata->m_title)) + return; + if (!strcmp(name, "Title")) { if (pdata->title == NULL) pdata->title = &GetEmptyChild(*pdata->m_title); @@ -445,7 +527,7 @@ void start_level3(parser_data_t *pdata, const char *name) { die("Unknown parent: level 3, %d", parent); } -void start_level4(parser_data_t *pdata, const char *name) { +static void start_level4(parser_data_t *pdata, const char *name) { string parent_name; int parent; @@ -453,6 +535,9 @@ void start_level4(parser_data_t *pdata, const char *name) { parent = (*pdata->parents)[pdata->parents->size() - 1]; if (parent == E_Commercial) { + if (is_multicomment(pdata, name, pdata->commercial)) + return; + pdata->data_allowed = true; if (!strcmp(name, "Type")) { @@ -492,6 +577,9 @@ void start_level4(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_Date) { + if (is_multicomment(pdata, name, pdata->date)) + return; + pdata->data_allowed = true; if (!strcmp(name, "Type")) { @@ -507,6 +595,9 @@ void start_level4(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_Entity) { + if (is_multicomment(pdata, name, pdata->entity)) + return; + pdata->data_allowed = true; if (!strcmp(name, "Type")) { @@ -539,6 +630,9 @@ void start_level4(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_Identifier) { + if (is_multicomment(pdata, name, pdata->identifier)) + return; + pdata->data_allowed = true; if (!strcmp(name, "Type")) { @@ -554,6 +648,9 @@ void start_level4(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_Legal) { + if (is_multicomment(pdata, name, pdata->legal)) + return; + pdata->data_allowed = true; if (!strcmp(name, "Type")) { @@ -577,6 +674,9 @@ void start_level4(parser_data_t *pdata, const char *name) { perror_nochild(); } else if (parent == E_Title) { + if (is_multicomment(pdata, name, pdata->title)) + return; + pdata->data_allowed = true; if (!strcmp(name, "Type")) { @@ -620,7 +720,7 @@ void start_level4(parser_data_t *pdata, const char *name) { die("Unknown parent: level 4, %d", parent); } -void start_level5(parser_data_t *pdata, const char *name) { +static void start_level5(parser_data_t *pdata, const char *name) { string parent_name; int parent; @@ -628,6 +728,9 @@ void start_level5(parser_data_t *pdata, const char *name) { parent = (*pdata->parents)[pdata->parents->size() - 1]; if (parent == E_MultiPrice) { + if (is_multicomment(pdata, name, pdata->m_price)) + return; + pdata->data_allowed = true; if (!strcmp(name, "Currency")) { @@ -643,7 +746,12 @@ void start_level5(parser_data_t *pdata, const char *name) { perror_nochild(); } else - die("Unknown parent: level 4, %d", parent); + die("Unknown parent: level 5, %d", parent); +} + +static void start_level6(parser_data_t *pdata, const char *name) { + if (!is_multicomment(pdata, name, NULL)) + die("tagparser_start: Unknown element - should not have happened."); } void start_element(void *user_data, const char *name, @@ -681,8 +789,10 @@ void start_element(void *user_data, const char *name, start_level4(pdata, name); else if (pdata->depth == 5) start_level5(pdata, name); + else if (pdata->depth == 6) + start_level6(pdata, name); else - die("Depth > 5: %d", pdata->depth); + die("Depth > 6: %d", pdata->depth); if (pdata->parent_names->size() != pdata->parents->size()) perror_unknown();