mkvmerge: identify track's "encoding" if known and fixed

This is preparation for making the GUI present the encoding detected
by e.g. a byte order mark to the user so that she doesn't have to (and
cannot) choose a subtitle character set herself. Implements mkvmerge's
part of #2053.
This commit is contained in:
Moritz Bunkus 2017-07-23 09:22:31 +02:00
parent dc25f2ec20
commit 95092247aa
10 changed files with 33 additions and 10 deletions

View File

@ -26,6 +26,10 @@
removing all tag elements not supported by the WebM spec.
* translations: added a Romanian translation of the programs by Daniel (see
AUTHORS).
* mkvmerge: identification: if the encoding/character set of a text subtitle
track is known (e.g. because a byte order mark is present in the file), then
it will be output during identification as the `encoding`
property. Implements mkvmerge's part of #2053.
## Bug fixes

View File

@ -167,6 +167,7 @@ avi_reader_c::parse_subtitle_chunks() {
= srt_parser_c::probe(&text_io) ? avi_subs_demuxer_t::TYPE_SRT
: ssa_parser_c::probe(&text_io) ? avi_subs_demuxer_t::TYPE_SSA
: avi_subs_demuxer_t::TYPE_UNKNOWN;
demuxer.m_encoding = text_io.get_encoding();
if (avi_subs_demuxer_t::TYPE_UNKNOWN != demuxer.m_type)
m_subtitle_demuxers.push_back(demuxer);
@ -894,6 +895,9 @@ avi_reader_c::identify_subtitles() {
|| (avi_subs_demuxer_t::TYPE_SSA == m_subtitle_demuxers[i].m_type))
info.add(mtx::id::text_subtitles, true);
if (m_subtitle_demuxers[i].m_encoding)
info.add(mtx::id::encoding, *m_subtitle_demuxers[i].m_encoding);
id_result_track(1 + AVI_audio_tracks(m_avi) + i, ID_RESULT_TRACK_SUBTITLES,
avi_subs_demuxer_t::TYPE_SRT == m_subtitle_demuxers[i].m_type ? codec_c::get_name(codec_c::type_e::S_SRT, "SRT")
: avi_subs_demuxer_t::TYPE_SSA == m_subtitle_demuxers[i].m_type ? codec_c::get_name(codec_c::type_e::S_SSA_ASS, "SSA/ASS")

View File

@ -48,6 +48,7 @@ struct avi_subs_demuxer_t {
mm_text_io_cptr m_text_io;
subtitles_cptr m_subs;
boost::optional<std::string> m_encoding;
};
class avi_reader_c: public generic_reader_c {

View File

@ -2640,8 +2640,10 @@ kax_reader_c::identify() {
info.add(mtx::id::audio_bits_per_sample, track->a_bps);
} else if ('s' == track->type) {
if (track->codec.is(codec_c::type_e::S_SRT) || track->codec.is(codec_c::type_e::S_SSA_ASS) || track->codec.is(codec_c::type_e::S_KATE))
if (track->codec.is(codec_c::type_e::S_SRT) || track->codec.is(codec_c::type_e::S_SSA_ASS) || track->codec.is(codec_c::type_e::S_KATE)) {
info.add(mtx::id::text_subtitles, true);
info.add(mtx::id::encoding, "UTF-8");
}
}
if (track->content_decoder.has_encodings())

View File

@ -734,8 +734,10 @@ ogm_reader_c::identify() {
if ((0 != sdemuxers[i]->display_width) && (0 != sdemuxers[i]->display_height))
info.add(mtx::id::display_dimensions, boost::format("%1%x%2%") % sdemuxers[i]->display_width % sdemuxers[i]->display_height);
if (dynamic_cast<ogm_s_text_demuxer_c *>(sdemuxers[i].get()) || dynamic_cast<ogm_s_kate_demuxer_c *>(sdemuxers[i].get()))
if (dynamic_cast<ogm_s_text_demuxer_c *>(sdemuxers[i].get()) || dynamic_cast<ogm_s_kate_demuxer_c *>(sdemuxers[i].get())) {
info.add(mtx::id::text_subtitles, true);
info.add(mtx::id::encoding, "UTF-8");
}
auto pixel_dimensions = sdemuxers[i]->get_pixel_dimensions();
if (pixel_dimensions.first && pixel_dimensions.second)

View File

@ -82,8 +82,12 @@ srt_reader_c::get_progress() {
void
srt_reader_c::identify() {
auto info = mtx::id::info_c{};
auto info = mtx::id::info_c{};
auto encoding = m_text_in->get_encoding();
info.add(mtx::id::text_subtitles, true);
if (encoding)
info.add(mtx::id::encoding, *encoding);
id_result_container();
id_result_track(0, ID_RESULT_TRACK_SUBTITLES, codec_c::get_name(codec_c::type_e::S_SRT, "SRT"), info.get());

View File

@ -53,8 +53,9 @@ ssa_reader_c::read_headers() {
: mtx::includes(m_ti.m_sub_charsets, -1) ? charset_converter_c::init(m_ti.m_sub_charsets[-1])
: g_cc_local_utf8;
m_ti.m_id = 0;
m_subs = ssa_parser_cptr(new ssa_parser_c(this, text_in.get(), m_ti.m_fname, 0));
m_ti.m_id = 0;
m_subs = ssa_parser_cptr(new ssa_parser_c(this, text_in.get(), m_ti.m_fname, 0));
m_encoding = text_in->get_encoding();
m_subs->set_charset_converter(cc_utf8);
m_subs->parse();
@ -94,7 +95,10 @@ ssa_reader_c::get_progress() {
void
ssa_reader_c::identify() {
auto info = mtx::id::info_c{};
info.add(mtx::id::text_subtitles, true);
if (m_encoding)
info.add(mtx::id::encoding, *m_encoding);
id_result_container();
id_result_track(0, ID_RESULT_TRACK_SUBTITLES, codec_c::get_name(codec_c::type_e::S_SSA_ASS, "SSA/ASS"), info.get());

View File

@ -23,6 +23,7 @@
class ssa_reader_c: public generic_reader_c {
private:
ssa_parser_cptr m_subs;
boost::optional<std::string> m_encoding;
public:
ssa_reader_c(const track_info_c &ti, const mm_io_cptr &in);

View File

@ -108,6 +108,7 @@ void
webvtt_reader_c::identify() {
auto info = mtx::id::info_c{};
info.add(mtx::id::text_subtitles, true);
info.add(mtx::id::encoding, "UTF-8");
id_result_container();
id_result_track(0, ID_RESULT_TRACK_SUBTITLES, codec_c::get_name(codec_c::type_e::S_WEBVTT, "WebVTT"), info.get());

View File

@ -63,7 +63,7 @@ T_213mp4_broken_pixel_dimensions:82cd619cff1f3ec446b9be16204c844c:passed:2005091
T_214one_frame_avi:683d26d5c30a903e4bfba86448fc3ec2:passed:20051004-192755:0.039489971
T_215X_codec_extradata_avi:e8c0c4dad4908f20062be3c1f3b9ad85-74ac799ad899f703cbb6c6654e5f9f51:passed:20051004-194707:0.052219855
T_216mp4_editlists:bea53d8f0e626436e97fada22ce6ac0d:passed:20051118-191453:0.106975045
T_217file_identification:7a2a506954a56f21739e7912897e07ad-903514f1cd84055d9b06ecff5e8d1ea1-07bcdabfac85ad1ec8daffbd3ec4d6b8-066048d39c245bbda03d357bec5b9593-279167d14c30edc4788cc066f22c941f-ccd6c289299801382b09a9bc13326092-44256b48783ae449ceca65e8419d72e8-cd6039be3553f8bf857c77c8bb99ad1b-55543e6744a979d4a3ffbf0e0bcc855c-d2389a900373adc4864fb3534e145fb0-7febbb46072c2b256735786471c02df4-e2853a83b5964834faa6aa34318aad9c-734e635a1b254319593e24ba89e2434c-1c31748e6eaabc9b84eb737124f457d3-f4bf52b0bf7c773ec337a9d29043b84c-ab8604871b63846cd7df9d5282db3f35-2b05e8b45ff5b8568be5f4aed1bd18bc-4efb23ae937e58377bf4cc00d93f85d0-3b9e8de7136f2fffa4e2bbf1b4aa38e0-db05f705a1e059b29f4db1ea3ee9d59e-e38a8502cd0c407d8ce517913a2db8c0-588d6dd39935990b73cbb2158cf960fe:passed:20051209-180815:1.882517588
T_217file_identification:7a2a506954a56f21739e7912897e07ad-903514f1cd84055d9b06ecff5e8d1ea1-c9b4cd2f0a85926db88e6f32e91b642c-066048d39c245bbda03d357bec5b9593-279167d14c30edc4788cc066f22c941f-ccd6c289299801382b09a9bc13326092-44256b48783ae449ceca65e8419d72e8-cd6039be3553f8bf857c77c8bb99ad1b-55543e6744a979d4a3ffbf0e0bcc855c-d2389a900373adc4864fb3534e145fb0-7febbb46072c2b256735786471c02df4-e2853a83b5964834faa6aa34318aad9c-734e635a1b254319593e24ba89e2434c-1c31748e6eaabc9b84eb737124f457d3-f4bf52b0bf7c773ec337a9d29043b84c-ab8604871b63846cd7df9d5282db3f35-2b05e8b45ff5b8568be5f4aed1bd18bc-4efb23ae937e58377bf4cc00d93f85d0-3b9e8de7136f2fffa4e2bbf1b4aa38e0-db05f705a1e059b29f4db1ea3ee9d59e-e38a8502cd0c407d8ce517913a2db8c0-588d6dd39935990b73cbb2158cf960fe:passed:20051209-180815:1.882517588
T_218theora:049c4b011a7269d1f9682f0eb673c451-6aa1d73668db13ac875f4325d8797c3f:passed:20060428-105054:0.392912102
T_219srt_short_timecodes:4d58c1d5ddab6368080d54a7585b0f83:passed:20060926-112658:0.117747192
T_220ass_with_comments_at_start:30926355189808086b52edf95c8f49d0:passed:20060926-120101:0.382410266
@ -251,7 +251,7 @@ T_402opus_output_order:35ddcb9621bce14b9d3ad1b5def65b60:passed:20130705-115856:0
T_403opus_remux_final:f0bed02ce77c7500626d1fa853180d1c:passed:20130705-135811:0.068533558
T_404opus_extraction:0aba264a50870d5cd62d8d12543898bd:passed:20130915-201931:0.050758351
T_405packet_ordering_and_default_duration:4e777a2b2516d47230e04010dc6d2c21:passed:20130916-211719:0.258475566
T_406ogm_chapters_ansi_encoded:27fff755c51440ca09cce5c3ff9a885a-9671aceb413291015ba250c8b88cce63-8ce95f8788df9adcff26d86a43ded89f:passed:20131002-230255:0.275447568
T_406ogm_chapters_ansi_encoded:b43e32d47438eca76ac6eb17767d370a-9671aceb413291015ba250c8b88cce63-8ce95f8788df9adcff26d86a43ded89f:passed:20131002-230255:0.275447568
T_407empty_tag_and_chapter_files:error-error-error:passed:20131018-202312:0.131204626
T_408utf_encodings_with_bom:9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c-9687bc3195f16a852b88c599c17a9f5c:passed:20131019-155216:0.215939391
T_409mux_vp9:a0ec1c19b50d2222712bd1046a89b917-6dc39ff738ebc616901de3831fbf3a46:passed:20131019-195820:0.071892447
@ -273,7 +273,7 @@ T_424avc_recover_point_sei_before_second_field:15ef9998f82f4f554c6a16db01791eaa:
T_425mpeg_ts_timestamp_outlier:7f20a9e9d6e7e9e3e0a7d0d4db89ac32:passed:20140305-203603:2.509694471
T_426extract_write_bom_only_once:a9255d40de93e2731aaead0a746e582f-a9255d40de93e2731aaead0a746e582f:passed:20140310-195606:0.0
T_427ui_locale_pt_BR:8719aedc77a0435129c79e3a061642bf-344b51e9ae6fe2d8ce60fef18ee0e7d1:passed:20140418-103113:0.143370167
T_428mkv_misdetected_as_ass:240ccc1dd272e8785b9a417f3753d86c:passed:20140518-155446:0.033341203
T_428mkv_misdetected_as_ass:0eb27fde5224e65f6ea69a2b426acd8e:passed:20140518-155446:0.033341203
T_429track_statistics_tags:f262df87ee15d60bbbe30ec5e4dea073-ca4823172c0e22ca0f92d9290d20109b-fd2bc7d3deba0ef9c08238d09714e60d-20659b6bcd8e1b37ef5362507adf4ca3:passed:20140524-194544:0.635343822
T_430cues_multiple_blocks_same_timecode:f1ab5c927064537eb59ab0f5195d6a1d:passed:20140525-173642:0.033316759
T_431ssa_comments_exclamation_mark:3caa9ad1716134cc1f3e229b88ff94ea:passed:20140618-232324:0.072735677
@ -357,7 +357,7 @@ T_508splitting_by_parts_with_segment_linking:existence0-true-true-true-existence
T_509rerender_track_headers_chapters_attachments:aca9879facd444a739b8ea9ff0c471dc:passed:20151115-230226:0.287840782
T_510propedit_add_attachments_without_meta_seek_present:770103c238a0f502c9ec55f0599d8544:passed:20151121-101043:0.070892905
T_511propedit_ensure_seek_head_exists_at_front:20f53afd94e39f5bbf3f1091eefbe31d:passed:20151129-194025:0.152563199
T_512json_identification:e2c1bd814ea805d9711a3875e646a51d+ok-d815a6390e25b861d21093ef66f17191+ok-27fd82d5208da05a2ab71d685aac666a+ok-a86b43982a842c6b9c8572534e40dcca+ok-9167cad47e1f91998e3622e6d3aef1c1+ok-e6e342718925ef84c9c65288d477ac76+ok-59c47f40f7c35d90a4a8ba0f15aebf5b+ok-92edb5e757f553b344192b8cf9961834+ok-63ac054cbe3d9eee3ac47d41f33186e3+ok-01f6792cb05fb5dd4823a5919a5565b4+ok-3f9fd90d34f591fa205eb235b1efe36c+ok-684882d2b787328dc582a7676a8baa67+ok-7c6ccc4d705a480fdba2c6c73047cd5c+ok-b137e5af83e68f2c52af9bfe3976a977+ok-c8b2bd3f66486fc47ad98d7b6a06f713+ok-e824d350b4502a9897275820fced46bf+ok-dc49b24056e793c2f61f129b80417e1a+ok:passed:20151207-223859:1.325386646
T_512json_identification:e2c1bd814ea805d9711a3875e646a51d+ok-d815a6390e25b861d21093ef66f17191+ok-26c87eebc7778cb59dee0d0bb328e8f4+ok-a86b43982a842c6b9c8572534e40dcca+ok-9167cad47e1f91998e3622e6d3aef1c1+ok-e6e342718925ef84c9c65288d477ac76+ok-59c47f40f7c35d90a4a8ba0f15aebf5b+ok-02bf83f6c3e2e80871c5c4bcd3a85551+ok-63ac054cbe3d9eee3ac47d41f33186e3+ok-01f6792cb05fb5dd4823a5919a5565b4+ok-3f9fd90d34f591fa205eb235b1efe36c+ok-684882d2b787328dc582a7676a8baa67+ok-7c6ccc4d705a480fdba2c6c73047cd5c+ok-b137e5af83e68f2c52af9bfe3976a977+ok-c8b2bd3f66486fc47ad98d7b6a06f713+ok-e824d350b4502a9897275820fced46bf+ok-dc49b24056e793c2f61f129b80417e1a+ok:passed:20151207-223859:1.325386646
T_513vp9_10bit_key_frame_detection:3bdaa369dc5af73ced610d978f3bd53d:passed:20151208-224613:0.267556245
T_514remove_track_statistics_tags_during_remux:f262df87ee15d60bbbe30ec5e4dea073-4342871017061370ac0989a9bb71e5c6-75205f286329069b201e4d5745f2cae4:passed:20151215-134129:1.426290351
T_515aac_sampling_frequency_8000_is_not_sbr:545f3eae0c4163d31de81b3bf921e639:passed:20151219-130357:0.066237884
@ -435,7 +435,7 @@ T_586h265_invalid_default_display_window_in_sps_vui:f49d79d17235b95a154b5d951e48
T_587X_ssa_ass_shorter_non_standard_event_format:8a247e76b55536c66e8f0c6b03b14de7:passed:20170320-133053:0.011749409
T_588h265_must_copy_bitstream_restriction_info_in_vui_parameters:73adcf66e93a909ca150885fd5f1eb4d:passed:20170330-194958:0.579648793
T_589h264_forcing_default_duration_in_fields_with_source_matroska:57ec2c6f2b5f526a0bc0ae4b7d58a7fc-40000000+40000000+true-21388568453e93db9c2d8f57af26eb0b-20000000+20000000+true-6122f1005d1f64f9958f275220251ed9-30000000+30000000+true-6ee9a2e29ac6cd451bb971daf7b3a41e-60000000+60000000+true-205658c0f0458073f94f06a6b300ebb0-20000000+20000000+true:passed:20170331-165013:0.244234938
T_590invalid_track_language_elements:39273b8f3b06c67c444c698d4c206847-fbb7ad6e65f0d85cacefd7765605757a-5193400637dc65f4028be97aea853e4b:passed:20170404-191832:0.036965692
T_590invalid_track_language_elements:39273b8f3b06c67c444c698d4c206847-fbb7ad6e65f0d85cacefd7765605757a-4c72a48d41670a0afa856d30c4d1d820:passed:20170404-191832:0.036965692
T_591hevc_wrong_number_of_parameter_sets:74d06ae9a994edf652865fb12e75f09c:passed:20170412-165246:0.341087191
T_592mpeg_ts_aac_wrong_track_parameters_detected:25116993128e73fe9251dc7161ae8030:passed:20170412-225238:0.044257474
T_593flac_with_picture_metadata:c5779b653e274bbb49d3ddf0a274c63c-c58da16285f056972ce09e617e0bd19e-998802dac83743b286c37a681742f296-7cda56d8aceb15753fc915338f1c0fbb-0e4d2b364f8e535d64286ea154948709:passed:20170415-182414:0.302331784