Matroska main part integrated

This commit is contained in:
Evgeny Shulgin 2017-03-01 21:50:20 +03:00
parent 28f84c768e
commit 20b557ff97
3 changed files with 1141 additions and 0 deletions

View File

@ -115,5 +115,6 @@
#endif
typedef int64_t LLONG;
typedef uint64_t ULLONG;
#endif // CCX_PLATFORM_H

View File

@ -1,6 +1,906 @@
#include "lib_ccx.h"
#include "file_buffer.h"
#include "ccx_common_platform.h"
#include "matroska.h"
#include <limits.h>
/* Moves the read position of `file` forward by `n` bytes from where it currently is. */
void skip_bytes(FILE* file, matroska_int n)
{
    FSEEK(file, n, SEEK_CUR);
}
/* Places the read position of `file` at absolute offset `n` (counted from the start of the file). */
void set_bytes(FILE* file, matroska_int n)
{
    FSEEK(file, n, SEEK_SET);
}
/* Places the read position of `file` at its end. */
void set_byte_at_the_end(FILE* file)
{
    FSEEK(file, 0, SEEK_END);
}
/* Returns the current read offset of `file` as reported by FTELL. */
matroska_int get_current_byte(FILE* file)
{
    matroska_int offset = FTELL(file);
    return offset;
}
/*
 * Reads `n` raw bytes from `file` into a freshly allocated buffer.
 *
 * Returns the buffer (the caller must free() it), or NULL when `n` does not
 * fit in size_t or the allocation fails.  If the file ends early, the bytes
 * that could not be read are zero rather than indeterminate.
 */
matroska_byte* read_byte_block(FILE* file, matroska_int n)
{
    /* A 64-bit length must not be silently truncated on platforms with a narrower size_t. */
    if (n > (matroska_int)(size_t)-1)
        return NULL;

    size_t size = (size_t)n;
    /* calloc zero-fills; request at least one byte so an empty block still yields a valid pointer. */
    matroska_byte* buffer = calloc(size > 0 ? size : 1, sizeof(matroska_byte));
    if (buffer == NULL)
        return NULL;

    if (size > 0)
        fread(buffer, 1, size, file);
    return buffer;
}
/*
 * Reads `n` bytes from `file` into a freshly allocated, NUL-terminated buffer
 * of n + 1 bytes.
 *
 * Returns the buffer (the caller must free() it), or NULL when n + 1 does not
 * fit in size_t or the allocation fails.  If the file ends early, the missing
 * bytes are zero, so the result is always a valid C string.
 */
char* read_bytes_signed(FILE* file, matroska_int n)
{
    /* n + 1 must be representable as size_t. */
    if (n >= (matroska_int)(size_t)-1)
        return NULL;

    size_t size = (size_t)n;
    char* buffer = calloc(size + 1, sizeof(char));
    if (buffer == NULL)
        return NULL;

    if (size > 0)
        fread(buffer, 1, size, file);
    buffer[size] = 0;
    return buffer;
}
matroska_byte mkv_read_byte(FILE* file) {
return (matroska_byte)fgetc(file);
}
matroska_int read_vint_length(FILE* file) {
matroska_byte ch = mkv_read_byte(file);
#ifdef _WIN32
int cnt = 8 - __lzcnt16(ch);
#else
int cnt = __builtin_clz(ch) - 24 + 1;
#endif
ch ^= (1 << (8 - cnt));
matroska_int ret = ch;
for (int i = 1; i < cnt; i++) {
ret <<= 8;
ret += mkv_read_byte(file);
}
return ret;
}
/*
 * Reads a length-prefixed block: a variable-length integer giving the size,
 * followed by that many bytes.  Returns what read_byte_block() returns.
 */
matroska_byte* read_vint_block(FILE* file)
{
    matroska_int block_size = read_vint_length(file);
    matroska_byte* block = read_byte_block(file, block_size);
    return block;
}
/*
 * Reads a length-prefixed block as a NUL-terminated char buffer: a
 * variable-length integer giving the size, then that many bytes.
 * Returns what read_bytes_signed() returns.
 */
char* read_vint_block_signed(FILE* file)
{
    matroska_int block_size = read_vint_length(file);
    char* block = read_bytes_signed(file, block_size);
    return block;
}
matroska_int read_vint_block_int(FILE* file) {
matroska_int len = read_vint_length(file);
matroska_byte* s = read_byte_block(file, len);
matroska_int res = 0;
for (int i = 0; i < len; i++) {
res <<= 8;
res += s[i];
}
free(s);
return res;
}
/* Reads a length-prefixed string; same result as read_vint_block_signed(). */
char* read_vint_block_string(FILE* file)
{
    char* str = read_vint_block_signed(file);
    return str;
}
/* Reads a variable-length size and seeks past that many bytes without reading them. */
void read_vint_block_skip(FILE* file)
{
    skip_bytes(file, read_vint_length(file));
}
/*
 * Parses the body of the EBML header element.
 *
 * Reads the element size, then consumes child elements until the end of the
 * header.  IDs are accumulated one byte at a time into `code` until they match
 * a known ID.  The document type is printed; every other known child is read
 * and discarded.  When MATROSKA_MAX_ID_LENGTH bytes match nothing, an error is
 * printed and the rest of the header is skipped.
 */
void parse_ebml(FILE* file)
{
    matroska_int len = read_vint_length(file);
    matroska_int pos = get_current_byte(file);
    char* doc_type = NULL;

    int code = 0, code_len = 0;
    while (pos + len > get_current_byte(file)) {
        code <<= 8;
        code += mkv_read_byte(file);
        code_len++;

        switch (code) {
            /* EBML ids holding integers that are read and ignored */
            case MATROSKA_EBML_VERSION:
            case MATROSKA_EBML_READ_VERSION:
            case MATROSKA_EBML_MAX_ID_LENGTH:
            case MATROSKA_EBML_MAX_SIZE_LENGTH:
            case MATROSKA_EBML_DOC_TYPE_VERSION:
            case MATROSKA_EBML_DOC_TYPE_READ_VERSION:
                read_vint_block_int(file);
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_EBML_DOC_TYPE:
                doc_type = read_vint_block_string(file);
                mprint("Document type: %s\n", doc_type);
                /* The string is heap-allocated and only needed for the message. */
                free(doc_type);
                doc_type = NULL;
                MATROSKA_SWITCH_BREAK(code, code_len);

            /* Misc ids */
            case MATROSKA_VOID:
            case MATROSKA_CRC32:
                read_vint_block_skip(file);
                MATROSKA_SWITCH_BREAK(code, code_len);

            default:
                if (code_len == MATROSKA_MAX_ID_LENGTH) {
                    mprint(MATROSKA_ERROR "Unknown element 0x%x at position %lld, skipping EBML block\n", code,
                           get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
                    set_bytes(file, pos + len);
                    return;
                }
                break;
        }
    }
}
/*
 * Parses the body of the Segment Info element.
 *
 * Reads the element size, then consumes child elements until the end of the
 * block.  File name, title, muxing application, writing application and the
 * timecode scale are printed; every other known child is skipped.  When
 * MATROSKA_MAX_ID_LENGTH bytes match no known ID, an error is printed and the
 * rest of the block is skipped.
 */
void parse_segment_info(FILE* file)
{
    matroska_int len = read_vint_length(file);
    matroska_int pos = get_current_byte(file);
    char* text = NULL; /* scratch for printed strings, freed right after use */

    int code = 0, code_len = 0;
    while (pos + len > get_current_byte(file)) {
        code <<= 8;
        code += mkv_read_byte(file);
        code_len++;

        switch (code) {
            /* Segment info ids that are printed */
            case MATROSKA_SEGMENT_INFO_SEGMENT_FILENAME:
                text = read_vint_block_string(file);
                mprint("Filename: %s\n", text);
                free(text);
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_INFO_TIMECODE_SCALE:
                mprint("Timecode scale: %lld\n", read_vint_block_int(file));
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_INFO_TITLE:
                text = read_vint_block_string(file);
                mprint("Title: %s\n", text);
                free(text);
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_MUXING_APP:
                text = read_vint_block_string(file);
                mprint("Muxing app: %s\n", text);
                free(text);
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_WRITING_APP:
                text = read_vint_block_string(file);
                mprint("Writing app: %s\n", text);
                free(text);
                MATROSKA_SWITCH_BREAK(code, code_len);

            /* Segment info ids that are skipped */
            case MATROSKA_SEGMENT_INFO_SEGMENT_UID:
            case MATROSKA_SEGMENT_INFO_PREV_UID:
            case MATROSKA_SEGMENT_INFO_PREV_FILENAME:
            case MATROSKA_SEGMENT_INFO_NEXT_UID:
            case MATROSKA_SEGMENT_INFO_NEXT_FILENAME:
            case MATROSKA_SEGMENT_INFO_SEGMENT_FAMILY:
            case MATROSKA_SEGMENT_INFO_CHAPTER_TRANSLATE:
            case MATROSKA_SEGMENT_INFO_DURATION:
            case MATROSKA_SEGMENT_INFO_DATE_UTC:
            /* Misc ids */
            case MATROSKA_VOID:
            case MATROSKA_CRC32:
                read_vint_block_skip(file);
                MATROSKA_SWITCH_BREAK(code, code_len);

            default:
                if (code_len == MATROSKA_MAX_ID_LENGTH) {
                    mprint(MATROSKA_ERROR "Unknown element 0x%x at position %lld, skipping segment info block\n", code,
                           get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
                    set_bytes(file, pos + len);
                    return;
                }
                break;
        }
    }
}
/*
 * Formats a time given in milliseconds as "HH:MM:SS,mmm" (hours are zero-padded
 * to at least two digits and grow as needed).
 *
 * Returns a heap-allocated string the caller must free(), or NULL if the
 * allocation fails.
 */
char* generate_timestamp_utf8(matroska_int milliseconds)
{
    /* Explicit unsigned long long so the conversion specifiers match on every platform. */
    unsigned long long millis = milliseconds % 1000;
    milliseconds /= 1000;
    unsigned long long seconds = milliseconds % 60;
    milliseconds /= 60;
    unsigned long long minutes = milliseconds % 60;
    milliseconds /= 60;
    unsigned long long hours = milliseconds;

    /* Up to 20 digits of hours + ":MM:SS,mmm" + NUL fits comfortably in 32 bytes. */
    size_t size = 32;
    char* buf = malloc(size);
    if (buf == NULL)
        return NULL;
    snprintf(buf, size, "%02llu:%02llu:%02llu,%03llu", hours, minutes, seconds, millis);
    return buf;
}
/*
 * Formats a time given in milliseconds as "H:MM:SS.cc", where cc is hundredths
 * of a second (the millisecond remainder divided by 10).
 *
 * Returns a heap-allocated string the caller must free(), or NULL if the
 * allocation fails.
 */
char* generate_timestamp_ass_ssa(matroska_int milliseconds)
{
    /* Explicit unsigned long long so the conversion specifiers match on every platform. */
    unsigned long long centis = (milliseconds % 1000) / 10;
    milliseconds /= 1000;
    unsigned long long seconds = milliseconds % 60;
    milliseconds /= 60;
    unsigned long long minutes = milliseconds % 60;
    milliseconds /= 60;
    unsigned long long hours = milliseconds;

    /* Up to 20 digits of hours + ":MM:SS.cc" + NUL fits comfortably in 32 bytes. */
    size_t size = 32;
    char* buf = malloc(size);
    if (buf == NULL)
        return NULL;
    snprintf(buf, size, "%llu:%02llu:%02llu.%02llu", hours, minutes, seconds, centis);
    return buf;
}
/*
 * Looks up a registered subtitle track by its Matroska track number.
 * Returns its index in mkv_ctx->sub_tracks, or -1 when no track matches.
 */
int find_sub_track_index(struct matroska_ctx* mkv_ctx, matroska_int track_number)
{
    int idx = 0;
    while (idx < mkv_ctx->sub_tracks_count) {
        if (mkv_ctx->sub_tracks[idx]->track_number == track_number)
            return idx;
        idx++;
    }
    return -1;
}
/*
 * Parses one Block inside a BlockGroup and, if it belongs to a registered
 * subtitle track, appends its payload to that track as a new sentence.
 *
 * cluster_timecode is the timecode of the enclosing cluster; the block's own
 * 16-bit timecode is relative to it and signed.
 *
 * Returns the new sentence (owned by the track), or NULL when the block was
 * skipped: unknown track, track already holds MATROSKA_MAX_SENTENCES sentences,
 * or out of memory.  In every NULL case the stream is positioned at the end of
 * the block.  time_end is preset to ULONG_MAX ("unknown") and is expected to be
 * refined by the caller.
 */
struct matroska_sub_sentence* parse_segment_cluster_block_group_block(struct matroska_ctx* mkv_ctx, matroska_int cluster_timecode)
{
    FILE* file = mkv_ctx->file;
    matroska_int len = read_vint_length(file);
    matroska_int pos = get_current_byte(file);
    matroska_int block_end = pos + len;

    matroska_int track_number = read_vint_length(file); // track number is length, not int
    int sub_track_index = find_sub_track_index(mkv_ctx, track_number);
    if (sub_track_index == -1) {
        set_bytes(file, block_end);
        return NULL;
    }

    struct matroska_sub_track* track = mkv_ctx->sub_tracks[sub_track_index];
    if (track->sentence_count >= MATROSKA_MAX_SENTENCES) {
        /* The sentences array is fixed-size; writing past it would corrupt memory. */
        mprint(MATROSKA_WARNING "Too many sentences in track %llu, skipping block\n",
               (unsigned long long)track_number);
        set_bytes(file, block_end);
        return NULL;
    }

    /* Two-byte big-endian timecode, interpreted as a signed 16-bit offset. */
    unsigned int raw = (unsigned int)mkv_read_byte(file) << 8;
    raw |= mkv_read_byte(file);
    int relative = (raw >= 0x8000u) ? (int)raw - 0x10000 : (int)raw;

    mkv_read_byte(file); // skip one byte

    /* Everything left in the block is the subtitle payload; never let the size underflow. */
    matroska_int cur = get_current_byte(file);
    matroska_int size = (block_end > cur) ? block_end - cur : 0;

    char* message = read_bytes_signed(file, size);
    struct matroska_sub_sentence* sentence = malloc(sizeof *sentence);
    if (message == NULL || sentence == NULL) {
        free(message);
        free(sentence);
        set_bytes(file, block_end);
        return NULL;
    }

    matroska_int time_start;
    if (relative >= 0) {
        time_start = cluster_timecode + (matroska_int)relative;
    } else {
        matroska_int back = (matroska_int)(-relative);
        /* Clamp instead of wrapping around the unsigned range. */
        time_start = (cluster_timecode > back) ? cluster_timecode - back : 0;
    }

    sentence->text = message;
    sentence->text_size = size;
    sentence->time_start = time_start;
    sentence->time_end = ULONG_MAX;

    track->sentences[track->sentence_count] = sentence;
    track->sentence_count++;

    activity_progress((int) (get_current_byte(file) * 100 / mkv_ctx->ctx->inputsize), 0, 0);
    return sentence;
}
/*
 * Parses one BlockGroup element.
 *
 * Every Block child is handed to parse_segment_cluster_block_group_block();
 * the sentences it produces are remembered so that, once the whole group has
 * been read, their end time can be set from the group's BlockDuration.
 * Without a BlockDuration the end time is ULONG_MAX ("unknown").  Other known
 * children are skipped.  When MATROSKA_MAX_ID_LENGTH bytes match no known ID,
 * an error is printed, the rest of the group is skipped, and the sentences
 * collected so far are still finalized.
 */
void parse_segment_cluster_block_group(struct matroska_ctx* mkv_ctx, matroska_int cluster_timecode)
{
    FILE* file = mkv_ctx->file;
    matroska_int len = read_vint_length(file);
    matroska_int pos = get_current_byte(file);

    matroska_int block_duration = ULONG_MAX;
    struct matroska_sub_sentence* new_sentence;
    struct matroska_sub_sentence** sentence_list = NULL;
    struct matroska_sub_sentence** grown;
    int sentence_count = 0;

    int code = 0, code_len = 0;
    while (pos + len > get_current_byte(file)) {
        code <<= 8;
        code += mkv_read_byte(file);
        code_len++;

        switch (code) {
            /* Segment cluster block group ids */
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_BLOCK:
                new_sentence = parse_segment_cluster_block_group_block(mkv_ctx, cluster_timecode);
                if (new_sentence != NULL) {
                    /* Keep the old pointer until realloc is known to have succeeded. */
                    grown = realloc(sentence_list, sizeof *grown * (sentence_count + 1));
                    if (grown != NULL) {
                        sentence_list = grown;
                        sentence_list[sentence_count] = new_sentence;
                        sentence_count++;
                    } else {
                        /* Cannot track it for later; give it a defined end time now. */
                        new_sentence->time_end = ULONG_MAX;
                    }
                }
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_BLOCK_DURATION:
                block_duration = read_vint_block_int(file);
                MATROSKA_SWITCH_BREAK(code, code_len);

            /* Children that are skipped */
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_BLOCK_VIRTUAL:
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_BLOCK_ADDITIONS:
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_REFERENCE_PRIORITY:
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_REFERENCE_BLOCK:
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_CODEC_STATE:
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_DISCARD_PADDING:
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_SLICES:
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_REFERENCE_FRAME:
            /* Misc ids */
            case MATROSKA_VOID:
            case MATROSKA_CRC32:
                read_vint_block_skip(file);
                MATROSKA_SWITCH_BREAK(code, code_len);

            default:
                if (code_len == MATROSKA_MAX_ID_LENGTH) {
                    mprint(MATROSKA_ERROR "Unknown element 0x%x at position %lld, skipping segment cluster block group\n", code,
                           get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
                    set_bytes(file, pos + len);
                    /* Still finalize and release what was collected. */
                    goto finalize;
                }
                break;
        }
    }

finalize:
    for (int i = 0; i < sentence_count; i++) {
        // When BlockDuration is not written, the value is assumed to be the difference
        // between the timestamp of this Block and the timestamp of the next Block in "display" order
        if (block_duration == ULONG_MAX)
            sentence_list[i]->time_end = ULONG_MAX;
        else
            sentence_list[i]->time_end = sentence_list[i]->time_start + block_duration;
    }

    free(sentence_list);
}
/*
 * Parses one Cluster element.
 *
 * The cluster timecode is remembered and passed to each BlockGroup parsed
 * afterwards; all other known children are skipped.  When
 * MATROSKA_MAX_ID_LENGTH bytes match no known ID, an error is printed, the rest
 * of the cluster is skipped and the function returns without updating progress.
 */
void parse_segment_cluster(struct matroska_ctx* mkv_ctx)
{
    FILE* file = mkv_ctx->file;
    matroska_int cluster_len = read_vint_length(file);
    matroska_int cluster_start = get_current_byte(file);
    matroska_int cluster_timecode = 0;

    int code = 0, code_len = 0;
    while (get_current_byte(file) < cluster_start + cluster_len) {
        code = (code << 8) + mkv_read_byte(file);
        code_len++;

        switch (code) {
            case MATROSKA_SEGMENT_CLUSTER_TIMECODE:
                cluster_timecode = read_vint_block_int(file);
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP:
                parse_segment_cluster_block_group(mkv_ctx, cluster_timecode);
                MATROSKA_SWITCH_BREAK(code, code_len);

            /* Skipped children.  A SimpleBlock is the same as a Block inside a
               BlockGroup, but subs can't be saved in that structure. */
            case MATROSKA_SEGMENT_CLUSTER_SILENT_TRACKS:
            case MATROSKA_SEGMENT_CLUSTER_POSITION:
            case MATROSKA_SEGMENT_CLUSTER_PREV_SIZE:
            case MATROSKA_SEGMENT_CLUSTER_SIMPLE_BLOCK:
            case MATROSKA_SEGMENT_CLUSTER_ENCRYPTED_BLOCK:
            case MATROSKA_VOID:
            case MATROSKA_CRC32:
                read_vint_block_skip(file);
                MATROSKA_SWITCH_BREAK(code, code_len);

            default:
                if (code_len == MATROSKA_MAX_ID_LENGTH) {
                    mprint(MATROSKA_ERROR "Unknown element 0x%x at position %lld, skipping segment cluster block\n", code,
                           get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
                    set_bytes(file, cluster_start + cluster_len);
                    return;
                }
                break;
        }
    }

    /* Subtitle blocks already report progress; this covers clusters that
       contain no captions at all. */
    activity_progress((int) (get_current_byte(file) * 100 / mkv_ctx->ctx->inputsize), 0, 0);
}
/*
 * Maps a track type to a short lowercase label ("video", "audio", ...).
 * Returns NULL for a value that is not one of the known track types.
 */
char* get_track_entry_type_description(enum matroska_track_entry_type type)
{
    char* description = NULL;

    switch (type) {
        case MATROSKA_TRACK_TYPE_VIDEO:    description = "video";    break;
        case MATROSKA_TRACK_TYPE_AUDIO:    description = "audio";    break;
        case MATROSKA_TRACK_TYPE_COMPLEX:  description = "complex";  break;
        case MATROSKA_TRACK_TYPE_LOGO:     description = "logo";     break;
        case MATROSKA_TRACK_TYPE_SUBTITLE: description = "subtitle"; break;
        case MATROSKA_TRACK_TYPE_BUTTONS:  description = "buttons";  break;
        case MATROSKA_TRACK_TYPE_CONTROL:  description = "control";  break;
        default:                           break;
    }
    return description;
}
/*
 * Translates a codec ID string into the matching subtitle codec enum value by
 * comparing it against matroska_track_text_subtitle_id_strings.
 * A string that matches none of the entries yields the value 0.
 */
enum matroska_track_subtitle_codec_id get_track_subtitle_codec_id(char* codec_id)
{
    int candidate = MATROSKA_TRACK_SUBTITLE_CODEC_ID_UTF8;
    while (candidate <= MATROSKA_TRACK_SUBTITLE_CODEC_ID_KATE) {
        if (strcmp(codec_id, matroska_track_text_subtitle_id_strings[candidate]) == 0)
            return (enum matroska_track_subtitle_codec_id) candidate;
        candidate++;
    }
    return (enum matroska_track_subtitle_codec_id) 0;
}
void parse_segment_track_entry(struct matroska_ctx* mkv_ctx) {
FILE* file = mkv_ctx->file;
mprint("\nTrack entry:\n");
matroska_int len = read_vint_length(file);
matroska_int pos = get_current_byte(file);
matroska_int track_number = 0;
enum matroska_track_entry_type track_type = MATROSKA_TRACK_TYPE_VIDEO;
char* lang = strdup("eng");
char* header = NULL;
char* codec_id_string = NULL;
enum matroska_track_subtitle_codec_id codec_id = MATROSKA_TRACK_SUBTITLE_CODEC_ID_UTF8;
int code = 0, code_len = 0;
while (pos + len > get_current_byte(file)) {
code <<= 8;
code += mkv_read_byte(file);
code_len++;
switch (code) {
/* Track entry ids*/
case MATROSKA_SEGMENT_TRACK_TRACK_NUMBER:
track_number = read_vint_block_int(file);
mprint(" Track number: %lld\n", track_number);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRACK_UID:
mprint(" UID: %llu\n", read_vint_block_int(file));
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRACK_TYPE:
track_type = (enum matroska_track_entry_type) read_vint_block_int(file);
mprint(" Type: %s\n", get_track_entry_type_description(track_type));
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_FLAG_ENABLED:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_FLAG_DEFAULT:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_FLAG_FORCED:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_FLAG_LACING:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_MIN_CACHE:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_MAX_CACHE:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_DEFAULT_DURATION:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_DEFAULT_DECODED_FIELD_DURATION:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_MAX_BLOCK_ADDITION_ID:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_NAME:
mprint(" Name: %s\n", read_vint_block_string(file));
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_LANGUAGE:
lang = read_vint_block_string(file);
mprint(" Language: %s\n", lang);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CODEC_ID:
codec_id_string = read_vint_block_string(file);
codec_id = get_track_subtitle_codec_id(codec_id_string);
mprint(" Codec ID: %s\n", codec_id_string);
free(codec_id_string);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CODEC_PRIVATE:
if (track_type == MATROSKA_TRACK_TYPE_SUBTITLE)
header = read_vint_block_string(file);
else
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CODEC_NAME:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CODEC_ATTACHMENT_LINK:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CODEC_DECODE_ALL:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRACK_OVERLAY:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CODEC_DELAY:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_SEEK_PRE_ROLL:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRACK_TRANSLATE:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_VIDEO:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_AUDIO:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRACK_OPERATION:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CONTENT_ENCODINGS:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
/* Deprecated IDs */
case MATROSKA_SEGMENT_TRACK_TRACK_TIMECODE_SCALE:
mprint(MATROSKA_WARNING "Deprecated element 0x%x at position %lld\n", code,
get_current_byte(file) - 3); // minus length of the ID
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRACK_OFFSET:
mprint(MATROSKA_WARNING "Deprecated element 0x%x at position %lld\n", code,
get_current_byte(file) - 2); // minus length of the ID
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
/* DivX trick track extenstions */
case MATROSKA_SEGMENT_TRACK_TRICK_TRACK_UID:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRICK_TRACK_SEGMENT_UID:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRICK_TRACK_FLAG:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRICK_MASTER_TRACK_UID:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_TRICK_MASTER_TRACK_SEGMENT_UID:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
/* Misc ids */
case MATROSKA_VOID:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_CRC32:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
default:
if (code_len == MATROSKA_MAX_ID_LENGTH) {
mprint(MATROSKA_ERROR "Unknown element 0x%x at position %lld, skipping segment track entry block\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
set_bytes(file, pos + len);
return;
}
break;
}
}
if (track_type == MATROSKA_TRACK_TYPE_SUBTITLE)
{
struct matroska_sub_track* sub_track = malloc(sizeof(struct matroska_sub_track));
sub_track->header = header;
sub_track->lang = lang;
sub_track->track_number = track_number;
sub_track->lang_index = 0;
sub_track->codec_id = codec_id;
sub_track->sentence_count = 0;
for (int i = 0; i < mkv_ctx->sub_tracks_count; i++)
if (strcmp((const char *)mkv_ctx->sub_tracks[i]->lang, (const char *)lang) == 0)
sub_track->lang_index++;
mkv_ctx->sub_tracks[mkv_ctx->sub_tracks_count] = sub_track;
mkv_ctx->sub_tracks_count++;
}
else
free(lang);
}
/*
 * Parses the Tracks element: every TrackEntry child is handed to
 * parse_segment_track_entry(), Void and CRC-32 children are skipped.
 * When MATROSKA_MAX_ID_LENGTH bytes match no known ID, an error is printed and
 * the rest of the element is skipped.
 */
void parse_segment_tracks(struct matroska_ctx* mkv_ctx)
{
    FILE* file = mkv_ctx->file;
    matroska_int tracks_len = read_vint_length(file);
    matroska_int tracks_start = get_current_byte(file);

    int code = 0, code_len = 0;
    while (get_current_byte(file) < tracks_start + tracks_len) {
        code = (code << 8) + mkv_read_byte(file);
        code_len++;

        switch (code) {
            case MATROSKA_SEGMENT_TRACK_ENTRY:
                parse_segment_track_entry(mkv_ctx);
                MATROSKA_SWITCH_BREAK(code, code_len);

            case MATROSKA_VOID:
            case MATROSKA_CRC32:
                read_vint_block_skip(file);
                MATROSKA_SWITCH_BREAK(code, code_len);

            default:
                if (code_len == MATROSKA_MAX_ID_LENGTH) {
                    mprint(MATROSKA_ERROR "Unknown element 0x%x at position %lld, skipping segment tracks block\n", code,
                           get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
                    set_bytes(file, tracks_start + tracks_len);
                    return;
                }
                break;
        }
    }
}
/*
 * Parses the Segment element: Info, Tracks and Cluster children are handed to
 * their own parsers; SeekHead, Cues, Attachments, Chapters, Tags, Void and
 * CRC-32 are skipped.  When MATROSKA_MAX_ID_LENGTH bytes match no known ID, an
 * error is printed and the rest of the segment is skipped.
 */
void parse_segment(struct matroska_ctx* mkv_ctx)
{
    FILE* file = mkv_ctx->file;
    matroska_int segment_len = read_vint_length(file);
    matroska_int segment_start = get_current_byte(file);

    int code = 0, code_len = 0;
    while (get_current_byte(file) < segment_start + segment_len) {
        code = (code << 8) + mkv_read_byte(file);
        code_len++;

        switch (code) {
            /* Children with a dedicated parser */
            case MATROSKA_SEGMENT_INFO:
                parse_segment_info(file);
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_CLUSTER:
                parse_segment_cluster(mkv_ctx);
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_TRACKS:
                parse_segment_tracks(mkv_ctx);
                MATROSKA_SWITCH_BREAK(code, code_len);

            /* Children that are skipped */
            case MATROSKA_SEGMENT_SEEK_HEAD:
            case MATROSKA_SEGMENT_CUES:
            case MATROSKA_SEGMENT_ATTACHMENTS:
            case MATROSKA_SEGMENT_CHAPTERS:
            case MATROSKA_SEGMENT_TAGS:
            case MATROSKA_VOID:
            case MATROSKA_CRC32:
                read_vint_block_skip(file);
                MATROSKA_SWITCH_BREAK(code, code_len);

            default:
                if (code_len == MATROSKA_MAX_ID_LENGTH) {
                    mprint(MATROSKA_ERROR "Unknown element 0x%x at position %lld, skipping segment block\n", code,
                           get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
                    set_bytes(file, segment_start + segment_len);
                    return;
                }
                break;
        }
    }
}
/*
 * Builds the output file name for a subtitle track:
 *   "<input>_<lang>.<ext>"          for the first track of a language,
 *   "<input>_<lang>_<index>.<ext>"  for later tracks of the same language.
 * The name is also echoed to file descriptor 1, followed by a newline.
 *
 * Codecs whose entry in matroska_track_text_subtitle_id_extensions is NULL get
 * the extension "unknown" rather than passing NULL to a %s conversion.
 *
 * Returns a heap-allocated string the caller must free(), or NULL if the
 * allocation fails.
 */
char* generate_filename_from_track(struct matroska_ctx* mkv_ctx, struct matroska_sub_track* track)
{
    const char* extension = matroska_track_text_subtitle_id_extensions[track->codec_id];
    if (extension == NULL)
        extension = "unknown";

    /* name + '_' + lang + '_' + up to 20 index digits + '.' + extension + NUL */
    size_t size = strlen(mkv_ctx->filename) + strlen(track->lang) + strlen(extension) + 20 + 4;
    char* buf = malloc(size);
    if (buf == NULL)
        return NULL;

    if (track->lang_index == 0)
        snprintf(buf, size, "%s_%s.%s", mkv_ctx->filename, track->lang, extension);
    else
        snprintf(buf, size, "%s_%s_%llu.%s", mkv_ctx->filename, track->lang,
                 (unsigned long long)track->lang_index, extension);

    write(1, buf, strlen(buf));
    write(1, "\n", 1);
    return buf;
}
/*
 * Returns a copy of `text` with everything up to and including the second
 * comma removed.
 *
 * If `text` contains fewer than two commas the result is an empty string (the
 * scan stops at the terminator instead of running past it).
 *
 * Returns a heap-allocated string the caller must free(), or NULL if the
 * allocation fails.
 */
char* ass_ssa_sentence_erase_read_order(char* text)
{
    // crop text after second ','
    int commas = 0;
    size_t index = 0;
    while (commas < 2 && text[index] != '\0') {
        if (text[index] == ',')
            commas++;
        index++;
    }

    size_t len = strlen(text + index);
    char* buf = malloc(len + 1);
    if (buf == NULL)
        return NULL;
    /* len + 1 copies the terminating NUL as well. */
    memcpy(buf, text + index, len + 1);
    return buf;
}
void save_sub_track(struct matroska_ctx* mkv_ctx, struct matroska_sub_track* track)
{
char* filename = generate_filename_from_track(mkv_ctx, track);
int desc;
#ifdef WIN32
desc = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, S_IREAD | S_IWRITE);
#else
desc = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_APPEND, S_IWUSR | S_IRUSR);
#endif
free(filename);
if (track->header != NULL)
write(desc, track->header, strlen(track->header));
for (int i = 0; i < track->sentence_count; i++)
{
struct matroska_sub_sentence* sentence = track->sentences[i];
if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_UTF8)
{
char number[9];
sprintf(number, "%d", i + 1);
char *timestamp_start = generate_timestamp_utf8(sentence->time_start);
matroska_int time_end = sentence->time_end;
if (i + 1 < track->sentence_count)
time_end = MIN(time_end, track->sentences[i + 1]->time_start - 1);
char *timestamp_end = generate_timestamp_utf8(time_end);
write(desc, number, strlen(number));
write(desc, "\n", 1);
write(desc, timestamp_start, strlen(timestamp_start));
write(desc, " --> ", 5);
write(desc, timestamp_end, strlen(timestamp_start));
write(desc, "\n", 1);
write(desc, sentence->text, sentence->text_size);
write(desc, "\n\n", 2);
free(timestamp_start);
free(timestamp_end);
}
else if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_ASS || track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_SSA)
{
char *timestamp_start = generate_timestamp_ass_ssa(sentence->time_start);
matroska_int time_end = sentence->time_end;
if (i + 1 < track->sentence_count)
time_end = MIN(time_end, track->sentences[i + 1]->time_start - 1);
char *timestamp_end = generate_timestamp_ass_ssa(time_end);
write(desc, "Dialogue: Marked=0,", strlen("Dialogue: Marked=0,"));
write(desc, timestamp_start, strlen(timestamp_start));
write(desc, ",", 1);
write(desc, timestamp_end, strlen(timestamp_start));
write(desc, ",", 1);
char* text = ass_ssa_sentence_erase_read_order(sentence->text);
write(desc, text, strlen(text));
write(desc, "\n", 1);
free(timestamp_start);
free(timestamp_end);
}
}
}
/*
 * Releases a subtitle track: its header and language strings, every stored
 * sentence together with its text, and finally the track structure itself.
 */
void free_sub_track(struct matroska_sub_track* track)
{
    /* free(NULL) is a no-op, so absent strings need no special casing. */
    free(track->header);
    free(track->lang);

    int idx = 0;
    while (idx < track->sentence_count) {
        struct matroska_sub_sentence* current = track->sentences[idx];
        free(current->text);
        free(current);
        idx++;
    }

    free(track);
}
/* Writes every registered subtitle track to disk, in registration order. */
void matroska_save_all(struct matroska_ctx* mkv_ctx)
{
    int idx = 0;
    while (idx < mkv_ctx->sub_tracks_count) {
        save_sub_track(mkv_ctx, mkv_ctx->sub_tracks[idx]);
        idx++;
    }
}
/* Releases every registered subtitle track and then the parser context itself. */
void matroska_free_all(struct matroska_ctx* mkv_ctx)
{
    int idx = 0;
    while (idx < mkv_ctx->sub_tracks_count) {
        free_sub_track(mkv_ctx->sub_tracks[idx]);
        idx++;
    }
    free(mkv_ctx);
}
/*
 * Top-level parser: walks the file until end of file, dispatching the EBML
 * header and the Segment to their parsers and skipping Void / CRC-32 elements.
 *
 * The stream in mkv_ctx->file is closed before returning, including when an
 * unknown top-level element aborts parsing.  If the stream is NULL an error is
 * printed and nothing is parsed.
 */
void matroska_parse(struct matroska_ctx* mkv_ctx)
{
    int code = 0, code_len = 0;

    mprint("\n");

    FILE* file = mkv_ctx->file;
    if (file == NULL) {
        mprint(MATROSKA_ERROR "Input file is not open, skipping file parsing\n");
        return;
    }

    while (!feof(file)) {
        code <<= 8;
        code += mkv_read_byte(file);
        code_len++;

        switch (code) {
            /* Header ids*/
            case MATROSKA_EBML_HEADER:
                parse_ebml(file);
                MATROSKA_SWITCH_BREAK(code, code_len);
            case MATROSKA_SEGMENT_HEADER:
                parse_segment(mkv_ctx);
                MATROSKA_SWITCH_BREAK(code, code_len);

            /* Misc ids */
            case MATROSKA_VOID:
            case MATROSKA_CRC32:
                read_vint_block_skip(file);
                MATROSKA_SWITCH_BREAK(code, code_len);

            default:
                if (code_len == MATROSKA_MAX_ID_LENGTH) {
                    mprint(MATROSKA_ERROR "Unknown element 0x%x at position %lld, skipping file parsing\n", code,
                           get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
                    /* Do not leak the stream on the error path. */
                    fclose(file);
                    mkv_ctx->file = NULL;
                    return;
                }
                break;
        }
    }

    // Close file stream
    fclose(file);
    mkv_ctx->file = NULL;

    mprint("\n");
}
FILE* create_file(struct lib_ccx_ctx *ctx)
{
@ -11,5 +911,22 @@ FILE* create_file(struct lib_ccx_ctx *ctx)
/*
 * Entry point for Matroska input: parses the current input file, writes every
 * subtitle track found in it, releases the parser state and reports 100%
 * progress.  Always returns 1.
 */
int matroska_loop(struct lib_ccx_ctx *ctx)
{
    /* The generic input file is not used; the parser reads through its own FILE*. */
    close_input_file(ctx);

    struct matroska_ctx *mkv = malloc(sizeof(struct matroska_ctx));
    mkv->ctx = ctx;
    mkv->sub_tracks_count = 0;
    mkv->filename = ctx->inputfile[ctx->current_file];
    mkv->file = create_file(ctx);

    matroska_parse(mkv);
    matroska_save_all(mkv);
    matroska_free_all(mkv);

    /* Everything has been processed. */
    activity_progress(100, 0, 0);
    return 1;
}

223
src/lib_ccx/matroska.h Normal file
View File

@ -0,0 +1,223 @@
#ifndef MATROSKA_H
#define MATROSKA_H
/* Smaller of X and Y. This is a function-like macro, so the chosen argument is
   evaluated twice: do not pass expressions with side effects. */
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
/* EBML header ids */
#define MATROSKA_EBML_HEADER 0x1A45DFA3
#define MATROSKA_EBML_VERSION 0x4286
#define MATROSKA_EBML_READ_VERSION 0x42F7
#define MATROSKA_EBML_MAX_ID_LENGTH 0x42F2
#define MATROSKA_EBML_MAX_SIZE_LENGTH 0x42F3
#define MATROSKA_EBML_DOC_TYPE 0x4282
#define MATROSKA_EBML_DOC_TYPE_VERSION 0x4287
#define MATROSKA_EBML_DOC_TYPE_READ_VERSION 0x4285
/* Segment ids */
#define MATROSKA_SEGMENT_HEADER 0x18538067
#define MATROSKA_SEGMENT_SEEK_HEAD 0x114D9B74
#define MATROSKA_SEGMENT_INFO 0x1549A966
#define MATROSKA_SEGMENT_CLUSTER 0x1F43B675
#define MATROSKA_SEGMENT_TRACKS 0x1654AE6B
#define MATROSKA_SEGMENT_CUES 0x1C53BB6B
#define MATROSKA_SEGMENT_ATTACHMENTS 0x1941A469
#define MATROSKA_SEGMENT_CHAPTERS 0x1043A770
#define MATROSKA_SEGMENT_TAGS 0x1254C367
/* Segment info ids */
#define MATROSKA_SEGMENT_INFO_SEGMENT_UID 0x73A4
#define MATROSKA_SEGMENT_INFO_SEGMENT_FILENAME 0x7384
#define MATROSKA_SEGMENT_INFO_PREV_UID 0x3CB923
#define MATROSKA_SEGMENT_INFO_PREV_FILENAME 0x3C83AB
#define MATROSKA_SEGMENT_INFO_NEXT_UID 0x3EB923
#define MATROSKA_SEGMENT_INFO_NEXT_FILENAME 0x3E83BB
#define MATROSKA_SEGMENT_INFO_SEGMENT_FAMILY 0x4444
#define MATROSKA_SEGMENT_INFO_CHAPTER_TRANSLATE 0x6924
#define MATROSKA_SEGMENT_INFO_TIMECODE_SCALE 0x2AD7B1
#define MATROSKA_SEGMENT_INFO_DURATION 0x4489
#define MATROSKA_SEGMENT_INFO_DATE_UTC 0x4461
#define MATROSKA_SEGMENT_INFO_TITLE 0x7BA9
#define MATROSKA_SEGMENT_MUXING_APP 0x4D80
#define MATROSKA_SEGMENT_WRITING_APP 0x5741
/* Segment cluster ids */
#define MATROSKA_SEGMENT_CLUSTER_TIMECODE 0xE7
#define MATROSKA_SEGMENT_CLUSTER_SILENT_TRACKS 0x5854
#define MATROSKA_SEGMENT_CLUSTER_POSITION 0xA7
#define MATROSKA_SEGMENT_CLUSTER_PREV_SIZE 0xAB
#define MATROSKA_SEGMENT_CLUSTER_SIMPLE_BLOCK 0xA3
#define MATROSKA_SEGMENT_CLUSTER_ENCRYPTED_BLOCK 0xAF
/* Segment cluster block group ids */
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP 0xA0
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_BLOCK 0xA1
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_BLOCK_VIRTUAL 0xA2
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_BLOCK_ADDITIONS 0x75A1
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_BLOCK_DURATION 0x9B
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_REFERENCE_PRIORITY 0xFA
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_REFERENCE_BLOCK 0xFB
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_CODEC_STATE 0xA4
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_DISCARD_PADDING 0x75A2
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_SLICES 0x8E
#define MATROSKA_SEGMENT_CLUSTER_BLOCK_GROUP_REFERENCE_FRAME 0xC8
/* Segment tracks ids */
#define MATROSKA_SEGMENT_TRACK_ENTRY 0xAE
#define MATROSKA_SEGMENT_TRACK_TRACK_NUMBER 0xD7
#define MATROSKA_SEGMENT_TRACK_TRACK_UID 0x73C5
#define MATROSKA_SEGMENT_TRACK_TRACK_TYPE 0x83
#define MATROSKA_SEGMENT_TRACK_FLAG_ENABLED 0xB9
#define MATROSKA_SEGMENT_TRACK_FLAG_DEFAULT 0x88
#define MATROSKA_SEGMENT_TRACK_FLAG_FORCED 0x55AA
#define MATROSKA_SEGMENT_TRACK_FLAG_LACING 0x9C
#define MATROSKA_SEGMENT_TRACK_MIN_CACHE 0x6DE7
#define MATROSKA_SEGMENT_TRACK_MAX_CACHE 0x6DF8
#define MATROSKA_SEGMENT_TRACK_DEFAULT_DURATION 0x23E383
#define MATROSKA_SEGMENT_TRACK_DEFAULT_DECODED_FIELD_DURATION 0x234E7A
#define MATROSKA_SEGMENT_TRACK_MAX_BLOCK_ADDITION_ID 0x55EE
#define MATROSKA_SEGMENT_TRACK_NAME 0x536E
#define MATROSKA_SEGMENT_TRACK_LANGUAGE 0x22B59C
#define MATROSKA_SEGMENT_TRACK_CODEC_ID 0x86
#define MATROSKA_SEGMENT_TRACK_CODEC_PRIVATE 0x63A2
#define MATROSKA_SEGMENT_TRACK_CODEC_NAME 0x258688
#define MATROSKA_SEGMENT_TRACK_CODEC_ATTACHMENT_LINK 0x7446
#define MATROSKA_SEGMENT_TRACK_CODEC_DECODE_ALL 0xAA
#define MATROSKA_SEGMENT_TRACK_TRACK_OVERLAY 0x6FAB
#define MATROSKA_SEGMENT_TRACK_CODEC_DELAY 0x56AA
#define MATROSKA_SEGMENT_TRACK_SEEK_PRE_ROLL 0x56BB
#define MATROSKA_SEGMENT_TRACK_TRACK_TRANSLATE 0x6624
#define MATROSKA_SEGMENT_TRACK_VIDEO 0xE0
#define MATROSKA_SEGMENT_TRACK_AUDIO 0xE1
#define MATROSKA_SEGMENT_TRACK_TRACK_OPERATION 0xE2
#define MATROSKA_SEGMENT_TRACK_CONTENT_ENCODINGS 0x6D80
/* Misc ids */
#define MATROSKA_VOID 0xEC
#define MATROSKA_CRC32 0xBF
/* Deprecated IDs - recognized only so they can be reported with a warning and skipped */
#define MATROSKA_SEGMENT_TRACK_TRACK_TIMECODE_SCALE 0x23314F
#define MATROSKA_SEGMENT_TRACK_TRACK_OFFSET 0x537F
/* DivX trick track extensions (in track entry) */
#define MATROSKA_SEGMENT_TRACK_TRICK_TRACK_UID 0xC0
#define MATROSKA_SEGMENT_TRACK_TRICK_TRACK_SEGMENT_UID 0xC1
#define MATROSKA_SEGMENT_TRACK_TRICK_TRACK_FLAG 0xC6
#define MATROSKA_SEGMENT_TRACK_TRICK_MASTER_TRACK_UID 0xC7
#define MATROSKA_SEGMENT_TRACK_TRICK_MASTER_TRACK_SEGMENT_UID 0xC4
/* Other defines */
#define MATROSKA_MAX_ID_LENGTH 4
#define MATROSKA_MAX_TRACKS 128
#define MATROSKA_MAX_SENTENCES 8192
/* Enums */
/* Track types. The parser casts the integer read from a track entry's
   TrackType element directly to this enum, so the values are the on-disk ones. */
enum matroska_track_entry_type {
MATROSKA_TRACK_TYPE_VIDEO = 1,
MATROSKA_TRACK_TYPE_AUDIO = 2,
MATROSKA_TRACK_TYPE_COMPLEX = 3,
MATROSKA_TRACK_TYPE_LOGO = 0x10,
MATROSKA_TRACK_TYPE_SUBTITLE = 0x11, /* the only type that gets registered as a subtitle track */
MATROSKA_TRACK_TYPE_BUTTONS = 0x12,
MATROSKA_TRACK_TYPE_CONTROL = 0x20,
};
/* Subtitle codecs. The values are consecutive from 0 and are used as indices
   into matroska_track_text_subtitle_id_strings and
   matroska_track_text_subtitle_id_extensions, so the order of the three must
   stay in sync. */
enum matroska_track_subtitle_codec_id {
MATROSKA_TRACK_SUBTITLE_CODEC_ID_UTF8 = 0, /* also the result for an unrecognized codec string */
MATROSKA_TRACK_SUBTITLE_CODEC_ID_SSA,
MATROSKA_TRACK_SUBTITLE_CODEC_ID_ASS,
MATROSKA_TRACK_SUBTITLE_CODEC_ID_USF,
MATROSKA_TRACK_SUBTITLE_CODEC_ID_WEBVTT,
MATROSKA_TRACK_SUBTITLE_CODEC_ID_BITMAP,
MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB,
MATROSKA_TRACK_SUBTITLE_CODEC_ID_KATE /* last value; upper bound of the codec lookup loop */
};
/* Codec ID strings as they appear in a track entry, indexed by
   enum matroska_track_subtitle_codec_id.
   `static` because this table is defined in a header: without it every source
   file that includes the header would define the same external symbol. */
static char* matroska_track_text_subtitle_id_strings[] = {
    "S_TEXT/UTF8",
    "S_TEXT/SSA",
    "S_TEXT/ASS",
    "S_TEXT/USF",
    "S_TEXT/WEBVTT",
    "S_IMAGE/BMP",
    "S_VOBSUB",
    "S_KATE"
};
/* Output file extensions, indexed by enum matroska_track_subtitle_codec_id.
   The last two entries (VobSub, Kate) are NULL: users must check for NULL
   before formatting an entry with %s.
   `static` because this table is defined in a header: without it every source
   file that includes the header would define the same external symbol. */
static char* matroska_track_text_subtitle_id_extensions[] = {
    "srt", "ssa", "ass",
    "usf", "vtt", "bmp",
    NULL, NULL // Unknown
};
/* Messages */
#define MATROSKA_INFO "Matroska parser info: "
#define MATROSKA_WARNING "Matroska parser warning: "
#define MATROSKA_ERROR "Matroska parser error: "
/* Boilerplate code */
/* Ends a `case` in the element-ID switches: resets the accumulated ID (a) and
   its byte count (b), then leaves the switch. It expands to three statements
   ending in `break` and must not be wrapped in do { } while (0), because the
   `break` would then leave that loop instead of the switch. Use it only as the
   last thing in a case, never as the unbraced body of an if/else. */
#define MATROSKA_SWITCH_BREAK(a,b) (a)=0;(b)=0;break
/* Typedefs */
/* Unsigned integer used for element sizes, file offsets, track numbers and timecodes. */
typedef ULLONG matroska_int;
/* One raw byte of the input stream. */
typedef unsigned char matroska_byte;
/* Structures */
/* One subtitle entry taken from a Block. */
struct matroska_sub_sentence {
char* text; /* heap-allocated, NUL-terminated payload of the block */
matroska_int text_size; /* number of payload bytes in text, not counting the terminator */
matroska_int time_start; /* cluster timecode plus the block's own timecode */
matroska_int time_end; /* time_start plus BlockDuration, or ULONG_MAX when the group had no BlockDuration */
};
/* One subtitle track and all sentences collected for it. */
struct matroska_sub_track {
char* header; // Style header for ASS/SSA (and other) subtitles; taken from CodecPrivate, may be NULL
char* lang; // Heap-allocated language string; "eng" when the track entry has none
matroska_int track_number; // Track number that Blocks use to refer to this track
matroska_int lang_index; // Number of previously registered subtitle tracks with the same lang
enum matroska_track_subtitle_codec_id codec_id;
int sentence_count; // Number of used entries in sentences
struct matroska_sub_sentence* sentences[MATROSKA_MAX_SENTENCES]; // Fixed capacity; entries are heap-allocated
};
/* State of one Matroska parsing run. */
struct matroska_ctx {
struct matroska_sub_track* sub_tracks[MATROSKA_MAX_TRACKS]; // Fixed capacity; entries are heap-allocated
int sub_tracks_count; // Number of used entries in sub_tracks
char* filename; // Input file name; used as the prefix of the output file names
FILE* file; // Stream the parser reads from
// Context must be out of this stuff...
struct lib_ccx_ctx* ctx; // Library context; its inputsize is read for progress reporting
};
/* Temporarily closed due to refactoring */
/* Bytestream and parser functions */
/*void skip_bytes(FILE* file, matroska_int n);
void set_bytes(FILE* file, matroska_int n);
matroska_int get_current_byte(FILE* file);
matroska_byte* read_byte_block(FILE* file, matroska_int n);
matroska_byte read_byte(FILE* file);
matroska_int read_vint_length(FILE* file);
matroska_byte* read_vint_block(FILE* file);
matroska_byte* read_vint_block_with_len(FILE* file, matroska_int* ptr_len);
matroska_int read_vint_block_int(FILE* file);
//matroska_byte* read_vint_block_string(FILE* file);
void read_vint_block_skip(FILE* file);
void parse_ebml(FILE* file);
void parse_segment_info(FILE* file);
struct matroska_sub_sentence* parse_segment_cluster_block_group_block(FILE* file, matroska_int cluster_timecode);
void parse_segment_cluster_block_group(FILE* file, matroska_int cluster_timecode);
void parse_segment_cluster(FILE* file);
void parse_segment_track_entry(FILE* file);
void parse_segment_tracks(FILE* file);
void parse_segment(FILE* file);
void matroska_parse(FILE* file);*/
/* Writing and helper functions */
/*char* get_track_entry_type_description(enum matroska_track_entry_type type);
int find_sub_track_index(matroska_int track_number);
char* generate_filename_from_track(struct matroska_sub_track* track);*/
#endif // MATROSKA_H