From 483540488ab6b504b49649ecf9b51f25347f6387 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 2 Jan 2017 20:22:02 +0300 Subject: [PATCH 1/5] Added webvtt-full parameter --- src/lib_ccx/ccx_common_option.h | 1 + src/lib_ccx/params.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/lib_ccx/ccx_common_option.h b/src/lib_ccx/ccx_common_option.h index 3ebe83f3..e2425083 100644 --- a/src/lib_ccx/ccx_common_option.h +++ b/src/lib_ccx/ccx_common_option.h @@ -143,6 +143,7 @@ struct ccx_s_options // Options from user parameters unsigned send_to_srv; enum ccx_output_format write_format; // 0=Raw, 1=srt, 2=SMI int use_ass_instead_of_ssa; + int use_webvtt_styling; LLONG debug_mask; // dbg_print will use this mask to print or ignore different types LLONG debug_mask_on_debug; // If we're using temp_debug to enable/disable debug "live", this is the mask when temp_debug=1 /* Networking */ diff --git a/src/lib_ccx/params.c b/src/lib_ccx/params.c index a3703509..47a64e08 100644 --- a/src/lib_ccx/params.c +++ b/src/lib_ccx/params.c @@ -180,9 +180,13 @@ void set_output_format (struct ccx_s_options *opt, const char *format) opt->write_format = CCX_OF_SSA; if (strcmp (format,"ass")==0) opt->use_ass_instead_of_ssa = 1; - } else if (strcmp(format, "webvtt") == 0) + } + else if (strcmp(format, "webvtt")==0 || strcmp(format, "webvtt-full")==0) { opt->write_format = CCX_OF_WEBVTT; - else if (strcmp (format,"sami")==0 || strcmp (format,"smi")==0) + if (strcmp(format, "webvtt-full")==0) + opt->use_webvtt_styling = 1; + } + else if (strcmp(format, "sami") == 0 || strcmp(format, "smi") == 0) opt->write_format=CCX_OF_SAMI; else if (strcmp (format,"transcript")==0 || strcmp (format,"txt")==0) { @@ -384,6 +388,7 @@ void print_usage (void) mprint (" srt -> SubRip (default, so not actually needed).\n"); mprint (" ass/ssa -> SubStation Alpha.\n"); mprint (" webvtt -> WebVTT format\n"); + mprint (" webvtt-full -> WebVTT format with styling\n"); mprint (" sami -> MS Synchronized Accesible Media Interface.\n"); mprint (" bin -> CC data in CCExtractor's own binary format.\n"); mprint (" raw -> CC data in McPoodle's Broadcast format.\n"); From d87b269bae394c313ec2327d628e63d6c2b65cc2 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 2 Jan 2017 20:43:17 +0300 Subject: [PATCH 2/5] Fixed bug with multiple headers --- src/lib_ccx/dvb_subtitle_decoder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib_ccx/dvb_subtitle_decoder.c b/src/lib_ccx/dvb_subtitle_decoder.c index e6102cd8..a4a79cf4 100644 --- a/src/lib_ccx/dvb_subtitle_decoder.c +++ b/src/lib_ccx/dvb_subtitle_decoder.c @@ -1650,7 +1650,9 @@ int dvbsub_decode(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, co enc_ctx->srt_counter = enc_ctx->prev->srt_counter; //for dvb subs we need to update the current srt counter because we always encode the previous subtitle (and the counter is increased for the previous context) enc_ctx->prev_start = enc_ctx->prev->prev_start; sub->prev->got_output = 0; - + if (enc_ctx->write_format == CCX_OF_WEBVTT) { // we already wrote header, but since we encoded last sub, we must prevent multiple headers in future + enc_ctx->wrote_webvtt_sync_header = 1; + } } memcpy(enc_ctx->prev, enc_ctx, sizeof(struct encoder_ctx)); //we save the current encoder context memcpy(sub->prev, sub, sizeof(struct cc_subtitle)); //we save the current subtitle From 2c30f5eb5b076a7352af82871b26b0fa6c06cb56 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 3 Jan 2017 20:55:15 +0300 Subject: [PATCH 3/5] Fixed file invalidation Don't need to add CRLF because file becomes invalid. We need to write cue just after the timestamp. --- src/lib_ccx/ccx_encoders_webvtt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib_ccx/ccx_encoders_webvtt.c b/src/lib_ccx/ccx_encoders_webvtt.c index 86d7e3e0..041ef93e 100644 --- a/src/lib_ccx/ccx_encoders_webvtt.c +++ b/src/lib_ccx/ccx_encoders_webvtt.c @@ -106,8 +106,6 @@ int write_xtimestamp_header(struct encoder_ctx *context) write(context->out->fh, context->buffer, used); } - // Add the additional CRLF to finish the header - write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length); context->wrote_webvtt_sync_header = 1; // Do it even if couldn't write the header, because it won't be possible anyway } From 2b0c8ba7a04c71a3e92bbd527e8f2df374d50932 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 3 Jan 2017 22:48:50 +0300 Subject: [PATCH 4/5] Added WebVTT color and font support --- src/lib_ccx/ccx_encoders_helpers.c | 9 ++ src/lib_ccx/ccx_encoders_helpers.h | 1 + src/lib_ccx/ccx_encoders_webvtt.c | 217 +++++++++++++++++++---------- 3 files changed, 154 insertions(+), 73 deletions(-) diff --git a/src/lib_ccx/ccx_encoders_helpers.c b/src/lib_ccx/ccx_encoders_helpers.c index c1d69ca1..d2e6f4b1 100644 --- a/src/lib_ccx/ccx_encoders_helpers.c +++ b/src/lib_ccx/ccx_encoders_helpers.c @@ -325,6 +325,15 @@ unsigned get_decoder_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer return (unsigned)(buffer - orig); // Return length } +void get_sentence_borders(int *first, int *last, int line_num, struct eia608_screen *data) { + *first = 0; + *last = 32; + while (data->colors[line_num][*first] == COL_TRANSPARENT) + (*first)++; + while (data->colors[line_num][*last] == COL_TRANSPARENT) + (*last)--; +} + /*void delete_all_lines_but_current(ccx_decoder_608_context *context, struct eia608_screen *data, int row) { for (int i=0;i<15;i++) diff --git a/src/lib_ccx/ccx_encoders_helpers.h b/src/lib_ccx/ccx_encoders_helpers.h index e02e369a..80d6c623 100644 --- a/src/lib_ccx/ccx_encoders_helpers.h +++ b/src/lib_ccx/ccx_encoders_helpers.h @@ -28,6 +28,7 @@ int clever_capitalize(struct encoder_ctx *context, int line_num, struct eia608_s void telx_correct_case(char *sub_line); unsigned get_decoder_line_encoded_for_gui(unsigned char *buffer, int line_num, struct eia608_screen *data); unsigned get_decoder_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer, int line_num, struct eia608_screen *data); +void get_sentence_borders(int *first, int *last, int line_num, struct eia608_screen *data); int string_cmp(const void *p1, const void *p2); int string_cmp_function(const void *p1, const void *p2, void *arg); diff --git a/src/lib_ccx/ccx_encoders_webvtt.c b/src/lib_ccx/ccx_encoders_webvtt.c index 041ef93e..0039d2de 100644 --- a/src/lib_ccx/ccx_encoders_webvtt.c +++ b/src/lib_ccx/ccx_encoders_webvtt.c @@ -191,6 +191,93 @@ int write_cc_subtitle_as_webvtt(struct cc_subtitle *sub, struct encoder_ctx *con return ret; } + +// TODO: move this repeating function from ccx_encoders_g608.c to the files ccx_encoders_helpers.(c|h) +int get_line_encoded(struct encoder_ctx *ctx, unsigned char *buffer, int line_num, struct eia608_screen *data) +{ + unsigned char *orig = buffer; + unsigned char *line = data->characters[line_num]; + for (int i = 0; i < 32; i++) + { + int bytes = 0; + switch (ctx->encoding) + { + case CCX_ENC_UTF_8: + bytes = get_char_in_utf_8(buffer, line[i]); + break; + case CCX_ENC_LATIN_1: + get_char_in_latin_1(buffer, line[i]); + bytes = 1; + break; + case CCX_ENC_UNICODE: + get_char_in_unicode(buffer, line[i]); + bytes = 2; + case CCX_ENC_ASCII: + *buffer = line[i]; + bytes = 1; + break; + } + buffer += bytes; + } + return (unsigned int)(buffer - orig); // Return length +} + +void get_color_events(int *color_events, int line_num, struct eia608_screen *data) +{ + int first, last; + get_sentence_borders(&first, &last, line_num, data); + + int last_color = COL_WHITE; + for (int i = first; i <= last; i++) + { + if (data->colors[line_num][i] != last_color) + { + // It does not make sense to keep the default white color in the events + // WebVTT supports colors only is [COL_WHITE..COL_MAGENTA] + if (data->colors[line_num][i] <= COL_MAGENTA) + color_events[i] |= data->colors[line_num][i]; // Add this new color + + if (last_color != COL_WHITE && last_color <= COL_MAGENTA) + color_events[i - 1] |= last_color << 16; // Remove old color (event in the second part of the integer) + + last_color = data->colors[line_num][i]; + } + } + + if (last_color != COL_WHITE) + { + color_events[last] |= last_color << 16; + } +} + +void get_font_events(int *font_events, int line_num, struct eia608_screen *data) +{ + int first, last; + get_sentence_borders(&first, &last, line_num, data); + + int last_font = FONT_REGULAR; + for (int i = first; i <= last; i++) + { + if (data->fonts[line_num][i] != last_font) + { + // It does not make sense to keep the regular font in the events + // WebVTT supports all fonts from C608 + if (data->fonts[line_num][i] != FONT_REGULAR) // Really can do it without condition because FONT_REGULAR == 0 + font_events[i] |= data->fonts[line_num][i]; // Add this new font + + if (last_font != FONT_REGULAR) + font_events[i] |= last_font << 16; // Remove old font (event in the second part of the integer) + + last_font = data->fonts[line_num][i]; + } + } + + if (last_font != FONT_REGULAR) + { + font_events[last] |= last_font << 16; + } +} + int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *context) { int used; @@ -201,8 +288,6 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co int wrote_something = 0; ms_start = data->start_time; - int prev_line_start = -1, prev_line_end = -1; // Column in which the previous line started and ended, for autodash - int prev_line_center1 = -1, prev_line_center2 = -1; // Center column of previous line text int empty_buf = 1; char timeline[128] = ""; for (int i = 0; i<15; i++) @@ -242,92 +327,78 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co { if (data->row_used[i]) { - if (context->sentence_cap) - { - if (clever_capitalize(context, i, data)) - correct_case_with_dictionary(i, data); - } - if (context->autodash && context->trim_subs) - { - int first = 0, last = 31, center1 = -1, center2 = -1; - unsigned char *line = data->characters[i]; - int do_dash = 1, colon_pos = -1; - find_limit_characters(line, &first, &last, CCX_DECODER_608_SCREEN_WIDTH); - if (first == -1 || last == -1) // Probably a bug somewhere though - break; - // Is there a speaker named, for example: TOM: What are you doing? - for (int j = first; j <= last; j++) - { - if (line[j] == ':') - { - colon_pos = j; - break; - } - if (!isupper(line[j])) - break; - } - if (prev_line_start == -1) - do_dash = 0; - if (first == prev_line_start) // Case of left alignment - do_dash = 0; - if (last == prev_line_end) // Right align - do_dash = 0; - if (first>prev_line_start && lastprev_line_start && firstprev_line_start && lastsubline, i, data); - center1 = (first + last) / 2; - if (colon_pos != -1) - { - while (colon_pos= prev_line_center1 - 1 && center1 <= prev_line_center1 + 1 && center1 != -1) // Center align - do_dash = 0; - if (center2 >= prev_line_center2 - 2 && center1 <= prev_line_center2 + 2 && center1 != -1) // Center align - do_dash = 0; - - if (do_dash) - { - written = write(context->out->fh, "- ", 2); - if (written != 2) - return -1; - } - prev_line_start = first; - prev_line_end = last; - prev_line_center1 = center1; - prev_line_center2 = center2; - - } - int length = get_decoder_line_encoded(context, context->subline, i, data); if (context->encoding != CCX_ENC_UNICODE) { dbg_print(CCX_DMT_DECODER_608, "\r"); dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline); } - written = write(context->out->fh, context->subline, length); - if (written != length) - return -1; + + int *color_events = (int *)malloc(sizeof(int) * length); + int *font_events = (int *)malloc(sizeof(int) * length); + memset(color_events, 0, sizeof(int) * length); + memset(font_events, 0, sizeof(int) * length); + + get_color_events(color_events, i, data); + get_font_events(font_events, i, data); + + // Write symbol by symbol with events + for (int j = 0; j < length; j++) + { + // opening events for fonts + int open_font = font_events[j] & 0xFF; // Last 16 bytes + if (open_font != FONT_REGULAR) + { + if (open_font & FONT_ITALICS) + write(context->out->fh, strdup(""), 3); + if (open_font & FONT_UNDERLINED) + write(context->out->fh, strdup(""), 3); + } + + // opening events for colors + int open_color = color_events[j] & 0xFF; // Last 16 bytes + if (open_color != COL_WHITE) + { + write(context->out->fh, strdup("out->fh, color_text[open_color], strlen(color_text[open_color])); + write(context->out->fh, ">", 1); + } + + // write current text symbol + write(context->out->fh, &(context->subline[j]), 1); + + // closing events for colors + int close_color = color_events[j] >> 16; // First 16 bytes + if (close_color != COL_WHITE) + { + write(context->out->fh, strdup(""), 4); + } + + // closing events for fonts + int close_font = font_events[j] >> 16; // First 16 bytes + if (close_font != FONT_REGULAR) + { + if (close_font & FONT_ITALICS) + write(context->out->fh, strdup(""), 4); + if (close_font & FONT_UNDERLINED) + write(context->out->fh, strdup(""), 4); + } + } + + free(color_events); + free(font_events); + written = write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length); if (written != context->encoded_crlf_length) return -1; + wrote_something = 1; - // fprintf (wb->fh,encoded_crlf); } } dbg_print(CCX_DMT_DECODER_608, "- - - - - - - - - - - -\r\n"); - // fprintf (wb->fh, encoded_crlf); written = write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length); if (written != context->encoded_crlf_length) return -1; From 34a21a931d0cac1eb8824f3b812be82d2d138718 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Tue, 3 Jan 2017 22:57:28 +0300 Subject: [PATCH 5/5] Enabled the support of raw WebVTT --- src/lib_ccx/ccx_encoders_webvtt.c | 86 ++++++++++++++++++------------- 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/src/lib_ccx/ccx_encoders_webvtt.c b/src/lib_ccx/ccx_encoders_webvtt.c index 0039d2de..e608137e 100644 --- a/src/lib_ccx/ccx_encoders_webvtt.c +++ b/src/lib_ccx/ccx_encoders_webvtt.c @@ -335,59 +335,73 @@ int write_cc_buffer_as_webvtt(struct eia608_screen *data, struct encoder_ctx *co dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline); } - int *color_events = (int *)malloc(sizeof(int) * length); - int *font_events = (int *)malloc(sizeof(int) * length); - memset(color_events, 0, sizeof(int) * length); - memset(font_events, 0, sizeof(int) * length); + int *color_events; + int *font_events; + if (ccx_options.use_webvtt_styling) + { + color_events = (int *)malloc(sizeof(int) * length); + font_events = (int *)malloc(sizeof(int) * length); + memset(color_events, 0, sizeof(int) * length); + memset(font_events, 0, sizeof(int) * length); - get_color_events(color_events, i, data); - get_font_events(font_events, i, data); + get_color_events(color_events, i, data); + get_font_events(font_events, i, data); + } // Write symbol by symbol with events for (int j = 0; j < length; j++) { - // opening events for fonts - int open_font = font_events[j] & 0xFF; // Last 16 bytes - if (open_font != FONT_REGULAR) + if (ccx_options.use_webvtt_styling) { - if (open_font & FONT_ITALICS) - write(context->out->fh, strdup(""), 3); - if (open_font & FONT_UNDERLINED) - write(context->out->fh, strdup(""), 3); - } + // opening events for fonts + int open_font = font_events[j] & 0xFF; // Last 16 bytes + if (open_font != FONT_REGULAR) + { + if (open_font & FONT_ITALICS) + write(context->out->fh, strdup(""), 3); + if (open_font & FONT_UNDERLINED) + write(context->out->fh, strdup(""), 3); + } - // opening events for colors - int open_color = color_events[j] & 0xFF; // Last 16 bytes - if (open_color != COL_WHITE) - { - write(context->out->fh, strdup("out->fh, color_text[open_color], strlen(color_text[open_color])); - write(context->out->fh, ">", 1); + // opening events for colors + int open_color = color_events[j] & 0xFF; // Last 16 bytes + if (open_color != COL_WHITE) + { + write(context->out->fh, strdup("out->fh, color_text[open_color][0], strlen(color_text[open_color][0])); + write(context->out->fh, ">", 1); + } } // write current text symbol write(context->out->fh, &(context->subline[j]), 1); - // closing events for colors - int close_color = color_events[j] >> 16; // First 16 bytes - if (close_color != COL_WHITE) + if (ccx_options.use_webvtt_styling) { - write(context->out->fh, strdup(""), 4); - } + // closing events for colors + int close_color = color_events[j] >> 16; // First 16 bytes + if (close_color != COL_WHITE) + { + write(context->out->fh, strdup(""), 4); + } - // closing events for fonts - int close_font = font_events[j] >> 16; // First 16 bytes - if (close_font != FONT_REGULAR) - { - if (close_font & FONT_ITALICS) - write(context->out->fh, strdup(""), 4); - if (close_font & FONT_UNDERLINED) - write(context->out->fh, strdup(""), 4); + // closing events for fonts + int close_font = font_events[j] >> 16; // First 16 bytes + if (close_font != FONT_REGULAR) + { + if (close_font & FONT_ITALICS) + write(context->out->fh, strdup(""), 4); + if (close_font & FONT_UNDERLINED) + write(context->out->fh, strdup(""), 4); + } } } - free(color_events); - free(font_events); + if (ccx_options.use_webvtt_styling) + { + free(color_events); + free(font_events); + } written = write(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);