mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2024-12-25 20:32:10 +00:00
Break incoming subs into sentences (through a buffer), and remove duplicates
This commit is contained in:
parent
d453d9327e
commit
66393a80f2
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,3 +1,9 @@
|
|||||||
|
####
|
||||||
|
# Ignore tests tmp files and results
|
||||||
|
tests/runtest
|
||||||
|
tests/**/*.gcda
|
||||||
|
tests/**/*.gcno
|
||||||
|
|
||||||
####
|
####
|
||||||
# Ignore CVS related files
|
# Ignore CVS related files
|
||||||
|
|
||||||
|
@ -957,14 +957,10 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
|
|||||||
ctx->force_flush = opt->force_flush;
|
ctx->force_flush = opt->force_flush;
|
||||||
ctx->ucla = opt->ucla;
|
ctx->ucla = opt->ucla;
|
||||||
ctx->splitbysentence = opt->splitbysentence;
|
ctx->splitbysentence = opt->splitbysentence;
|
||||||
ctx->sbs_newblock_start_time = -1;
|
ctx->sbs_time_from = -1;
|
||||||
ctx->sbs_newblock_end_time = -1;
|
ctx->sbs_time_trim = -1;
|
||||||
ctx->sbs_newblock = NULL;
|
ctx->sbs_capacity = 0;
|
||||||
ctx->sbs_newblock_capacity = 0;
|
|
||||||
ctx->sbs_newblock_size = 0;
|
|
||||||
ctx->sbs_buffer = NULL;
|
ctx->sbs_buffer = NULL;
|
||||||
ctx->sbs_buffer_capacity = 0;
|
|
||||||
ctx->sbs_buffer_size = 0;
|
|
||||||
|
|
||||||
ctx->subline = (unsigned char *) malloc (SUBLINESIZE);
|
ctx->subline = (unsigned char *) malloc (SUBLINESIZE);
|
||||||
if(!ctx->subline)
|
if(!ctx->subline)
|
||||||
@ -1045,203 +1041,204 @@ int encode_sub(struct encoder_ctx *context, struct cc_subtitle *sub)
|
|||||||
// Write to a buffer that is later s+plit to generate split
|
// Write to a buffer that is later s+plit to generate split
|
||||||
// in sentences
|
// in sentences
|
||||||
if (sub->type == CC_BITMAP)
|
if (sub->type == CC_BITMAP)
|
||||||
wrote_something = write_cc_bitmap_to_sentence_buffer(sub, context);
|
sub = reformat_cc_bitmap_through_sentence_buffer(sub, context);
|
||||||
|
|
||||||
|
if (NULL==sub)
|
||||||
|
return wrote_something;
|
||||||
}
|
}
|
||||||
else
|
// Write subtitles as they come
|
||||||
|
if (sub->type == CC_608)
|
||||||
{
|
{
|
||||||
// Write subtitles as they come
|
struct eia608_screen *data = NULL;
|
||||||
if (sub->type == CC_608)
|
struct ccx_s_write *out;
|
||||||
|
for (data = sub->data; sub->nb_data; sub->nb_data--, data++)
|
||||||
{
|
{
|
||||||
struct eia608_screen *data = NULL;
|
// Determine context based on channel. This replaces the code that was above, as this was incomplete (for cases where -12 was used for example)
|
||||||
struct ccx_s_write *out;
|
out = get_output_ctx(context, data->my_field);
|
||||||
for (data = sub->data; sub->nb_data; sub->nb_data--, data++)
|
|
||||||
|
if (data->format == SFORMAT_XDS)
|
||||||
{
|
{
|
||||||
// Determine context based on channel. This replaces the code that was above, as this was incomplete (for cases where -12 was used for example)
|
|
||||||
out = get_output_ctx(context, data->my_field);
|
|
||||||
|
|
||||||
if (data->format == SFORMAT_XDS)
|
|
||||||
{
|
|
||||||
data->end_time = data->end_time + context->subs_delay;
|
|
||||||
xds_write_transcript_line_prefix(context, out, data->start_time, data->end_time, data->cur_xds_packet_class);
|
|
||||||
if (data->xds_len > 0)
|
|
||||||
{
|
|
||||||
ret = write(out->fh, data->xds_str, data->xds_len);
|
|
||||||
if (ret < data->xds_len)
|
|
||||||
{
|
|
||||||
mprint("WARNING:Loss of data\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
freep(&data->xds_str);
|
|
||||||
write_newline(context, 0);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
data->end_time = data->end_time + context->subs_delay;
|
data->end_time = data->end_time + context->subs_delay;
|
||||||
switch (context->write_format)
|
xds_write_transcript_line_prefix(context, out, data->start_time, data->end_time, data->cur_xds_packet_class);
|
||||||
|
if (data->xds_len > 0)
|
||||||
{
|
{
|
||||||
case CCX_OF_SRT:
|
ret = write(out->fh, data->xds_str, data->xds_len);
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
if (ret < data->xds_len)
|
||||||
try_to_add_start_credits(context, data->start_time);
|
{
|
||||||
wrote_something = write_cc_buffer_as_srt(data, context);
|
mprint("WARNING:Loss of data\n");
|
||||||
break;
|
}
|
||||||
case CCX_OF_SSA:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, data->start_time);
|
|
||||||
wrote_something = write_cc_buffer_as_ssa(data, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_G608:
|
|
||||||
wrote_something = write_cc_buffer_as_g608(data, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_WEBVTT:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, data->start_time);
|
|
||||||
wrote_something = write_cc_buffer_as_webvtt(data, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SAMI:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, data->start_time);
|
|
||||||
wrote_something = write_cc_buffer_as_sami(data, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SMPTETT:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, data->start_time);
|
|
||||||
wrote_something = write_cc_buffer_as_smptett(data, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_TRANSCRIPT:
|
|
||||||
wrote_something = write_cc_buffer_as_transcript2(data, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SPUPNG:
|
|
||||||
wrote_something = write_cc_buffer_as_spupng(data, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SIMPLE_XML:
|
|
||||||
if (ccx_options.keep_output_closed && context->out->temporarily_closed)
|
|
||||||
{
|
|
||||||
temporarily_open_output(context->out);
|
|
||||||
write_subtitle_file_header(context, context->out);
|
|
||||||
}
|
|
||||||
wrote_something = write_cc_buffer_as_simplexml(data, context);
|
|
||||||
if (ccx_options.keep_output_closed)
|
|
||||||
{
|
|
||||||
write_subtitle_file_footer(context, context->out);
|
|
||||||
temporarily_close_output(context->out);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if (wrote_something)
|
freep(&data->xds_str);
|
||||||
context->last_displayed_subs_ms = data->end_time;
|
write_newline(context, 0);
|
||||||
|
continue;
|
||||||
if (context->gui_mode_reports)
|
|
||||||
write_cc_buffer_to_gui(sub->data, context);
|
|
||||||
}
|
|
||||||
freep(&sub->data);
|
|
||||||
}
|
|
||||||
if (sub->type == CC_BITMAP)
|
|
||||||
{
|
|
||||||
switch (context->write_format)
|
|
||||||
{
|
|
||||||
case CCX_OF_SRT:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
|
||||||
wrote_something = write_cc_bitmap_as_srt(sub, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SSA:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
|
||||||
wrote_something = write_cc_bitmap_as_ssa(sub, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_WEBVTT:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
|
||||||
wrote_something = write_cc_bitmap_as_webvtt(sub, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SAMI:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
|
||||||
wrote_something = write_cc_bitmap_as_sami(sub, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SMPTETT:
|
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
|
||||||
wrote_something = write_cc_bitmap_as_smptett(sub, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_TRANSCRIPT:
|
|
||||||
wrote_something = write_cc_bitmap_as_transcript(sub, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SPUPNG:
|
|
||||||
wrote_something = write_cc_bitmap_as_spupng(sub, context);
|
|
||||||
break;
|
|
||||||
case CCX_OF_SIMPLE_XML:
|
|
||||||
wrote_something = write_cc_bitmap_as_simplexml(sub, context);
|
|
||||||
break;
|
|
||||||
#ifdef WITH_LIBCURL
|
|
||||||
case CCX_OF_CURL:
|
|
||||||
wrote_something = write_cc_bitmap_as_libcurl(sub, context);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
data->end_time = data->end_time + context->subs_delay;
|
||||||
if (sub->type == CC_RAW)
|
|
||||||
{
|
|
||||||
if (context->send_to_srv)
|
|
||||||
net_send_header(sub->data, sub->nb_data);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ret = write(context->out->fh, sub->data, sub->nb_data);
|
|
||||||
if (ret < sub->nb_data) {
|
|
||||||
mprint("WARNING: Loss of data\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sub->nb_data = 0;
|
|
||||||
}
|
|
||||||
if (sub->type == CC_TEXT)
|
|
||||||
{
|
|
||||||
switch (context->write_format)
|
switch (context->write_format)
|
||||||
{
|
{
|
||||||
case CCX_OF_SRT:
|
case CCX_OF_SRT:
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
try_to_add_start_credits(context, data->start_time);
|
||||||
wrote_something = write_cc_subtitle_as_srt(sub, context);
|
wrote_something = write_cc_buffer_as_srt(data, context);
|
||||||
break;
|
break;
|
||||||
case CCX_OF_SSA:
|
case CCX_OF_SSA:
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
try_to_add_start_credits(context, data->start_time);
|
||||||
wrote_something = write_cc_subtitle_as_ssa(sub, context);
|
wrote_something = write_cc_buffer_as_ssa(data, context);
|
||||||
break;
|
break;
|
||||||
case CCX_OF_WEBVTT:
|
case CCX_OF_G608:
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
wrote_something = write_cc_buffer_as_g608(data, context);
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
break;
|
||||||
wrote_something = write_cc_subtitle_as_webvtt(sub, context);
|
case CCX_OF_WEBVTT:
|
||||||
break;
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
case CCX_OF_SAMI:
|
try_to_add_start_credits(context, data->start_time);
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
wrote_something = write_cc_buffer_as_webvtt(data, context);
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
break;
|
||||||
wrote_something = write_cc_subtitle_as_sami(sub, context);
|
case CCX_OF_SAMI:
|
||||||
break;
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
case CCX_OF_SMPTETT:
|
try_to_add_start_credits(context, data->start_time);
|
||||||
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
wrote_something = write_cc_buffer_as_sami(data, context);
|
||||||
try_to_add_start_credits(context, sub->start_time);
|
break;
|
||||||
wrote_something = write_cc_subtitle_as_smptett(sub, context);
|
case CCX_OF_SMPTETT:
|
||||||
break;
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
case CCX_OF_TRANSCRIPT:
|
try_to_add_start_credits(context, data->start_time);
|
||||||
wrote_something = write_cc_subtitle_as_transcript(sub, context);
|
wrote_something = write_cc_buffer_as_smptett(data, context);
|
||||||
break;
|
break;
|
||||||
case CCX_OF_SPUPNG:
|
case CCX_OF_TRANSCRIPT:
|
||||||
wrote_something = write_cc_subtitle_as_spupng(sub, context);
|
wrote_something = write_cc_buffer_as_transcript2(data, context);
|
||||||
break;
|
break;
|
||||||
case CCX_OF_SIMPLE_XML:
|
case CCX_OF_SPUPNG:
|
||||||
wrote_something = write_cc_subtitle_as_simplexml(sub, context);
|
wrote_something = write_cc_buffer_as_spupng(data, context);
|
||||||
break;
|
break;
|
||||||
default:
|
case CCX_OF_SIMPLE_XML:
|
||||||
break;
|
if (ccx_options.keep_output_closed && context->out->temporarily_closed)
|
||||||
|
{
|
||||||
|
temporarily_open_output(context->out);
|
||||||
|
write_subtitle_file_header(context, context->out);
|
||||||
|
}
|
||||||
|
wrote_something = write_cc_buffer_as_simplexml(data, context);
|
||||||
|
if (ccx_options.keep_output_closed)
|
||||||
|
{
|
||||||
|
write_subtitle_file_footer(context, context->out);
|
||||||
|
temporarily_close_output(context->out);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
sub->nb_data = 0;
|
if (wrote_something)
|
||||||
|
context->last_displayed_subs_ms = data->end_time;
|
||||||
|
|
||||||
|
if (context->gui_mode_reports)
|
||||||
|
write_cc_buffer_to_gui(sub->data, context);
|
||||||
}
|
}
|
||||||
|
freep(&sub->data);
|
||||||
}
|
}
|
||||||
|
if (sub->type == CC_BITMAP)
|
||||||
|
{
|
||||||
|
switch (context->write_format)
|
||||||
|
{
|
||||||
|
case CCX_OF_SRT:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_bitmap_as_srt(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SSA:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_bitmap_as_ssa(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_WEBVTT:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_bitmap_as_webvtt(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SAMI:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_bitmap_as_sami(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SMPTETT:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_bitmap_as_smptett(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_TRANSCRIPT:
|
||||||
|
wrote_something = write_cc_bitmap_as_transcript(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SPUPNG:
|
||||||
|
wrote_something = write_cc_bitmap_as_spupng(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SIMPLE_XML:
|
||||||
|
wrote_something = write_cc_bitmap_as_simplexml(sub, context);
|
||||||
|
break;
|
||||||
|
#ifdef WITH_LIBCURL
|
||||||
|
case CCX_OF_CURL:
|
||||||
|
wrote_something = write_cc_bitmap_as_libcurl(sub, context);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if (sub->type == CC_RAW)
|
||||||
|
{
|
||||||
|
if (context->send_to_srv)
|
||||||
|
net_send_header(sub->data, sub->nb_data);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret = write(context->out->fh, sub->data, sub->nb_data);
|
||||||
|
if (ret < sub->nb_data) {
|
||||||
|
mprint("WARNING: Loss of data\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sub->nb_data = 0;
|
||||||
|
}
|
||||||
|
if (sub->type == CC_TEXT)
|
||||||
|
{
|
||||||
|
switch (context->write_format)
|
||||||
|
{
|
||||||
|
case CCX_OF_SRT:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_subtitle_as_srt(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SSA:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_subtitle_as_ssa(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_WEBVTT:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_subtitle_as_webvtt(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SAMI:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_subtitle_as_sami(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SMPTETT:
|
||||||
|
if (!context->startcredits_displayed && context->start_credits_text != NULL)
|
||||||
|
try_to_add_start_credits(context, sub->start_time);
|
||||||
|
wrote_something = write_cc_subtitle_as_smptett(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_TRANSCRIPT:
|
||||||
|
wrote_something = write_cc_subtitle_as_transcript(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SPUPNG:
|
||||||
|
wrote_something = write_cc_subtitle_as_spupng(sub, context);
|
||||||
|
break;
|
||||||
|
case CCX_OF_SIMPLE_XML:
|
||||||
|
wrote_something = write_cc_subtitle_as_simplexml(sub, context);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sub->nb_data = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!sub->nb_data)
|
if (!sub->nb_data)
|
||||||
freep(&sub->data);
|
freep(&sub->data);
|
||||||
if (wrote_something && context->force_flush)
|
if (wrote_something && context->force_flush)
|
||||||
|
@ -118,15 +118,14 @@ struct encoder_ctx
|
|||||||
|
|
||||||
/* split-by-sentence stuff */
|
/* split-by-sentence stuff */
|
||||||
int splitbysentence;
|
int splitbysentence;
|
||||||
LLONG sbs_newblock_start_time; // Used by the split-by-sentence code to know when the current block starts...
|
|
||||||
LLONG sbs_newblock_end_time; // ... and ends
|
|
||||||
ccx_sbs_utf8_character *sbs_newblock;
|
|
||||||
int sbs_newblock_capacity;
|
|
||||||
int sbs_newblock_size;
|
|
||||||
ccx_sbs_utf8_character *sbs_buffer;
|
|
||||||
int sbs_buffer_capacity;
|
|
||||||
int sbs_buffer_size;
|
|
||||||
|
|
||||||
|
unsigned char * sbs_buffer; /// Storage for sentence-split buffer
|
||||||
|
size_t sbs_handled_len; /// The length of the string in the SBS-buffer, already handled, but preserved for DUP-detection.
|
||||||
|
|
||||||
|
//ccx_sbs_utf8_character *sbs_newblock;
|
||||||
|
LLONG sbs_time_from; // Used by the split-by-sentence code to know when the current block starts...
|
||||||
|
LLONG sbs_time_trim; // ... and ends
|
||||||
|
size_t sbs_capacity;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define INITIAL_ENC_BUFFER_CAPACITY 2048
|
#define INITIAL_ENC_BUFFER_CAPACITY 2048
|
||||||
@ -196,10 +195,9 @@ int write_cc_bitmap_as_sami (struct cc_subtitle *sub, struct encoder_
|
|||||||
int write_cc_bitmap_as_smptett (struct cc_subtitle *sub, struct encoder_ctx *context);
|
int write_cc_bitmap_as_smptett (struct cc_subtitle *sub, struct encoder_ctx *context);
|
||||||
int write_cc_bitmap_as_spupng (struct cc_subtitle *sub, struct encoder_ctx *context);
|
int write_cc_bitmap_as_spupng (struct cc_subtitle *sub, struct encoder_ctx *context);
|
||||||
int write_cc_bitmap_as_transcript (struct cc_subtitle *sub, struct encoder_ctx *context);
|
int write_cc_bitmap_as_transcript (struct cc_subtitle *sub, struct encoder_ctx *context);
|
||||||
int write_cc_bitmap_to_sentence_buffer (struct cc_subtitle *sub, struct encoder_ctx *context);
|
|
||||||
int write_cc_bitmap_as_libcurl (struct cc_subtitle *sub, struct encoder_ctx *context);
|
int write_cc_bitmap_as_libcurl (struct cc_subtitle *sub, struct encoder_ctx *context);
|
||||||
|
|
||||||
|
struct cc_subtitle * reformat_cc_bitmap_through_sentence_buffer (struct cc_subtitle *sub, struct encoder_ctx *context);
|
||||||
|
|
||||||
void set_encoder_last_displayed_subs_ms(struct encoder_ctx *ctx, LLONG last_displayed_subs_ms);
|
void set_encoder_last_displayed_subs_ms(struct encoder_ctx *ctx, LLONG last_displayed_subs_ms);
|
||||||
void set_encoder_subs_delay(struct encoder_ctx *ctx, LLONG subs_delay);
|
void set_encoder_subs_delay(struct encoder_ctx *ctx, LLONG subs_delay);
|
||||||
|
@ -1,135 +1,457 @@
|
|||||||
#include "ccx_decoders_common.h"
|
#include "ccx_common_platform.h"
|
||||||
#include "ccx_encoders_common.h"
|
#include "ccx_encoders_common.h"
|
||||||
#include "spupng_encoder.h"
|
#include "lib_ccx.h"
|
||||||
#include "ccx_encoders_spupng.h"
|
|
||||||
#include "utility.h"
|
|
||||||
#include "ocr.h"
|
#include "ocr.h"
|
||||||
#include "ccx_decoders_608.h"
|
#include "debug_def.h"
|
||||||
#include "ccx_decoders_708.h"
|
|
||||||
#include "ccx_decoders_708_output.h"
|
|
||||||
#include "ccx_encoders_xds.h"
|
|
||||||
#include "ccx_encoders_helpers.h"
|
|
||||||
#include "utf8proc.h"
|
|
||||||
|
|
||||||
#ifdef ENABLE_SHARING
|
#ifdef ENABLE_SHARING
|
||||||
#include "ccx_share.h"
|
#include "ccx_share.h"
|
||||||
#endif //ENABLE_SHARING
|
#endif //ENABLE_SHARING
|
||||||
|
|
||||||
void lbl_start_block(LLONG start_time, struct encoder_ctx *context)
|
/**
 * Tells whether <current> points at a position where a sentence ends.
 *
 * A position is a sentence break when it holds the string terminator, or
 * when it holds '.', '!' or '?' that is NOT immediately followed by another
 * '.', '!' or '?'. Only the last mark of a run such as "..." or "?!" is
 * therefore reported as a break.
 *
 * @param start   Start of the text being scanned. Not needed by the check;
 *                kept so that existing callers do not change.
 * @param current Position to test; must point into a NUL-terminated string.
 * @return 1 if <current> is on a sentence break, 0 otherwise.
 */
int sbs_is_pointer_on_sentence_breaker(char * start, char * current)
{
	char c;
	char n;

	(void) start;

	c = *current;

	// The end of the string always closes the sentence. Return before
	// peeking at the next byte: nothing past the terminator may be read.
	if (0 == c)
		return 1;

	if ('.' != c && '!' != c && '?' != c)
		return 0;

	// c is not the terminator, so current[1] is still inside the string
	// (at worst it is the terminator itself).
	n = *(current + 1);
	if ('.' == n || '!' == n || '?' == n)
		return 0;

	return 1;
}
|
||||||
|
|
||||||
void lbl_end_block(LLONG end_time, struct encoder_ctx *context)
|
/**
 * Compares at most <n> bytes of two NUL-terminated strings, treating any
 * two whitespace bytes as equal to each other.
 *
 * Bytes are compared as unsigned char, so the ordering matches strncmp()
 * and bytes >= 0x80 (e.g. UTF-8 sequences) can be passed to isspace()
 * safely.
 *
 * @param a      First string.
 * @param b      Second string.
 * @param n      Maximum number of bytes to compare.
 * @param maxerr Reserved for the allowed number of mismatches; fuzzy error
 *               counting is not implemented yet, so the value is ignored.
 * @return 0 when the compared ranges match, a negative value when <a>
 *         sorts before <b> (including when <a> ends first), a positive
 *         value otherwise.
 */
int sbs_fuzzy_strncmp(const char * a, const char * b, size_t n, const size_t maxerr)
{
	size_t i;

	// TODO: implement fuzzy comparing (maxerr is not honoured yet)
	(void) maxerr;

	for (i = 0; i < n; i++)
	{
		const unsigned char ca = (unsigned char) a[i];
		const unsigned char cb = (unsigned char) b[i];

		if (ca == cb)
		{
			// Both strings ended at the same place: equal.
			if (0 == ca)
				return 0;
			continue;
		}

		// One string ended before the other: the shorter one sorts first.
		if (0 == ca)
			return -1;
		if (0 == cb)
			return 1;

		// Different kinds of whitespace (space, tab, newline...) are
		// considered the same character.
		if (isspace(ca) && isspace(cb))
			continue;

		return (ca > cb) ? 1 : -1;
	}

	// All n bytes matched.
	return 0;
}
|
||||||
|
|
||||||
int write_cc_bitmap_to_sentence_buffer(struct cc_subtitle *sub, struct encoder_ctx *context)
|
void sbs_strcpy_without_dup(const unsigned char * str, struct encoder_ctx * context)
|
||||||
|
{
|
||||||
|
int intersect_len;
|
||||||
|
unsigned char * suffix;
|
||||||
|
const unsigned char * prefix = str;
|
||||||
|
|
||||||
|
unsigned long sbs_len;
|
||||||
|
unsigned long str_len;
|
||||||
|
|
||||||
|
str_len = strlen(str);
|
||||||
|
sbs_len = strlen(context->sbs_buffer);
|
||||||
|
|
||||||
|
intersect_len = str_len;
|
||||||
|
if (sbs_len < intersect_len)
|
||||||
|
intersect_len = sbs_len;
|
||||||
|
|
||||||
|
while (intersect_len>0)
|
||||||
|
{
|
||||||
|
suffix = context->sbs_buffer + sbs_len - intersect_len;
|
||||||
|
if (0 == sbs_fuzzy_strncmp(prefix, suffix, intersect_len, 1))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
intersect_len--;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_DEBUG("Sentence Buffer: sbs_strcpy_without_dup, intersection len [%4d]\n", intersect_len);
|
||||||
|
|
||||||
|
// check, that new string does not contain data, from
|
||||||
|
// already handled sentence:
|
||||||
|
LOG_DEBUG("Sentence Buffer: sbs_strcpy_without_dup, sbslen [%4d] handled len [%4d]\n", sbs_len, context->sbs_handled_len);
|
||||||
|
if ( (sbs_len - intersect_len) >= context->sbs_handled_len)
|
||||||
|
{
|
||||||
|
// there is no intersection.
|
||||||
|
// It is time to clean the buffer. Excepting the last uncomplete sentence
|
||||||
|
strcpy(context->sbs_buffer, context->sbs_buffer + context->sbs_handled_len);
|
||||||
|
context->sbs_handled_len = 0;
|
||||||
|
sbs_len = strlen(context->sbs_buffer);
|
||||||
|
|
||||||
|
LOG_DEBUG("Sentence Buffer: Clean buffer, after BUF [%s]\n\n\n", context->sbs_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intersect_len > 0)
|
||||||
|
{
|
||||||
|
// there is a common part (suffix of old sentence equals to prefix of new str)
|
||||||
|
//
|
||||||
|
// remove dup from buffer
|
||||||
|
// we will use an appropriate part from the new string
|
||||||
|
context->sbs_buffer[sbs_len-intersect_len] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
sbs_len = strlen(context->sbs_buffer);
|
||||||
|
|
||||||
|
// whitespace control. Add space between subs
|
||||||
|
if (
|
||||||
|
!isspace(str[0]) // not a space char in the beginning of new str
|
||||||
|
&& context->sbs_handled_len >0 // buffer is not empty (there is uncomplete sentence)
|
||||||
|
&& !isspace(context->sbs_buffer[sbs_len-1]) // not a space char at the end of existing buf
|
||||||
|
)
|
||||||
|
{
|
||||||
|
//strcat(context->sbs_buffer, " ");
|
||||||
|
}
|
||||||
|
|
||||||
|
strcat(context->sbs_buffer, str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a freshly received subtitle string in place.
 *
 * Two fixes are applied to every byte:
 *  - any whitespace character (tab, newline, ...) becomes a plain space;
 *  - a stand-alone '|' is turned into 'I': it must be at the start of the
 *    string or preceded by whitespace, and be followed by the end of the
 *    string, whitespace or an apostrophe.
 *
 * @param str NUL-terminated string to fix in place. NULL is accepted and
 *            ignored.
 */
void sbs_str_autofix(unsigned char * str)
{
	size_t i;

	if (NULL == str)
		return;

	for (i = 0; str[i] != 0; i++)
	{
		// replace all whitespaces with spaces:
		if (isspace(str[i]))
		{
			str[i] = ' ';
			continue; // a space can not be a '|'
		}

		if ('|' != str[i])
			continue;

		// str[i-1] was already normalized by a previous iteration;
		// str[i+1] was not, so it is tested with isspace() as well.
		if (i != 0 && !isspace(str[i - 1]))
			continue;

		if (str[i + 1] == 0 || isspace(str[i + 1]) || str[i + 1] == '\'')
		{
			// try to convert to "I"
			str[i] = 'I';
		}
	}
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Appends the given string to the sentence buffer, and returns a list of full sentences (if there are any), or NULL
|
||||||
|
*
|
||||||
|
* @param str Partial (or full) sub to append.
|
||||||
|
* @param time_from Starting timestamp
|
||||||
|
* @param time_trim Ending timestamp
|
||||||
|
* @param context Encoder context
|
||||||
|
* @return New <struct cc_subtitle *> subtitle, or NULL, if <str> doesn't contain the ending part of the sentence. If there are more than one sentence, the remaining sentences will be chained using <result->next> reference.
|
||||||
|
*/
|
||||||
|
struct cc_subtitle * sbs_append_string(unsigned char * str, const LLONG time_from, const LLONG time_trim, struct encoder_ctx * context)
|
||||||
|
{
|
||||||
|
struct cc_subtitle * resub;
|
||||||
|
struct cc_subtitle * tmpsub;
|
||||||
|
|
||||||
|
unsigned char * bp_current;
|
||||||
|
unsigned char * bp_last_break;
|
||||||
|
unsigned char * sbs_undone_start;
|
||||||
|
|
||||||
|
int is_buf_initialized;
|
||||||
|
int required_capacity;
|
||||||
|
int new_capacity;
|
||||||
|
|
||||||
|
LLONG alphanum_total;
|
||||||
|
LLONG alphanum_cur;
|
||||||
|
|
||||||
|
LLONG anychar_total;
|
||||||
|
LLONG anychar_cur;
|
||||||
|
|
||||||
|
LLONG duration;
|
||||||
|
LLONG available_time;
|
||||||
|
int use_alphanum_counters;
|
||||||
|
|
||||||
|
if (! str)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
sbs_str_autofix(str);
|
||||||
|
|
||||||
|
is_buf_initialized = (NULL == context->sbs_buffer || context->sbs_capacity == 0)
|
||||||
|
? 0
|
||||||
|
: 1;
|
||||||
|
|
||||||
|
// ===============================
|
||||||
|
// grow sentence buffer
|
||||||
|
// ===============================
|
||||||
|
required_capacity =
|
||||||
|
(is_buf_initialized ? strlen(context->sbs_buffer) : 0) // existing data in buf
|
||||||
|
+ strlen(str) // length of new string
|
||||||
|
+ 1 // trailing \0
|
||||||
|
+ 1 // space control (will add one space , if required)
|
||||||
|
;
|
||||||
|
|
||||||
|
if (required_capacity >= context->sbs_capacity)
|
||||||
|
{
|
||||||
|
new_capacity = context->sbs_capacity;
|
||||||
|
if (! is_buf_initialized) new_capacity = 16;
|
||||||
|
|
||||||
|
while (new_capacity < required_capacity)
|
||||||
|
{
|
||||||
|
// increase NEW_capacity, and check, that increment
|
||||||
|
// is less than 8 Mb. Because 8Mb - it is a lot
|
||||||
|
// for a TEXT buffer. It is weird...
|
||||||
|
new_capacity += (new_capacity > 1048576 * 8)
|
||||||
|
? 1048576 * 8
|
||||||
|
: new_capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
context->sbs_buffer = (unsigned char *)realloc(
|
||||||
|
context->sbs_buffer,
|
||||||
|
new_capacity * sizeof(/*unsigned char*/ context->sbs_buffer[0] )
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!context->sbs_buffer)
|
||||||
|
fatal(EXIT_NOT_ENOUGH_MEMORY, "Not enough memory in sbs_append_string");
|
||||||
|
|
||||||
|
context->sbs_capacity = new_capacity;
|
||||||
|
|
||||||
|
// if buffer wasn't initialized, we will se trash in buffer.
|
||||||
|
// but we need just empty string, so here we will get it:
|
||||||
|
if (! is_buf_initialized)
|
||||||
|
{
|
||||||
|
// INIT SBS
|
||||||
|
context->sbs_buffer[0] = 0;
|
||||||
|
context->sbs_handled_len = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// ===============================
|
||||||
|
// append to buffer
|
||||||
|
//
|
||||||
|
// will update sbs_buffer, sbs_handled_len
|
||||||
|
// ===============================
|
||||||
|
sbs_strcpy_without_dup(str, context);
|
||||||
|
|
||||||
|
// ===============================
|
||||||
|
// break to sentences
|
||||||
|
// ===============================
|
||||||
|
resub = NULL;
|
||||||
|
tmpsub = NULL;
|
||||||
|
|
||||||
|
alphanum_total = 0;
|
||||||
|
alphanum_cur = 0;
|
||||||
|
|
||||||
|
anychar_total = 0;
|
||||||
|
anychar_cur = 0;
|
||||||
|
|
||||||
|
sbs_undone_start = context->sbs_buffer + context->sbs_handled_len;
|
||||||
|
bp_last_break = sbs_undone_start;
|
||||||
|
|
||||||
|
LOG_DEBUG("Sentence Buffer: BEFORE sentence break. Last break: [%s] sbs_undone_start: [%d], sbs_undone: [%s]\n",
|
||||||
|
bp_last_break, context->sbs_handled_len, sbs_undone_start
|
||||||
|
);
|
||||||
|
|
||||||
|
for (bp_current = sbs_undone_start; bp_current && *bp_current; bp_current++)
|
||||||
|
{
|
||||||
|
if (
|
||||||
|
0 < anychar_cur // skip empty!
|
||||||
|
&& sbs_is_pointer_on_sentence_breaker(bp_last_break, bp_current) )
|
||||||
|
{
|
||||||
|
// it is new sentence!
|
||||||
|
tmpsub = malloc(sizeof(struct cc_subtitle));
|
||||||
|
|
||||||
|
tmpsub->type = CC_TEXT;
|
||||||
|
// length of new string:
|
||||||
|
tmpsub->nb_data =
|
||||||
|
bp_current - bp_last_break
|
||||||
|
+ 1 // terminating '\0'
|
||||||
|
+ 1 // skip '.'
|
||||||
|
;
|
||||||
|
tmpsub->data = strndup(bp_last_break, tmpsub->nb_data - 1);
|
||||||
|
tmpsub->got_output = 1;
|
||||||
|
|
||||||
|
tmpsub->start_time = alphanum_cur;
|
||||||
|
alphanum_cur = 0;
|
||||||
|
tmpsub->end_time = anychar_cur;
|
||||||
|
anychar_cur = 0;
|
||||||
|
|
||||||
|
bp_last_break = bp_current + 1;
|
||||||
|
|
||||||
|
// tune last break:
|
||||||
|
while (
|
||||||
|
*bp_last_break
|
||||||
|
&& isspace(*bp_last_break)
|
||||||
|
)
|
||||||
|
{
|
||||||
|
bp_last_break++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ???
|
||||||
|
// tmpsub->info = NULL;
|
||||||
|
// tmpsub->mode = NULL;
|
||||||
|
|
||||||
|
// link with prev sub:
|
||||||
|
tmpsub->next = NULL;
|
||||||
|
tmpsub->prev = resub;
|
||||||
|
if (NULL != resub)
|
||||||
|
{
|
||||||
|
resub->next = tmpsub;
|
||||||
|
}
|
||||||
|
|
||||||
|
resub = tmpsub;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*bp_current && isalnum(*bp_current))
|
||||||
|
{
|
||||||
|
alphanum_total++;
|
||||||
|
alphanum_cur++;
|
||||||
|
}
|
||||||
|
anychar_total++;
|
||||||
|
anychar_cur++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ===============================
|
||||||
|
// okay, we have extracted several sentences, now we should
|
||||||
|
// save the position of the "remainder" - start of the last
|
||||||
|
// incomplete sentece
|
||||||
|
// ===============================
|
||||||
|
if (bp_last_break != sbs_undone_start)
|
||||||
|
{
|
||||||
|
context->sbs_handled_len = bp_last_break - sbs_undone_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_DEBUG("Sentence Buffer: AFTER sentence break: Handled Len [%4d]\n", context->sbs_handled_len);
|
||||||
|
|
||||||
|
LOG_DEBUG("Sentence Buffer: Alphanum Total: [%4d] Overall chars: [%4d] STRING:[%20s] BUFFER:[%20s]\n", alphanum_total, anychar_total, str, context->sbs_buffer);
|
||||||
|
|
||||||
|
// ===============================
|
||||||
|
// Calculate time spans
|
||||||
|
// ===============================
|
||||||
|
if (!is_buf_initialized)
|
||||||
|
{
|
||||||
|
context->sbs_time_from = time_from;
|
||||||
|
context->sbs_time_trim = time_trim;
|
||||||
|
}
|
||||||
|
|
||||||
|
available_time = time_trim - context->sbs_time_from;
|
||||||
|
use_alphanum_counters = alphanum_total > 0 ? 1 : 0;
|
||||||
|
|
||||||
|
tmpsub = resub;
|
||||||
|
while (tmpsub)
|
||||||
|
{
|
||||||
|
alphanum_cur = tmpsub->start_time;
|
||||||
|
anychar_cur = tmpsub->end_time;
|
||||||
|
|
||||||
|
if (use_alphanum_counters)
|
||||||
|
{
|
||||||
|
duration = available_time * alphanum_cur / alphanum_total;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
duration = available_time * anychar_cur / anychar_total;
|
||||||
|
}
|
||||||
|
|
||||||
|
tmpsub->start_time = context->sbs_time_from;
|
||||||
|
tmpsub->end_time = tmpsub->start_time + duration;
|
||||||
|
|
||||||
|
context->sbs_time_from = tmpsub->end_time + 1;
|
||||||
|
|
||||||
|
tmpsub = tmpsub->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
return resub;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct cc_subtitle * reformat_cc_bitmap_through_sentence_buffer(struct cc_subtitle *sub, struct encoder_ctx *context)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
|
||||||
#ifdef ENABLE_OCR
|
|
||||||
struct cc_bitmap* rect;
|
struct cc_bitmap* rect;
|
||||||
|
|
||||||
LLONG ms_start, ms_end;
|
LLONG ms_start, ms_end;
|
||||||
|
int used;
|
||||||
|
int i = 0;
|
||||||
|
char *str;
|
||||||
|
|
||||||
if (context->prev_start != -1 && (sub->flags & SUB_EOD_MARKER))
|
// this is a sub with a full sentence (or chain of such subs)
|
||||||
|
struct cc_subtitle * resub = NULL;
|
||||||
|
|
||||||
|
#ifdef ENABLE_OCR
|
||||||
|
|
||||||
|
if (sub->flags & SUB_EOD_MARKER)
|
||||||
{
|
{
|
||||||
ms_start = context->prev_start;
|
// the last sub from input
|
||||||
ms_end = sub->start_time;
|
|
||||||
|
if (context->prev_start == -1)
|
||||||
|
{
|
||||||
|
ms_start = 1;
|
||||||
|
ms_end = sub->start_time;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ms_start = context->prev_start;
|
||||||
|
ms_end = sub->start_time;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (!(sub->flags & SUB_EOD_MARKER))
|
else
|
||||||
{
|
{
|
||||||
|
// not the last sub from input
|
||||||
ms_start = sub->start_time;
|
ms_start = sub->start_time;
|
||||||
ms_end = sub->end_time;
|
ms_end = sub->end_time;
|
||||||
}
|
}
|
||||||
else if (context->prev_start == -1 && (sub->flags & SUB_EOD_MARKER))
|
|
||||||
{
|
|
||||||
ms_start = 1;
|
|
||||||
ms_end = sub->start_time;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sub->nb_data == 0)
|
if (sub->nb_data == 0)
|
||||||
return ret;
|
return 0;
|
||||||
rect = sub->data;
|
|
||||||
|
|
||||||
if (sub->flags & SUB_EOD_MARKER)
|
if (sub->flags & SUB_EOD_MARKER)
|
||||||
context->prev_start = sub->start_time;
|
context->prev_start = sub->start_time;
|
||||||
|
|
||||||
|
str = paraof_ocrtext(sub, " ", 1);
|
||||||
if (rect[0].ocr_text && *(rect[0].ocr_text))
|
if (str)
|
||||||
{
|
{
|
||||||
lbl_start_block(ms_start, context);
|
|
||||||
if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER))
|
if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER))
|
||||||
{
|
{
|
||||||
char *token = NULL;
|
resub = sbs_append_string(str, ms_start, ms_end, context);
|
||||||
token = paraof_ocrtext(sub, " ", 1); // Get text with spaces instead of newlines
|
|
||||||
uint32_t offset=0;
|
|
||||||
utf8proc_ssize_t ls; // Last size
|
|
||||||
char *s = token;
|
|
||||||
int32_t uc;
|
|
||||||
while ((ls=utf8proc_iterate(s, -1, &uc)))
|
|
||||||
{
|
|
||||||
ccx_sbs_utf8_character sbsc;
|
|
||||||
// Note: We don't care about uc here, since we will be writing the encoded bytes, not the code points in binary.
|
|
||||||
//TODO: Deal with ls < 0
|
|
||||||
if (!uc) // End of string
|
|
||||||
break;
|
|
||||||
printf("%3ld | %08X | %c %c %c %c\n", ls, uc, ((uc & 0xFF000000) >> 24), ((uc & 0xFF0000) >> 16),
|
|
||||||
((uc & 0xFF00) >> 8), ( uc & 0xFF));
|
|
||||||
sbsc.ch = uc;
|
|
||||||
sbsc.encoded[0] = 0; sbsc.encoded[1] = 0; sbsc.encoded[2] = 0; sbsc.encoded[3] = 0;
|
|
||||||
memcpy(sbsc.encoded, s, ls);
|
|
||||||
sbsc.enc_len = ls;
|
|
||||||
sbsc.ts = 0; // We don't know yet
|
|
||||||
lbl_add_character(context, sbsc);
|
|
||||||
s += ls;
|
|
||||||
|
|
||||||
// TO-DO: Add each of these characters to the buffer, splitting the timestamps. Remember to add character length to the array
|
|
||||||
}
|
|
||||||
printf("-------\n");
|
|
||||||
|
|
||||||
/*
|
|
||||||
while (token)
|
|
||||||
{
|
|
||||||
char *newline_pos = strstr(token, context->encoded_crlf);
|
|
||||||
if (!newline_pos)
|
|
||||||
{
|
|
||||||
fdprintf(context->out->fh, "%s", token);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
while (token != newline_pos)
|
|
||||||
{
|
|
||||||
fdprintf(context->out->fh, "%c", *token);
|
|
||||||
token++;
|
|
||||||
}
|
|
||||||
token += context->encoded_crlf_length;
|
|
||||||
fdprintf(context->out->fh, "%c", ' ');
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
}
|
}
|
||||||
lbl_end_block(ms_end, context);
|
freep(&str);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0, rect = sub->data; i < sub->nb_data; i++, rect++)
|
||||||
|
{
|
||||||
|
freep(rect->data);
|
||||||
|
freep(rect->data+1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
sub->nb_data = 0;
|
sub->nb_data = 0;
|
||||||
freep(&sub->data);
|
freep(&sub->data);
|
||||||
return ret;
|
return resub;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
11
src/lib_ccx/debug_def.h
Normal file
11
src/lib_ccx/debug_def.h
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
#ifndef _DEBUG_DEF_H_
#define _DEBUG_DEF_H_

/*
 * LOG_DEBUG(fmt, ...) - printf-style trace output.
 *
 * When DEBUG is defined the call is forwarded to printf().
 * Otherwise it expands to a single no-op expression, so that
 *   - the arguments are not evaluated in release builds, and
 *   - the macro stays one statement and is safe in unbraced if/else bodies.
 */
#ifdef DEBUG
#include <stdio.h>
#define LOG_DEBUG(...) printf(__VA_ARGS__)
#else
#define LOG_DEBUG(...) ((void)0)
#endif

#endif
|
59
tests/Makefile
Normal file
59
tests/Makefile
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# Builds and runs the libcheck-based unit tests.
#   make            - clean, rebuild and run every suite
#   DEBUG=1 make    - same, with LOG_DEBUG tracing compiled in
SHELL = /bin/sh

CC=gcc
# SYS := $(shell gcc -dumpmachine)
CFLAGS=-O0 -std=gnu99 -D ENABLE_OCR -g -ggdb -rdynamic
#-Q -da -v

# enable COVERAGE
# CFLAGS+=-fprofile-arcs -ftest-coverage

# add debug flag (DEBUG may come from the environment or the command line)
ifdef DEBUG
CFLAGS+=-DDEBUG
endif

#ALL_FLAGS = -Wno-write-strings -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT
LDFLAGS=-lm -g

CFLAGS+=$(shell pkg-config --cflags check)
LDFLAGS+=$(shell pkg-config --libs check)

# TODO: need to rewrite this. Need new way to load sources for testing
SRC=$(wildcard ../src/lib_ccx/ccx_encoders_splitbysentence.c)
OBJS=

SRC_SUITE=$(wildcard *_suite.c)
OBJ_SUITE=$(patsubst %_suite.c, %_suite.o, $(SRC_SUITE))

OBJS+=$(OBJ_SUITE)

# Run clean and test strictly one after the other, so that a parallel
# build (make -j) cannot delete objects while they are being linked.
.PHONY: all
all:
	$(MAKE) clean
	$(MAKE) test

# explicit output name : -o $@
%.o: %.c
	$(CC) -c $(ALL_FLAGS) $(CFLAGS) $<

# runtest.o is a real prerequisite, so editing runtest.c triggers a rebuild.
runtest: runtest.o $(OBJS)
	@echo "+----------------------------------------------+"
	@echo "| BUILD TESTS |"
	@echo "+----------------------------------------------+"
	$(CC) $(SRC) $^ $(ALL_FLAGS) $(CFLAGS) $(LDFLAGS) -o $@

.PHONY: test
test: runtest
	@echo "+----------------------------------------------+"
	@echo "| START TESTS |"
	@echo "+----------------------------------------------+"
	./runtest

# rm -f stays silent and succeeds when there is nothing to remove.
.PHONY: clean
clean:
	rm -f runtest
	rm -f *.o
	rm -f *.gcda *.gcno   # coverage info
	rm -f *.c.*           # debug info
|
43
tests/README.md
Normal file
43
tests/README.md
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# UNIT TESTING
|
||||||
|
|
||||||
|
This folder contains an archetype and several unit tests for CCExtractor.
|
||||||
|
|
||||||
|
## RUN TESTS
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cd tests
|
||||||
|
make
|
||||||
|
```
|
||||||
|
|
||||||
|
This will build and run all test suites.
|
||||||
|
|
||||||
|
If you want MORE output:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
DEBUG=1 make
|
||||||
|
```
|
||||||
|
|
||||||
|
Where `DEBUG` is just an environment variable.
|
||||||
|
|
||||||
|
## DEBUGGING
|
||||||
|
|
||||||
|
If tests fail after your changes, you can debug them (almost all the flags needed for this are already set in `tests/Makefile`).
|
||||||
|
|
||||||
|
Run:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# build test runner
|
||||||
|
make
|
||||||
|
# load the test runner into the debugger:
|
||||||
|
gdb runtest
|
||||||
|
|
||||||
|
# run under debugger:
|
||||||
|
(gdb) run
|
||||||
|
|
||||||
|
# on segfault:
|
||||||
|
(gdb) where
|
||||||
|
```
|
||||||
|
|
||||||
|
## DEPENDENCIES
|
||||||
|
|
||||||
|
Tests are built around this library: [**libcheck**](https://github.com/libcheck/check), here is [**documentation**](https://libcheck.github.io/check/)
|
305
tests/ccx_encoders_splitbysentence_suite.c
Normal file
305
tests/ccx_encoders_splitbysentence_suite.c
Normal file
@ -0,0 +1,305 @@
|
|||||||
|
#include <check.h>
|
||||||
|
#include "ccx_encoders_splitbysentence_suite.h"
|
||||||
|
|
||||||
|
// -------------------------------------
|
||||||
|
// MOCKS
|
||||||
|
// -------------------------------------
|
||||||
|
typedef int64_t LLONG;
|
||||||
|
#include "../src/lib_ccx/ccx_encoders_common.h"
|
||||||
|
|
||||||
|
// -------------------------------------
|
||||||
|
// Private SBS-functions (for testing only)
|
||||||
|
// -------------------------------------
|
||||||
|
struct cc_subtitle * sbs_append_string(unsigned char * str, LLONG time_from, LLONG time_trim, struct encoder_ctx * context);
|
||||||
|
|
||||||
|
// -------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// -------------------------------------
|
||||||
|
/*
 * Build a heap-allocated CC_BITMAP subtitle for the tests.
 *
 * str       - text stored (as a private copy) in sub->data; the mocked
 *             paraof_ocrtext() in this file hands it back as the "OCR" result
 * time_from - value stored in sub->start_time
 * time_trim - value stored in sub->end_time
 *
 * The struct is zero-initialised, so flags/next/prev and any other field
 * start out as 0/NULL instead of holding indeterminate values.
 * The caller owns the returned subtitle.
 */
struct cc_subtitle * helper_create_sub(char * str, LLONG time_from, LLONG time_trim)
{
	struct cc_subtitle * sub = calloc(1, sizeof *sub);
	ck_assert_ptr_ne(sub, NULL);

	sub->type = CC_BITMAP;
	sub->start_time = time_from;
	sub->end_time = time_trim;
	sub->data = strdup(str);
	ck_assert_ptr_ne(sub->data, NULL);
	// nb_data mirrors the original helper: length of the text
	sub->nb_data = strlen(sub->data);

	return sub;
}
|
||||||
|
|
||||||
|
/*
 * Call sbs_append_string() on a temporary copy of str.
 * The copy is released as soon as the call returns; the result of
 * sbs_append_string() is passed through unchanged.
 */
struct cc_subtitle * helper_sbs_append_string(char * str, LLONG time_from, LLONG time_trim, struct encoder_ctx * context)
{
	char * scratch = strdup(str);
	struct cc_subtitle * result = sbs_append_string(scratch, time_from, time_trim, context);

	free(scratch);
	return result;
}
|
||||||
|
|
||||||
|
// -------------------------------------
|
||||||
|
// MOCKS
|
||||||
|
// -------------------------------------
|
||||||
|
struct encoder_ctx * context;
|
||||||
|
|
||||||
|
/* Test mock: intentionally a no-op, the argument is neither freed nor modified. */
void freep(void * obj)
{
	(void)obj;
}
|
||||||
|
/* Test mock: ignores both arguments and simply returns to the caller. */
void fatal(int x, void * obj)
{
	(void)x;
	(void)obj;
}
|
||||||
|
|
||||||
|
unsigned char * paraof_ocrtext(void * sub) {
|
||||||
|
// this is OCR -> text converter.
|
||||||
|
// now, in our test cases, we will pass TEXT instead of OCR.
|
||||||
|
// and will return passed text as result
|
||||||
|
|
||||||
|
return ((struct cc_subtitle *)sub)->data;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------
|
||||||
|
// TEST preparations
|
||||||
|
// -------------------------------------
|
||||||
|
void setup(void)
|
||||||
|
{
|
||||||
|
context = (struct encoder_ctx *)malloc(sizeof(struct encoder_ctx));
|
||||||
|
context->sbs_buffer = NULL;
|
||||||
|
context->sbs_capacity = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void teardown(void)
|
||||||
|
{
|
||||||
|
free(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------
|
||||||
|
// TESTS
|
||||||
|
// -------------------------------------
|
||||||
|
START_TEST(test_sbs_one_simple_sentence)
|
||||||
|
{
|
||||||
|
struct cc_subtitle * sub = helper_create_sub("Simple sentence.", 1, 100);
|
||||||
|
struct cc_subtitle * out = reformat_cc_bitmap_through_sentence_buffer(sub, context);
|
||||||
|
|
||||||
|
ck_assert_ptr_ne(out, NULL);
|
||||||
|
ck_assert_str_eq(out->data, "Simple sentence.");
|
||||||
|
ck_assert_ptr_eq(out->next, NULL);
|
||||||
|
ck_assert_ptr_eq(out->prev, NULL);
|
||||||
|
}
|
||||||
|
END_TEST
|
||||||
|
|
||||||
|
|
||||||
|
START_TEST(test_sbs_two_sentences_with_rep)
|
||||||
|
{
|
||||||
|
struct cc_subtitle * sub1 = helper_create_sub("asdf", 1, 100);
|
||||||
|
struct cc_subtitle * out1 = reformat_cc_bitmap_through_sentence_buffer(sub1, context);
|
||||||
|
ck_assert_ptr_eq(out1, NULL);
|
||||||
|
|
||||||
|
// second sub:
|
||||||
|
struct cc_subtitle * sub2 = helper_create_sub("asdf Hello.", 101, 200);
|
||||||
|
struct cc_subtitle * out2 = reformat_cc_bitmap_through_sentence_buffer(sub2, context);
|
||||||
|
|
||||||
|
ck_assert_ptr_ne(out2, NULL);
|
||||||
|
ck_assert_str_eq(out2->data, "asdf Hello.");
|
||||||
|
ck_assert_ptr_eq(out2->next, NULL);
|
||||||
|
ck_assert_ptr_eq(out2->prev, NULL);}
|
||||||
|
END_TEST
|
||||||
|
|
||||||
|
|
||||||
|
START_TEST(test_sbs_append_string_two_separate)
|
||||||
|
{
|
||||||
|
unsigned char * test_strings[] = {
|
||||||
|
"First string.",
|
||||||
|
"Second string."
|
||||||
|
};
|
||||||
|
struct cc_subtitle * sub;
|
||||||
|
unsigned char * str;
|
||||||
|
|
||||||
|
// first string
|
||||||
|
str = strdup(test_strings[0]);
|
||||||
|
sub = NULL;
|
||||||
|
sub = sbs_append_string(str, 1, 20, context);
|
||||||
|
ck_assert_ptr_ne(sub, NULL);
|
||||||
|
ck_assert_str_eq(sub->data, test_strings[0]);
|
||||||
|
ck_assert_int_eq(sub->start_time, 1);
|
||||||
|
ck_assert_int_eq(sub->end_time, 20);
|
||||||
|
|
||||||
|
// second string:
|
||||||
|
str = strdup(test_strings[1]);
|
||||||
|
sub = NULL;
|
||||||
|
sub = sbs_append_string(str, 21, 40, context);
|
||||||
|
|
||||||
|
ck_assert_ptr_ne(sub, NULL);
|
||||||
|
ck_assert_str_eq(sub->data, test_strings[1]);
|
||||||
|
ck_assert_int_eq(sub->start_time, 21);
|
||||||
|
ck_assert_int_eq(sub->end_time, 40);
|
||||||
|
}
|
||||||
|
END_TEST
|
||||||
|
|
||||||
|
START_TEST(test_sbs_append_string_two_with_broken_sentence)
|
||||||
|
{
|
||||||
|
// important !!
|
||||||
|
// summary len == 32
|
||||||
|
char * test_strings[] = {
|
||||||
|
"First string",
|
||||||
|
" ends here, deabbea."
|
||||||
|
};
|
||||||
|
struct cc_subtitle * sub;
|
||||||
|
char * str;
|
||||||
|
|
||||||
|
// first string
|
||||||
|
str = strdup(test_strings[0]);
|
||||||
|
sub = sbs_append_string(str, 1, 3, context);
|
||||||
|
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// second string:
|
||||||
|
str = strdup(test_strings[1]);
|
||||||
|
sub = sbs_append_string(str, 4, 5, context);
|
||||||
|
|
||||||
|
ck_assert_ptr_ne(sub, NULL);
|
||||||
|
ck_assert_str_eq(sub->data, "First string ends here, deabbea.");
|
||||||
|
ck_assert_int_eq(sub->start_time, 1);
|
||||||
|
ck_assert_int_eq(sub->end_time, 5);
|
||||||
|
}
|
||||||
|
END_TEST
|
||||||
|
|
||||||
|
START_TEST(test_sbs_append_string_two_intersecting)
|
||||||
|
{
|
||||||
|
char * test_strings[] = {
|
||||||
|
"First string",
|
||||||
|
"First string ends here."
|
||||||
|
};
|
||||||
|
struct cc_subtitle * sub;
|
||||||
|
char * str;
|
||||||
|
|
||||||
|
// first string
|
||||||
|
str = strdup(test_strings[0]);
|
||||||
|
sub = sbs_append_string(str, 1, 20, context);
|
||||||
|
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
free(sub);
|
||||||
|
|
||||||
|
// second string:
|
||||||
|
str = strdup(test_strings[1]);
|
||||||
|
//printf("second string: [%s]\n", str);
|
||||||
|
sub = sbs_append_string(str, 21, 40, context);
|
||||||
|
|
||||||
|
ck_assert_ptr_ne(sub, NULL);
|
||||||
|
ck_assert_str_eq(sub->data, "First string ends here.");
|
||||||
|
ck_assert_int_eq(sub->start_time, 1);
|
||||||
|
ck_assert_int_eq(sub->end_time, 40);
|
||||||
|
}
|
||||||
|
END_TEST
|
||||||
|
|
||||||
|
|
||||||
|
START_TEST(test_sbs_append_string_real_data_1)
|
||||||
|
{
|
||||||
|
struct cc_subtitle * sub;
|
||||||
|
|
||||||
|
// 1
|
||||||
|
sub = helper_sbs_append_string("Oleon",
|
||||||
|
1, 0, context);
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// 2
|
||||||
|
sub = helper_sbs_append_string("Oleon costs.",
|
||||||
|
1, 189, context);
|
||||||
|
ck_assert_ptr_ne(sub, NULL);
|
||||||
|
ck_assert_str_eq(sub->data, "Oleon costs.");
|
||||||
|
|
||||||
|
// 3
|
||||||
|
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||||
|
Didn't",
|
||||||
|
190, 889, context);
|
||||||
|
ck_assert_ptr_ne(sub, NULL);
|
||||||
|
ck_assert_str_eq(sub->data, "buried in the annex, 95 Oleon costs.");
|
||||||
|
ck_assert_int_eq(sub->start_time, 190); // = <sub start>
|
||||||
|
ck_assert_int_eq(sub->end_time, 783); // = <sub start> + <available time,889-190=699 > * <sentence alphanum, 28> / <sub alphanum, 33>
|
||||||
|
ck_assert_ptr_eq(sub->next, NULL);
|
||||||
|
|
||||||
|
// 4
|
||||||
|
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||||
|
Didn't want",
|
||||||
|
890, 1129, context);
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// 5
|
||||||
|
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||||
|
Didn't want to",
|
||||||
|
1130, 1359, context);
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// 6
|
||||||
|
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||||
|
Didn't want to acknowledge",
|
||||||
|
1360, 2059, context);
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// 7
|
||||||
|
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||||
|
Didn't want to acknowledge the",
|
||||||
|
2060, 2299, context);
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// 9
|
||||||
|
sub = helper_sbs_append_string("Didn't want to acknowledge the\n\
|
||||||
|
pressures on hospitals, schools and",
|
||||||
|
2300, 5019, context);
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// 13
|
||||||
|
sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
|
||||||
|
infrastructure.",
|
||||||
|
5020, 5159, context);
|
||||||
|
ck_assert_ptr_ne(sub, NULL);
|
||||||
|
ck_assert_str_eq(sub->data, "Didn't want to acknowledge the pressures on hospitals, schools and infrastructure.");
|
||||||
|
ck_assert_int_eq(sub->start_time, 784);
|
||||||
|
ck_assert_int_eq(sub->end_time, 5159);
|
||||||
|
ck_assert_ptr_eq(sub->next, NULL);
|
||||||
|
|
||||||
|
// 14
|
||||||
|
sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
|
||||||
|
infrastructure. If",
|
||||||
|
5160, 5529, context);
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// 16
|
||||||
|
sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
|
||||||
|
infrastructure. If we go",
|
||||||
|
5530, 6559, context);
|
||||||
|
ck_assert_ptr_eq(sub, NULL);
|
||||||
|
|
||||||
|
// ck_assert_int_eq(sub->start_time, 1);
|
||||||
|
// ck_assert_int_eq(sub->end_time, 40);
|
||||||
|
}
|
||||||
|
END_TEST
|
||||||
|
|
||||||
|
|
||||||
|
Suite * ccx_encoders_splitbysentence_suite(void)
|
||||||
|
{
|
||||||
|
Suite *s;
|
||||||
|
TCase *tc_core;
|
||||||
|
|
||||||
|
s = suite_create("Sentence Buffer");
|
||||||
|
|
||||||
|
/* Overall tests */
|
||||||
|
tc_core = tcase_create("SB: Overall");
|
||||||
|
|
||||||
|
tcase_add_checked_fixture(tc_core, setup, teardown);
|
||||||
|
tcase_add_test(tc_core, test_sbs_one_simple_sentence);
|
||||||
|
tcase_add_test(tc_core, test_sbs_two_sentences_with_rep);
|
||||||
|
suite_add_tcase(s, tc_core);
|
||||||
|
|
||||||
|
/**/
|
||||||
|
TCase *tc_append_string;
|
||||||
|
tc_append_string = tcase_create("SB: append_string");
|
||||||
|
tcase_add_checked_fixture(tc_append_string, setup, teardown);
|
||||||
|
|
||||||
|
tcase_add_test(tc_append_string, test_sbs_append_string_two_separate);
|
||||||
|
tcase_add_test(tc_append_string, test_sbs_append_string_two_with_broken_sentence);
|
||||||
|
tcase_add_test(tc_append_string, test_sbs_append_string_two_intersecting);
|
||||||
|
tcase_add_test(tc_append_string, test_sbs_append_string_real_data_1);
|
||||||
|
|
||||||
|
suite_add_tcase(s, tc_append_string);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
4
tests/ccx_encoders_splitbysentence_suite.h
Normal file
4
tests/ccx_encoders_splitbysentence_suite.h
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
// -------------------------------------
|
||||||
|
// SUITE
|
||||||
|
// -------------------------------------
|
||||||
|
Suite * ccx_encoders_splitbysentence_suite(void);
|
21
tests/runtest.c
Normal file
21
tests/runtest.c
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
#include <check.h>
|
||||||
|
|
||||||
|
// TESTS:
|
||||||
|
#include "ccx_encoders_splitbysentence_suite.h"
|
||||||
|
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
int number_failed;
|
||||||
|
Suite *s;
|
||||||
|
SRunner *sr;
|
||||||
|
|
||||||
|
s = ccx_encoders_splitbysentence_suite();
|
||||||
|
sr = srunner_create(s);
|
||||||
|
srunner_set_fork_status(sr, CK_NOFORK);
|
||||||
|
|
||||||
|
srunner_run_all(sr, CK_NORMAL);
|
||||||
|
number_failed = srunner_ntests_failed(sr);
|
||||||
|
srunner_free(sr);
|
||||||
|
return (number_failed == 0) ? 0 : 1;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user