Break incoming subs into sentences (through a buffer), and remove duplicates

2024-12-25 20:32:10 +00:00 · 2016-12-02 13:36:33 +05:00 · 2016-12-02 13:36:33 +05:00 · 66393a80f2
commit 66393a80f2
parent d453d9327e
10 changed files with 1060 additions and 294 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,9 @@
 ####
 # Ignore tests tmp files and results
 tests/runtest
 tests/**/*.gcda
 tests/**/*.gcno
 ####
 # Ignore CVS related files
--- a/src/lib_ccx/ccx_encoders_common.c
+++ b/src/lib_ccx/ccx_encoders_common.c
@ -957,14 +957,10 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
 	ctx->force_flush = opt->force_flush;
 	ctx->ucla = opt->ucla;
 	ctx->splitbysentence = opt->splitbysentence;
-	ctx->sbs_newblock_start_time = -1;
+	ctx->sbs_time_from = -1;
-	ctx->sbs_newblock_end_time = -1;
+	ctx->sbs_time_trim = -1;
-	ctx->sbs_newblock = NULL;
+	ctx->sbs_capacity = 0;
 	ctx->sbs_newblock_capacity = 0;
 	ctx->sbs_newblock_size = 0;
 	ctx->sbs_buffer = NULL;
 	ctx->sbs_buffer_capacity = 0;
 	ctx->sbs_buffer_size = 0;
 	ctx->subline = (unsigned char *) malloc (SUBLINESIZE);
 	if(!ctx->subline)
@ -1045,203 +1041,204 @@ int encode_sub(struct encoder_ctx *context, struct cc_subtitle *sub)
 		// Write to a buffer that is later split into
 		// sentences
 		if (sub->type == CC_BITMAP)
-			wrote_something = write_cc_bitmap_to_sentence_buffer(sub, context);
+			sub = reformat_cc_bitmap_through_sentence_buffer(sub, context);
 		if (NULL==sub)
 			return wrote_something;
 	}
-	else
+	// Write subtitles as they come
 	if (sub->type == CC_608)
 	{
-		// Write subtitles as they come
+		struct eia608_screen *data = NULL;
-		if (sub->type == CC_608)
+		struct ccx_s_write *out;
 		for (data = sub->data; sub->nb_data; sub->nb_data--, data++)
 		{
-			struct eia608_screen *data = NULL;
+			// Determine context based on channel. This replaces the code that was above, as this was incomplete (for cases where -12 was used for example)
-			struct ccx_s_write *out;
+			out = get_output_ctx(context, data->my_field);
-			for (data = sub->data; sub->nb_data; sub->nb_data--, data++)
+
 			if (data->format == SFORMAT_XDS)
 			{
 				// Determine context based on channel. This replaces the code that was above, as this was incomplete (for cases where -12 was used for example)
 				out = get_output_ctx(context, data->my_field);
 				if (data->format == SFORMAT_XDS)
 				{
 					data->end_time = data->end_time + context->subs_delay;
 					xds_write_transcript_line_prefix(context, out, data->start_time, data->end_time, data->cur_xds_packet_class);
 					if (data->xds_len > 0)
 					{
 						ret = write(out->fh, data->xds_str, data->xds_len);
 						if (ret < data->xds_len)
 						{
 							mprint("WARNING:Loss of data\n");
 						}
 					}
 					freep(&data->xds_str);
 					write_newline(context, 0);
 					continue;
 				}
 				data->end_time = data->end_time + context->subs_delay;
-				switch (context->write_format)
+				xds_write_transcript_line_prefix(context, out, data->start_time, data->end_time, data->cur_xds_packet_class);
 				if (data->xds_len > 0)
 				{
-					case CCX_OF_SRT:
+					ret = write(out->fh, data->xds_str, data->xds_len);
-						if (!context->startcredits_displayed && context->start_credits_text != NULL)
+					if (ret < data->xds_len)
-							try_to_add_start_credits(context, data->start_time);
+					{
-						wrote_something = write_cc_buffer_as_srt(data, context);
+						mprint("WARNING:Loss of data\n");
-						break;
+					}
 					case CCX_OF_SSA:
 						if (!context->startcredits_displayed && context->start_credits_text != NULL)
 							try_to_add_start_credits(context, data->start_time);
 						wrote_something = write_cc_buffer_as_ssa(data, context);
 						break;
 					case CCX_OF_G608:
 						wrote_something = write_cc_buffer_as_g608(data, context);
 						break;
 					case CCX_OF_WEBVTT:
 						if (!context->startcredits_displayed && context->start_credits_text != NULL)
 							try_to_add_start_credits(context, data->start_time);
 						wrote_something = write_cc_buffer_as_webvtt(data, context);
 						break;
 					case CCX_OF_SAMI:
 						if (!context->startcredits_displayed && context->start_credits_text != NULL)
 							try_to_add_start_credits(context, data->start_time);
 						wrote_something = write_cc_buffer_as_sami(data, context);
 						break;
 					case CCX_OF_SMPTETT:
 						if (!context->startcredits_displayed && context->start_credits_text != NULL)
 							try_to_add_start_credits(context, data->start_time);
 						wrote_something = write_cc_buffer_as_smptett(data, context);
 						break;
 					case CCX_OF_TRANSCRIPT:
 						wrote_something = write_cc_buffer_as_transcript2(data, context);
 						break;
 					case CCX_OF_SPUPNG:
 						wrote_something = write_cc_buffer_as_spupng(data, context);
 						break;
 					case CCX_OF_SIMPLE_XML:
 						if (ccx_options.keep_output_closed && context->out->temporarily_closed)
 						{
 							temporarily_open_output(context->out);
 							write_subtitle_file_header(context, context->out);
 						}
 						wrote_something = write_cc_buffer_as_simplexml(data, context);
 						if (ccx_options.keep_output_closed)
 						{
 							write_subtitle_file_footer(context, context->out);
 							temporarily_close_output(context->out);
 						}
 						break;
 					default:
 						break;
 				}
-				if (wrote_something)
+				freep(&data->xds_str);
-					context->last_displayed_subs_ms = data->end_time;
+				write_newline(context, 0);
-
+				continue;
 				if (context->gui_mode_reports)
 					write_cc_buffer_to_gui(sub->data, context);
 			}
 			freep(&sub->data);
 		}
 		if (sub->type == CC_BITMAP)
 		{
 			switch (context->write_format)
 			{
 			case CCX_OF_SRT:
 				if (!context->startcredits_displayed && context->start_credits_text != NULL)
 					try_to_add_start_credits(context, sub->start_time);
 				wrote_something = write_cc_bitmap_as_srt(sub, context);
 				break;
 			case CCX_OF_SSA:
 				if (!context->startcredits_displayed && context->start_credits_text != NULL)
 					try_to_add_start_credits(context, sub->start_time);
 					wrote_something = write_cc_bitmap_as_ssa(sub, context);
 				break;
 			case CCX_OF_WEBVTT:
 				if (!context->startcredits_displayed && context->start_credits_text != NULL)
 					try_to_add_start_credits(context, sub->start_time);
 				wrote_something = write_cc_bitmap_as_webvtt(sub, context);
 				break;
 			case CCX_OF_SAMI:
 				if (!context->startcredits_displayed && context->start_credits_text != NULL)
 					try_to_add_start_credits(context, sub->start_time);
 				wrote_something = write_cc_bitmap_as_sami(sub, context);
 				break;
 			case CCX_OF_SMPTETT:
 				if (!context->startcredits_displayed && context->start_credits_text != NULL)
 					try_to_add_start_credits(context, sub->start_time);
 				wrote_something = write_cc_bitmap_as_smptett(sub, context);
 				break;
 			case CCX_OF_TRANSCRIPT:
 				wrote_something = write_cc_bitmap_as_transcript(sub, context);
 				break;
 			case CCX_OF_SPUPNG:
 				wrote_something = write_cc_bitmap_as_spupng(sub, context);
 				break;
 			case CCX_OF_SIMPLE_XML:
 				wrote_something = write_cc_bitmap_as_simplexml(sub, context);
 				break;
 #ifdef WITH_LIBCURL
 			case CCX_OF_CURL:
 				wrote_something = write_cc_bitmap_as_libcurl(sub, context);
 				break;
 #endif
 			default:
 				break;
 			}
-		}
+			data->end_time = data->end_time + context->subs_delay;
 		if (sub->type == CC_RAW)
 		{
 			if (context->send_to_srv)
 				net_send_header(sub->data, sub->nb_data);
 			else
 			{
 				ret = write(context->out->fh, sub->data, sub->nb_data);
 				if (ret < sub->nb_data) {
 					mprint("WARNING: Loss of data\n");
 				}
 			}
 			sub->nb_data = 0;
 		}
 		if (sub->type == CC_TEXT)
 		{
 			switch (context->write_format)
 			{
-			case CCX_OF_SRT:
+				case CCX_OF_SRT:
-				if (!context->startcredits_displayed && context->start_credits_text != NULL)
+					if (!context->startcredits_displayed && context->start_credits_text != NULL)
-					try_to_add_start_credits(context, sub->start_time);
+						try_to_add_start_credits(context, data->start_time);
-				wrote_something = write_cc_subtitle_as_srt(sub, context);
+					wrote_something = write_cc_buffer_as_srt(data, context);
-				break;
+					break;
-			case CCX_OF_SSA:
+				case CCX_OF_SSA:
-				if (!context->startcredits_displayed && context->start_credits_text != NULL)
+					if (!context->startcredits_displayed && context->start_credits_text != NULL)
-					try_to_add_start_credits(context, sub->start_time);
+						try_to_add_start_credits(context, data->start_time);
-				wrote_something = write_cc_subtitle_as_ssa(sub, context);
+					wrote_something = write_cc_buffer_as_ssa(data, context);
-				break;
+					break;
-			case CCX_OF_WEBVTT:
+				case CCX_OF_G608:
-				if (!context->startcredits_displayed && context->start_credits_text != NULL)
+					wrote_something = write_cc_buffer_as_g608(data, context);
-					try_to_add_start_credits(context, sub->start_time);
+					break;
-				wrote_something = write_cc_subtitle_as_webvtt(sub, context);
+				case CCX_OF_WEBVTT:
-				break;
+					if (!context->startcredits_displayed && context->start_credits_text != NULL)
-			case CCX_OF_SAMI:
+						try_to_add_start_credits(context, data->start_time);
-				if (!context->startcredits_displayed && context->start_credits_text != NULL)
+					wrote_something = write_cc_buffer_as_webvtt(data, context);
-					try_to_add_start_credits(context, sub->start_time);
+					break;
-				wrote_something = write_cc_subtitle_as_sami(sub, context);
+				case CCX_OF_SAMI:
-				break;
+					if (!context->startcredits_displayed && context->start_credits_text != NULL)
-			case CCX_OF_SMPTETT:
+						try_to_add_start_credits(context, data->start_time);
-				if (!context->startcredits_displayed && context->start_credits_text != NULL)
+					wrote_something = write_cc_buffer_as_sami(data, context);
-					try_to_add_start_credits(context, sub->start_time);
+					break;
-				wrote_something = write_cc_subtitle_as_smptett(sub, context);
+				case CCX_OF_SMPTETT:
-				break;
+					if (!context->startcredits_displayed && context->start_credits_text != NULL)
-			case CCX_OF_TRANSCRIPT:
+						try_to_add_start_credits(context, data->start_time);
-				wrote_something = write_cc_subtitle_as_transcript(sub, context);
+					wrote_something = write_cc_buffer_as_smptett(data, context);
-				break;
+					break;
-			case CCX_OF_SPUPNG:
+				case CCX_OF_TRANSCRIPT:
-				wrote_something = write_cc_subtitle_as_spupng(sub, context);
+					wrote_something = write_cc_buffer_as_transcript2(data, context);
-				break;
+					break;
-			case CCX_OF_SIMPLE_XML:
+				case CCX_OF_SPUPNG:
-				wrote_something = write_cc_subtitle_as_simplexml(sub, context);
+					wrote_something = write_cc_buffer_as_spupng(data, context);
-				break;
+					break;
-			default:
+				case CCX_OF_SIMPLE_XML:
-				break;
+					if (ccx_options.keep_output_closed && context->out->temporarily_closed)
 					{
 						temporarily_open_output(context->out);
 						write_subtitle_file_header(context, context->out);
 					}
 					wrote_something = write_cc_buffer_as_simplexml(data, context);
 					if (ccx_options.keep_output_closed)
 					{
 						write_subtitle_file_footer(context, context->out);
 						temporarily_close_output(context->out);
 					}
 					break;
 				default:
 					break;
 			}
-			sub->nb_data = 0;
+			if (wrote_something)
 				context->last_displayed_subs_ms = data->end_time;
 			if (context->gui_mode_reports)
 				write_cc_buffer_to_gui(sub->data, context);
 		}
 		freep(&sub->data);
 	}
 	if (sub->type == CC_BITMAP)
 	{
 		switch (context->write_format)
 		{
 		case CCX_OF_SRT:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_bitmap_as_srt(sub, context);
 			break;
 		case CCX_OF_SSA:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 				wrote_something = write_cc_bitmap_as_ssa(sub, context);
 			break;
 		case CCX_OF_WEBVTT:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_bitmap_as_webvtt(sub, context);
 			break;
 		case CCX_OF_SAMI:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_bitmap_as_sami(sub, context);
 			break;
 		case CCX_OF_SMPTETT:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_bitmap_as_smptett(sub, context);
 			break;
 		case CCX_OF_TRANSCRIPT:
 			wrote_something = write_cc_bitmap_as_transcript(sub, context);
 			break;
 		case CCX_OF_SPUPNG:
 			wrote_something = write_cc_bitmap_as_spupng(sub, context);
 			break;
 		case CCX_OF_SIMPLE_XML:
 			wrote_something = write_cc_bitmap_as_simplexml(sub, context);
 			break;
 #ifdef WITH_LIBCURL
 		case CCX_OF_CURL:
 			wrote_something = write_cc_bitmap_as_libcurl(sub, context);
 			break;
 #endif
 		default:
 			break;
 		}
 	}
 	if (sub->type == CC_RAW)
 	{
 		if (context->send_to_srv)
 			net_send_header(sub->data, sub->nb_data);
 		else
 		{
 			ret = write(context->out->fh, sub->data, sub->nb_data);
 			if (ret < sub->nb_data) {
 				mprint("WARNING: Loss of data\n");
 			}
 		}
 		sub->nb_data = 0;
 	}
 	if (sub->type == CC_TEXT)
 	{
 		switch (context->write_format)
 		{
 		case CCX_OF_SRT:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_subtitle_as_srt(sub, context);
 			break;
 		case CCX_OF_SSA:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_subtitle_as_ssa(sub, context);
 			break;
 		case CCX_OF_WEBVTT:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_subtitle_as_webvtt(sub, context);
 			break;
 		case CCX_OF_SAMI:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_subtitle_as_sami(sub, context);
 			break;
 		case CCX_OF_SMPTETT:
 			if (!context->startcredits_displayed && context->start_credits_text != NULL)
 				try_to_add_start_credits(context, sub->start_time);
 			wrote_something = write_cc_subtitle_as_smptett(sub, context);
 			break;
 		case CCX_OF_TRANSCRIPT:
 			wrote_something = write_cc_subtitle_as_transcript(sub, context);
 			break;
 		case CCX_OF_SPUPNG:
 			wrote_something = write_cc_subtitle_as_spupng(sub, context);
 			break;
 		case CCX_OF_SIMPLE_XML:
 			wrote_something = write_cc_subtitle_as_simplexml(sub, context);
 			break;
 		default:
 			break;
 		}
 		sub->nb_data = 0;
 	}
 	if (!sub->nb_data)
 		freep(&sub->data);
 	if (wrote_something && context->force_flush)
--- a/src/lib_ccx/ccx_encoders_common.h
+++ b/src/lib_ccx/ccx_encoders_common.h
@ -118,15 +118,14 @@ struct encoder_ctx
 	/* split-by-sentence stuff */
 	int splitbysentence;
 	LLONG sbs_newblock_start_time; // Used by the split-by-sentence code to know when the current block starts...
 	LLONG sbs_newblock_end_time; // ... and ends
 	ccx_sbs_utf8_character *sbs_newblock;
 	int sbs_newblock_capacity;
 	int sbs_newblock_size;
 	ccx_sbs_utf8_character *sbs_buffer;
 	int sbs_buffer_capacity;
 	int sbs_buffer_size;
 	unsigned char * sbs_buffer; /// Storage for sentence-split buffer
 	size_t sbs_handled_len; /// The length of the string in the SBS-buffer, already handled, but preserved for DUP-detection.
 	//ccx_sbs_utf8_character *sbs_newblock;
 	LLONG sbs_time_from; // Used by the split-by-sentence code to know when the current block starts...
 	LLONG sbs_time_trim; // ... and ends
 	size_t sbs_capacity;
 };
 #define INITIAL_ENC_BUFFER_CAPACITY	2048
@ -196,10 +195,9 @@ int write_cc_bitmap_as_sami            (struct cc_subtitle *sub, struct encoder_
 int write_cc_bitmap_as_smptett         (struct cc_subtitle *sub, struct encoder_ctx *context);
 int write_cc_bitmap_as_spupng          (struct cc_subtitle *sub, struct encoder_ctx *context);
 int write_cc_bitmap_as_transcript      (struct cc_subtitle *sub, struct encoder_ctx *context);
 int write_cc_bitmap_to_sentence_buffer (struct cc_subtitle *sub, struct encoder_ctx *context);
 int write_cc_bitmap_as_libcurl         (struct cc_subtitle *sub, struct encoder_ctx *context);
-
+struct cc_subtitle * reformat_cc_bitmap_through_sentence_buffer (struct cc_subtitle *sub, struct encoder_ctx *context);
 void set_encoder_last_displayed_subs_ms(struct encoder_ctx *ctx, LLONG last_displayed_subs_ms);
 void set_encoder_subs_delay(struct encoder_ctx *ctx, LLONG subs_delay);
--- a/src/lib_ccx/ccx_encoders_splitbysentence.c
+++ b/src/lib_ccx/ccx_encoders_splitbysentence.c
@ -1,135 +1,457 @@
-#include "ccx_decoders_common.h"
+#include "ccx_common_platform.h"
 #include "ccx_encoders_common.h"
-#include "spupng_encoder.h"
+#include "lib_ccx.h"
 #include "ccx_encoders_spupng.h"
 #include "utility.h"
 #include "ocr.h"
-#include "ccx_decoders_608.h"
+#include "debug_def.h"
 #include "ccx_decoders_708.h"
 #include "ccx_decoders_708_output.h"
 #include "ccx_encoders_xds.h"
 #include "ccx_encoders_helpers.h"
 #include "utf8proc.h"
 #ifdef ENABLE_SHARING
 #include "ccx_share.h"
 #endif //ENABLE_SHARING
-void lbl_start_block(LLONG start_time, struct encoder_ctx *context)
+int sbs_is_pointer_on_sentence_breaker(char * start, char * current)
 {
-	context->sbs_newblock_start_time = start_time;
+	char c = *current;
-}
+	char n = *(current + 1);
 	char p = *(current - 1);
-void lbl_add_character(struct encoder_ctx *context, ccx_sbs_utf8_character ch)
+	if (0 == c) n = 0;
-{
+	if (current == start) p = 0;
-	if (context->sbs_newblock_capacity == context->sbs_newblock_size)
+
 	if (0 == c) return 1;
 	if ('.' == c
 		|| '!' == c
 		|| '?' == c
 	)
 	{
-		int newcapacity = (context->sbs_newblock_capacity < 512) ? 1024 : context->sbs_newblock_capacity * 2;
+		if ('.' == n
-		context->sbs_newblock = (ccx_sbs_utf8_character *)realloc(context->sbs_newblock, newcapacity*sizeof(ccx_sbs_utf8_character));		
+			|| '!' == n
-		if (!context->sbs_newblock)
+			|| '?' == n
-			fatal(EXIT_NOT_ENOUGH_MEMORY, "Not enough memory in lbl_add_character");
+		)
-		context->sbs_newblock_capacity = newcapacity;
+		{
 			return 0;
 		}
 		return 1;
 	}
-	memcpy(&context->sbs_newblock[context->sbs_newblock_size++], &ch, sizeof ch);
+
 	return 0;
 }
-void lbl_end_block(LLONG end_time, struct encoder_ctx *context)
+int sbs_fuzzy_strncmp(const char * a, const char * b, size_t n, const size_t maxerr)
 {
-	context->sbs_newblock_end_time = end_time;
+	// TODO: implement fuzzy comparing
 	// Error counter DOES NOT WORK!!!
 	int i;
 	//int err;
 	char A, B;
 	i = -1;
 	do
 	{
 		i++;
 		// Bound check (compare to N)
 		if (i == n) return 0;
 		A = a[i];
 		B = b[i];
 		// bound check (line endings)
 		if (A == 0)
 		{
 			if (B == 0) return 0;
 			return 1;
 		}
 		else
 		{
 			if (B == 0) return -1;
 		}
 		if (A == B) continue;
 		if (isspace(A) && isspace(B)) continue;
 		if (A > B) return 1;
 		return -1;
 	} while(1);
 }
-int write_cc_bitmap_to_sentence_buffer(struct cc_subtitle *sub, struct encoder_ctx *context)
+void sbs_strcpy_without_dup(const unsigned char * str, struct encoder_ctx * context)
 {
 	int intersect_len;
 	unsigned char * suffix;
 	const unsigned char * prefix = str;
 	unsigned long sbs_len;
 	unsigned long str_len;
 	str_len = strlen(str);
 	sbs_len = strlen(context->sbs_buffer);
 	intersect_len = str_len;
 	if (sbs_len < intersect_len)
 		intersect_len = sbs_len;
 	while (intersect_len>0)
 	{
 		suffix = context->sbs_buffer + sbs_len - intersect_len;
 		if (0 == sbs_fuzzy_strncmp(prefix, suffix, intersect_len, 1))
 		{
 			break;
 		}
 		intersect_len--;
 	}
 	LOG_DEBUG("Sentence Buffer: sbs_strcpy_without_dup, intersection len [%4d]\n", intersect_len);
 	// check, that new string does not contain data, from
 	// already handled sentence:
 	LOG_DEBUG("Sentence Buffer: sbs_strcpy_without_dup, sbslen [%4d] handled len [%4d]\n", sbs_len, context->sbs_handled_len);
 	if ( (sbs_len - intersect_len) >= context->sbs_handled_len)
 	{
 		// there is no intersection.
 		// It is time to clean the buffer. Excepting the last uncomplete sentence
 		strcpy(context->sbs_buffer, context->sbs_buffer + context->sbs_handled_len);
 		context->sbs_handled_len = 0;
 		sbs_len = strlen(context->sbs_buffer);
 		LOG_DEBUG("Sentence Buffer: Clean buffer, after BUF [%s]\n\n\n", context->sbs_buffer);
 	}
 	if (intersect_len > 0)
 	{
 		// there is a common part (suffix of old sentence equals to prefix of new str)
 		//
 		// remove dup from buffer
 		// we will use an appropriate part from the new string
 		context->sbs_buffer[sbs_len-intersect_len] = 0;
 	}
 	sbs_len = strlen(context->sbs_buffer);
 	// whitespace control. Add space between subs
 	if (
 		!isspace(str[0])                // not a space char in the beginning of new str
 		&& context->sbs_handled_len >0  // buffer is not empty (there is uncomplete sentence)
 		&& !isspace(context->sbs_buffer[sbs_len-1])  // not a space char at the end of existing buf
 	)
 	{
 		//strcat(context->sbs_buffer, " ");
 	}
 	strcat(context->sbs_buffer, str);
 }
 /**
 * Normalizes a string in place:
 *  - every whitespace character becomes a plain space;
 *  - a stand-alone '|' (start of string or whitespace before it; end of string,
 *    whitespace or an apostrophe after it) is rewritten to 'I'.
 *
 * @param  str  NUL-terminated string, modified in place
 */
 void sbs_str_autofix(unsigned char * str)
 {
 	unsigned char * cur;
 	for (cur = str; *cur != 0; cur++)
 	{
 		if (isspace(*cur))
 			*cur = ' ';
 		if (*cur == '|')
 		{
 			const unsigned char following = cur[1];
 			const int bounded_left = (cur == str) || isspace(cur[-1]);
 			const int bounded_right = (following == 0) || isspace(following) || (following == '\'');
 			// try to convert to "I"
 			if (bounded_left && bounded_right)
 				*cur = 'I';
 		}
 	}
 }
 /**
 * Appends the function to the sentence buffer, and returns a list of full sentences (if there are any), or NULL
 *
 * @param  str       Partial (or full) sub to append.
 * @param  time_from Starting timestamp
 * @param  time_trim Ending timestamp
 * @param  context   Encoder context
 * @return           New <struct cc_subtitle *> subtitle, or NULL, if <str> doesn't contain the ending part of the sentence. If there are more than one sentence, the remaining sentences will be chained using <result->next> reference.
 */
 struct cc_subtitle * sbs_append_string(unsigned char * str, const LLONG time_from, const LLONG time_trim, struct encoder_ctx * context)
 {
 	struct cc_subtitle * resub;
 	struct cc_subtitle * tmpsub;
 	unsigned char * bp_current;
 	unsigned char * bp_last_break;
 	unsigned char * sbs_undone_start;
 	int is_buf_initialized;
 	int required_capacity;
 	int new_capacity;
 	LLONG alphanum_total;
 	LLONG alphanum_cur;
 	LLONG anychar_total;
 	LLONG anychar_cur;
 	LLONG duration;
 	LLONG available_time;
 	int use_alphanum_counters;
 	if (! str)
 		return NULL;
 	sbs_str_autofix(str);
 	is_buf_initialized = (NULL == context->sbs_buffer || context->sbs_capacity == 0)
 		? 0
 		: 1;
 	// ===============================
 	// grow sentence buffer
 	// ===============================
 	required_capacity =
 		(is_buf_initialized ? strlen(context->sbs_buffer) : 0)    // existing data in buf
 		+ strlen(str)     // length of new string
 		+ 1               // trailing \0
 		+ 1               // space control (will add one space , if required)
 	;
 	if (required_capacity >= context->sbs_capacity)
 	{
 		new_capacity = context->sbs_capacity;
 		if (! is_buf_initialized) new_capacity = 16;
 		while (new_capacity < required_capacity)
 		{
 			// increase NEW_capacity, and check, that increment
 			// is less than 8 Mb. Because 8Mb - it is a lot
 			// for a TEXT buffer. It is weird...
 			new_capacity += (new_capacity > 1048576 * 8)
 				? 1048576 * 8
 				: new_capacity;
 		}
 		context->sbs_buffer = (unsigned char *)realloc(
 			context->sbs_buffer,
 			new_capacity * sizeof(/*unsigned char*/ context->sbs_buffer[0] )
 		);
 		if (!context->sbs_buffer)
 			fatal(EXIT_NOT_ENOUGH_MEMORY, "Not enough memory in sbs_append_string");
 		context->sbs_capacity = new_capacity;
 		// if buffer wasn't initialized, we will se trash in buffer.
 		// but we need just empty string, so here we will get it:
 		if (! is_buf_initialized)
 		{
 			// INIT SBS
 			context->sbs_buffer[0] = 0;
 			context->sbs_handled_len = 0;
 		}
 	}
 	// ===============================
 	// append to buffer
 	//
 	// will update sbs_buffer, sbs_handled_len
 	// ===============================
 	sbs_strcpy_without_dup(str, context);
 	// ===============================
 	// break to sentences
 	// ===============================
 	resub = NULL;
 	tmpsub = NULL;
 	alphanum_total = 0;
 	alphanum_cur = 0;
 	anychar_total = 0;
 	anychar_cur = 0;
 	sbs_undone_start = context->sbs_buffer + context->sbs_handled_len;
 	bp_last_break = sbs_undone_start;
 	LOG_DEBUG("Sentence Buffer: BEFORE sentence break. Last break: [%s]  sbs_undone_start: [%d], sbs_undone: [%s]\n",
 		bp_last_break, context->sbs_handled_len, sbs_undone_start
 	);
 	for (bp_current = sbs_undone_start; bp_current && *bp_current; bp_current++)
 	{
 		if (
 			0 < anychar_cur	// skip empty!
 			&& sbs_is_pointer_on_sentence_breaker(bp_last_break, bp_current) )
 		{
 			// it is new sentence!
 			tmpsub = malloc(sizeof(struct cc_subtitle));
 			tmpsub->type = CC_TEXT;
 			// length of new string:
 			tmpsub->nb_data =
 				bp_current - bp_last_break
 				+ 1	 // terminating '\0'
 				+ 1  // skip '.'
 			;
 			tmpsub->data = strndup(bp_last_break, tmpsub->nb_data - 1);
 			tmpsub->got_output = 1;
 			tmpsub->start_time = alphanum_cur;
 			alphanum_cur = 0;
 			tmpsub->end_time = anychar_cur;
 			anychar_cur = 0;
 			bp_last_break = bp_current + 1;
 			// tune last break:
 			while (
 				*bp_last_break
 				&& isspace(*bp_last_break)
 			)
 			{
 				bp_last_break++;
 			}
 			// ???
 			// tmpsub->info = NULL;
 			// tmpsub->mode = NULL;
 			// link with prev sub:
 			tmpsub->next = NULL;
 			tmpsub->prev = resub;
 			if (NULL != resub)
 			{
 				resub->next = tmpsub;
 			}
 			resub = tmpsub;
 		}
 		if (*bp_current && isalnum(*bp_current))
 		{
 			alphanum_total++;
 			alphanum_cur++;
 		}
 		anychar_total++;
 		anychar_cur++;
 	}
 	// ===============================
 	// okay, we have extracted several sentences, now we should
 	// save the position of the "remainder" - start of the last
 	// incomplete sentece
 	// ===============================
 	if (bp_last_break != sbs_undone_start)
 	{
 		context->sbs_handled_len = bp_last_break - sbs_undone_start;
 	}
 	LOG_DEBUG("Sentence Buffer: AFTER sentence break: Handled Len [%4d]\n", context->sbs_handled_len);
 	LOG_DEBUG("Sentence Buffer: Alphanum Total: [%4d]  Overall chars: [%4d]  STRING:[%20s]  BUFFER:[%20s]\n", alphanum_total, anychar_total, str, context->sbs_buffer);
 	// ===============================
 	// Calculate time spans
 	// ===============================
 	if (!is_buf_initialized)
 	{
 		context->sbs_time_from = time_from;
 		context->sbs_time_trim = time_trim;
 	}
 	available_time = time_trim - context->sbs_time_from;
 	use_alphanum_counters = alphanum_total > 0 ? 1 : 0;
 	tmpsub = resub;
 	while (tmpsub)
 	{
 		alphanum_cur = tmpsub->start_time;
 		anychar_cur = tmpsub->end_time;
 		if (use_alphanum_counters)
 		{
 			duration = available_time * alphanum_cur / alphanum_total;
 		}
 		else
 		{
 			duration = available_time * anychar_cur / anychar_total;
 		}
 		tmpsub->start_time = context->sbs_time_from;
 		tmpsub->end_time = tmpsub->start_time + duration;
 		context->sbs_time_from = tmpsub->end_time + 1;
 		tmpsub = tmpsub->next;
 	}
 	return resub;
 }
 struct cc_subtitle * reformat_cc_bitmap_through_sentence_buffer(struct cc_subtitle *sub, struct encoder_ctx *context)
 {
 	int ret = 0;
 #ifdef ENABLE_OCR
 	struct cc_bitmap* rect;
 	LLONG ms_start, ms_end;
 	int used;
 	int i = 0;
 	char *str;
-	if (context->prev_start != -1 && (sub->flags & SUB_EOD_MARKER))
+	// this is a sub with a full sentence (or chain of such subs)
 	struct cc_subtitle * resub = NULL;
 #ifdef ENABLE_OCR
 	if (sub->flags & SUB_EOD_MARKER)
 	{
-		ms_start = context->prev_start;
+		// the last sub from input
-		ms_end = sub->start_time;
+
 		if (context->prev_start == -1)
 		{
 			ms_start = 1;
 			ms_end = sub->start_time;
 		}
 		else
 		{
 			ms_start = context->prev_start;
 			ms_end = sub->start_time;
 		}
 	}
-	else if (!(sub->flags & SUB_EOD_MARKER))
+	else
 	{
 		// not the last sub from input
 		ms_start = sub->start_time;
 		ms_end = sub->end_time;
 	}
 	else if (context->prev_start == -1 && (sub->flags & SUB_EOD_MARKER))
 	{
 		ms_start = 1;
 		ms_end = sub->start_time;
 	}
 	if (sub->nb_data == 0)
-		return ret;
+		return 0;
 	rect = sub->data;
 	if (sub->flags & SUB_EOD_MARKER)
 		context->prev_start = sub->start_time;
-
+	str = paraof_ocrtext(sub, " ", 1);
-	if (rect[0].ocr_text && *(rect[0].ocr_text))
+	if (str)
 	{
 		lbl_start_block(ms_start, context);
 		if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER))
 		{
-			char *token = NULL;
+			resub = sbs_append_string(str, ms_start, ms_end, context);
 			token = paraof_ocrtext(sub, " ", 1); // Get text with spaces instead of newlines
 			uint32_t offset=0;
 			utf8proc_ssize_t ls; // Last size
 			char *s = token;
 			int32_t uc;
 			while ((ls=utf8proc_iterate(s, -1, &uc))) 
 			{
 				ccx_sbs_utf8_character sbsc;
 				// Note: We don't care about uc here, since we will be writing the encoded bytes, not the code points in binary.
 				//TODO: Deal with ls < 0
 				if (!uc) // End of string
 					break; 
 				printf("%3ld | %08X | %c %c %c %c\n", ls, uc, ((uc & 0xFF000000) >> 24),  ((uc & 0xFF0000) >> 16), 
 					((uc & 0xFF00) >> 8), ( uc & 0xFF));				
 				sbsc.ch = uc;
 				sbsc.encoded[0] = 0; sbsc.encoded[1] = 0; sbsc.encoded[2] = 0; sbsc.encoded[3] = 0;
 				memcpy(sbsc.encoded, s, ls);
 				sbsc.enc_len = ls;
 				sbsc.ts = 0; // We don't know yet
 				lbl_add_character(context, sbsc);
 				s += ls;				
 				// TO-DO: Add each of these characters to the buffer, splitting the timestamps. Remember to add character length to the array
 			}
 			printf("-------\n");
 			/*
 			while (token)
 			{
 				char *newline_pos = strstr(token, context->encoded_crlf);
 				if (!newline_pos)
 				{
 					fdprintf(context->out->fh, "%s", token);
 					break;
 				}
 				else
 				{
 					while (token != newline_pos)
 					{
 						fdprintf(context->out->fh, "%c", *token);
 						token++;
 					}
 					token += context->encoded_crlf_length;
 					fdprintf(context->out->fh, "%c", ' ');
 				}
 			}*/
 		}
-		lbl_end_block(ms_end, context);
+		freep(&str);
 	}
 	for(i = 0, rect = sub->data; i < sub->nb_data; i++, rect++)
 	{
 		freep(rect->data);
 		freep(rect->data+1);
 	}
 #endif
 	sub->nb_data = 0;
 	freep(&sub->data);
-	return ret;
+	return resub;
 }
--- a/src/lib_ccx/debug_def.h
+++ b/src/lib_ccx/debug_def.h
@ -0,0 +1,11 @@
 #ifndef _DEBUG_DEF_H_
 #define _DEBUG_DEF_H_
 /*
  * LOG_DEBUG(fmt, ...) - printf-style trace output.
  *
  * With DEBUG defined the call is forwarded to printf().
  * Without DEBUG it expands to a single no-op expression: the arguments are
  * not evaluated, and the macro stays safe in constructs such as
  *     if (cond) LOG_DEBUG("..."); else ...
  */
 #ifdef DEBUG
 #include <stdio.h>
 #define LOG_DEBUG(...) printf(__VA_ARGS__)
 #else
 #define LOG_DEBUG(...) ((void)0)
 #endif
 #endif
--- a/tests/Makefile
+++ b/tests/Makefile
@ -0,0 +1,59 @@
 # Builds and runs the libcheck-based unit tests.
 #   make          - clean, rebuild and run every *_suite.c
 #   DEBUG=1 make  - same, with -DDEBUG added to the compiler flags
 SHELL = /bin/sh
 CC=gcc
 # SYS := $(shell gcc -dumpmachine)
 CFLAGS=-O0 -std=gnu99 -D ENABLE_OCR -g -ggdb -rdynamic
 #-Q -da -v
 # enable COVERAGE
 # CFLAGS+=-fprofile-arcs -ftest-coverage
 # add debug flag
 ifdef DEBUG
 CFLAGS+=-DDEBUG
 endif
 #ALL_FLAGS = -Wno-write-strings -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT
 LDFLAGS=-lm -g
 CFLAGS+=$(shell pkg-config --cflags check)
 LDFLAGS+=$(shell pkg-config --libs check)
 # TODO: need to rewrite this. Need new way to load sources for testing
 SRC=$(wildcard ../src/lib_ccx/ccx_encoders_splitbysentence.c)
 OBJS=
 SRC_SUITE=$(wildcard *_suite.c)
 OBJ_SUITE=$(patsubst %_suite.c, %_suite.o, $(SRC_SUITE))
 OBJS+=$(OBJ_SUITE)
 # "all" names no file, so mark it phony like "test" and "clean"
 .PHONY: all
 all: clean test
 %.o: %.c
 	# explicit output name :  -o $@
 	$(CC) -c $(ALL_FLAGS) $(CFLAGS) $<
 runtest: $(OBJS)
 	@echo "+----------------------------------------------+"
 	@echo "|                 BUILD TESTS                  |"
 	@echo "+----------------------------------------------+"
 	$(CC) -c $(ALL_FLAGS) $(CFLAGS) $@.c
 	$(CC) $(SRC) $@.o $^ $(ALL_FLAGS) $(CFLAGS) $(LDFLAGS) -o $@
 .PHONY: test
 test: runtest
 	@echo "+----------------------------------------------+"
 	@echo "|                 START TESTS                  |"
 	@echo "+----------------------------------------------+"
 	./runtest
 .PHONY: clean
 clean:
 	# rm -f stays silent and succeeds when there is nothing to remove
 	rm -f runtest
 	rm -f *.o
 	# coverage info
 	rm -f *.gcda
 	rm -f *.gcno
 	# debug info
 	rm -f *.c.*
--- a/tests/README.md
+++ b/tests/README.md
@ -0,0 +1,43 @@
 # UNIT TESTING
 This folder contains an archetype and several unit tests for CCExtractor.
 ## RUN TESTS
 ```shell
 cd tests
 make
 ```
 This will build and run all test suites.
 If you want MORE output:
 ```shell
 DEBUG=1 make
 ```
 Where `DEBUG` is just an environment variable.
 ## DEBUGGING
 If tests fail after your changes, you can debug them (almost all flags needed for this are already set in `tests/Makefile`).
 Run:
 ```shell
 # build test runner
 make
 # load the test runner into the debugger:
 gdb runtest
 # run under debugger:
 (gdb) run
 # on segfault:
 (gdb) where
 ```
 ## DEPENDENCIES
 Tests are built around this library: [**libcheck**](https://github.com/libcheck/check), here is [**documentation**](https://libcheck.github.io/check/)
--- a/tests/ccx_encoders_splitbysentence_suite.c
+++ b/tests/ccx_encoders_splitbysentence_suite.c
@ -0,0 +1,305 @@
 #include <check.h>
 #include "ccx_encoders_splitbysentence_suite.h"
 // -------------------------------------
 // MOCKS
 // -------------------------------------
 typedef int64_t LLONG;
 #include "../src/lib_ccx/ccx_encoders_common.h"
 // -------------------------------------
 // Private SBS-functions (for testing only)
 // -------------------------------------
 struct cc_subtitle * sbs_append_string(unsigned char * str, LLONG time_from, LLONG time_trim, struct encoder_ctx * context);
 // -------------------------------------
 // Helpers
 // -------------------------------------
 /*
  * Builds a heap-allocated CC_BITMAP subtitle for the tests.
  *
  * str       - text stored (as a strdup'ed copy) in sub->data; the mocked
  *             paraof_ocrtext() hands this text back instead of running OCR
  * time_from - value stored in sub->start_time
  * time_trim - value stored in sub->end_time
  *
  * Returns the new subtitle; the caller owns it and its data. Allocation
  * failures abort the current test through ck_assert.
  */
 struct cc_subtitle * helper_create_sub(char * str, LLONG time_from, LLONG time_trim)
 {
 	// calloc: every field not assigned below starts zeroed instead of indeterminate
 	struct cc_subtitle * sub = calloc(1, sizeof(*sub));
 	ck_assert_ptr_ne(sub, NULL);
 	sub->type = CC_BITMAP;
 	// honour the requested time range (it used to be hard-coded to 1..100)
 	sub->start_time = time_from;
 	sub->end_time = time_trim;
 	sub->data = strdup(str);
 	ck_assert_ptr_ne(sub->data, NULL);
 	sub->nb_data = strlen(sub->data);
 	return sub;
 }
 /*
  * Calls sbs_append_string() on a private heap copy of `str`, so string
  * literals can be passed in, and releases that copy once the call returns.
  * Returns whatever sbs_append_string() returned.
  */
 struct cc_subtitle * helper_sbs_append_string(char * str, LLONG time_from, LLONG time_trim, struct encoder_ctx * context)
 {
 	char * copy = strdup(str);
 	struct cc_subtitle * result = sbs_append_string(copy, time_from, time_trim, context);
 	free(copy);
 	return result;
 }
 // -------------------------------------
 // MOCKS
 // -------------------------------------
 struct encoder_ctx * context;
 /* Test stand-in for freep(): does nothing, so nothing passed to it is released. */
 void freep(void * obj)
 {
 	(void)obj;
 }
 /* Test stand-in for fatal(): ignores both arguments and simply returns. */
 void fatal(int x, void * obj)
 {
 	(void)x;
 	(void)obj;
 }
 /*
  * Test stand-in for the OCR -> text converter.
  * The test cases store plain text in the subtitle instead of OCR data,
  * so the mock just returns the subtitle's own data pointer unchanged.
  */
 unsigned char * paraof_ocrtext(void * sub)
 {
 	struct cc_subtitle * subtitle = sub;
 	return subtitle->data;
 }
 // -------------------------------------
 // TEST preparations
 // -------------------------------------
 /*
  * Checked fixture: creates a fresh encoder context with an empty sentence
  * buffer before every test.
  */
 void setup(void)
 {
 	// zero-filled, so fields this fixture does not set explicitly start from a known state
 	context = calloc(1, sizeof(*context));
 	ck_assert_ptr_ne(context, NULL);
 	context->sbs_buffer = NULL;
 	context->sbs_capacity = 0;
 }
 /*
  * Checked fixture: releases the encoder context created by setup().
  */
 void teardown(void)
 {
 	// NOTE(review): context->sbs_buffer is presumably allocated by the code under test
 	// and is not released here - confirm ownership before freeing it as well
 	free(context);
 	// tests run without forking, so do not leave a dangling pointer for the next one
 	context = NULL;
 }
 // -------------------------------------
 // TESTS
 // -------------------------------------
 /* A single subtitle that already is a full sentence must come back as one unlinked subtitle. */
 START_TEST(test_sbs_one_simple_sentence)
 {
 	struct cc_subtitle * input = helper_create_sub("Simple sentence.", 1, 100);
 	struct cc_subtitle * result = reformat_cc_bitmap_through_sentence_buffer(input, context);
 	ck_assert_ptr_ne(result, NULL);
 	ck_assert_str_eq(result->data, "Simple sentence.");
 	// exactly one sentence: no neighbours in the returned list
 	ck_assert_ptr_eq(result->next, NULL);
 	ck_assert_ptr_eq(result->prev, NULL);
 }
 END_TEST
 /* The second subtitle repeats the text of the first one; the repeated part must appear only once. */
 START_TEST(test_sbs_two_sentences_with_rep)
 {
 	// first sub: no sentence end yet, so nothing is returned
 	struct cc_subtitle * first_in = helper_create_sub("asdf", 1, 100);
 	struct cc_subtitle * first_out = reformat_cc_bitmap_through_sentence_buffer(first_in, context);
 	ck_assert_ptr_eq(first_out, NULL);
 	// second sub: repeats "asdf" and completes the sentence
 	struct cc_subtitle * second_in = helper_create_sub("asdf Hello.", 101, 200);
 	struct cc_subtitle * second_out = reformat_cc_bitmap_through_sentence_buffer(second_in, context);
 	ck_assert_ptr_ne(second_out, NULL);
 	ck_assert_str_eq(second_out->data, "asdf Hello.");
 	ck_assert_ptr_eq(second_out->next, NULL);
 	ck_assert_ptr_eq(second_out->prev, NULL);
 }
 END_TEST
 /* Two complete, unrelated sentences: each call must return its own sentence with its own time range. */
 START_TEST(test_sbs_append_string_two_separate)
 {
 	char * test_strings[] = {
 		"First string.",
 		"Second string."
 	};
 	struct cc_subtitle * sub;
 	// first string
 	// (the helper passes a temporary copy and frees it, so no input string is leaked)
 	sub = helper_sbs_append_string(test_strings[0], 1, 20, context);
 	ck_assert_ptr_ne(sub, NULL);
 	ck_assert_str_eq(sub->data, test_strings[0]);
 	ck_assert_int_eq(sub->start_time, 1);
 	ck_assert_int_eq(sub->end_time, 20);
 	// second string:
 	sub = helper_sbs_append_string(test_strings[1], 21, 40, context);
 	ck_assert_ptr_ne(sub, NULL);
 	ck_assert_str_eq(sub->data, test_strings[1]);
 	ck_assert_int_eq(sub->start_time, 21);
 	ck_assert_int_eq(sub->end_time, 40);
 }
 END_TEST
 /* One sentence split across two inputs: nothing is returned until the sentence end arrives. */
 START_TEST(test_sbs_append_string_two_with_broken_sentence)
 {
 	// important !!
 	// summary len == 32
 	char * test_strings[] = {
 		"First string",
 		" ends here, deabbea."
 	};
 	struct cc_subtitle * sub;
 	// first string
 	// (the helper passes a temporary copy and frees it, so no input string is leaked)
 	sub = helper_sbs_append_string(test_strings[0], 1, 3, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// second string:
 	sub = helper_sbs_append_string(test_strings[1], 4, 5, context);
 	ck_assert_ptr_ne(sub, NULL);
 	ck_assert_str_eq(sub->data, "First string ends here, deabbea.");
 	// the joined sentence spans from the start of the first input to the end of the second
 	ck_assert_int_eq(sub->start_time, 1);
 	ck_assert_int_eq(sub->end_time, 5);
 }
 END_TEST
 /* The second input starts with the whole first input: the overlap must not be duplicated. */
 START_TEST(test_sbs_append_string_two_intersecting)
 {
 	char * test_strings[] = {
 		"First string",
 		"First string ends here."
 	};
 	struct cc_subtitle * sub;
 	// first string
 	// (the helper passes a temporary copy and frees it, so no input string is leaked)
 	sub = helper_sbs_append_string(test_strings[0], 1, 20, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// second string:
 	sub = helper_sbs_append_string(test_strings[1], 21, 40, context);
 	ck_assert_ptr_ne(sub, NULL);
 	ck_assert_str_eq(sub->data, "First string ends here.");
 	ck_assert_int_eq(sub->start_time, 1);
 	ck_assert_int_eq(sub->end_time, 40);
 }
 END_TEST
 // Replays a sequence of progressively growing, overlapping subtitle texts through
 // sbs_append_string() and checks which calls yield a finished sentence and with which time range.
 // The calls share the sentence buffer kept in `context`, so their order matters.
 // NOTE(review): the step labels below skip numbers (7 -> 9 -> 13 ...); presumably they are the
 // indices of the captured subtitles this data was taken from - confirm.
 START_TEST(test_sbs_append_string_real_data_1)
 {
 	struct cc_subtitle * sub;
 	// 1
 	sub = helper_sbs_append_string("Oleon",
 		1, 0, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// 2
 	sub = helper_sbs_append_string("Oleon costs.",
 		1, 189, context);
 	ck_assert_ptr_ne(sub, NULL);
 	ck_assert_str_eq(sub->data, "Oleon costs.");
 	// 3
 	sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
 Didn't",
 		190, 889, context);
 	ck_assert_ptr_ne(sub, NULL);
 	ck_assert_str_eq(sub->data, "buried in the annex, 95 Oleon costs.");
 	ck_assert_int_eq(sub->start_time, 190);    // = <sub start>
 	ck_assert_int_eq(sub->end_time, 783);      // = <sub start>  +  <available time,889-190=699 > * <sentence alphanum, 28>  /  <sub alphanum, 33>
 	ck_assert_ptr_eq(sub->next, NULL);
 	// 4
 	sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
 Didn't want",
 		890, 1129, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// 5
 	sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
 Didn't want to",
 		1130, 1359, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// 6
 	sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
 Didn't want to acknowledge",
 		1360, 2059, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// 7
 	sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
 Didn't want to acknowledge the",
 		2060, 2299, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// 9
 	sub = helper_sbs_append_string("Didn't want to acknowledge the\n\
 pressures on hospitals, schools and",
 		2300, 5019, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// 13
 	sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
 infrastructure.",
 		5020, 5159, context);
 	ck_assert_ptr_ne(sub, NULL);
 	// the sentence that began in step 3 is only completed here
 	ck_assert_str_eq(sub->data, "Didn't want to acknowledge the pressures on hospitals, schools and infrastructure.");
 	ck_assert_int_eq(sub->start_time, 784);    // one past the end_time (783) asserted in step 3
 	ck_assert_int_eq(sub->end_time, 5159);
 	ck_assert_ptr_eq(sub->next, NULL);
 	// 14
 	sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
 infrastructure. If",
 		5160, 5529, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// 16
 	sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
 infrastructure. If we go",
 		5530, 6559, context);
 	ck_assert_ptr_eq(sub, NULL);
 	// sub is NULL at this point ("If we go" is still unfinished), so there are no times to check
 }
 END_TEST
 /*
  * Assembles the "Sentence Buffer" suite: one test case for the whole
  * reformat path and one for sbs_append_string(). Both cases run every test
  * between the setup()/teardown() checked fixtures.
  */
 Suite * ccx_encoders_splitbysentence_suite(void)
 {
 	Suite *suite = suite_create("Sentence Buffer");
 	/* Overall tests */
 	TCase *overall = tcase_create("SB: Overall");
 	tcase_add_checked_fixture(overall, setup, teardown);
 	tcase_add_test(overall, test_sbs_one_simple_sentence);
 	tcase_add_test(overall, test_sbs_two_sentences_with_rep);
 	suite_add_tcase(suite, overall);
 	/* sbs_append_string() tests */
 	TCase *append_string = tcase_create("SB: append_string");
 	tcase_add_checked_fixture(append_string, setup, teardown);
 	tcase_add_test(append_string, test_sbs_append_string_two_separate);
 	tcase_add_test(append_string, test_sbs_append_string_two_with_broken_sentence);
 	tcase_add_test(append_string, test_sbs_append_string_two_intersecting);
 	tcase_add_test(append_string, test_sbs_append_string_real_data_1);
 	suite_add_tcase(suite, append_string);
 	return suite;
 }
--- a/tests/ccx_encoders_splitbysentence_suite.h
+++ b/tests/ccx_encoders_splitbysentence_suite.h
@ -0,0 +1,4 @@
 // -------------------------------------
 // SUITE
 // -------------------------------------
 Suite * ccx_encoders_splitbysentence_suite(void);
--- a/tests/runtest.c
+++ b/tests/runtest.c
@ -0,0 +1,21 @@
 #include <check.h>
 // TESTS:
 #include "ccx_encoders_splitbysentence_suite.h"
 /*
  * Test runner entry point: runs the split-by-sentence suite in the current
  * process (no fork) and exits with 0 when every test passed, 1 otherwise.
  */
 int main(void)
 {
 	Suite *suite = ccx_encoders_splitbysentence_suite();
 	SRunner *runner = srunner_create(suite);
 	// CK_NOFORK: tests run inside this process
 	srunner_set_fork_status(runner, CK_NOFORK);
 	srunner_run_all(runner, CK_NORMAL);
 	int failures = srunner_ntests_failed(runner);
 	srunner_free(runner);
 	if (failures != 0)
 		return 1;
 	return 0;
 }