From c4a8135b4d111f358d1888f59481a36197bf4d3a Mon Sep 17 00:00:00 2001 From: Anshul Maheshwari Date: Wed, 7 Jan 2015 14:45:47 +0530 Subject: [PATCH 1/3] using language while opening traindata Signed-off-by: Anshul Maheshwari --- src/lib_ccx/608_sami.c | 2 +- src/lib_ccx/608_smptett.c | 2 +- src/lib_ccx/608_srt.c | 2 +- src/lib_ccx/ccx_common_constants.c | 7 ++++++- src/lib_ccx/ccx_common_constants.h | 3 ++- src/lib_ccx/ccx_encoders_common.c | 2 +- src/lib_ccx/ocr.c | 11 +++++++++-- src/lib_ccx/ocr.h | 2 +- src/lib_ccx/spupng_encoder.c | 2 +- 9 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/lib_ccx/608_sami.c b/src/lib_ccx/608_sami.c index 61785001..ee720b56 100644 --- a/src/lib_ccx/608_sami.c +++ b/src/lib_ccx/608_sami.c @@ -191,7 +191,7 @@ int write_cc_bitmap_as_sami(struct cc_subtitle *sub, struct encoder_ctx *context mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); #ifdef ENABLE_OCR - str = ocr_bitmap(palette,alpha,pbuf,width,height); + str = ocr_bitmap(palette, alpha, pbuf, width, height, sub->lang_index); if(str && str[0]) { if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) diff --git a/src/lib_ccx/608_smptett.c b/src/lib_ccx/608_smptett.c index ee11c5e8..39c5c912 100644 --- a/src/lib_ccx/608_smptett.c +++ b/src/lib_ccx/608_smptett.c @@ -217,7 +217,7 @@ int write_cc_bitmap_as_smptett(struct cc_subtitle *sub, struct encoder_ctx *cont mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); #ifdef ENABLE_OCR - str = ocr_bitmap(palette,alpha,pbuf,width,height); + str = ocr_bitmap(palette, alpha, pbuf, width, height, sub->lang_index); if(str && str[0]) { if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) diff --git a/src/lib_ccx/608_srt.c b/src/lib_ccx/608_srt.c index ed703dae..be5a68a1 100644 --- a/src/lib_ccx/608_srt.c +++ b/src/lib_ccx/608_srt.c @@ -185,7 +185,7 @@ int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context) mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); #ifdef ENABLE_OCR - str = ocr_bitmap(palette,alpha,pbuf,width,height); + str = ocr_bitmap(palette, alpha, pbuf, width, height, sub->lang_index); if(str && str[0]) { if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) diff --git a/src/lib_ccx/ccx_common_constants.c b/src/lib_ccx/ccx_common_constants.c index 1f8b70a8..137a40a5 100644 --- a/src/lib_ccx/ccx_common_constants.c +++ b/src/lib_ccx/ccx_common_constants.c @@ -124,10 +124,15 @@ enum DTVCC_PACKET_START = 3, }; -const char *language[4] = +/** + * After Adding a new language here, dont forget + * to increase NB_LANGUAGE define ccx_common_constants.h + */ +const char *language[NB_LANGUAGE] = { "und", "eng", "fin", + "spa", NULL }; diff --git a/src/lib_ccx/ccx_common_constants.h b/src/lib_ccx/ccx_common_constants.h index 8867d2c1..d63c5a42 100644 --- a/src/lib_ccx/ccx_common_constants.h +++ b/src/lib_ccx/ccx_common_constants.h @@ -267,5 +267,6 @@ enum ccx_code_type #define CCX_TXT_AUTO_NOT_YET_FOUND 1 #define CCX_TXT_IN_USE 2 // Positive autodetected, or forced, etc -extern const char *language[4]; +#define NB_LANGUAGE 5 +extern const char *language[NB_LANGUAGE]; #endif diff --git a/src/lib_ccx/ccx_encoders_common.c b/src/lib_ccx/ccx_encoders_common.c index 2adfa866..03782b21 100644 --- a/src/lib_ccx/ccx_encoders_common.c +++ b/src/lib_ccx/ccx_encoders_common.c @@ -338,7 +338,7 @@ int write_cc_bitmap_as_transcript(struct cc_subtitle *sub, struct encoder_ctx *c mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); #ifdef ENABLE_OCR - str = ocr_bitmap(palette,alpha,pbuf,width,height); + str = ocr_bitmap(palette, alpha, pbuf, width, height, sub->lang_index); if(str && str[0]) { if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) diff --git a/src/lib_ccx/ocr.c b/src/lib_ccx/ocr.c index 1c364abf..52284725 100644 --- a/src/lib_ccx/ocr.c +++ b/src/lib_ccx/ocr.c @@ -3,9 +3,10 @@ #ifdef ENABLE_OCR #include "platform.h" #include "capi.h" +#include "ccx_common_constants.h" #include "allheaders.h" -char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h) +char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h, int lang_index) { TessBaseAPI* api; PIX *pix; @@ -37,7 +38,13 @@ char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,int w } } - ret = TessBaseAPIInit3(api,"", "eng"); + if(lang_index == 0) + { + /* select english */ + lang_index = 1; + } + //ret = TessBaseAPIInit3(api,"", language[lang_index]); + ret = TessBaseAPIInit3(api,"", "foo"); if(ret < 0) { return NULL; diff --git a/src/lib_ccx/ocr.h b/src/lib_ccx/ocr.h index 032d640c..76252cb2 100644 --- a/src/lib_ccx/ocr.h +++ b/src/lib_ccx/ocr.h @@ -1,6 +1,6 @@ #ifndef OCR_H #define OCR_H #include -char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h); +char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h,int lang_index); #endif diff --git a/src/lib_ccx/spupng_encoder.c b/src/lib_ccx/spupng_encoder.c index d42cf479..7c3c9de9 100644 --- a/src/lib_ccx/spupng_encoder.c +++ b/src/lib_ccx/spupng_encoder.c @@ -734,7 +734,7 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte /* TODO do rectangle wise, one color table should not be used for all rectangles */ mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); - str = ocr_bitmap(palette,alpha,pbuf,width,height); + str = ocr_bitmap(palette, alpha, pbuf, width, height,sub->lang_index); if(str && str[0]) { write_spucomment(sp,str); From a83c686f6a3ecaa55be53163107a8b96d8afa906 Mon Sep 17 00:00:00 2001 From: Anshul Maheshwari Date: Wed, 7 Jan 2015 18:49:53 +0530 Subject: [PATCH 2/3] Remove multi warning when traindata not present --- src/lib_ccx/608_sami.c | 112 ++--------- src/lib_ccx/608_smptett.c | 100 +--------- src/lib_ccx/608_srt.c | 95 +-------- src/lib_ccx/ccx_decoders_structs.h | 3 + src/lib_ccx/ccx_encoders_common.c | 94 +-------- src/lib_ccx/dvb_subtitle_decoder.c | 16 ++ src/lib_ccx/ocr.c | 300 +++++++++++++++++++++++++++-- src/lib_ccx/ocr.h | 5 +- src/lib_ccx/spupng_encoder.c | 188 +----------------- src/lib_ccx/spupng_encoder.h | 2 - 10 files changed, 342 insertions(+), 573 deletions(-) diff --git a/src/lib_ccx/608_sami.c b/src/lib_ccx/608_sami.c index ee720b56..20aee2d9 100644 --- a/src/lib_ccx/608_sami.c +++ b/src/lib_ccx/608_sami.c @@ -80,30 +80,9 @@ void write_stringz_as_sami(char *string, struct encoder_ctx *context, LLONG ms_s int write_cc_bitmap_as_sami(struct cc_subtitle *sub, struct encoder_ctx *context) { - struct spupng_t *sp = (struct spupng_t *)context->out->spupng_data; - int x_pos, y_pos, width, height, i; - int x, y, y_off, x_off, ret; - uint8_t *pbuf; - //char *filename; + int ret = 0; struct cc_bitmap* rect; - png_color *palette = NULL; - png_byte *alpha = NULL; -#ifdef ENABLE_OCR - char*str = NULL; -#endif - //int used; -#ifdef ENABLE_OCR - unsigned h1, m1, s1, ms1; - unsigned h2, m2, s2, ms2; -#endif LLONG ms_start, ms_end; - //char timeline[128]; - int len = 0; - - x_pos = -1; - y_pos = -1; - width = 0; - height = 0; if (context->prev_start != -1 && (sub->flags & SUB_EOD_MARKER)) { @@ -124,110 +103,45 @@ int write_cc_bitmap_as_sami(struct cc_subtitle *sub, struct encoder_ctx *context if(sub->nb_data == 0 ) return 0; rect = sub->data; - for(i = 0;i < sub->nb_data;i++) - { - if(x_pos == -1) - { - x_pos = rect[i].x; - y_pos = rect[i].y; - width = rect[i].w; - height = rect[i].h; - } - else - { - if(x_pos > rect[i].x) - { - width += (x_pos - rect[i].x); - x_pos = rect[i].x; - } - if (rect[i].y < y_pos) - { - height += (y_pos - rect[i].y); - y_pos = rect[i].y; - } - - if (rect[i].x + rect[i].w > x_pos + width) - { - width = rect[i].x + rect[i].w - x_pos; - } - - if (rect[i].y + rect[i].h > y_pos + height) - { - height = rect[i].y + rect[i].h - y_pos; - } - - } - } if ( sub->flags & SUB_EOD_MARKER ) context->prev_start = sub->start_time; - pbuf = (uint8_t*) malloc(width * height); - memset(pbuf, 0x0, width * height); - for(i = 0;i < sub->nb_data;i++) - { - x_off = rect[i].x - x_pos; - y_off = rect[i].y - y_pos; - for (y = 0; y < rect[i].h; y++) - { - for (x = 0; x < rect[i].w; x++) - pbuf[((y + y_off) * width) + x_off + x] = rect[i].data[0][y * rect[i].w + x]; - - } - } - palette = (png_color*) malloc(rect[0].nb_colors * sizeof(png_color)); - if(!palette) - { - ret = -1; - goto end; - } - alpha = (png_byte*) malloc(rect[0].nb_colors * sizeof(png_byte)); - if(!alpha) - { - ret = -1; - goto end; - } - /* TODO do rectangle, wise one color table should not be used for all rectangle */ - mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); - quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); -#ifdef ENABLE_OCR - str = ocr_bitmap(palette, alpha, pbuf, width, height, sub->lang_index); - if(str && str[0]) +#if ENABLE_OCR + if (rect[0].ocr_text && *(rect[0].ocr_text)) { if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) { char *token = NULL; - sprintf(context->buffer, + char *buf = (char*)context->buffer; + sprintf(buf, "

\r\n" ,(unsigned long long)ms_start); - write(context->out->fh,context->buffer,strlen(context->buffer)); - token = strtok(str,"\r\n"); + write(context->out->fh, buf, strlen(buf)); + token = strtok(rect[0].ocr_text,"\r\n"); while (token) { - sprintf(context->buffer,"%s",token); + sprintf(buf, "%s", token); token = strtok(NULL,"\r\n"); if(token) - strcat(context->buffer,"
\n"); + strcat(buf, "
\n"); else - strcat(context->buffer,"\n"); - write(context->out->fh,context->buffer,strlen(context->buffer)); + strcat(buf, "\n"); + write(context->out->fh, buf, strlen(buf)); } - sprintf(context->buffer, + sprintf(buf, "

 

\r\n\r\n" ,(unsigned long long)ms_end); - write(context->out->fh,context->buffer,strlen(context->buffer)); + write(context->out->fh, buf, strlen(buf)); } } #endif -end: sub->nb_data = 0; freep(&sub->data); - freep(&palette); - freep(&alpha); return ret; } diff --git a/src/lib_ccx/608_smptett.c b/src/lib_ccx/608_smptett.c index 39c5c912..0f031727 100644 --- a/src/lib_ccx/608_smptett.c +++ b/src/lib_ccx/608_smptett.c @@ -106,30 +106,12 @@ void write_stringz_as_smptett(char *string, struct encoder_ctx *context, LLONG m int write_cc_bitmap_as_smptett(struct cc_subtitle *sub, struct encoder_ctx *context) { - struct spupng_t *sp = (struct spupng_t *)context->out->spupng_data; - int x_pos, y_pos, width, height, i; - int x, y, y_off, x_off, ret; - uint8_t *pbuf; - //char *filename; + int ret = 0; struct cc_bitmap* rect; - png_color *palette = NULL; - png_byte *alpha = NULL; -#ifdef ENABLE_OCR - char*str = NULL; -#endif - //int used; -#ifdef ENABLE_OCR - unsigned h1, m1, s1, ms1; - unsigned h2, m2, s2, ms2; -#endif LLONG ms_start, ms_end; //char timeline[128]; int len = 0; - x_pos = -1; - y_pos = -1; - width = 0; - height = 0; if (context->prev_start != -1 && (sub->flags & SUB_EOD_MARKER)) { @@ -150,96 +132,34 @@ int write_cc_bitmap_as_smptett(struct cc_subtitle *sub, struct encoder_ctx *cont if(sub->nb_data == 0 ) return 0; rect = sub->data; - for(i = 0;i < sub->nb_data;i++) - { - if(x_pos == -1) - { - x_pos = rect[i].x; - y_pos = rect[i].y; - width = rect[i].w; - height = rect[i].h; - } - else - { - if(x_pos > rect[i].x) - { - width += (x_pos - rect[i].x); - x_pos = rect[i].x; - } - if (rect[i].y < y_pos) - { - height += (y_pos - rect[i].y); - y_pos = rect[i].y; - } - - if (rect[i].x + rect[i].w > x_pos + width) - { - width = rect[i].x + rect[i].w - x_pos; - } - - if (rect[i].y + rect[i].h > y_pos + height) - { - height = rect[i].y + rect[i].h - y_pos; - } - - } - } if ( sub->flags & SUB_EOD_MARKER ) context->prev_start = sub->start_time; - pbuf = (uint8_t*) malloc(width * height); - memset(pbuf, 0x0, width * height); - for(i = 0;i < sub->nb_data;i++) - { - x_off = rect[i].x - x_pos; - y_off = rect[i].y - y_pos; - for (y = 0; y < rect[i].h; y++) - { - for (x = 0; x < rect[i].w; x++) - pbuf[((y + y_off) * width) + x_off + x] = rect[i].data[0][y * rect[i].w + x]; - - } - } - palette = (png_color*) malloc(rect[0].nb_colors * sizeof(png_color)); - if(!palette) - { - ret = -1; - goto end; - } - alpha = (png_byte*) malloc(rect[0].nb_colors * sizeof(png_byte)); - if(!alpha) - { - ret = -1; - goto end; - } - /* TODO do rectangle, wise one color table should not be used for all rectangle */ - mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); - quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); #ifdef ENABLE_OCR - str = ocr_bitmap(palette, alpha, pbuf, width, height, sub->lang_index); - if(str && str[0]) + if (rect[0].ocr_text && *(rect[0].ocr_text)) { if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) { + char *buf = (char *) context->buffer; + unsigned h1, m1, s1, ms1; + unsigned h2, m2, s2, ms2; mstotime (ms_start,&h1,&m1,&s1,&ms1); mstotime (ms_end-1,&h2,&m2,&s2,&ms2); // -1 To prevent overlapping with next line. sprintf ((char *) context->buffer,"

\n",h1,m1,s1,ms1, h2,m2,s2,ms2); - write (context->out->fh, context->buffer,strlen(context->buffer) ); - len = strlen(str); - write (context->out->fh, str, len); + write (context->out->fh, buf,strlen(buf) ); + len = strlen(rect[0].ocr_text); + write (context->out->fh, rect[0].ocr_text, len); write (context->out->fh, encoded_crlf, encoded_crlf_length); - sprintf ((char *) str,"

\n"); + sprintf ( buf,"

\n"); + write (context->out->fh, buf,strlen(buf) ); } } #endif -end: sub->nb_data = 0; freep(&sub->data); - freep(&palette); - freep(&alpha); return ret; } diff --git a/src/lib_ccx/608_srt.c b/src/lib_ccx/608_srt.c index be5a68a1..d744774f 100644 --- a/src/lib_ccx/608_srt.c +++ b/src/lib_ccx/608_srt.c @@ -1,9 +1,6 @@ #include "lib_ccx.h" #include "ccx_common_option.h" #include "ccx_encoders_common.h" -#include "png.h" -#include "spupng_encoder.h" -#include "ocr.h" #include "utility.h" /* The timing here is not PTS based, but output based, i.e. user delay must be accounted for @@ -75,16 +72,8 @@ void write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_st int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context) { - int x_pos, y_pos, width, height, i; - int x, y, y_off, x_off, ret; - uint8_t *pbuf; - //char *filename; + int ret = 0; struct cc_bitmap* rect; - png_color *palette = NULL; - png_byte *alpha = NULL; -#ifdef ENABLE_OCR - char*str = NULL; -#endif LLONG ms_start, ms_end; #ifdef ENABLE_OCR unsigned h1,m1,s1,ms1; @@ -94,11 +83,6 @@ int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context) int used; #endif - x_pos = -1; - y_pos = -1; - width = 0; - height = 0; - if (context->prev_start != -1 && (sub->flags & SUB_EOD_MARKER)) { ms_start = context->prev_start; @@ -118,75 +102,8 @@ int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context) if(sub->nb_data == 0 ) return 0; rect = sub->data; - for(i = 0;i < sub->nb_data;i++) - { - if(x_pos == -1) - { - x_pos = rect[i].x; - y_pos = rect[i].y; - width = rect[i].w; - height = rect[i].h; - } - else - { - if(x_pos > rect[i].x) - { - width += (x_pos - rect[i].x); - x_pos = rect[i].x; - } - - if (rect[i].y < y_pos) - { - height += (y_pos - rect[i].y); - y_pos = rect[i].y; - } - - if (rect[i].x + rect[i].w > x_pos + width) - { - width = rect[i].x + rect[i].w - x_pos; - } - - if (rect[i].y + rect[i].h > y_pos + height) - { - height = rect[i].y + rect[i].h - y_pos; - } - - } - } - if ( sub->flags & SUB_EOD_MARKER ) - context->prev_start = sub->start_time; - pbuf = (uint8_t*) malloc(width * height); - memset(pbuf, 0x0, width * height); - - for(i = 0;i < sub->nb_data;i++) - { - x_off = rect[i].x - x_pos; - y_off = rect[i].y - y_pos; - for (y = 0; y < rect[i].h; y++) - { - for (x = 0; x < rect[i].w; x++) - pbuf[((y + y_off) * width) + x_off + x] = rect[i].data[0][y * rect[i].w + x]; - - } - } - palette = (png_color*) malloc(rect[0].nb_colors * sizeof(png_color)); - if(!palette) - { - ret = -1; - goto end; - } - alpha = (png_byte*) malloc(rect[0].nb_colors * sizeof(png_byte)); - if(!alpha) - { - ret = -1; - goto end; - } - /* TODO do rectangle, wise one color table should not be used for all rectangle */ - mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); - quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); #ifdef ENABLE_OCR - str = ocr_bitmap(palette, alpha, pbuf, width, height, sub->lang_index); - if(str && str[0]) + if (rect[0].ocr_text && *(rect[0].ocr_text)) { if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) { @@ -200,18 +117,14 @@ int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context) h1,m1,s1,ms1, h2,m2,s2,ms2); used = encode_line(context->buffer,(unsigned char *) timeline); write (context->out->fh, context->buffer, used); - len = strlen(str); - write (context->out->fh, str, len); + len = strlen(rect[0].ocr_text); + write (context->out->fh, rect[0].ocr_text, len); write (context->out->fh, encoded_crlf, encoded_crlf_length); } } #endif - -end: sub->nb_data = 0; freep(&sub->data); - freep(&palette); - freep(&alpha); return ret; } diff --git a/src/lib_ccx/ccx_decoders_structs.h b/src/lib_ccx/ccx_decoders_structs.h index a446c831..8172494d 100644 --- a/src/lib_ccx/ccx_decoders_structs.h +++ b/src/lib_ccx/ccx_decoders_structs.h @@ -19,6 +19,9 @@ struct cc_bitmap int nb_colors; unsigned char *data[2]; int linesize[2]; +#ifdef ENABLE_OCR + char *ocr_text; +#endif }; enum ccx_eia608_format diff --git a/src/lib_ccx/ccx_encoders_common.c b/src/lib_ccx/ccx_encoders_common.c index 03782b21..bb009946 100644 --- a/src/lib_ccx/ccx_encoders_common.c +++ b/src/lib_ccx/ccx_encoders_common.c @@ -232,30 +232,14 @@ int write_cc_buffer_as_transcript2(struct eia608_screen *data, struct encoder_ct } int write_cc_bitmap_as_transcript(struct cc_subtitle *sub, struct encoder_ctx *context) { - struct spupng_t *sp = (struct spupng_t *)context->out->spupng_data; - int x_pos, y_pos, width, height, i; - int x, y, y_off, x_off, ret; - uint8_t *pbuf; - //char *filename; + int ret = 0; struct cc_bitmap* rect; - png_color *palette = NULL; - png_byte *alpha = NULL; -#ifdef ENABLE_OCR - char*str = NULL; -#endif - //int used; + #ifdef ENABLE_OCR unsigned h1,m1,s1,ms1; unsigned h2,m2,s2,ms2; #endif LLONG start_time, end_time; - //char timeline[128]; - int len = 0; - - x_pos = -1; - y_pos = -1; - width = 0; - height = 0; if (context->prev_start != -1 && (sub->flags & SUB_EOD_MARKER)) { @@ -269,82 +253,19 @@ int write_cc_bitmap_as_transcript(struct cc_subtitle *sub, struct encoder_ctx *c } if(sub->nb_data == 0 ) - return 0; + return ret; rect = sub->data; - for(i = 0;i < sub->nb_data;i++) - { - if(x_pos == -1) - { - x_pos = rect[i].x; - y_pos = rect[i].y; - width = rect[i].w; - height = rect[i].h; - } - else - { - if(x_pos > rect[i].x) - { - width += (x_pos - rect[i].x); - x_pos = rect[i].x; - } - if (rect[i].y < y_pos) - { - height += (y_pos - rect[i].y); - y_pos = rect[i].y; - } - - if (rect[i].x + rect[i].w > x_pos + width) - { - width = rect[i].x + rect[i].w - x_pos; - } - - if (rect[i].y + rect[i].h > y_pos + height) - { - height = rect[i].y + rect[i].h - y_pos; - } - - } - } if ( sub->flags & SUB_EOD_MARKER ) context->prev_start = sub->start_time; - pbuf = (uint8_t*) malloc(width * height); - memset(pbuf, 0x0, width * height); - for(i = 0;i < sub->nb_data;i++) - { - x_off = rect[i].x - x_pos; - y_off = rect[i].y - y_pos; - for (y = 0; y < rect[i].h; y++) - { - for (x = 0; x < rect[i].w; x++) - pbuf[((y + y_off) * width) + x_off + x] = rect[i].data[0][y * rect[i].w + x]; - - } - } - palette = (png_color*) malloc(rect[0].nb_colors * sizeof(png_color)); - if(!palette) - { - ret = -1; - goto end; - } - alpha = (png_byte*) malloc(rect[0].nb_colors * sizeof(png_byte)); - if(!alpha) - { - ret = -1; - goto end; - } - /* TODO do rectangle, wise one color table should not be used for all rectangle */ - mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); - quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); -#ifdef ENABLE_OCR - str = ocr_bitmap(palette, alpha, pbuf, width, height, sub->lang_index); - if(str && str[0]) +#if ENABLE_OCR + if (rect[0].ocr_text && *(rect[0].ocr_text)) { if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER)) { char *token = NULL; - token = strtok(str,"\r\n"); + token = strtok(rect[0].ocr_text ,"\r\n"); while (token) { @@ -402,11 +323,8 @@ int write_cc_bitmap_as_transcript(struct cc_subtitle *sub, struct encoder_ctx *c } #endif -end: sub->nb_data = 0; freep(&sub->data); - freep(&palette); - freep(&alpha); return ret; } diff --git a/src/lib_ccx/dvb_subtitle_decoder.c b/src/lib_ccx/dvb_subtitle_decoder.c index 5dc9b7d9..859c6f31 100644 --- a/src/lib_ccx/dvb_subtitle_decoder.c +++ b/src/lib_ccx/dvb_subtitle_decoder.c @@ -25,6 +25,7 @@ #include "dvb_subtitle_decoder.h" #include "utility.h" #include "ccx_decoders_common.h" +#include "ocr.h" #define DVBSUB_PAGE_SEGMENT 0x10 #define DVBSUB_REGION_SEGMENT 0x11 @@ -272,6 +273,9 @@ typedef struct DVBSubContext int lang_index; int version; int time_out; +#ifdef ENABLE_OCR + void *ocr_ctx; +#endif DVBSubRegion *region_list; DVBSubCLUT *clut_list; DVBSubObject *object_list; @@ -429,6 +433,9 @@ void* dvbsub_init_decoder(struct dvb_config* cfg) ctx->ancillary_id = cfg->ancillary_id[0]; ctx->lang_index = cfg->lang_index[0]; +#ifdef ENABLE_OCR + ctx->ocr_ctx = init_ocr(ctx->lang_index); +#endif ctx->version = -1; default_clut.id = -1; @@ -1425,6 +1432,7 @@ static int write_dvb_sub(void *dvb_ctx, struct cc_subtitle *sub) struct cc_bitmap *rect = NULL; uint32_t *clut_table; int offset_x=0, offset_y=0; + int ret = 0; sub->type = CC_BITMAP; sub->lang_index = ctx->lang_index; @@ -1458,6 +1466,9 @@ static int write_dvb_sub(void *dvb_ctx, struct cc_subtitle *sub) sub->data = rect; for (display = ctx->display_list; display; display = display->next) { +#ifdef ENABLE_OCR + char *ocr_str = NULL; +#endif region = get_region(ctx, display->region_id); if (!region) @@ -1497,6 +1508,11 @@ static int write_dvb_sub(void *dvb_ctx, struct cc_subtitle *sub) rect->data[0] = malloc(region->buf_size); memcpy(rect->data[0], region->pbuf, region->buf_size); +#ifdef ENABLE_OCR + ret = ocr_rect(ctx->ocr_ctx, rect, &ocr_str); + if(ret >= 0) + rect->ocr_text = ocr_str; +#endif rect++; } diff --git a/src/lib_ccx/ocr.c b/src/lib_ccx/ocr.c index 52284725..3f9c4466 100644 --- a/src/lib_ccx/ocr.c +++ b/src/lib_ccx/ocr.c @@ -5,17 +5,113 @@ #include "capi.h" #include "ccx_common_constants.h" #include "allheaders.h" +#include +#include "spupng_encoder.h" -char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h, int lang_index) +struct ocrCtx { TessBaseAPI* api; +}; + +struct transIntensity +{ + uint8_t *t; + png_color *palette; +}; +static int check_trans_tn_intensity(const void *p1, const void *p2, void *arg) +{ + struct transIntensity *ti = arg; + unsigned char* tmp = (unsigned char*)p1; + unsigned char* act = (unsigned char*)p2; + unsigned char tmp_i; + unsigned char act_i; + /** TODO verify that RGB follow ITU-R BT.709 + * Below fomula is valid only for 709 standurd + * Y = 0.2126 R + 0.7152 G + 0.0722 B + */ + tmp_i = (0.2126 * ti->palette[*tmp].red) + (0.7152 * ti->palette[*tmp].green) + (0.0722 * ti->palette[*tmp].blue); + act_i = (0.2126 * ti->palette[*act].red) + (0.7152 * ti->palette[*act].green) + (0.0722 * ti->palette[*act].blue);; + + if (ti->t[*tmp] < ti->t[*act] || (ti->t[*tmp] == ti->t[*act] && tmp_i < act_i)) + return -1; + else if (ti->t[*tmp] == ti->t[*act] && tmp_i == act_i) + return 0; + + + return 1; +} + +static int search_language_pack(const char *dirname,const char *lang) +{ + DIR *dp; + struct dirent *dirp; + char filename[256]; + if ((dp = opendir(dirname)) == NULL) + { + return -1; + } + snprintf(filename, 256, "%s.traineddata",lang); + while ((dirp = readdir(dp)) != NULL) + { + if(!strcmp(dirp->d_name, filename)) + { + closedir(dp); + return 0; + } + } + closedir(dp); + return -1; +} + +static void delete_ocr (struct ocrCtx* ctx) +{ + TessBaseAPIEnd(ctx->api); + TessBaseAPIDelete(ctx->api); + freep(ctx); +} +void* init_ocr(int lang_index) +{ + int ret; + struct ocrCtx* ctx; + + ctx = (struct ocrCtx*)malloc(sizeof(struct ocrCtx)); + if(!ctx) + return NULL; + ctx->api = TessBaseAPICreate(); + + /* if language was undefined use english */ + if(lang_index == 0) + { + /* select english */ + lang_index = 1; + } + + /* if langauge pack not found use english */ + ret = search_language_pack("tessdata",language[lang_index]); + if(ret < 0 ) + { + /* select english */ + lang_index = 1; + } + ret = TessBaseAPIInit3(ctx->api,"", language[lang_index]); + if(ret < 0) + { + goto fail; + } + return ctx; +fail: + delete_ocr(ctx); + return NULL; + +} +char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h) +{ PIX *pix; char*text_out= NULL; - int i,j,index,ret; + int i,j,index; unsigned int wpl; unsigned int *data,*ppixel; - api = TessBaseAPICreate(); - + struct ocrCtx* ctx = arg; pix = pixCreate(w, h, 32); if(pix == NULL) { @@ -38,24 +134,192 @@ char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,int w } } - if(lang_index == 0) - { - /* select english */ - lang_index = 1; - } - //ret = TessBaseAPIInit3(api,"", language[lang_index]); - ret = TessBaseAPIInit3(api,"", "foo"); - if(ret < 0) - { - return NULL; - } - - //text_out = TessBaseAPIProcessPages(api, "/home/anshul/test_videos/dvbsubtest.d/sub0018.png", 0, 0); - text_out = TessBaseAPIProcessPage(api, pix, 0, NULL, NULL, 0); + text_out = TessBaseAPIProcessPage(ctx->api, pix, 0, NULL, NULL, 0); if(!text_out) printf("\nsomething messy\n"); + + //TessDeleteText(text_out); + pixDestroy(&pix); + return text_out; } +/* + * @param alpha out + * @param intensity in + * @param palette out should be already initialized + * @param bitmap in + * @param size in size of bitmap + * @param max_color in + * @param nb_color in + */ +static int quantize_map(png_byte *alpha, png_color *palette, + uint8_t *bitmap, int size, int max_color, int nb_color) +{ + /* + * occurrence of color in image + */ + uint32_t *histogram = NULL; + /* intensity ordered table */ + uint8_t *iot = NULL; + /* array of color with most occurrence according to histogram + * save index of intensity order table + */ + uint32_t *mcit = NULL; + struct transIntensity ti = { alpha,palette}; + + int ret = 0; + + histogram = (uint32_t*) malloc(nb_color * sizeof(uint32_t)); + if (!histogram) + { + ret = -1; + goto end; + } + + iot = (uint8_t*) malloc(nb_color * sizeof(uint8_t)); + if (!iot) + { + ret = -1; + goto end; + } + + mcit = (uint32_t*) malloc(nb_color * sizeof(uint32_t)); + if (!mcit) + { + ret = -1; + goto end; + } + + memset(histogram, 0, nb_color * sizeof(uint32_t)); + + /* initializing intensity ordered table with serial order of unsorted color table */ + for (int i = 0; i < nb_color; i++) + { + iot[i] = i; + } + memset(mcit, 0, nb_color * sizeof(uint32_t)); + + /* calculate histogram of image */ + for (int i = 0; i < size; i++) + { + histogram[bitmap[i]]++; + } + /* sorted in increasing order of intensity */ + shell_sort((void*)iot, nb_color, sizeof(*iot), check_trans_tn_intensity, (void*)&ti); + +#if OCR_DEBUG + ccx_common_logging.log_ftn("Intensity ordered table\n"); + for (int i = 0; i < nb_color; i++) + { + ccx_common_logging.log_ftn("%02d) map %02d hist %02d\n", + i, iot[i], histogram[iot[i]]); + } +#endif + /** + * using selection sort since need to find only max_color + * Hostogram becomes invalid in this loop + */ + for (int i = 0; i < max_color; i++) + { + uint32_t max_val = 0; + uint32_t max_ind = 0; + int j; + for (j = 0; j < nb_color; j++) + { + if (max_val < histogram[iot[j]]) + { + max_val = histogram[iot[j]]; + max_ind = j; + } + } + for (j = i; j > 0 && max_ind < mcit[j - 1]; j--) + { + mcit[j] = mcit[j - 1]; + } + mcit[j] = max_ind; + histogram[iot[max_ind]] = 0; + } + +#if OCR_DEBUG + ccx_common_logging.log_ftn("max redundant intensities table\n"); + for (int i = 0; i < max_color; i++) + { + ccx_common_logging.log_ftn("%02d) mcit %02d\n", + i, mcit[i]); + } +#endif + for (int i = 0, mxi = 0; i < nb_color; i++) + { + int step, inc; + if (i == mcit[mxi]) + { + mxi = (mxi < max_color) ? mxi + 1 : mxi; + continue; + } + inc = (mxi) ? -1 : 0; + step = mcit[mxi + inc] + ((mcit[mxi] - mcit[mxi + inc]) / 2); + if (i <= step) + { + int index = iot[mcit[mxi + inc]]; + alpha[iot[i]] = alpha[index]; + palette[iot[i]].red = palette[index].red; + palette[iot[i]].blue = palette[index].blue; + palette[iot[i]].green = palette[index].green; + } + else + { + int index = iot[mcit[mxi]]; + alpha[iot[i]] = alpha[index]; + palette[iot[i]].red = palette[index].red; + palette[iot[i]].blue = palette[index].blue; + palette[iot[i]].green = palette[index].green; + } + + } +#if OCR_DEBUG + ccx_common_logging.log_ftn("Colors present in quantized Image\n"); + for (int i = 0; i < nb_color; i++) + { + ccx_common_logging.log_ftn("%02d)r %03d g %03d b %03d a %03d\n", + i, palette[i].red, palette[i].green, palette[i].blue, alpha[i]); + } +#endif + end: freep(&histogram); + freep(&mcit); + freep(&iot); + return ret; +} + +int ocr_rect(void* arg, struct cc_bitmap *rect, char **str) +{ + int ret = 0; + png_color *palette = NULL; + png_byte *alpha = NULL; + + palette = (png_color*) malloc(rect[0].nb_colors * sizeof(png_color)); + if(!palette) + { + ret = -1; + goto end; + } + alpha = (png_byte*) malloc(rect[0].nb_colors * sizeof(png_byte)); + if(!alpha) + { + ret = -1; + goto end; + } + + /* TODO do rectangle wise, one color table should not be used for all rectangles */ + mapclut_paletee(palette, alpha, (uint32_t *)rect->data[1],rect->nb_colors); + + quantize_map(alpha, palette, rect->data[0], rect->w * rect->h, 3, rect->nb_colors); + *str = ocr_bitmap(arg, palette, alpha, rect->data[0], rect->w, rect->h); +end: + freep(&palette); + freep(&alpha); + return ret; + +} #else char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,unsigned char d,int w, int h) { diff --git a/src/lib_ccx/ocr.h b/src/lib_ccx/ocr.h index 76252cb2..84c2d9ae 100644 --- a/src/lib_ccx/ocr.h +++ b/src/lib_ccx/ocr.h @@ -1,6 +1,9 @@ #ifndef OCR_H #define OCR_H #include -char* ocr_bitmap(png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h,int lang_index); + +void* init_ocr(int lang_index); +char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h); +int ocr_rect(void* arg, struct cc_bitmap *rect, char **str); #endif diff --git a/src/lib_ccx/spupng_encoder.c b/src/lib_ccx/spupng_encoder.c index 7c3c9de9..ad035979 100644 --- a/src/lib_ccx/spupng_encoder.c +++ b/src/lib_ccx/spupng_encoder.c @@ -334,7 +334,7 @@ void set_spupng_offset(void *ctx,int x,int y) sp->xOffset = x; sp->yOffset = y; } -static int save_spupng(const char *filename, uint8_t *bitmap, int w, int h, +int save_spupng(const char *filename, uint8_t *bitmap, int w, int h, png_color *palette, png_byte *alpha, int nb_color) { FILE *f = NULL; @@ -458,181 +458,6 @@ int mapclut_paletee(png_color *palette, png_byte *alpha, uint32_t *clut, return 0; } -struct transIntensity -{ - uint8_t *t; - png_color *palette; -}; -int check_trans_tn_intensity(const void *p1, const void *p2, void *arg) -{ - struct transIntensity *ti = arg; - unsigned char* tmp = (unsigned char*)p1; - unsigned char* act = (unsigned char*)p2; - unsigned char tmp_i; - unsigned char act_i; - /** TODO verify that RGB follow ITU-R BT.709 - * Below fomula is valid only for 709 standurd - * Y = 0.2126 R + 0.7152 G + 0.0722 B - */ - tmp_i = (0.2126 * ti->palette[*tmp].red) + (0.7152 * ti->palette[*tmp].green) + (0.0722 * ti->palette[*tmp].blue); - act_i = (0.2126 * ti->palette[*act].red) + (0.7152 * ti->palette[*act].green) + (0.0722 * ti->palette[*act].blue);; - - if (ti->t[*tmp] < ti->t[*act] || (ti->t[*tmp] == ti->t[*act] && tmp_i < act_i)) - return -1; - else if (ti->t[*tmp] == ti->t[*act] && tmp_i == act_i) - return 0; - - - return 1; -} -/* - * @param alpha out - * @param intensity in - * @param palette out should be already initialized - * @param bitmap in - * @param size in size of bitmap - * @param max_color in - * @param nb_color in - */ -int quantize_map(png_byte *alpha, png_color *palette, - uint8_t *bitmap, int size, int max_color, int nb_color) -{ - /* - * occurrence of color in image - */ - uint32_t *histogram = NULL; - /* intensity ordered table */ - uint8_t *iot = NULL; - /* array of color with most occurrence according to histogram - * save index of intensity order table - */ - uint32_t *mcit = NULL; - struct transIntensity ti = { alpha,palette}; - - int ret = 0; - - histogram = (uint32_t*) malloc(nb_color * sizeof(uint32_t)); - if (!histogram) - { - ret = -1; - goto end; - } - - iot = (uint8_t*) malloc(nb_color * sizeof(uint8_t)); - if (!iot) - { - ret = -1; - goto end; - } - - mcit = (uint32_t*) malloc(nb_color * sizeof(uint32_t)); - if (!mcit) - { - ret = -1; - goto end; - } - - memset(histogram, 0, nb_color * sizeof(uint32_t)); - - /* initializing intensity ordered table with serial order of unsorted color table */ - for (int i = 0; i < nb_color; i++) - { - iot[i] = i; - } - memset(mcit, 0, nb_color * sizeof(uint32_t)); - - /* calculate histogram of image */ - for (int i = 0; i < size; i++) - { - histogram[bitmap[i]]++; - } - /* sorted in increasing order of intensity */ - shell_sort((void*)iot, nb_color, sizeof(*iot), check_trans_tn_intensity, (void*)&ti); - -#if OCR_DEBUG - ccx_common_logging.log_ftn("Intensity ordered table\n"); - for (int i = 0; i < nb_color; i++) - { - ccx_common_logging.log_ftn("%02d) map %02d hist %02d\n", - i, iot[i], histogram[iot[i]]); - } -#endif - /** - * using selection sort since need to find only max_color - * Hostogram becomes invalid in this loop - */ - for (int i = 0; i < max_color; i++) - { - uint32_t max_val = 0; - uint32_t max_ind = 0; - int j; - for (j = 0; j < nb_color; j++) - { - if (max_val < histogram[iot[j]]) - { - max_val = histogram[iot[j]]; - max_ind = j; - } - } - for (j = i; j > 0 && max_ind < mcit[j - 1]; j--) - { - mcit[j] = mcit[j - 1]; - } - mcit[j] = max_ind; - histogram[iot[max_ind]] = 0; - } - -#if OCR_DEBUG - ccx_common_logging.log_ftn("max redundant intensities table\n"); - for (int i = 0; i < max_color; i++) - { - ccx_common_logging.log_ftn("%02d) mcit %02d\n", - i, mcit[i]); - } -#endif - for (int i = 0, mxi = 0; i < nb_color; i++) - { - int step, inc; - if (i == mcit[mxi]) - { - mxi = (mxi < max_color) ? mxi + 1 : mxi; - continue; - } - inc = (mxi) ? -1 : 0; - step = mcit[mxi + inc] + ((mcit[mxi] - mcit[mxi + inc]) / 2); - if (i <= step) - { - int index = iot[mcit[mxi + inc]]; - alpha[iot[i]] = alpha[index]; - palette[iot[i]].red = palette[index].red; - palette[iot[i]].blue = palette[index].blue; - palette[iot[i]].green = palette[index].green; - } - else - { - int index = iot[mcit[mxi]]; - alpha[iot[i]] = alpha[index]; - palette[iot[i]].red = palette[index].red; - palette[iot[i]].blue = palette[index].blue; - palette[iot[i]].green = palette[index].green; - } - - } -#if OCR_DEBUG - ccx_common_logging.log_ftn("Colors present in quantized Image\n"); - for (int i = 0; i < nb_color; i++) - { - ccx_common_logging.log_ftn("%02d)r %03d g %03d b %03d a %03d\n", - i, palette[i].red, palette[i].green, palette[i].blue, alpha[i]); - } -#endif - end: freep(&histogram); - freep(&mcit); - freep(&iot); - return ret; -} - - int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *context) { struct spupng_t *sp = (struct spupng_t *)context->out->spupng_data; @@ -643,9 +468,6 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte struct cc_bitmap* rect; png_color *palette = NULL; png_byte *alpha = NULL; -#ifdef ENABLE_OCR - char*str = NULL; -#endif x_pos = -1; y_pos = -1; @@ -730,14 +552,12 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte goto end; } -#ifdef ENABLE_OCR /* TODO do rectangle wise, one color table should not be used for all rectangles */ mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data[1],rect[0].nb_colors); - quantize_map(alpha, palette, pbuf, width*height, 3, rect[0].nb_colors); - str = ocr_bitmap(palette, alpha, pbuf, width, height,sub->lang_index); - if(str && str[0]) +#if ENABLE_OCR + if (rect[0].ocr_text && *(rect[0].ocr_text)) { - write_spucomment(sp,str); + write_spucomment(sp, rect[0].ocr_text); } #endif save_spupng(filename,pbuf,width, height, palette, alpha,rect[0].nb_colors); diff --git a/src/lib_ccx/spupng_encoder.h b/src/lib_ccx/spupng_encoder.h index ab6a988b..f79ea36c 100644 --- a/src/lib_ccx/spupng_encoder.h +++ b/src/lib_ccx/spupng_encoder.h @@ -34,6 +34,4 @@ void inc_spupng_fileindex(void *ctx); void set_spupng_offset(void *ctx,int x,int y); int mapclut_paletee(png_color *palette, png_byte *alpha, uint32_t *clut, uint8_t depth); -int quantize_map(png_byte *alpha, png_color *palette, - uint8_t *bitmap, int size, int max_color, int nb_color); #endif From ecc4c2520e3b87ab9cf320870b6005ad1130760e Mon Sep 17 00:00:00 2001 From: Anshul Maheshwari Date: Wed, 7 Jan 2015 19:38:55 +0530 Subject: [PATCH 3/3] assigning prev_start to start_time --- src/lib_ccx/608_srt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib_ccx/608_srt.c b/src/lib_ccx/608_srt.c index d744774f..cc6843ae 100644 --- a/src/lib_ccx/608_srt.c +++ b/src/lib_ccx/608_srt.c @@ -101,6 +101,10 @@ int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context) if(sub->nb_data == 0 ) return 0; + + if(sub->flags & SUB_EOD_MARKER) + context->prev_start = sub->start_time; + rect = sub->data; #ifdef ENABLE_OCR if (rect[0].ocr_text && *(rect[0].ocr_text))