From b25a9f2ae42b157bf06dc5ebd6470a31dca94574 Mon Sep 17 00:00:00 2001 From: Abhinav Shukla Date: Sun, 11 Dec 2016 10:01:55 +0530 Subject: [PATCH 1/4] Fix #454 : Removed ugly debug statement with local path --- src/lib_ccx/hardsubx_decoder.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/lib_ccx/hardsubx_decoder.c b/src/lib_ccx/hardsubx_decoder.c index e9068b96..83d82621 100644 --- a/src/lib_ccx/hardsubx_decoder.c +++ b/src/lib_ccx/hardsubx_decoder.c @@ -253,10 +253,6 @@ void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int // txt=get_ocr_text_wordwise_threshold(ctx, feat_im, ctx->conf_thresh); // if(txt != NULL)printf("%s\n", txt); - char write_path[100]; - sprintf(write_path,"./ffmpeg-examples/frames/temp%04d.jpg",timestamp); - - pixWrite(write_path,feat_im,IFF_JFIF_JPEG); pixDestroy(&im); pixDestroy(&edge_im); pixDestroy(&feat_im); From fbc7cb545220e190d4456483985004aef950f117 Mon Sep 17 00:00:00 2001 From: Abhinav Shukla Date: Mon, 9 Jan 2017 17:51:15 +0530 Subject: [PATCH 2/4] Setting up skeleton for tickertext based burned in detection --- src/lib_ccx/ccx_common_option.c | 1 + src/lib_ccx/ccx_common_option.h | 1 + src/lib_ccx/hardsubx.c | 7 ++++++- src/lib_ccx/hardsubx.h | 1 + src/lib_ccx/hardsubx_decoder.c | 19 +++++++++++++++++++ src/lib_ccx/params.c | 6 ++++++ 6 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/lib_ccx/ccx_common_option.c b/src/lib_ccx/ccx_common_option.c index fc882f00..5a8997a6 100644 --- a/src/lib_ccx/ccx_common_option.c +++ b/src/lib_ccx/ccx_common_option.c @@ -59,6 +59,7 @@ void init_options (struct ccx_s_options *options) options->force_flush = 0; // Don't flush whenever content is written. options->append_mode = 0; //By default, files are overwritten. options->ucla = 0; // By default, -UCLA not used + options->tickertext = 0; // By default, do not assume ticker style text options->hardsubx = 0; // By default, don't try to extract hard subtitles options->dvbcolor = 0; // By default, only text detected in DVB options->dvblang = NULL; // By default, autodetect DVB language diff --git a/src/lib_ccx/ccx_common_option.h b/src/lib_ccx/ccx_common_option.h index 6cf071ef..db45ff5b 100644 --- a/src/lib_ccx/ccx_common_option.h +++ b/src/lib_ccx/ccx_common_option.h @@ -125,6 +125,7 @@ struct ccx_s_options // Options from user parameters int force_flush; // Force flush on content write int append_mode; // Append mode for output files int ucla; // 1 if UCLA used, 0 if not + int tickertext; // 1 if ticker text style burned in subs, 0 if not int hardsubx; // 1 if burned-in subtitles to be extracted int dvbcolor; // 1 if Color to be detected for DVB char *dvblang; // The name of the language stream for DVB diff --git a/src/lib_ccx/hardsubx.c b/src/lib_ccx/hardsubx.c index a0563eb9..7a7423e5 100644 --- a/src/lib_ccx/hardsubx.c +++ b/src/lib_ccx/hardsubx.c @@ -101,7 +101,11 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx) enc_ctx = init_encoder(&ccx_options.enc_cfg); mprint("Beginning burned-in subtitle detection...\n"); - hardsubx_process_frames_linear(ctx, enc_ctx); + + if(ctx->tickertext) + hardsubx_process_frames_tickertext(ctx, enc_ctx); + else + hardsubx_process_frames_linear(ctx, enc_ctx); dinit_encoder(&enc_ctx, 0); //TODO: Replace 0 with end timestamp @@ -237,6 +241,7 @@ struct lib_hardsubx_ctx* _init_hardsubx(struct ccx_s_options *options) ctx->cc_to_stdout = options->cc_to_stdout; //Initialize subtitle text parameters + ctx->tickertext = options->tickertext; ctx->cur_conf = 0.0; ctx->prev_conf = 0.0; ctx->ocr_mode = options->hardsubx_ocr_mode; diff --git a/src/lib_ccx/hardsubx.h b/src/lib_ccx/hardsubx.h index 199d5217..0fa84f15 100644 --- a/src/lib_ccx/hardsubx.h +++ b/src/lib_ccx/hardsubx.h @@ -68,6 +68,7 @@ struct lib_hardsubx_ctx float prev_conf; // Subtitle text parameters + int tickertext; struct cc_subtitle *dec_sub; int ocr_mode; int subcolor; diff --git a/src/lib_ccx/hardsubx_decoder.c b/src/lib_ccx/hardsubx_decoder.c index faed02cb..d542da79 100644 --- a/src/lib_ccx/hardsubx_decoder.c +++ b/src/lib_ccx/hardsubx_decoder.c @@ -260,6 +260,25 @@ void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int pixDestroy(&pixd); } +int hardsubx_process_frames_tickertext(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx) +{ + // Search for ticker text at the bottom of the screen, such as in Russia TV1 or stock prices + int got_frame; + while(av_read_frame(ctx->format_ctx, &ctx->packet)>=0) + { + if(ctx->packet.stream_index == ctx->video_stream_id) + { + //Decode the video stream packet + avcodec_decode_video2(ctx->codec_ctx, ctx->frame, &got_frame, &ctx->packet); + if(got_frame) + { + // Do something + } + } + } + return 0; +} + int hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx) { // Do an exhaustive linear search over the video diff --git a/src/lib_ccx/params.c b/src/lib_ccx/params.c index 029c8330..842b1c09 100644 --- a/src/lib_ccx/params.c +++ b/src/lib_ccx/params.c @@ -1885,6 +1885,7 @@ int parse_parameters (struct ccx_s_options *opt, int argc, char *argv[]) i++; continue; } + /* Red Hen/ UCLA Specific stuff */ if (strcmp (argv[i],"-UCLA")==0 || strcmp (argv[i],"-ucla")==0) { opt->ucla = 1; @@ -1900,6 +1901,11 @@ int parse_parameters (struct ccx_s_options *opt, int argc, char *argv[]) } continue; } + if (strcmp (argv[i],"-tickertext")==0) + { + opt->tickertext = 1; + continue; + } if (strcmp (argv[i],"-lf")==0 || strcmp (argv[i],"-LF")==0) { opt->enc_cfg.line_terminator_lf = 1; From 59331945709f6660afe1a5b0897f2434e8a7c56b Mon Sep 17 00:00:00 2001 From: Abhinav Shukla Date: Mon, 9 Jan 2017 21:14:52 +0530 Subject: [PATCH 3/4] Progress activity and more context --- src/lib_ccx/hardsubx.c | 23 +++++++++++++++++++---- src/lib_ccx/hardsubx.h | 1 + src/lib_ccx/hardsubx_decoder.c | 10 +++++++++- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/lib_ccx/hardsubx.c b/src/lib_ccx/hardsubx.c index 7a7423e5..3e260e06 100644 --- a/src/lib_ccx/hardsubx.c +++ b/src/lib_ccx/hardsubx.c @@ -221,13 +221,28 @@ struct lib_hardsubx_ctx* _init_hardsubx(struct ccx_s_options *options) ctx->tess_handle = TessBaseAPICreate(); char* pars_vec = strdup("debug_file"); char* pars_values = strdup("/dev/null"); - int res = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", OEM_DEFAULT, NULL, 0, &pars_vec, - &pars_values, 1, false); + char *tessdata_dir_path="."; + + int ret = -1; + if(options->ocrlang) + { + ret = TessBaseAPIInit4(ctx->tess_handle, NULL, options->ocrlang, OEM_DEFAULT, NULL, 0, &pars_vec, + &pars_values, 1, false); + if(ret != 0) + { + mprint("Failed loading language: %s, trying to load eng\n", options->ocrlang); + } + } + if(ret != 0) + { + ret = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", OEM_DEFAULT, NULL, 0, &pars_vec, + &pars_values, 1, false); + } free(pars_vec); free(pars_values); - if(res != 0) + if(ret != 0) { - fatal(EXIT_NOT_ENOUGH_MEMORY, "Not enough memory to initialize Tesseract!"); + fatal(EXIT_NOT_ENOUGH_MEMORY, "Tesseract not initialized!"); } //Initialize attributes common to lib_ccx context diff --git a/src/lib_ccx/hardsubx.h b/src/lib_ccx/hardsubx.h index 0fa84f15..c5b4e746 100644 --- a/src/lib_ccx/hardsubx.h +++ b/src/lib_ccx/hardsubx.h @@ -85,6 +85,7 @@ void hardsubx(struct ccx_s_options *options); //hardsubx_decoder.c int hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx); +int hardsubx_process_frames_tickertext(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx); int hardsubx_process_frames_binary(struct lib_hardsubx_ctx *ctx); //hardsubx_imgops.c diff --git a/src/lib_ccx/hardsubx_decoder.c b/src/lib_ccx/hardsubx_decoder.c index d542da79..fd74d85e 100644 --- a/src/lib_ccx/hardsubx_decoder.c +++ b/src/lib_ccx/hardsubx_decoder.c @@ -264,6 +264,10 @@ int hardsubx_process_frames_tickertext(struct lib_hardsubx_ctx *ctx, struct enco { // Search for ticker text at the bottom of the screen, such as in Russia TV1 or stock prices int got_frame; + int cur_sec,total_sec,progress; + printf("%s\n", language[10]); + printf("%s\n", language[11]); + printf("%s\n", language[12]); while(av_read_frame(ctx->format_ctx, &ctx->packet)>=0) { if(ctx->packet.stream_index == ctx->video_stream_id) @@ -272,10 +276,14 @@ int hardsubx_process_frames_tickertext(struct lib_hardsubx_ctx *ctx, struct enco avcodec_decode_video2(ctx->codec_ctx, ctx->frame, &got_frame, &ctx->packet); if(got_frame) { - // Do something + cur_sec = (int)convert_pts_to_s(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base); + total_sec = (int)convert_pts_to_s(ctx->format_ctx->duration, AV_TIME_BASE_Q); + progress = (cur_sec*100)/total_sec; + activity_progress(progress,cur_sec/60,cur_sec%60); } } } + activity_progress(100,cur_sec/60,cur_sec%60); return 0; } From 3278b31a8f571f5188d376142b1981a3a99ccff2 Mon Sep 17 00:00:00 2001 From: Abhinav Shukla Date: Thu, 9 Feb 2017 02:30:51 +0530 Subject: [PATCH 4/4] Setting up tickertape parameter --- src/lib_ccx/hardsubx_decoder.c | 92 ++++++++++++++++++++++++++++++++-- src/lib_ccx/params.c | 2 +- 2 files changed, 89 insertions(+), 5 deletions(-) diff --git a/src/lib_ccx/hardsubx_decoder.c b/src/lib_ccx/hardsubx_decoder.c index fd74d85e..e5aa671a 100644 --- a/src/lib_ccx/hardsubx_decoder.c +++ b/src/lib_ccx/hardsubx_decoder.c @@ -260,22 +260,106 @@ void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int pixDestroy(&pixd); } +char* _process_frame_tickertext(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index) +{ + PIX *im; + PIX *edge_im; + PIX *lum_im; + PIX *feat_im; + char *subtitle_text=NULL; + im = pixCreate(width,height,32); + lum_im = pixCreate(width,height,32); + feat_im = pixCreate(width,height,32); + int i,j; + for(i=(92*height)/100;ilinesize[0]; + int r=frame->data[0][p]; + int g=frame->data[0][p+1]; + int b=frame->data[0][p+2]; + pixSetRGBPixel(im,j,i,r,g,b); + float L,A,B; + rgb_to_lab((float)r,(float)g,(float)b,&L,&A,&B); + if(L > ctx->lum_thresh) + pixSetRGBPixel(lum_im,j,i,255,255,255); + else + pixSetRGBPixel(lum_im,j,i,0,0,0); + } + } + + //Handle the edge image + edge_im = pixCreate(width,height,8); + edge_im = pixConvertRGBToGray(im,0.0,0.0,0.0); + edge_im = pixSobelEdgeFilter(edge_im, L_VERTICAL_EDGES); + edge_im = pixDilateGray(edge_im, 21, 11); + edge_im = pixThresholdToBinary(edge_im,50); + + for(i=92*(height/100);i0) + pixSetRGBPixel(feat_im,j,i,255,255,255); + else + pixSetRGBPixel(feat_im,j,i,0,0,0); + } + } + + // Tesseract OCR for the ticker text here + subtitle_text = get_ocr_text_simple(ctx, lum_im); + char write_path[100]; + sprintf(write_path,"./lum_im%04d.jpg",index); + pixWrite(write_path,lum_im,IFF_JFIF_JPEG); + sprintf(write_path,"./im%04d.jpg",index); + pixWrite(write_path,im,IFF_JFIF_JPEG); + + pixDestroy(&lum_im); + pixDestroy(&im); + pixDestroy(&edge_im); + pixDestroy(&feat_im); + + return subtitle_text; +} + int hardsubx_process_frames_tickertext(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx) { // Search for ticker text at the bottom of the screen, such as in Russia TV1 or stock prices int got_frame; int cur_sec,total_sec,progress; - printf("%s\n", language[10]); - printf("%s\n", language[11]); - printf("%s\n", language[12]); + int frame_number = 0; + char *ticker_text = NULL; + while(av_read_frame(ctx->format_ctx, &ctx->packet)>=0) { if(ctx->packet.stream_index == ctx->video_stream_id) { + frame_number++; //Decode the video stream packet avcodec_decode_video2(ctx->codec_ctx, ctx->frame, &got_frame, &ctx->packet); - if(got_frame) + if(got_frame && frame_number % 1000 == 0) { + // sws_scale is used to convert the pixel format to RGB24 from all other cases + sws_scale( + ctx->sws_ctx, + (uint8_t const * const *)ctx->frame->data, + ctx->frame->linesize, + 0, + ctx->codec_ctx->height, + ctx->rgb_frame->data, + ctx->rgb_frame->linesize + ); + + ticker_text = _process_frame_tickertext(ctx,ctx->rgb_frame,ctx->codec_ctx->width,ctx->codec_ctx->height,frame_number); + printf("frame_number: %d\n", frame_number); + + if(strlen(ticker_text)>0)printf("%s\n", ticker_text); + cur_sec = (int)convert_pts_to_s(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base); total_sec = (int)convert_pts_to_s(ctx->format_ctx->duration, AV_TIME_BASE_Q); progress = (cur_sec*100)/total_sec; diff --git a/src/lib_ccx/params.c b/src/lib_ccx/params.c index 842b1c09..95d9db92 100644 --- a/src/lib_ccx/params.c +++ b/src/lib_ccx/params.c @@ -1901,7 +1901,7 @@ int parse_parameters (struct ccx_s_options *opt, int argc, char *argv[]) } continue; } - if (strcmp (argv[i],"-tickertext")==0) + if (strcmp (argv[i],"-tickertext")==0 || strcmp (argv[i],"-tickertape")==0) { opt->tickertext = 1; continue;