/*
 * Patch summary (commentary only; everything after this block is the
 * unmodified patch text).
 *
 * Files touched: src/lib_ccx/hardsubx.c, src/lib_ccx/ocr.c, src/lib_ccx/ocr.h
 *
 * hardsubx.c, hardsubx_process_data():
 *   - av_frame_free() is now called with the address of the frame pointer
 *     (&ctx->frame, &ctx->rgb_frame) instead of the pointer itself, and each
 *     call is guarded by a non-NULL check on the frame.
 *   - ctx->frame is now released before ctx->rgb_frame.
 *
 * hardsubx.c, _init_hardsubx():
 *   - Removes the old "try options->ocrlang, then fall back to eng" pair of
 *     TessBaseAPIInit4() calls, which passed NULL as the data path.
 *   - lang is options->ocrlang, or "eng" when that is NULL.
 *   - probe_tessdata_location_string(lang) looks up the data directory.
 *     If nothing is found and lang is not "eng", a message is printed and
 *     the probe is retried with "eng". If "eng" cannot be found either,
 *     a message is printed and the function returns NULL.
 *   - When TessVersion() starts with "4.", the path "<tessdata_path>/tessdata"
 *     is formatted into a 1024-byte stack buffer and TessBaseAPIInit4() is
 *     called with the literal engine mode 1 (the in-patch comment names it
 *     OEM_LSTM_ONLY). Otherwise tessdata_path and ccx_options.ocr_oem are
 *     passed unchanged.
 *
 * ocr.c, new probe_tessdata_location_string(char* lang):
 *   - Walks language[0 .. NB_LANGUAGE), skips NULL entries, and strcmp()s
 *     each entry against lang. The loop does not break, so if several
 *     entries matched, the last index would be used.
 *   - Returns NULL when no entry matches; otherwise returns
 *     probe_tessdata_location(lang_index).
 *
 * ocr.h:
 *   - Adds the prototype for probe_tessdata_location_string().
 *
 * NOTE(review): this patch text is collapsed onto two physical lines, and the
 *   single line break falls inside the second
 *   "eng.traineddata not found! No Switching Possible" string literal.
 *   Presumably an extraction artifact; the patch is unlikely to apply in this
 *   form. Confirm against the original commit before using it.
 * NOTE(review): both "return NULL" paths in _init_hardsubx() run before the
 *   free(pars_vec) / free(pars_values) lines shown at the end of the hunk, so
 *   the two strdup() results appear to leak on those paths. Whether ctx and
 *   ctx->tess_handle also need releasing there cannot be seen from this
 *   patch. TODO confirm.
 * NOTE(review): probe_tessdata_location_string() hands lang straight to
 *   strcmp(); the only caller visible here substitutes "eng" for NULL first,
 *   but other callers would need to do the same.
 * NOTE(review): the snprintf() return value is not checked, so an overly long
 *   tessdata_path would be truncated silently.
 */
diff --git a/src/lib_ccx/hardsubx.c b/src/lib_ccx/hardsubx.c index 0e05a01f..684cff96 100644 --- a/src/lib_ccx/hardsubx.c +++ b/src/lib_ccx/hardsubx.c @@ -110,10 +110,10 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx) dinit_encoder(&enc_ctx, 0); //TODO: Replace 0 with end timestamp // Free the allocated memory for frame processing - av_free(ctx->rgb_buffer); - av_frame_free(ctx->rgb_frame); - av_frame_free(ctx->frame); - avcodec_close(ctx->codec_ctx); + av_free(ctx->rgb_buffer); + if(ctx->frame) av_frame_free(&ctx->frame); + if(ctx->rgb_frame) av_frame_free(&ctx->rgb_frame); + avcodec_close(ctx->codec_ctx); avformat_close_input(&ctx->format_ctx); } @@ -221,23 +221,44 @@ struct lib_hardsubx_ctx* _init_hardsubx(struct ccx_s_options *options) ctx->tess_handle = TessBaseAPICreate(); char* pars_vec = strdup("debug_file"); char* pars_values = strdup("/dev/null"); - char *tessdata_dir_path="."; + char* tessdata_path = NULL; - int ret = -1; - if(options->ocrlang) - { - ret = TessBaseAPIInit4(ctx->tess_handle, NULL, options->ocrlang, ccx_options.ocr_oem, NULL, 0, &pars_vec, - &pars_values, 1, false); - if(ret != 0) - { - mprint("Failed loading language: %s, trying to load eng\n", options->ocrlang); - } - } - if(ret != 0) - { - ret = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", ccx_options.ocr_oem, NULL, 0, &pars_vec, - &pars_values, 1, false); - } + char* lang = options->ocrlang; + if(!lang) lang = "eng"; // English is default language + + tessdata_path = probe_tessdata_location_string(lang); + if(!tessdata_path) + { + if (strcmp(lang, "eng") == 0) + { + mprint("eng.traineddata not found! No Switching Possible\n"); + return NULL; + } + mprint("%s.traineddata not found! Switching to English\n", lang); + lang = "eng"; + tessdata_path = probe_tessdata_location_string("eng"); + if(!tessdata_path) + { + mprint("eng.traineddata not found! 
No Switching Possible\n"); + return NULL; + } + } + + int ret = -1; + + if (!strncmp("4.", TessVersion(), 2)) + { + char tess_path [1024]; + snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata"); + //ccx_options.ocr_oem are deprecated and only supported mode is OEM_LSTM_ONLY + ret = TessBaseAPIInit4(ctx->tess_handle, tess_path, lang, 1, NULL, 0, &pars_vec, + &pars_values, 1, false); + } + else + { + ret = TessBaseAPIInit4(ctx->tess_handle, tessdata_path, lang, ccx_options.ocr_oem, NULL, 0, &pars_vec, + &pars_values, 1, false); + } free(pars_vec); free(pars_values); diff --git a/src/lib_ccx/ocr.c b/src/lib_ccx/ocr.c index c47b6667..20a42b2f 100644 --- a/src/lib_ccx/ocr.c +++ b/src/lib_ccx/ocr.c @@ -129,6 +129,25 @@ char* probe_tessdata_location(int lang_index) return NULL; } +/** + * probe_tessdata_location_string + * + * This function returns tesseract data location given language string + */ +char* probe_tessdata_location_string(char* lang) +{ + int lang_index = -1; + for(int i = 0; i < NB_LANGUAGE; i++) { + if(language[i]) { + if(strcmp(lang, language[i]) == 0) lang_index = i; + } + } + + if(lang_index == -1) return NULL; // No such language found + + return probe_tessdata_location(lang_index); +} + void* init_ocr(int lang_index) { int ret = -1; diff --git a/src/lib_ccx/ocr.h b/src/lib_ccx/ocr.h index 74f80f28..a03032f1 100644 --- a/src/lib_ccx/ocr.h +++ b/src/lib_ccx/ocr.h @@ -12,6 +12,7 @@ struct image_copy //A copy of the original OCR image, used for color detection }; void delete_ocr (void** arg); +char* probe_tessdata_location_string(char* lang); void* init_ocr(int lang_index); char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h, struct image_copy *copy); int ocr_rect(void* arg, struct cc_bitmap *rect, char **str, int bgcolor, int ocr_quantmode);