mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2024-12-25 04:11:38 +00:00
Added OEM mode parameter
This commit is contained in:
parent
bb026a7318
commit
89c00a7e21
@ -63,6 +63,7 @@ void init_options (struct ccx_s_options *options)
|
||||
options->dvbcolor = 1; // By default, attempt to detect both text and color
|
||||
options->dvblang = NULL; // By default, autodetect DVB language
|
||||
options->ocrlang = NULL; // By default, autodetect .traineddata file
|
||||
options->ocr_oem = 0; // By default, set Tesseract OEM mode OEM_TESSERACT_ONLY (0)
|
||||
options->ignore_pts_jumps = 1;
|
||||
|
||||
/*HardsubX related stuff*/
|
||||
|
@ -129,6 +129,7 @@ struct ccx_s_options // Options from user parameters
|
||||
int dvbcolor; // 1 if Color to be detected for DVB
|
||||
char *dvblang; // The name of the language stream for DVB
|
||||
char *ocrlang; // The name of the .traineddata file to be loaded with tesseract
|
||||
int ocr_oem; // The Tesseract OEM mode, could be 0 (default), 1 or 2
|
||||
|
||||
/*HardsubX related stuff*/
|
||||
int hardsubx_ocr_mode;
|
||||
|
@ -217,7 +217,7 @@ struct lib_hardsubx_ctx* _init_hardsubx(struct ccx_s_options *options)
|
||||
ctx->tess_handle = TessBaseAPICreate();
|
||||
char* pars_vec = strdup("debug_file");
|
||||
char* pars_values = strdup("/dev/null");
|
||||
int res = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", OEM_DEFAULT, NULL, 0, &pars_vec,
|
||||
int res = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", ccx_options.ocr_oem, NULL, 0, &pars_vec,
|
||||
&pars_values, 1, false);
|
||||
free(pars_vec);
|
||||
free(pars_values);
|
||||
|
@ -142,7 +142,7 @@ void* init_ocr(int lang_index)
|
||||
char* pars_vec = strdup("debug_file");
|
||||
char* pars_values = strdup("/dev/null");
|
||||
|
||||
ret = TessBaseAPIInit4(ctx->api, tessdata_path, lang, OEM_DEFAULT, NULL, 0, &pars_vec,
|
||||
ret = TessBaseAPIInit4(ctx->api, tessdata_path, lang, ccx_options.ocr_oem, NULL, 0, &pars_vec,
|
||||
&pars_values, 1, false);
|
||||
|
||||
free(pars_vec);
|
||||
@ -275,7 +275,8 @@ char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* i
|
||||
char* word = TessResultIteratorGetUTF8Text(ri,level);
|
||||
float conf = TessResultIteratorConfidence(ri,level);
|
||||
int x1, y1, x2, y2;
|
||||
TessPageIteratorBoundingBox((TessPageIterator *)ri,level, &x1, &y1, &x2, &y2);
|
||||
if (!TessPageIteratorBoundingBox((TessPageIterator *)ri, level, &x1, &y1, &x2, &y2))
|
||||
continue;
|
||||
// printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;",word, conf, x1, y1, x2, y2);
|
||||
// printf("word: '%s';", word);
|
||||
// {
|
||||
|
@ -562,6 +562,10 @@ void print_usage (void)
|
||||
mprint (" using the Chinese (Traditional) trained data\n");
|
||||
mprint (" This option is also helpful when the traineddata file\n");
|
||||
mprint (" has non standard names that don't follow ISO specs\n");
|
||||
mprint (" -oem: Select the OEM mode for Tesseract, could be 0, 1 or 2.\n");
|
||||
mprint (" 0: OEM_TESSERACT_ONLY - default value, the fastest mode.\n");
|
||||
mprint (" 1: OEM_LSTM_ONLY - use LSTM algorithm for recognition.\n");
|
||||
mprint (" 2: OEM_TESSERACT_LSTM_COMBINED - both algorithms.\n");
|
||||
|
||||
mprint ("\n");
|
||||
mprint ("Options that affect how ccextractor reads and writes (buffering):\n");
|
||||
@ -1356,6 +1360,26 @@ int parse_parameters (struct ccx_s_options *opt, int argc, char *argv[])
|
||||
continue;
|
||||
}
|
||||
|
||||
/* -oem <mode>: select the Tesseract OCR engine mode; only 0, 1 or 2 are accepted. */
if (strcmp(argv[i], "-oem") == 0)
{
	if (i < argc - 1)
	{
		/* Parse the argument in place. The previous code copied it into a
		   buffer sized with sizeof(char *) (pointer size, not string
		   length), which overflowed on long arguments and was never freed.
		   strtol also lets us reject non-numeric or trailing-garbage input
		   that atoi would silently convert. */
		char *end = NULL;
		long oem = strtol(argv[i + 1], &end, 10);

		/* end == argv[i + 1]: no digits consumed; *end != '\0': trailing
		   characters. Out-of-range input saturates to LONG_MIN/LONG_MAX and
		   is therefore caught by the 0..2 range check as well. */
		if (end == argv[i + 1] || *end != '\0' || oem < 0 || oem > 2)
		{
			fatal(EXIT_MALFORMED_PARAMETER, "-oem must be 0, 1 or 2\n");
		}
		opt->ocr_oem = (int)oem;
	}
	else
	{
		fatal(EXIT_MALFORMED_PARAMETER, "-oem has no argument.");
	}
	/* Skip the value we just consumed and move on to the next option. */
	i++;
	continue;
}
|
||||
|
||||
/* Output file formats */
|
||||
if (strcmp (argv[i],"-srt")==0 ||
|
||||
strcmp (argv[i],"-dvdraw")==0 ||
|
||||
|
Loading…
Reference in New Issue
Block a user