Setting up different levels of subtitle classifiers

This commit is contained in:
Abhinav Shukla 2016-07-31 23:54:48 -07:00
parent 773fb63f92
commit 52dfc82054
4 changed files with 82 additions and 7 deletions

View File

@ -6,7 +6,7 @@ Carlos' version (mainstream) is the most stable branch.
Extracting subtitles has never been so easy. Just type the following command:
ccextractor "name of input"
Gui lovers should download the Sorceforge version of CCExtractor, the Git Version is not your cup of tea.
GUI lovers should download the SourceForge version of CCExtractor; the Git version is not your cup of tea.
http://ccextractor.sourceforge.net/download-ccextractor.html
For news about releases, please see CHANGES.TXT

View File

@ -1,3 +1,12 @@
0.82
-----------------
- New: HardsubX - Burned in subtitle extraction subsystem.
- New: Color Detection in DVB Subtitles
- Fix: Corrected sentence capitalization
- Fix: Skipping redundant bytes at the end of tx3g atom in MP4
- Fix: Illegal SRT files being created from DVB subtitles
- Fix: Incorrect Progress Display
0.81 (2016-06-13)
-----------------
- New: --version parameter for extensive version information (version number, compile date, executable hash, git commit (if appropriate))

View File

@ -84,16 +84,22 @@ int hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder_
int hardsubx_process_frames_binary(struct lib_hardsubx_ctx *ctx);
//hardsubx_imgops.c
void rgb2hsv(float R, float G, float B,float *L, float *a, float *b);
void rgb2hsv(float R, float G, float B,float *H, float *S, float *V);
void rgb2lab(float R, float G, float B,float *L, float *a, float *b);
//hardsubx_classifier.c
// OCR text extraction entry points: simple, wordwise and letterwise variants.
// Each takes the HardsubX context and the PIX image to read, and returns a char *.
// NOTE(review): ownership of the returned string is not visible from this header - confirm who frees it.
char *get_ocr_text_simple(struct lib_hardsubx_ctx *ctx, PIX *image);
char *get_ocr_text_wordwise(struct lib_hardsubx_ctx *ctx, PIX *image);
char *get_ocr_text_letterwise(struct lib_hardsubx_ctx *ctx, PIX *image);
// The same three variants with an additional float `threshold` argument.
// NOTE(review): presumably a confidence cutoff - confirm meaning and range against the implementation.
char *get_ocr_text_simple_threshold(struct lib_hardsubx_ctx *ctx, PIX *image, float threshold);
char *get_ocr_text_wordwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image, float threshold);
char *get_ocr_text_letterwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image, float threshold);
//hardsubx_utility.c
int edit_distance(char * word1, char * word2, int len1, int len2);
int64_t convert_pts_to_ms(int64_t pts, AVRational time_base);
int64_t convert_pts_to_ns(int64_t pts, AVRational time_base);
int64_t convert_pts_to_s(int64_t pts, AVRational time_base);
#endif

View File

@ -30,24 +30,84 @@ char *get_ocr_text_simple(struct lib_hardsubx_ctx *ctx, PIX *image)
return text_out;
}
char *get_ocr_text_wordwise()
/*
 * Runs Tesseract recognition on `image` using the handle stored in
 * ctx->tess_handle.
 *
 * ctx   - HardsubX context; its tess_handle is passed to the Tesseract calls.
 * image - image handed to Tesseract via TessBaseAPISetImage2().
 *
 * Returns NULL when TessBaseAPIRecognize() reports failure (a message is
 * printed to stdout). The word-by-word text extraction suggested by the
 * function name is not implemented yet, so NULL is also returned after a
 * successful recognition; callers must treat NULL as "no text available".
 */
char *get_ocr_text_wordwise(struct lib_hardsubx_ctx *ctx, PIX *image)
{
	/* Must start as NULL: nothing below assigns it yet, and returning an
	 * uninitialized pointer is undefined behavior. */
	char *text_out = NULL;

	TessBaseAPISetImage2(ctx->tess_handle, image);
	if(TessBaseAPIRecognize(ctx->tess_handle, NULL) != 0)
	{
		//TODO: Display error message
		printf("Error in Tesseract recognition\n");
		return NULL;
	}

	//TODO: Collect the recognized text into text_out
	return text_out;
}
char *get_ocr_text_simple_threshold()
/*
 * Runs Tesseract recognition on `image` using the handle stored in
 * ctx->tess_handle.
 *
 * ctx   - HardsubX context; its tess_handle is passed to the Tesseract calls.
 * image - image handed to Tesseract via TessBaseAPISetImage2().
 *
 * Returns NULL when TessBaseAPIRecognize() reports failure (a message is
 * printed to stdout). The letter-by-letter text extraction suggested by the
 * function name is not implemented yet, so NULL is also returned after a
 * successful recognition; callers must treat NULL as "no text available".
 */
char *get_ocr_text_letterwise(struct lib_hardsubx_ctx *ctx, PIX *image)
{
	/* Must start as NULL: nothing below assigns it yet, and returning an
	 * uninitialized pointer is undefined behavior. */
	char *text_out = NULL;

	TessBaseAPISetImage2(ctx->tess_handle, image);
	if(TessBaseAPIRecognize(ctx->tess_handle, NULL) != 0)
	{
		//TODO: Display error message
		printf("Error in Tesseract recognition\n");
		return NULL;
	}

	//TODO: Collect the recognized text into text_out
	return text_out;
}
char *get_ocr_text_wordwise_threshold()
/*
 * Runs Tesseract recognition on `image` using the handle stored in
 * ctx->tess_handle.
 *
 * ctx       - HardsubX context; its tess_handle is passed to the Tesseract calls.
 * image     - image handed to Tesseract via TessBaseAPISetImage2().
 * threshold - currently unused; no filtering is applied yet.
 *
 * Returns NULL when TessBaseAPIRecognize() reports failure (a message is
 * printed to stdout). Text extraction is not implemented yet, so NULL is
 * also returned after a successful recognition; callers must treat NULL as
 * "no text available".
 */
char *get_ocr_text_simple_threshold(struct lib_hardsubx_ctx *ctx, PIX *image, float threshold)
{
	/* Must start as NULL: nothing below assigns it yet, and returning an
	 * uninitialized pointer is undefined behavior. */
	char *text_out = NULL;

	(void)threshold; /* not used until thresholded extraction is implemented */

	TessBaseAPISetImage2(ctx->tess_handle, image);
	if(TessBaseAPIRecognize(ctx->tess_handle, NULL) != 0)
	{
		//TODO: Display error message
		printf("Error in Tesseract recognition\n");
		return NULL;
	}

	//TODO: Collect the recognized text into text_out, honoring threshold
	return text_out;
}
char *get_ocr_text_simple_italics()
/*
 * Runs Tesseract recognition on `image` using the handle stored in
 * ctx->tess_handle.
 *
 * ctx       - HardsubX context; its tess_handle is passed to the Tesseract calls.
 * image     - image handed to Tesseract via TessBaseAPISetImage2().
 * threshold - currently unused; no filtering is applied yet.
 *
 * Returns NULL when TessBaseAPIRecognize() reports failure (a message is
 * printed to stdout). The word-by-word extraction suggested by the function
 * name is not implemented yet, so NULL is also returned after a successful
 * recognition; callers must treat NULL as "no text available".
 */
char *get_ocr_text_wordwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image, float threshold)
{
	/* Must start as NULL: nothing below assigns it yet, and returning an
	 * uninitialized pointer is undefined behavior. */
	char *text_out = NULL;

	(void)threshold; /* not used until thresholded extraction is implemented */

	TessBaseAPISetImage2(ctx->tess_handle, image);
	if(TessBaseAPIRecognize(ctx->tess_handle, NULL) != 0)
	{
		//TODO: Display error message
		printf("Error in Tesseract recognition\n");
		return NULL;
	}

	//TODO: Collect the recognized text into text_out, honoring threshold
	return text_out;
}
/*
 * Runs Tesseract recognition on `image` using the handle stored in
 * ctx->tess_handle.
 *
 * ctx       - HardsubX context; its tess_handle is passed to the Tesseract calls.
 * image     - image handed to Tesseract via TessBaseAPISetImage2().
 * threshold - currently unused; no filtering is applied yet.
 *
 * Returns NULL when TessBaseAPIRecognize() reports failure (a message is
 * printed to stdout). The letter-by-letter extraction suggested by the
 * function name is not implemented yet, so NULL is also returned after a
 * successful recognition; callers must treat NULL as "no text available".
 */
char *get_ocr_text_letterwise_threshold(struct lib_hardsubx_ctx *ctx, PIX *image, float threshold)
{
	/* Must start as NULL: nothing below assigns it yet, and returning an
	 * uninitialized pointer is undefined behavior. */
	char *text_out = NULL;

	(void)threshold; /* not used until thresholded extraction is implemented */

	TessBaseAPISetImage2(ctx->tess_handle, image);
	if(TessBaseAPIRecognize(ctx->tess_handle, NULL) != 0)
	{
		//TODO: Display error message
		printf("Error in Tesseract recognition\n");
		return NULL;
	}

	//TODO: Collect the recognized text into text_out, honoring threshold
	return text_out;
}
#endif