mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2024-12-24 20:01:42 +00:00
Binary search for dictionary
This commit is contained in:
parent
9d8bb65e2a
commit
c8f65bb686
2
.gitignore
vendored
2
.gitignore
vendored
@ -5,3 +5,5 @@ CVS
|
||||
linux/ccextractor
|
||||
linux/depend
|
||||
windows/debug/*
|
||||
*.sdf
|
||||
*.suo
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include "ccextractor.h"
|
||||
|
||||
#include "utility.h"
|
||||
//extern unsigned char encoded_crlf[16];
|
||||
|
||||
// Encodes a generic string. Note that since we use the encoders for closed caption
|
||||
@ -30,40 +30,32 @@ unsigned encode_line (unsigned char *buffer, unsigned char *text)
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/* True when c is a word separator: a blank, the 0x89 byte, any ASCII
 * punctuation, or 0x99. The argument is evaluated more than once, so do not
 * pass an expression with side effects. */
#define ISSEPARATOR(c) ((c)==' ' || (c)==0x89 || ispunct(c) \
	|| (c)==0x99) // This is the apostrophe. We get it here in CC encoding, not ASCII
|
||||
|
||||
|
||||
void correct_case (int line_num, struct eia608_screen *data)
|
||||
void correct_case(int line_num,struct eia608_screen *data)
|
||||
{
|
||||
int i=0;
|
||||
while (i<spell_words)
|
||||
{
|
||||
char *c=(char *) data->characters[line_num];
|
||||
size_t len=strlen (spell_correct[i]);
|
||||
while ((c=strstr (c,spell_lower[i]))!=NULL)
|
||||
{
|
||||
// Make sure it's a whole word (start of line or
|
||||
// preceded by space, and end of line or followed by
|
||||
// space)
|
||||
unsigned char prev;
|
||||
if (c==(char *) data->characters[line_num]) // Beginning of line...
|
||||
prev=' '; // ...Pretend we had a blank before
|
||||
else
|
||||
prev=*(c-1);
|
||||
unsigned char next;
|
||||
if (c-(char *) data->characters[line_num]+len==CC608_SCREEN_WIDTH) // End of line...
|
||||
next=' '; // ... pretend we have a blank later
|
||||
else
|
||||
next=*(c+len);
|
||||
if ( ISSEPARATOR(prev) && ISSEPARATOR(next))
|
||||
{
|
||||
memcpy (c,spell_correct[i],len);
|
||||
}
|
||||
c++;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
char delim[64] = {
|
||||
' ' ,'\n','\r', 0x89,0x99,
|
||||
'!' , '"', '#', '%' , '&',
|
||||
'\'', '(', ')', ';' , '<',
|
||||
'=' , '>', '?', '[' ,'\\',
|
||||
']' , '*', '+', ',' , '-',
|
||||
'.' , '/', ':', '^' , '_',
|
||||
'{' , '|', '}', '~' ,'\0' };
|
||||
|
||||
char *line = strdup(((char*)data->characters[line_num]));
|
||||
char *oline = (char*)data->characters[line_num];
|
||||
char *c = strtok(line,delim);
|
||||
do
|
||||
{
|
||||
char **index = bsearch(&c,spell_lower,spell_words,sizeof(*spell_lower),string_cmp);
|
||||
|
||||
if(index)
|
||||
{
|
||||
char *correct_c = *(spell_correct + (index - spell_lower));
|
||||
size_t len=strlen (correct_c);
|
||||
memcpy(oline + (c - line),correct_c,len);
|
||||
}
|
||||
} while ( ( c = strtok(NULL,delim) ) != NULL );
|
||||
free(line);
|
||||
}
|
||||
|
||||
void capitalize (int line_num, struct eia608_screen *data)
|
||||
|
@ -199,41 +199,25 @@ static void freep(void *arg)
|
||||
}
|
||||
#ifdef DEBUG
|
||||
|
||||
/**
 * Lookup tables handed to check_trans_tn_intensity through the sort's
 * user argument. Both arrays are indexed by the byte values being sorted.
 */
struct transIntensity
{
	uint8_t *t; /* transparency of each entry */
	uint8_t *i; /* intensity of each entry */
};

/**
 * Three-argument comparator that orders indices by transparency first and
 * by intensity second.
 *
 * @param p1  points to the first index (a single unsigned char)
 * @param p2  points to the second index (a single unsigned char)
 * @param arg points to the struct transIntensity holding the two tables
 * @return -1 when the first index sorts before the second, 1 when it sorts
 *         after, 0 when both transparency and intensity are equal
 */
int check_trans_tn_intensity(const void *p1, const void *p2, void *arg)
{
	const struct transIntensity *ti = arg;
	const unsigned char lhs = *(const unsigned char *)p1;
	const unsigned char rhs = *(const unsigned char *)p2;

	/* Transparency has priority over intensity. */
	if (ti->t[lhs] != ti->t[rhs])
		return ti->t[lhs] < ti->t[rhs] ? -1 : 1;

	if (ti->i[lhs] != ti->i[rhs])
		return ti->i[lhs] < ti->i[rhs] ? -1 : 1;

	return 0;
}
|
||||
int mapclut_paletee(png_color *palette, png_byte *alpha, uint32_t *clut,
|
||||
uint8_t depth)
|
||||
{
|
||||
@ -268,6 +252,7 @@ int quantize_map(png_byte *alpha, uint8_t *intensity, png_color *palette,
|
||||
* save index of intensity order table
|
||||
*/
|
||||
uint32_t *mcit = NULL;
|
||||
struct transIntensity ti = { alpha,intensity};
|
||||
|
||||
int ret = 0;
|
||||
|
||||
@ -304,7 +289,7 @@ int quantize_map(png_byte *alpha, uint8_t *intensity, png_color *palette,
|
||||
{
|
||||
histogram[bitmap[i]]++;
|
||||
}
|
||||
sort_intensity_wise((uint8_t*) alpha, (uint8_t*) intensity, iot, nb_color);
|
||||
shell_sort((void*)iot, nb_color, sizeof(*iot), check_trans_tn_intensity, (void*)&ti);
|
||||
|
||||
/* using selection sort since need to find only max_color */
|
||||
for (int i = 0; i < max_color; i++)
|
||||
@ -1754,7 +1739,6 @@ static void dvbsub_parse_page_segment(void *dvb_ctx, const uint8_t *buf,
|
||||
int timeout;
|
||||
int version;
|
||||
long long start = get_visible_start();
|
||||
char *filename;
|
||||
void *sp = ctx->out->spupng_data;
|
||||
|
||||
|
||||
@ -1774,7 +1758,6 @@ static void dvbsub_parse_page_segment(void *dvb_ctx, const uint8_t *buf,
|
||||
ctx->time_out = timeout;
|
||||
ctx->version = version;
|
||||
|
||||
filename = get_spupng_filename(sp);
|
||||
if(ctx->prev_start == 0)
|
||||
{
|
||||
write_sputag(sp, ctx->prev_start, start);
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "ccextractor.h"
|
||||
#include "utility.h"
|
||||
|
||||
static int inputfile_capacity=0;
|
||||
static int spell_builtin_added=0; // so we don't do it twice
|
||||
@ -158,7 +159,6 @@ int process_cap_file (char *filename)
|
||||
fclose (fi);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int isanumber (char *s)
|
||||
{
|
||||
while (*s)
|
||||
@ -1058,6 +1058,8 @@ void parse_parameters (int argc, char *argv[])
|
||||
if (add_built_in_words())
|
||||
fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory for word list");
|
||||
ccx_options.sentence_cap=1;
|
||||
shell_sort(spell_lower,spell_words,sizeof(*spell_lower),string_cmp2,NULL);
|
||||
shell_sort(spell_correct,spell_words,sizeof(*spell_correct),string_cmp2,NULL);
|
||||
continue;
|
||||
}
|
||||
if ((strcmp (argv[i],"--capfile")==0 ||
|
||||
@ -1071,6 +1073,8 @@ void parse_parameters (int argc, char *argv[])
|
||||
ccx_options.sentence_cap=1;
|
||||
ccx_options.sentence_cap_file=argv[i+1];
|
||||
i++;
|
||||
shell_sort(spell_lower,spell_words,sizeof(*spell_lower),string_cmp2,NULL);
|
||||
shell_sort(spell_correct,spell_words,sizeof(*spell_correct),string_cmp2,NULL);
|
||||
continue;
|
||||
}
|
||||
if (strcmp (argv[i],"--program-number")==0 ||
|
||||
|
@ -1,5 +1,9 @@
|
||||
#include "ccextractor.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define strcasecmp stricmp
|
||||
#endif
|
||||
|
||||
static char *text;
|
||||
static int text_size=0;
|
||||
|
||||
@ -296,3 +300,39 @@ int hex2int (char high, char low)
|
||||
return -1;
|
||||
return h*16+l;
|
||||
}
|
||||
/* Exchange two non-overlapping elements of size bytes each. */
static void shell_sort_swap(unsigned char *a, unsigned char *b, size_t size)
{
	while (size--)
	{
		unsigned char byte = *a;
		*a++ = *b;
		*b++ = byte;
	}
}

/**
 * Sort an array in place with a shell sort (gaps nb/2, nb/4, ..., 1).
 *
 * Elements are exchanged inside the array, so no scratch memory is
 * allocated and the sort cannot fail. The call is a no-op when base or
 * compar is NULL, when size is 0, or when nb is below 2.
 *
 * @param base   points to the start of the array
 * @param nb     number of elements in the array
 * @param size   size in bytes of each element
 * @param compar comparison function, called with three arguments: pointers
 *               to the two elements being compared, and arg. A negative
 *               result means the first element sorts before the second.
 * @param arg    argument passed unchanged to the comparison function
 */
void shell_sort(void *base, int nb,size_t size,int (*compar)(const void*p1,const void *p2,void*arg),void *arg)
{
	unsigned char *lbase = base;

	if (lbase == NULL || compar == NULL || size == 0 || nb < 2)
		return;

	for (int gap = nb / 2; gap > 0; gap /= 2)
	{
		for (int p = gap; p < nb; p++)
		{
			// Walk element p down its gap-chain until its predecessor
			// no longer sorts after it.
			for (int j = p; j >= gap; j -= gap)
			{
				unsigned char *cur = lbase + (size_t)j * size;
				unsigned char *prev = lbase + (size_t)(j - gap) * size;

				if (compar(cur, prev, arg) >= 0)
					break;
				shell_sort_swap(cur, prev, size);
			}
		}
	}
}

/**
 * Case-insensitive comparator for arrays of C strings, in the three-argument
 * form expected by shell_sort.
 *
 * @param p1  points to the first string pointer
 * @param p2  points to the second string pointer
 * @param arg unused
 * @return the result of strcasecmp on the two strings
 */
int string_cmp2(const void *p1,const void *p2,void *arg)
{
	const char *const *lhs = p1;
	const char *const *rhs = p2;

	(void)arg;
	return strcasecmp(*lhs, *rhs);
}

/**
 * Two-argument form of string_cmp2, usable as a bsearch comparator.
 */
int string_cmp(const void *p1,const void *p2)
{
	return string_cmp2(p1, p2, NULL);
}
|
||||
|
@ -10,5 +10,8 @@
|
||||
#define RL16(x) (*(unsigned short int*)(x))
|
||||
#define RB16(x) (ntohs(*(unsigned short int*)(x)))
|
||||
|
||||
/* Sorts the nb elements of size bytes each that start at base, in place,
 * with a shell sort. compar is called with pointers to two elements plus
 * arg, and arg is passed to it unchanged. */
void shell_sort(void *base, int nb,size_t size,int (*compar)(const void*p1,const void *p2,void*arg),void *arg);
|
||||
/* Two-argument wrapper around string_cmp2 (passes NULL as arg). */
int string_cmp(const void *p1,const void *p2);
|
||||
/* Treats p1 and p2 as pointers to char* and compares the two strings with
 * strcasecmp; arg is not used. */
int string_cmp2(const void *p1,const void *p2,void *arg);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user