2019-12-22 21:43:57 +00:00
|
|
|
#!/usr/bin/env sh -ex
|
2017-01-09 20:08:36 +00:00
|
|
|
|
|
|
|
####################################################################
|
|
|
|
# setup by tracey apr 2012
|
|
|
|
# updated version dec 2016
|
|
|
|
# see: http://www.ccextractor.org/doku.php
|
|
|
|
####################################################################
|
|
|
|
|
|
|
|
|
|
|
|
# build it static!
|
|
|
|
# simplest way is with linux alpine
|
|
|
|
# hop onto box with docker on it and cd to dir of the file you are staring at
|
|
|
|
# You will get a static-compiled binary and english language library file in the end.
|
|
|
|
if [ ! -e /tmp/cc/ccextractor-README.txt ]; then
|
|
|
|
rm -rf /tmp/cc;
|
|
|
|
mkdir -p -m777 /tmp/cc;
|
|
|
|
mkdir -p -m777 ../lib/tessdata/;
|
|
|
|
cp ccextractor-README.txt /tmp/cc/;
|
|
|
|
sudo docker run -v /tmp/cc:/tmp/cc --rm -it alpine:latest /tmp/cc/ccextractor-README.txt;
|
|
|
|
# NOTE: _AFTER_ testing/validating, you can promote it from "ccextractor.next" to "ccextractor"... ;-)
|
|
|
|
cp /tmp/cc/*traineddata ../lib/tessdata/;
|
|
|
|
chmod go-w ../lib/tessdata/;
|
|
|
|
exit 0;
|
|
|
|
fi
|
|
|
|
|
|
|
|
# NOW we are inside docker container...
|
|
|
|
cd /tmp/cc;
|
|
|
|
|
|
|
|
|
|
|
|
# we want tesseract (for OCR)
|
|
|
|
echo '
|
|
|
|
http://dl-cdn.alpinelinux.org/alpine/v3.5/main
|
|
|
|
http://dl-cdn.alpinelinux.org/alpine/v3.5/community
|
|
|
|
' >| /etc/apk/repositories;
|
|
|
|
apk update; apk upgrade;
|
|
|
|
|
|
|
|
apk add --update bash zsh alpine-sdk perl;
|
|
|
|
|
|
|
|
# (needed by various static builds below)
|
|
|
|
# Even though we're going to (re)builid tesseract from source statically, get its dependencies setup by
|
|
|
|
# installing it now, too.
|
|
|
|
apk add autoconf automake libtool tesseract-ocr-dev;
|
|
|
|
|
|
|
|
|
|
|
|
# Now comes the not-so-fun parts... Many packages _only_ provide .so files in their distros -- not the .a
|
|
|
|
# needed files for building something with it statically. Step through them now...
|
|
|
|
|
|
|
|
|
|
|
|
# libgif
|
|
|
|
wget https://sourceforge.net/projects/giflib/files/giflib-5.1.4.tar.gz;
|
|
|
|
zcat giflib*tar.gz | tar xf -;
|
|
|
|
cd giflib*/;
|
|
|
|
./configure --disable-shared --enable-static; make; make install;
|
|
|
|
hash -r;
|
|
|
|
cd -;
|
|
|
|
|
|
|
|
|
|
|
|
# libwebp
|
|
|
|
git clone https://github.com/webmproject/libwebp;
|
|
|
|
cd libwebp;
|
|
|
|
./autogen.sh;
|
|
|
|
./configure --disable-shared --enable-static; make; make install;
|
|
|
|
cd -;
|
|
|
|
|
|
|
|
|
|
|
|
# leptonica
|
|
|
|
wget http://www.leptonica.org/source/leptonica-1.73.tar.gz;
|
|
|
|
zcat leptonica*tar.gz | tar xf -;
|
|
|
|
cd leptonica*/;
|
|
|
|
./configure --disable-shared --enable-static; make; make install;
|
|
|
|
hash -r;
|
|
|
|
cd -;
|
|
|
|
|
|
|
|
|
|
|
|
# tesseract
|
|
|
|
git clone https://github.com/tesseract-ocr/tesseract;
|
|
|
|
cd tesseract;
|
|
|
|
./autogen.sh;
|
|
|
|
./configure --disable-shared --enable-static; make; make install;
|
|
|
|
cd -;
|
|
|
|
|
|
|
|
|
|
|
|
# ccextractor -- build static
|
|
|
|
git clone https://github.com/CCExtractor/ccextractor;
|
|
|
|
cd ccextractor/linux/;
|
|
|
|
# wget https://sourceforge.net/projects/ccextractor/files/ccextractor/0.82/ccextractor.src.0.82.zip;
|
|
|
|
# unzip ccextractor*.zip;
|
|
|
|
# cd ccextractor.*/linux/;
|
|
|
|
perl -i -pe 's/O3 /O3 -static /' Makefile;
|
|
|
|
# quick patch:
|
2020-01-30 12:58:37 +00:00
|
|
|
perl -i -pe 's/(strchr|strstr)\(/$1((char *)/' ../src/thirdparty/gpacmp4/url.c ../src/thirdparty/gpacmp4/error.c;
|
2017-01-09 20:08:36 +00:00
|
|
|
set +e; # this _will_ FAIL at the end..
|
|
|
|
make ENABLE_OCR=yes;
|
|
|
|
set -e;
|
|
|
|
# I confess hand-compiling (cherrypicking which .a to use when there are 2, etc.) is fragile...
|
|
|
|
# But it was the _only_ way I could get a fully static build after hours of thrashing...
|
[FIX] GCC warning fixes (#1204)
* cea708: Fix missing new line in log message
* subtype: Remove unused CC_708 type
CEA-708 inputs are coerced to CC_608 before hitting encode_sub.
GCC warns:
src/lib_ccx/ccx_encoders_common.c: In function ‘encode_sub’:
src/lib_ccx/ccx_encoders_common.c:1119:2: warning: enumeration value ‘CC_708’ not handled in switch [-Wswitch]
1119 | switch (sub->type)
| ^~~~~~
* build: Disable pointer-sign warning
This warning triggers all over the codebase due to the widespread use of
unsigned char arrays for parsed subtitle strings and them being passed
to string functions that expect signed ones. Since this won't actually
cause issues, silence the warning across the entire codebase.
* splitbysentence: Fix warnings
GCC warns:
src/lib_ccx/ccx_encoders_splitbysentence.c: In function ‘sbs_is_pointer_on_sentence_breaker’:
src/lib_ccx/ccx_encoders_splitbysentence.c:170:7: warning: variable ‘p’ set but not used [-Wunused-but-set-variable]
170 | char p = *(current - 1);
| ^
src/lib_ccx/ccx_encoders_splitbysentence.c: In function ‘sbs_find_insert_point_partial’:
src/lib_ccx/ccx_encoders_splitbysentence.c:231:1: warning: multi-line comment [-Wcomment]
231 | // sprintf(fmtbuf, "SBS: sbs_find_insert_point_partial: compare\n\
| ^
src/lib_ccx/ccx_encoders_splitbysentence.c:263:1: warning: multi-line comment [-Wcomment]
263 | // LOG_DEBUG("SBS: sbs_find_insert_point_partial: LEFT CHANGED,\n\tbuf:[%s]\n\tstr:[%s]\n\
| ^
src/lib_ccx/ccx_encoders_splitbysentence.c:297:1: warning: multi-line comment [-Wcomment]
297 | // sprintf(fmtbuf, "SBS: sbs_find_insert_point_partial: REPLACE ENTIRE TAIL !!\n\
| ^
src/lib_ccx/ccx_encoders_splitbysentence.c:222:6: warning: unused variable ‘i’ [-Wunused-variable]
222 | int i; // top level indexer for strings
| ^
src/lib_ccx/ccx_encoders_splitbysentence.c: In function ‘reformat_cc_bitmap_through_sentence_buffer’:
src/lib_ccx/ccx_encoders_splitbysentence.c:730:8: warning: unused variable ‘str’ [-Wunused-variable]
730 | char *str;
| ^~~
src/lib_ccx/ccx_encoders_splitbysentence.c:729:6: warning: unused variable ‘i’ [-Wunused-variable]
729 | int i = 0;
| ^
src/lib_ccx/ccx_encoders_splitbysentence.c:728:6: warning: unused variable ‘used’ [-Wunused-variable]
728 | int used;
| ^~~~
src/lib_ccx/ccx_encoders_splitbysentence.c:727:18: warning: unused variable ‘ms_end’ [-Wunused-variable]
727 | LLONG ms_start, ms_end;
| ^~~~~~
src/lib_ccx/ccx_encoders_splitbysentence.c:727:8: warning: unused variable ‘ms_start’ [-Wunused-variable]
727 | LLONG ms_start, ms_end;
| ^~~~~~~~
src/lib_ccx/ccx_encoders_splitbysentence.c:726:20: warning: unused variable ‘rect’ [-Wunused-variable]
726 | struct cc_bitmap* rect;
| ^~~~
* spupng: Fix warnings
GCC warns:
src/lib_ccx/ccx_encoders_spupng.c: In function ‘init_face’:
src/lib_ccx/ccx_encoders_spupng.c:644:6: warning: suggest parentheses around assignment used as truth value [-Wparentheses]
644 | if (error = FT_New_Face(ft_library, font, 0, face))
| ^~~~~
src/lib_ccx/ccx_encoders_spupng.c:651:6: warning: suggest parentheses around assignment used as truth value [-Wparentheses]
651 | if (error = FT_Set_Pixel_Sizes(*face, 0, FONT_SIZE))
| ^~~~~
src/lib_ccx/ccx_encoders_spupng.c: In function ‘spupng_export_string2png’:
src/lib_ccx/ccx_encoders_spupng.c:698:7: warning: suggest parentheses around assignment used as truth value [-Wparentheses]
698 | if (error = FT_Init_FreeType(&ft_library))
| ^~~~~
src/lib_ccx/ccx_encoders_spupng.c:706:6: warning: suggest parentheses around assignment used as truth value [-Wparentheses]
706 | if (error = init_face(&face_regular, ccx_options.enc_cfg.render_font))
| ^~~~~
src/lib_ccx/ccx_encoders_spupng.c:708:6: warning: suggest parentheses around assignment used as truth value [-Wparentheses]
708 | if (error = init_face(&face_italics, ccx_options.enc_cfg.render_font_italics))
| ^~~~~
src/lib_ccx/ccx_encoders_spupng.c:850:9: warning: unused variable ‘height’ [-Wunused-variable]
850 | int height = slot->bitmap.rows;
| ^~~~~~
src/lib_ccx/ccx_encoders_spupng.c:849:9: warning: unused variable ‘width’ [-Wunused-variable]
849 | int width = slot->bitmap.width;
| ^~~~~
src/lib_ccx/ccx_encoders_webvtt.c: In function ‘write_webvtt_header’:
src/lib_ccx/ccx_encoders_webvtt.c:263:1: warning: control reaches end of non-void function [-Wreturn-type]
263 | }
| ^
* webvtt: Fix missing return warning
The return value of this function is never used, so just drop the
values.
GCC warns:
src/lib_ccx/ccx_encoders_webvtt.c: In function ‘write_webvtt_header’:
src/lib_ccx/ccx_encoders_webvtt.c:263:1: warning: control reaches end of non-void function [-Wreturn-type]
263 | }
| ^
* gxf: Fix MIN macro redefinition warning
GCC warns:
src/lib_ccx/ccx_gxf.c:23: warning: "MIN" redefined
23 | #define MIN(a, b) ( (a < b) ? a : b)
|
In file included from src/lib_ccx/ccx_demuxer.h:8,
from src/lib_ccx/ccx_gxf.h:4,
from src/lib_ccx/ccx_gxf.c:13:
src/lib_ccx/utility.h:8: note: this is the location of the previous definition
8 | #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
* dvd: Fix unused variable warnings
GCC warns:
src/lib_ccx/dvd_subtitle_decoder.c: In function ‘get_bitmap’:
src/lib_ccx/dvd_subtitle_decoder.c:133:9: warning: unused variable ‘discard’ [-Wunused-variable]
133 | int discard = get_bits(ctx, &nextbyte, &pos, &m);
| ^~~~~~~
src/lib_ccx/dvd_subtitle_decoder.c:172:9: warning: unused variable ‘discard’ [-Wunused-variable]
172 | int discard = get_bits(ctx, &nextbyte, &pos, &m);
| ^~~~~~~
src/lib_ccx/dvd_subtitle_decoder.c: In function ‘write_dvd_sub’:
src/lib_ccx/dvd_subtitle_decoder.c:320:6: warning: unused variable ‘ret’ [-Wunused-variable]
320 | int ret =0;
| ^~~
* es_functions: Fix unused variable warning
This also removes the stale commented code that used this variable.
GCC warns:
src/lib_ccx/es_functions.c: In function ‘read_pic_info’:
src/lib_ccx/es_functions.c:682:7: warning: unused variable ‘frame_type_to_char’ [-Wunused-variable]
682 | char frame_type_to_char[] = { '?', 'I', 'P','B', 'D', '?', '?','?' };
| ^~~~~~~~~~~~~~~~~~
* dvb: Fix unused variable warning when OCR is disabled
GCC warns:
src/lib_ccx/dvb_subtitle_decoder.c: In function ‘write_dvb_sub’:
src/lib_ccx/dvb_subtitle_decoder.c:1509:6: warning: unused variable ‘ret’ [-Wunused-variable]
1509 | int ret = 0;
| ^~~
* general_loop: Fix warnings
GCC warns:
src/lib_ccx/general_loop.c: In function ‘general_loop’:
src/lib_ccx/general_loop.c:1113:15: warning: suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses]
1113 | (enc_ctx && (enc_ctx->srt_counter || enc_ctx->cea_708_counter) ||
| ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
At top level:
src/lib_ccx/general_loop.c:25:28: warning: ‘DO_NOTHING’ defined but not used [-Wunused-const-variable=]
25 | const static unsigned char DO_NOTHING[] = {0x80, 0x80};
| ^~~~~~~~~~
* networking: Fix unknown pragma warning for non-MSVC compilers
GCC warns:
src/lib_ccx/networking.c:22: warning: ignoring #pragma warning [-Wunknown-pragmas]
22 | #pragma warning( suppress : 4005)
|
* networking: Fix unused variable warnings on non-Windows platforms
GCC warns:
src/lib_ccx/networking.c: In function ‘net_udp_read’:
src/lib_ccx/networking.c:342:12: warning: variable ‘addr’ set but not used [-Wunused-but-set-variable]
342 | in_addr_t addr;
| ^~~~
src/lib_ccx/networking.c:340:12: warning: unused variable ‘len’ [-Wunused-variable]
340 | socklen_t len = sizeof(source_addr);
| ^~~
src/lib_ccx/networking.c:338:7: warning: unused variable ‘ip’ [-Wunused-variable]
338 | char ip[INET_ADDRSTRLEN];
| ^~
* params: Fix unused variable warning when OCR is disabled
GCC warns:
src/lib_ccx/params.c: In function ‘version’:
src/lib_ccx/params.c:1015:8: warning: unused variable ‘leptversion’ [-Wunused-variable]
1015 | char *leptversion;
| ^~~~~~~~~~~
* params_dump: Fix empty encoding when ASCII is used
GCC warns:
src/lib_ccx/params_dump.c: In function ‘params_dump’:
src/lib_ccx/params_dump.c:110:2: warning: enumeration value ‘CCX_ENC_ASCII’ not handled in switch [-Wswitch]
110 | switch (ccx_options.enc_cfg.encoding)
| ^~~~~~
* params_dump: Fix comparison between mismatching enums
GCC warns:
src/lib_ccx/params_dump.c: In function ‘print_file_report’:
src/lib_ccx/params_dump.c:402:18: warning: comparison between ‘enum ccx_stream_type’ and ‘enum ccx_stream_mode_enum’ [-Wenum-compare]
402 | (info->stream == CCX_SM_TRANSPORT ||
| ^~
src/lib_ccx/params_dump.c:403:18: warning: comparison between ‘enum ccx_stream_type’ and ‘enum ccx_stream_mode_enum’ [-Wenum-compare]
403 | info->stream == CCX_SM_PROGRAM ||
| ^~
src/lib_ccx/params_dump.c:404:18: warning: comparison between ‘enum ccx_stream_type’ and ‘enum ccx_stream_mode_enum’ [-Wenum-compare]
404 | info->stream == CCX_SM_ASF ||
| ^~
src/lib_ccx/params_dump.c:405:18: warning: comparison between ‘enum ccx_stream_type’ and ‘enum ccx_stream_mode_enum’ [-Wenum-compare]
405 | info->stream == CCX_SM_WTV))
| ^~
* telxcc: Fix unused variable warning
GCC warns:
src/lib_ccx/telxcc.c: In function ‘process_telx_packet’:
src/lib_ccx/telxcc.c:928:10: warning: unused variable ‘flag_subtitle’ [-Wunused-variable]
928 | uint8_t flag_subtitle;
| ^~~~~~~~~~~~~
* ts_functions: Fix unused variable warnings
GCC warns:
src/lib_ccx/ts_functions.c: In function ‘get_pts’:
src/lib_ccx/ts_functions.c:642:11: warning: variable ‘pes_packet_length’ set but not used [-Wunused-but-set-variable]
642 | uint16_t pes_packet_length;
| ^~~~~~~~~~~~~~~~~
src/lib_ccx/ts_functions.c:641:10: warning: variable ‘pes_stream_id’ set but not used [-Wunused-but-set-variable]
641 | uint8_t pes_stream_id;
| ^~~~~~~~~~~~~
* ts_tables_epg: Fix warnings
GCC warns:
src/lib_ccx/ts_tables_epg.c: In function ‘EPG_add_event’:
src/lib_ccx/ts_tables_epg.c:380:6: warning: unused variable ‘isnew’ [-Wunused-variable]
380 | int isnew=true, j;
| ^~~~~
src/lib_ccx/ts_tables_epg.c: In function ‘EPG_DVB_decode_string’:
src/lib_ccx/ts_tables_epg.c:469:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
469 | int ret=-1;
| ^~~
src/lib_ccx/ts_tables_epg.c: In function ‘EPG_ATSC_decode_EIT’:
src/lib_ccx/ts_tables_epg.c:802:25: warning: variable ‘emt_location’ set but not used [-Wunused-but-set-variable]
802 | uint8_t title_length, emt_location;
| ^~~~~~~~~~~~
src/lib_ccx/ts_tables_epg.c:764:10: warning: variable ‘table_id’ set but not used [-Wunused-but-set-variable]
764 | uint8_t table_id;
| ^~~~~~~~
src/lib_ccx/ts_tables_epg.c: In function ‘EPG_ATSC_decode_VCT’:
src/lib_ccx/ts_tables_epg.c:837:10: warning: variable ‘table_id’ set but not used [-Wunused-but-set-variable]
837 | uint8_t table_id;
| ^~~~~~~~
src/lib_ccx/ts_tables_epg.c: In function ‘EPG_DVB_decode_EIT’:
src/lib_ccx/ts_tables_epg.c:883:10: warning: variable ‘segment_last_section_number’ set but not used [-Wunused-but-set-variable]
883 | uint8_t segment_last_section_number;
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~
src/lib_ccx/ts_tables_epg.c:882:10: warning: variable ‘last_section_number’ set but not used [-Wunused-but-set-variable]
882 | uint8_t last_section_number;
| ^~~~~~~~~~~~~~~~~~~
src/lib_ccx/ts_tables_epg.c: In function ‘parse_EPG_packet’:
src/lib_ccx/ts_tables_epg.c:1041:11: warning: unused variable ‘transport_error_indicator’ [-Wunused-variable]
1041 | unsigned transport_error_indicator = (tspacket[1]&0x80)>>7;
| ^~~~~~~~~~~~~~~~~~~~~~~~~
* matroska: Fix unused variable warning
The call is left alone since it might create a decoder context.
GCC warns:
src/lib_ccx/matroska.c: In function ‘matroska_save_all’:
src/lib_ccx/matroska.c:1182:27: warning: unused variable ‘dec_ctx’ [-Wunused-variable]
1182 | struct lib_cc_decode *dec_ctx = update_decoder_list(mkv_ctx->ctx);
| ^~~~~~~
* utility: Only define MIN when necessary
GCC warns:
In file included from src/lib_ccx/ccx_demuxer.h:8,
from src/lib_ccx/lib_ccx.h:15,
from src/gpacmp4/mp4.c:6:
src/lib_ccx/utility.h:8: warning: "MIN" redefined
8 | #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
|
In file included from src/gpacmp4/gpac/tools.h:33,
from src/gpacmp4/gpac/isomedia.h:50,
from src/gpacmp4/mp4.c:5:
src/gpacmp4/gpac/setup.h:324: note: this is the location of the previous definition
324 | #define MIN(X, Y) ((X)<(Y)?(X):(Y))
|
2020-01-24 02:49:16 +00:00
|
|
|
gcc -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -O3 -std=gnu99 -s -DGPAC_CONFIG_LINUX -DENABLE_OCR -DPNG_NO_CONFIG_H -I/usr/local/include/tesseract -I/usr/local/include/leptonica objs/*.o -o ccextractor \
|
2017-01-09 20:08:36 +00:00
|
|
|
--static -lm \
|
|
|
|
/usr/local/lib/libtesseract.a \
|
|
|
|
/usr/local/lib/liblept.a \
|
|
|
|
/usr/local/lib/libgif.a \
|
|
|
|
/usr/local/lib/libwebp.a \
|
|
|
|
/usr/lib/libjpeg.a \
|
|
|
|
/usr/lib/libtiff.a \
|
|
|
|
/usr/lib/libgomp.a \
|
|
|
|
-lstdc++;
|
|
|
|
|
|
|
|
cp ccextractor /tmp/cc/ccextractor.next;
|
|
|
|
cd -;
|
|
|
|
|
|
|
|
# get english lang trained data
|
|
|
|
wget https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata;
|