mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2025-01-18 07:54:28 +00:00
113 lines
3.6 KiB
Bash
Executable File
113 lines
3.6 KiB
Bash
Executable File
#!/bin/sh
# Trace every command (-x) and abort on the first failing one (-e).
# The flags are set here rather than on the shebang line so that they also
# take effect when this file is started as `sh <file>` instead of being
# executed directly.
set -ex

####################################################################
# setup by tracey apr 2012
# updated version dec 2016
# see: http://www.ccextractor.org/doku.php
####################################################################

# build it static!
# simplest way is with linux alpine
# hop onto box with docker on it and cd to dir of the file you are staring at
# You will get a static-compiled binary and english language library file in the end.
|
# Host-side bootstrap.
#
# /tmp/cc/ccextractor-README.txt is used as the "am I inside the container?"
# marker: this branch copies this script to that path and then bind-mounts
# /tmp/cc into the container, so the file is visible there and the branch is
# skipped.  On the host the branch stages the script, re-runs it inside a
# throwaway alpine container, collects the trained data the container left in
# /tmp/cc, and exits.
if [ ! -e /tmp/cc/ccextractor-README.txt ]; then
  rm -rf /tmp/cc
  mkdir -p -m777 /tmp/cc
  mkdir -p -m777 ../lib/tessdata/
  cp ccextractor-README.txt /tmp/cc/

  # Remove the staged copy however this branch ends (success, or an abort
  # under `set -e`).  If it were left behind, the next run on the host would
  # fail the test above, skip this branch, and execute the container-only
  # steps that follow directly on the host.
  trap 'rm -f /tmp/cc/ccextractor-README.txt' EXIT

  sudo docker run -v /tmp/cc:/tmp/cc --rm -it alpine:latest /tmp/cc/ccextractor-README.txt
  # NOTE: _AFTER_ testing/validating, you can promote it from "ccextractor.next" to "ccextractor"... ;-)
  cp /tmp/cc/*traineddata ../lib/tessdata/
  chmod go-w ../lib/tessdata/
  exit 0
fi
|
|
|
|
# NOW we are inside docker container...
cd /tmp/cc

# we want tesseract (for OCR)
# Replace the apk repository list with the Alpine v3.5 "main" and "community"
# mirrors.  The file gets a blank line, the two URLs, and a trailing blank
# line; `>|` overwrites the file even when the noclobber option is on.
printf '\n%s\n%s\n\n' \
  http://dl-cdn.alpinelinux.org/alpine/v3.5/main \
  http://dl-cdn.alpinelinux.org/alpine/v3.5/community \
  >| /etc/apk/repositories

# Refresh the package index, then bring installed packages up to date.
apk update
apk upgrade

apk add --update bash zsh alpine-sdk perl

# (needed by various static builds below)
# Even though we're going to (re)build tesseract from source statically, get its dependencies setup by
# installing it now, too.
apk add autoconf automake libtool tesseract-ocr-dev
|
|
|
|
|
|
# Now comes the not-so-fun parts... Many packages _only_ provide .so files in their distros -- not the .a
# needed files for building something with it statically. Step through them now...

# libgif
# Download the 5.1.4 tarball, unpack it, and build/install a static-only copy.
wget 'https://sourceforge.net/projects/giflib/files/giflib-5.1.4.tar.gz'
zcat giflib*tar.gz | tar -xf -
cd giflib*/
./configure --disable-shared --enable-static
make
make install
# Forget remembered command locations now that new files have been installed.
hash -r
# Return to the directory we were in before entering the source tree.
cd -
|
|
|
|
|
|
# libwebp
# Clone the upstream repository, generate its configure script, and
# build/install a static-only copy.
git clone 'https://github.com/webmproject/libwebp'
cd libwebp
./autogen.sh
./configure --disable-shared --enable-static
make
make install
# Return to the directory we were in before entering the source tree.
cd -
|
|
|
|
|
|
# leptonica
# Download the 1.73 tarball, unpack it, and build/install a static-only copy.
wget 'http://www.leptonica.org/source/leptonica-1.73.tar.gz'
zcat leptonica*tar.gz | tar -xf -
cd leptonica*/
./configure --disable-shared --enable-static
make
make install
# Forget remembered command locations now that new files have been installed.
hash -r
# Return to the directory we were in before entering the source tree.
cd -
|
|
|
|
|
|
# tesseract
# Clone the upstream repository, generate its configure script, and
# build/install a static-only copy.
git clone 'https://github.com/tesseract-ocr/tesseract'
cd tesseract
./autogen.sh
./configure --disable-shared --enable-static
make
make install
# Return to the directory we were in before entering the source tree.
cd -
|
|
|
|
|
|
# ccextractor -- build static
git clone 'https://github.com/CCExtractor/ccextractor'
cd ccextractor/linux/
# wget https://sourceforge.net/projects/ccextractor/files/ccextractor/0.82/ccextractor.src.0.82.zip;
# unzip ccextractor*.zip;
# cd ccextractor.*/linux/;

# Add -static right after "O3 " in the Makefile (first match on each line).
perl -i -pe 's/O3 /O3 -static /' Makefile

# quick patch:
# insert a (char *) cast in front of the first argument of the first
# strchr(/strstr( call on each line of the two gpacmp4 sources.
perl -i -pe 's/(strchr|strstr)\(/$1((char *)/' ../src/gpacmp4/url.c ../src/gpacmp4/error.c

# this _will_ FAIL at the end..
# so errexit is switched off around make; the object files it leaves in
# objs/ are what the hand-written link step below consumes.
set +e
make ENABLE_OCR=yes
set -e

# I confess hand-compiling (cherrypicking which .a to use when there are 2, etc.) is fragile...
# But it was the _only_ way I could get a fully static build after hours of thrashing...
gcc -Wno-write-strings -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT \
  -O3 -std=gnu99 -s -DGPAC_CONFIG_LINUX -DENABLE_OCR -DPNG_NO_CONFIG_H \
  -I/usr/local/include/tesseract -I/usr/local/include/leptonica \
  objs/*.o -o ccextractor \
  --static -lm \
  /usr/local/lib/libtesseract.a \
  /usr/local/lib/liblept.a \
  /usr/local/lib/libgif.a \
  /usr/local/lib/libwebp.a \
  /usr/lib/libjpeg.a \
  /usr/lib/libtiff.a \
  /usr/lib/libgomp.a \
  -lstdc++

# Leave the result in /tmp/cc under the ".next" name.
cp ccextractor /tmp/cc/ccextractor.next
# Return to the directory we were in before entering ccextractor/linux/.
cd -
|
|
|
|
# get english lang trained data
# (wget saves it as eng.traineddata in the current working directory)
wget 'https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata'
|