#!/usr/bin/env perl # cuneiform.wrapper - Wrapper for "cuneiform" main program # License: Creative Commons Attribution-NonCommercial-ShareAlike 3.0 # Revision: 120627 # Note: The license indicated above applies to this file. It doesn't # apply to the official Cuneiform package, or to any file derived # from that package. #--------------------------------------------------------------------- # license information #--------------------------------------------------------------------- # This section may not be modified except as approved by the author or # licensor, or to make non-content changes such as adjustments to par- # agraph formatting or white space. # This version of this software is distributed under the following # license: # # Creative Commons Attribution-NonCommercial-ShareAlike 3.0 # You may use, modify, and redistribute this software without fees or # royalties, but only under the terms and conditions set forth by the # license. In particular, copies and derived works cannot be used for # commercial purposes. Additionally, the license propagates to copies # and derived works. Furthermore, you must provide attribution "in the # manner specified by the author or licensor". The latter point is # discussed below. # The author [and licensor] hereby specifies that attribution must be # handled in the following manner: a. If the software is interactive, # any About or Credits dialog boxes, windows, or output text provided # by the original version must be preserved and readily accessible to # the end user at runtime. b. If the software is non-interactive, or # if it does not provide About or Credits dialog boxes, windows, or # output text, the operating system and/or desktop environment used # must provide attribution that is visible and/or readily accessible # to the end user at runtime. 
# The following techniques do not meet the attribution requirements:
# Attribution through text files, attribution through printed docu-
# mentation, verbal attribution, or postings on external web sites
# [i.e., web sites that are not an intrinsic local component of the
# operating system or desktop environment used]. These examples are
# provided for illustrative purposes only.

# It should be noted that trademarks are an additional issue. If this
# software uses any trademarks, trademark-related restrictions may
# apply.

# This is not a complete explanation of the terms and conditions in-
# volved. For more information, see the Creative Commons Attribution-
# NonCommercial-ShareAlike 3.0 license.

#---------------------------------------------------------------------
#                        standard module setup
#---------------------------------------------------------------------

require 5.8.1;
use strict;
use Carp;
use warnings;
use Cwd;
use File::Temp ();              # Safe temporary-file creation

                                # Trap warnings: any warning is fatal
$SIG {__WARN__} = sub { die @_; };

#---------------------------------------------------------------------
#                        add LACSUB module[s]
#---------------------------------------------------------------------

use LACSUB::Filesystem ( @LACSUB::Filesystem::EXPORT_OK );

#---------------------------------------------------------------------
#                           basic constants
#---------------------------------------------------------------------

use constant ZERO  => 0;        # Zero
use constant ONE   => 1;        # One

use constant FALSE => 0;        # Boolean FALSE
use constant TRUE  => 1;        # Boolean TRUE

#---------------------------------------------------------------------
#                         program parameters
#---------------------------------------------------------------------

# NOTE(review): '__META_PREFIX__' looks like a placeholder that is
# substituted with the real install prefix at build/install time --
# confirm against the packaging scripts.

my $DIR_BASE = '__META_PREFIX__';
my $DIR_LIB  = "$DIR_BASE/lib";
my $DIR_PROG = "$DIR_BASE/program";

#---------------------------------------------------------------------
#                          support routines
#---------------------------------------------------------------------

# _PrintUsage: Prints the usage text to the currently selected output
# handle. Takes no arguments and returns nothing useful.

sub _PrintUsage
{
    print <<'END';
Usage: cuneiform file1.jpg /foo/file2.png ...
or     cuneiform --language=rus file1.jpg /foo/file2.png ...

The specified arguments should be filenames or pathnames for image
files. Both absolute and relative paths are allowed. Supported formats
include BMP, GIF, JPEG, PNM, PNG, and TIFF.

The image files should be scanned copies of text pages. If possible,
scans should be done at 300dpi. Multi-column pages may work, but this
isn't guaranteed.

"cuneiform" assumes that you've got write access to the image-file
directories. For each input file NAME.png (or NAME.jpg, etc.), it cre-
ates a corresponding output file named NAME.cune. The output files are
text files that contain OCR'd text. Several input languages are sup-
ported. For a list of language codes, specify "--language=list".

Note: Russian output uses Code Page 1251 format.

Bug: Some images may crash the program.
END
    return;
}

#---------------------------------------------------------------------

# _RunCommand: Runs an external program without involving a shell.
#
# Arguments: The program name followed by its arguments  (two or more
#            list elements, so that "system" never falls back to the
#            shell).
# Returns:   The raw "system" result: zero on success, -1 if the pro-
#            gram couldn't be started, otherwise the wait status.
#
# "exec" warnings are disabled here because this program turns every
# warning into a fatal error; callers inspect the return value instead.

sub _RunCommand
{
    my @command = @_;
    no warnings 'exec';
    return system (@command);
}

#---------------------------------------------------------------------

# _OcrOneImage: Runs OCR on one image file and writes NAME.cune next
# to it.
#
# Arguments: $ifname   - absolute pathname of the input image
#            $CWD      - directory to return to afterwards
#            $language - three-letter language code, or undef
# Returns:   Nothing useful. Dies with an "Error: ..." message if a
#            directory can't be entered, the output file can't be
#            created, or no output file was produced.

sub _OcrOneImage
{
    my ($ifname, $CWD, $language) = @_;

    my $ofname = $ifname;
    die "Internal error\n" unless $ofname =~ s@\.\w+\z@.cune@i;

                                # cuneiform.bin is run from its own
                                # directory
    chdir ($DIR_PROG) or
        die "Error: Can't enter directory: $!:\n$DIR_PROG\n";

                                # Verify write access up front by
                                # creating and removing the output file
    open (my $ofh, '>', $ofname) or
        die "Error: Can't create output file: $!:\n$ofname\n";
    print $ofh "\n";
    close ($ofh) or
        die "Error: Can't create output file: $!:\n$ofname\n";
    unlink ($ofname) or
        die "Error: Can't delete output file: $!:\n$ofname\n";

                                # Unpredictable name, created exclu-
                                # sively; the ".png" suffix selects the
                                # format that "convert" writes
    my ($tfh, $tempfile) = File::Temp::tempfile
        ("cuneiform-$>-XXXXXXXX", DIR => '/tmp', SUFFIX => '.png');
    close ($tfh) or
        die "Error: Can't create temporary file: $!:\n$tempfile\n";

                                # List-form commands: filenames are
                                # never interpreted by a shell
    if (_RunCommand ('convert', $ifname, $tempfile) != ZERO)
    {
        unlink $tempfile;
        die "Error: convert failed for $ifname\n";
    }

                                # Best effort, as before: a mogrify
                                # failure doesn't stop the OCR step
    if (_RunCommand ('mogrify', '-type', 'TrueColor', $tempfile) != ZERO)
    {
        print STDERR "Warning: mogrify failed for $ifname\n";
    }

    my @cune_cmd = ('./cuneiform.bin');
    push (@cune_cmd, '-l', $language) if defined ($language);
    push (@cune_cmd, '-o', $ofname, $tempfile);
    _RunCommand (@cune_cmd);    # Success is judged by the output file

    unlink $tempfile;
    chmod (0644, $ofname);

    if (!-f $ofname)
    {
        die "Error: cuneiform.bin failed for $ifname\n";
    }

    chdir ($CWD) or
        die "Error: Can't return to directory: $!:\n$CWD\n";
    return;
}

#---------------------------------------------------------------------
#                            main routine
#---------------------------------------------------------------------

# Main: Parses the command line, sets up the environment needed by
# cuneiform.bin, and processes each image file named in @ARGV.
#
# Arguments: None (reads @ARGV and %ENV).
# Returns:   Nothing useful. Exits with status ONE after printing the
#            usage text, and with status ZERO after listing languages.
#            Dies on invalid options and on processing errors.

sub Main
{
    my $language;               # Language code, "list", or undefined

#---------------------------------------------------------------------
# Initial setup.

    select STDERR; $| = ONE;    # Set flush-on-write mode
    select STDOUT; $| = ONE;    # Ditto [note: this line must be last]

    my $CWD = getcwd();         # Current working directory
    die "Error: Can't determine current directory: $!\n"
        unless defined ($CWD);
    $CWD =~ s@([^/])\z@$1/@;    # Make sure that it ends with a slash

                                # Set a required environment variable
    $ENV {CF_DATADIR} = "$DIR_PROG/datafiles";

#---------------------------------------------------------------------
# Print usage text, if necessary.

    if (!scalar (@ARGV))
    {
        _PrintUsage();
        exit ONE;
    }

#---------------------------------------------------------------------
# Process the command line.

    for my $arg (@ARGV)
    {
        if ($arg =~ m@^-+lang(uage|)=(\w{3,})\z@i)
        {
            $language = lc ($2);
                                # Several synonyms request the list
            $language =~ s@^(help|info|list|show)\z@list@i;
                                # Language codes are cut to 3 letters
            $language =~ s@^(.{3}).+\z@$1@ unless $language eq 'list';
        }
        elsif ($arg =~ m@^-@)
        {
            die "Error: Invalid option: $arg\n";
        }
    }

#---------------------------------------------------------------------
# Adjust LD_LIBRARY_PATH.

    my $ldpath = $ENV {LD_LIBRARY_PATH};
    $ldpath    = "" unless defined ($ldpath);
                                # "." is the program directory by the
                                # time cuneiform.bin is started
    $ldpath    = ".:$DIR_LIB:$ldpath";
    $ldpath    =~ s@^:+@@;      # Strip leading colons
    $ldpath    =~ s@:+\z@@;     # Strip trailing colons
    $ldpath    =~ y/:/:/s;      # Squeeze repeated colons
    $ENV {LD_LIBRARY_PATH} = $ldpath;

#---------------------------------------------------------------------
# Handle a special case.

    if (defined ($language) && ($language eq 'list'))
    {
        chdir ($DIR_PROG) or
            die "Error: Can't enter directory: $!:\n$DIR_PROG\n";
        if (_RunCommand ('./cuneiform.bin', '-l') == -1)
        {
            die "Error: Can't run cuneiform.bin: $!\n";
        }
        exit ZERO;
    }

#---------------------------------------------------------------------
# Process image-file arguments.

    for my $arg (@ARGV)
    {
        my $ifname = $arg;      # Work on a copy, not on @ARGV itself

                                # Silently skip options and anything
                                # that isn't a binary file with a sup-
                                # ported extension
        next if     $ifname =~ m@^-@;
        next unless -f $ifname;
        next unless -B $ifname;
        next unless $ifname =~
            m@\.(bmp|gif|jpe?g|pbm|pnm|png|tiff?)\z@i;

        print "Processing $ifname\n";

        if ($ifname !~ m@^/@)   # Relative path: make it absolute
        {
            $ifname = "${CWD}$ifname";
                                # FixDotDot comes from LACSUB; it is
                                # handed a reference to the path and
                                # a false result rejects the filename
            die "Unsupported filename: $arg\n"
                unless &FixDotDot (-ref_path => \$ifname);
        }

        _OcrOneImage ($ifname, $CWD, $language);
    }

    return;
}

#---------------------------------------------------------------------
#                            main program
#---------------------------------------------------------------------

Main();                         # Call the main routine
exit ZERO;                      # Normal exit