#!/usr/bin/env perl # gutenmark.wrapper - Launcher for "gutenmark" main program # License: Creative Commons Attribution-NonCommercial-ShareAlike 2.5 # Revision: 080602 # Note: The license indicated above applies to this file. It doesn't # apply to the official GutenMark package, or to any file derived from # that package. #--------------------------------------------------------------------- # license information #--------------------------------------------------------------------- # This section may not be modified except as approved by the author or # licensor, or to make non-content changes such as adjustments to par- # agraph formatting or white space. # This version of this software is distributed under the following # license: # # Creative Commons Attribution-NonCommercial-ShareAlike 2.5 # You may use, modify, and redistribute this software without fees or # royalties, but only under the terms and conditions set forth by the # license. In particular, copies and derived works cannot be used for # commercial purposes. Additionally, the license propagates to copies # and derived works. Furthermore, you must provide attribution "in the # manner specified by the author or licensor". The latter point is # discussed below. # The author [and licensor] hereby specifies that attribution must be # handled in the following manner: a. If the software is interactive, # any About or Credits dialog boxes, windows, or output text provided # by the original version must be preserved and readily accessible to # the end user at runtime. b. If the software is non-interactive, or # if it does not provide About or Credits dialog boxes, windows, or # output text, the operating system and/or desktop environment used # must provide attribution that is visible and/or readily accessible # to the end user at runtime. 
# The following techniques do not meet the attribution requirements: # Attribution through text files, attribution through printed docu- # mentation, verbal attribution, or postings on external web sites # [i.e., web sites that are not an intrinsic local component of the # operating system or desktop environment used]. These examples are # provided for illustrative purposes only. # It should be noted that trademarks are an additional issue. If this # software uses any trademarks, trademark-related restrictions may # apply. # This is not a complete explanation of the terms and conditions in- # volved. For more information, see the Creative Commons Attribution- # NonCommercial-ShareAlike 2.5 license. #--------------------------------------------------------------------- # explanation #--------------------------------------------------------------------- # 1. Overview. # "gutenmark.wrapper" is a wrapper for the main GutenMark program # file [which must be named "gutenmark.bin"]. This wrapper performs # the following operations: # # a. Check the command line. Note: The wrapper expects to see # either one argument (an input file) or two arguments (an in- # put file and an output file). # # b. Use the backend program (gutenmark.bin) to convert a speci- # fied Project Gutenberg text file to HTML format. Save the # output in a temporary file. # # c. If an output file was specified, send the final output there. # Otherwise, send it to the standard output stream. Adjust the # data involved (if necessary) as it's copied to the output # location. # # d. Delete the temporary file. #--------------------------------------------------------------------- # 2. Requirements. # The wrapper and the backend program have very few special require- # ments. Perl and "zlib" might be enough. 
#---------------------------------------------------------------------
#                             usage text
#---------------------------------------------------------------------

# Help text printed by UsageError().  The placeholder words PROGNAME,
# REVISION, and REALPROG_NAME are replaced at display time.

my $USAGE_TEXT = <<'END_OF_USAGE';
PROGNAME REVISION - Converts Project Gutenberg text to HTML
Original author: Ron Burkey; distributed under GNU GPL V2

Usage: PROGNAME foo.txt output.html
   or  PROGNAME foo.txt > output.html

"foo.txt" should be a text file formatted using Project Gutenberg's
usual conventions.  This program converts the input to HTML and sends
the output to the appropriate destination (which may be a file or the
standard output stream, as shown above).

Note: This program is a "wrapper" for Ron Burkey's original converter.
If you're familiar with the original converter and you'd like to use
it, the local name is "REALPROG_NAME".
END_OF_USAGE

#---------------------------------------------------------------------
#                        standard module setup
#---------------------------------------------------------------------

require 5.8.1;

use strict;
use Carp;
use warnings;

use File::Temp qw(tempdir);     # Core module; private scratch directory

                                # Trap warnings: any warning is fatal
$SIG{__WARN__} = sub { die @_; };

#---------------------------------------------------------------------
#                           basic constants
#---------------------------------------------------------------------

use constant ZERO  => 0;        # Zero
use constant ONE   => 1;        # One
use constant TWO   => 2;        # Two

use constant FALSE => 0;        # Boolean FALSE
use constant TRUE  => 1;        # Boolean TRUE

#---------------------------------------------------------------------
#                         program parameters
#---------------------------------------------------------------------

# "__META_PREFIX__" is a placeholder; presumably it is replaced with
# the real installation prefix when the package is installed.

my $PREFIX  = "__META_PREFIX__";
my $BINDIR  = "$PREFIX/bin";
my $DATADIR = "$PREFIX/data";
my $ETCDIR  = "$PREFIX/etc";

                                # Name of backend program w/o path
my $REALPROG_NAME = 'gutenmark.bin';
                                # Absolute path for backend program
my $REALPROG_PATH = "$BINDIR/$REALPROG_NAME";

my $REVISION = '080602';        # Wrapper revision string

                                # Absolute path for config. file
my $CFGFILE = "$ETCDIR/gutenmark.cfg";

                                # Pager used for long "help" text
my $PAGER = '/usr/bin/less';
                                # Usage text longer than this many
                                # lines is sent through the pager
my $PAGER_THRESHOLD = 21;

my $IE = 'Internal error';      # Internal-error message prefix
my $IO = 'I/O error';           # I/O-error      message prefix

#---------------------------------------------------------------------
#                          global variables
#---------------------------------------------------------------------

my $PROGNAME = $0;              # Program name [without path]
$PROGNAME =~ s@.*/@@;

#---------------------------------------------------------------------
#                          misc. routine[s]
#---------------------------------------------------------------------

# UsageError() prints the usage text and terminates the program with
# exit status ONE.  It takes no arguments and never returns.
#
# The placeholders in $USAGE_TEXT are filled in first.  If the result
# is longer than $PAGER_THRESHOLD lines, standard output is a terminal,
# and the pager is available, the text is shown through the pager with
# a short "how to exit" header.  Otherwise it is printed directly to
# the standard output stream.

sub UsageError
{
    my $text = $USAGE_TEXT;     # Work on a copy; leave the global alone

    $text =~ s@^\s+@@s;
    $text =~ s@\bPROGNAME\b@$PROGNAME@g;
    $text =~ s@\bREALPROG_NAME\b@$REALPROG_NAME@g;
    $text =~ s@\bREVISION\b@$REVISION@g;
    $text =~ s@\s*\z@\n@s;      # Exactly one trailing newline

                                # Count the lines of text
    my $line_count = ($text =~ tr/\012//);
    my $use_less   = ($line_count > $PAGER_THRESHOLD) ? TRUE : FALSE;

    my $pager;                  # Pipe to the pager, if one is opened

    if ($use_less && (-t STDOUT) && (-x $PAGER)
        && open ($pager, '|-', $PAGER))
    {
        my $header = 'To exit this "help" text, press "q" or "Q". '
                   . "To scroll up or down,\n"
                   . "use PGUP, PGDN, or the arrow keys.\n\n";

        print {$pager} $header, $text, "\n";
        close $pager;
    }
    else
    {
        print "\n";
        print $text;
        print "\n";
    }

    exit ONE;
}

#---------------------------------------------------------------------

# ProbeWritable ($path) verifies, before any lengthy work is done, that
# an output file can be created at $path.  Any existing file at $path
# is deleted, an empty file is created there, and that file is deleted
# again; so $path does not exist when this routine returns.  The rou-
# tine dies with an I/O-error message if any of these steps fails.

sub ProbeWritable
{
    my ($path) = @_;
    my $err;                    # Saved copy of $! for error messages

    unlink $path;               # May fail harmlessly (no such file)
    $err = $!;
    die "$IO: Couldn't delete file: $err\n$path\n"
        if (-e $path) || (-l $path);

    open (my $fh, '>', $path)
        or die "$IO: Couldn't create file: $!\n$path\n";
    close ($fh)
        or die "$IO: Couldn't create file: $!\n$path\n";
    die "$IO: Couldn't create file: not a regular file\n$path\n"
        unless -f $path;

    unlink $path;
    $err = $!;
    die "$IO: Couldn't delete file: $err\n$path\n"
        if (-e $path) || (-l $path);

    return;
}

#---------------------------------------------------------------------
#                            main routine
#---------------------------------------------------------------------

# Main() does all of the real work:
#
#     a. Check the command line [one or two arguments].
#     b. Run the backend program, which converts the input file to
#        HTML and stores the result in a temporary file.
#     c. Copy the temporary file to the output file, or to the stan-
#        dard output stream if no output file was specified.
#     d. Delete the temporary file.
#
# It reads @ARGV, takes no parameters, and returns nothing.  It dies
# with a message on any error, and calls UsageError() [which exits]
# if the number of arguments is wrong.

sub Main
{
#---------------------------------------------------------------------
# Initial setup.

    select STDERR; $| = ONE;    # Set flush-on-write mode
    select STDOUT; $| = ONE;    # Ditto [note: this line must be last]

                                # Sanity check
    die "$IE: Program is missing:\n$REALPROG_PATH\n"
        unless -f $REALPROG_PATH;

#---------------------------------------------------------------------
# Process the command line.

    my $argc = scalar (@ARGV);
    UsageError() unless ($argc == ONE) || ($argc == TWO);

    my $ifname = shift (@ARGV);

    die "Error: Broken symbolic link: $ifname\n"
        if (-l $ifname) && (!-e $ifname);
    die "Error: Input file not found: $ifname\n" unless -e $ifname;
    die "Error: Not a regular file: $ifname\n"   unless -f $ifname;

    die "Error: Name contains unsupported characters: $ifname\n"
        unless $ifname =~ m@^[a-z0-9_\-\+\.,/]+\z@i;

    die "Error: Input file doesn't seem to be a text file: $ifname\n"
        unless ($ifname =~ m@\.txt\z@i) || (-T $ifname);

                                # Note: The literal name "stdout" is
                                # the marker for "no output file", so
                                # an explicit output name of "stdout"
                                # also selects the standard output
                                # stream
    my $ofname = shift (@ARGV);
    $ofname = "stdout" unless defined $ofname;

    die "Error: Name contains unsupported characters: $ofname\n"
        unless $ofname =~ m@^[a-z0-9_\-\+\.,/]+\z@i;

    my $to_stdout = ($ofname eq 'stdout') ? TRUE : FALSE;

#---------------------------------------------------------------------
# Set up temporary file and output file.

    ProbeWritable ($ofname) unless $to_stdout;

                                # The temporary file lives in a fresh
                                # private directory,  so its name can't
                                # be guessed or pre-created by another
                                # user.  File::Temp removes the direc-
                                # tory at exit, even after a "die".
    my $tempdir  = tempdir ('gutenmark-XXXXXXXX',
                            TMPDIR => TRUE, CLEANUP => TRUE);
    my $tempfile = "$tempdir/gutenmark-temp.html";

#---------------------------------------------------------------------
# Run backend program.

    my $profile = (-f "$DATADIR/NonUS.places.gz")
                ? 'english_all' : 'english';

                                # List form: no shell is involved,  so
                                # no quoting problems are possible
    my @cmd = ($REALPROG_PATH,
               "--config=$CFGFILE",
               "--profile=$profile",
               '--force-symbolic',
               $ifname, $tempfile);

    my $status;
    {
                                # A failed exec is reported below; the
                                # built-in warning would be made fatal
                                # [in the child] by the warning trap
        no warnings 'exec';
        $status = system (@cmd);
    }

    die "Error: Couldn't run backend program: $!\n$REALPROG_PATH\n"
        if $status == -1;

                                # The backend's exit status is deliber-
                                # ately not checked; success is judged
                                # by the presence of the output
    die "Error: Conversion failed for unknown reasons\n"
        unless -f $tempfile;

#---------------------------------------------------------------------
# Copy the result to its destination.

                                # Text inserted at the start of every
                                # output line.  NOTE(review): this is
                                # empty in this revision,  so the copy
                                # is a plain pass-through;  confirm
                                # whether markup was intended here
    my $NEWBODYCODE = '';

    open (my $in, '<', $tempfile)
        or die "$IO: Couldn't read file: $!\n$tempfile\n";

    my $out;
    if ($to_stdout)
    {
        $out = \*STDOUT;
    }
    else
    {
        open ($out, '>', $ofname)
            or die "$IO: Couldn't create file: $!\n$ofname\n";
    }

    while (defined (my $line = <$in>))
    {
        print {$out} $NEWBODYCODE, $line
            or die "$IO: Couldn't write output: $!\n$ofname\n";
    }

    close $in;

    unless ($to_stdout)
    {
                                # Buffered write errors surface here
        close ($out)
            or die "$IO: Couldn't write output: $!\n$ofname\n";
    }

#---------------------------------------------------------------------
# Wrap it up.

    unlink $tempfile;           # Delete the temporary file

    die "Error: Conversion failed for unknown reasons\n"
        unless $to_stdout || -f $ofname;

    return;
}

#---------------------------------------------------------------------
#                            main program
#---------------------------------------------------------------------

Main();                         # Call the main routine
exit ZERO;                      # Normal exit