(*
* footnotes.ml
*
* copyright 2008 by Andreas Romeyke (
[email protected])
*
* Competition of Linux Magazine 2008-10 to write a footnote sorter
* in different languages.
*
* compile it with:
* ocamlopt.opt -I /usr/lib/ocaml/3.10.1/extlib/ -o footnotes -unsafe \
* unix.cmxa extLib.cmxa str.cmxa footnotes.ml
*
* (please replace path to your extlib to compile)
*
* execute it with:
* cat input_text | ./footnotes -u > used_footnotes.txt
* or with
* cat input_text | ./footnotes -o > ordered_footnotes.txt
*
*
* The program works correctly only if the keyword "@footnote:" will be on a
* separate line. The text should be formated in unix style, otherwise the
* carriage return is lost.
*
* In used mode only the program detects unreferenced indices in text part and
* aborts. In ordered mode it only looks to footnotes part and detects lines
* without index markers. With a little bit more overhead the program could
* detect all errors in every mode.
*
* The program is tested sucessfully under Debian GNU/Linux with Ocaml 3.10
*
* Have fun... :)
*
*)
open Str;;
open ExtLib;;
exception ParseError of string;;
exception WrongArgument of string;;
exception MissedIdx of string;;
let debug msg =
Printf.eprintf "%s time:%f\n" msg (Sys.time ());
flush stderr
;;
(* returns list of (Delim d) or (Text t) *)
let lexer (input: string list) =
List.flatten (
List.map (fun line -> Str.full_split( Str.regexp "\[[0-9]+\]" ) line ) input
)
;;
(* Parses list of tokens and splits them into two streams, returns text and footnotes *)
let split_on_footnotes tokens =
let match_footnotes =
function
(Delim d) -> false
| (Text w) ->
try
Str.search_forward (Str.regexp "^\@footnote:$") w 0;
true
with
Not_found -> false
in
let rec splitter text footnotes has_footnotes input =
match input with
[] -> ((List.rev text),(List.rev footnotes))
| hd::tl when (match_footnotes hd) -> splitter (hd::text) footnotes true tl
| hd::tl when has_footnotes -> splitter text (hd::footnotes) has_footnotes tl
| hd::tl -> splitter (hd::text) footnotes has_footnotes tl
in
splitter [] [] false tokens
;;
(* helper function to make Hash unique *)
let add_key_if_new hash (key:string) (value:int) =
try
Hashtbl.find hash key; ()
with
Not_found -> Hashtbl.add hash key value
;;
(* Maps indices to sorted version, returns Hash *)
let build_used_order text =
let ordered = Hashtbl.create 0 in
List.iter (
function
(Delim d) -> let len = (Hashtbl.length ordered) +1 in
add_key_if_new ordered d len; ()
| _ -> ()
) text;
ordered
;;
(* prints substituted text and footnotes to Stdout *)
let print text footnotes hash =
let rec comb_idx_with_txt accu = function
[] -> List.rev accu
| ((Delim i) as idx)::((Text t) as txt)::tl ->
begin
try
let d = (Hashtbl.find hash i) in
let v = (d, t) in
comb_idx_with_txt (v::accu) tl
with
Not_found -> raise (ParseError ("an (idx,txt) pair in footnotes section is not referenced i='" ^ i ^ "'"))
end
| (Text t) ::tl when (t.[0] == '\n') -> comb_idx_with_txt accu tl
| (Delim i)::tl -> raise (ParseError ("not an (idx,txt) pair in footnotes section i='" ^ i ^ "'" ))
| (Text t) ::tl -> raise (ParseError ("not an (idx,txt) pair in footnotes section t='" ^ t ^ "'" ))
in
let sort_footnotes =
let tmp = comb_idx_with_txt [] footnotes
in
(* TODO: check if Space changes the sort order, test, if sort is
* necessary.. *)
let cmp a b = let (a',_) = a
and (b',_) = b
in
compare a' b'
in
List.sort ~cmp:cmp tmp
in
List.iter (
function
(Text t) -> Printf.printf "%s" t
| (Delim d) -> try
Printf.printf "[%i]" (Hashtbl.find hash d)
with
Not_found -> raise (ParseError ("an index in text has no reference in footnotes section i='" ^ d ^ "'"))
) text;
List.iter (
fun (idx,txt) -> Printf.printf "[%i]%s" idx txt
) sort_footnotes
;;
(* Reads data from STDIN and returns list of lines *)
let readin () =
debug ">> readin";
let rec readlines file =
try
let line = (input_line file) ^ "\n" in
line:: readlines file
with
End_of_file -> []
in
readlines stdin
;;
(* MAIN *)
let _ =
if (Array.length Sys.argv) != 2 then
raise (WrongArgument "wrong count of Argv");
let tokens = lexer (readin ()) in
debug ">> lexed";
let (text, footnotes) = split_on_footnotes tokens in
debug ">> splitted";
let ordered_hash =
match Sys.argv.(1) with
"-o" -> build_used_order text
| "-u" -> build_used_order footnotes
| _ -> raise (WrongArgument Sys.argv.(1))
in
debug ">> built";
print text footnotes ordered_hash;
debug ">>printed"
;;