(*
* footnotes.ml
*
* copyright 2008 by Andreas Romeyke ([email protected])
*
* Competition of Linux Magazine 2008-10 to write a footnote sorter
* in different languages.
*
* compile it with:
*    ocamlopt.opt -I /usr/lib/ocaml/3.10.1/extlib/ -o footnotes -unsafe \
*    unix.cmxa extLib.cmxa str.cmxa footnotes.ml
*
* (please replace path to your extlib to compile)
*
* execute it with:
*    cat input_text | ./footnotes -u > used_footnotes.txt
* or with
*    cat input_text | ./footnotes -o > ordered_footnotes.txt
*
*
* The program works correctly only if the keyword "@footnote:" will be on a
* separate line. The text should be formated in unix style, otherwise the
* carriage return is lost.
*
* In used mode only the program detects unreferenced indices in text part and
* aborts. In ordered mode it only looks to footnotes part and detects lines
* without index markers. With a little bit more overhead the program could
* detect all errors in every mode.
*
* The program is tested sucessfully under Debian GNU/Linux with Ocaml 3.10
*
* Have fun... :)
*
*)

open Str;;
open ExtLib;;

exception ParseError of string;;
exception WrongArgument of string;;
exception MissedIdx of string;;

let debug msg =
 Printf.eprintf "%s time:%f\n" msg (Sys.time ());
 flush stderr
;;

(* returns list of (Delim d) or (Text t) *)
let lexer (input: string list) =
 List.flatten (
   List.map (fun line -> Str.full_split( Str.regexp "\[[0-9]+\]" ) line ) input
 )
;;

(* Parses list of tokens and splits them into two streams, returns text and footnotes *)
let split_on_footnotes tokens =
 let match_footnotes =
   function
       (Delim d) -> false
     | (Text w)  ->
         try
           Str.search_forward (Str.regexp "^\@footnote:$") w 0;
           true
         with
             Not_found -> false
 in
 let rec splitter text footnotes has_footnotes input =
   match input with
       [] -> ((List.rev text),(List.rev footnotes))
     | hd::tl when (match_footnotes hd) -> splitter (hd::text) footnotes true tl
     | hd::tl when has_footnotes -> splitter text (hd::footnotes) has_footnotes tl
     | hd::tl -> splitter (hd::text) footnotes has_footnotes tl
 in
   splitter [] [] false tokens
;;

(* helper function to make Hash unique *)
let add_key_if_new hash (key:string) (value:int) =
 try
   Hashtbl.find hash key; ()
 with
     Not_found -> Hashtbl.add hash key value
;;

(* Maps indices to sorted version, returns Hash *)
let build_used_order text =
 let ordered = Hashtbl.create 0 in
   List.iter (
     function
         (Delim d) -> let len = (Hashtbl.length ordered) +1 in
           add_key_if_new ordered d len; ()
       | _ -> ()
   ) text;
   ordered
;;

(* prints substituted text and footnotes to Stdout *)
let print text footnotes hash =
 let rec comb_idx_with_txt accu = function
     [] -> List.rev accu
   | ((Delim i) as idx)::((Text t) as txt)::tl ->
       begin
         try
           let d = (Hashtbl.find hash i) in
           let v = (d, t) in
             comb_idx_with_txt (v::accu) tl
         with
             Not_found -> raise (ParseError ("an (idx,txt) pair in footnotes section is not referenced i='" ^ i ^ "'"))
       end
   | (Text t) ::tl when (t.[0] == '\n') -> comb_idx_with_txt accu tl
   | (Delim i)::tl -> raise (ParseError ("not an (idx,txt) pair in footnotes section i='" ^ i ^ "'" ))
   | (Text t) ::tl -> raise (ParseError ("not an (idx,txt) pair in footnotes section t='" ^ t ^ "'"  ))

 in
 let sort_footnotes =
   let tmp = comb_idx_with_txt [] footnotes
   in
     (* TODO: check if Space changes the sort order, test, if sort is
      * necessary.. *)
   let cmp a b =  let (a',_) = a
       and (b',_) = b
         in
           compare a' b'
   in
     List.sort ~cmp:cmp tmp
 in
   List.iter (
     function
         (Text t) -> Printf.printf "%s" t
       | (Delim d) -> try
           Printf.printf "[%i]" (Hashtbl.find hash d)
         with
                Not_found -> raise (ParseError ("an index in text has no reference in footnotes section i='" ^ d ^ "'"))
   ) text;
   List.iter (
     fun (idx,txt) -> Printf.printf "[%i]%s" idx txt
   ) sort_footnotes
;;

(* Reads data from STDIN and returns list of lines *)
let readin () =
 debug ">> readin";
 let rec readlines file =
   try
     let line = (input_line file) ^ "\n" in
       line:: readlines file
   with
       End_of_file -> []
 in
   readlines stdin
;;

(* MAIN *)
let _ =
 if (Array.length Sys.argv) != 2 then
   raise (WrongArgument "wrong count of Argv");
 let tokens = lexer (readin ()) in
   debug ">> lexed";
 let (text, footnotes) = split_on_footnotes tokens in
   debug ">> splitted";
 let ordered_hash =
   match Sys.argv.(1) with
       "-o" -> build_used_order text
     | "-u" -> build_used_order footnotes
     | _ -> raise (WrongArgument Sys.argv.(1))
 in
   debug ">> built";
   print text footnotes ordered_hash;
   debug ">>printed"
;;