(* compile with: ocamlopt -o footnotes str.cmxa footnotes.ml *)

let sep_line = "@footnote:"             (* the seperator line *)

let ref_regexp = Str.regexp "\\[\\([0-9]+\\)\\]"

let ref_of_int n = "[" ^ string_of_int n ^ "]"

let output_endline c s = output_string c s; output_char c '\n'

exception End_of_body

(* read the body from inc, substitute the references from table refs *)
(* if ref is not in refs, evaluate unknown_ref; print to outc *)
let process_body inc outc refs unknown_ref =
 let sub_ref l _ =
   let old_n = int_of_string (Str.matched_group 1 l) in
   try ref_of_int (Hashtbl.find refs old_n)
   with Not_found -> unknown_ref old_n
 in
 try
   while true do
     let l = input_line inc in
     if l = sep_line then raise End_of_body;
     output_endline outc
       (Str.global_substitute ref_regexp (sub_ref l) l)
   done
 with End_of_body -> output_endline outc sep_line

(* read the foots from inc, process the lines with process_foot *)
let process_foots inc process_foot malformed_foot =
 try
   while true do
     let l = input_line inc in
     if Str.string_match ref_regexp l 0 then
       process_foot l (int_of_string (Str.matched_group 1 l))
     else malformed_foot l
   done
 with End_of_file -> ()

(* insert a new reference for old_n into refs and return the new n *)
let insert_ref refs old_n =
 let n = Hashtbl.length refs + 1 in
 Hashtbl.add refs old_n n;
 n

(* renumber and reorder the footnotes by their appearance in the body *)
let renumber_by_body inc outc =
 let refs = Hashtbl.create 1000000 in
 let new_ref old_n = ref_of_int (insert_ref refs old_n) in
 process_body inc outc refs new_ref;
 let foots = Array.make (Hashtbl.length refs) None in
 let add_foot l old_n =
   try
     let n = Hashtbl.find refs old_n in
     foots.(n-1) <- Some (Str.string_after l (Str.match_end ()))
   with
     Not_found -> prerr_endline ("unreferenced footnote: " ^ l)
 and prerr_malformed l = prerr_endline ("malformed footnote: " ^ l) in
 process_foots inc add_foot prerr_malformed;
 for i = 1 to Array.length foots do
   output_string outc (ref_of_int (i));
   match foots.(i-1) with
     None   -> output_endline outc " ### missing footnote ###"
   | Some l -> output_endline outc l
 done

(* renumber the footnote references by the original footnote order *)
let renumber_by_foots inc outc =
 while input_line inc <> sep_line do () done;
 let refs = Hashtbl.create 1000000 in
 let add_foot_ref _ old_n =
   if not (Hashtbl.mem refs old_n) then ignore (insert_ref refs old_n)
 in
 process_foots inc add_foot_ref ignore;
 seek_in inc 0;
 process_body inc outc refs (function _ -> "[?]");
 let print_foot l old_n =
   let n = Hashtbl.find refs old_n in
   output_string outc (ref_of_int n);
   output_endline outc (Str.string_after l (Str.match_end ()))
 in
 process_foots inc print_foot (output_endline outc)

(* renumber the footnote references *)
let renumber by_foots =
 if by_foots then renumber_by_foots else renumber_by_body

(* process options and files given on command line or stdin *)
let main () =
 if Array.length Sys.argv > 1 then
   let by_foots = ref false in
   let do_file f =
     let c = open_in f in
     (try renumber !by_foots c stdout with e -> close_in c; raise e);
     close_in c
   in
   Arg.parse [("-f", Arg.Set by_foots,
               "Renumber footnotes by their original order")]
     do_file "Usage: footnotes [OPTION...] [file...]"
 else renumber_by_body stdin stdout;;

main ()