// (C) 2008 Sebastian Mach

//-----------------------------------------------------------------------------
// (C) 2008 Sebastian Mach
// [email protected]
// http://greenhybrid.net , http://picogen.org
// --
// contribution for the language-shootout in the german
// 'Linux Magazin', issue 10/2008
// --
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// Please see http://www.gnu.org/licenses/gpl.txt for the license text or
// contact the author.
//
// -> contact author for use not covered by GPLv3
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// miscellaneous
//-----------------------------------------------------------------------------
// The reason why we use #warning in the following is that #message
// is not portable.
#define TRUE_EQUALS_1 (true==1)
#if TRUE_EQUALS_1
#warning Hooray, can use some optimizations based on the assumption that true==1
#else
#warning Meh, can not use some optimizations based on the assumption that true==1
#endif

// A safer way than #ifdef/#define combos.
enum optimization_flags {
of_true_is_1 = true == 1, // can we rely on that true==1?
of_nums_contigous = // can we rely that '9'-'0'==10 and that numbers go low->high?
'1'-'0' == 1
&& '2'-'0' == 2
&& '3'-'0' == 3
&& '4'-'0' == 4
&& '5'-'0' == 5
&& '6'-'0' == 6
&& '7'-'0' == 7
&& '8'-'0' == 8
&& '9'-'0' == 9
};

//-----------------------------------------------------------------------------
// include files
//-----------------------------------------------------------------------------
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <map>
#include <vector>
#include <cstring>

//-----------------------------------------------------------------------------
// convenience types/functions
//-----------------------------------------------------------------------------

// Can also be used to check whether c is in ['0'..'9']
// (e.g. "bool isNum = toInt(c) != -1;").
template <typename C> int toInt (C c) {

// note that the C++ standard does not mandate
// that the number-chars 0-9 are continous when
// interpeted as integers, thus this function
// does the portable way.

// --> trust in constant folding
// --> check if '0'-'9' are continous when cast to integer
if (of_nums_contigous) {
static int ret [2] = {-1, -1};
ret [1] = c - '0';
return ret [c>('0'-1) & c<('9'+1)];
} else {
switch (c) { // not much slower if the compiler emits jump tables
case '0': return 0;
case '1': return 1;
case '2': return 2;
case '3': return 3;
case '4': return 4;
case '5': return 5;
case '6': return 6;
case '7': return 7;
case '8': return 8;
case '9': return 9;
};
return -1;
}
}

// Compiler should do a good job with this convenience class.
// Note that the case that A==B is not covered.
template <typename A, typename B> class tuple {
const A a_;
const B b_;
public:
tuple (A a, B b) : a_(a), b_(b) {}
const A& a () const { return a_; }
const B& b () const { return b_; }
operator A () const { return a_; }
operator B () const { return b_; }
};

//-----------------------------------------------------------------------------
// A custom implementation of an array class. Allocates large chunks of memory
// instead of allocating each single element.
// Note: This is an micro-optimization, if any.
//-----------------------------------------------------------------------------
template <typename PT, int chunksize=4096> struct growing_array {
// -- data/cache ---------------------------------------------------
PT mem [chunksize];
mutable int cachedmajor, cachedminor; // cachedminor only used in fast_*()
mutable const growing_array *cachedchunk;
// -- alloc/dealloc-vars -------------------------------------------
growing_array *curr;
growing_array *next;
int nextfree;
int size_;
// -- members ------------------------------------------------------
growing_array ()
: cachedmajor (0), cachedchunk (this), curr (this), next (0), nextfree(0), size_ (0)
{}
~growing_array () {
if (0 != next) {
next->~growing_array<PT, chunksize> ();
}
}
void push_back(const PT &pt) {
using namespace std;
if (curr->nextfree >= chunksize) {
curr->next = new growing_array <PT, chunksize> ();
curr = curr->next;
}
curr->mem [curr->nextfree++] = pt;
++size_;
}
const int size () const {
return size_;
}
void fast_begin () {
cachedchunk = this;
cachedmajor = 0;
cachedminor = 0;
}
void fast_end () {
cachedchunk = this;
cachedmajor = 0;
cachedminor = 0;
}
const PT & fast_next () const {
if (cachedminor >= chunksize) {
cachedminor = 0;
cachedchunk = cachedchunk->next;
}
return cachedchunk->mem [cachedminor++];
}
const PT & operator [] (int index) const {
const int major = index / chunksize; // <-- gcc should recognize the div/mod-
const int minor = index % chunksize; // combo and emit only 1 div
const int diff = major - cachedmajor;

// Optimization for the case the new - old = 0 || 1
switch (diff) {
case 1: cachedchunk = cachedchunk->next;
cachedmajor = major; // <-- should be faster than ++cachedmajor on modern cpu
// fallthrough
case 0: return cachedchunk->mem [minor];
};

// I am not sure if the C++ standard mandates that true==1, but the following code depends on that
if (of_true_is_1) {
const bool d = diff >= 0;
growing_array const * const tmpchunk [2] = {this, cachedchunk};
const int tmpmajor [2] = {0, cachedmajor};
cachedchunk = tmpchunk [d];
for (cachedmajor=tmpmajor [d]; cachedmajor!=major; ++cachedmajor)
cachedchunk = cachedchunk->next;
return cachedchunk->mem [minor];
} else {
if (diff < 0) {
cachedchunk = this;
for (cachedmajor=0; cachedmajor!=major; ++cachedmajor)
cachedchunk = cachedchunk->next;
return cachedchunk->mem [minor];
} else {
for (; cachedmajor!=major; ++cachedmajor)
cachedchunk = cachedchunk->next;
return cachedchunk->mem [minor];
}
}
}
};

//-----------------------------------------------------------------------------
// typesafe template arguments
//-----------------------------------------------------------------------------
enum order_t {
order_in_main_section,
order_in_footnotes_section
};

enum safemode_t {
safe,
unsafe
};

//-----------------------------------------------------------------------------
// Reorder is a RAII class which does the reordering.
//
// template arguments:
// typename C --> char type for our strings
// C L --> open bracket for references/footnotes
// C R --> close bracket for references/footnotes
// order_t SO=order_in_main_section --> order_in_main_section or
// order_in_footnotes_section
// safemode_t S=safe --> catch some errors?
// (though "safe" will result in slower code)
// bool use_pool=true --> are we using our custom memory pool?
//-----------------------------------------------------------------------------
template <
typename C, C L, C R,
order_t SO=order_in_main_section,
safemode_t S=safe,
bool use_pool=true
> class Reorder {
private:

template <typename PT> struct mempool {
enum { size = (4096*1024) / sizeof (PT) };
// -- data/cache --------------------------------------------------
PT mem [size];
// -- alloc/dealloc-vars ------------------------------------------
mempool *curr;
mempool *next;
int nextfree;
// -- members -----------------------------------------------------
mempool () : curr (this), next (0), nextfree(0) {}
~mempool () {
if (0 != next) {
next->~mempool<PT> ();
}
}
PT *alloc() {
using namespace std;
if (curr->nextfree >= size) {
curr->next = new mempool <PT> ();
curr = curr->next;
}
return & curr->mem [curr->nextfree++];
// could also work with non-pod-types:
//new (& curr->mem [curr->nextfree++]) PT;
}
};

// that function assumes we're already inside the pair of
// brackets (i.e. after '[')
template <class I> tuple <bool, int> scanRefTarget (I &curr, I end) {
using namespace std;
I old_curr = curr;

int len = 0; // used to check validity of reference
int refID = 0; // the reference-id
while (R != *curr) { // <-- until we hit an end-bracket
if (S == safe) {
if (end == curr) { // <-- rudimentary error checking
cerr << "found end of file but expected numbers or '"
<< R << endl;
exit(1);
}
}
const int d = toInt (*curr);
if (-1 != d) {
refID = refID * 10 + d;
++len;
}
++curr;
}
if (0 != len) {
return tuple <bool, int> (true, refID);
}
curr = old_curr; // <-- was not a reference, so rollback
return tuple <bool, int> (false, -1);
}

template <typename I> struct Chunk { // intentionally POD
I begin, end;
int ref_target;
Chunk *next;
};

template <typename I> struct Reference { // intentionally POD
I begin, end;
int order_in_footnotes;
int order_in_text;
};

// Scans the main sections.
template <class I>
Chunk *scanText (I &curr, I end, mempool <Chunk > &chunk_pool) {

Chunk *chunk;
if (use_pool)
chunk = chunk_pool.alloc();
else
chunk = new Chunk;
Chunk * const first_chunk = chunk;
chunk->begin = curr;
chunk->next = 0;
chunk->ref_target = -1;

while (true) {
if (L == *curr) { // <-- we've hit a start-bracket
++curr; // <-- eat start-bracket
const I old_curr = curr;
const tuple <bool, int> refID = scanRefTarget (curr, end);
if (refID) {
chunk->ref_target = refID;
chunk->end = old_curr-1;
if (use_pool)
chunk->next = chunk_pool.alloc();
else
chunk->next = new Chunk;
chunk = chunk->next;
chunk->next = 0;
chunk->ref_target = -1;
chunk->begin = curr+1;
}
} else if (end == curr) {
chunk->end = curr;
break;
} else if ('\n' == *curr) {
// Here we used the bitor '&' intentionally instead of '&&'
// , the difference being that '&' most probably produces
// branchfree code.
// (see http://ompf.org/forum/viewtopic.php?p=10086#p10086)
I old_curr = curr;
if ('@' == curr[1]
& 'f' == curr[2]
& 'o' == curr[3]
& 'o' == curr[4]
& 't' == curr[5]
& 'n' == curr[6]
& 'o' == curr[7]
& 't' == curr[8]
& 'e' == curr[9]
& ':' == curr[10]
) {
curr += 11; // += strlen ("@footnote:")
chunk->end = old_curr;
// eat forward to newline
while (curr != end && *curr != '\n')
++curr;
++curr; // eatup newline
break;
} else {
curr = old_curr;
}
}
++curr;
};
return first_chunk;
}

// Scans the footnotes section.
template <
typename I,
class int_array // <-- template template = nonsense,
// since std::vector<A,B>, but growing_array<A>
>
void scanFootnotes (
std::map <int, Reference > &refs,
int_array &orderedRRefs,
I &curr, I end
) {
using namespace std;
int footnodeID = 1;
while (true) {
while (L != *curr && '\n' != *curr && end != curr)
++curr;
if (L == *curr) {
++curr; // <-- eat start-bracket
const I old_curr = curr;
const tuple <bool, int> refID = scanRefTarget (curr, end);
++curr;
if (refID) {
Reference ref;
// eat whitespace
while ('\t' == *curr || ' ' == *curr || '\n' == *curr)
++curr;
ref.begin = curr;
while (*curr != '\n' && curr != end)
++curr;
ref.end = curr;
ref.order_in_footnotes = footnodeID++;
ref.order_in_text = -1;
orderedRRefs.push_back (refID);
refs [refID] = ref;
}
} else if (end == curr) {
break;
}
++curr;
}
}

public:

template <class I> Reorder (I begin, I end) {
using namespace std;

I curr = begin;

// Scan text.
mempool<Chunk > chunk_pool;
Chunk *chunk = scanText (curr, end, chunk_pool);

// Scan footnotes.
map <int, Reference > refs;
typedef growing_array <int,4096*8> int_array;
int_array orderedRRefs;
scanFootnotes <I, int_array> (refs, orderedRRefs, curr, end);

switch (SO) {
case order_in_footnotes_section: {
// Dump Text
for (const Chunk *it = chunk; 0 != it; it = it->next) {
// Output text.
for (I a=it->begin; a != it->end; ++a) {
cout << *a;
}
// Output reference target.
if (-1 != it->ref_target) {
cout << '['
<< refs [it->ref_target].order_in_footnotes
<< ']';
}
}
cout << '\n';
// Dump footnotes.
cout << "@footnote:\n";
orderedRRefs.fast_begin();
const unsigned int s = orderedRRefs.size();
for (int i=0; i<s; ++i) {
const int r = orderedRRefs.fast_next();
cout << '[' << refs [r].order_in_footnotes << ']'
<< ' ';
for (I c=refs [r].begin; c!=refs [r].end; ++c) {
cout << *c;
}
cout << '\n';
}
orderedRRefs.fast_end();
break;
}
case order_in_main_section: {
// Dump Text
int newOrder = 1;
for (const Chunk *it = chunk; 0 != it; it = it->next) {
// Output text.
for (I a=it->begin; a != it->end; ++a) {
cout << *a;
}
// Output reference target.
if (-1 != it->ref_target) {
int &tmp = refs [it->ref_target].order_in_text;
if (tmp < 0)
tmp = newOrder++;
cout << '[' << tmp << ']';
}
}
cout << '\n';
// Dump footnotes.
cout << "@footnote:\n";
orderedRRefs.fast_begin();
const unsigned int s = orderedRRefs.size();
for (int i=0; i<s; ++i) {
const int r = orderedRRefs.fast_next();
cout << '[' << refs [r].order_in_text << ']' << ' ';
for (I c=refs [r].begin; c!=refs [r].end; ++c) {
cout << *c;
}
cout << '\n';
}
orderedRRefs.fast_end();
break;
}
}

while (0 != chunk) {
Chunk *tmp = chunk;
chunk = chunk->next;
// We can save some work by not calling ~Chunk() by assuming
// that Chunk is a POD
if (!use_pool)
delete tmp;
}
}
};

//-----------------------------------------------------------------------------
// main
//-----------------------------------------------------------------------------
int main (int argc, char *argv []) {
using namespace std;

// Parse arguments.
--argc;
++argv;
string filename = "";
order_t orderType = order_in_main_section;
while (0 != argc) {
const string arg (*argv);
--argc;
++argv;
if ("-f" == arg || "--filename" == arg) {
if (0 == argc) {
cerr << "error:no parameter given to argument '" << arg
<< "'\n";
} else {
filename = *argv;
--argc;
++argv;
}
} else if ("-s" == arg || "--sort" == arg) {
if (0 == argc) {
cerr << "error:no parameter given to argument '" << arg
<< "'\n";
cerr << "use either '" << arg << " main' or '" << arg
<< "' footnotes\n";
} else {
const string tmp = *argv;
if ("main" == tmp) {
orderType = order_in_main_section;
} else if ("footnotes" == tmp) {
orderType = order_in_footnotes_section;
} else {
cerr << "error:unknown parameter to argument '" << arg
<< "': " << tmp << '\n';
cerr << "use either '" << arg << " main' or '" << arg
<< " footnotes'\n";
}
--argc;
++argv;
}
} else {
cerr << "error:unknown argument '" << arg << "', skipping" << endl;
return 1;
}
}

// Either open textfile (if filename given) or read from stdin.
char *begin, *end;
if ("" != filename) {
// Read from file.
ifstream f (filename.c_str());
f.seekg (0, ios::end);
const int len = f.tellg();
f.seekg (0, ios::beg);
begin = new char [len];
end = begin + len;
f.read (begin, len);
f.close();
} else {
// Read from stdin.
string input (""), tmp;
while (!(cin >> tmp).eof())
input += tmp + '\n';
begin = new char [input.size()];
end = begin + input.size();
strcpy (begin, input.c_str());
}

switch (orderType) {
case order_in_main_section:
Reorder <
char, '[', ']', order_in_main_section, unsafe, true
> (begin, end);
break;
case order_in_footnotes_section:
Reorder <
char, '[', ']', order_in_footnotes_section, unsafe, true
> (begin, end);
break;
};
delete [] begin;
return 0;
}