/*****
* profiler.h
* Andy Hammerlindl 2010/07/24
*
* Profiler for the execution of the virtual machine bytecode.
*****/

#ifndef PROFILER_H
#define PROFILER_H

#if !defined(_WIN32)
#include <sys/time.h>
#endif

#include <iostream>

#include "inst.h"
#include "seconds.h"

namespace vm {

#ifdef DEBUG_BLTIN
string lookupBltin(bltin b);
#endif

inline position positionFromLambda(lambda *func) {
 if (func == 0)
   return nullPos;

 program& code = *func->code;

 // Check for empty program.
 if (code.begin() == code.end())
   return nullPos;

 return code.begin()->pos;
}

inline void printNameFromLambda(ostream& out, lambda *func) {
 if (!func) {
   out << "<top level>";
   return;
 }

#ifdef DEBUG_FRAME
 string name = func->name;
#else
 string name = "";
#endif

 // If unnamed, use the pointer address.
 if (name.empty())
   out << func;
 else
   out << name;

 out << " at ";
 positionFromLambda(func).printTerse(out);
}

inline void printNameFromBltin(ostream& out, bltin b) {
#ifdef DEBUG_BLTIN
 string name = lookupBltin(b);
#else
 string name = "";
#endif

 if (!name.empty())
   out << name << " ";
 out << "(builtin at " << (void *)b << ")";
}

class profiler : public gc {
 // To do call graph analysis, each call stack that occurs in practice is
 // represented by a node.  For instance, if f and g are functions, then
 //   f -> g -> g
 // is represented by a node and
 //   g -> f -> g
 // is represented by a different one.
 struct node {
   // The top-level function of the call stack.  It is either an asymptote
   // function with a given lambda, or a builtin function, with a given
   // bltin.
   lambda *func;
   bltin cfunc;

   // The number of times the top-level function has been called resulting in
   // this specific call stack.
   int calls;

   // The number of bytecode instructions executed with this exact call stack.
   // It does not include time spent in called function.
   int instructions;

   // Number of instructions spent in this function or its children.  This is
   // computed by computeTotals.
   int instTotal;

   // The number of real-time nanoseconds spent in this node.  WARNING: May
   // be wildly inaccurate.
   long long nsecs;

   // Total including children.
   long long nsecsTotal;

   // Call stacks resulting from calls during this call stack.
   mem::vector<node> children;

   node()
     : func(0), cfunc(0), calls(0),
       instructions(0), instTotal(0),
       nsecs(0), nsecsTotal(0) {}

   node(lambda *func)
     : func(func), cfunc(0), calls(0),
       instructions(0), instTotal(0),
       nsecs(0), nsecsTotal(0) {}

   node(bltin b)
     : func(0), cfunc(b), calls(0),
       instructions(0), instTotal(0),
       nsecs(0), nsecsTotal(0) {}

   // Return the call stack resulting from a call to func when this call
   // stack is current.
   node *getChild(lambda *func) {
     size_t n = children.size();
     for (size_t i = 0; i < n; ++i)
       if (children[i].func == func)
         return &children[i];

     // Not found, create a new one.
     children.push_back(node(func));
     return &children.back();
   }
   node *getChild(bltin func) {
     size_t n = children.size();
     for (size_t i = 0; i < n; ++i)
       if (children[i].cfunc == func)
         return &children[i];

     // Not found, create a new one.
     children.push_back(node(func));
     return &children.back();
   }

   void computeTotals() {
     instTotal = instructions;
     nsecsTotal = nsecs;
     size_t n = children.size();
     for (size_t i = 0; i < n; ++i) {
       children[i].computeTotals();
       instTotal += children[i].instTotal;
       nsecsTotal += children[i].nsecsTotal;
     }
   }



   void pydump(ostream& out) {
#ifdef DEBUG_FRAME
     string name = func ? func->name : "<top level>";
#else
     string name = "";
#endif

     out << "dict(\n"
         << "    name = '" << name << " " << func << "',\n"
         << "    pos = '" << positionFromLambda(func) << "',\n"
         << "    calls = " << calls << ",\n"
         << "    instructions = " << instructions << ",\n"
         << "    nsecs = " << nsecs << ",\n"
         << "    children = [\n";

     size_t n = children.size();
     for (size_t i = 0; i < n; ++i) {
       children[i].pydump(out);
       out << ",\n";
     }

     out << "    ])\n";
   }
 };

 // An empty call stack.
 node emptynode;

 // All of the callstacks.
 std::stack<node *> callstack;

 node &topnode() {
   return *callstack.top();
 }

 // Arc representing one function calling another.  Used only for building
 // the output for kcachegrind.
 struct arc : public gc {
   int calls;
   int instTotal;
   long long nsecsTotal;

   arc() : calls(0), instTotal(0), nsecsTotal(0) {}

   void add(node& n) {
     calls += n.calls;
     instTotal += n.instTotal;
     nsecsTotal += n.nsecsTotal;
   }
 };

 // Representing one function and its calls to other functions.
 struct fun : public gc {
   int instructions;
   long long nsecs;
   mem::map<lambda *, arc> arcs;
   mem::map<bltin, arc> carcs;

   fun() : instructions(0), nsecs(0) {}

   void addChildTime(node& n) {
     if (n.cfunc)
       carcs[n.cfunc].add(n);
     else
       arcs[n.func].add(n);
   }

   void analyse(node& n) {
     instructions += n.instructions;
     nsecs += n.nsecs;
     size_t numChildren = n.children.size();
     for (size_t i = 0; i < numChildren; ++i)
       addChildTime(n.children[i]);
   }

   void dump(ostream& out) {
     // The unused line number needed by kcachegrind.
     static const string POS = "1";

     out << POS << " " << instructions << " " << nsecs << "\n";
     for (mem::map<lambda *, arc>::iterator i = arcs.begin();
          i != arcs.end();
          ++i)
       {
         lambda *l = i->first;
         arc& a = i->second;

         out << "cfl=" << positionFromLambda(l) << "\n";

         out << "cfn=";
         printNameFromLambda(out, l);
         out << "\n";

         out << "calls=" << a.calls << " " << POS << "\n";
         out << POS << " " << a.instTotal << " " << a.nsecsTotal << "\n";
       }
     for (mem::map<bltin, arc>::iterator i = carcs.begin();
          i != carcs.end();
          ++i)
       {
         bltin b = i->first;
         arc& a = i->second;

         out << "cfl=C++ code" << endl;

         out << "cfn=";
         printNameFromBltin(out, b);
         out << "\n";

         out << "calls=" << a.calls << " " << POS << "\n";
         out << POS << " " << a.instTotal << " " << a.nsecsTotal << "\n";
       }
   }
 };

 // The data for each asymptote function.
 mem::map<lambda *, fun> funs;

 // The data for each C++ function.
 mem::map<bltin, fun> cfuns;

 void analyseNode(node& n) {
   fun& f = n.cfunc ? cfuns[n.cfunc] :
     funs[n.func];

   f.analyse(n);

   size_t numChildren = n.children.size();
   for (size_t i = 0; i < numChildren; ++i)
     analyseNode(n.children[i]);
 }

 // Convert data in the tree of callstack nodes into data for each function.
 void analyseData() {
   emptynode.computeTotals();
   analyseNode(emptynode);
 }

 // Timing data.
 utils::cpuTimer timestamp;

 void startLap() {
   timestamp.reset();
 }

 // Called whenever the stack is about to change, in order to record the time
 // duration for the current node.
 void recordTime() {
   topnode().nsecs += (long long) timestamp.nanoseconds(true);
 }

public:
 profiler();

 void beginFunction(lambda *func);
 void endFunction(lambda *func);
 void beginFunction(bltin func);
 void endFunction(bltin func);
 void recordInstruction();

 // TODO: Add position, type of instruction info to profiling.

 // Dump all of the data out in a format that can be read into Python.
 void pydump(ostream &out);

 // Dump all of the data in a format for kcachegrind.
 void dump(ostream& out);

};

inline profiler::profiler()
 : emptynode()
{
 callstack.push(&emptynode);
 startLap();
}

inline void profiler::beginFunction(lambda *func) {
 assert(func);
 assert(!callstack.empty());

 recordTime();

 callstack.push(topnode().getChild(func));
 ++topnode().calls;
}

inline void profiler::endFunction(lambda *func) {
 assert(func);
 assert(!callstack.empty());
 assert(topnode().func == func);

 recordTime();

 callstack.pop();
}

inline void profiler::beginFunction(bltin cfunc) {
 assert(cfunc);
 assert(!callstack.empty());

 recordTime();

 callstack.push(topnode().getChild(cfunc));
 ++topnode().calls;
}

inline void profiler::endFunction(bltin cfunc) {
 assert(cfunc);
 assert(!callstack.empty());
 assert(topnode().cfunc == cfunc);

 recordTime();

 callstack.pop();
}

inline void profiler::recordInstruction() {
 assert(!callstack.empty());
 ++topnode().instructions;
}

inline void profiler::pydump(ostream& out) {
 out << "profile = ";
 emptynode.pydump(out);
}

inline void profiler::dump(ostream& out) {
 analyseData();

 out << "events: Instructions Nanoseconds\n";

 for (mem::map<lambda *, fun>::iterator i = funs.begin();
      i != funs.end();
      ++i)
   {
     lambda *l = i->first;
     fun& f = i->second;

     out << "fl=" << positionFromLambda(l) << "\n";

     out << "fn=";
     printNameFromLambda(out, l);
     out << "\n";

     f.dump(out);
   }
 for (mem::map<bltin, fun>::iterator i = cfuns.begin();
      i != cfuns.end();
      ++i)
   {
     bltin b = i->first;
     fun& f = i->second;

     out << "fl=C++ code\n";

     out << "fn=";
     printNameFromBltin(out, b);
     out << "\n";

     f.dump(out);
   }
}


} // namespace vm

#endif // PROFILER_H