/* Write Python objects to files and read them back.
This is intended for writing and reading compiled Python code only;
a true persistent storage facility would be much harder, since
it would have to take circular links and sharing into account. */
/* High-water mark for how deeply nested a marshalled object may be.
 * An object nested this deeply risks crashing the interpreter (dumping
 * core), so when the object stack gets this deep an exception is raised
 * instead of continuing.
 */
#define MAX_MARSHAL_STACK_DEPTH 5000
/* State for one marshal stream.  Output goes either to a stdio stream
   (fp != NULL) or to an in-memory buffer delimited by ptr/end (fp == NULL);
   see w_byte for how the two modes are selected. */
typedef struct {
/* stdio stream to use; NULL selects the in-memory buffer fields below */
FILE *fp;
/* NOTE(review): presumably an error indicator set by the writer; its
   values are not visible in this part of the file -- confirm */
int error;
/* NOTE(review): presumably the current nesting depth, checked against
   MAX_MARSHAL_STACK_DEPTH -- confirm against the writer code */
int depth;
/* If fp == NULL, the following are valid: */
/* NOTE(review): presumably the string object that owns the buffer -- confirm */
PyObject *str;
/* next position in the buffer to be written (or read) */
char *ptr;
/* one past the last usable byte; ptr == end means the buffer is exhausted */
char *end;
PyObject *strings; /* dict on marshal, list on unmarshal */
/* NOTE(review): presumably the marshal format version in use -- confirm */
int version;
} WFILE;
/* Append the single byte c to the output described by p (a WFILE *).
 *
 *   - If p->fp is set, the byte is written to that stdio stream with putc().
 *   - Otherwise, if the in-memory buffer still has room (ptr != end), the
 *     byte is stored at *ptr and ptr is advanced.
 *   - Otherwise the buffer is full and w_more(c, p) is asked to deal with it.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement: it can be used unbraced inside an if/else without
 * its internal else branches interacting with the caller's, and it must be
 * followed by a semicolon like an ordinary call.
 *
 * Note that p is evaluated several times, so it must not have side effects;
 * c is evaluated exactly once.
 */
#define w_byte(c, p) do { \
        if (((p)->fp)) putc((c), (p)->fp); \
        else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \
        else w_more(c, p); \
    } while (0)
/* Copy up to n bytes from the input described by p into s.
 *
 * When p->fp is set the bytes come from that stdio stream; otherwise they
 * come from the in-memory buffer between p->ptr and p->end, and p->ptr is
 * advanced past what was consumed.
 *
 * Returns the number of bytes actually copied, which is less than n when
 * the input runs out first.
 */
static int
r_string(char *s, int n, RFILE *p)
{
    if (p->fp == NULL) {
        /* In-memory source: never read beyond the end of the buffer. */
        if (p->end - p->ptr < n)
            n = (int)(p->end - p->ptr);
        memcpy(s, p->ptr, n);
        p->ptr += n;
        return n;
    }

    /* fread() returns at most n items, so the result always fits in int. */
    return (int)fread(s, 1, n, p->fp);
}
/* Read a 16-bit value stored low byte first and return it as a signed int.
 *
 * The two bytes are fetched with r_byte() in stream order, combined, and
 * then sign-extended from bit 15 so the result is correct even where short
 * is wider than 16 bits.
 */
static int
r_short(RFILE *p)
{
    short value;
    int low = r_byte(p);
    int high = r_byte(p);

    value = (short)(low | (high << 8));
    /* Sign-extension, in case short greater than 16 bits */
    value |= -(value & 0x8000);
    return value;
}
/* Read a 32-bit value stored low byte first and return it as a long.
 *
 * The four bytes come from p->fp via getc() when a stream is attached, and
 * from the in-memory buffer via rs_byte() otherwise.  Where long is wider
 * than four bytes the result is sign-extended from bit 31.
 */
static long
r_long(RFILE *p)
{
    FILE *stream = p->fp;
    long b0, b1, b2, b3;
    long value;

    /* Fetch the bytes strictly in stream order. */
    if (stream != NULL) {
        b0 = getc(stream);
        b1 = getc(stream);
        b2 = getc(stream);
        b3 = getc(stream);
    }
    else {
        b0 = rs_byte(p);
        b1 = rs_byte(p);
        b2 = rs_byte(p);
        b3 = rs_byte(p);
    }

    value = b0;
    value |= b1 << 8;
    value |= b2 << 16;
    value |= b3 << 24;
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    value |= -(value & 0x80000000L);
#endif
    return value;
}
/* r_long64 handles the TYPE_INT64 code: it reads two 32-bit words, low word
   first, and builds one 64-bit signed value from them.

   Where sizeof(long) > 4 the value fits in a C long and a Python int is
   returned.  Otherwise the eight bytes are laid out in native byte order
   and converted to a Python long.  Since w_long64 writes TYPE_INT whenever
   32 bits suffice, anything arriving as TYPE_INT64 genuinely needs more
   than 32 bits, so returning a PyLong on 32-bit boxes costs nothing.
*/
static PyObject *
r_long64(RFILE *p)
{
    long lo4 = r_long(p);
    long hi4 = r_long(p);
#if SIZEOF_LONG > 4
    return PyInt_FromLong((hi4 << 32) | (lo4 & 0xFFFFFFFFL));
#else
    unsigned char buf[8];
    int one = 1;
    /* The first byte of the int 1 is non-zero only on little-endian hosts. */
    int is_little_endian = (int)*(char*)&one;

    /* Low word goes first on little-endian hosts, last on big-endian. */
    memcpy(is_little_endian ? buf : buf + 4, &lo4, 4);
    memcpy(is_little_endian ? buf + 4 : buf, &hi4, 4);
    return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
#endif
}
/* Read one marshalled object from p.
 *
 * A single type byte is read with r_byte() and selects how the rest of the
 * object is decoded.  Container types call r_object() recursively for their
 * elements.
 *
 * Returns the decoded object, or NULL.  NULL is returned both on error (with
 * an exception set) and for the TYPE_NULL marker (with no exception set), so
 * callers must use PyErr_Occurred() to tell the two apart.
 */
static PyObject *
r_object(RFILE *p)
{
/* NULL is a valid return value, it does not necessarily mean that
an exception is set. */
PyObject *v, *v2, *v3;
long i, n;
int type = r_byte(p);
switch (type) {
/* Input ended before a type byte could be read. */
case EOF:
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
return NULL;
/* Explicit NULL marker: returned without setting an exception. */
case TYPE_NULL:
return NULL;
/* Singletons: take a reference and return the existing object. */
case TYPE_NONE:
Py_INCREF(Py_None);
return Py_None;
case TYPE_STOPITER:
Py_INCREF(PyExc_StopIteration);
return PyExc_StopIteration;
case TYPE_ELLIPSIS:
Py_INCREF(Py_Ellipsis);
return Py_Ellipsis;
case TYPE_FALSE:
Py_INCREF(Py_False);
return Py_False;
case TYPE_TRUE:
Py_INCREF(Py_True);
return Py_True;
/* 32-bit integer. */
case TYPE_INT:
return PyInt_FromLong(r_long(p));
/* 64-bit integer, see r_long64. */
case TYPE_INT64:
return r_long64(p);
/* Arbitrary-precision integer: a signed digit count followed by that many
   16-bit digits.  The sign of the count is the sign of the number. */
case TYPE_LONG:
{
int size;
PyLongObject *ob;
n = r_long(p);
if (n < -INT_MAX || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data");
return NULL;
}
/* size is the magnitude of the count; ob_size keeps the signed count */
size = n<0 ? -n : n;
ob = _PyLong_New(size);
if (ob == NULL)
return NULL;
ob->ob_size = n;
for (i = 0; i < size; i++) {
int digit = r_short(p);
/* a negative digit is rejected as corrupt data */
if (digit < 0) {
Py_DECREF(ob);
PyErr_SetString(PyExc_ValueError,
"bad marshal data");
return NULL;
}
ob->ob_digit[i] = digit;
}
return (PyObject *)ob;
}
/* Byte string: a length followed by that many raw bytes.  TYPE_INTERNED
   strings are additionally interned and recorded in p->strings so that a
   later TYPE_STRINGREF can refer to them by index. */
case TYPE_INTERNED:
case TYPE_STRING:
n = r_long(p);
if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data");
return NULL;
}
v = PyString_FromStringAndSize((char *)NULL, n);
if (v == NULL)
return v;
/* a short read means the input ended inside the string */
if (r_string(PyString_AS_STRING(v), (int)n, p) != n) {
Py_DECREF(v);
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
return NULL;
}
if (type == TYPE_INTERNED) {
PyString_InternInPlace(&v);
/* NOTE(review): the result of PyList_Append is not checked here */
PyList_Append(p->strings, v);
}
return v;
/* Reference to a previously read interned string, by index in p->strings. */
case TYPE_STRINGREF:
n = r_long(p);
if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
PyErr_SetString(PyExc_ValueError, "bad marshal data");
return NULL;
}
v = PyList_GET_ITEM(p->strings, n);
Py_INCREF(v);
return v;
#ifdef Py_USING_UNICODE
/* Unicode string: a byte length followed by that many bytes, which are
   read into a temporary buffer and decoded as UTF-8. */
case TYPE_UNICODE:
{
char *buffer;
n = r_long(p);
if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data");
return NULL;
}
buffer = PyMem_NEW(char, n);
if (buffer == NULL)
return PyErr_NoMemory();
if (r_string(buffer, (int)n, p) != n) {
PyMem_DEL(buffer);
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
return NULL;
}
v = PyUnicode_DecodeUTF8(buffer, n, NULL);
/* the temporary buffer is released whether or not decoding succeeded */
PyMem_DEL(buffer);
return v;
}
#endif
/* Tuple: an element count followed by that many objects. */
case TYPE_TUPLE:
n = r_long(p);
if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data");
return NULL;
}
v = PyTuple_New((int)n);
if (v == NULL)
return v;
for (i = 0; i < n; i++) {
v2 = r_object(p);
if ( v2 == NULL ) {
/* a NULL element without an exception is a stray TYPE_NULL */
if (!PyErr_Occurred())
PyErr_SetString(PyExc_TypeError,
"NULL object in marshal data");
Py_DECREF(v);
v = NULL;
break;
}
PyTuple_SET_ITEM(v, (int)i, v2);
}
return v;
/* List: an element count followed by that many objects. */
case TYPE_LIST:
n = r_long(p);
if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data");
return NULL;
}
v = PyList_New((int)n);
if (v == NULL)
return v;
for (i = 0; i < n; i++) {
v2 = r_object(p);
if ( v2 == NULL ) {
/* a NULL element without an exception is a stray TYPE_NULL */
if (!PyErr_Occurred())
PyErr_SetString(PyExc_TypeError,
"NULL object in marshal data");
Py_DECREF(v);
v = NULL;
break;
}
PyList_SetItem(v, (int)i, v2);
}
return v;
/* Dict: key/value pairs with no count up front.  Reading stops at the first
   NULL key, which is either the TYPE_NULL terminator or an error; the
   PyErr_Occurred() check after the loop distinguishes the two. */
case TYPE_DICT:
v = PyDict_New();
if (v == NULL)
return NULL;
for (;;) {
PyObject *key, *val;
key = r_object(p);
if (key == NULL)
break;
val = r_object(p);
/* NOTE(review): the result of PyDict_SetItem is not checked directly; a
   failure is only noticed through the PyErr_Occurred() test below */
if (val != NULL)
PyDict_SetItem(v, key, val);
Py_DECREF(key);
Py_XDECREF(val);
}
if (PyErr_Occurred()) {
Py_DECREF(v);
v = NULL;
}
return v;
/* Set / frozenset: an element count followed by that many objects.  The
   elements are first collected in a temporary tuple, from which the set is
   then built. */
case TYPE_SET:
case TYPE_FROZENSET:
n = r_long(p);
/* NOTE(review): unlike the tuple and list cases, only n < 0 is rejected
   here; there is no n > INT_MAX check before the (int) cast below */
if (n < 0) {
PyErr_SetString(PyExc_ValueError, "bad marshal data");
return NULL;
}
v = PyTuple_New((int)n);
if (v == NULL)
return v;
for (i = 0; i < n; i++) {
v2 = r_object(p);
if ( v2 == NULL ) {
if (!PyErr_Occurred())
PyErr_SetString(PyExc_TypeError,
"NULL object in marshal data");
Py_DECREF(v);
v = NULL;
break;
}
PyTuple_SET_ITEM(v, (int)i, v2);
}
if (v == NULL)
return v;
if (type == TYPE_SET)
v3 = PySet_New(v);
else
v3 = PyFrozenSet_New(v);
/* the temporary tuple is no longer needed once the set has been built */
Py_DECREF(v);
return v3;
/* Code object: refused in restricted execution mode; otherwise four integer
   fields are read, then eight objects, then the first line number, then
   the line number table. */
case TYPE_CODE:
if (PyEval_GetRestricted()) {
PyErr_SetString(PyExc_RuntimeError,
"cannot unmarshal code objects in "
"restricted execution mode");
return NULL;
}
else {
int argcount;
int nlocals;
int stacksize;
int flags;
/* every object field starts out NULL and is read in the order below */
PyObject *code = NULL;
PyObject *consts = NULL;
PyObject *names = NULL;
PyObject *varnames = NULL;
PyObject *freevars = NULL;
PyObject *cellvars = NULL;
PyObject *filename = NULL;
PyObject *name = NULL;
int firstlineno;
PyObject *lnotab = NULL;
v = NULL;
/* XXX ignore long->int overflows for now */
argcount = (int)r_long(p);
nlocals = (int)r_long(p);
stacksize = (int)r_long(p);
flags = (int)r_long(p);
code = r_object(p);
if (code == NULL)
goto code_error;
consts = r_object(p);
if (consts == NULL)
goto code_error;
names = r_object(p);
if (names == NULL)
goto code_error;
varnames = r_object(p);
if (varnames == NULL)
goto code_error;
freevars = r_object(p);
if (freevars == NULL)
goto code_error;
cellvars = r_object(p);
if (cellvars == NULL)
goto code_error;
filename = r_object(p);
if (filename == NULL)
goto code_error;
name = r_object(p);
if (name == NULL)
goto code_error;
firstlineno = (int)r_long(p);
lnotab = r_object(p);
if (lnotab == NULL)
goto code_error;
/* NOTE(review): in this text the TYPE_CODE branch stops here: no code
   object is built from the fields read above, nothing is returned, the
   code_error label targeted by the gotos is absent, and the function's
   closing brace is missing.  The text looks truncated at this point --
   confirm against the complete file. */
default:
/* Bogus data got written, which isn't ideal.
This will let you keep working and recover. */
PyErr_SetString(PyExc_ValueError, "bad marshal data");
return NULL;
}
}
/* Top-level entry point for reading one object from p.
 *
 * Unlike r_object(), this never returns NULL without an exception set:
 *   - if an exception is already pending on entry, a diagnostic is printed
 *     to stderr and NULL is returned without reading anything;
 *   - if r_object() returns NULL without setting an exception, TypeError
 *     is raised here.
 */
static PyObject *
read_object(RFILE *p)
{
    PyObject *result;

    if (PyErr_Occurred()) {
        fprintf(stderr, "XXX readobject called with exception set\n");
        return NULL;
    }

    result = r_object(p);
    if (result != NULL || PyErr_Occurred())
        return result;

    /* NULL with no exception pending: report it as bad data. */
    PyErr_SetString(PyExc_TypeError, "NULL object in marshal data");
    return result;
}
#ifdef HAVE_FSTAT
/* Report the size in bytes of the file behind fp, as given by fstat() on
   its descriptor.  Returns -1 when fstat() fails, i.e. when the size is
   unknown. */
static off_t
getfilesize(FILE *fp)
{
    struct stat info;

    return (fstat(fileno(fp), &info) == 0) ? info.st_size : -1;
}
#endif
/* If we can get the size of the file up-front, and it's reasonably small,
* read it in one gulp and delegate to ...FromString() instead. Much quicker
* than reading a byte at a time from file; speeds .pyc imports.
* CAUTION: since this may read the entire remainder of the file, don't
* call it unless you know you're done with the file.
*/
PyObject *
PyMarshal_ReadLastObjectFromFile(FILE *fp)
{
/* 75% of 2.1's .pyc files can exploit SMALL_FILE_LIMIT.
* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc.
*/
#define SMALL_FILE_LIMIT (1L << 14)
#define REASONABLE_FILE_LIMIT (1L << 18)
#ifdef HAVE_FSTAT
off_t filesize;
#endif
#ifdef HAVE_FSTAT
filesize = getfilesize(fp);
if (filesize > 0) {
char buf[SMALL_FILE_LIMIT];
char* pBuf = NULL;
if (filesize <= SMALL_FILE_LIMIT)
pBuf = buf;
else if (filesize <= REASONABLE_FILE_LIMIT)
pBuf = (char *)PyMem_MALLOC((int)filesize);
if (pBuf != NULL) {
PyObject* v;
size_t n;
/* filesize must fit into an int, because it
is smaller than REASONABLE_FILE_LIMIT */
n = fread(pBuf, 1, (int)filesize, fp);
v = PyMarshal_ReadObjectFromString(pBuf, n);
if (pBuf != buf)
PyMem_FREE(pBuf);
return v;
}
}
#endif
/* We don't have fstat, or we do but the file is larger than
* REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
*/
return PyMarshal_ReadObjectFromFile(fp);