Index: filters/kword/rtf/import/rtfimport_tokenizer.cpp
===================================================================
--- filters/kword/rtf/import/rtfimport_tokenizer.cpp (revision 467594)
+++ filters/kword/rtf/import/rtfimport_tokenizer.cpp (working copy)
@@ -14,7 +14,7 @@
RTFTokenizer::RTFTokenizer()
{
- tokenText.resize( 4112 );
+ tokenText.resize( 4113 );
fileBuffer.resize( 4096 );
infile = 0L;
}
@@ -28,8 +28,25 @@
fileBufferPtr = 0L;
fileBufferEnd = 0L;
infile = in;
+ type = RTFTokenizer::PlainText;
}
+int RTFTokenizer::nextChar()
+{
+    // Returns the next input byte, or -1 once nothing more can be read from infile.
+    if ( fileBufferPtr != fileBufferEnd )
+        return *fileBufferPtr++;
+    // Buffer drained: refill it; it is left empty if the read yields nothing.
+    const int bytesRead = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+    fileBufferPtr = ( uchar* ) fileBuffer.data();
+    fileBufferEnd = fileBufferPtr;
+    if ( bytesRead <= 0 )
+        return -1;
+    fileBufferEnd = fileBufferPtr + bytesRead;
+    return *fileBufferPtr++;
+}
+
+
/**
* Reads the next token.
*/
@@ -40,22 +57,15 @@
if (!infile)
return;
- do
- {
- if (fileBufferPtr == fileBufferEnd)
- {
- int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+ do {
+ int n = nextChar();
- if (n <= 0)
- {
- // Return CloseGroup on EOF
- ch = '}';
- break;
- }
- fileBufferPtr = (uchar *)fileBuffer.data();
- fileBufferEnd = (fileBufferPtr + n);
- }
- ch = *fileBufferPtr++;
+ if ( n <= 0 ) {
+ ch = '}';
+ break;
+ }
+
+ ch = n;
}
while (ch == '\n' || ch == '\r' && ch != 0);
@@ -65,6 +75,7 @@
uchar *_text = (uchar *)text;
+
if (ch == '{')
type = RTFTokenizer::OpenGroup;
else if (ch == '}')
@@ -73,20 +84,14 @@
{
type = RTFTokenizer::ControlWord;
- if (fileBufferPtr == fileBufferEnd)
- {
- int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+ int n = nextChar();
- if (n <= 0)
- {
- // Return CloseGroup on EOF
- type = RTFTokenizer::CloseGroup;
- return;
- }
- fileBufferPtr = (uchar *)fileBuffer.data();
- fileBufferEnd = (fileBufferPtr + n);
- }
- ch = *fileBufferPtr++;
+ if ( n <= 0 ) {
+ // Return CloseGroup on EOF
+ type = RTFTokenizer::CloseGroup;
+ return;
+ }
+ ch = n;
// Type is either control word or control symbol
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
@@ -94,64 +99,41 @@
int v = 0;
// Read alphabetic string (command)
- while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
+ while (_text < ( uchar* )tokenText.data()+tokenText.size()-3 &&
+ ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) )
{
*_text++ = ch;
- if (fileBufferPtr == fileBufferEnd)
- {
- int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
-
- if (n <= 0)
- {
- ch = ' ';
- break;
- }
- fileBufferPtr = (uchar *)fileBuffer.data();
- fileBufferEnd = (fileBufferPtr + n);
- }
- ch = *fileBufferPtr++;
+ int n = nextChar();
+ if ( n <= 0 ) {
+ ch = ' ';
+ break;
+ }
+ ch = n;
}
// Read numeric parameter (param)
bool isneg = (ch == '-');
- if (isneg)
- {
- if (fileBufferPtr == fileBufferEnd)
- {
- int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
-
- if (n <= 0)
- {
- // Return CloseGroup on EOF
- type = RTFTokenizer::CloseGroup;
- return;
- }
- fileBufferPtr = (uchar *)fileBuffer.data();
- fileBufferEnd = (fileBufferPtr + n);
- }
- ch = *fileBufferPtr++;
+ if (isneg) {
+ int n = nextChar();
+ if ( n <= 0 ) {
+ type = RTFTokenizer::CloseGroup;
+ return;
+ }
+ ch = n;
}
- while (ch >= '0' && ch <= '9')
- {
+
+ while (ch >= '0' && ch <= '9') {
v = (10 * v) + ch - '0';
hasParam = true;
- if (fileBufferPtr == fileBufferEnd)
- {
- int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+ int n = nextChar();
- if (n <= 0)
- {
- ch = ' ';
- break;
- }
- fileBufferPtr = (uchar *)fileBuffer.data();
- fileBufferEnd = (fileBufferPtr + n);
- }
- ch = *fileBufferPtr++;
- }
+ if ( n <= 0 )
+ n = ' ';
+ ch = n;
+ }
value = isneg ? -v : v;
// If delimiter is a space, it's part of the control word
@@ -164,19 +146,13 @@
{
type = RTFTokenizer::ControlWord;
*_text++ = ch;
- if (fileBufferPtr == fileBufferEnd)
- {
- int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
- if (n <= 0)
- {
- // Return CloseGroup on EOF
- type = RTFTokenizer::CloseGroup;
- return;
- }
- fileBufferPtr = (uchar *)fileBuffer.data();
- fileBufferEnd = (fileBufferPtr + n);
- }
+ int n = nextChar();
+ if ( n <= 0 ) {
+ type = RTFTokenizer::CloseGroup; // Return CloseGroup on EOF
+ return;
+ }
- ch = *fileBufferPtr++;
+ // nextChar() already consumed the byte it returned; reading the buffer again would skip it
+ ch = n;
for(int i=0;i<2;i++)
{
@@ -184,22 +160,16 @@
value<<=4;
value=value|((ch + ((ch & 16) ? 0 : 9)) & 0xf);
- if (fileBufferPtr == fileBufferEnd)
- {
- int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() );
+ int n = nextChar();
- if (n <= 0)
- {
- ch = ' ';
- break;
- }
- fileBufferPtr = (uchar *)fileBuffer.data();
- fileBufferEnd = (fileBufferPtr + n);
- }
+ if ( n <= 0 ) {
+ ch = ' ';
+ break;
+ }
- ch = *fileBufferPtr++;
+ ch = n; // nextChar() already consumed this byte
}
- --fileBufferPtr;
- }
+ if ( fileBufferPtr > ( uchar* ) fileBuffer.data() ) --fileBufferPtr; // give back the look-ahead byte; after EOF nextChar() leaves the buffer empty, so there is nothing to give back
+ }
else
{
type = RTFTokenizer::ControlWord;
@@ -212,14 +182,16 @@
// Everything until next backslash, opener or closer
while ( ch != '\\' && ch != '{' && ch != '}' && ch != '\n' &&
- ch != '\r' && fileBufferPtr <= fileBufferEnd )
+ ch != '\r')
{
*_text++ = ch;
+ if(fileBufferPtr >= fileBufferEnd)
+ break; // buffer drained: ch is already stored, nothing is pending
ch = *fileBufferPtr++;
}
-
- // Give back last char
- --fileBufferPtr;
+ if ( ch == '\\' || ch == '{' || ch == '}' || ch == '\n' || ch == '\r' )
+ --fileBufferPtr; // give back the delimiter that ended the loop, even if it was the buffer's last byte
}
*_text++ = 0;
+
}
Index: filters/kword/rtf/import/rtfimport_tokenizer.h
===================================================================
--- filters/kword/rtf/import/rtfimport_tokenizer.h (revision 467594)
+++ filters/kword/rtf/import/rtfimport_tokenizer.h (working copy)
@@ -35,6 +35,8 @@
// tokenizer (private) data
private:
+ int nextChar();
+
QFile *infile;
QByteArray fileBuffer;
QCString tokenText;