You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
77 lines
4.0 KiB
77 lines
4.0 KiB
14 years ago
|
#if !defined Parser_h
|
||
|
#define Parser_h
|
||
|
|
||
|
#include "Object.h"
#include "Document.h"
#include "Page.h"

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>
|
||
|
|
||
|
|
||
|
namespace merge_lib
|
||
|
{
|
||
|
class Document;
|
||
|
|
||
|
//This class parsed the pdf document and creates
|
||
|
//an Document object
|
||
|
class Parser
|
||
|
{
|
||
|
public:
|
||
|
Parser(): _root(0), _fileContent(), _objects(), _document(0) {};
|
||
|
Document * parseDocument(const char * fileName);
|
||
|
|
||
|
static const std::string WHITESPACES;
|
||
|
static const std::string DELIMETERS;
|
||
|
static const std::string NUMBERS;
|
||
|
static const std::string WHITESPACES_AND_DELIMETERS;
|
||
|
|
||
|
static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL);
|
||
|
static std::string getNextToken( const std::string &in, unsigned &position);
|
||
|
static void trim(std::string &str);
|
||
|
static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd);
|
||
|
|
||
|
static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0);
|
||
|
static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0);
|
||
|
static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement);
|
||
|
static bool tokenIsAName(const std::string &content, size_t start );
|
||
|
protected:
|
||
|
const std::string & _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair<unsigned int, unsigned int> &, bool &);
|
||
|
virtual unsigned int _readTrailerAndReturnRoot();
|
||
|
private:
|
||
|
//methods
|
||
|
virtual void _getFileContent(const char * fileName);
|
||
|
bool _getNextObject(Object * object);
|
||
|
void _callObserver(std::string objectContent);
|
||
|
void _createObjectTree(const char * fileName);
|
||
|
void _retrieveAllPages(Object * objectWithKids);
|
||
|
void _fillOutObjects();
|
||
|
virtual void _readXRefAndCreateObjects();
|
||
|
unsigned int _getEndOfLineFromContent(unsigned int fromPosition);
|
||
|
const std::pair<unsigned int, unsigned int> & _getLineBounds(const std::string & str, unsigned int fromPosition);
|
||
|
const std::string & _getNextToken(unsigned int & fromPosition);
|
||
|
unsigned int _countTokens(unsigned int leftBound, unsigned int rightBount);
|
||
|
unsigned int _skipWhiteSpaces(const std::string & str);
|
||
|
unsigned int _skipWhiteSpacesFromContent(unsigned int fromPosition);
|
||
|
const std::map<unsigned int, Object::ReferencePositionsInContent> & _getReferences(const std::string & objectContent);
|
||
|
unsigned int _skipNumber(const std::string & str, unsigned int currentPosition);
|
||
|
unsigned int _skipWhiteSpaces(const std::string & str, unsigned int fromPosition);
|
||
|
void _createDocument(const char * docName);
|
||
|
virtual unsigned int _getStartOfXrefWithRoot();
|
||
|
unsigned int _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref);
|
||
|
void _clearParser();
|
||
|
|
||
|
|
||
|
protected:
|
||
|
|
||
|
//members
|
||
|
Object * _root;
|
||
|
std::string _fileContent;
|
||
|
std::map<unsigned int, Object *> _objects;
|
||
|
Document * _document;
|
||
|
|
||
|
};
|
||
|
}
|
||
|
#endif
|
||
|
|