/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#if !defined Parser_h
#define Parser_h
#include "Object.h"
#include "Document.h"
#include "Page.h"
#include
#include
namespace merge_lib
{
class Document;
//This class parsed the pdf document and creates
//an Document object
class Parser
{
public:
Parser(): _root(0), _fileContent(), _objects(), _document(0) {};
Document * parseDocument(const char * fileName);
static const std::string WHITESPACES;
static const std::string DELIMETERS;
static const std::string NUMBERS;
static const std::string WHITESPACES_AND_DELIMETERS;
static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL);
static std::string getNextToken( const std::string &in, unsigned &position);
static void trim(std::string &str);
static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd);
static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0);
static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0);
static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement);
static bool tokenIsAName(const std::string &content, size_t start );
protected:
const std::string & _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair &, bool &);
virtual unsigned int _readTrailerAndReturnRoot();
private:
//methods
virtual void _getFileContent(const char * fileName);
bool _getNextObject(Object * object);
void _callObserver(std::string objectContent);
void _createObjectTree(const char * fileName);
void _retrieveAllPages(Object * objectWithKids);
void _fillOutObjects();
virtual void _readXRefAndCreateObjects();
unsigned int _getEndOfLineFromContent(unsigned int fromPosition);
const std::pair & _getLineBounds(const std::string & str, unsigned int fromPosition);
const std::string & _getNextToken(unsigned int & fromPosition);
unsigned int _countTokens(unsigned int leftBound, unsigned int rightBount);
unsigned int _skipWhiteSpaces(const std::string & str);
unsigned int _skipWhiteSpacesFromContent(unsigned int fromPosition);
const std::map & _getReferences(const std::string & objectContent);
unsigned int _skipNumber(const std::string & str, unsigned int currentPosition);
unsigned int _skipWhiteSpaces(const std::string & str, unsigned int fromPosition);
void _createDocument(const char * docName);
virtual unsigned int _getStartOfXrefWithRoot();
unsigned int _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref);
void _clearParser();
protected:
//members
Object * _root;
std::string _fileContent;
std::map _objects;
Document * _document;
};
}
#endif