You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
4.8 KiB
99 lines
4.8 KiB
/*
|
|
* Copyright (C) 2012 Webdoc SA
|
|
*
|
|
* This file is part of Open-Sankoré.
|
|
*
|
|
* Open-Sankoré is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public
|
|
* License as published by the Free Software Foundation, version 2,
|
|
* with a specific linking exception for the OpenSSL project's
|
|
* "OpenSSL" library (or with modified versions of it that use the
|
|
* same license as the "OpenSSL" library).
|
|
*
|
|
* Open-Sankoré is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public
|
|
* License along with Open-Sankoré; if not, see
|
|
* <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
|
|
#if !defined Parser_h
|
|
#define Parser_h
|
|
|
|
#include "Object.h"
#include "Document.h"
#include "Page.h"

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
|
|
namespace merge_lib
|
|
{
|
|
class Document;
|
|
|
|
//This class parsed the pdf document and creates
|
|
//an Document object
|
|
class Parser
|
|
{
|
|
public:
|
|
Parser(): _root(0), _fileContent(), _objects(), _document(0) {};
|
|
Document * parseDocument(const char * fileName);
|
|
|
|
static const std::string WHITESPACES;
|
|
static const std::string DELIMETERS;
|
|
static const std::string NUMBERS;
|
|
static const std::string WHITESPACES_AND_DELIMETERS;
|
|
|
|
static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL);
|
|
static std::string getNextToken( const std::string &in, unsigned &position);
|
|
static void trim(std::string &str);
|
|
static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd);
|
|
|
|
static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0);
|
|
static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0);
|
|
static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement);
|
|
static bool tokenIsAName(const std::string &content, size_t start );
|
|
protected:
|
|
const std::string & _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair<unsigned int, unsigned int> &, bool &);
|
|
virtual unsigned int _readTrailerAndReturnRoot();
|
|
private:
|
|
//methods
|
|
virtual void _getFileContent(const char * fileName);
|
|
bool _getNextObject(Object * object);
|
|
void _callObserver(std::string objectContent);
|
|
void _createObjectTree(const char * fileName);
|
|
void _retrieveAllPages(Object * objectWithKids);
|
|
void _fillOutObjects();
|
|
virtual void _readXRefAndCreateObjects();
|
|
unsigned int _getEndOfLineFromContent(unsigned int fromPosition);
|
|
const std::pair<unsigned int, unsigned int> & _getLineBounds(const std::string & str, unsigned int fromPosition);
|
|
const std::string & _getNextToken(unsigned int & fromPosition);
|
|
unsigned int _countTokens(unsigned int leftBound, unsigned int rightBount);
|
|
unsigned int _skipWhiteSpaces(const std::string & str);
|
|
unsigned int _skipWhiteSpacesFromContent(unsigned int fromPosition);
|
|
const std::map<unsigned int, Object::ReferencePositionsInContent> & _getReferences(const std::string & objectContent);
|
|
unsigned int _skipNumber(const std::string & str, unsigned int currentPosition);
|
|
unsigned int _skipWhiteSpaces(const std::string & str, unsigned int fromPosition);
|
|
void _createDocument(const char * docName);
|
|
virtual unsigned int _getStartOfXrefWithRoot();
|
|
unsigned int _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref);
|
|
void _clearParser();
|
|
|
|
|
|
protected:
|
|
|
|
//members
|
|
Object * _root;
|
|
std::string _fileContent;
|
|
std::map<unsigned int, Object *> _objects;
|
|
Document * _document;
|
|
|
|
};
|
|
}
|
|
#endif
|
|
|
|
|