/* * Copyright (C) 2012 Webdoc SA * * This file is part of Open-Sankoré. * * Open-Sankoré is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation, version 2, * with a specific linking exception for the OpenSSL project's * "OpenSSL" library (or with modified versions of it that use the * same license as the "OpenSSL" library). * * Open-Sankoré is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with Open-Sankoré; if not, see * . */ #if !defined Parser_h #define Parser_h #include "Object.h" #include "Document.h" #include "Page.h" #include #include namespace merge_lib { class Document; //This class parsed the pdf document and creates //an Document object class Parser { public: Parser(): _root(0), _fileContent(), _objects(), _document(0) {}; Document * parseDocument(const char * fileName); static const std::string WHITESPACES; static const std::string DELIMETERS; static const std::string NUMBERS; static const std::string WHITESPACES_AND_DELIMETERS; static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL); static std::string getNextToken( const std::string &in, unsigned &position); static void trim(std::string &str); static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd); static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0); static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0); static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement); static bool tokenIsAName(const std::string &content, size_t start ); protected: const std::string & _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair &, bool &); virtual unsigned int _readTrailerAndReturnRoot(); private: //methods virtual void _getFileContent(const char * fileName); bool _getNextObject(Object * object); void _callObserver(std::string objectContent); void _createObjectTree(const char * fileName); void _retrieveAllPages(Object * objectWithKids); void _fillOutObjects(); virtual void _readXRefAndCreateObjects(); unsigned int _getEndOfLineFromContent(unsigned int fromPosition); const std::pair & _getLineBounds(const std::string & str, unsigned int fromPosition); const std::string & _getNextToken(unsigned int & fromPosition); unsigned int _countTokens(unsigned int leftBound, unsigned int rightBount); unsigned int _skipWhiteSpaces(const std::string & str); unsigned int _skipWhiteSpacesFromContent(unsigned int fromPosition); const std::map & _getReferences(const std::string & objectContent); unsigned int _skipNumber(const std::string & str, unsigned int currentPosition); unsigned int _skipWhiteSpaces(const std::string & str, unsigned int fromPosition); void _createDocument(const char * docName); virtual unsigned int _getStartOfXrefWithRoot(); unsigned int _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref); void _clearParser(); protected: //members Object * _root; std::string _fileContent; std::map _objects; Document * _document; }; } #endif