#include "Object.h"
#include "Parser.h"
#include "Exception.h"

#include <string.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

using namespace merge_lib;

std::string NUMBERANDWHITESPACE(" 0123456789");

namespace
{
    /** @brief Number of characters needed to print @p value in decimal.
     *
     * Pure integer arithmetic: unlike a log10 based computation it is well
     * defined for 0 (which needs one character).
     */
    unsigned int decimalDigitCount(unsigned int value)
    {
        unsigned int digits = 1;
        while(value >= 10)
        {
            value /= 10;
            ++digits;
        }
        return digits;
    }
}

/** @brief Destructor.
 *
 * Only empties the containers held by this object; the parent and child
 * Object instances they point to are not deleted here.
 */
Object::~Object()
{
    _parents.clear();
    _children.clear();
    _content.clear();
}

/** @brief Clones this object together with every object reachable through
 *         its children.
 *
 * @param clones receives a pointer to every clone that was created (this
 *               object's clone included), ordered by object number.
 * @return the clone of this object. The clones are allocated with new and
 *         nothing in this function releases them.
 */
Object * Object::getClone(std::vector<Object *> & clones)
{
    std::map<unsigned int, Object *> clonesMap;
    Object * clone = _getClone(clonesMap);

    std::map<unsigned int, Object *>::iterator clonesIterator = clonesMap.begin();
    for(; clonesIterator != clonesMap.end(); ++clonesIterator)
        clones.push_back(clonesIterator->second);

    // the traversal marked every visited object as passed
    resetIsPassed();
    return clone;
}

/** @brief Recursive worker of getClone().
 *
 * Marks this object as passed, so that an object reachable through several
 * paths (or through a cycle) is cloned only once.
 *
 * @param clones object number -> clone, filled while the graph is walked.
 * @return the newly allocated clone of this object.
 */
Object * Object::_getClone(std::map<unsigned int, Object *> & clones)
{
    _isPassed = true;
    unsigned int objectNumber = this->getObjectNumber();

    Object * clone = new Object(objectNumber, this->_generationNumber, this->getObjectContent(),
                                _fileName, _streamBounds, _hasStream);
    clone->_hasStreamInContent = _hasStreamInContent;
    clones.insert(std::pair<unsigned int, Object *>(objectNumber, clone));

    Children::iterator currentChild = _children.begin();
    for(; currentChild != _children.end(); ++currentChild)
    {
        Object * currentObject = (*currentChild).second.first;
        unsigned int childObjectNumber = currentObject->getObjectNumber();

        Object * cloneOfCurrentChild = 0;
        if(currentObject->isPassed())
        {
            // already cloned (or being cloned) on another path: reuse it
            cloneOfCurrentChild = clones[childObjectNumber];
        }
        else
        {
            cloneOfCurrentChild = currentObject->_getClone(clones);
        }

        ChildAndItPositionInContent newChild(cloneOfCurrentChild, currentChild->second.second);
        clone->_children.insert(std::pair<unsigned int, ChildAndItPositionInContent>
                                (newChild.first->getObjectNumber(), newChild));
    }
    return clone;
}

/** @brief Registers @p child as a child of this object and this object as
 *         one of its parents.
 *
 * @param child                    object referenced from this object's content.
 * @param childPositionsInContent  offsets inside the content at which the
 *                                 reference to @p child is located.
 */
void Object::addChild(Object * child, const std::vector<unsigned int> childPositionsInContent)
{
    child->_addParent(this);
    _addChild(child, childPositionsInContent);
}

/** @brief Removes the child stored under @p child's object number.
 *
 * @return the reference positions that were stored for the removed child,
 *         or an empty vector when there was no such child.
 */
Object::ReferencePositionsInContent Object::removeChild(Object * child)
{
    ReferencePositionsInContent positions;

    // find() instead of operator[]: no temporary entry for an unknown child
    Children::iterator found = _children.find(child->getObjectNumber());
    if(found != _children.end())
    {
        positions = found->second.second;
        _children.erase(found);
    }
    return positions;
}

/** @brief Drops every child that is referenced inside
 *         [@p leftBound, @p rightBound] from the children map.
 *
 * Only this object's map is changed; the children themselves are untouched.
 */
void Object::forgetAboutChildren(unsigned int leftBound, unsigned int rightBound)
{
    std::vector<Object *> children = getChildrenByBounds(leftBound, rightBound);
    for(size_t i = 0; i < children.size(); ++i)
    {
        // erase by key: a no-op when the key is absent, whereas erasing the
        // result of a failed find() would be undefined behaviour
        _children.erase(children[i]->getObjectNumber());
    }
}

/** @brief Looks up a child by the key it is stored under.
 *
 * @return the child, or 0 when no child is stored under @p objectNumber.
 *         The lookup never modifies the children map.
 */
Object * Object::getChild(unsigned int objectNumber)
{
    Children::iterator found = _children.find(objectNumber);
    if(found == _children.end())
        return 0;
    return found->second.first;
}

/** @brief Collects the children having at least one reference position
 *         inside [@p leftBound, @p rightBound] (both bounds inclusive).
 *
 * @return the matching children in the iteration order of the children map;
 *         every child appears at most once.
 */
std::vector<Object *> Object::getChildrenByBounds(unsigned int leftBound, unsigned int rightBound)
{
    std::vector<Object *> result;
    for(Children::iterator currentChild = _children.begin();
        currentChild != _children.end(); ++currentChild)
    {
        const ReferencePositionsInContent & childPositions = (*currentChild).second.second;
        for(size_t i = 0; i < childPositions.size(); ++i)
        {
            unsigned int childPosition = childPositions[i];
            if((childPosition >= leftBound) && (childPosition <= rightBound))
            {
                result.push_back((*currentChild).second.first);
                break;
            }
        }
    }
    return result;
}

/** @brief Same selection as getChildrenByBounds(), but the result is ordered
 *         by the position of the reference in the content.
 *
 * Exceptions raised by getChildPosition() for already selected children
 * propagate to the caller.
 */
std::vector<Object *> Object::getSortedByPositionChildren(unsigned int leftBound, unsigned int rightBound)
{
    std::vector<Object *> result;
    for(Children::iterator currentChild = _children.begin();
        currentChild != _children.end(); ++currentChild)
    {
        const ReferencePositionsInContent & childPositions = (*currentChild).second.second;
        for(size_t i = 0; i < childPositions.size(); ++i)
        {
            unsigned int childPosition = childPositions[i];
            if((childPosition >= leftBound) && (childPosition <= rightBound))
            {
                // insert after the last already selected child that is
                // referenced before the current one
                unsigned int insertPosition = 0;
                for(unsigned int j(0); j < result.size(); ++j)
                    if(childPosition > getChildPosition(result[j]))
                        insertPosition = j + 1;
                result.insert(result.begin() + insertPosition, (*currentChild).second.first);
                break;
            }
        }
    }
    return result;
}

/** @brief Returns the single position at which @p child is referenced.
 *
 * @throws Exception when @p child is not stored under its object number or
 *         when it does not have exactly one reference position.
 */
unsigned int Object::getChildPosition(const Object * child) //throw (Exception)
{
    // find() instead of operator[]: a failed lookup must not add an entry
    Children::iterator found = _children.find(child->getObjectNumber());
    if( (found == _children.end()) ||
        (found->second.second.size() != 1) ||
        (found->second.first != child) )
        throw Exception("Internal error or wrong document (some reference is found twise)");
    return found->second.second[0];
}

/** @brief Detaches every child referenced inside
 *         [@p leftBound, @p rightBound] from all of its parents.
 */
void Object::removeChildrenByBounds(unsigned int leftBound, unsigned int rightBound)
{
    std::vector<Object *> children = getChildrenByBounds(leftBound, rightBound);
    for(size_t i = 0; i < children.size(); ++i)
    {
        children[i]->removeHimself();
    }
}

/** @brief Read-only access to the children map. */
const Object::Children & Object::getChildren()
{
    return _children;
}

/** @brief Asks every parent of this object to remove it from its children.
 *
 * The set of parents itself is left unchanged.
 */
void Object::removeHimself()
{
    std::set<Object *>::iterator currentParent = _parents.begin();
    for(; currentParent != _parents.end(); ++currentParent)
    {
        (*currentParent)->removeChild(this);
    }
}

/** @brief Current object number. */
unsigned int Object::getObjectNumber() const
{
    return _number;
}

/** @brief Generation number of the object. */
unsigned int Object::getgenerationNumber() const
{
    return _generationNumber;
}

/** @brief Mutable access to the content of the object. */
std::string & Object::getObjectContent()
{
    return _content;
}

/** @brief Assigns a new object number unless the object is already marked
 *         as passed; the previous number is remembered in _oldNumber.
 */
void Object::_setObjectNumber(unsigned int objectNumber)
{
    if(!isPassed())
    {
        _isPassed = true;
        _oldNumber = _number;
        _number = objectNumber;
    }
}

/** @brief Replaces the content. Reference positions are not recalculated. */
void Object::setObjectContent(const std::string & objectContent)
{
    _content = objectContent;
}

/** @brief Appends @p addToContent to the end of the content. */
void Object::appendContent(const std::string & addToContent)
{
    _content.append(addToContent);
}

/** @brief Erases @p size characters starting at @p from and moves the
 *         reference positions located after the erased range.
 */
void Object::eraseContent(unsigned int from, unsigned int size)
{
    int iSize = size;
    _recalculateReferencePositions(from + size, -iSize);
    _content.erase(from, size);
}

/** @brief Inserts @p insertedStr at @p position and moves the reference
 *         positions located after @p position.
 */
void Object::insertToContent(unsigned int position, const std::string & insertedStr)
{
    _recalculateReferencePositions(position, insertedStr.size());
    _content.insert(position, insertedStr);
}

/** @brief Inserts the first @p length characters of @p insertedStr at
 *         @p position and moves the reference positions located after it.
 */
void Object::insertToContent(unsigned int position, const char * insertedStr, unsigned int length)
{
    _recalculateReferencePositions(position, length);
    _content.insert(position, insertedStr, length);
}

/** @brief Writes this object and, recursively, all of its children to @p out.
 *
 * @param out                        destination stream.
 * @param sizesAndGenerationNumbers  object number -> (size in bytes of the
 *        serialized object, generation number). An object whose number is
 *        already present is considered written and is skipped, which also
 *        stops the recursion on cyclic references.
 */
void Object::serialize(std::ofstream & out,
                       std::map< unsigned int, std::pair<unsigned long long, unsigned int > > & sizesAndGenerationNumbers)
{
    //is this element already printed
    if(sizesAndGenerationNumbers.find(_number) != sizesAndGenerationNumbers.end())
        return;

    std::string stream;
    if(_hasStream && !_hasStreamInContent)
    {
        getStream(stream);
        stream.append("endstream\n");
    }

    // Must match what _serialize() writes:
    //   <number> " " <generation> " obj\n" <content> <stream> "endobj\n"
    // i.e. both numbers plus 13 fixed characters (1 + 1 + 3 + 1 + 7).
    // The digits of both numbers are counted, so a multi-digit generation
    // number or object number 0 yields the right size as well.
    unsigned long long objectSizeForXref = decimalDigitCount(_number)
                                         + decimalDigitCount(_generationNumber)
                                         + 13 + _content.size() + stream.size();
    sizesAndGenerationNumbers.insert(
        std::pair<unsigned int, std::pair<unsigned long long, unsigned int > >(
            _number, std::make_pair(objectSizeForXref, _generationNumber)));

    _serialize(out, stream);

    // give the (possibly large) stream buffer back before recursing
    std::string().swap(stream);

    //call serialize of each child
    Children::iterator it;
    for(it = _children.begin(); it != _children.end(); ++it)
    {
        Object * currentChild = (*it).second.first;
        currentChild->serialize(out, sizesAndGenerationNumbers);
    }
}

/** @brief Renumbers this object and every object reachable from it.
 *
 * @param newNumber in: the number for this object;
 *                  out: the last number that has been handed out.
 */
void Object::recalculateObjectNumbers(unsigned int & newNumber)
{
    _recalculateObjectNumbers(newNumber);
    resetIsPassed();
}

/** @brief Recursive worker of recalculateObjectNumbers().
 *
 * First renumbers this object and the not yet visited children (depth
 * first), then rewrites the object numbers of all children inside the
 * content and keeps the stored reference positions consistent when the
 * number of digits changes.
 */
void Object::_recalculateObjectNumbers(unsigned int & newNumber)
{
    _setObjectNumber(newNumber);

    Children::iterator childIterator;
    for(childIterator = _children.begin(); childIterator != _children.end(); ++childIterator)
    {
        Object * currentChild = (*childIterator).second.first;
        if(currentChild->isPassed())
            continue;
        currentChild->_recalculateObjectNumbers(++newNumber);
    }

    //recalculate references in content
    for(childIterator = _children.begin(); childIterator != _children.end(); ++childIterator)
    {
        Object * currentChild = (*childIterator).second.first;

        // Refers to the stored vector: the entries are updated by
        // _recalculateReferencePositions() while the loops below run.
        const ReferencePositionsInContent & refPositionForcurrentChild = (*childIterator).second.second;

        const std::string oldNumberStr = Utils::uIntToStr(currentChild->getOldNumber());
        const std::string newNumberStr = Utils::uIntToStr(currentChild->getObjectNumber());
        const unsigned int newNumberStringSize = newNumberStr.size();
        const unsigned int oldNumberStringSize = oldNumberStr.size();
        const int displacement = static_cast<int>(newNumberStringSize) - static_cast<int>(oldNumberStringSize);

        // number of leading characters that are overwritten in place
        unsigned int diff = newNumberStringSize;

        if(newNumberStringSize > oldNumberStringSize)
        {
            // the new number is longer: insert its additional trailing digits
            for(size_t referencePositionIter(0);
                referencePositionIter < refPositionForcurrentChild.size();
                ++referencePositionIter)
            {
                _recalculateReferencePositions(refPositionForcurrentChild[referencePositionIter], displacement);
                for(size_t referenceStringInter(oldNumberStringSize);
                    referenceStringInter < newNumberStringSize;
                    ++referenceStringInter)
                    _content.insert(refPositionForcurrentChild[referencePositionIter] + referenceStringInter,
                                    1, newNumberStr[referenceStringInter]);
            }
            diff = oldNumberStringSize;
        }
        if(newNumberStringSize < oldNumberStringSize)
        {
            // the new number is shorter: drop the superfluous digits
            for(size_t referencePositionIter(0);
                referencePositionIter < refPositionForcurrentChild.size();
                ++referencePositionIter)
            {
                _recalculateReferencePositions(refPositionForcurrentChild[referencePositionIter], displacement);
                _content.erase(refPositionForcurrentChild[referencePositionIter] + newNumberStringSize,
                               oldNumberStringSize - newNumberStringSize);
            }
        }

        // overwrite the digits shared by the old and the new number
        for(unsigned int i = 0; i < diff; i++)
            for(size_t referencePositionIter(0);
                referencePositionIter < refPositionForcurrentChild.size();
                ++referencePositionIter)
                _content[i + refPositionForcurrentChild[referencePositionIter]] = newNumberStr[i];
    }
}

/** @brief Shifts every stored reference position that is strictly greater
 *         than @p changedReference by @p displacement.
 *
 * This method should be called whenever the content of the object changes.
 */
void Object::_recalculateReferencePositions(unsigned int changedReference, int displacement)
{
    Children::iterator childIterator;
    for(childIterator = _children.begin(); childIterator != _children.end(); ++childIterator)
    {
        ReferencePositionsInContent & refPositionForcurrentChild = (*childIterator).second.second;
        for(size_t i = 0; i < refPositionForcurrentChild.size(); ++i)
            if(refPositionForcurrentChild[i] > changedReference)
                refPositionForcurrentChild[i] += displacement;
    }
}

/** @brief Recursive worker of retrieveMaxObjectNumber(); objects already
 *         marked as passed are skipped.
 */
void Object::_retrieveMaxObjectNumber(unsigned int & maxNumber)
{
    if(isPassed())
        return;
    _isPassed = true;

    if(maxNumber < _number)
        maxNumber = _number;

    Children::iterator it;
    for(it = _children.begin(); it != _children.end(); ++it)
        (*it).second.first->_retrieveMaxObjectNumber(maxNumber);
}

/** @brief Recursive worker of findObject().
 *
 * @param token                   token to look for.
 * @param foundObject             set to the object containing the token.
 * @param tokenPositionInContent  set to the result of Parser::findToken()
 *        for the last examined object; when nothing is found it holds
 *        std::string::npos truncated to unsigned int.
 * @return true when the token was found in this object or in a descendant.
 */
//TODO add check for absent token
bool Object::_findObject(const std::string & token, Object* & foundObject, unsigned int & tokenPositionInContent)
{
    _isPassed = true;
    tokenPositionInContent = Parser::findToken(_content, token);

    // The position is kept in an unsigned int, so it has to be compared
    // with npos truncated the same way: where size_t is wider than
    // unsigned int a direct comparison with std::string::npos never matches
    // and every object would be reported as containing the token.
    if(tokenPositionInContent != static_cast<unsigned int>(std::string::npos))
    {
        foundObject = this;
        return true;
    }

    for(Children::iterator it = _children.begin(); it != _children.end(); ++it)
        if((!(*it).second.first->_isPassed) &&
           ((*it).second.first->_findObject(token, foundObject, tokenPositionInContent)))
            return true;
    return false;
}

/** @brief Searches this object and its descendants for @p token.
 *
 * @return true and fills @p foundObject / @p tokenPositionInContent when the
 *         token is found, false otherwise.
 */
bool Object::findObject(const std::string & token, Object* & foundObject, unsigned int & tokenPositionInContent)
{
    bool result = _findObject(token, foundObject, tokenPositionInContent);
    resetIsPassed();
    return result;
}

/** @brief Raises @p maxNumber to the greatest object number found in this
 *         object and its descendants (it is never lowered).
 */
void Object::retrieveMaxObjectNumber(unsigned int & maxNumber)
{
    _retrieveMaxObjectNumber(maxNumber);
    resetIsPassed();
}

//methods

/** @brief Stores @p child in the children map.
 *
 * The key is the child's object number; when that key is already taken the
 * next free key above it is used instead.
 */
void Object::_addChild(Object * child, const ReferencePositionsInContent & childPositionsInContent)
{
    ChildAndItPositionInContent childAndItPositions(child, childPositionsInContent);
    unsigned int childObjectNumber = child->getObjectNumber();
    while(_children.count(childObjectNumber))
        ++childObjectNumber;
    _children.insert(std::pair<unsigned int, ChildAndItPositionInContent>
                     (childObjectNumber, childAndItPositions));
}

/** @brief Adds the given object to the set of parents of this object.
 *
 * Despite its name, the parameter is the parent to register.
 */
void Object::_addParent(Object * child)
{
    _parents.insert(child);
}

/** @brief Writes "<number> <generation> obj\n<content><stream>endobj\n" to
 *         @p out and flushes it.
 */
void Object::_serialize(std::ofstream & out, const std::string & stream)
{
    out << _number << " " << _generationNumber << " obj\n" << _content << stream << "endobj\n";
    out.flush();
}

/** @brief Retrieves the stream data of the object.
 *
 * When the stream is part of the content it is extracted from there,
 * otherwise the bytes delimited by _streamBounds are read from _fileName.
 *
 * @param stream receives the stream data.
 * @return false when the object has no stream or the stream cannot be
 *         located in the content, true otherwise.
 * @throws Exception when the file cannot be opened.
 */
bool Object::getStream(std::string & stream)
{
    if(!_hasStream && !_hasStreamInContent)
        return false;

    if(_hasStream && _hasStreamInContent)
        return _getStreamFromContent(stream);

    std::ifstream pdfFile;
    pdfFile.open(_fileName.c_str(), std::ios::binary);
    if(pdfFile.fail())
    {
        // The prefix is streamed rather than passed to the constructor: a
        // stringstream constructed from a string starts writing at offset 0
        // and would overwrite "File " with the file name.
        std::stringstream errorMessage;
        errorMessage << "File " << _fileName << " is absent";
        throw Exception(errorMessage);
    }

    // get length of stream:
    int length = _streamBounds.second - _streamBounds.first;
    pdfFile.seekg(_streamBounds.first, std::ios_base::beg);
    stream.resize(length);
    pdfFile.read(&stream[0], length);
    pdfFile.close();
    return true;
}

/** @brief Extracts the data located between the first "stream" keyword and
 *         the following "endstream" keyword of the content.
 *
 * @return false when one of the keywords is missing, true otherwise.
 */
bool Object::_getStreamFromContent(std::string & stream)
{
    size_t stream_begin = _content.find("stream");
    if(stream_begin == std::string::npos)
    {
        return false;
    }
    size_t stream_end = _content.find("endstream", stream_begin);
    if(stream_end == std::string::npos)
    {
        return false;
    }

    stream_begin += strlen("stream");
    // need to skip trailing \r
    while(_content[stream_begin] == '\r')
    {
        stream_begin++;
    }
    if(_content[stream_begin] == '\n')
    {
        stream_begin++;
    }

    stream = _content.substr(stream_begin, stream_end - stream_begin);
    return true;
}

/** @brief Returns the part of the content that precedes the stream.
 *
 * @param content receives the whole content when the object has no stream
 *                (or no "stream" keyword is present), otherwise everything
 *                before the first "stream" keyword.
 * @return always true.
 */
bool Object::getHeader(std::string &content)
{
    if(!hasStream())
    {
        content = _content;
        return true;
    }
    size_t stream_begin = _content.find("stream");
    content = _content.substr(0, stream_begin);
    return true;
}

/** @brief Tells whether the object has a stream (value of _hasStream). */
bool Object::hasStream()
{
    return _hasStream;
}

/** @brief Returns the value that follows a name in @p content.
 *
 * For example, ".../Length 123 /Filter" returns "123".
 * For "/Length 12 0 R" the trimmed content of the child stored under
 * number 12 is returned; when there is no such child an error is printed to
 * std::cerr and "12" is returned.
 *
 * @param content text to search in.
 * @param pattern name to look for, e.g. "/Length".
 * @param start   offset at which the search starts.
 */
std::string Object::getNameSimpleValue(const std::string &content, const std::string &pattern, size_t start)
{
    size_t foundStart, foundEnd;
    std::string token = Parser::findTokenStr(content, pattern, start, foundStart, foundEnd);

    std::string value;
    size_t beg = 0;
    // Now token could be /Length 127 or /Length 12 0 R
    if(Parser::getNextWord(value, token, beg))
    {
        // 127 or 12
        std::string interm;
        if(Parser::getNextWord(interm, token, beg)) // 0?
        {
            if(Parser::getNextWord(interm, token, beg)) // R
            {
                if(interm == "R") // we found reference to object!
                {
                    int number = Utils::stringToInt(value);
                    Object *child = getChild(number);
                    if(child)
                    {
                        value = child->getObjectContent();
                        Parser::trim(value);
                    }
                    else
                    {
                        std::cerr<<"Error::child object with number "<<number<<"is absent\n";
                    }
                }
                else
                {
                    std::cerr<<"Error:undefined format of token "<<token<<"\n";
                }
            }
            else
            {
                std::cerr<<"Error:undefined word"<<interm<<"\n";
            }
        }
    }
    return value;
}

/** @brief Finds the nearest object containing @p pattern, starting with this
 *         object and then following the "/Parent" references.
 *
 * @return this object when its content contains the pattern, otherwise the
 *         first ancestor whose content contains it, or NULL when the chain
 *         of "/Parent" references ends (or cannot be followed) first.
 */
Object* Object::findPatternInObjOrParents(const std::string &pattern)
{
    std::string content = getObjectContent();
    if(Parser::findToken(content, pattern, 0) != std::string::npos)
    {
        return this;
    }

    Object * parent = this;
    Object * foundObj = NULL;

    // objects already examined: a cyclic /Parent chain must not loop forever
    std::set<Object *> visited;
    visited.insert(parent);

    while(1)
    {
        // size_type, not unsigned int: where size_t is wider, npos truncated
        // to unsigned int never compares equal to std::string::npos
        const std::string::size_type startOfParent = content.find("/Parent");
        if(startOfParent == std::string::npos)
        {
            break;
        }
        const std::string::size_type endOfParent = content.find(" R", startOfParent);

        std::vector<Object *> parents =
            parent->getChildrenByBounds(static_cast<unsigned int>(startOfParent),
                                        static_cast<unsigned int>(endOfParent));
        if(parents.size() != 1)
        {
            break;
        }
        parent = parents[0];
        if(!visited.insert(parent).second)
        {
            break;
        }

        const std::string & parentContent = parent->getObjectContent();
        if(parentContent.find(pattern) == std::string::npos)
        {
            // not here: continue with the parent of the parent
            content = parentContent;
            continue;
        }
        foundObj = parent;
        break;
    }
    return foundObj;
}