578 lines
17 KiB
578 lines
17 KiB
#include "Object.h"
#include "Parser.h"
#include "Exception.h"
#include <string.h>
#include <algorithm>
#include <fstream>
using namespace merge_lib;
// Character set made of a space followed by the decimal digits 0-9.
// It is not referenced anywhere in the visible part of this file.
std::string NUMBERANDWHITESPACE(" 0123456789");
// Public entry point for cloning.
// Delegates to _getClone(), which registers every clone it creates in the
// local clonesMap keyed by object number, and returns the clone it produced.
// NOTE(review): the body of the loop over clonesMap is not visible here;
// presumably it copies the created clones into `clones` - confirm against
// the full file.
Object * Object::getClone(std::vector<Object *> & clones)
std::map<unsigned int, Object *> clonesMap;
Object * clone = _getClone(clonesMap);
std::map<unsigned int, Object *>::iterator conesIterator = clonesMap.begin();
for(; conesIterator != clonesMap.end(); ++conesIterator)
return clone;
// Recursive helper behind getClone().
// Marks this object as passed, allocates a copy carrying the same object
// number, generation number, content, file name, stream bounds and stream
// flags, and registers the copy in `clones` under the object number.
// Every child is then visited and a ChildAndItPositionInContent entry is
// inserted into the copy's _children, keyed by newChild.first->getObjectNumber().
// The returned object is created with `new`; nothing in this function frees it.
// NOTE(review): the condition choosing between the lookup in `clones` and the
// recursive call, and the constructor arguments of newChild, are not visible
// here - presumably the lookup is used for children that were already passed;
// confirm against the full file.
Object * Object::_getClone(std::map<unsigned int, Object *> & clones)
_isPassed = true;
unsigned int objectNumber = this->getObjectNumber();
Object * clone = new Object(objectNumber, this->_generationNumber, this->getObjectContent(), _fileName, _streamBounds, _hasStream);
// _hasStreamInContent is copied separately; it is not a constructor argument here.
clone->_hasStreamInContent = _hasStreamInContent;
clones.insert(std::pair<unsigned int, Object *>(objectNumber, clone));
Children::iterator currentChild = _children.begin();
for(; currentChild != _children.end(); ++currentChild)
Object * currentObject = (*currentChild).second.first;
unsigned int childObjectNumber = currentObject->getObjectNumber();
Object * cloneOfCurrentChild = 0;
// Reuse of a clone that is already registered under this number.
cloneOfCurrentChild = clones[childObjectNumber];
// Creation of a new clone for the child (recursion).
cloneOfCurrentChild = currentObject->_getClone(clones);
ChildAndItPositionInContent newChild(
clone->_children.insert(std::pair<unsigned int, ChildAndItPositionInContent>
(newChild.first->getObjectNumber(), newChild)
return clone;
// Registers `child` together with the positions at which it is referenced in
// the content; the work is forwarded to _addChild().
// NOTE(review): childPositionsInContent is taken by value here, while
// _addChild() receives the same data by const reference.
void Object::addChild(Object * child, const std::vector<unsigned int> childPositionsInContent)
_addChild(child, childPositionsInContent);
// Returns a copy of the reference positions stored in _children for `child`.
// NOTE(review): the statements that actually detach the child are not visible
// here. If Children is a std::map, operator[] creates an empty entry when the
// child's number is unknown - confirm that this is intended.
Object::ReferencePositionsInContent Object::removeChild(Object * child)
ReferencePositionsInContent positions = _children[child->getObjectNumber()].second;
return positions;
// Collects the children referenced inside [leftBound, rightBound] through
// getChildrenByBounds() and iterates over them.
// NOTE(review): the loop body is not visible here, so what is done with each
// collected child cannot be stated from this view.
void Object::forgetAboutChildren(unsigned int leftBound, unsigned int rightBound)
std::vector<Object *> children = getChildrenByBounds(leftBound, rightBound);
for(size_t i = 0; i < children.size(); ++i)
// Returns the child stored in _children under objectNumber.
// NOTE(review): if Children is a std::map, operator[] inserts a
// value-initialized entry for an unknown number, so the lookup both grows the
// container and yields a null pointer in that case - confirm.
Object * Object::getChild(unsigned int objectNumber)
//TODO: check object before returning
return _children[objectNumber].first;
// Returns the children that have a reference position inside the inclusive
// range [leftBound, rightBound] of the content.
// In the visible code a child is appended once per matching position, so a
// child referenced several times inside the range can appear several times
// (unless a statement not visible here leaves the inner loop early).
std::vector<Object *> Object::getChildrenByBounds(unsigned int leftBound, unsigned int rightBound)
std::vector<Object *> result;
for(Children::iterator currentChild = _children.begin(); currentChild != _children.end(); ++currentChild)
// Copy of the positions at which the current child is referenced.
ReferencePositionsInContent childPositions = (*currentChild).second.second;
for(size_t i = 0; i < childPositions.size(); ++i)
unsigned int childPosition = childPositions[i];
if((childPosition >= leftBound) && (childPosition <= rightBound))
result.push_back( (*currentChild).second.first);
return result;
// Same selection as getChildrenByBounds(), but the result is kept ordered by
// reference position: every matching child is inserted right after the last
// already collected child whose position is smaller.
// getChildPosition() is called for the collected children, so the restrictions
// of that method (a single recorded position per child) apply here as well.
std::vector<Object *> Object::getSortedByPositionChildren(unsigned int leftBound, unsigned int rightBound)
std::vector<Object *> result;
for(Children::iterator currentChild = _children.begin(); currentChild != _children.end(); ++currentChild)
ReferencePositionsInContent childPositions = (*currentChild).second.second;
for(size_t i = 0; i < childPositions.size(); ++i)
unsigned int childPosition = childPositions[i];
if((childPosition >= leftBound) && (childPosition <= rightBound))
// Linear search for the insertion index inside the sorted result.
unsigned int insertPosition = 0;
for(unsigned int j(0); j < result.size(); ++j)
if(childPosition > getChildPosition(result[j]))
insertPosition = j + 1;
result.insert(result.begin() + insertPosition, (*currentChild).second.first);
return result;
// Returns the first recorded position of `child` in the content.
// The visible condition covers two cases: the child does not have exactly one
// recorded position, or the object stored under its number is not `child`.
// NOTE(review): the line introducing that condition is not visible here; it
// appears to guard the throw of Exception below - confirm.
unsigned int Object::getChildPosition(const Object * child)//throw (Exception)
const ReferencePositionsInContent & childrenPostion = _children[child->getObjectNumber()].second;
(childrenPostion.size() != 1) ||
(_children[child->getObjectNumber()].first != child)
throw Exception("Internal error or wrong document (some reference is found twise)");
return childrenPostion[0];
// Collects the children referenced inside [leftBound, rightBound] through
// getChildrenByBounds() and iterates over them.
// NOTE(review): the loop body is not visible here; presumably each collected
// child is removed - confirm against the full file.
void Object::removeChildrenByBounds(unsigned int leftBound, unsigned int rightBound)
std::vector<Object *> children = getChildrenByBounds(leftBound, rightBound);
for(size_t i = 0; i < children.size(); ++i)
// Read-only access to the container of children.
const Object::Children & Object::getChildren()
return _children;
// Iterates over every object stored in _parents.
// NOTE(review): the loop body is not visible here; presumably this object is
// detached from each parent - confirm against the full file.
void Object::removeHimself()
std::set<Object *>::iterator currentParent = _parents.begin();
for(; currentParent != _parents.end(); ++currentParent)
// Returns the current object number (_number).
unsigned int Object::getObjectNumber() const
return _number;
// Returns the generation number (_generationNumber).
unsigned int Object::getgenerationNumber() const
return _generationNumber;
// Returns a mutable reference to the content string.
// Changes made through this reference do not go through
// _recalculateReferencePositions(), unlike eraseContent()/insertToContent().
std::string & Object::getObjectContent()
return _content;
// Assigns a new object number.
// The object is marked as passed and the previous number is kept in
// _oldNumber before _number is overwritten.
void Object::_setObjectNumber(unsigned int objectNumber)
_isPassed = true;
_oldNumber = _number;
_number = objectNumber;
// Replaces the whole content string.
// The visible body does not touch the reference positions stored in _children.
void Object::setObjectContent(const std::string & objectContent)
_content = objectContent;
// NOTE(review): only the signature is visible here; presumably addToContent
// is appended to _content - confirm against the full file.
void Object::appendContent(const std::string & addToContent)
// Removes `size` characters of the content starting at `from`.
// The stored reference positions are adjusted first: the helper is called
// with the end of the erased range (from + size) and a displacement of -size.
// NOTE(review): the helper only moves positions strictly greater than its
// first argument, so a reference that starts exactly at from + size, or one
// that lies inside the erased range, keeps its old position - confirm that
// callers never erase in such a way.
void Object::eraseContent(unsigned int from, unsigned int size)
// Signed copy so that the displacement can be negated.
int iSize = size;
_recalculateReferencePositions(from + size, -iSize);
_content.erase(from, size);
// Inserts insertedStr into the content at `position`.
// The stored reference positions are adjusted first by the length of the
// inserted string.
// NOTE(review): the helper only moves positions strictly greater than
// `position`, so a reference that starts exactly at `position` keeps its old
// value although its text is pushed back by the insertion - confirm.
void Object::insertToContent(unsigned int position, const std::string & insertedStr)
_recalculateReferencePositions(position, insertedStr.size());
_content.insert(position, insertedStr);
// Inserts the first `length` characters of insertedStr into the content at
// `position`.
// The stored reference positions are adjusted first by `length`.
// NOTE(review): the helper only moves positions strictly greater than
// `position`, so a reference that starts exactly at `position` keeps its old
// value although its text is pushed back by the insertion - confirm.
void Object::insertToContent(unsigned int position, const char * insertedStr, unsigned int length)
_recalculateReferencePositions(position, length);
_content.insert(position, insertedStr, length);
// Writes this object and then its children to `out`.
// sizesAndGenerationNumbers maps an object number to the pair
// <serialized size, generation number>; it also serves as the record of the
// objects that were already written.
// NOTE(review): std::log10 is used, but <cmath> is not among the includes
// visible at the top of this file. For _number == 0 the logarithm is not
// finite and its conversion to unsigned int is undefined - confirm that
// object number 0 never reaches this method.
void Object::serialize(std::ofstream & out, std::map< unsigned int, std::pair<unsigned long long, unsigned int > > & sizesAndGenerationNumbers)
//is this element already printed
if(sizesAndGenerationNumbers.find(_number) != sizesAndGenerationNumbers.end()) return;
std::string stream;
// NOTE(review): the body of this condition is not visible here; presumably
// the stream data is loaded into `stream` - confirm.
if(_hasStream && !_hasStreamInContent)
// xxxx + " " + "0" + " " + "obj" + "\n" + _content.size() + "endobj\n", where x - is a digit
// The constant 14 follows the layout above and assumes a one-character
// generation number, whereas _serialize() prints _generationNumber itself.
unsigned long long objectSizeForXref = (static_cast<unsigned int>(std::log10(static_cast<double>(_number))) + 1) + 14 + _content.size() + stream.size();
sizesAndGenerationNumbers.insert(std::pair<unsigned int, std::pair<unsigned long long, unsigned int > >(_number, std::make_pair(objectSizeForXref, _generationNumber)));
_serialize(out, stream);
//call serialize of each child
Children::iterator it;
for ( it=_children.begin() ; it != _children.end(); it++ )
Object * currentChild = (*it).second.first;
currentChild->serialize(out, sizesAndGenerationNumbers);
// NOTE(review): only the signature is visible here; presumably this is the
// public wrapper around _recalculateObjectNumbers() - confirm against the
// full file.
void Object::recalculateObjectNumbers(unsigned int & newNumber)
// Renumbering helper working in two passes over _children.
// Pass 1 visits the children that are not yet passed (the statements executed
// for them are not visible here).
// Pass 2 rewrites, inside _content, every reference to a child so that it
// shows the child's current number instead of its old one:
//  - new number longer than the old one: the positions behind each reference
//    are moved forward and the content is extended (statement only partly
//    visible);
//  - new number shorter: the positions are moved back and the surplus
//    characters are erased;
//  - finally the first `diff` characters of each reference are overwritten
//    with the digits of the new number.
// NOTE(review): inside pass 2 the local string `newNumber` hides the
// parameter of the same name.
// NOTE(review): newNumberStringSize - oldNumberStringSize is an unsigned
// subtraction; in the "shorter" branch it wraps around and only becomes a
// negative displacement through the conversion to int - confirm on the
// target platforms.
void Object::_recalculateObjectNumbers(unsigned int & newNumber)
Children::iterator childIterator;
for ( childIterator = _children.begin() ; childIterator != _children.end(); ++childIterator )
Object * currentChild = (*childIterator).second.first;
if(currentChild->isPassed()) continue;
//recalculate references in content
for ( childIterator = _children.begin() ; childIterator != _children.end(); ++childIterator)
Object * currentChild = (*childIterator).second.first;
//if(currentChild->getOldNumber() == currentChild->getObjectNumber()) continue;
const ReferencePositionsInContent & refPositionForcurrentChild = (*childIterator).second.second;
const std::string & oldNumberStr = Utils::uIntToStr(currentChild->getOldNumber());
const std::string & newNumber = Utils::uIntToStr(currentChild->getObjectNumber());
const unsigned int newNumberStringSize = newNumber.size();
const unsigned int oldNumberStringSize = oldNumberStr.size();
// Number of leading characters that are overwritten in place at the end.
unsigned int diff = newNumberStringSize;
if (newNumberStringSize > oldNumberStringSize)
for(size_t referencePositionIter(0); referencePositionIter < refPositionForcurrentChild.size(); ++referencePositionIter)
_recalculateReferencePositions(refPositionForcurrentChild[referencePositionIter], newNumberStringSize - oldNumberStringSize);
for(size_t referenceStringInter(oldNumberStringSize); referenceStringInter < newNumberStringSize; ++referenceStringInter )
refPositionForcurrentChild[referencePositionIter] + referenceStringInter,
diff = oldNumberStringSize;
if (newNumberStringSize < oldNumberStringSize)
for(size_t referencePositionIter(0); referencePositionIter < refPositionForcurrentChild.size(); ++referencePositionIter)
_recalculateReferencePositions(refPositionForcurrentChild[referencePositionIter], newNumberStringSize - oldNumberStringSize);
_content.erase(refPositionForcurrentChild[referencePositionIter] + newNumberStringSize,
oldNumberStringSize - newNumberStringSize
for(unsigned int i = 0; i < diff; i++)
for(size_t referencePositionIter(0); referencePositionIter < refPositionForcurrentChild.size(); ++referencePositionIter)
_content[i + refPositionForcurrentChild[referencePositionIter]] = newNumber[i];
// This method has to be called whenever the object's content changes.
// Every stored reference position that is strictly greater than
// changedReference is moved by `displacement` (negative values move it
// towards the beginning of the content). Positions equal to changedReference
// stay unchanged.
void Object::_recalculateReferencePositions(unsigned int changedReference, int displacement)
Children::iterator childIterator;
for ( childIterator = _children.begin() ; childIterator != _children.end(); ++childIterator )
// Reference to the stored positions, so the update below is done in place.
ReferencePositionsInContent & refPositionForcurrentChild = (*childIterator).second.second;
for(size_t i = 0; i < refPositionForcurrentChild.size(); ++i)
if(refPositionForcurrentChild[i] > changedReference)
refPositionForcurrentChild[i] += displacement;
// Raises maxNumber to this object's number when that number is larger.
// Objects that are already passed are skipped; otherwise the object is marked
// as passed before its children are iterated.
// NOTE(review): the loop body is not visible here; presumably the method
// recurses into every child - confirm against the full file.
void Object::_retrieveMaxObjectNumber(unsigned int & maxNumber)
if(isPassed()) return;
_isPassed = true;
if(maxNumber < _number)
maxNumber = _number;
Children::iterator it;
for ( it=_children.begin() ; it != _children.end(); ++it )
//TODO add check for absent token
// Depth-first search for `token`.
// The object is marked as passed and its own content is searched with
// Parser::findToken(); on a hit foundObject is set to this object and
// tokenPositionInContent holds the position. Otherwise the children that are
// not yet passed are searched recursively. Returns false when nothing matches.
// NOTE(review): tokenPositionInContent is an unsigned int compared against
// std::string::npos. Where size_t is wider than unsigned int the two can
// never be equal, so a "not found" result would be treated as a hit -
// confirm the return type of Parser::findToken and the target platforms.
bool Object::_findObject(const std::string & token, Object* & foundObject, unsigned int & tokenPositionInContent)
_isPassed = true;
tokenPositionInContent = Parser::findToken(_content,token);
if(tokenPositionInContent != std::string::npos)
foundObject = this;
return true;
for (Children::iterator it=_children.begin() ; it != _children.end(); ++it )
if((!(*it).second.first->_isPassed) &&
((*it).second.first->_findObject(token, foundObject, tokenPositionInContent)))
return true;
return false;
// Public entry point of the token search; the search itself is done by
// _findObject().
// NOTE(review): the statements between the call and the two returns are not
// visible here; presumably `result` selects the returned value - confirm
// against the full file.
bool Object::findObject(const std::string & token, Object* & foundObject, unsigned int & tokenPositionInContent)
bool result = _findObject(token, foundObject, tokenPositionInContent);
return true;
return false;
// NOTE(review): only the signature is visible here; presumably this is the
// public wrapper around _retrieveMaxObjectNumber() - confirm against the
// full file.
void Object::retrieveMaxObjectNumber(unsigned int & maxNumber)
// Stores `child` and its reference positions in _children under the child's
// object number.
// NOTE(review): if Children is a std::map, insert() keeps an existing entry
// with the same number and the new positions are dropped - confirm that this
// is intended.
void Object::_addChild(Object * child, const ReferencePositionsInContent & childPositionsInContent)
ChildAndItPositionInContent childAndItPositions(child, childPositionsInContent);
unsigned int childObjectNumber = child->getObjectNumber();
_children.insert(std::pair<unsigned int, ChildAndItPositionInContent > (childObjectNumber, childAndItPositions));
// NOTE(review): only the signature is visible here; note that the parameter
// is named `child` although the method name refers to a parent - confirm the
// intended meaning against the full file.
void Object::_addParent(Object * child)
// Writes a single object to `out` in the form
//   <number> <generation> obj\n<content><stream>endobj\n
// No separator is added between the content, the stream and "endobj".
void Object::_serialize(std::ofstream & out, const std::string & stream)
out << _number << " " << _generationNumber << " obj\n" << _content << stream << "endobj\n";
/** @brief getStream
 * Loads the stream data of this object into `stream`.
 * Returns false when the object has no stream at all. When the stream is
 * part of the content a different path is taken (its statements are only
 * partly visible here). Otherwise the bytes between _streamBounds.first and
 * _streamBounds.second are read from the file _fileName.
 * Throws Exception when that file cannot be opened.
 */
bool Object::getStream(std::string & stream)
if(!_hasStream && !_hasStreamInContent)
return false;
// NOTE(review): the call made in this branch is not visible here; presumably
// it is _getStreamFromContent(stream) - confirm.
if( _hasStream && _hasStreamInContent)
return true;
return false;
std::ifstream pdfFile;
pdfFile.open (_fileName.c_str(), std::ios::binary );
if (pdfFile.fail())
// NOTE(review): a stringstream constructed from a string starts writing at
// position 0, so the text "File " is overwritten by the file name; the
// trailing "\0" is an empty C string and adds nothing. <sstream> is not among
// the includes visible at the top of this file.
std::stringstream errorMessage("File ");
errorMessage << _fileName << " is absent" << "\0";
throw Exception(errorMessage);
// length of the stream data inside the file:
int length = _streamBounds.second - _streamBounds.first;
pdfFile.seekg (_streamBounds.first, std::ios_base::beg);
// NOTE(review): `stream` must already hold at least `length` characters for
// this read; no resize is visible here, and the result of read() is not
// checked - confirm against the full file.
pdfFile.read(&stream[0], length);
return true;
// Extracts the stream data that is embedded in _content.
// The data starts after the first "stream" keyword, skipping any '\r'
// characters and one '\n' that follow it, and ends right before the first
// "endstream" found at or after that keyword.
// Returns false, leaving `stream` untouched, when either keyword is missing.
bool Object::_getStreamFromContent(std::string & stream)
size_t stream_begin = _content.find("stream");
if( stream_begin == std::string::npos )
return false;
size_t stream_end = _content.find("endstream",stream_begin);
if( stream_end == std::string::npos )
return false;
// Move past the keyword itself.
stream_begin += strlen("stream");
// need to skip trailing \r
while(_content[stream_begin] == '\r')
stream_begin ++;
if( _content[stream_begin] == '\n')
stream_begin ++;
stream = _content.substr(stream_begin, stream_end - stream_begin);
return true;
/** @brief getHeader
 * Copies the part of the content that precedes the stream into `content`.
 * Without a stream the whole content is copied. With a stream everything
 * before the first "stream" keyword is copied; when that keyword does not
 * occur in the content, find() yields npos and substr() returns the whole
 * content as well.
 * Every visible return statement returns true.
 */
bool Object::getHeader(std::string &content)
if( !hasStream() )
content = _content;
return true;
size_t stream_begin = _content.find("stream");
content = _content.substr(0,stream_begin);
return true;
/** @brief hasStream
 * Returns the _hasStream flag. The _hasStreamInContent flag is not taken
 * into account here.
 */
bool Object::hasStream()
return _hasStream;
// The method returns the value that follows the name `pattern` in `content`,
// searching from `start`.
// For example, .../Length 123 /Filter will return 123
// For /Length 12 0 R will return the content of 12 0 obj
// The referenced object is looked up among the children of this object with
// getChild(). Problems are reported on std::cerr.
// NOTE(review): the else-branches that separate the three error messages are
// not visible here, and <iostream> is not among the includes visible at the
// top of this file.
std::string Object::getNameSimpleValue(const std::string &content, const std::string &pattern, size_t start)
size_t foundStart, foundEnd;
std::string token = Parser::findTokenStr(content,pattern,start,foundStart,foundEnd);
std::string value;
size_t beg = 0;
// Now token could be /Length 127 or /Length 12 0 R
if( Parser::getNextWord(value,token,beg) )
// 127 or 12
std::string interm;
if( Parser::getNextWord(interm,token,beg) ) // 0?
if( Parser::getNextWord(interm,token,beg) ) // R
if( interm == "R" ) // we found reference to object!
int number = Utils::stringToInt(value);
Object *child = getChild(number);
if( child )
value = child->getObjectContent();
std::cerr<<"Error::child object with number "<<number<<"is absent\n";
std::cerr<<"Error:undefined format of token "<<token<<"\n";
std::cerr<<"Error:undefined word"<<interm<<"\n";
return value;
// Looks for `pattern` in this object and, failing that, in the objects
// referenced by "/Parent" entries.
// Returns this object when its own content contains the pattern as a token.
// Otherwise the child referenced between "/Parent" and the following " R" is
// fetched with getChildrenByBounds() and its content is searched with
// std::string::find. foundObj starts as NULL and is what the last visible
// statement returns.
// NOTE(review): the loop construct and the statements ending each step are
// not visible here - confirm against the full file.
// NOTE(review): startOfParent, endOfParent and startOfPattern are unsigned int
// values compared against std::string::npos. Where size_t is wider than
// unsigned int the comparison can never be true, so a missing "/Parent" or a
// missing pattern would go undetected - confirm the target platforms.
Object* Object::findPatternInObjOrParents(const std::string &pattern)
std::string content=getObjectContent();
if( Parser::findToken(content,pattern,0) != std::string::npos )
return this;
Object * parent = this;
Object *foundObj = NULL;
unsigned int startOfParent = content.find("/Parent");
// The search for " R" is done before the result of the search above is checked.
unsigned int endOfParent = content.find(" R", startOfParent);
if(startOfParent == std::string::npos)
std::vector <Object *> parents = parent->getChildrenByBounds(startOfParent, endOfParent);
// Exactly one referenced object is expected inside the "/Parent ... R" span.
if( parents.size() != 1 )
parent = parents[0];
std::string parentContent = parent->getObjectContent();
unsigned int startOfPattern = parentContent.find(pattern);
if(startOfPattern == std::string::npos)
content = parentContent;
foundObj = parent;
return foundObj;