moved pdf merger file into src directory.

preferencesAboutTextFull
Claudio Valerio 14 years ago
parent 97cee1b6b0
commit 7564a8badf
  1. 212
      src/pdf-merger/ASCII85Decode.cpp
  2. 48
      src/pdf-merger/ASCII85Decode.h
  3. 122
      src/pdf-merger/ASCIIHexDecode.cpp
  4. 44
      src/pdf-merger/ASCIIHexDecode.h
  5. 118
      src/pdf-merger/AbstractBoxElementHandler.h
  6. 0
      src/pdf-merger/AnnotsHandler.cpp
  7. 66
      src/pdf-merger/AnnotsHandler.h
  8. 42
      src/pdf-merger/CCITTFaxDecode.h
  9. 0
      src/pdf-merger/Config.h
  10. 146
      src/pdf-merger/ContentHandler.cpp
  11. 92
      src/pdf-merger/ContentHandler.h
  12. 104
      src/pdf-merger/CropBoxElementHandler.h
  13. 44
      src/pdf-merger/DCTDecode.h
  14. 50
      src/pdf-merger/Decoder.h
  15. 0
      src/pdf-merger/Document.cpp
  16. 0
      src/pdf-merger/Document.h
  17. 72
      src/pdf-merger/Exception.h
  18. 38
      src/pdf-merger/FileIsAbsentException.h
  19. 0
      src/pdf-merger/Filter.cpp
  20. 0
      src/pdf-merger/Filter.h
  21. 528
      src/pdf-merger/FilterPredictor.cpp
  22. 88
      src/pdf-merger/FilterPredictor.h
  23. 404
      src/pdf-merger/FlateDecode.cpp
  24. 52
      src/pdf-merger/FlateDecode.h
  25. 44
      src/pdf-merger/JBIG2Decode.h
  26. 386
      src/pdf-merger/LZWDecode.cpp
  27. 82
      src/pdf-merger/LZWDecode.h
  28. 104
      src/pdf-merger/MediaBoxElementHandler.h
  29. 194
      src/pdf-merger/MergePageDescription.h
  30. 230
      src/pdf-merger/Merger.cpp
  31. 84
      src/pdf-merger/Merger.h
  32. 0
      src/pdf-merger/Object.cpp
  33. 298
      src/pdf-merger/Object.h
  34. 288
      src/pdf-merger/OverlayDocumentParser.cpp
  35. 80
      src/pdf-merger/OverlayDocumentParser.h
  36. 1104
      src/pdf-merger/Page.cpp
  37. 128
      src/pdf-merger/Page.h
  38. 148
      src/pdf-merger/PageElementHandler.cpp
  39. 168
      src/pdf-merger/PageElementHandler.h
  40. 82
      src/pdf-merger/PageParser.h
  41. 0
      src/pdf-merger/Parser.cpp
  42. 152
      src/pdf-merger/Parser.h
  43. 248
      src/pdf-merger/Rectangle.cpp
  44. 78
      src/pdf-merger/Rectangle.h
  45. 50
      src/pdf-merger/RemoveHimSelfHandler.h
  46. 28
      src/pdf-merger/RemoveHimselfHandler.cpp
  47. 102
      src/pdf-merger/RotationHandler.h
  48. 84
      src/pdf-merger/RunLengthDecode.cpp
  49. 44
      src/pdf-merger/RunLengthDecode.h
  50. 662
      src/pdf-merger/Transformation.h
  51. 60
      src/pdf-merger/TypeElementHandler.h
  52. 394
      src/pdf-merger/Utils.cpp
  53. 64
      src/pdf-merger/Utils.h
  54. 0
      src/pdf-merger/doc/PDF Merge Library Description.doc
  55. 111
      src/pdf-merger/pdfMerger.pri

@ -1,106 +1,106 @@
#include <iostream> #include <iostream>
#include "ASCII85Decode.h" #include "ASCII85Decode.h"
using namespace merge_lib; using namespace merge_lib;
// Place values of the five base-85 digits of an ASCII85 group, most
// significant digit first: 85^4, 85^3, 85^2, 85^1, 85^0.
static const unsigned long pow85[] = {
    85UL*85*85*85, 85UL*85*85, 85UL*85, 85UL, 1UL
};
// Appends the `len` most significant bytes of the 32-bit group value
// `tuple` to `cur`, highest byte first.
//   cur   - output buffer that receives the bytes
//   tuple - decoded group value (only the low 32 bits are used)
//   len   - number of bytes to emit, 1..4; any other value is reported on
//           std::cerr and nothing is appended
void ASCII85Decode::_wput(std::string &cur,unsigned long tuple, int len)
{
    if( len < 1 || len > 4 )
    {
        std::cerr<<"Asci85Filter - unexpected len = "<<len<<"\n";
        return;
    }
    // bytes are taken from bit 24 downwards, one per requested output byte
    int shift = 24;
    for( int written = 0; written < len; ++written, shift -= 8 )
    {
        cur += static_cast<char>(tuple >> shift);
    }
}
bool ASCII85Decode::decode(std::string &encoded) bool ASCII85Decode::decode(std::string &encoded)
{ {
unsigned long tuple = 0; unsigned long tuple = 0;
std::string decoded = ""; std::string decoded = "";
int count = 0; int count = 0;
int size = encoded.size(); int size = encoded.size();
int i = 0; int i = 0;
bool found = false; bool found = false;
for(;size;) for(;size;)
{ {
char ch = encoded[i++]; char ch = encoded[i++];
// sometimes <~ can present. // sometimes <~ can present.
switch(ch) switch(ch)
{ {
default: default:
if( ch < '!' || ch > 'u' ) if( ch < '!' || ch > 'u' )
{ {
std::cerr<<"bag character in ascii85 block["<<ch<<"]\n"; std::cerr<<"bag character in ascii85 block["<<ch<<"]\n";
return false; return false;
} }
tuple += (unsigned long)(ch - '!') * pow85[count++]; tuple += (unsigned long)(ch - '!') * pow85[count++];
if( count == 5) if( count == 5)
{ {
_wput(decoded,tuple,4); _wput(decoded,tuple,4);
count = 0; count = 0;
tuple = 0; tuple = 0;
} }
break; break;
case 'z': case 'z':
if( count != 0 ) if( count != 0 )
{ {
std::cerr<<"Z inside of acii85 5-tuple!\n"; std::cerr<<"Z inside of acii85 5-tuple!\n";
return false; return false;
} }
decoded += "\0\0\0\0"; decoded += "\0\0\0\0";
break; break;
case '~': case '~':
if( --size ) if( --size )
{ {
ch = encoded[i++]; ch = encoded[i++];
if( ch == '>') if( ch == '>')
{ {
if( count > 0 ) if( count > 0 )
{ {
count --; count --;
tuple += pow85[count]; tuple += pow85[count];
_wput(decoded,tuple,count); _wput(decoded,tuple,count);
} }
} }
encoded = decoded; encoded = decoded;
return true; return true;
} }
std::cerr<<"~ without > in ascii85 stream!\n = ["<<ch<<"]\n"; std::cerr<<"~ without > in ascii85 stream!\n = ["<<ch<<"]\n";
encoded = decoded; encoded = decoded;
return false; return false;
break; break;
case '\n': case '\n':
case '\r': case '\r':
case '\t': case '\t':
case ' ': case ' ':
case '\0': case '\0':
case '\f': case '\f':
case '\b': case '\b':
case 0177: case 0177:
break; break;
} }
--size; --size;
} }
return true; return true;
} }

@ -1,24 +1,24 @@
#ifndef ASCII85Decode_H #ifndef ASCII85Decode_H
#define ASCII85Decode_H #define ASCII85Decode_H
#include <string> #include <string>
#include "Decoder.h" #include "Decoder.h"
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class ASCII85Decode : public Decoder class ASCII85Decode : public Decoder
{ {
public: public:
ASCII85Decode(){}; ASCII85Decode(){};
virtual ~ASCII85Decode(){}; virtual ~ASCII85Decode(){};
bool encode(std::string & decoded) {return false;} bool encode(std::string & decoded) {return false;}
bool decode(std::string & encoded); bool decode(std::string & encoded);
void initialize(Object * objectWithStram){}; void initialize(Object * objectWithStram){};
private: private:
void _wput(std::string &cur,unsigned long tuple, int len); void _wput(std::string &cur,unsigned long tuple, int len);
}; };
} }
#endif // FLATEDECODE_H_INCLUDED #endif // FLATEDECODE_H_INCLUDED

@ -1,61 +1,61 @@
#include "ASCIIHexDecode.h" #include "ASCIIHexDecode.h"
#include <string> #include <string>
#include "Utils.h" #include "Utils.h"
using namespace merge_lib; using namespace merge_lib;
// Characters that are skipped between hexadecimal digits.
const std::string WHITESPACES(" \t\f\v\n\r");
// Maps a nibble value (0..15) to its upper-case hexadecimal digit character.
// Fully parenthesised and without a trailing ';' so it can be used inside
// larger expressions.
#define HEX_TO_VAL(char_c) ((char_c) > 9 ? 'A' + (char_c) - 10 : '0' + (char_c))
// Converts one hexadecimal digit character to its numeric value.
//   c - character to convert ('0'-'9', 'A'-'F' or 'a'-'f')
// Returns 0..15 for a hexadecimal digit and 0 for any other character.
static unsigned int convertHexVal(unsigned char c)
{
    if( c >= '0' && c <= '9' )
        return static_cast<unsigned int>(c - '0');
    if( c >= 'A' && c <= 'F' )
        return static_cast<unsigned int>(c - 'A' + 10);
    if( c >= 'a' && c <= 'f' )
        return static_cast<unsigned int>(c - 'a' + 10);
    // not a hexadecimal digit
    return 0;
}
bool ASCIIHexDecode::decode(std::string & encoded) bool ASCIIHexDecode::decode(std::string & encoded)
{ {
bool isLow = true; bool isLow = true;
unsigned char decodedChar = '\0'; unsigned char decodedChar = '\0';
int len = encoded.size(); int len = encoded.size();
std::string decoded =""; std::string decoded ="";
for(int i = 0;i<len;i++) for(int i = 0;i<len;i++)
{ {
unsigned char ch = encoded[i]; unsigned char ch = encoded[i];
if( WHITESPACES.find(ch) != std::string::npos ) if( WHITESPACES.find(ch) != std::string::npos )
{ {
continue; continue;
} }
if( ch == '>' ) if( ch == '>' )
{ {
continue; // EOD found continue; // EOD found
} }
ch = convertHexVal(ch); ch = convertHexVal(ch);
if( isLow ) if( isLow )
{ {
decodedChar = (ch & 0x0F); decodedChar = (ch & 0x0F);
isLow = false; isLow = false;
} }
else else
{ {
decodedChar = ((decodedChar << 4) | ch); decodedChar = ((decodedChar << 4) | ch);
isLow = true; isLow = true;
decoded += decodedChar; decoded += decodedChar;
} }
} }
encoded = decoded; encoded = decoded;
return true; return true;
} }

@ -1,22 +1,22 @@
#ifndef ASCIIHexDecode_h #ifndef ASCIIHexDecode_h
#define ASCIIHexDecode_h #define ASCIIHexDecode_h
#include <string> #include <string>
#include "Decoder.h" #include "Decoder.h"
namespace merge_lib namespace merge_lib
{ {
// this class provides method for ASCIIHEX encoding and decoding // this class provides method for ASCIIHEX encoding and decoding
class ASCIIHexDecode : public Decoder class ASCIIHexDecode : public Decoder
{ {
public: public:
ASCIIHexDecode(){}; ASCIIHexDecode(){};
virtual ~ASCIIHexDecode(){}; virtual ~ASCIIHexDecode(){};
bool encode(std::string & decoded){return false;} bool encode(std::string & decoded){return false;}
bool decode(std::string & encoded); bool decode(std::string & encoded);
void initialize(Object * objectWithStram){}; void initialize(Object * objectWithStram){};
}; };
} }
#endif // FLATEDECODE_H_INCLUDED #endif // FLATEDECODE_H_INCLUDED

@ -1,59 +1,59 @@
#if !defined AbstractBoxElementHandler_h #if !defined AbstractBoxElementHandler_h
#define AbstractBoxElementHandler_h #define AbstractBoxElementHandler_h
#include "PageElementHandler.h" #include "PageElementHandler.h"
namespace merge_lib namespace merge_lib
{ {
//class for processing MediaBox field of Page object //class for processing MediaBox field of Page object
class AbstractBoxElementHandler: public PageElementHandler class AbstractBoxElementHandler: public PageElementHandler
{ {
public: public:
AbstractBoxElementHandler(Object * page): PageElementHandler(page) AbstractBoxElementHandler(Object * page): PageElementHandler(page)
{ {
} }
virtual ~AbstractBoxElementHandler() virtual ~AbstractBoxElementHandler()
{ {
} }
protected: protected:
void _retrieveBoxFromParent() void _retrieveBoxFromParent()
{ {
std::string content = _page->getObjectContent(); std::string content = _page->getObjectContent();
std::string mediaBox; std::string mediaBox;
Object * parent = _page; Object * parent = _page;
while(1) while(1)
{ {
unsigned int startOfParent = content.find("/Parent"); unsigned int startOfParent = content.find("/Parent");
unsigned int endOfParent = content.find(" R", startOfParent); unsigned int endOfParent = content.find(" R", startOfParent);
if(startOfParent == std::string::npos) if(startOfParent == std::string::npos)
break; break;
std::vector <Object *> parents = parent->getChildrenByBounds(startOfParent, endOfParent); std::vector <Object *> parents = parent->getChildrenByBounds(startOfParent, endOfParent);
if(parents.size() != 1) if(parents.size() != 1)
break; break;
parent = parents[0]; parent = parents[0];
std::string parentContent = parent->getObjectContent(); std::string parentContent = parent->getObjectContent();
unsigned int startOfMediaBox = parentContent.find(_handlerName); unsigned int startOfMediaBox = parentContent.find(_handlerName);
if(startOfMediaBox == std::string::npos) if(startOfMediaBox == std::string::npos)
{ {
content = parentContent; content = parentContent;
continue; continue;
} }
unsigned int endOfMediaBox = parentContent.find("]", startOfMediaBox); unsigned int endOfMediaBox = parentContent.find("]", startOfMediaBox);
mediaBox = parentContent.substr(startOfMediaBox, endOfMediaBox - startOfMediaBox + 1); mediaBox = parentContent.substr(startOfMediaBox, endOfMediaBox - startOfMediaBox + 1);
break; break;
} }
if(!mediaBox.empty()) if(!mediaBox.empty())
{ {
unsigned int startOfMediaBox = _page->getObjectContent().rfind(">>"); unsigned int startOfMediaBox = _page->getObjectContent().rfind(">>");
_page->insertToContent(startOfMediaBox, mediaBox); _page->insertToContent(startOfMediaBox, mediaBox);
_changeObjectContent(startOfMediaBox); _changeObjectContent(startOfMediaBox);
} }
} }
private: private:
virtual void _changeObjectContent(unsigned int startOfPageElement) = 0; virtual void _changeObjectContent(unsigned int startOfPageElement) = 0;
}; };
} }
#endif #endif

@ -1,33 +1,33 @@
#if !defined AnnotsHandler_h #if !defined AnnotsHandler_h
#define AnnotsHandler_h #define AnnotsHandler_h
#include "RemoveHimSelfHandler.h" #include "RemoveHimSelfHandler.h"
//this class is needed to process "Annots" field //this class is needed to process "Annots" field
//AnnotsHandler parses Annots field of Page object and fills //AnnotsHandler parses Annots field of Page object and fills
//annots container with Annots objects //annots container with Annots objects
namespace merge_lib namespace merge_lib
{ {
class AnnotsHandler: public RemoveHimselfHandler class AnnotsHandler: public RemoveHimselfHandler
{ {
public: public:
AnnotsHandler(Object * page, const std::string & handlerName, std::vector<Object *> & annots): AnnotsHandler(Object * page, const std::string & handlerName, std::vector<Object *> & annots):
RemoveHimselfHandler(page, handlerName), RemoveHimselfHandler(page, handlerName),
_annotations(annots) _annotations(annots)
{ {
_setHandlerName(handlerName); _setHandlerName(handlerName);
} }
private: private:
//methods //methods
void _processObjectContent(unsigned int startOfPageElement); void _processObjectContent(unsigned int startOfPageElement);
//memebers //memebers
std::vector<Object *> & _annotations; std::vector<Object *> & _annotations;
}; };
} }
#endif #endif

@ -1,21 +1,21 @@
#ifndef CCITTFaxDecode_H #ifndef CCITTFaxDecode_H
#define CCITTFaxDecode_H #define CCITTFaxDecode_H
#include <string> #include <string>
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class CCITTFaxDecode : public Decoder class CCITTFaxDecode : public Decoder
{ {
public: public:
CCITTFaxDecode(){}; CCITTFaxDecode(){};
virtual ~CCITTFaxDecode(){}; virtual ~CCITTFaxDecode(){};
bool encode(std::string & decoded) {return true;}; bool encode(std::string & decoded) {return true;};
bool decode(std::string & encoded) {return true;}; bool decode(std::string & encoded) {return true;};
void initialize(Object * objectWithStram){}; void initialize(Object * objectWithStram){};
}; };
} }
#endif // FLATEDECODE_H_INCLUDED #endif // FLATEDECODE_H_INCLUDED

@ -1,73 +1,73 @@
#include "ContentHandler.h" #include "ContentHandler.h"
#include "Filter.h" #include "Filter.h"
#include "FlateDecode.h" #include "FlateDecode.h"
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <string.h> #include <string.h>
using namespace merge_lib; using namespace merge_lib;
using namespace std; using namespace std;
//concatenate stream of all objects which contain Content of Page //concatenate stream of all objects which contain Content of Page
void ContentHandler::_processObjectContent(unsigned int startOfPageElement) void ContentHandler::_processObjectContent(unsigned int startOfPageElement)
{ {
unsigned int endOfPage = _findEndOfElementContent(startOfPageElement); unsigned int endOfPage = _findEndOfElementContent(startOfPageElement);
_concatenatedStream = _retrieveStreamContent(_page, startOfPageElement, endOfPage); _concatenatedStream = _retrieveStreamContent(_page, startOfPageElement, endOfPage);
FlateDecode flate; FlateDecode flate;
flate.encode(_concatenatedStream); flate.encode(_concatenatedStream);
} }
//write concatenated stream to Page object //write concatenated stream to Page object
void ContentHandler::_changeObjectContent(unsigned int startOfPageElement) void ContentHandler::_changeObjectContent(unsigned int startOfPageElement)
{ {
unsigned int endOfPage = _findEndOfElementContent(startOfPageElement); unsigned int endOfPage = _findEndOfElementContent(startOfPageElement);
_page->forgetAboutChildren(startOfPageElement, endOfPage); _page->forgetAboutChildren(startOfPageElement, endOfPage);
_page->eraseContent(startOfPageElement, endOfPage - startOfPageElement); _page->eraseContent(startOfPageElement, endOfPage - startOfPageElement);
unsigned int endOfObjectDescription = _pageContent.rfind(">>"); unsigned int endOfObjectDescription = _pageContent.rfind(">>");
const char * length = "/Filter /FlateDecode\n/Length "; const char * length = "/Filter /FlateDecode\n/Length ";
unsigned int sizeOfLength = strlen(length); unsigned int sizeOfLength = strlen(length);
_page->insertToContent(endOfObjectDescription, length, sizeOfLength); _page->insertToContent(endOfObjectDescription, length, sizeOfLength);
_page->insertToContent(endOfObjectDescription + sizeOfLength, Utils::uIntToStr(_concatenatedStream.size()).c_str()); _page->insertToContent(endOfObjectDescription + sizeOfLength, Utils::uIntToStr(_concatenatedStream.size()).c_str());
_page->appendContent("\nstream\n"); _page->appendContent("\nstream\n");
_page->appendContent(_concatenatedStream); _page->appendContent(_concatenatedStream);
_page->appendContent("endstream\n"); _page->appendContent("endstream\n");
_page->forgetStreamInFile(); _page->forgetStreamInFile();
} }
//get content of stream //get content of stream
// object - object with stream // object - object with stream
//leftBound - left bound of object's content //leftBound - left bound of object's content
//rightBound - right bound of object's content //rightBound - right bound of object's content
// Returns the stream content reachable from `object`.
//   object     - object to read from
//   leftBound  - left bound of the examined part of the object's content
//   rightBound - right bound of the examined part of the object's content
// An object that has a stream of its own yields that stream; otherwise the
// streams of the objects it references between the bounds are concatenated.
string ContentHandler::_retrieveStreamContent(merge_lib::Object * object, unsigned int leftBound, unsigned int rightBound)
{
    if(object->hasStream())
    {
        return _getStreamFromContent(object);
    }
    return _getStreamFromReferencies(object, leftBound, rightBound);
}
//get stream from Array elements //get stream from Array elements
// Concatenates the streams of all children referenced by objectWithArray
// between leftBound and rightBound, in the order returned by
// getSortedByPositionChildren(), and afterwards makes objectWithArray
// forget about the children in that range.
string ContentHandler::_getStreamFromReferencies(merge_lib::Object * objectWithArray, unsigned int leftBound, unsigned int rightBound)
{
    std::string joined;
    std::vector<Object *> children = objectWithArray->getSortedByPositionChildren(leftBound, rightBound);
    for(std::vector<Object *>::iterator child = children.begin(); child != children.end(); ++child)
    {
        // each child is examined over its whole content
        joined.append(_retrieveStreamContent(*child, 0, (*child)->getObjectContent().size()));
    }
    objectWithArray->forgetAboutChildren(leftBound,rightBound);
    return joined;
}
//get stream from Object //get stream from Object
// Returns the stream of objectWithStream as produced by
// Filter::getDecodedStream().
string ContentHandler::_getStreamFromContent(merge_lib::Object * objectWithStream)
{
    string decodedStream;
    Filter filter(objectWithStream);
    filter.getDecodedStream(decodedStream);
    return decodedStream;
}

@ -1,46 +1,46 @@
#if !defined ContentHandler_h #if !defined ContentHandler_h
#define ContentHandler_h #define ContentHandler_h
#include "PageElementHandler.h" #include "PageElementHandler.h"
namespace merge_lib namespace merge_lib
{ {
//this class is needed to process "Content" field of Page object //this class is needed to process "Content" field of Page object
class ContentHandler: public PageElementHandler class ContentHandler: public PageElementHandler
{ {
public: public:
ContentHandler(Object * page, const std::string & handlerName): ContentHandler(Object * page, const std::string & handlerName):
PageElementHandler(page) PageElementHandler(page)
{ {
_setHandlerName(handlerName); _setHandlerName(handlerName);
} }
virtual ~ContentHandler(){}; virtual ~ContentHandler(){};
private: private:
//methods //methods
//concatedate stream of all objects which contains Content of Page //concatedate stream of all objects which contains Content of Page
void _processObjectContent(unsigned int startOfPageElement); void _processObjectContent(unsigned int startOfPageElement);
//write concatenated stream to Page object //write concatenated stream to Page object
void _changeObjectContent(unsigned int startOfPageElement); void _changeObjectContent(unsigned int startOfPageElement);
//get content of stream //get content of stream
// object - object with stream // object - object with stream
//leftBound - left bound of object's content //leftBound - left bound of object's content
//rightBound - right bound of object's content //rightBound - right bound of object's content
std::string _retrieveStreamContent(Object * object, unsigned int leftBound, unsigned int rightBound); std::string _retrieveStreamContent(Object * object, unsigned int leftBound, unsigned int rightBound);
//get stream from Arrey elemetns //get stream from Arrey elemetns
std::string _getStreamFromReferencies(Object * objectWithArray, unsigned int leftBound, unsigned int rightBound); std::string _getStreamFromReferencies(Object * objectWithArray, unsigned int leftBound, unsigned int rightBound);
//get stream from Object //get stream from Object
std::string _getStreamFromContent(Object * objectWithStream); std::string _getStreamFromContent(Object * objectWithStream);
//memebers //memebers
std::string _concatenatedStream; std::string _concatenatedStream;
}; };
} }
#endif #endif

@ -1,52 +1,52 @@
#ifndef CROPBOX_ELEMENT_HANDLER_H #ifndef CROPBOX_ELEMENT_HANDLER_H
#define CROPBOX_ELEMENT_HANDLER_H #define CROPBOX_ELEMENT_HANDLER_H
#include "AbstractBoxElementHandler.h" #include "AbstractBoxElementHandler.h"
#include "Rectangle.h" #include "Rectangle.h"
namespace merge_lib namespace merge_lib
{ {
//class for processing CropBox field of Page object //class for processing CropBox field of Page object
class CropBoxElementHandler: public AbstractBoxElementHandler class CropBoxElementHandler: public AbstractBoxElementHandler
{ {
public: public:
CropBoxElementHandler(Object * page): AbstractBoxElementHandler(page) CropBoxElementHandler(Object * page): AbstractBoxElementHandler(page)
{ {
_setHandlerName("/CropBox"); _setHandlerName("/CropBox");
} }
virtual ~CropBoxElementHandler() virtual ~CropBoxElementHandler()
{ {
} }
private: private:
//replace CropBox with BBox //replace CropBox with BBox
virtual void _changeObjectContent(unsigned int startOfPageElement) virtual void _changeObjectContent(unsigned int startOfPageElement)
{ {
Rectangle mediaBox("/CropBox", _page->getObjectContent()); Rectangle mediaBox("/CropBox", _page->getObjectContent());
double shiftX = Utils::doubleEquals(mediaBox.x1,0)?0:-mediaBox.x1; double shiftX = Utils::doubleEquals(mediaBox.x1,0)?0:-mediaBox.x1;
double shiftY = Utils::doubleEquals(mediaBox.y1,0)?0:-mediaBox.y1; double shiftY = Utils::doubleEquals(mediaBox.y1,0)?0:-mediaBox.y1;
mediaBox.setNewRectangleName("/BBox"); mediaBox.setNewRectangleName("/BBox");
unsigned int endOfElement = _findEndOfElementContent(startOfPageElement); unsigned int endOfElement = _findEndOfElementContent(startOfPageElement);
_page->forgetAboutChildren(startOfPageElement,endOfElement); _page->forgetAboutChildren(startOfPageElement,endOfElement);
_page->eraseContent(startOfPageElement,endOfElement-startOfPageElement); _page->eraseContent(startOfPageElement,endOfElement-startOfPageElement);
std::string newContent; std::string newContent;
mediaBox.appendRectangleToString(newContent," "); mediaBox.appendRectangleToString(newContent," ");
_page->insertToContent(startOfPageElement, newContent); _page->insertToContent(startOfPageElement, newContent);
std::stringstream matrix; std::stringstream matrix;
matrix<<"/Matrix [ 1 0 0 1 "<<shiftX<<" "<< shiftY<<" ]\n"; matrix<<"/Matrix [ 1 0 0 1 "<<shiftX<<" "<< shiftY<<" ]\n";
_page->insertToContent(startOfPageElement, matrix.str()); _page->insertToContent(startOfPageElement, matrix.str());
} }
void _pageElementNotFound() void _pageElementNotFound()
{ {
_retrieveBoxFromParent(); _retrieveBoxFromParent();
} }
}; };
} }
#endif // CROPBOX_ELEMENT_HANDLER_H #endif // CROPBOX_ELEMENT_HANDLER_H

@ -1,22 +1,22 @@
#ifndef DCTDecode_H #ifndef DCTDecode_H
#define DCTDecode_H #define DCTDecode_H
#include <string> #include <string>
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class DCTDecode : public Decoder class DCTDecode : public Decoder
{ {
public: public:
DCTDecode(){}; DCTDecode(){};
virtual ~DCTDecode(){}; virtual ~DCTDecode(){};
bool encode(std::string & decoded) {return true;}; bool encode(std::string & decoded) {return true;};
bool decode(std::string & encoded) {return true;}; bool decode(std::string & encoded) {return true;};
void initialize(Object * objectWithStram){}; void initialize(Object * objectWithStram){};
}; };
} }
#endif // FLATEDECODE_H_INCLUDED #endif // FLATEDECODE_H_INCLUDED

@ -1,25 +1,25 @@
#ifndef DECODER_H #ifndef DECODER_H
#define DECODER_H #define DECODER_H
#include <string> #include <string>
#include "Object.h" #include "Object.h"
namespace merge_lib namespace merge_lib
{ {
// base class 4 all decoders // base class 4 all decoders
class Decoder class Decoder
{ {
public: public:
Decoder(){}; Decoder(){};
virtual ~Decoder(){}; virtual ~Decoder(){};
virtual bool encode(std::string &decoded) = 0; virtual bool encode(std::string &decoded) = 0;
virtual bool decode(std::string &encoded) = 0; virtual bool decode(std::string &encoded) = 0;
//read fields of objectWithStream and initialize internal parameters //read fields of objectWithStream and initialize internal parameters
//of decoder //of decoder
virtual void initialize(Object * objectWithStram) = 0; virtual void initialize(Object * objectWithStram) = 0;
}; };
} }
#endif // DECODER_H #endif // DECODER_H

@ -1,36 +1,36 @@
#ifndef EXCEPTION_H #ifndef EXCEPTION_H
#define EXCEPTION_H #define EXCEPTION_H
#include <exception> #include <exception>
#include <string> #include <string>
#include <sstream> #include <sstream>
#include <iostream> #include <iostream>
namespace merge_lib
{
    // Exception type of the merge library: a std::exception carrying a
    // message that is returned by what().
    class Exception : public std::exception
    {
    public:
        // Creates an exception with an empty message.
        Exception() {}
        // A null pointer is treated as an empty message; constructing a
        // std::string from a null pointer is undefined behaviour.
        Exception(const char * message) : _message(message ? message : "") {}
        Exception(std::string & message) : _message(message) {}
        // Uses the text accumulated in the stream so far.
        Exception(std::stringstream & message) : _message(message.str()) {}
        Exception(const std::string & message) : _message(message) {}
        virtual ~Exception() throw () {}
        // The returned pointer stays valid for the lifetime of this object.
        virtual const char * what() const throw() { return _message.c_str(); }
        // Intentionally does nothing.
        void show() const {}
    protected:
        std::string _message;
    };
}
#endif // EXCEPTION_HH #endif // EXCEPTION_HH

@ -1,19 +1,19 @@
#if !defined FileIsAbsentException_h #if !defined FileIsAbsentException_h
#define FileIsAbsentException_h #define FileIsAbsentException_h
#include <string> #include <string>
// Carries the name of a file for error reporting.
class FileIsAbsentException
{
public:
    // fileName - name of the file; a null pointer is stored as an empty
    //            name (constructing a std::string from null is undefined)
    FileIsAbsentException(const char * fileName):
            _fileName(fileName ? fileName : "") {}
    // Returns the stored name; the pointer stays valid for the lifetime of
    // this object. Const so it can be called on exceptions caught by
    // const reference.
    const char * getFileName() const
    {
        return _fileName.c_str();
    }
private:
    std::string _fileName;
};
#endif #endif

@ -1,264 +1,264 @@
#include "Config.h" #include "Config.h"
#include <iostream> #include <iostream>
#include <map> #include <map>
#include "FilterPredictor.h" #include "FilterPredictor.h"
#include "Utils.h" #include "Utils.h"
#include "Object.h" #include "Object.h"
#include "Parser.h" #include "Parser.h"
using namespace merge_lib; using namespace merge_lib;
// Names of the dictionary entries read by FilterPredictor.
const std::string FilterPredictor::PREDICTOR_TOKEN = "/Predictor";
const std::string FilterPredictor::DECODE_PARAM_TOKEN = "/DecodeParms";
const std::string COLUMNS_TOKEN = "/Columns";
const std::string COLORS_TOKEN = "/Colors";
const std::string BITS_TOKEN = "/BitsPerComponent";
const std::string EARLY_TOKEN = "/EarlyChange";
// Delimiters of a dictionary.
const std::string DICT_START_TOKEN = "<<";
const std::string DICT_END_TOKEN = ">>";
// Starts with predictor 1, one colour, 8 bits per component, one column and
// early change 1; obtainDecodeParams() replaces these with the values found
// in a dictionary.
FilterPredictor::FilterPredictor():
        _predictor(1),
        _colors(1),
        _bits(8),
        _columns(1),
        _earlyChange(1)
{
}
// Nothing to release.
FilterPredictor::~FilterPredictor()
{
}
std::string FilterPredictor::getDictionaryContentStr(std::string & in, size_t &pos ) std::string FilterPredictor::getDictionaryContentStr(std::string & in, size_t &pos )
{ {
size_t beg = in.find(DICT_START_TOKEN,pos); size_t beg = in.find(DICT_START_TOKEN,pos);
if( beg == std::string::npos ) if( beg == std::string::npos )
{ {
return ""; return "";
} }
beg += DICT_START_TOKEN.size(); beg += DICT_START_TOKEN.size();
size_t end = in.find(DICT_END_TOKEN,beg); size_t end = in.find(DICT_END_TOKEN,beg);
if( end == std::string::npos ) if( end == std::string::npos )
{ {
return ""; return "";
} }
return in.substr(beg,end-beg); return in.substr(beg,end-beg);
} }
// Reads the predictor parameters from the dictionary text and stores them in
// the members.
//   objectWithStream - object used to extract a name's value from dictStr
//   dictStr          - text of the decode parameters dictionary
// A parameter that is absent from dictStr keeps its current value. A
// parameter that is present but has no readable value is reported on
// std::cerr and also keeps its current value; previously the empty string
// was still converted and stored over the default.
void FilterPredictor::obtainDecodeParams(Object *objectWithStream, std::string &dictStr)
{
    typedef std::map< std::string, int> DecodeParams;
    DecodeParams params;
    // start from the current values
    params[PREDICTOR_TOKEN] = _predictor;
    params[COLUMNS_TOKEN] = _columns;
    params[COLORS_TOKEN] = _colors;
    params[BITS_TOKEN] = _bits;
    params[EARLY_TOKEN] = _earlyChange;

    // replace them with the values found in the dictionary
    for(DecodeParams::iterator it = params.begin(); it != params.end(); ++it)
    {
        const std::string & name = it->first;
        if( dictStr.find(name) == std::string::npos )
        {
            continue;
        }
        // we assume the following pattern "/Colors 8"
        std::string numstr = objectWithStream->getNameSimpleValue(dictStr,name);
        if( numstr.empty() )
        {
            std::cerr<<"Wrong value of "<<name<<"defined as "<<dictStr<<"\n";
            continue; // keep the current value
        }
        it->second = Utils::stringToInt(numstr);
    }

    // refresh the values after reading
    _predictor = params[PREDICTOR_TOKEN];
    _columns = params[COLUMNS_TOKEN];
    _colors = params[COLORS_TOKEN];
    _bits = params[BITS_TOKEN];
    _earlyChange = params[EARLY_TOKEN];
}
// Loads the predictor parameters from the header of `objectWithStream`.
// Does nothing when the pointer is null or when the header does not contain
// DECODE_PARAM_TOKEN; the members then keep their current values.
void FilterPredictor::initialize(Object *objectWithStream)
{
    if( !objectWithStream )
    {
        return;
    }
    std::string content;
    objectWithStream->getHeader(content);
    // the decoder parameters live in a dictionary that follows the token
    size_t position = content.find(DECODE_PARAM_TOKEN);
    if( position == std::string::npos )
    {
        return;
    }
    position += DECODE_PARAM_TOKEN.size();
    std::string dictStr = getDictionaryContentStr(content, position);
    // trace_hex(dictStr.data(),dictStr.size());
    obtainDecodeParams(objectWithStream, dictStr);
}
//----------------------------- //-----------------------------
// Function perorms decoding of one row of data. // Function perorms decoding of one row of data.
//----------------------------- //-----------------------------
bool FilterPredictor::decodeRow(const char *in,std::string & out,const std::string &prev,int curPrediction) bool FilterPredictor::decodeRow(const char *in,std::string & out,const std::string &prev,int curPrediction)
{ {
std::string dec(_bytesPerPixel,'\0'); std::string dec(_bytesPerPixel,'\0');
dec.append(in,_rowLen); // the buffer to decode dec.append(in,_rowLen); // the buffer to decode
int start = _bytesPerPixel; int start = _bytesPerPixel;
int end = _bytesPerPixel + _rowLen; int end = _bytesPerPixel + _rowLen;
switch(curPrediction) switch(curPrediction)
{ {
case 2: // TIFF predictor case 2: // TIFF predictor
// to do, implement TIFF predictor // to do, implement TIFF predictor
std::cerr<<"TIFF predictor not yet implemented!\n"; std::cerr<<"TIFF predictor not yet implemented!\n";
return false; return false;
break; break;
case 1: case 1:
case 10: // PNG NONE prediction case 10: // PNG NONE prediction
// nothing to do, take as is // nothing to do, take as is
break; break;
case 11: // PNG SUB on all raws case 11: // PNG SUB on all raws
for(int i = start;i<end;i++) for(int i = start;i<end;i++)
{ {
dec[i] += dec[ i - _bytesPerPixel ]; dec[i] += dec[ i - _bytesPerPixel ];
} }
break; break;
case 12: // PNG UP on all raws case 12: // PNG UP on all raws
for(int i = start;i<end;i++) for(int i = start;i<end;i++)
{ {
dec[i] += prev[i]; dec[i] += prev[i];
} }
break; break;
case 13: // PNG average on all raws case 13: // PNG average on all raws
//Average(x) + floor((Raw(x-bpp)+Prior(x))/2) //Average(x) + floor((Raw(x-bpp)+Prior(x))/2)
for(int i = start;i<end;i++) for(int i = start;i<end;i++)
{ {
int leftV = int(dec[i - _bytesPerPixel])&0xFF; int leftV = int(dec[i - _bytesPerPixel])&0xFF;
int aboveV = int(prev[i - _bytesPerPixel])&0xFF; int aboveV = int(prev[i - _bytesPerPixel])&0xFF;
unsigned char average = (unsigned char)( (((leftV+aboveV)>>1)&0xFF)); unsigned char average = (unsigned char)( (((leftV+aboveV)>>1)&0xFF));
dec[i] += average; dec[i] += average;
} }
break; break;
case 14: //PNG PAETH on all rows case 14: //PNG PAETH on all rows
/*function PaethPredictor (a, b, c) /*function PaethPredictor (a, b, c)
; a = left, b = above, c = upper left ; a = left, b = above, c = upper left
p := a + b - c ; initial estimate p := a + b - c ; initial estimate
pa := abs(p - a) ; distances to a, b, c pa := abs(p - a) ; distances to a, b, c
pb := abs(p - b) pb := abs(p - b)
pc := abs(p - c) pc := abs(p - c)
; return nearest of a,b,c, ; return nearest of a,b,c,
; breaking ties in order a,b,c. ; breaking ties in order a,b,c.
if pa <= pb AND pa <= pc then return a if pa <= pb AND pa <= pc then return a
else if pb <= pc then return b else if pb <= pc then return b
else return c else return c
Paeth(x) + PaethPredictor(Raw(x-bpp), Prior(x), Prior(x-bpp)) Paeth(x) + PaethPredictor(Raw(x-bpp), Prior(x), Prior(x-bpp))
*/ */
for(int i = start;i<end;i++) for(int i = start;i<end;i++)
{ {
int left = int( dec[i - _bytesPerPixel]) & 0xFF; int left = int( dec[i - _bytesPerPixel]) & 0xFF;
int upperLeft = int( prev[i - _bytesPerPixel]) & 0xFF; int upperLeft = int( prev[i - _bytesPerPixel]) & 0xFF;
int above = int( prev[i]) & 0xFF; int above = int( prev[i]) & 0xFF;
int p = left + above - upperLeft; int p = left + above - upperLeft;
int pLeft = abs(p - left); int pLeft = abs(p - left);
int pAbove = abs(p - above); int pAbove = abs(p - above);
int pUpperLeft = abs(p - upperLeft); int pUpperLeft = abs(p - upperLeft);
int paeth = 0; int paeth = 0;
if( pLeft <= pAbove && pLeft <=pUpperLeft ) if( pLeft <= pAbove && pLeft <=pUpperLeft )
{ {
paeth = left; paeth = left;
} }
else if( pAbove <= pUpperLeft ) else if( pAbove <= pUpperLeft )
{ {
paeth = above; paeth = above;
} }
else else
{ {
paeth = upperLeft; paeth = upperLeft;
} }
dec[i] += char (paeth & 0xFF); dec[i] += char (paeth & 0xFF);
} }
break; break;
default: default:
break; break;
} }
out = dec; out = dec;
return true; return true;
} }
// method performs prediction decoding // method performs prediction decoding
bool FilterPredictor::decode(std::string &content) bool FilterPredictor::decode(std::string &content)
{ {
bool isPNG = _predictor >= 10?true:false; bool isPNG = _predictor >= 10?true:false;
int rowBits = _columns*_colors*_bits; int rowBits = _columns*_colors*_bits;
_rowLen = (rowBits>>3) + (rowBits&7); _rowLen = (rowBits>>3) + (rowBits&7);
_bytesPerPixel = (_colors * _bits + 7) >> 3; _bytesPerPixel = (_colors * _bits + 7) >> 3;
int rows = 0; int rows = 0;
if( isPNG ) if( isPNG )
{ {
rows = content.size()/(_rowLen+1) + (content.size()% (_rowLen+1)); rows = content.size()/(_rowLen+1) + (content.size()% (_rowLen+1));
} }
else else
{ {
rows = content.size()/(_rowLen) + (content.size()% (_rowLen) ); rows = content.size()/(_rowLen) + (content.size()% (_rowLen) );
} }
int inSize = content.size(); int inSize = content.size();
std::string out = ""; std::string out = "";
if( inSize%(isPNG?_rowLen+1:_rowLen) != 0 ) if( inSize%(isPNG?_rowLen+1:_rowLen) != 0 )
{ {
std::cerr<<"Warning : wrong PNG identation inSize "<<inSize<<" rowLen = "<<_rowLen<<" isPNG = "<<isPNG<<"\n"; std::cerr<<"Warning : wrong PNG identation inSize "<<inSize<<" rowLen = "<<_rowLen<<" isPNG = "<<isPNG<<"\n";
content = out; content = out;
return false; return false;
} }
const char *curRow = NULL; const char *curRow = NULL;
std::string prev(_bytesPerPixel+_rowLen,'\0'); //"previous" line std::string prev(_bytesPerPixel+_rowLen,'\0'); //"previous" line
int curPredictor = 1; int curPredictor = 1;
for(int i = 0;i<rows;i++) for(int i = 0;i<rows;i++)
{ {
curRow = content.data() + (i* (_rowLen + (isPNG?1:0)) ); curRow = content.data() + (i* (_rowLen + (isPNG?1:0)) );
if( isPNG ) if( isPNG )
{ {
// this is PNG predictor! // this is PNG predictor!
curPredictor = *curRow++; curPredictor = *curRow++;
curPredictor +=10; curPredictor +=10;
} }
else else
{ {
curPredictor = _predictor; // default NONE predictor curPredictor = _predictor; // default NONE predictor
} }
std::string dec; std::string dec;
if( !decodeRow(curRow,dec,prev,curPredictor) ) if( !decodeRow(curRow,dec,prev,curPredictor) )
{ {
std::cerr<<"Unable to process prediction"<<curPredictor<<"!\n"; std::cerr<<"Unable to process prediction"<<curPredictor<<"!\n";
content = out; content = out;
return false; return false;
} }
//trace_hex(dec.data()+_bytesPerPixel,_rowLen); //trace_hex(dec.data()+_bytesPerPixel,_rowLen);
prev = dec; prev = dec;
out += std::string(dec.data()+_bytesPerPixel,_rowLen); out += std::string(dec.data()+_bytesPerPixel,_rowLen);
} }
content = out; content = out;
return true; return true;
} }

@ -1,44 +1,44 @@
#ifndef FILTER_PREDICTOR_H
#define FILTER_PREDICTOR_H
#include <string>
#include "Decoder.h"
namespace merge_lib
{
    // This class undoes the predictor step described by a stream's decode
    // parameters. Only decoding is supported; encode() always reports failure.
    class FilterPredictor:public Decoder
    {
    public:
        FilterPredictor();
        virtual ~FilterPredictor();
        // Not implemented: always returns false and leaves the data alone.
        bool encode(std::string & decoded){return false;}
        // Reverses the prediction in place; returns false on failure.
        bool decode(std::string & encoded);
        // Reads the predictor parameters from the object's header.
        void initialize(Object * objectWithStream);
        static const std::string PREDICTOR_TOKEN;
        static const std::string DECODE_PARAM_TOKEN;
        // Value of the early-change parameter (defaults to 1).
        int getEarlyChange() const { return _earlyChange;}
    private:
        bool decodeRow(const char *input, std::string &out,const std::string &prev,int curPrediction);
        void obtainDecodeParams(Object*objectWithStream,std::string &dictStr);
        std::string getDictionaryContentStr(std::string & in, size_t &pos );
        int _predictor;      // predictor selector
        int _colors;         // colour components per sample
        int _bits;           // bits per colour component
        int _columns;        // samples per row
        int _earlyChange;    // early-change parameter, exposed via getEarlyChange()
        int _rowLen;         // bytes per row, computed by decode()
        int _bytesPerPixel;  // bytes per pixel, computed by decode()
    };
}
#endif

@ -1,202 +1,202 @@
#include <iostream> #include <iostream>
#include "FlateDecode.h" #include "FlateDecode.h"
#include "zlib.h" #include "zlib.h"
#include "Utils.h" #include "Utils.h"
#include <string.h> #include <string.h>
using namespace merge_lib; using namespace merge_lib;
// Number of bytes by which the zlib output buffers grow.
#define ZLIB_MEM_DELTA 65535
// Prints `msg` and the zlib status to stdout when `err` is not Z_OK.
// Wrapped in do/while(0) so it behaves as a single statement (safe inside
// if/else), and deliberately not ended with a line continuation so that the
// following source line can never be swallowed into the macro.
#define ZLIB_CHECK_ERR(err,msg) \
    do { \
        if( (err) != Z_OK ) { \
            std::cout<<(msg)<<" ZLIB error:"<<(err)<<std::endl; \
        } \
    } while(0)
// Starts without a predictor; initialize() creates one when needed.
FlateDecode::FlateDecode():_predict(NULL)
{
}
// Releases the predictor created by initialize(), if any.
FlateDecode::~FlateDecode()
{
    delete _predict; // deleting a null pointer is a no-op
}
// Prepares the decoder for `objectWithStream`.
// A FilterPredictor is created only when the object's header contains
// FilterPredictor::DECODE_PARAM_TOKEN. A predictor left over from an earlier
// call is released first, so calling initialize() again neither leaks it nor
// applies it to a stream that has no decode parameters.
// A null pointer leaves the decoder unchanged.
void FlateDecode::initialize(Object * objectWithStream)
{
    if( !objectWithStream )
    {
        return;
    }
    std::string head;
    objectWithStream->getHeader(head);
    delete _predict;
    _predict = NULL;
    if( head.find(FilterPredictor::DECODE_PARAM_TOKEN) != std::string::npos )
    {
        _predict = new FilterPredictor();
        _predict->initialize(objectWithStream);
    }
}
/** @brief encode /** @brief encode
* *
* @todo: * @todo:
document this function document this function
*/ */
bool FlateDecode::encode(std::string &decoded) bool FlateDecode::encode(std::string &decoded)
{ {
z_stream stream; z_stream stream;
stream.zalloc = (alloc_func)0; stream.zalloc = (alloc_func)0;
stream.zfree = (free_func)0; stream.zfree = (free_func)0;
stream.opaque = (voidpf)0; stream.opaque = (voidpf)0;
size_t out_len = 0; size_t out_len = 0;
unsigned char *out_p = NULL; unsigned char *out_p = NULL;
stream.next_out = out_p; stream.next_out = out_p;
stream.avail_out = (uInt)out_len; stream.avail_out = (uInt)out_len;
stream.next_in = (unsigned char*)decoded.c_str(); stream.next_in = (unsigned char*)decoded.c_str();
stream.avail_in = (uInt)decoded.size(); stream.avail_in = (uInt)decoded.size();
int err = deflateInit(&stream, Z_DEFAULT_COMPRESSION); int err = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
ZLIB_CHECK_ERR(err, "deflateInit"); ZLIB_CHECK_ERR(err, "deflateInit");
if ( err != Z_OK ) if ( err != Z_OK )
{ {
return false; return false;
} }
bool toContinue = false; bool toContinue = false;
int flush = Z_NO_FLUSH; int flush = Z_NO_FLUSH;
do do
{ {
toContinue = false; toContinue = false;
flush = (stream.avail_in == 0)?Z_FINISH:Z_NO_FLUSH; flush = (stream.avail_in == 0)?Z_FINISH:Z_NO_FLUSH;
if ( !stream.avail_out ) if ( !stream.avail_out )
{ {
// increase the space // increase the space
out_p = (unsigned char*)realloc(out_p,out_len + ZLIB_MEM_DELTA); out_p = (unsigned char*)realloc(out_p,out_len + ZLIB_MEM_DELTA);
// init new memory // init new memory
unsigned char *new_out_start = out_p + out_len; unsigned char *new_out_start = out_p + out_len;
memset(new_out_start,0,ZLIB_MEM_DELTA); memset(new_out_start,0,ZLIB_MEM_DELTA);
// Point next_out to the next unused byte // Point next_out to the next unused byte
stream.next_out = new_out_start; stream.next_out = new_out_start;
// Update the size of the buffer // Update the size of the buffer
stream.avail_out = (uInt)ZLIB_MEM_DELTA; stream.avail_out = (uInt)ZLIB_MEM_DELTA;
out_len += ZLIB_MEM_DELTA; out_len += ZLIB_MEM_DELTA;
} }
err = deflate(&stream,flush); err = deflate(&stream,flush);
if ( err == Z_OK && stream.avail_out == 0 ) if ( err == Z_OK && stream.avail_out == 0 )
{ {
toContinue = true; toContinue = true;
} }
} }
while ( toContinue || flush == Z_NO_FLUSH ); while ( toContinue || flush == Z_NO_FLUSH );
err = deflateEnd(&stream); err = deflateEnd(&stream);
ZLIB_CHECK_ERR(err, "deflateEnd"); ZLIB_CHECK_ERR(err, "deflateEnd");
if( err != Z_OK ) if( err != Z_OK )
{ {
free(out_p); free(out_p);
return false; return false;
} }
decoded = std::string((char*)out_p,stream.total_out); decoded = std::string((char*)out_p,stream.total_out);
free(out_p); free(out_p);
return true; return true;
} }
/** @brief decode /** @brief decode
* *
* @todo: document this function * @todo: document this function
*/ */
bool FlateDecode::decode(std::string & encoded) bool FlateDecode::decode(std::string & encoded)
{ {
z_stream stream; z_stream stream;
//some initialization of ZLIB stuff //some initialization of ZLIB stuff
stream.zalloc = (alloc_func)0; stream.zalloc = (alloc_func)0;
stream.zfree = (free_func)0; stream.zfree = (free_func)0;
stream.opaque = (voidpf)0; stream.opaque = (voidpf)0;
//trace_hex((char*)encoded.c_str(),encoded.size()); //trace_hex((char*)encoded.c_str(),encoded.size());
stream.next_in = (unsigned char*)encoded.c_str(); stream.next_in = (unsigned char*)encoded.c_str();
stream.avail_in = (uInt)encoded.size(); stream.avail_in = (uInt)encoded.size();
int err = inflateInit(&stream); int err = inflateInit(&stream);
ZLIB_CHECK_ERR(err,"InflateInit"); ZLIB_CHECK_ERR(err,"InflateInit");
if ( err != Z_OK ) if ( err != Z_OK )
{ {
return false; return false;
} }
unsigned char *out_p = NULL; unsigned char *out_p = NULL;
int out_len = 0; int out_len = 0;
stream.next_out = out_p; stream.next_out = out_p;
stream.avail_out = out_len; stream.avail_out = out_len;
for (;;) for (;;)
{ {
if ( !stream.avail_out) if ( !stream.avail_out)
{ {
// there is no more space for deallocation - increase the space // there is no more space for deallocation - increase the space
out_p = (unsigned char*)realloc(out_p,out_len + ZLIB_MEM_DELTA); out_p = (unsigned char*)realloc(out_p,out_len + ZLIB_MEM_DELTA);
// init new memory // init new memory
unsigned char *new_out_start = out_p + out_len; unsigned char *new_out_start = out_p + out_len;
memset(new_out_start,0,ZLIB_MEM_DELTA); memset(new_out_start,0,ZLIB_MEM_DELTA);
// Point next_out to the next unused byte // Point next_out to the next unused byte
stream.next_out = new_out_start; stream.next_out = new_out_start;
// Update the size of the uncompressed buffer // Update the size of the uncompressed buffer
stream.avail_out = (uInt)ZLIB_MEM_DELTA; stream.avail_out = (uInt)ZLIB_MEM_DELTA;
out_len += ZLIB_MEM_DELTA; out_len += ZLIB_MEM_DELTA;
} }
err = inflate(&stream,Z_NO_FLUSH); err = inflate(&stream,Z_NO_FLUSH);
if ( err == Z_STREAM_END) if ( err == Z_STREAM_END)
{ {
break; break;
} }
ZLIB_CHECK_ERR(err,"Deflate"); ZLIB_CHECK_ERR(err,"Deflate");
if ( err != Z_OK ) if ( err != Z_OK )
{ {
if( out_p ) if( out_p )
{ {
free(out_p); free(out_p);
} }
return false; return false;
} }
} }
err = inflateEnd(&stream); err = inflateEnd(&stream);
ZLIB_CHECK_ERR(err,"InflateEnd"); ZLIB_CHECK_ERR(err,"InflateEnd");
if( err != Z_OK ) if( err != Z_OK )
{ {
if( out_p ) if( out_p )
{ {
free(out_p); free(out_p);
} }
return false; return false;
} }
encoded = std::string((char*)out_p,stream.total_out); encoded = std::string((char*)out_p,stream.total_out);
free(out_p); free(out_p);
// trace_hex((char*)encoded.c_str(),encoded.size()); // trace_hex((char*)encoded.c_str(),encoded.size());
// if predictor exists for that object, then lets decode it // if predictor exists for that object, then lets decode it
if( _predict ) if( _predict )
{ {
_predict->decode(encoded); _predict->decode(encoded);
} }
return true; return true;
} }

@ -1,26 +1,26 @@
#ifndef FLATEDECODE_H_INCLUDED
#define FLATEDECODE_H_INCLUDED
#include <string>
#include "Decoder.h"
#include "FilterPredictor.h"
namespace merge_lib
{
    // this class provides method for FlateDecode encoding and decoding
    // NOTE(review): the class owns _predict through a raw pointer and
    // declares no copy constructor or assignment operator, so copying an
    // instance would delete the predictor twice - confirm it is never copied.
    class FlateDecode : public Decoder
    {
    public:
        FlateDecode();
        virtual ~FlateDecode();
        // Compresses `decoded` in place; returns false on a zlib error.
        bool encode(std::string & decoded);
        // Decompresses `encoded` in place, then applies the predictor if any.
        bool decode(std::string & encoded);
        // Creates the predictor when the object's header asks for one.
        void initialize(Object * objectWithStream);
    private:
        FilterPredictor *_predict; // owned; NULL when no predictor is needed
    };
}
#endif // FLATEDECODE_H_INCLUDED

@ -1,22 +1,22 @@
#ifndef JBIG2Decode_H #ifndef JBIG2Decode_H
#define JBIG2Decode_H #define JBIG2Decode_H
#include <string> #include <string>
namespace merge_lib namespace merge_lib
{ {
// this class provides method for FlateDecode encoding and decoding // this class provides method for FlateDecode encoding and decoding
class JBIG2Decode : public Decoder class JBIG2Decode : public Decoder
{ {
public: public:
JBIG2Decode(){}; JBIG2Decode(){};
virtual ~JBIG2Decode(){}; virtual ~JBIG2Decode(){};
bool encode(std::string & decoded) {return true;}; bool encode(std::string & decoded) {return true;};
bool decode(std::string & encoded) {return true;}; bool decode(std::string & encoded) {return true;};
void initialize(Object * objectWithStram){}; void initialize(Object * objectWithStram){};
}; };
} }
#endif // FLATEDECODE_H_INCLUDED #endif // FLATEDECODE_H_INCLUDED

@ -1,193 +1,193 @@
#include <iostream> #include <iostream>
#include "LZWDecode.h" #include "LZWDecode.h"
#include "FilterPredictor.h" #include "FilterPredictor.h"
// method performs decoding // method performs decoding
using namespace merge_lib; using namespace merge_lib;
// Builds an idle decoder. The initialisers are listed in the order in which
// the members are declared in the class, which is the order the compiler
// actually uses. _encoded is a reference that is bound to _dummy here;
// binding happens before _dummy is constructed, which is fine because the
// reference is not read until later.
LZWDecode::LZWDecode():
    _predict(NULL),
    _encoded(_dummy),
    _dummy(""),
    _curSymbolIndex(0),
    _earlyChange(1),
    _readBuf(0),
    _readBits(0),
    _nextCode(0),
    _bitsToRead(0),
    _first(true),
    _curSequenceLength(0)
{
    clearTable();
}
// Releases the predictor created by initialize(), if any.
LZWDecode::~LZWDecode()
{
    delete _predict; // deleting a null pointer is a no-op
}
// Prepares the decoder for `objectWithStream`.
// Any predictor from an earlier call is released and the early-change value
// returns to its default of 1. When the object's header contains
// FilterPredictor::DECODE_PARAM_TOKEN a new predictor is created and the
// early-change value is taken from it. The bit reader and the code table
// are reset in both cases. A null pointer leaves the decoder unchanged.
void LZWDecode::initialize(Object * objectWithStream)
{
    if( !objectWithStream )
    {
        return;
    }
    std::string head;
    objectWithStream->getHeader(head);
    delete _predict;
    _predict = NULL;
    _earlyChange = 1;
    if( head.find(FilterPredictor::DECODE_PARAM_TOKEN) != std::string::npos )
    {
        _predict = new FilterPredictor();
        _predict->initialize(objectWithStream);
        _earlyChange = _predict->getEarlyChange();
    }
    _readBits = 0;
    _readBuf = 0;
    clearTable();
}
void LZWDecode::clearTable() void LZWDecode::clearTable()
{ {
_nextCode = 258; _nextCode = 258;
_bitsToRead = 9; _bitsToRead = 9;
_curSequenceLength = 0; _curSequenceLength = 0;
_first = true; _first = true;
} }
int LZWDecode::getCode() int LZWDecode::getCode()
{ {
int c = 0; int c = 0;
int code = 0; int code = 0;
while (_readBits < _bitsToRead) while (_readBits < _bitsToRead)
{ {
if( _curSymbolIndex < _encoded.size() ) if( _curSymbolIndex < _encoded.size() )
{ {
c = _encoded[_curSymbolIndex++]; c = _encoded[_curSymbolIndex++];
} }
else else
{ {
return EOF; return EOF;
} }
_readBuf = (_readBuf << 8) | (c & 0xff); _readBuf = (_readBuf << 8) | (c & 0xff);
_readBits += 8; _readBits += 8;
} }
code = (_readBuf >> (_readBits - _bitsToRead)) & ((1 << _bitsToRead) - 1); code = (_readBuf >> (_readBits - _bitsToRead)) & ((1 << _bitsToRead) - 1);
_readBits -= _bitsToRead; _readBits -= _bitsToRead;
return code; return code;
} }
// Method performs LZW decoding // Method performs LZW decoding
bool LZWDecode::decode(std::string & encoded) bool LZWDecode::decode(std::string & encoded)
{ {
_curSymbolIndex = 0; _curSymbolIndex = 0;
_encoded = encoded; _encoded = encoded;
// LZW decoding // LZW decoding
std::string decoded; std::string decoded;
struct DecodingTable struct DecodingTable
{ {
int length; int length;
int head; int head;
unsigned tail; unsigned tail;
} decTable[4097]; } decTable[4097];
int prevCode = 0; int prevCode = 0;
int newChar = 0; int newChar = 0;
unsigned curSequence[4097]; unsigned curSequence[4097];
int nextLength = 0; int nextLength = 0;
clearTable(); clearTable();
while(1) while(1)
{ {
int code = getCode(); int code = getCode();
if( code == EOF || code == 257 ) if( code == EOF || code == 257 )
{ {
// finish // finish
break; break;
} }
if( code == 256 ) if( code == 256 )
{ {
clearTable(); clearTable();
continue; continue;
} }
if( _nextCode >= 4997 ) if( _nextCode >= 4997 )
{ {
std::cout<<"Bad LZW stream - unexpected clearTable\n"; std::cout<<"Bad LZW stream - unexpected clearTable\n";
clearTable(); clearTable();
continue; continue;
} }
nextLength = _curSequenceLength + 1; nextLength = _curSequenceLength + 1;
if( code < 256 ) if( code < 256 )
{ {
curSequence[ 0 ] = code; curSequence[ 0 ] = code;
_curSequenceLength = 1; _curSequenceLength = 1;
} }
else if( code < _nextCode ) else if( code < _nextCode )
{ {
//lets take sequence from table //lets take sequence from table
_curSequenceLength = decTable[code].length; _curSequenceLength = decTable[code].length;
int j = code; int j = code;
for( int i = _curSequenceLength - 1; i > 0; i--) for( int i = _curSequenceLength - 1; i > 0; i--)
{ {
curSequence[ i ] = decTable[j].tail; curSequence[ i ] = decTable[j].tail;
j = decTable[ j ].head; j = decTable[ j ].head;
} }
curSequence[0] = j; curSequence[0] = j;
} }
else if( code == _nextCode ) else if( code == _nextCode )
{ {
curSequence[ _curSequenceLength ] = newChar; curSequence[ _curSequenceLength ] = newChar;
++_curSequenceLength; ++_curSequenceLength;
} }
else else
{ {
std::cout<<"Bad LZW stream - unexpected code "<<code<<"\n"; std::cout<<"Bad LZW stream - unexpected code "<<code<<"\n";
break; break;
} }
newChar = curSequence[0]; newChar = curSequence[0];
if( _first ) if( _first )
{ {
_first = false; _first = false;
} }
else else
{ {
// lets build decoding table // lets build decoding table
decTable[ _nextCode ].length = nextLength; decTable[ _nextCode ].length = nextLength;
decTable[ _nextCode ].head = prevCode; decTable[ _nextCode ].head = prevCode;
decTable[ _nextCode ].tail = newChar; decTable[ _nextCode ].tail = newChar;
++ _nextCode; ++ _nextCode;
// processing of PDF LZW parameter // processing of PDF LZW parameter
if (_nextCode + _earlyChange == 512) if (_nextCode + _earlyChange == 512)
{ {
_bitsToRead = 10; _bitsToRead = 10;
} }
else if (_nextCode + _earlyChange == 1024) else if (_nextCode + _earlyChange == 1024)
{ {
_bitsToRead = 11; _bitsToRead = 11;
} }
else if (_nextCode + _earlyChange == 2048) else if (_nextCode + _earlyChange == 2048)
{ {
_bitsToRead = 12; _bitsToRead = 12;
} }
} }
prevCode = code; prevCode = code;
// put current sequence to output stream // put current sequence to output stream
for(int i = 0;i < _curSequenceLength;i++) for(int i = 0;i < _curSequenceLength;i++)
{ {
decoded += (char)curSequence[ i ]; decoded += (char)curSequence[ i ];
} }
} }
encoded = decoded; encoded = decoded;
// if predictor exists for that object, then lets decode it // if predictor exists for that object, then lets decode it
if( _predict ) if( _predict )
{ {
_predict->decode(encoded); _predict->decode(encoded);
} }
return true; return true;
} }

@ -1,41 +1,41 @@
#ifndef LZWDecode_H
#define LZWDecode_H
#include <string>
#include "Decoder.h"
#include "FilterPredictor.h"
namespace merge_lib
{
    // this class provides method for LZWDecode decoding; encoding is a no-op
    class LZWDecode : public Decoder
    {
    public:
        LZWDecode();
        virtual ~LZWDecode();
        // No-op: leaves `decoded` as it is and returns true.
        bool encode(std::string & decoded) {return true;}
        // Decodes `encoded` in place, then applies the predictor if any.
        bool decode(std::string & encoded);
        // Creates the predictor when the object's header asks for one and
        // resets the decoder state.
        void initialize(Object * objectWithStream);
    private:
        FilterPredictor *_predict;   // owned; NULL when no predictor is needed
        void clearTable();
        int getCode();
        std::string &_encoded;       // bound to _dummy by the constructor
        std::string _dummy;          // storage behind _encoded
        size_t _curSymbolIndex;      // index of the next input byte to read
        int _earlyChange;            // early parameter
        int _readBuf;                // bits read from the input but not yet consumed
        int _readBits;               // number of valid bits in _readBuf
        int _nextCode;               // next table slot to be assigned
        int _bitsToRead;             // current code width in bits
        bool _first;                 // true until the first code after a table reset
        int _curSequenceLength;      // length of the sequence decoded last
    };
}
#endif // LZWDecode_H

@ -1,52 +1,52 @@
#if !defined MediaBoxElementHandler_h #if !defined MediaBoxElementHandler_h
#define MediaBoxElementHandler_h #define MediaBoxElementHandler_h
#include "AbstractBoxElementHandler.h" #include "AbstractBoxElementHandler.h"
#include "RemoveHimSelfHandler.h" #include "RemoveHimSelfHandler.h"
#include <memory> #include <memory>
namespace merge_lib namespace merge_lib
{ {
//class for processing MediaBox field of Page object //class for processing MediaBox field of Page object
class MediaBoxElementHandler: public AbstractBoxElementHandler class MediaBoxElementHandler: public AbstractBoxElementHandler
{ {
public: public:
MediaBoxElementHandler(Object * page): AbstractBoxElementHandler(page) MediaBoxElementHandler(Object * page): AbstractBoxElementHandler(page)
{ {
_setHandlerName("/MediaBox"); _setHandlerName("/MediaBox");
} }
virtual ~MediaBoxElementHandler() virtual ~MediaBoxElementHandler()
{ {
} }
private: private:
//replace MediaBox with BBox //replace MediaBox with BBox
virtual void _changeObjectContent(unsigned int startOfPageElement) virtual void _changeObjectContent(unsigned int startOfPageElement)
{ {
if(_wasCropBoxHandlerCalled()) if(_wasCropBoxHandlerCalled())
{ {
PageElementHandler * tempNextHandler = _nextHandler; PageElementHandler * tempNextHandler = _nextHandler;
_nextHandler = new RemoveHimselfHandler(_page, _handlerName); _nextHandler = new RemoveHimselfHandler(_page, _handlerName);
_nextHandler->addNextHandler(tempNextHandler); _nextHandler->addNextHandler(tempNextHandler);
return; return;
} }
_page->eraseContent(startOfPageElement, _handlerName.size()); _page->eraseContent(startOfPageElement, _handlerName.size());
static std::string bbox("/BBox"); static std::string bbox("/BBox");
static std::string matrix("/Matrix [ 1 0 0 1 0 0 ]\n"); static std::string matrix("/Matrix [ 1 0 0 1 0 0 ]\n");
_page->insertToContent(startOfPageElement, bbox); _page->insertToContent(startOfPageElement, bbox);
_page->insertToContent(startOfPageElement, matrix); _page->insertToContent(startOfPageElement, matrix);
} }
void _pageElementNotFound() void _pageElementNotFound()
{ {
if(_wasCropBoxHandlerCalled()) if(_wasCropBoxHandlerCalled())
return; return;
_retrieveBoxFromParent(); _retrieveBoxFromParent();
} }
bool _wasCropBoxHandlerCalled() bool _wasCropBoxHandlerCalled()
{ {
return (_page->getObjectContent().find("/BBox") != std::string::npos) ? true : false; return (_page->getObjectContent().find("/BBox") != std::string::npos) ? true : false;
} }
}; };
} }
#endif #endif

@ -1,97 +1,97 @@
#if !defined MergePageDescription_h #if !defined MergePageDescription_h
#define MergePageDescription_h #define MergePageDescription_h
#include "Transformation.h" #include "Transformation.h"
#include <map> #include <map>
#include <string> #include <string>
namespace merge_lib namespace merge_lib
{ {
struct MergePageDescription struct MergePageDescription
{ {
//members: //members:
double outPageWidth; // output page width double outPageWidth; // output page width
double outPageHeight; // output page height double outPageHeight; // output page height
unsigned int basePageNumber; unsigned int basePageNumber;
std::string baseDocumentName; std::string baseDocumentName;
TransformationDescription basePageTransformation; TransformationDescription basePageTransformation;
unsigned int overlayPageNumber; unsigned int overlayPageNumber;
TransformationDescription overlayPageTransformation; TransformationDescription overlayPageTransformation;
bool skipOverlayPage; bool skipOverlayPage;
bool skipBasePage; bool skipBasePage;
//methods: //methods:
//constructor //constructor
MergePageDescription(double outputPageWidth, MergePageDescription(double outputPageWidth,
double outputPageHeight, double outputPageHeight,
unsigned int basePageNum, unsigned int basePageNum,
const char * baseDocName, const char * baseDocName,
const TransformationDescription & baseTrans, const TransformationDescription & baseTrans,
unsigned int overlayPageNum, unsigned int overlayPageNum,
const TransformationDescription & overlayTrans, const TransformationDescription & overlayTrans,
bool omitOverlayPage = false, bool omitOverlayPage = false,
bool omitBasePage = false bool omitBasePage = false
): ):
outPageWidth(outputPageWidth), outPageWidth(outputPageWidth),
outPageHeight(outputPageHeight), outPageHeight(outputPageHeight),
basePageNumber(basePageNum), basePageNumber(basePageNum),
baseDocumentName(baseDocName), baseDocumentName(baseDocName),
basePageTransformation(baseTrans), basePageTransformation(baseTrans),
overlayPageNumber(overlayPageNum), overlayPageNumber(overlayPageNum),
overlayPageTransformation(overlayTrans), overlayPageTransformation(overlayTrans),
skipOverlayPage(omitOverlayPage), skipOverlayPage(omitOverlayPage),
skipBasePage(omitBasePage) skipBasePage(omitBasePage)
{ {
} }
MergePageDescription(double outputPageWidth, MergePageDescription(double outputPageWidth,
double outputPageHeight, double outputPageHeight,
unsigned int basePageNum, unsigned int basePageNum,
const char * baseDocName, const char * baseDocName,
const TransformationDescription & baseTrans const TransformationDescription & baseTrans
): ):
outPageWidth(outputPageWidth), outPageWidth(outputPageWidth),
outPageHeight(outputPageHeight), outPageHeight(outputPageHeight),
basePageNumber(basePageNum), basePageNumber(basePageNum),
baseDocumentName(baseDocName), baseDocumentName(baseDocName),
basePageTransformation(baseTrans), basePageTransformation(baseTrans),
overlayPageNumber(0), overlayPageNumber(0),
overlayPageTransformation(), overlayPageTransformation(),
skipOverlayPage(true), skipOverlayPage(true),
skipBasePage(false) skipBasePage(false)
{ {
} }
MergePageDescription(const MergePageDescription & copy) MergePageDescription(const MergePageDescription & copy)
{ {
*this = copy; *this = copy;
} }
MergePageDescription& operator = (const MergePageDescription &copy) MergePageDescription& operator = (const MergePageDescription &copy)
{ {
if( this != &copy ) if( this != &copy )
{ {
baseDocumentName = copy.baseDocumentName; baseDocumentName = copy.baseDocumentName;
basePageNumber = copy.basePageNumber; basePageNumber = copy.basePageNumber;
skipBasePage = copy.skipBasePage; skipBasePage = copy.skipBasePage;
skipOverlayPage = copy.skipOverlayPage; skipOverlayPage = copy.skipOverlayPage;
outPageHeight = copy.outPageHeight; outPageHeight = copy.outPageHeight;
outPageWidth = copy.outPageWidth; outPageWidth = copy.outPageWidth;
basePageTransformation = copy.basePageTransformation; basePageTransformation = copy.basePageTransformation;
overlayPageNumber = copy.overlayPageNumber; overlayPageNumber = copy.overlayPageNumber;
overlayPageTransformation = copy.overlayPageTransformation; overlayPageTransformation = copy.overlayPageTransformation;
} }
return *this; return *this;
} }
~MergePageDescription() ~MergePageDescription()
{ {
} }
}; };
// array of merge descriptions - allows to merge selected pages // array of merge descriptions - allows to merge selected pages
typedef std::vector<MergePageDescription> MergeDescription; typedef std::vector<MergePageDescription> MergeDescription;
} }
#endif #endif

@ -1,115 +1,115 @@
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Merger.cpp // Merger.cpp
// Implementation of the Class Merger // Implementation of the Class Merger
// Created on: 19-Jun-2009 12:27:54 // Created on: 19-Jun-2009 12:27:54
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#include "Merger.h" #include "Merger.h"
#include "Parser.h" #include "Parser.h"
#include "OverlayDocumentParser.h" #include "OverlayDocumentParser.h"
#include "Exception.h" #include "Exception.h"
#include <map> #include <map>
#include <iostream> #include <iostream>
using namespace merge_lib; using namespace merge_lib;
// Parser instance shared by every Merger; used to load base documents.
Parser Merger::_parser;

// Creates a merger with no base documents and no overlay document.
Merger::Merger()
    : _baseDocuments(),
      _overlayDocument(0)
{
}
// Releases every document owned by the merger: all parsed base documents
// and the overlay document, if one was loaded.
Merger::~Merger()
{
    typedef std::map<std::string, Document *>::iterator BaseDocumentIterator;

    for (BaseDocumentIterator current = _baseDocuments.begin();
         current != _baseDocuments.end();
         ++current)
    {
        delete current->second;
    }
    _baseDocuments.clear();

    // deleting a null pointer is a no-op, so no explicit check is needed
    delete _overlayDocument;
    _overlayDocument = 0;
}
void Merger::addBaseDocument(const char * docName) void Merger::addBaseDocument(const char * docName)
{ {
//if docName has been already opened then do nothing //if docName has been already opened then do nothing
if(_baseDocuments.count(docName)) if(_baseDocuments.count(docName))
return; return;
Document * newBaseDoc = _parser.parseDocument(docName); Document * newBaseDoc = _parser.parseDocument(docName);
_baseDocuments.insert(std::pair<std::string, Document *>(docName, newBaseDoc)); _baseDocuments.insert(std::pair<std::string, Document *>(docName, newBaseDoc));
} }
void Merger::addOverlayDocument(const char * docName) void Merger::addOverlayDocument(const char * docName)
{ {
if( _overlayDocument ) if( _overlayDocument )
{ {
delete _overlayDocument; delete _overlayDocument;
_overlayDocument = 0; _overlayDocument = 0;
} }
if( !_overlayDocument ) if( !_overlayDocument )
{ {
OverlayDocumentParser overlayDocParser; OverlayDocumentParser overlayDocParser;
_overlayDocument = overlayDocParser.parseDocument(docName); _overlayDocument = overlayDocParser.parseDocument(docName);
if( !_overlayDocument ) if( !_overlayDocument )
{ {
throw Exception("Error loading overlay document!"); throw Exception("Error loading overlay document!");
} }
} }
} }
// The main method which performs the merge // The main method which performs the merge
void Merger::merge(const char * overlayDocName, const MergeDescription & pagesToMerge) void Merger::merge(const char * overlayDocName, const MergeDescription & pagesToMerge)
{ {
if( !_overlayDocument) if( !_overlayDocument)
{ {
addOverlayDocument(overlayDocName); addOverlayDocument(overlayDocName);
if( !_overlayDocument ) if( !_overlayDocument )
{ {
throw Exception("Error loading overlay document!"); throw Exception("Error loading overlay document!");
} }
} }
MergeDescription::const_iterator pageIterator = pagesToMerge.begin(); MergeDescription::const_iterator pageIterator = pagesToMerge.begin();
for(; pageIterator != pagesToMerge.end(); ++pageIterator ) for(; pageIterator != pagesToMerge.end(); ++pageIterator )
{ {
Page * destinationPage = _overlayDocument->getPage( (*pageIterator).overlayPageNumber); Page * destinationPage = _overlayDocument->getPage( (*pageIterator).overlayPageNumber);
if( destinationPage == 0 ) if( destinationPage == 0 )
{ {
std::stringstream error; std::stringstream error;
error << "There is no page with " << (*pageIterator).overlayPageNumber << error << "There is no page with " << (*pageIterator).overlayPageNumber <<
" number in " << overlayDocName; " number in " << overlayDocName;
throw Exception(error); throw Exception(error);
} }
Document * sourceDocument = _baseDocuments[(*pageIterator).baseDocumentName]; Document * sourceDocument = _baseDocuments[(*pageIterator).baseDocumentName];
Page * sourcePage = (sourceDocument == 0)? 0 : sourceDocument->getPage((*pageIterator).basePageNumber); Page * sourcePage = (sourceDocument == 0)? 0 : sourceDocument->getPage((*pageIterator).basePageNumber);
bool isPageDuplicated = false; bool isPageDuplicated = false;
if( sourcePage ) if( sourcePage )
{ {
unsigned int howManyTimesPageFound(0); unsigned int howManyTimesPageFound(0);
for(size_t i = 0; i < pagesToMerge.size(); ++i) for(size_t i = 0; i < pagesToMerge.size(); ++i)
{ {
if(pagesToMerge[i].basePageNumber == (*pageIterator).basePageNumber) if(pagesToMerge[i].basePageNumber == (*pageIterator).basePageNumber)
++howManyTimesPageFound; ++howManyTimesPageFound;
if(howManyTimesPageFound == 2) if(howManyTimesPageFound == 2)
break; break;
} }
isPageDuplicated = (2 == howManyTimesPageFound) ? true : false; isPageDuplicated = (2 == howManyTimesPageFound) ? true : false;
} }
destinationPage->merge(sourcePage, _overlayDocument, const_cast<MergePageDescription&>((*pageIterator)), isPageDuplicated); destinationPage->merge(sourcePage, _overlayDocument, const_cast<MergePageDescription&>((*pageIterator)), isPageDuplicated);
} }
} }
// Method performs saving of merged documents into selected file // Method performs saving of merged documents into selected file
void Merger::saveMergedDocumentsAs(const char * outDocumentName) void Merger::saveMergedDocumentsAs(const char * outDocumentName)
{ {
_overlayDocument->saveAs(outDocumentName); _overlayDocument->saveAs(outDocumentName);
} }

@ -1,42 +1,42 @@
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Merger.h // Merger.h
// Implementation of the Class Merger // Implementation of the Class Merger
// Created on: 19-Jun-2009 12:27:54 // Created on: 19-Jun-2009 12:27:54
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#if !defined Merger_h #if !defined Merger_h
#define Merger_h #define Merger_h
#include "Document.h" #include "Document.h"
#include "Parser.h" #include "Parser.h"
#include <map> #include <map>
// structure defines parameter of merge // structure defines parameter of merge
namespace merge_lib namespace merge_lib
{ {
class Document; class Document;
class Merger class Merger
{ {
public: public:
Merger(); Merger();
~Merger(); ~Merger();
//this method should be called every time the "Add" button is clicked //this method should be called every time the "Add" button is clicked
void addBaseDocument(const char *docName); void addBaseDocument(const char *docName);
void addOverlayDocument(const char *docName); void addOverlayDocument(const char *docName);
void saveMergedDocumentsAs(const char *outDocumentName); void saveMergedDocumentsAs(const char *outDocumentName);
void merge(const char *overlayDocName, const MergeDescription & pagesToMerge); void merge(const char *overlayDocName, const MergeDescription & pagesToMerge);
private: private:
std::map<std::string, Document * > _baseDocuments; std::map<std::string, Document * > _baseDocuments;
static Parser _parser; static Parser _parser;
Document * _overlayDocument; Document * _overlayDocument;
}; };
} }
#endif // #endif //

@ -1,149 +1,149 @@
#if !defined Object_h #if !defined Object_h
#define Object_h #define Object_h
#include "Utils.h" #include "Utils.h"
#include <cmath> #include <cmath>
#include <string> #include <string>
#include <fstream> #include <fstream>
#include <map> #include <map>
#include <set> #include <set>
#include <vector> #include <vector>
#include <utility> #include <utility>
namespace merge_lib namespace merge_lib
{ {
// This class represents a pdf object and declares the operations the merger
// performs on pdf objects.
// Each object consists of its numbers and its content:
//   <object number> <generation number> obj
//   <content>
//   endobj
// An object can contain several references to other objects. The referenced
// objects are called "children". Each child is kept together with the
// position(s) of its reference in the parent's content, so after every
// content modification the stored positions have to be updated as well.
// This convention simplifies the recalculation of object numbers.
class Object
{
public:
    friend class PageElementHandler;

    typedef std::vector<unsigned int> ReferencePositionsInContent;
    typedef std::pair<Object *, ReferencePositionsInContent > ChildAndItPositionInContent;
    // keyed by unsigned int
    // NOTE(review): presumably the child's object number - confirm in Object.cpp
    typedef std::map <unsigned int, ChildAndItPositionInContent> Children;

    // objectNumber / generationNumber - numbers from the "N G obj" header
    // objectContent - object content
    // fileName      - file the object comes from (empty by default)
    // streamBounds  - pair of stream bounds (both 0 by default)
    // hasStream     - whether the object has a stream
    // Initializers are listed in member declaration order, which is the
    // order in which they are actually executed.
    Object(unsigned int objectNumber, unsigned int generationNumber, const std::string & objectContent,
           const std::string & fileName = std::string(),
           std::pair<unsigned int, unsigned int> streamBounds = std::make_pair(0u, 0u),
           bool hasStream = false
          ):
        _number(objectNumber),
        _generationNumber(generationNumber),
        _oldNumber(objectNumber),
        _content(objectContent),
        _parents(),
        _children(),
        _isPassed(false),
        _streamBounds(streamBounds),
        _fileName(fileName),
        _hasStream(hasStream),
        _hasStreamInContent(false)
    {
    }
    virtual ~Object();

    Object * getClone(std::vector<Object *> & clones);

    // children management
    void addChild(Object * child, const std::vector<unsigned int> childPositionsInContent);
    void addChild(const Children & children);
    ReferencePositionsInContent removeChild(Object * child);
    void forgetAboutChildren(unsigned int leftBound, unsigned int rightBound);
    Object * getChild(unsigned int objectNumber);
    bool findObject(const std::string & token, Object* & foundObject, unsigned int & tokenPositionInContent);
    std::vector<Object *> getChildrenByBounds(unsigned int leftBound, unsigned int rightBound);
    std::vector<Object *> getSortedByPositionChildren(unsigned int leftBound, unsigned int rightBound);
    void removeChildrenByBounds(unsigned int leftBound, unsigned int rightBound);
    const Children & getChildren();
    void removeHimself();

    // numbers and content
    unsigned int getObjectNumber() const;
    unsigned int getgenerationNumber() const;
    std::string & getObjectContent();
    void setObjectContent(const std::string & objectContent);
    void appendContent(const std::string & addToContent);
    void eraseContent(unsigned int from, unsigned int size);
    void insertToContent(unsigned int position, const char * insertedStr, unsigned int length);
    void insertToContent(unsigned int position, const std::string & insertedStr);

    // sizesAndGenerationNumbers: map <unsigned int, pair<unsigned long long, unsigned int> >
    // NOTE(review): presumably object number -> <size, generation number> - confirm
    void serialize(std::ofstream & out, std::map< unsigned int, std::pair<unsigned long long, unsigned int > > & sizesAndGenerationNumbers);
    void recalculateObjectNumbers(unsigned int & newNumber);

    // value of the "passed" flag
    bool isPassed() const
    {
        return _isPassed;
    }
    void retrieveMaxObjectNumber(unsigned int & maxNumber);

    // Clears the "passed" flag of this object and, recursively, of every
    // child whose flag is still set.
    void resetIsPassed()
    {
        _isPassed = false;
        for (Children::iterator child = _children.begin(); child != _children.end(); ++child)
        {
            Object * childObject = child->second.first;
            if (childObject->isPassed())
                childObject->resetIsPassed();
        }
    }

    // number the object was created with (or last set via setObjectNumber)
    unsigned int getOldNumber() const
    {
        return _oldNumber;
    }
    // sets both the current and the "old" number
    void setObjectNumber(unsigned int objNumber)
    {
        _number = objNumber;
        _oldNumber = objNumber;
    }

    bool getStream(std::string &);
    bool hasStream();
    bool getHeader(std::string &content);
    // marks the object as having a stream and as having it in its content
    void forgetStreamInFile()
    {
        _hasStreamInContent = true;
        _hasStream = true;
    }
    std::string getNameSimpleValue(const std::string &content, const std::string &patten, size_t pos = 0);
    unsigned int getChildPosition(const Object * child); //throw (Exception)
    const std::set<Object *> & getParents() const
    {
        return _parents;
    }
    Object* findPatternInObjOrParents(const std::string &pattern);

private:
    //methods
    Object(const Object & copy);
    Object * _getClone(std::map<unsigned int, Object *> & clones);
    void _addChild(Object * child, const ReferencePositionsInContent & childPositionsInContent);
    void _setObjectNumber(unsigned int objectNumber);
    void _addParent(Object * child);
    bool _findObject(const std::string & token, Object* & foundObject, unsigned int & tokenPositionInContent);
    void _serialize(std::ofstream & out, const std::string & stream);
    void _recalculateObjectNumbers(unsigned int & maxNumber);
    void _recalculateReferencePositions(unsigned int changedReference, int displacement);
    void _retrieveMaxObjectNumber(unsigned int & maxNumber);
    void _serialize(std::ofstream & out, std::map<unsigned int, unsigned long long> & sizes);
    bool _getStreamFromContent(std::string & stream);

    //members
    unsigned int _number;
    unsigned int _generationNumber;
    unsigned int _oldNumber;
    std::string _content;
    std::set <Object *> _parents;
    Children _children;
    bool _isPassed;
    std::pair<unsigned int, unsigned int> _streamBounds;
    std::string _fileName;
    bool _hasStream;
    bool _hasStreamInContent;
};
} }
#endif #endif

@ -1,144 +1,144 @@
#include "OverlayDocumentParser.h" #include "OverlayDocumentParser.h"
#include <fstream> #include <fstream>
#include <string.h> #include <string.h>
#include "Exception.h" #include "Exception.h"
#include "Object.h" #include "Object.h"
using namespace merge_lib; using namespace merge_lib;
using namespace std; using namespace std;
// Number of bytes read from the end of the file when looking for "startxref".
int OverlayDocumentParser::DOC_PART_WITH_START_OF_XREF = 30;

// Size in bytes of one chunk of the file kept in memory while the objects
// are being created (10 MiB).
unsigned int partSize = 10 * 1024 * 1024;
// Remembers the file name (the chunked readers of this class reopen the file
// by name) and delegates the actual parsing to Parser::parseDocument.
Document * OverlayDocumentParser::parseDocument(const char * fileName)
{
    _fileName.assign(fileName);
    Document * parsedDocument = Parser::parseDocument(fileName);
    return parsedDocument;
}
void OverlayDocumentParser::_readXRefAndCreateObjects() void OverlayDocumentParser::_readXRefAndCreateObjects()
{ {
std::map<unsigned int, unsigned long> objectsAndPositions; std::map<unsigned int, unsigned long> objectsAndPositions;
_readXref(objectsAndPositions); _readXref(objectsAndPositions);
std::map<unsigned int, unsigned long> objectsAndSizes; std::map<unsigned int, unsigned long> objectsAndSizes;
std::map<unsigned int, unsigned long>::iterator objAndSIter; std::map<unsigned int, unsigned long>::iterator objAndSIter;
std::map<unsigned int, unsigned long>::iterator objAndPIter; std::map<unsigned int, unsigned long>::iterator objAndPIter;
unsigned long fileSize = Utils::getFileSize(_fileName.c_str()); unsigned long fileSize = Utils::getFileSize(_fileName.c_str());
for(objAndSIter = objectsAndPositions.begin(); objAndSIter != objectsAndPositions.end(); ++objAndSIter) for(objAndSIter = objectsAndPositions.begin(); objAndSIter != objectsAndPositions.end(); ++objAndSIter)
{ {
unsigned int nextPosition = fileSize; unsigned int nextPosition = fileSize;
for(objAndPIter = objectsAndPositions.begin(); objAndPIter != objectsAndPositions.end(); ++objAndPIter) for(objAndPIter = objectsAndPositions.begin(); objAndPIter != objectsAndPositions.end(); ++objAndPIter)
{ {
if((objAndPIter->second > objAndSIter->second) && (objAndPIter->second < nextPosition)) if((objAndPIter->second > objAndSIter->second) && (objAndPIter->second < nextPosition))
nextPosition = objAndPIter->second; nextPosition = objAndPIter->second;
} }
objectsAndSizes[objAndSIter->first] = nextPosition - objAndSIter->second; objectsAndSizes[objAndSIter->first] = nextPosition - objAndSIter->second;
} }
bool notEndOfFile = true; bool notEndOfFile = true;
do do
{ {
unsigned long partStart = fileSize; unsigned long partStart = fileSize;
std::map<unsigned int, unsigned long>::iterator objIter; std::map<unsigned int, unsigned long>::iterator objIter;
for(objIter = objectsAndPositions.begin(); objIter != objectsAndPositions.end(); ++objIter) for(objIter = objectsAndPositions.begin(); objIter != objectsAndPositions.end(); ++objIter)
{ {
if(objIter->second < partStart) if(objIter->second < partStart)
partStart = objIter->second; partStart = objIter->second;
} }
unsigned long nextPartStart = partStart + partSize; unsigned long nextPartStart = partStart + partSize;
if((nextPartStart) < fileSize) if((nextPartStart) < fileSize)
_getPartOfFileContent(partStart, partSize); _getPartOfFileContent(partStart, partSize);
else else
{ {
_getPartOfFileContent(partStart, fileSize - partStart); _getPartOfFileContent(partStart, fileSize - partStart);
nextPartStart = fileSize; nextPartStart = fileSize;
notEndOfFile = false; notEndOfFile = false;
} }
unsigned long toReadAgain = 0; unsigned long toReadAgain = 0;
for(objIter = objectsAndPositions.begin(); objIter != objectsAndPositions.end(); ) for(objIter = objectsAndPositions.begin(); objIter != objectsAndPositions.end(); )
{ {
if((objectsAndSizes[objIter->first] + objIter->second <= nextPartStart) && (objIter->second >= partStart) && ((objIter->second < nextPartStart))) if((objectsAndSizes[objIter->first] + objIter->second <= nextPartStart) && (objIter->second >= partStart) && ((objIter->second < nextPartStart)))
{ {
std::pair<unsigned int, unsigned int> streamBounds; std::pair<unsigned int, unsigned int> streamBounds;
unsigned int objectNumber; unsigned int objectNumber;
unsigned int generationNumber; unsigned int generationNumber;
bool hasObjectStream; bool hasObjectStream;
const std::string content = _getObjectContent(objIter->second - partStart, objectNumber, generationNumber, streamBounds, hasObjectStream); const std::string content = _getObjectContent(objIter->second - partStart, objectNumber, generationNumber, streamBounds, hasObjectStream);
streamBounds.first += partStart; streamBounds.first += partStart;
streamBounds.second += partStart; streamBounds.second += partStart;
Object * newObject = new Object(objectNumber, generationNumber, content, _document->_documentName ,streamBounds, hasObjectStream); Object * newObject = new Object(objectNumber, generationNumber, content, _document->_documentName ,streamBounds, hasObjectStream);
_objects[objectNumber] = newObject; _objects[objectNumber] = newObject;
std::map<unsigned int, unsigned long>::iterator temp = objIter; std::map<unsigned int, unsigned long>::iterator temp = objIter;
++objIter; ++objIter;
objectsAndPositions.erase(temp); objectsAndPositions.erase(temp);
continue; continue;
} }
++objIter; ++objIter;
} }
partStart = nextPartStart; partStart = nextPartStart;
} }
while(notEndOfFile); while(notEndOfFile);
} }
void OverlayDocumentParser::_getPartOfFileContent(long startOfPart, unsigned int length) void OverlayDocumentParser::_getPartOfFileContent(long startOfPart, unsigned int length)
{ {
ifstream pdfFile; ifstream pdfFile;
pdfFile.open (_fileName.c_str(), ios::binary ); pdfFile.open (_fileName.c_str(), ios::binary );
if (pdfFile.fail()) if (pdfFile.fail())
{ {
stringstream errorMessage("File "); stringstream errorMessage("File ");
errorMessage << _fileName << " is absent" << "\0"; errorMessage << _fileName << " is absent" << "\0";
throw Exception(errorMessage); throw Exception(errorMessage);
} }
ios_base::seekdir dir; ios_base::seekdir dir;
if(startOfPart >= 0) if(startOfPart >= 0)
dir = ios_base::beg; dir = ios_base::beg;
else else
dir = ios_base::end; dir = ios_base::end;
pdfFile.seekg (startOfPart, dir); pdfFile.seekg (startOfPart, dir);
_fileContent.resize(length); _fileContent.resize(length);
pdfFile.read(&_fileContent[0], length); pdfFile.read(&_fileContent[0], length);
pdfFile.close(); pdfFile.close();
} }
void OverlayDocumentParser::_readXref(std::map<unsigned int, unsigned long> & objectsAndSizes) void OverlayDocumentParser::_readXref(std::map<unsigned int, unsigned long> & objectsAndSizes)
{ {
_getPartOfFileContent(- DOC_PART_WITH_START_OF_XREF, DOC_PART_WITH_START_OF_XREF); _getPartOfFileContent(- DOC_PART_WITH_START_OF_XREF, DOC_PART_WITH_START_OF_XREF);
unsigned int startOfStartxref = _fileContent.find("startxref"); unsigned int startOfStartxref = _fileContent.find("startxref");
unsigned int startOfNumber = _fileContent.find_first_of(Parser::NUMBERS, startOfStartxref); unsigned int startOfNumber = _fileContent.find_first_of(Parser::NUMBERS, startOfStartxref);
unsigned int endOfNumber = _fileContent.find_first_not_of(Parser::NUMBERS, startOfNumber + 1); unsigned int endOfNumber = _fileContent.find_first_not_of(Parser::NUMBERS, startOfNumber + 1);
std::string startXref = _fileContent.substr(startOfNumber, endOfNumber - startOfNumber); std::string startXref = _fileContent.substr(startOfNumber, endOfNumber - startOfNumber);
unsigned int strtXref = Utils::stringToInt(startXref); unsigned int strtXref = Utils::stringToInt(startXref);
unsigned int sizeOfXref = Utils::getFileSize(_fileName.c_str()) - strtXref; unsigned int sizeOfXref = Utils::getFileSize(_fileName.c_str()) - strtXref;
_getPartOfFileContent(strtXref, sizeOfXref); _getPartOfFileContent(strtXref, sizeOfXref);
unsigned int leftBoundOfObjectNumber = _fileContent.find("0 ") + strlen("0 "); unsigned int leftBoundOfObjectNumber = _fileContent.find("0 ") + strlen("0 ");
unsigned int rightBoundOfObjectNumber = _fileContent.find_first_not_of(Parser::NUMBERS, leftBoundOfObjectNumber); unsigned int rightBoundOfObjectNumber = _fileContent.find_first_not_of(Parser::NUMBERS, leftBoundOfObjectNumber);
std::string objectNuberStr = _fileContent.substr(leftBoundOfObjectNumber, rightBoundOfObjectNumber - leftBoundOfObjectNumber); std::string objectNuberStr = _fileContent.substr(leftBoundOfObjectNumber, rightBoundOfObjectNumber - leftBoundOfObjectNumber);
unsigned long objectNumber = Utils::stringToInt(objectNuberStr); unsigned long objectNumber = Utils::stringToInt(objectNuberStr);
unsigned int startOfObjectPosition = _fileContent.find("0000000000 65535 f ") + strlen("0000000000 65535 f "); unsigned int startOfObjectPosition = _fileContent.find("0000000000 65535 f ") + strlen("0000000000 65535 f ");
for(unsigned long i = 1; i < objectNumber; ++i) for(unsigned long i = 1; i < objectNumber; ++i)
{ {
startOfObjectPosition = _fileContent.find_first_of(Parser::NUMBERS, startOfObjectPosition); startOfObjectPosition = _fileContent.find_first_of(Parser::NUMBERS, startOfObjectPosition);
unsigned int endOfObjectPostion = _fileContent.find(" 00000 n", startOfObjectPosition); unsigned int endOfObjectPostion = _fileContent.find(" 00000 n", startOfObjectPosition);
std::string objectPostionStr = _fileContent.substr(startOfObjectPosition, endOfObjectPostion - startOfObjectPosition); std::string objectPostionStr = _fileContent.substr(startOfObjectPosition, endOfObjectPostion - startOfObjectPosition);
objectsAndSizes[i] = Utils::stringToInt(objectPostionStr); objectsAndSizes[i] = Utils::stringToInt(objectPostionStr);
startOfObjectPosition = endOfObjectPostion + strlen(" 00000 n"); startOfObjectPosition = endOfObjectPostion + strlen(" 00000 n");
} }
} }
unsigned int OverlayDocumentParser::_readTrailerAndReturnRoot() unsigned int OverlayDocumentParser::_readTrailerAndReturnRoot()
{ {
_getPartOfFileContent(- (3*DOC_PART_WITH_START_OF_XREF), (3*DOC_PART_WITH_START_OF_XREF)); _getPartOfFileContent(- (3*DOC_PART_WITH_START_OF_XREF), (3*DOC_PART_WITH_START_OF_XREF));
return Parser::_readTrailerAndReturnRoot(); return Parser::_readTrailerAndReturnRoot();
} }
// Always reports 0.
// NOTE(review): presumably because the trailer chunk loaded by
// _readTrailerAndReturnRoot is searched from its beginning - confirm in Parser.
unsigned int OverlayDocumentParser::_getStartOfXrefWithRoot()
{
    const unsigned int startOfXrefWithRoot = 0;
    return startOfXrefWithRoot;
}

@ -1,40 +1,40 @@
#if !defined OverlayDocumentParser_h #if !defined OverlayDocumentParser_h
#define OverlayDocumentParser_h #define OverlayDocumentParser_h
#include "Object.h" #include "Object.h"
#include "Document.h" #include "Document.h"
#include "Page.h" #include "Page.h"
#include "Parser.h" #include "Parser.h"
#include <map> #include <map>
#include <iostream> #include <iostream>
namespace merge_lib namespace merge_lib
{ {
class Document; class Document;
//This class parsed the pdf document and creates //This class parsed the pdf document and creates
//an Document object //an Document object
class OverlayDocumentParser: private Parser class OverlayDocumentParser: private Parser
{ {
public: public:
OverlayDocumentParser(): Parser(), _fileName() {}; OverlayDocumentParser(): Parser(), _fileName() {};
Document * parseDocument(const char * fileName); Document * parseDocument(const char * fileName);
protected: protected:
unsigned int _readTrailerAndReturnRoot(); unsigned int _readTrailerAndReturnRoot();
private: private:
//methods //methods
void _getFileContent(const char * fileName){}; void _getFileContent(const char * fileName){};
void _readXRefAndCreateObjects(); void _readXRefAndCreateObjects();
void _readXref(std::map<unsigned int, unsigned long> & objectsAndSizes); void _readXref(std::map<unsigned int, unsigned long> & objectsAndSizes);
void _getPartOfFileContent(long startOfPart, unsigned int length); void _getPartOfFileContent(long startOfPart, unsigned int length);
unsigned int _getStartOfXrefWithRoot(); unsigned int _getStartOfXrefWithRoot();
//constants //constants
static int DOC_PART_WITH_START_OF_XREF; static int DOC_PART_WITH_START_OF_XREF;
//members //members
std::string _fileName; std::string _fileName;
}; };
} }
#endif #endif

@ -1,64 +1,64 @@
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Page.h // Page.h
// Implementation of the Class Page // Implementation of the Class Page
// Created on: 19-Jun-2009 12:27:56
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#if !defined Page_h #if !defined Page_h
#define Page_h #define Page_h
#include <string> #include <string>
#include "Object.h" #include "Object.h"
#include "MergePageDescription.h" #include "MergePageDescription.h"
namespace merge_lib namespace merge_lib
{ {
class Document; class Document;
class Page class Page
{ {
friend class Parser; friend class Parser;
friend class PageElementsFactory; friend class PageElementsFactory;
public: public:
Page(unsigned int pageNumber); Page(unsigned int pageNumber);
~Page(); ~Page();
unsigned int getPageNumber() unsigned int getPageNumber()
{ {
return _pageNumber; return _pageNumber;
} }
void merge(Page * sourcePage, Document * parentDocument, MergePageDescription & description, bool isPageDuplicated); void merge(Page * sourcePage, Document * parentDocument, MergePageDescription & description, bool isPageDuplicated);
void recalculateObjectNumbers(unsigned int & newNumber); void recalculateObjectNumbers(unsigned int & newNumber);
std::string & getPageContent(); std::string & getPageContent();
const Object::Children & getPageRefs(); const Object::Children & getPageRefs();
Object * pageToXObject(std::vector<Object *> & allObjects, std::vector<Object *> & annots, bool isCloneNeeded); Object * pageToXObject(std::vector<Object *> & allObjects, std::vector<Object *> & annots, bool isCloneNeeded);
void setRotation(int rotation) void setRotation(int rotation)
{ {
_rotation = rotation; _rotation = rotation;
} }
private: private:
//methods //methods
Object * _pageToXObject(Object *& page, std::vector<Object *> & annots); Object * _pageToXObject(Object *& page, std::vector<Object *> & annots);
std::string _getMergedPageContent( unsigned int & contentPosition, std::string _getMergedPageContent( unsigned int & contentPosition,
unsigned int & parentPosition, unsigned int & parentPosition,
unsigned int & originalPage1Position, unsigned int & originalPage1Position,
unsigned int & originalPage2Position, unsigned int & originalPage2Position,
std::pair<unsigned int, unsigned int> originalPageNumbers, std::pair<unsigned int, unsigned int> originalPageNumbers,
const MergePageDescription & description, const MergePageDescription & description,
Object * basePage, Object * basePage,
const std::vector<Object *> & annots, const std::vector<Object *> & annots,
std::vector <Object::ChildAndItPositionInContent> & annotsPositions std::vector <Object::ChildAndItPositionInContent> & annotsPositions
); );
//members //members
Object * _root; Object * _root;
unsigned int _pageNumber; unsigned int _pageNumber;
int _rotation; int _rotation;
}; };
} }
#endif #endif

@ -1,74 +1,74 @@
#include "PageElementHandler.h" #include "PageElementHandler.h"
using namespace merge_lib; using namespace merge_lib;
std::set<std::string> PageElementHandler::_allPageFields; std::set<std::string> PageElementHandler::_allPageFields;
void PageElementHandler::_createAllPageFieldsSet() void PageElementHandler::_createAllPageFieldsSet()
{ {
if(!_allPageFields.empty()) if(!_allPageFields.empty())
return; return;
_allPageFields.insert(std::string("Type")); _allPageFields.insert(std::string("Type"));
_allPageFields.insert(std::string("Parent")); _allPageFields.insert(std::string("Parent"));
_allPageFields.insert(std::string("LastModified")); _allPageFields.insert(std::string("LastModified"));
_allPageFields.insert(std::string("Resources")); _allPageFields.insert(std::string("Resources"));
_allPageFields.insert(std::string("MediaBox")); _allPageFields.insert(std::string("MediaBox"));
_allPageFields.insert(std::string("CropBox")); _allPageFields.insert(std::string("CropBox"));
_allPageFields.insert(std::string("BleedBox")); _allPageFields.insert(std::string("BleedBox"));
_allPageFields.insert(std::string("TrimBox")); _allPageFields.insert(std::string("TrimBox"));
_allPageFields.insert(std::string("ArtBox")); _allPageFields.insert(std::string("ArtBox"));
_allPageFields.insert(std::string("BoxColorInfo")); _allPageFields.insert(std::string("BoxColorInfo"));
_allPageFields.insert(std::string("Contents")); _allPageFields.insert(std::string("Contents"));
_allPageFields.insert(std::string("Rotate")); _allPageFields.insert(std::string("Rotate"));
_allPageFields.insert(std::string("Group")); _allPageFields.insert(std::string("Group"));
_allPageFields.insert(std::string("Thumb")); _allPageFields.insert(std::string("Thumb"));
_allPageFields.insert(std::string("B")); _allPageFields.insert(std::string("B"));
_allPageFields.insert(std::string("Dur")); _allPageFields.insert(std::string("Dur"));
_allPageFields.insert(std::string("Trans")); _allPageFields.insert(std::string("Trans"));
_allPageFields.insert(std::string("Annots")); _allPageFields.insert(std::string("Annots"));
_allPageFields.insert(std::string("AA")); _allPageFields.insert(std::string("AA"));
_allPageFields.insert(std::string("Metadata")); _allPageFields.insert(std::string("Metadata"));
_allPageFields.insert(std::string("PieceInfo")); _allPageFields.insert(std::string("PieceInfo"));
_allPageFields.insert(std::string("StructParents")); _allPageFields.insert(std::string("StructParents"));
_allPageFields.insert(std::string("ID")); _allPageFields.insert(std::string("ID"));
_allPageFields.insert(std::string("PZ")); _allPageFields.insert(std::string("PZ"));
_allPageFields.insert(std::string("SeparationInfo")); _allPageFields.insert(std::string("SeparationInfo"));
_allPageFields.insert(std::string("Tabs")); _allPageFields.insert(std::string("Tabs"));
_allPageFields.insert(std::string("TemplateInstantiated")); _allPageFields.insert(std::string("TemplateInstantiated"));
_allPageFields.insert(std::string("PresSteps")); _allPageFields.insert(std::string("PresSteps"));
_allPageFields.insert(std::string("UserUnit")); _allPageFields.insert(std::string("UserUnit"));
_allPageFields.insert(std::string("VP")); _allPageFields.insert(std::string("VP"));
//for correct search all fields of XObject should be present to //for correct search all fields of XObject should be present to
_allPageFields.insert(std::string("Subtype")); _allPageFields.insert(std::string("Subtype"));
_allPageFields.insert(std::string("FormType")); _allPageFields.insert(std::string("FormType"));
_allPageFields.insert(std::string("BBox")); _allPageFields.insert(std::string("BBox"));
_allPageFields.insert(std::string("Matrix")); _allPageFields.insert(std::string("Matrix"));
_allPageFields.insert(std::string("Ref")); _allPageFields.insert(std::string("Ref"));
_allPageFields.insert(std::string("StructParent")); _allPageFields.insert(std::string("StructParent"));
_allPageFields.insert(std::string("OPI")); _allPageFields.insert(std::string("OPI"));
_allPageFields.insert(std::string("OC")); _allPageFields.insert(std::string("OC"));
_allPageFields.insert(std::string("Name")); _allPageFields.insert(std::string("Name"));
} }
unsigned int PageElementHandler::_findEndOfElementContent(unsigned int startOfPageElement) unsigned int PageElementHandler::_findEndOfElementContent(unsigned int startOfPageElement)
{ {
static std::string whitespacesAndDelimeters(" \t\f\v\n\r<<[/"); static std::string whitespacesAndDelimeters(" \t\f\v\n\r<<[/");
unsigned int foundSlash = _pageContent.find("/", startOfPageElement + 1); unsigned int foundSlash = _pageContent.find("/", startOfPageElement + 1);
std::string fieldType; std::string fieldType;
while(foundSlash != std::string::npos) while(foundSlash != std::string::npos)
{ {
unsigned int foundWhitespace = _pageContent.find_first_of(whitespacesAndDelimeters, foundSlash + 1); unsigned int foundWhitespace = _pageContent.find_first_of(whitespacesAndDelimeters, foundSlash + 1);
if(foundWhitespace != std::string::npos) if(foundWhitespace != std::string::npos)
fieldType = _pageContent.substr(foundSlash + 1, foundWhitespace - foundSlash - 1); fieldType = _pageContent.substr(foundSlash + 1, foundWhitespace - foundSlash - 1);
else else
break; break;
//is this any page element between "/" and " " //is this any page element between "/" and " "
if(_allPageFields.count(fieldType)) if(_allPageFields.count(fieldType))
{ {
return foundSlash; return foundSlash;
} }
foundSlash = foundWhitespace; foundSlash = foundWhitespace;
} }
return _pageContent.rfind(">>"); return _pageContent.rfind(">>");
} }

@ -1,84 +1,84 @@
#if !defined PageElementHandler_h #if !defined PageElementHandler_h
#define PageElementHandler_h #define PageElementHandler_h
#include <string> #include <string>
#include <set> #include <set>
#include "Object.h" #include "Object.h"
#include "Parser.h" #include "Parser.h"
namespace merge_lib namespace merge_lib
{ {
//base class for all elements handlers //base class for all elements handlers
//Handlers are needed to process Page fields during the merge //Handlers are needed to process Page fields during the merge
//this is the chain of responsibility pattern //this is the chain of responsibility pattern
class PageElementHandler class PageElementHandler
{ {
public: public:
PageElementHandler(Object * page): _page(page), _pageContent(page->_content), _nextHandler(0) PageElementHandler(Object * page): _page(page), _pageContent(page->_content), _nextHandler(0)
{ {
_createAllPageFieldsSet(); _createAllPageFieldsSet();
} }
virtual ~PageElementHandler() virtual ~PageElementHandler()
{ {
delete _nextHandler; delete _nextHandler;
} }
void addNextHandler(PageElementHandler * nextHandler) void addNextHandler(PageElementHandler * nextHandler)
{ {
_nextHandler = nextHandler; _nextHandler = nextHandler;
} }
void processObjectContent() void processObjectContent()
{ {
unsigned int startOfPageElement = _findStartOfPageElement(); unsigned int startOfPageElement = _findStartOfPageElement();
if(startOfPageElement != std::string::npos) if(startOfPageElement != std::string::npos)
_processObjectContent(startOfPageElement); _processObjectContent(startOfPageElement);
if(_nextHandler) if(_nextHandler)
_nextHandler->processObjectContent(); _nextHandler->processObjectContent();
} }
void changeObjectContent() void changeObjectContent()
{ {
unsigned int startOfPageElement = _findStartOfPageElement(); unsigned int startOfPageElement = _findStartOfPageElement();
if(startOfPageElement != std::string::npos) if(startOfPageElement != std::string::npos)
_changeObjectContent(startOfPageElement); _changeObjectContent(startOfPageElement);
else else
_pageElementNotFound(); _pageElementNotFound();
if(_nextHandler) if(_nextHandler)
_nextHandler->changeObjectContent(); _nextHandler->changeObjectContent();
} }
protected: protected:
//methods //methods
void _setHandlerName(const std::string & handlerName) void _setHandlerName(const std::string & handlerName)
{ {
_handlerName = handlerName; _handlerName = handlerName;
} }
unsigned int _findEndOfElementContent(unsigned int startOfPageElement); unsigned int _findEndOfElementContent(unsigned int startOfPageElement);
void _createAllPageFieldsSet(); void _createAllPageFieldsSet();
//members //members
std::string & _pageContent; std::string & _pageContent;
Object * _page; Object * _page;
std::string _handlerName; std::string _handlerName;
PageElementHandler * _nextHandler; PageElementHandler * _nextHandler;
private: private:
//methods //methods
virtual void _processObjectContent(unsigned int startOfPageElement){}; virtual void _processObjectContent(unsigned int startOfPageElement){};
virtual void _changeObjectContent(unsigned int startOfPageElement) = 0; virtual void _changeObjectContent(unsigned int startOfPageElement) = 0;
virtual void _pageElementNotFound() {}; virtual void _pageElementNotFound() {};
unsigned int _findStartOfPageElement() unsigned int _findStartOfPageElement()
{ {
return Parser::findToken(_pageContent,_handlerName); return Parser::findToken(_pageContent,_handlerName);
} }
//members //members
static std::set<std::string> _allPageFields; static std::set<std::string> _allPageFields;
}; };
} }
#endif #endif

@ -1,41 +1,41 @@
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// PageParser.h
// Declaration of the class PageParser
// Created on: 19-Jun-2009 12:27:56
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#if !defined(EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_) #if !defined(EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_)
#define EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_ #define EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_
#include "Object.h" #include "Object.h"
#include <string> #include <string>
class PageParser class PageParser
{ {
friend class Parser; friend class Parser;
public: public:
PageParser(const std::string & pageContent); PageParser(const std::string & pageContent);
~PageParser(); ~PageParser();
unsigned int getPageNumber() unsigned int getPageNumber()
{ {
return _pageNumber; return _pageNumber;
} }
void merge(const Page & sourcePage); void merge(const Page & sourcePage);
void recalculateObjectNumbers(unsigned int & newNumber); void recalculateObjectNumbers(unsigned int & newNumber);
std::string & getPageContent(); std::string & getPageContent();
const std::map <unsigned int, Object *> & getPageRefs(); const std::map <unsigned int, Object *> & getPageRefs();
private: private:
//methods //methods
//members //members
Object * _root; Object * _root;
unsigned int _pageNumber; unsigned int _pageNumber;
}; };
#endif // !defined(EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_) #endif // !defined(EA_FF53E894_11D7_4c64_A409_DBC97C9EF3CF__INCLUDED_)

@ -1,76 +1,76 @@
#if !defined Parser_h #if !defined Parser_h
#define Parser_h #define Parser_h
#include "Object.h" #include "Object.h"
#include "Document.h" #include "Document.h"
#include "Page.h" #include "Page.h"
#include <string> #include <string>
#include <vector> #include <vector>
namespace merge_lib namespace merge_lib
{ {
class Document; class Document;
//This class parsed the pdf document and creates //This class parsed the pdf document and creates
//an Document object //an Document object
class Parser class Parser
{ {
public: public:
Parser(): _root(0), _fileContent(), _objects(), _document(0) {}; Parser(): _root(0), _fileContent(), _objects(), _document(0) {};
Document * parseDocument(const char * fileName); Document * parseDocument(const char * fileName);
static const std::string WHITESPACES; static const std::string WHITESPACES;
static const std::string DELIMETERS; static const std::string DELIMETERS;
static const std::string NUMBERS; static const std::string NUMBERS;
static const std::string WHITESPACES_AND_DELIMETERS; static const std::string WHITESPACES_AND_DELIMETERS;
static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL); static bool getNextWord(std::string & out, const std::string &in, size_t &nextPosition,size_t *found = NULL);
static std::string getNextToken( const std::string &in, unsigned &position); static std::string getNextToken( const std::string &in, unsigned &position);
static void trim(std::string &str); static void trim(std::string &str);
static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd); static std::string findTokenStr(const std::string &content, const std::string &pattern, size_t start,size_t &foundStart, size_t &foundEnd);
static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0); static size_t findToken(const std::string &content, const std::string &keyword,size_t start = 0);
static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0); static size_t findTokenName(const std::string &content, const std::string &keyword,size_t start = 0);
static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement); static unsigned int findEndOfElementContent(const std::string &content, unsigned int startOfPageElement);
static bool tokenIsAName(const std::string &content, size_t start ); static bool tokenIsAName(const std::string &content, size_t start );
protected: protected:
const std::string & _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair<unsigned int, unsigned int> &, bool &); const std::string & _getObjectContent(unsigned int objectPosition, unsigned int & objectNumber, unsigned int & generationNumber, std::pair<unsigned int, unsigned int> &, bool &);
virtual unsigned int _readTrailerAndReturnRoot(); virtual unsigned int _readTrailerAndReturnRoot();
private: private:
//methods //methods
virtual void _getFileContent(const char * fileName); virtual void _getFileContent(const char * fileName);
bool _getNextObject(Object * object); bool _getNextObject(Object * object);
void _callObserver(std::string objectContent); void _callObserver(std::string objectContent);
void _createObjectTree(const char * fileName); void _createObjectTree(const char * fileName);
void _retrieveAllPages(Object * objectWithKids); void _retrieveAllPages(Object * objectWithKids);
void _fillOutObjects(); void _fillOutObjects();
virtual void _readXRefAndCreateObjects(); virtual void _readXRefAndCreateObjects();
unsigned int _getEndOfLineFromContent(unsigned int fromPosition); unsigned int _getEndOfLineFromContent(unsigned int fromPosition);
const std::pair<unsigned int, unsigned int> & _getLineBounds(const std::string & str, unsigned int fromPosition); const std::pair<unsigned int, unsigned int> & _getLineBounds(const std::string & str, unsigned int fromPosition);
const std::string & _getNextToken(unsigned int & fromPosition); const std::string & _getNextToken(unsigned int & fromPosition);
unsigned int _countTokens(unsigned int leftBound, unsigned int rightBount); unsigned int _countTokens(unsigned int leftBound, unsigned int rightBount);
unsigned int _skipWhiteSpaces(const std::string & str); unsigned int _skipWhiteSpaces(const std::string & str);
unsigned int _skipWhiteSpacesFromContent(unsigned int fromPosition); unsigned int _skipWhiteSpacesFromContent(unsigned int fromPosition);
const std::map<unsigned int, Object::ReferencePositionsInContent> & _getReferences(const std::string & objectContent); const std::map<unsigned int, Object::ReferencePositionsInContent> & _getReferences(const std::string & objectContent);
unsigned int _skipNumber(const std::string & str, unsigned int currentPosition); unsigned int _skipNumber(const std::string & str, unsigned int currentPosition);
unsigned int _skipWhiteSpaces(const std::string & str, unsigned int fromPosition); unsigned int _skipWhiteSpaces(const std::string & str, unsigned int fromPosition);
void _createDocument(const char * docName); void _createDocument(const char * docName);
virtual unsigned int _getStartOfXrefWithRoot(); virtual unsigned int _getStartOfXrefWithRoot();
unsigned int _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref); unsigned int _readTrailerAndRterievePrev(const unsigned int startPositionForSearch, unsigned int & previosXref);
void _clearParser(); void _clearParser();
protected: protected:
//members //members
Object * _root; Object * _root;
std::string _fileContent; std::string _fileContent;
std::map<unsigned int, Object *> _objects; std::map<unsigned int, Object *> _objects;
Document * _document; Document * _document;
}; };
} }
#endif #endif

@ -1,124 +1,124 @@
#include "Rectangle.h" #include "Rectangle.h"
#include "Utils.h" #include "Utils.h"
#include "Object.h" #include "Object.h"
#include "Parser.h" #include "Parser.h"
#include <iostream> #include <iostream>
using namespace merge_lib; using namespace merge_lib;
// Creates a rectangle with the given name, all four coordinates set to 0
// and a default-constructed transformation matrix.
// The initializers follow the member declaration order of the class
// (x1, x2, y1, y2, _rectangleName, _tm), which is the order in which the
// members are initialized regardless of how the list is written.
Rectangle::Rectangle(const char * rectangleName):
    x1(0),
    x2(0),
    y1(0),
    y2(0),
    _rectangleName(rectangleName),
    _tm()
{
}
// Creates a rectangle with the given name and reads its coordinates from
// content: the name is located with Parser::findToken and the four numbers
// between the following "[" and "]" are read as x1 y1 x2 y2.
//
// When the name, the "[" or the matching "]" cannot be found, a message is
// written to std::cerr (name missing only) and the coordinates stay 0.
//
// Positions are kept in std::string::size_type: an unsigned int copy of a
// find() result never compares equal to std::string::npos where size_type
// is wider, so the "not found" case was not detected. The closing bracket
// is searched from the opening one so that it cannot lie in front of it.
Rectangle::Rectangle(const char * rectangleName, const std::string content):
    x1(0),
    x2(0),
    y1(0),
    y2(0),
    _rectangleName(rectangleName),
    _tm()
{
    typedef std::string::size_type Position;

    const Position rectanglePosition = Parser::findToken(content,rectangleName);
    if( rectanglePosition == std::string::npos )
    {
        std::cerr<<"Unable to find rectangle name "<<rectangleName<<" in content\n";
        return;
    }

    const Position beg = content.find("[",rectanglePosition);
    if( beg == std::string::npos )
        return;

    const Position end = content.find("]",beg);
    if( end == std::string::npos )
        return;

    std::stringstream in;
    in<<content.substr(beg+1,end-beg-1);
    in>>x1>>y1>>x2>>y2;
}
void Rectangle::appendRectangleToString(std::string & content, const char * delimeter) void Rectangle::appendRectangleToString(std::string & content, const char * delimeter)
{ {
content.append(_getRectangleAsString(delimeter)); content.append(_getRectangleAsString(delimeter));
} }
const std::string Rectangle::_getRectangleAsString(const char * delimeter) const std::string Rectangle::_getRectangleAsString(const char * delimeter)
{ {
std::string result(_rectangleName); std::string result(_rectangleName);
result.append(" ["); result.append(" [");
result.append(Utils::doubleToStr(x1)); result.append(Utils::doubleToStr(x1));
result.append(delimeter); result.append(delimeter);
result.append(Utils::doubleToStr(y1)); result.append(Utils::doubleToStr(y1));
result.append(delimeter); result.append(delimeter);
result.append(Utils::doubleToStr(x2)); result.append(Utils::doubleToStr(x2));
result.append(delimeter); result.append(delimeter);
result.append(Utils::doubleToStr(y2)); result.append(Utils::doubleToStr(y2));
result.append(" ]\n"); result.append(" ]\n");
return result; return result;
} }
void Rectangle::setNewRectangleName(const char * newName) void Rectangle::setNewRectangleName(const char * newName)
{ {
_rectangleName = newName; _rectangleName = newName;
} }
void Rectangle::recalculateInternalRectangleCoordinates(const PageTransformations & transformations) void Rectangle::recalculateInternalRectangleCoordinates(const PageTransformations & transformations)
{ {
TransformationMatrix tempTm; TransformationMatrix tempTm;
for(size_t i = 0; i < transformations.size(); ++i) for(size_t i = 0; i < transformations.size(); ++i)
{ {
tempTm = transformations[i]->getMatrix(); tempTm = transformations[i]->getMatrix();
tempTm.add(_tm); tempTm.add(_tm);
_tm = tempTm; _tm = tempTm;
} }
_tm.recalculateCoordinates(x1, y1); _tm.recalculateCoordinates(x1, y1);
_tm.recalculateCoordinates(x2, y2); _tm.recalculateCoordinates(x2, y2);
} }
// Writes the current rectangle back into the object that holds it and
// updates the matrices of the objects referenced from its "/AP" entry.
//
// Steps:
//  1. The object containing _rectangleName is looked up through
//     objectWithRectangle->findObject. If nothing is found the function
//     returns without touching anything.
//  2. The text from the rectangle name up to and including the next "]" is
//     replaced by the output of _getRectangleAsString(delimeter). The
//     replacement is skipped when the name or the "]" is missing.
//  3. For every child located between "/AP" and the following ">>" the
//     "[...]" value after "/Matrix" is replaced by _tm.getValue(). Children
//     without "/Matrix" or without a complete "[...]" are left unchanged.
//
// All find() results are checked against std::string::npos in
// std::string::size_type before they are narrowed to the unsigned int
// positions that the Object interface is called with; the checks were either
// missing or could never succeed where size_type is wider than unsigned int.
void Rectangle::updateRectangle(Object * objectWithRectangle, const char * delimeter)
{
    typedef std::string::size_type Position;

    // initialized so that a failed lookup can be recognized below
    Object * foundObjectWithRectangle = 0;
    unsigned int fake = 0;
    objectWithRectangle->findObject(std::string(_rectangleName), foundObjectWithRectangle, fake);
    if(!foundObjectWithRectangle)
        return;

    std::string objectContent = foundObjectWithRectangle->getObjectContent();

    const Position namePosition = objectContent.find(_rectangleName);
    if(namePosition != std::string::npos)
    {
        const Position closingBracket = objectContent.find("]", namePosition);
        if(closingBracket != std::string::npos)
        {
            const unsigned int rectanglePosition = static_cast<unsigned int>(namePosition);
            const unsigned int endOfRectangle = static_cast<unsigned int>(closingBracket + 1);
            foundObjectWithRectangle->eraseContent(rectanglePosition, endOfRectangle - rectanglePosition);
            foundObjectWithRectangle->insertToContent(rectanglePosition, _getRectangleAsString(delimeter));
            // reread the objectContent, since it was changed just above;
            objectContent = foundObjectWithRectangle->getObjectContent();
        }
    }

    //update matrix
    const Position apPosition = Parser::findToken(objectContent,"/AP");
    if(apPosition == std::string::npos)
        return;

    const unsigned int startOfAP = static_cast<unsigned int>(apPosition);
    // a missing ">>" keeps the former behavior: the right bound becomes the
    // narrowed npos value
    const unsigned int endOfAP = static_cast<unsigned int>(objectContent.find(">>", apPosition));
    std::vector<Object *> aps = foundObjectWithRectangle->getChildrenByBounds(startOfAP, endOfAP);
    for(size_t i = 0; i < aps.size(); ++i)
    {
        Object * objectWithMatrix = aps[i];
        const std::string matrixContent = objectWithMatrix->getObjectContent();

        const Position matrixPosition = Parser::findToken(matrixContent,"/Matrix");
        if(matrixPosition == std::string::npos)
            continue;

        const Position leftBracket = matrixContent.find("[", matrixPosition);
        if(leftBracket == std::string::npos)
            continue;

        const Position rightBracket = matrixContent.find("]", leftBracket);
        if(rightBracket == std::string::npos)
            continue;

        const unsigned int matrixValueLeftBound = static_cast<unsigned int>(leftBracket);
        const unsigned int matrixValueRightBound = static_cast<unsigned int>(rightBracket + 1);
        objectWithMatrix->eraseContent(matrixValueLeftBound, matrixValueRightBound - matrixValueLeftBound);
        objectWithMatrix->insertToContent(matrixValueLeftBound, _tm.getValue());
    }
}
double Rectangle::getWidth() double Rectangle::getWidth()
{ {
return x2 - x1; return x2 - x1;
} }
double Rectangle::getHeight() double Rectangle::getHeight()
{ {
return y2 - y1; return y2 - y1;
} }

@ -1,39 +1,39 @@
#if !defined Rectangle_h #if !defined Rectangle_h
#define Rectangle_h #define Rectangle_h
#include "Transformation.h" #include "Transformation.h"
#include <vector> #include <vector>
#include <map> #include <map>
namespace merge_lib namespace merge_lib
{ {
class Object; class Object;
class Rectangle class Rectangle
{ {
public: public:
Rectangle(const char * rectangleName); Rectangle(const char * rectangleName);
Rectangle(const char * rectangleName, const std::string content); Rectangle(const char * rectangleName, const std::string content);
void appendRectangleToString(std::string & content, const char * delimeter); void appendRectangleToString(std::string & content, const char * delimeter);
void updateRectangle(Object * objectWithRectangle, const char * delimeter); void updateRectangle(Object * objectWithRectangle, const char * delimeter);
void setNewRectangleName(const char * newName); void setNewRectangleName(const char * newName);
void recalculateInternalRectangleCoordinates(const PageTransformations & transformations); void recalculateInternalRectangleCoordinates(const PageTransformations & transformations);
double getWidth(); double getWidth();
double getHeight(); double getHeight();
//members //members
double x1, x2, y1, y2; double x1, x2, y1, y2;
private: private:
//methods //methods
const std::string _getRectangleAsString(const char * delimeter); const std::string _getRectangleAsString(const char * delimeter);
//members //members
const char * _rectangleName; const char * _rectangleName;
TransformationMatrix _tm; TransformationMatrix _tm;
}; };
} }
#endif #endif

@ -1,25 +1,25 @@
#if !defined RemoveHimselfHandler_h #if !defined RemoveHimselfHandler_h
#define RemoveHimselfHandler_h #define RemoveHimselfHandler_h
#include "PageElementHandler.h" #include "PageElementHandler.h"
namespace merge_lib namespace merge_lib
{ {
//This class remove field from Page object's content. //This class remove field from Page object's content.
class RemoveHimselfHandler: public PageElementHandler class RemoveHimselfHandler: public PageElementHandler
{ {
public: public:
RemoveHimselfHandler(Object * page, const std::string & handlerName): PageElementHandler(page) RemoveHimselfHandler(Object * page, const std::string & handlerName): PageElementHandler(page)
{ {
_setHandlerName(handlerName); _setHandlerName(handlerName);
} }
virtual ~RemoveHimselfHandler() virtual ~RemoveHimselfHandler()
{ {
} }
private: private:
//methods //methods
virtual void _changeObjectContent(unsigned int startOfPageElement); virtual void _changeObjectContent(unsigned int startOfPageElement);
}; };
} }
#endif #endif

@ -1,14 +1,14 @@
#include "RemoveHimSelfHandler.h" #include "RemoveHimSelfHandler.h"
using namespace merge_lib; using namespace merge_lib;
void RemoveHimselfHandler::_changeObjectContent(unsigned int startOfPageElement) void RemoveHimselfHandler::_changeObjectContent(unsigned int startOfPageElement)
{ {
unsigned int endOfElement = _findEndOfElementContent(startOfPageElement); unsigned int endOfElement = _findEndOfElementContent(startOfPageElement);
_page->forgetAboutChildren(startOfPageElement, endOfElement); _page->forgetAboutChildren(startOfPageElement, endOfElement);
_page->eraseContent(startOfPageElement, endOfElement - startOfPageElement); _page->eraseContent(startOfPageElement, endOfElement - startOfPageElement);
} }

@ -1,51 +1,51 @@
#if !defined RotationHandler_h #if !defined RotationHandler_h
#define RotationHandler_h #define RotationHandler_h
#include "PageElementHandler.h" #include "PageElementHandler.h"
#include "Page.h" #include "Page.h"
namespace merge_lib namespace merge_lib
{ {
//This class remove field from Page object's content. //This class remove field from Page object's content.
class RotationHandler: public PageElementHandler class RotationHandler: public PageElementHandler
{ {
public: public:
RotationHandler(Object * page, const std::string & handlerName, Page & basePage): RotationHandler(Object * page, const std::string & handlerName, Page & basePage):
PageElementHandler(page), PageElementHandler(page),
_basePage(basePage) _basePage(basePage)
{ {
_setHandlerName(handlerName); _setHandlerName(handlerName);
} }
virtual ~RotationHandler() virtual ~RotationHandler()
{ {
} }
private: private:
//methods //methods
virtual void _processObjectContent(unsigned int startOfPageElement) virtual void _processObjectContent(unsigned int startOfPageElement)
{ {
unsigned int endOfElement = _findEndOfElementContent(startOfPageElement); unsigned int endOfElement = _findEndOfElementContent(startOfPageElement);
std::string rotationField = _page->getObjectContent().substr(startOfPageElement, endOfElement - startOfPageElement); std::string rotationField = _page->getObjectContent().substr(startOfPageElement, endOfElement - startOfPageElement);
std::string numbers("1234567890"); std::string numbers("1234567890");
unsigned int startOfNumber = rotationField.find_first_of(numbers); unsigned int startOfNumber = rotationField.find_first_of(numbers);
if( startOfNumber > 0 ) if( startOfNumber > 0 )
{ {
if( rotationField[startOfNumber-1] == '-' ) if( rotationField[startOfNumber-1] == '-' )
{ {
startOfNumber--; // negative number startOfNumber--; // negative number
} }
} }
unsigned int endOfNumber = rotationField.find_first_not_of(numbers, startOfNumber + 1); unsigned int endOfNumber = rotationField.find_first_not_of(numbers, startOfNumber + 1);
std::string rotationStr = rotationField.substr(startOfNumber, endOfNumber - startOfNumber + 1); std::string rotationStr = rotationField.substr(startOfNumber, endOfNumber - startOfNumber + 1);
int rotation = 0; int rotation = 0;
std::stringstream strin(rotationStr); std::stringstream strin(rotationStr);
strin>>rotation; strin>>rotation;
_basePage.setRotation(rotation); _basePage.setRotation(rotation);
} }
virtual void _changeObjectContent(unsigned int startOfPageElement) {}; virtual void _changeObjectContent(unsigned int startOfPageElement) {};
//members //members
Page & _basePage; Page & _basePage;
}; };
} }
#endif #endif

@ -1,43 +1,43 @@
#include "RunLengthDecode.h" #include "RunLengthDecode.h"
using namespace merge_lib; using namespace merge_lib;
/* The encoded data is a sequence of /* The encoded data is a sequence of
runs, where each run consists of a length byte followed by 1 to 128 bytes of data. If runs, where each run consists of a length byte followed by 1 to 128 bytes of data. If
the length byte is in the range 0 to 127, the following length + 1 (1 to 128) bytes the length byte is in the range 0 to 127, the following length + 1 (1 to 128) bytes
are copied literally during decompression. If length is in the range 129 to 255, the are copied literally during decompression. If length is in the range 129 to 255, the
following single byte is to be copied 257 - length (2 to 128) times during decom- following single byte is to be copied 257 - length (2 to 128) times during decom-
pression. A length value of 128 denotes EOD. */ pression. A length value of 128 denotes EOD. */
// Decodes a PDF RunLengthDecode stream in place.
//
// encoded - on entry the encoded bytes; on success it is replaced by the
//           decoded bytes.
// Returns true on success. Returns false, leaving `encoded` unchanged, when
// the data ends in the middle of a run.
//
// Run format (see the comment above): a length byte L followed by
//   L in 0..127   : L + 1 literal bytes that are copied as they are;
//   L in 129..255 : one byte that is repeated 257 - L times;
//   L == 128      : end of data.
bool RunLengthDecode::decode(std::string & encoded)
{
    const std::string::size_type total = encoded.size();
    std::string decoded;
    std::string::size_type pos = 0;
    while( pos < total )
    {
        const unsigned char length = static_cast<unsigned char>(encoded[pos++]);
        if( length == 128 )
        {
            break; //EOD
        }
        if( length < 128 )
        {
            // Literal run: copy length + 1 *different* bytes and step over
            // all of them (previously one byte was repeated instead).
            const std::string::size_type count = length + 1u;
            if( count > total - pos )
            {
                return false; // truncated literal run
            }
            decoded.append(encoded, pos, count);
            pos += count;
        }
        else
        {
            // Repeat run: a single byte replicated 257 - length times.
            if( pos >= total )
            {
                return false; // the byte to repeat is missing
            }
            decoded.append(257u - length, encoded[pos]);
            ++pos;
        }
    }
    // Hand the result back to the caller; without this the decoded data was
    // discarded and the caller kept the still-encoded stream.
    encoded.swap(decoded);
    return true;
}

@ -1,22 +1,22 @@
#ifndef RunLengthDecode_H #ifndef RunLengthDecode_H
#define RunLengthDecode_H #define RunLengthDecode_H
#include <string> #include <string>
#include "Decoder.h" #include "Decoder.h"
namespace merge_lib namespace merge_lib
{ {
// this class provides method for RunLengthDecode aaaaaencoding and decoding // this class provides method for RunLengthDecode aaaaaencoding and decoding
class RunLengthDecode : public Decoder class RunLengthDecode : public Decoder
{ {
public: public:
RunLengthDecode(){}; RunLengthDecode(){};
virtual ~RunLengthDecode(){}; virtual ~RunLengthDecode(){};
bool encode(std::string & decoded){return false;} bool encode(std::string & decoded){return false;}
bool decode(std::string & encoded); bool decode(std::string & encoded);
void initialize(Object * objectWithStream){}; void initialize(Object * objectWithStream){};
}; };
} }
#endif // RunLengthDecode_H

@ -1,331 +1,331 @@
/* /*
The file defines some classes for transformation of PDF content stream. The file defines some classes for transformation of PDF content stream.
*/ */
#ifndef TRANSFORMATION_H #ifndef TRANSFORMATION_H
#define TRANSFORMATION_H #define TRANSFORMATION_H
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <math.h> #include <math.h>
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include "Utils.h" #include "Utils.h"
namespace merge_lib namespace merge_lib
{ {
#ifndef M_PI #ifndef M_PI
#define M_PI 3.14159265358979323846 #define M_PI 3.14159265358979323846
#endif #endif
class TransformationMatrix class TransformationMatrix
{ {
public: public:
TransformationMatrix(double a = 1, double b = 0, double c = 0, double d = 1, double e = 0, double f = 0): TransformationMatrix(double a = 1, double b = 0, double c = 0, double d = 1, double e = 0, double f = 0):
_a(a), _b(b), _c(c), _d(d), _e(e), _f(f) _a(a), _b(b), _c(c), _d(d), _e(e), _f(f)
{} {}
TransformationMatrix(const TransformationMatrix & copy) TransformationMatrix(const TransformationMatrix & copy)
{ {
setParameters(copy._a, copy._b, copy._c, copy._d, copy._e, copy._f); setParameters(copy._a, copy._b, copy._c, copy._d, copy._e, copy._f);
} }
void setParameters(double a, double b, double c, double d, double e, double f) void setParameters(double a, double b, double c, double d, double e, double f)
{ {
_a = Utils::normalizeValue(a); _a = Utils::normalizeValue(a);
_b = Utils::normalizeValue(b); _b = Utils::normalizeValue(b);
_c = Utils::normalizeValue(c); _c = Utils::normalizeValue(c);
_d = Utils::normalizeValue(d); _d = Utils::normalizeValue(d);
_e = Utils::normalizeValue(e); _e = Utils::normalizeValue(e);
_f = Utils::normalizeValue(f); _f = Utils::normalizeValue(f);
} }
void add(const TransformationMatrix & tm) void add(const TransformationMatrix & tm)
{ {
double newA = _a*tm._a + _b*tm._c; double newA = _a*tm._a + _b*tm._c;
double newB = _a*tm._b + _b*tm._d; double newB = _a*tm._b + _b*tm._d;
double newC = _c*tm._a + _d*tm._c; double newC = _c*tm._a + _d*tm._c;
double newD = _c*tm._b + _d*tm._d; double newD = _c*tm._b + _d*tm._d;
double newE = _e*tm._a + _f*tm._c + tm._e; double newE = _e*tm._a + _f*tm._c + tm._e;
double newF = _e*tm._b + _f*tm._d + tm._f; double newF = _e*tm._b + _f*tm._d + tm._f;
// we need to round the values to avoid not-needed transformation // we need to round the values to avoid not-needed transformation
// since 1.e-17 is not 0 from PDF point of view, while such double // since 1.e-17 is not 0 from PDF point of view, while such double
// value really means 0. // value really means 0.
_a = Utils::normalizeValue(newA); _a = Utils::normalizeValue(newA);
_b = Utils::normalizeValue(newB); _b = Utils::normalizeValue(newB);
_c = Utils::normalizeValue(newC); _c = Utils::normalizeValue(newC);
_d = Utils::normalizeValue(newD); _d = Utils::normalizeValue(newD);
_e = Utils::normalizeValue(newE); _e = Utils::normalizeValue(newE);
_f = Utils::normalizeValue(newF); _f = Utils::normalizeValue(newF);
} }
std::string getValue() std::string getValue()
{ {
std::ostringstream value; std::ostringstream value;
value << "[ " << _a << " " << _b << " " << _c << " " << _d << " " << _e << " " << _f << " ]\n"; value << "[ " << _a << " " << _b << " " << _c << " " << _d << " " << _e << " " << _f << " ]\n";
return value.str(); return value.str();
} }
std::string getCMT() std::string getCMT()
{ {
std::ostringstream buf; std::ostringstream buf;
buf << std::fixed << _a <<" "<< _b <<" "<< _c <<" "<< _d << " "<< _e << " "<< _f << " cm\n"; buf << std::fixed << _a <<" "<< _b <<" "<< _c <<" "<< _d << " "<< _e << " "<< _f << " cm\n";
return buf.str(); return buf.str();
} }
void recalculateCoordinates(double & x, double &y) void recalculateCoordinates(double & x, double &y)
{ {
double inputX = x; double inputX = x;
double inputY = y; double inputY = y;
x = _a*inputX + _c*inputY + _e; x = _a*inputX + _c*inputY + _e;
y = _b*inputX + _d*inputY + _f; y = _b*inputX + _d*inputY + _f;
} }
private: private:
double _a, _b, _c, _d, _e, _f; double _a, _b, _c, _d, _e, _f;
}; };
// base class of transformation CMT // base class of transformation CMT
class Transformation class Transformation
{ {
public: public:
Transformation(): _tm(){}; Transformation(): _tm(){};
virtual Transformation * getClone() const = 0; virtual Transformation * getClone() const = 0;
std::string getCMT() std::string getCMT()
{ {
return _tm.getCMT(); return _tm.getCMT();
} }
virtual ~Transformation() {}; virtual ~Transformation() {};
const TransformationMatrix & getMatrix() const TransformationMatrix & getMatrix()
{ {
return _tm; return _tm;
} }
void addMatrix(const TransformationMatrix & tm) void addMatrix(const TransformationMatrix & tm)
{ {
_tm.add(tm); _tm.add(tm);
} }
protected: protected:
TransformationMatrix _tm; TransformationMatrix _tm;
}; };
// rotation CMT // rotation CMT
class Rotation: public Transformation class Rotation: public Transformation
{ {
public: public:
Rotation(double angle):Transformation(),_angle(angle) Rotation(double angle):Transformation(),_angle(angle)
{ {
double cosValue = cos(_angle * (M_PI / 180)); double cosValue = cos(_angle * (M_PI / 180));
double sinValue = sin(_angle * (M_PI / 180)); double sinValue = sin(_angle * (M_PI / 180));
_tm.setParameters(cosValue, sinValue, -sinValue, cosValue, 0, 0); _tm.setParameters(cosValue, sinValue, -sinValue, cosValue, 0, 0);
}; };
virtual ~Rotation(){}; virtual ~Rotation(){};
virtual Transformation * getClone() const virtual Transformation * getClone() const
{ {
return new Rotation(_angle); return new Rotation(_angle);
} }
protected: protected:
double _angle; // number of degrees to rotate double _angle; // number of degrees to rotate
}; };
// translation CMT // translation CMT
class Translation: public Transformation class Translation: public Transformation
{ {
public: public:
Translation(double x, double y):Transformation(),_x(x),_y(y) Translation(double x, double y):Transformation(),_x(x),_y(y)
{ {
_tm.setParameters(1, 0, 0, 1, _x, _y); _tm.setParameters(1, 0, 0, 1, _x, _y);
}; };
virtual ~Translation(){}; virtual ~Translation(){};
virtual Transformation * getClone() const virtual Transformation * getClone() const
{ {
return new Translation(_x, _y); return new Translation(_x, _y);
} }
protected: protected:
double _x; double _x;
double _y; double _y;
}; };
// scaling CMT // scaling CMT
class Scaling: public Transformation class Scaling: public Transformation
{ {
public: public:
Scaling(double x):Transformation(),_x(x) Scaling(double x):Transformation(),_x(x)
{ {
_tm.setParameters(_x, 0, 0, _x, 0, 0); _tm.setParameters(_x, 0, 0, _x, 0, 0);
}; };
virtual Transformation * getClone() const virtual Transformation * getClone() const
{ {
return new Scaling(_x); return new Scaling(_x);
} }
protected: protected:
double _x; // the value to multiply the content double _x; // the value to multiply the content
}; };
// transformation can consist of one or several // transformation can consist of one or several
// operations like rotation, scaling, translation // operations like rotation, scaling, translation
typedef std::vector<Transformation *> PageTransformations; typedef std::vector<Transformation *> PageTransformations;
// This is interface class for setting transformation parameters // This is interface class for setting transformation parameters
// //
class TransformationDescription class TransformationDescription
{ {
public: public:
TransformationDescription( double x = 0, // leftBottomX coordinate TransformationDescription( double x = 0, // leftBottomX coordinate
double y = 0, // leftBottomY coordinate double y = 0, // leftBottomY coordinate
double scale = 1, // scale (by default = 1 = NONE double scale = 1, // scale (by default = 1 = NONE
int angel = 0): // rotation (0,90,180,270) int angel = 0): // rotation (0,90,180,270)
_x(x),_y(y),_scale(scale),_angel(angel) _x(x),_y(y),_scale(scale),_angel(angel)
{ {
if( _angel ) if( _angel )
{ {
_transforms.push_back(new Rotation(_angel)); _transforms.push_back(new Rotation(_angel));
} }
if( !Utils::doubleEquals(_scale,1) && !Utils::doubleEquals(_scale,0) ) if( !Utils::doubleEquals(_scale,1) && !Utils::doubleEquals(_scale,0) )
{ {
_transforms.push_back(new Scaling(_scale)); _transforms.push_back(new Scaling(_scale));
} }
} }
virtual ~TransformationDescription() virtual ~TransformationDescription()
{ {
for(size_t i = 0;i<_annotsTransforms.size();i++) for(size_t i = 0;i<_annotsTransforms.size();i++)
{ {
if( _annotsTransforms[i] ) if( _annotsTransforms[i] )
{ {
delete _annotsTransforms[i]; delete _annotsTransforms[i];
_annotsTransforms[i] = 0; _annotsTransforms[i] = 0;
} }
_annotsTransforms.clear(); _annotsTransforms.clear();
} }
for(size_t i = 0;i<_transforms.size();i++) for(size_t i = 0;i<_transforms.size();i++)
{ {
if( _transforms[i] ) if( _transforms[i] )
{ {
delete _transforms[i]; delete _transforms[i];
_transforms[i] = 0; _transforms[i] = 0;
} }
} }
_transforms.clear(); _transforms.clear();
} }
void addRotation(int rotation) void addRotation(int rotation)
{ {
if( rotation ) if( rotation )
{ {
_angel = (_angel - rotation)%360; _angel = (_angel - rotation)%360;
// /Rotation rotate the object, while _angel rotate the coordinate system // /Rotation rotate the object, while _angel rotate the coordinate system
// where object is located, that's why // where object is located, that's why
// we should compensate that // we should compensate that
_transforms.push_back(new Rotation(360-rotation)); _transforms.push_back(new Rotation(360-rotation));
} }
} }
const PageTransformations & getTransformations() const const PageTransformations & getTransformations() const
{ {
return _transforms; return _transforms;
} }
const PageTransformations getAnnotsTransformations() const const PageTransformations getAnnotsTransformations() const
{ {
PageTransformations trans; PageTransformations trans;
trans = _transforms; trans = _transforms;
for(size_t i = 0; i < _annotsTransforms.size(); ++i) for(size_t i = 0; i < _annotsTransforms.size(); ++i)
{ {
trans.push_back(_annotsTransforms[i]); trans.push_back(_annotsTransforms[i]);
} }
return trans; return trans;
} }
void addAnnotsTransformation( Transformation & trans ) void addAnnotsTransformation( Transformation & trans )
{ {
_annotsTransforms.push_back(trans.getClone()); _annotsTransforms.push_back(trans.getClone());
} }
// method recalculates the final translation in order to put // method recalculates the final translation in order to put
// object into needed x,y coordinates. // object into needed x,y coordinates.
// Page is located from position 0,0 // Page is located from position 0,0
void recalculateTranslation(double width, double height) void recalculateTranslation(double width, double height)
{ {
double dx1 = 0; double dx1 = 0;
double dy1 = 0; double dy1 = 0;
double scaling = ( Utils::doubleEquals(_scale,0))?1:_scale; double scaling = ( Utils::doubleEquals(_scale,0))?1:_scale;
switch(_angel) switch(_angel)
{ {
case 0: case 0:
dx1 = _x/scaling; dx1 = _x/scaling;
dy1 = _y/scaling; dy1 = _y/scaling;
break; break;
case -270: case -270:
case 90: case 90:
dx1 = _y/scaling ; dx1 = _y/scaling ;
dy1 = - _x /scaling - height; dy1 = - _x /scaling - height;
break; break;
case 180: case 180:
case -180: case -180:
dx1 = - _x /scaling - width; dx1 = - _x /scaling - width;
dy1 = - _y /scaling - height; dy1 = - _y /scaling - height;
break; break;
case 270: case 270:
case -90: case -90:
dx1 = - _y/scaling - width; dx1 = - _y/scaling - width;
dy1 = _x/scaling; dy1 = _x/scaling;
break; break;
default: default:
std::cerr<<"Unsupported rotation parameter"<<_angel<<std::endl; std::cerr<<"Unsupported rotation parameter"<<_angel<<std::endl;
break; break;
} }
//std::cerr<< "dx1 = "<<dx1<<"dy1 = "<<dy1<<std::endl; //std::cerr<< "dx1 = "<<dx1<<"dy1 = "<<dy1<<std::endl;
if( ! (Utils::doubleEquals(dx1,0) && Utils::doubleEquals(dy1,0)) ) if( ! (Utils::doubleEquals(dx1,0) && Utils::doubleEquals(dy1,0)) )
{ {
// This translation is needed to put transformed content into // This translation is needed to put transformed content into
// desired coordinates // desired coordinates
_transforms.push_back(new Translation(dx1,dy1)); _transforms.push_back(new Translation(dx1,dy1));
} }
} }
TransformationDescription( const TransformationDescription & copy) TransformationDescription( const TransformationDescription & copy)
{ {
*this = copy; *this = copy;
} }
TransformationDescription& operator = (const TransformationDescription &copy) TransformationDescription& operator = (const TransformationDescription &copy)
{ {
if( this != &copy ) if( this != &copy )
{ {
for(size_t i = 0;i < copy._annotsTransforms.size();i++) for(size_t i = 0;i < copy._annotsTransforms.size();i++)
{ {
_annotsTransforms.push_back(copy._annotsTransforms[i]->getClone()); _annotsTransforms.push_back(copy._annotsTransforms[i]->getClone());
} }
for(size_t i = 0; i < copy._transforms.size(); ++i) for(size_t i = 0; i < copy._transforms.size(); ++i)
{ {
_transforms.push_back(copy._transforms[i]->getClone()); _transforms.push_back(copy._transforms[i]->getClone());
} }
_x = copy._x; _x = copy._x;
_y = copy._y; _y = copy._y;
_scale = copy._scale; _scale = copy._scale;
_angel = copy._angel; _angel = copy._angel;
} }
return *this; return *this;
} }
std::string getCMT() std::string getCMT()
{ {
std::stringstream content; std::stringstream content;
for(size_t i = 0;i<_transforms.size();i++) for(size_t i = 0;i<_transforms.size();i++)
{ {
content<<_transforms[i]->getCMT(); content<<_transforms[i]->getCMT();
} }
return content.str(); return content.str();
} }
private: private:
double _x; double _x;
double _y; double _y;
double _scale; double _scale;
int _angel; int _angel;
PageTransformations _transforms; PageTransformations _transforms;
PageTransformations _annotsTransforms; PageTransformations _annotsTransforms;
}; };
} }
#endif #endif

@ -1,30 +1,30 @@
#if !defined TypeElementHandler_h #if !defined TypeElementHandler_h
#define TypeElementHandler_h #define TypeElementHandler_h
#include "PageElementHandler.h" #include "PageElementHandler.h"
namespace merge_lib namespace merge_lib
{ {
class TypeElementHandler: public PageElementHandler class TypeElementHandler: public PageElementHandler
{ {
public: public:
TypeElementHandler(Object * page): PageElementHandler(page) TypeElementHandler(Object * page): PageElementHandler(page)
{ {
_setHandlerName("/Type"); _setHandlerName("/Type");
} }
private: private:
virtual void _changeObjectContent(unsigned int startOfPageElement) virtual void _changeObjectContent(unsigned int startOfPageElement)
{ {
std::string searchPattern("/Page"); std::string searchPattern("/Page");
unsigned int startOfPage = _pageContent.find(searchPattern, startOfPageElement); unsigned int startOfPage = _pageContent.find(searchPattern, startOfPageElement);
_page->eraseContent(startOfPage, searchPattern.size()); _page->eraseContent(startOfPage, searchPattern.size());
std::string xObject = " /XObject\n"; std::string xObject = " /XObject\n";
_page->insertToContent(startOfPage, xObject); _page->insertToContent(startOfPage, xObject);
static std::string subtype("/Subtype /Form\n"); static std::string subtype("/Subtype /Form\n");
_page->insertToContent(startOfPage + xObject.size(), subtype); _page->insertToContent(startOfPage + xObject.size(), subtype);
} }
}; };
} }
#endif #endif

@ -1,197 +1,197 @@
#include "Config.h" #include "Config.h"
#include "Utils.h" #include "Utils.h"
#include "Exception.h" #include "Exception.h"
#include <iostream> #include <iostream>
#include <cmath> #include <cmath>
#include <sstream> #include <sstream>
#include <fstream> #include <fstream>
#include <string.h> #include <string.h>
using namespace merge_lib; using namespace merge_lib;
// Converts a decimal string to int, ignoring leading zeros.
// A non-empty string made only of '0' characters yields 0. Everything else
// is delegated to _stringToInt, which throws Exception for text it rejects.
int Utils::stringToInt(const std::string & str) //throw ConvertException
{
    const std::string::size_type firstSignificant = str.find_first_not_of('0');

    // for ex. 0 or 00000000
    if( !str.empty() && firstSignificant == std::string::npos )
    {
        return 0;
    }
    // Leading zeros are only cut from strings longer than one character.
    if( (str.size() > 1) && (firstSignificant != 0) )
    {
        // const on purpose: keeps the call on the const-reference overload.
        const std::string withoutZeros = str.substr(firstSignificant);
        return _stringToInt(withoutZeros);
    }
    return _stringToInt(str);
}
// Parses the beginning of `s` as a double. Returns 0 when extraction fails.
double Utils::stringToDouble(const std::string & s )
{
    double parsed = 0;
    std::istringstream input(s);
    input >> parsed;
    if( input.fail() )
    {
        //TODO or throw exception? Think about!
        return 0;
    }
    return parsed;
}
// Returns the decimal text of `integer`.
std::string Utils::uIntToStr(unsigned int integer)
{
    // Three decimal digits per byte is a safe upper bound, plus the
    // terminator. The former 10-byte buffer could hold only 9 digits, so
    // values from 1000000000 upwards lost their last digit.
    char str[sizeof(unsigned int) * 3 + 1];
    snprintf(str, sizeof(str), "%u", integer);
    return std::string(str);
}
// Returns `doubleValue` formatted with "%f" (six digits after the point).
std::string Utils::doubleToStr(double doubleValue)
{
    // "%f" never switches to exponent form, so the text grows with the
    // magnitude: sign + up to 309 integer digits + '.' + 6 decimals +
    // terminator = 318 characters for the largest double. The former 16-byte
    // buffer silently cut off anything longer than 15 characters.
    char str[320];
    snprintf(str, sizeof(str), "%f", doubleValue);
    return std::string(str);
}
int Utils::_stringToInt(const std::string & str) //throw ConvertException int Utils::_stringToInt(const std::string & str) //throw ConvertException
{ {
int intValue = atoi(str.c_str()); int intValue = atoi(str.c_str());
if(((intValue == 0) && (str.size() > 1)) || // for ex string = xxx and integer = 0 if(((intValue == 0) && (str.size() > 1)) || // for ex string = xxx and integer = 0
((intValue == 0) && (str[0] != '0'))) ((intValue == 0) && (str[0] != '0')))
{ {
throw Exception("Internal error"); throw Exception("Internal error");
} }
if((intValue != 0) && (static_cast<unsigned int>(std::log10(static_cast<double>(intValue))) + 1) != str.size()) //for ex. string = 5x and integer = 5) if((intValue != 0) && (static_cast<unsigned int>(std::log10(static_cast<double>(intValue))) + 1) != str.size()) //for ex. string = 5x and integer = 5)
{ {
throw Exception("Internal error"); throw Exception("Internal error");
} }
return intValue; return intValue;
} }
// True when left and right differ by strictly less than epsilon.
bool Utils::doubleEquals(const double left, const double right, const double epsilon)
{
    const double distance = fabs(left - right);
    return distance < epsilon;
}
// Snaps `val` to exactly 0 when it lies within `epsilon` of 0.
// `val` is modified in place and its resulting value is also returned.
double Utils::normalizeValue(double &val, const double epsilon )
{
    // Forward the caller's tolerance; it used to be ignored in favour of the
    // default tolerance of doubleEquals.
    if( Utils::doubleEquals(val, 0, epsilon) )
    {
        val = 0;
    }
    return val;
}
unsigned long Utils::getFileSize(const char * fileName) unsigned long Utils::getFileSize(const char * fileName)
{ {
std::ifstream pdfFile; std::ifstream pdfFile;
pdfFile.open (fileName, std::ios::binary ); pdfFile.open (fileName, std::ios::binary );
if (pdfFile.fail()) if (pdfFile.fail())
{ {
std::stringstream errorMessage("File "); std::stringstream errorMessage("File ");
errorMessage << fileName << " is absent" << "\0"; errorMessage << fileName << " is absent" << "\0";
throw Exception(errorMessage); throw Exception(errorMessage);
} }
// get length of file: // get length of file:
pdfFile.seekg (0, std::ios::end); pdfFile.seekg (0, std::ios::end);
unsigned long length = pdfFile.tellg(); unsigned long length = pdfFile.tellg();
pdfFile.close(); pdfFile.close();
return length; return length;
} }
#ifdef DEBUG_VERBOSE #ifdef DEBUG_VERBOSE
#define TO_HEX_CHAR(char_c) (char_c)>9?'A'+(char_c)-10:'0'+(char_c); #define TO_HEX_CHAR(char_c) (char_c)>9?'A'+(char_c)-10:'0'+(char_c);
/* Formats up to 16 bytes of input_p as one dump line in output_p.
 *
 * Layout written by this function (72 characters, no terminator):
 *   0..51   hex area: 16 columns of " XX", with one extra space in front
 *           of every group of four columns
 *   52..55  blanks with a '#' delimiter at offset 53
 *   56..71  textual area, one character per column
 * Columns at or beyond numberChars_n are filled with blanks. Bytes outside
 * 0x20..0x7E are shown as '.' in the textual area.
 * The caller must supply a buffer that is already zero-filled behind offset
 * 71 if the result is to be used as a C string.
 */
static void hex_dump_one_line(int numberChars_n,
                              const char *input_p,
                              char *output_p)
{
    static const char hexDigits[] = "0123456789ABCDEF";

    char *hexCursor = output_p;        /* begining of hexadecimal area */
    char *textCursor = output_p + 56;  /* begining of textual area */

    /* Blanks between the two areas, with some kind of delimeter */
    memset(output_p + 50, ' ', 6);
    output_p[53] = '#';

    for (int column = 0; column < 16; ++column)
    {
        /* Two spaces beetwen "four columns", one space otherwise */
        if ((column & 3) == 0)
        {
            *hexCursor++ = ' ';
        }
        *hexCursor++ = ' ';

        if (column >= numberChars_n)
        {
            /* No data for this column: blank hex digits and blank char */
            *hexCursor++ = ' ';
            *hexCursor++ = ' ';
            *textCursor++ = ' ';
            continue;
        }

        const char byte = input_p[column];

        /* Byte in hexadecimal form, high nibble first */
        *hexCursor++ = hexDigits[(byte >> 4) & 0xF];
        *hexCursor++ = hexDigits[byte & 0xF];

        /* The char itself, or '.' when it is outside 0x20..0x7E */
        char shown = byte & 0xFF;
        if (shown < 0x20 || shown > 0x7E)
        {
            shown = '.';
        }
        *textCursor++ = shown;
    }
}
void trace_buffer(const void *buf, int len) void trace_buffer(const void *buf, int len)
{ {
char dump[160]; char dump[160];
int line_n = len/16; int line_n = len/16;
int rest_n = len- line_n*16; int rest_n = len- line_n*16;
int i; int i;
memset(dump,0,160); memset(dump,0,160);
printf(" length:%d\n",len); printf(" length:%d\n",len);
for (i = 0;i<line_n;i++) for (i = 0;i<line_n;i++)
{ {
hex_dump_one_line(16, hex_dump_one_line(16,
(char*)buf+(i*16), (char*)buf+(i*16),
dump); dump);
printf("%s\n",dump);; printf("%s\n",dump);;
} }
if ( rest_n) if ( rest_n)
{ {
hex_dump_one_line(rest_n, hex_dump_one_line(rest_n,
(char*)buf+(line_n*16), (char*)buf+(line_n*16),
dump); dump);
printf("%s\n",dump); printf("%s\n",dump);
} }
} }
#endif #endif

@ -1,32 +1,32 @@
#if !defined Utils_h #if !defined Utils_h
#define Utils_h #define Utils_h
#include "Config.h" #include "Config.h"
#include <stdlib.h> #include <stdlib.h>
#include <string> #include <string>
#include <time.h> #include <time.h>
#include <stdio.h> #include <stdio.h>
namespace merge_lib namespace merge_lib
{ {
// Collection of static conversion and comparison helpers.
class Utils
{
public:
    // --- text <-> number conversions ---
    static int stringToInt(const std::string & str); //throw ConvertException
    static std::string uIntToStr(unsigned int integer);
    static std::string doubleToStr(double doubleValue);
    static double stringToDouble(const std::string & s );

    // --- floating point helpers; the tolerance defaults to 1.e-10 ---
    static bool doubleEquals(const double left,const double right, const double epsilon = +1.e-10);
    // Note: val is taken by non-const reference.
    static double normalizeValue(double &val,const double epsilon = +1.e-10);

    // --- files ---
    static unsigned long getFileSize(const char * fileName);
private:
    // Two overloads are declared: one for const and one for non-const strings.
    static int _stringToInt(const std::string & str); //throw ConvertException
    static int _stringToInt(std::string & str); //throw ConvertException
};
} }
#endif #endif

@ -1,63 +1,62 @@
INCLUDEPATH += "$$PWD/../../zlib/1.2.3/include"
HEADERS += \ HEADERS += \
AnnotsHandler.h \ AnnotsHandler.h \
ASCII85Decode.h \ ASCII85Decode.h \
ASCIIHexDecode.h \ ASCIIHexDecode.h \
CCITTFaxDecode.h \ CCITTFaxDecode.h \
Config.h \ Config.h \
ContentHandler.h \ ContentHandler.h \
DCTDecode.h \ DCTDecode.h \
Decoder.h \ Decoder.h \
Document.h \ Document.h \
Exception.h \ Exception.h \
FileIsAbsentException.h \ FileIsAbsentException.h \
Filter.h \ Filter.h \
FilterPredictor.h \ FilterPredictor.h \
FlateDecode.h \ FlateDecode.h \
JBIG2Decode.h \ JBIG2Decode.h \
LZWDecode.h \ LZWDecode.h \
MediaBoxElementHandler.h \ MediaBoxElementHandler.h \
MergePageDescription.h \ MergePageDescription.h \
Merger.h \ Merger.h \
Object.h \ Object.h \
Page.h \ Page.h \
PageElementHandler.h \ PageElementHandler.h \
PageParser.h \ PageParser.h \
Parser.h \ Parser.h \
Rectangle.h \ Rectangle.h \
RemoveHimSelfHandler.h \ RemoveHimSelfHandler.h \
RunLengthDecode.h \ RunLengthDecode.h \
Transformation.h \ Transformation.h \
TypeElementHandler.h \ TypeElementHandler.h \
Utils.h \ Utils.h \
AbstractBoxElementHandler.h \ AbstractBoxElementHandler.h \
CropBoxElementHandler.h \ CropBoxElementHandler.h \
OverlayDocumentParser.h \ OverlayDocumentParser.h \
RotationHandler.h RotationHandler.h
SOURCES += \ SOURCES += \
AnnotsHandler.cpp \ AnnotsHandler.cpp \
ASCII85Decode.cpp \ ASCII85Decode.cpp \
ASCIIHexDecode.cpp \ ASCIIHexDecode.cpp \
ContentHandler.cpp \ ContentHandler.cpp \
Document.cpp \ Document.cpp \
Filter.cpp \ Filter.cpp \
FilterPredictor.cpp \ FilterPredictor.cpp \
FlateDecode.cpp \ FlateDecode.cpp \
LZWDecode.cpp \ LZWDecode.cpp \
Merger.cpp \ Merger.cpp \
Object.cpp \ Object.cpp \
Page.cpp \ Page.cpp \
PageElementHandler.cpp \ PageElementHandler.cpp \
Parser.cpp \ Parser.cpp \
Rectangle.cpp \ Rectangle.cpp \
RemoveHimselfHandler.cpp \ RemoveHimselfHandler.cpp \
RunLengthDecode.cpp \ RunLengthDecode.cpp \
Utils.cpp \ Utils.cpp \
OverlayDocumentParser.cpp OverlayDocumentParser.cpp
macx { macx {
LIBS += -lz LIBS += -lz
Loading…
Cancel
Save