|
|
|
/*
|
|
|
|
* Copyright (C) 2012 Webdoc SA
|
|
|
|
*
|
|
|
|
* This file is part of Open-Sankoré.
|
|
|
|
*
|
|
|
|
* Open-Sankoré is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Library General Public
|
|
|
|
* License as published by the Free Software Foundation, version 2,
|
|
|
|
* with a specific linking exception for the OpenSSL project's
|
|
|
|
* "OpenSSL" library (or with modified versions of it that use the
|
|
|
|
* same license as the "OpenSSL" library).
|
|
|
|
*
|
|
|
|
* Open-Sankoré is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Library General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Library General Public
|
|
|
|
* License along with Open-Sankoré; if not, see
|
|
|
|
* <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#include "OverlayDocumentParser.h"
|
|
|
|
#include <fstream>
|
|
|
|
#include <string.h>
|
|
|
|
#include <QtGlobal>
|
|
|
|
#include "Exception.h"
|
|
|
|
#include "Object.h"
|
|
|
|
|
|
|
|
#include "core/memcheck.h"
|
|
|
|
|
|
|
|
using namespace merge_lib;
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
// Number of bytes that _readXref loads from the end of the file when it looks
// for the "startxref" keyword; _readTrailerAndReturnRoot loads three times as many.
int OverlayDocumentParser::DOC_PART_WITH_START_OF_XREF = 30;
|
|
|
|
// Nominal number of bytes _readXRefAndCreateObjects loads per part of the file: 10 MiB.
unsigned int partSize = 10u * 1024u * 1024u;
|
|
|
|
|
|
|
|
/**
 * Parses the PDF document stored at fileName.
 *
 * The path is kept in _fileName, which the partial readers of this class
 * (_getPartOfFileContent, _readXref, ...) use to reopen the file, and the
 * actual parsing is then handed over to Parser::parseDocument.
 *
 * @param fileName path of the document to parse.
 * @return whatever Parser::parseDocument returns for that file.
 */
Document * OverlayDocumentParser::parseDocument(const char * fileName)
{
    // Store the path before delegating: presumably the base implementation
    // calls back into the readers above, which rely on _fileName.
    this->_fileName = fileName;

    Document * const parsedDocument = Parser::parseDocument(fileName);
    return parsedDocument;
}
|
|
|
|
|
|
|
|
|
|
|
|
void OverlayDocumentParser::_readXRefAndCreateObjects()
|
|
|
|
{
|
|
|
|
std::map<unsigned int, unsigned long> objectsAndPositions;
|
|
|
|
_readXref(objectsAndPositions);
|
|
|
|
std::map<unsigned int, unsigned long> objectsAndSizes;
|
|
|
|
std::map<unsigned int, unsigned long>::iterator objAndSIter;
|
|
|
|
std::map<unsigned int, unsigned long>::iterator objAndPIter;
|
|
|
|
unsigned long fileSize = Utils::getFileSize(_fileName.c_str());
|
|
|
|
|
|
|
|
for(objAndSIter = objectsAndPositions.begin(); objAndSIter != objectsAndPositions.end(); ++objAndSIter)
|
|
|
|
{
|
|
|
|
unsigned int nextPosition = fileSize;
|
|
|
|
for(objAndPIter = objectsAndPositions.begin(); objAndPIter != objectsAndPositions.end(); ++objAndPIter)
|
|
|
|
{
|
|
|
|
if((objAndPIter->second > objAndSIter->second) && (objAndPIter->second < nextPosition))
|
|
|
|
nextPosition = objAndPIter->second;
|
|
|
|
}
|
|
|
|
objectsAndSizes[objAndSIter->first] = nextPosition - objAndSIter->second;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool notEndOfFile = true;
|
|
|
|
do
|
|
|
|
{
|
|
|
|
unsigned long partStart = fileSize;
|
|
|
|
std::map<unsigned int, unsigned long>::iterator objIter;
|
|
|
|
for(objIter = objectsAndPositions.begin(); objIter != objectsAndPositions.end(); ++objIter)
|
|
|
|
{
|
|
|
|
if(objIter->second < partStart)
|
|
|
|
partStart = objIter->second;
|
|
|
|
}
|
|
|
|
unsigned long nextPartStart = partStart + partSize;
|
|
|
|
|
|
|
|
if((nextPartStart) < fileSize)
|
|
|
|
_getPartOfFileContent(partStart, partSize);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
_getPartOfFileContent(partStart, fileSize - partStart);
|
|
|
|
nextPartStart = fileSize;
|
|
|
|
notEndOfFile = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(objIter = objectsAndPositions.begin(); objIter != objectsAndPositions.end(); )
|
|
|
|
{
|
|
|
|
if((objectsAndSizes[objIter->first] + objIter->second <= nextPartStart) && (objIter->second >= partStart) && ((objIter->second < nextPartStart)))
|
|
|
|
{
|
|
|
|
std::pair<unsigned int, unsigned int> streamBounds;
|
|
|
|
unsigned int objectNumber;
|
|
|
|
unsigned int generationNumber;
|
|
|
|
bool hasObjectStream;
|
|
|
|
const std::string content = _getObjectContent(objIter->second - partStart, objectNumber, generationNumber, streamBounds, hasObjectStream);
|
|
|
|
streamBounds.first += partStart;
|
|
|
|
streamBounds.second += partStart;
|
|
|
|
Object * newObject = new Object(objectNumber, generationNumber, content, _document->_documentName ,streamBounds, hasObjectStream);
|
|
|
|
_objects[objectNumber] = newObject;
|
|
|
|
std::map<unsigned int, unsigned long>::iterator temp = objIter;
|
|
|
|
++objIter;
|
|
|
|
objectsAndPositions.erase(temp);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
++objIter;
|
|
|
|
}
|
|
|
|
partStart = nextPartStart;
|
|
|
|
}
|
|
|
|
while(notEndOfFile);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Does nothing: no file content is loaded here.
 *
 * NOTE(review): presumably this replaces a base-class loader that reads the
 * whole file, since this class loads the file piecewise through
 * _getPartOfFileContent instead - confirm against Parser.
 *
 * @param fileName ignored.
 */
void OverlayDocumentParser::_getFileContent(const char * fileName)
{
    (void) fileName; // silences the unused-parameter warning
}
|
|
|
|
|
|
|
|
void OverlayDocumentParser::_getPartOfFileContent(long startOfPart, unsigned int length)
|
|
|
|
{
|
|
|
|
ifstream pdfFile;
|
|
|
|
pdfFile.open (_fileName.c_str(), ios::binary );
|
|
|
|
if (pdfFile.fail())
|
|
|
|
{
|
|
|
|
stringstream errorMessage("File ");
|
|
|
|
errorMessage << _fileName << " is absent" << "\0";
|
|
|
|
throw Exception(errorMessage);
|
|
|
|
}
|
|
|
|
ios_base::seekdir dir;
|
|
|
|
if(startOfPart >= 0)
|
|
|
|
dir = ios_base::beg;
|
|
|
|
else
|
|
|
|
dir = ios_base::end;
|
|
|
|
pdfFile.seekg (startOfPart, dir);
|
|
|
|
_fileContent.resize(length);
|
|
|
|
pdfFile.read(&_fileContent[0], length);
|
|
|
|
pdfFile.close();
|
|
|
|
}
|
|
|
|
|
|
|
|
void OverlayDocumentParser::_readXref(std::map<unsigned int, unsigned long> & objectsAndSizes)
|
|
|
|
{
|
|
|
|
_getPartOfFileContent(- DOC_PART_WITH_START_OF_XREF, DOC_PART_WITH_START_OF_XREF);
|
|
|
|
unsigned int startOfStartxref = _fileContent.find("startxref");
|
|
|
|
unsigned int startOfNumber = _fileContent.find_first_of(Parser::NUMBERS, startOfStartxref);
|
|
|
|
unsigned int endOfNumber = _fileContent.find_first_not_of(Parser::NUMBERS, startOfNumber + 1);
|
|
|
|
std::string startXref = _fileContent.substr(startOfNumber, endOfNumber - startOfNumber);
|
|
|
|
unsigned int strtXref = Utils::stringToInt(startXref);
|
|
|
|
|
|
|
|
unsigned int sizeOfXref = Utils::getFileSize(_fileName.c_str()) - strtXref;
|
|
|
|
_getPartOfFileContent(strtXref, sizeOfXref);
|
|
|
|
unsigned int leftBoundOfObjectNumber = _fileContent.find("0 ") + strlen("0 ");
|
|
|
|
unsigned int rightBoundOfObjectNumber = _fileContent.find_first_not_of(Parser::NUMBERS, leftBoundOfObjectNumber);
|
|
|
|
std::string objectNuberStr = _fileContent.substr(leftBoundOfObjectNumber, rightBoundOfObjectNumber - leftBoundOfObjectNumber);
|
|
|
|
unsigned long objectNumber = Utils::stringToInt(objectNuberStr);
|
|
|
|
unsigned int startOfObjectPosition = _fileContent.find("0000000000 65535 f ") + strlen("0000000000 65535 f ");
|
|
|
|
for(unsigned long i = 1; i < objectNumber; ++i)
|
|
|
|
{
|
|
|
|
startOfObjectPosition = _fileContent.find_first_of(Parser::NUMBERS, startOfObjectPosition);
|
|
|
|
unsigned int endOfObjectPostion = _fileContent.find(" 00000 n", startOfObjectPosition);
|
|
|
|
std::string objectPostionStr = _fileContent.substr(startOfObjectPosition, endOfObjectPostion - startOfObjectPosition);
|
|
|
|
objectsAndSizes[i] = Utils::stringToInt(objectPostionStr);
|
|
|
|
startOfObjectPosition = endOfObjectPostion + strlen(" 00000 n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned int OverlayDocumentParser::_readTrailerAndReturnRoot()
|
|
|
|
{
|
|
|
|
_getPartOfFileContent(- (3*DOC_PART_WITH_START_OF_XREF), (3*DOC_PART_WITH_START_OF_XREF));
|
|
|
|
return Parser::_readTrailerAndReturnRoot();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Always reports offset 0.
 *
 * NOTE(review): presumably 0 is right because _readTrailerAndReturnRoot loads
 * only the tail of the file into _fileContent before the trailer is searched -
 * confirm against Parser::_readTrailerAndReturnRoot.
 *
 * @return 0.
 */
unsigned int OverlayDocumentParser::_getStartOfXrefWithRoot()
{
    return 0u;
}
|
|
|
|
|