PDF Page Count Algorithm

December 2004

It's not often that you can't find coding examples on the internet for basic programming tasks, so I was surprised when I couldn't find code to determine the page count of PDF documents.

I've had to return to my old hacking ways (i.e. 'hacking' in its original sense). I visited my favorite hacking website - www.wotsit.org - downloaded the PDF document specifications and got coding.

It didn't take too long to construct a reasonably efficient algorithm which does what I want. I've thrown a few PDFs at it over the last couple of days, so I hope I've squashed most of the bugs.

Update 22-Dec-2005: Yes, I did find a couple of minor bugs, which have now been corrected.
Code snippet ...
type
  // Pointer alias used when TPdfObj records are heap-allocated and kept in a TList.
  PPdfObj = ^TPdfObj;

  // One entry read from a PDF cross-reference table.
  TPdfObj = record
    // Object number (subsection start number plus the entry's index).
    number: integer;
    // First numeric field of the entry; GetPdfPageCount uses it as a byte
    // offset from the start of the file when it jumps to the object.
    offset: integer;
  end;
// Returns the number of pages in the PDF file 'filename', or -1 when the
// count cannot be determined (unreadable file, malformed structure, or a
// layout this parser does not understand).
//
// How it works:
//   1. Load the whole file into memory and find the last 'startxref' keyword.
//   2. Walk the classic cross-reference tables (following /Prev links),
//      recording every entry's object number and file offset, and pick up the
//      /Root object number from the first trailer that has one.
//   3. Jump to the /Root object, read its /Pages reference, jump to that
//      object and read /Count (following one level of indirection if /Count
//      is itself an indirect reference).
//
// Limitations: only classic 'xref' tables are parsed. Files whose 'startxref'
// offset does not point at an 'xref' keyword (for example files that use
// cross-reference streams) yield -1. Offsets are held in 32-bit integers.
//
// Side effects: shows the hourglass cursor while working, resets the cursor
// to crDefault on exit, and calls Application.ProcessMessages once.
// Never raises: every exception is converted into a result of -1.
function GetPdfPageCount(const filename: string): integer;
const
  // Characters that terminate a PDF name token (white space and delimiters).
  NameEnd = [#0, #9, #10, #12, #13, ' ', '(', ')', '<', '>', '[', ']',
             '{', '}', '/', '%'];
var
  ms: TMemoryStream;
  i, k, cnt, gen, pagesNum, rootNum, fileSize, xrefHops: integer;
  // p is the parse cursor shared by all nested helpers; pStart/pEnd delimit
  // the loaded file ([pStart, pEnd)). PAnsiChar keeps the parser byte-oriented
  // on compilers where PChar is a wide-character pointer.
  p, p2, pStart, pEnd: PAnsiChar;
  PdfObj: PPdfObj;
  PdfObjList: TList;

  // Advances p over white space / control bytes (anything below #33).
  procedure SkipWhitespace;
  begin
    while (p < pEnd) and (p^ < #33) do inc(p);
  end;

  // Skips white space, then reads a run of decimal digits at p into num.
  // Returns false (num untouched or -1) when no digits are present or the
  // value does not fit in an integer. p is left after the digits.
  function GetNumber(out num: integer): boolean;
  var
    tmpStr: AnsiString;
  begin
    result := false;
    tmpStr := '';
    SkipWhitespace;
    while (p < pEnd) and (p^ in ['0'..'9']) do
    begin
      tmpStr := tmpStr + p^;
      inc(p);
    end;
    if tmpStr = '' then exit;
    // StrToIntDef instead of StrToInt: an over-long digit run is reported as
    // a parse failure rather than as an exception.
    num := StrToIntDef(string(tmpStr), -1);
    result := num >= 0;
  end;

  // Returns true when the bytes at p equal str, and only then advances p
  // past them. Never reads beyond pEnd.
  function IsString(const str: AnsiString): boolean;
  var
    len: integer;
  begin
    len := length(str);
    result := (len > 0) and (pEnd - p >= len) and
              CompareMem(p, PAnsiChar(str), len);
    if result then inc(p, len);
  end;

  // Scans forward from p for the key 'str' (e.g. '/Root') in the dictionary
  // that starts at or after p. On success p is left just after the key.
  // Nested dictionaries are skipped by tracking '<<' / '>>' depth, and the
  // contents of '(...)' and '<...>' strings are skipped so that bytes inside
  // them are not mistaken for syntax. The search ends (returning false) at
  // the '>>' that closes the outermost dictionary or at the end of the file.
  function FindStrInDict(const str: AnsiString): boolean;
  var
    depth, parens: integer;
  begin
    result := false;
    if str = '' then exit;
    depth := 0;
    while p < pEnd do
    begin
      case p^ of
        '<':
          if (p + 1 < pEnd) and (p[1] = '<') then
          begin
            inc(depth);
            inc(p, 2);
          end else
          begin
            // Hex string: skip up to and including its closing '>'.
            while (p < pEnd) and (p^ <> '>') do inc(p);
            if p < pEnd then inc(p);
          end;
        '>':
          begin
            inc(p);
            if (p < pEnd) and (p^ = '>') then
            begin
              inc(p);
              dec(depth);
              // Closed the dictionary we were searching (or, if no '<<' was
              // seen, the one we started inside): the key is not there.
              if depth <= 0 then exit;
            end;
          end;
        '(':
          begin
            // Literal string: balanced parentheses, backslash escapes.
            parens := 0;
            repeat
              case p^ of
                '\': inc(p); // skip the escaped character as well
                '(': inc(parens);
                ')': dec(parens);
              end;
              inc(p);
            until (parens = 0) or (p >= pEnd);
          end;
      else
        if (p^ = str[1]) and (depth <= 1) and IsString(str) then
        begin
          // Accept only a complete name token, not a longer name that merely
          // begins with str.
          if (p >= pEnd) or (p^ in NameEnd) then
          begin
            result := true;
            exit;
          end;
        end else
          inc(p); // step one byte so an adjacent key cannot be jumped over
      end;
    end;
  end;

  // Moves p to the given byte offset; false if it lies outside the file.
  function SeekTo(offset: integer): boolean;
  begin
    result := (offset >= 0) and (offset < fileSize);
    if result then p := pStart + offset;
  end;

  // Moves p to an xref section at 'offset' and consumes its 'xref' keyword.
  function SeekToXref(offset: integer): boolean;
  begin
    result := SeekTo(offset);
    if not result then exit;
    SkipWhitespace;
    result := IsString('xref');
  end;

  // Looks up objNum in the collected xref entries (first match wins, i.e.
  // the entry from the most recently written section), seeks to it and
  // checks that the object header there carries the same number. On success
  // p is left just after the object number.
  function SeekToObject(objNum: integer): boolean;
  var
    idx, num: integer;
  begin
    result := false;
    for idx := 0 to PdfObjList.Count - 1 do
      if PPdfObj(PdfObjList[idx])^.number = objNum then
      begin
        result := SeekTo(PPdfObj(PdfObjList[idx])^.offset) and
                  GetNumber(num) and (num = objNum);
        exit;
      end;
  end;

begin
  result := -1;
  ms := nil;
  PdfObjList := nil;
  try
    try
      ms := TMemoryStream.Create;
      PdfObjList := TList.Create;
      screen.Cursor := crHourGlass;
      application.ProcessMessages;

      ms.LoadFromFile(filename);
      fileSize := integer(ms.Size);
      if fileSize < 9 then exit; // too small to even hold 'startxref'
      pStart := PAnsiChar(ms.Memory);
      pEnd := pStart + fileSize;

      // Find the last 'startxref' keyword, searching backwards from the end.
      p2 := pEnd - 9;
      while true do
      begin
        p := p2;
        if IsString('startxref') then break;
        if p2 = pStart then exit;
        dec(p2);
      end;

      rootNum := -1;
      if not GetNumber(k) then exit;
      if not SeekToXref(k) then exit;

      xrefHops := 0;
      while true do
      begin
        // Subsection header: first object number and entry count.
        if not GetNumber(k) then exit;
        if not GetNumber(cnt) then exit;
        for i := 0 to cnt - 1 do
        begin
          new(PdfObj);
          PdfObj^.number := k + i;
          PdfObj^.offset := 0;
          PdfObjList.Add(PdfObj);
          // Entry layout: <offset> <generation> <n|f>. Parsing the fields
          // (rather than stepping a fixed 20 bytes) also accepts entries
          // whose line ending is shorter than the nominal two bytes.
          if not GetNumber(PdfObj^.offset) then exit;
          if not GetNumber(gen) then exit;
          SkipWhitespace;
          if (p >= pEnd) or not (p^ in ['n', 'f']) then exit;
          inc(p);
        end;
        SkipWhitespace;
        // A digit here means another subsection follows in this section.
        if (p < pEnd) and (p^ in ['0'..'9']) then continue;
        if not IsString('trailer') then exit;

        // Both searches start from the same place: just after 'trailer'.
        p2 := p;
        if (rootNum = -1) and FindStrInDict('/Root') then
          if not GetNumber(rootNum) then exit;
        p := p2;
        if not FindStrInDict('/Prev') then break;
        if not GetNumber(k) then exit;

        // Every section needs at least 'xref' + 'trailer' (11 bytes), so
        // more hops than that can only come from a cyclic /Prev chain.
        inc(xrefHops);
        if xrefHops > fileSize div 11 then exit;
        if not SeekToXref(k) then exit;
      end;
      if rootNum < 0 then exit;

      // Document catalog -> /Pages reference.
      if not SeekToObject(rootNum) then exit;
      if not FindStrInDict('/Pages') then exit;
      if not GetNumber(pagesNum) then exit;

      // Page tree root -> /Count.
      if not SeekToObject(pagesNum) then exit;
      if not FindStrInDict('/Count') then exit;
      if not GetNumber(cnt) then exit;

      // '/Count n g R' means the count lives in indirect object n:
      // follow the reference and read the number after '<n> <g> obj'.
      if GetNumber(gen) then
      begin
        SkipWhitespace;
        if IsString('R') then
        begin
          if not SeekToObject(cnt) then exit;
          if not GetNumber(gen) then exit;
          SkipWhitespace;
          if not IsString('obj') then exit;
          if not GetNumber(cnt) then exit;
        end;
      end;
      result := cnt;
    finally
      screen.Cursor := crDefault;
      if PdfObjList <> nil then
        for i := 0 to PdfObjList.Count - 1 do
          dispose(PPdfObj(PdfObjList[i]));
      PdfObjList.Free;
      ms.Free;
    end;
  except
    // Deliberate best effort: any failure (I/O error, out of memory, ...)
    // is reported to the caller as "page count unknown".
    result := -1;
  end;
end;
转载请注明原文地址: https://ibbs.8miu.com/read-7870.html