delphi做一个本地网页的搜索引擎。
思路:1、建立本地网页的索引;2、在索引里面搜索关键字;3输出查找的结果。
第一步、检索因。
procedure TWebthread.GetFileList(AStrings: TStrings; ASourFile,//查找本地网页 FileName: string);var sour_path,sour_file: string; FileRec, subFileRec:TSearchrec; i,n: Integer; web:string;begin try n := 0; if rightStr(trim(ASourFile), 1) <> '/' then sour_path :=trim(ASourFile) + '/' else sour_path :=trim(ASourFile); sour_file:= FileName;
if not DirectoryExists(sour_path) then begin AStrings.Clear; exit; end;
if FindFirst(sour_path+'*.*',faAnyfile,FileRec) = 0 then repeat if ((FileRec.Attr and faDirectory) <> 0) then begin if ((FileRec.Name<> '.') and (FileRec.Name <> '..')) then GetFileList(AStrings, sour_path+ FileRec.Name + '/', sour_file); end else if FindFirst(sour_path + FileName,faAnyfile,subFileRec) = 0 then repeat if ((subFileRec.Attr and faDirectory) = 0)and(pos(sour_path+subFileRec.Name,TmpList.Text)=0) then begin path:=sour_path+subFileRec.Name; Gettitle(path); Postdata(path, Gettitle(path),' *'); end; until FindNext(subFileRec)<>0;
until FindNext(FileRec)<>0;
SysUtils.FindClose(FileRec); except
end;end;
function TWebthread.Gettitle(ml: string): string;vari,j ,n,p:Integer;s,m,s1 :WideString;begin Fupdate.memo2.Lines.Clear; Fupdate.memo2.Lines.LoadFromFile(ml); s :=Memo2.Text; s1 := '';
//=============查找网页标题=============================== i:=pos('<title>',s); if i =0 then i := pos('<TITLE>',s); s:=copy(s,i+7,length(s)-i-7);// j:=pos('</title>',s); if j =0 then j:=pos('</TITLE>',s); m:=copy(s,1,j-1);// Result := m; //=============查找网页内容=============================== n := pos('id=zoom>',s)+8; if n-8 >0 then s1 := rightstr(s,length(s)-n); if s1 <>'' then p := pos('</TD>',s1); Content := Trim(UpperCase(leftstr(s1,p))); if length(Content)>300 then Content := leftstr(Content,300); //=============过滤关键字=============================== Content := AnsiReplaceStr(Content,'<',''); Content := AnsiReplaceStr(Content,'/',''); Content := AnsiReplaceStr(Content,'>',''); Content := AnsiReplaceStr(Content,'BR',''); Content := AnsiReplaceStr(Content,'P',''); Content := AnsiReplaceStr(Content,'TD',''); Content := AnsiReplaceStr(Content,'TR',''); Content := AnsiReplaceStr(Content,'NBS;',''); Content := AnsiReplaceStr(Content,'NBSP;',''); Content := AnsiReplaceStr(Content,'&',''); if trim(Content)='' then Content := '*';end;
procedure TWebthread.Postdata(temp, temp1, temp3: string);//索引保存数据库begin try temp := UpperCase(temp); if (temp<>'') then if (temp1<>'')then begin temp := rightstr(temp,length(temp)-pos('KNOWLEDGEB',temp)); with fupdate.ADOQpub do begin close; sql.Clear; sql.Add('Insert into web(path,title,Content) values(:a1,:a2,:a3)'); parameters.ParamByName('a1').Value := temp; parameters.ParamByName('a2').Value := temp1; parameters.ParamByName('a3').Value := Content; execsql; end; end; except end; end;
第二步、查找多个关键字(多个关键字要空格隔开)
procedure TMainForm.SpeedButton1Click(Sender: TObject);var Key,path,title,Content:string; sqltext:string; total,pagesize,pagecount:integer; n,m:integer;begin total:=0; pagesize:=5; n := 0; m:= 0; webhtml := ''; if trim(Edit2.Text)='' then begin Messagebox(handle,'关键字不能为空!','系统提示',mb_iconinformation+mb_ok); Edit2.SetFocus; exit; end;
//=======================查找多个关键字========================
if pos(' ',Trim(edit2.Text))<=0 then sqltext := 'select * from web where title like'+quotedstr('%'+Trim(Edit2.Text)+'%') else begin key := Edit2.Text; sqltext := 'select * from web where title like '+quotedstr('%'+leftstr(Trim(key),pos(' ',Trim(key))-1)+'%'); key := rightstr(trim(key),length(Trim(key))-pos(' ',Trim(key))); while pos(' ',Trim(key))>0 do begin sqltext := sqltext+' and title like '+quotedstr('%'+leftstr(Trim(key),pos(' ',Trim(key))-1)+'%'); key := rightstr(trim(key),length(Trim(key))-pos(' ',Trim(key))); next; end; sqltext := sqltext + ' and title like '+quotedstr('%'+Trim(key)+'%'); end;
//============查找建立的索引表============================================ try screen.Cursor := crhourglass; try with ADOQPUB do begin close; sql.Clear; sql.Text := sqltext; open; total := Recordcount;//查找到多少条记录 if (total mod pagesize)=0 then pagecount := (total div pagesize) else pagecount := (total div pagesize)+1;//可以输出几个页面 first; while not eof do begin inc(n); path := fieldbyname('path').AsString;//相对路径 path :=ExtractFiledir(ExtractFIleDir(ExtractFileDir(application.ExeName)))+'/'+ path ;//绝对路径 title := Trim(fieldbyname('title').AsString);//网页标题 Content := Trim(fieldbyname('Content').AsString);//网页内容 if trim(Content)='*'then Content :=''; if trim(title)<>'' then begin if trim(path)<>'' then begin webhtml := webhtml+outcontent(title, Content,path);//以下为生成的类似百度的查找结果 if ((n mod pagesize)=0) then begin memo1.Lines.Append(outtitle(total,pagecount)); memo1.Lines.Append(outbody(total, pagesize)); memo1.Lines.Append(webhtml); memo1.Lines.Append(outend()); memo1.Lines.SaveToFile(ExtractFiledir(ExtractFIleDir(ExtractFileDir(application.ExeName)))+'/KnowledgeB/Base/temp/'+inttostr((n div pagesize))+'.htm'); webhtml := ''; end else if total - n < pagesize then begin if total = n then begin memo1.Lines.Append(outtitle(total,pagecount)); memo1.Lines.Append(outbody(total, pagesize)); memo1.Lines.Append(webhtml); memo1.Lines.Append(outend()); memo1.Lines.SaveToFile(ExtractFiledir(ExtractFIleDir(ExtractFileDir(application.ExeName)))+'/KnowledgeB/Base/temp/'+inttostr((n div pagesize)+1)+'.htm'); webhtml := ''; end; end; end;//if trim(path)<>'' then end;//if trim(title)<>'' then next; end; end; shellexecute(handle,nil,pchar(ExtractFiledir(ExtractFIleDir(ExtractFileDir(application.ExeName)))+'/KnowledgeB/Base/base1.htm'),nil,nil,sw_show); except Messagebox(handle,'查找失败!','系统提示',mb_iconinformation+mb_ok); end; finally screen.Cursor := crdefault; end;end;