PageRank计算方法的SQL实现

    技术2024-08-13  55

    1、表准备存储所有网页的基本信息CREATE TABLE [WebPages] ( [DOCID] [int] IDENTITY (1, 1) NOT NULL , [URL] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL , [PAGETEXT] [text] COLLATE Chinese_PRC_CI_AS NULL , [REFCOUNT] [int] NULL , [WEIGHT] [float] NULL , CONSTRAINT [DOCID] PRIMARY KEY  CLUSTERED  (  [DOCID] )  ON [PRIMARY] ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]GO

    存储网页链接关系CREATE TABLE [PageLinks] (        [docid] [int] NULL ,        [url] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL ,        [anchordocid] [int] NULL ,        [anchorurl] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL ) ON [PRIMARY]GO

    2、执行脚本,填充WebPages中的REFCOUNT,使之成为链出网页的数量declare cur_webpages cursorfor select docid from webpages

    declare @docid intopen cur_webpagesfetch next from cur_webpages into @docidwhile (@@fetch_status=0)begin        declare @rcount int        select @rcount=count(*) from pagelinks where docid=@docid        update webpages set refcount=@rcount WHERE CURRENT OF cur_webpages        fetch next from cur_webpages into @docidendclose cur_webpagesdeallocate cur_webpages

     

    3、填充初始值update webpages set weight=1.0/(select count(*) from webpages)update webpages set REVWEIGHT=0

     

    4、计算PageRank的SQL脚本declare @count intset @count=0

    while(@count<=7)begin        declare cur_webpages cursor        for select docid,refcount,weight,revweight from webpages                declare @docid int        declare @refcount int        declare @weight float        declare @revweight float        if(@count%2=0)                select docid,weight into pagebak from webpages         else                select docid,revweight as weight into pagebak from webpages         CREATE  INDEX [docid] ON [dbo].[pagebak]([docid])                open cur_webpages        fetch next from cur_webpages into @docid,@refcount,@weight,@revweight        while (@@fetch_status=0)        begin                if @refcount<>0                begin                        if(@count%2=0)                                update pagebak set weight=weight+(@weight/@refcount) where docid in (select anchordocid from pagelinks where docid=@docid)                        else                                update pagebak set weight=weight+(@revweight/@refcount) where docid in (select anchordocid from pagelinks where docid=@docid)                end                fetch next from cur_webpages into @docid,@refcount,@weight,@revweight        end        close cur_webpages        deallocate cur_webpages        if(@count%2=0)        begin                update webpages set revweight=0.2+(select weight from pagebak where pagebak.docid=webpages.docid)                update webpages set revweight=revweight/(select sum(revweight) from webpages)        end        else        begin                update webpages set weight=0.2+(select weight from pagebak where pagebak.docid=webpages.docid)                update webpages set weight=weight/(select sum(weight) from webpages)        end        drop table pagebak         select * from webpages        set @count=@count+1end

     

     

     

     

     

     

    最新回复(0)