offline日志分析(1)

    技术2023-03-19  29

    通过对用户的行为日志进行分析,可以发现非常有趣的用户访问模式,还可以利用从日志中挖掘出的信息进行资源推荐、构建个性化网站等。针对用户访问模式的研究,国内外已经做了很多工作。

     

    采用的技术方案是:将日志一次性读取到内存中,然后进行相应的统计分析工作。

          这种方案有2点优势:       1.顺序读取每条记录,将信息存储在内存中,提高了效率。       2.以函数的形式实现相应的功能,具有很好的扩展性和复用性。         目前对日志的分析主要做了下面3项工作:        1.统计各个时间段内(以1h为分割)的资源点击次数。        2.统计各资源的总体访问次数和呈现次数(即推荐次数)。        3.统计各资源的打星率(打星次数/呈现次数)        以上各项功能,均是以函数形式编写,可通过传参形式进行复用,从而实现对不同种类的日志进行统计工作的分析,比如统计资源打星率的函数可以被调用,以统计资源的单击率。               现在只是做了很少的工作,对日志的统计分析还要很多工作要做:        1.统计各资源的推荐率。        2.统计各资源的单击率。        3.对比分析推荐的资源是否与用户的点击次数和打星次数成正相关。        4.对比分析默认推荐的资源和按类别推荐的资源的访问次数,比较优劣,对系统的有效性和实用性进行验证。        5.分析用户的访问路径,可以对访问过的资源进行关联分析,并利用相应的反馈信息资源,进行推荐,这样可以提高推荐的有效性。(这项工作还要对日志进行修改,添加相应的字段)   当然日志分析还有其他很多工作要做,我想可以参考罗老师前段时间给的Eytan Adar 的文章,也希望师兄师姐和老师多给意见和建议。 代码示例: using System; using System.Collections; using System.Configuration; using System.Data; using System.Linq; using System.Web; using System.Web.Security; using System.Web.UI; using System.Web.UI.HtmlControls; using System.Web.UI.WebControls; using System.Web.UI.WebControls.WebParts; using System.Xml.Linq; using System.Text; using System.IO; using System.Collections.Generic; namespace LogAnalysis {     public partial class Function : System.Web.UI.Page     {         //得到所有的文件名         string[] direc = Directory.GetFiles( @"G:/lab/source/log");         int i = 0;         //key:time  value:访问次数         Dictionary<String, int> dicClick = new Dictionary<string, int>();         protected void Page_Load(object sender, EventArgs e)         {             //调用函数统计各个时间段内的访问次数             dicClick = timeAnalysis(@"G:/lab/source/log/ClickLog");             Response.Write("time   clicks <br>");             printInt(dicClick);             //输出recommendition中,各种资源对应的访问数目             Dictionary<String, int> dicRecomDefault = countAnalysis(@"G:/lab/source/log/DefaultRecommendationOfferLog");//RecommendationByCategoryOfferLog             Dictionary<String, int> dicRecomCategory = countAnalysis(@"G:/lab/source/log/RecommendationByCategoryOfferLog");//RecommendationByCategoryOfferLog             dicClick = 
countAnalysis(@"G:/lab/source/log/ClickLog");//RecommendationByCategoryOfferLog             Response.Write("time   recommands <br>");             //Response.Write(dicRecom.Count.ToString());             printInt(dicClick);             //输出打星lv             Dictionary<String, double> dicStarRate = starRate();             Response.Write("star   rates <br>");             printDouble(dicStarRate);         }                 //输出字典         void printInt(Dictionary<String, int> dic)         {             foreach (KeyValuePair<String, int> term in dic)             {                 Response.Write(term.Key + " :   " + term.Value + "</br>");             }         }         void printDouble(Dictionary<String, double> dic)         {             foreach (KeyValuePair<String, double> term in dic)             {                 Response.Write(term.Key + " :   " + term.Value + "</br>");             }         }         //timeAnalysis函数得到某个时间段的访问次数         Dictionary<String,int> timeAnalysis(String path)         {             Dictionary<String, int> dic = new Dictionary<string, int>();             foreach (string dir in direc)             {                 //处理ClickLog                 //Label1.Text += "i: " +dir;                 if (dir.StartsWith(path))//@"G:/lab/source/log/ClickLog"                 {                     FileStream fs = new FileStream(dir, FileMode.Open);                     StreamReader sr = new StreamReader(fs, Encoding.Default);                     String source = string.Empty;                     while (sr.Peek() > -1)                     {                         //读取文件中的每一行                         String input = sr.ReadLine();                         //得到时间字段,比如:12:31:31                         String[] clickTemp = input.Split(' ');                         //得到时间的小事字段:12                         String [] timeTemp = clickTemp[1].Split(':');                         //如果字典中存在此键值,则访问次数加1                         if (dic.ContainsKey(timeTemp[0]))                      
   {                             dic[timeTemp[0]]++;                         }                         //键值不存在,直接加入字典中                         else                         {                             dic.Add(timeTemp[0], 1);                         }                                                 source += input;                         //Label1.Text += "你猜猜:" + source;                     }// while (sr.Peek() > -1)                     sr.Close();                 }                 i++;             }// foreach(string dir in direc)                         return dic;         }         //计算出现次数         Dictionary<String, int> countAnalysis(String path)         {             Dictionary<String, int> dic = new Dictionary<string, int>();             foreach (string dir in direc)             {                 //处理ClickLog                 //Label1.Text += "i: " +dir;                 if (dir.StartsWith(path))//@"G:/lab/source/log/ClickLog"                 {                     FileStream fs = new FileStream(dir, FileMode.Open);                     StreamReader sr = new StreamReader(fs, Encoding.Default);                     String source = string.Empty;                     while (sr.Peek() > -1)                     {                         //读取文件中的每一行                         String input = sr.ReadLine();                         if (!String.IsNullOrEmpty(input.Trim()))                         {                             String[] countTemp = input.Split(' ');                             //如果字典中存在此键值,则访问次数加1                             if (dic.ContainsKey(countTemp[3]))                             {                                 dic[countTemp[3]]++;                             }                             //键值不存在,直接加入字典中                             else                             {                                 dic.Add(countTemp[3], 1);                             }                         }                                                source += input;             
            //Label1.Text += "你猜猜:" + source;                     }// while (sr.Peek() > -1)                     sr.Close();                 }                 i++;             }// foreach(string dir in direc)             return dic;         }         //计算打星率         Dictionary<String, double> starRate()         {                         double star = 0.0;             //dicStardicStarRate key:resourceID value:访问次数             Dictionary<String, double> dicStarRate = new Dictionary<string, double>();             Dictionary<String, int> dicRecom = countAnalysis(@"G:/lab/source/log/DefaultRecommendationOfferLog");             Dictionary<String, int> dicStar = countAnalysis(@"G:/lab/source/log/StarLog");             foreach(KeyValuePair<String,int> dicR in dicRecom)             {                 int totalNum = dicRecom[dicR.Key];                 //Response.Write("total num is:" +totalNum);                 foreach(KeyValuePair<String,int> dicS in dicStar)                 {                     if (dicR.Key == dicS.Key)                     {                         int starNum = dicStar[dicS.Key];                         //Response.Write("star num is:" + starNum);                         star =(double) starNum / totalNum;                         dicStarRate.Add(dicS.Key, star);                                             }                 }             }             return dicStarRate;         }     } } 技术细节: 
    最新回复(0)