Apriori算法--关联规则 转载

    技术2022-05-20  51

    package apr;

    import java.io.BufferedWriter;import java.io.FileWriter;import java.util.*;

    public class Apriori {

     private double minsup = 0.6;// 最小支持度 private double minconf = 0.2;// 最小置信度

     // 注意使用IdentityHashMap,否则由于关联规则产生存在键值相同的会出现覆盖 private IdentityHashMap ruleMap = new IdentityHashMap();

     private String[] transSet = { "abc", "abc", "acde", "bcdf", "abcd", "abcdf" };// 事务集合,可以根据需要从构造函数里传入

     private int itemCounts = 0;// 候选1项目集大小,即字母的个数 private TreeSet[] frequencySet = new TreeSet[40];// 频繁项集数组,[0]:代表1频繁集... private TreeSet maxFrequency = new TreeSet();// 最大频繁集 private TreeSet candidate = new TreeSet();// 1候选集 private TreeSet candidateSet[] = new TreeSet[40];// 候选集数组 private int frequencyIndex;

      public Apriori() {

      maxFrequency = new TreeSet();  itemCounts = counts();// 初始化1候选集的大小

      // 初始化其他两个  for (int i = 0; i < itemCounts; i++) {   frequencySet[i] = new TreeSet();   candidateSet[i] = new TreeSet();  }  candidateSet[0] = candidate; }

      public Apriori(String[] transSet) {  this.transSet = transSet;  maxFrequency = new TreeSet();  itemCounts = counts();// 初始化1候选集的大小

      // 初始化其他两个  for (int i = 0; i < itemCounts; i++) {   frequencySet[i] = new TreeSet();   candidateSet[i] = new TreeSet();  }  candidateSet[0] = candidate; }

      public int counts() {

      String temp1 = null;  char temp2 = 'a';  // 遍历所有事务集String 加入集合,set自动去重了  for (int i = 0; i < transSet.length; i++) {   temp1 = transSet[i];   for (int j = 0; j < temp1.length(); j++) {    temp2 = temp1.charAt(j);    candidate.add(String.valueOf(temp2));   }  }  return candidate.size(); }

      public void item1_gen() {  String temp1 = "";  double m = 0;

      Iterator temp = candidateSet[0].iterator();  while (temp.hasNext()) {   temp1 = (String) temp.next();   m = count_sup(temp1);

       // 符合条件的加入 1候选集   if (m >= minsup * transSet.length) {    frequencySet[0].add(temp1);   }  } }

      public double count_sup(String x) {  int temp = 0;  for (int i = 0; i < transSet.length; i++) {   for (int j = 0; j < x.length(); j++) {    if (transSet[i].indexOf(x.charAt(j)) == -1)     break;    else if (j == (x.length() - 1))     temp++;   }  }  return temp; }

      public void canditate_gen(int k) {  String y = "", z = "", m = "";  char c1 = 'a', c2 = 'a';

      Iterator temp1 = frequencySet[k - 2].iterator();  Iterator temp2 = frequencySet[0].iterator();  TreeSet h = new TreeSet();

      while (temp1.hasNext()) {   y = (String) temp1.next();   c1 = y.charAt(y.length() - 1);

       while (temp2.hasNext()) {    z = (String) temp2.next();

        c2 = z.charAt(0);    if (c1 >= c2)     continue;    else {     m = y + z;     h.add(m);    }   }   temp2 = frequencySet[0].iterator();  }  candidateSet[k - 1] = h; }

     // k候选集=>k频繁集 public void frequent_gen(int k) {  String s1 = "";

      Iterator ix = candidateSet[k - 1].iterator();  while (ix.hasNext()) {   s1 = (String) ix.next();   if (count_sup(s1) >= (minsup * transSet.length)) {    frequencySet[k - 1].add(s1);   }  } }

     public boolean is_frequent_empty(int k) {  if (frequencySet[k - 1].isEmpty())   return true;  else   return false; }

     public boolean included(String s1, String s2) {  for (int i = 0; i < s1.length(); i++) {   if (s2.indexOf(s1.charAt(i)) == -1)    return false;   else if (i == s1.length() - 1)    return true;  }  return true; }

     public void maxfrequent_gen() {  int i, j;  Iterator iterator, iterator1, iterator2;  String temp = "", temp1 = "", temp2 = "";  for (i = 1; i < frequencyIndex; i++) {   maxFrequency.addAll(frequencySet[i]);  }  // for (i = 0; i < frequencyIndex; i++) {  // iterator1 = frequencySet[i].iterator();  // while (iterator1.hasNext()) {  // temp1 = (String) iterator1.next();  // for (j = i + 1; j < frequencyIndex; j++) {  // iterator2 = frequencySet[j].iterator();  // while (iterator2.hasNext()) {  // temp2 = (String) iterator2.next();  // if (included(temp1, temp2))  // maxFrequency.remove(temp1);  // }  // }  // }  // } }

      public void print_maxfrequent() {  Iterator iterator = maxFrequency.iterator();  System.out.print("产生规则频繁项集:");  while (iterator.hasNext()) {   System.out.print(toDigit((String) iterator.next()) + "/t");  }  System.out.println(); }

      public void rulePrint() {  String x, y;  double temp = 0;

      Set hs = ruleMap.keySet();  Iterator iterator = hs.iterator();

      StringBuffer sb = new StringBuffer();  System.out.println("关联规则:");  while (iterator.hasNext()) {   x = (String) iterator.next();

       y = (String) ruleMap.get(x);

       temp = (count_sup(x + y) / count_sup(x));      //x = toDigit(x);   //y = toDigit(y);

       System.out.println(x + (x.length() < 5 ? "/t" : "") + "-->" + y     + "/t" + temp);   sb.append("  " + x + (x.length() < 5 ? "/t" : "") + "-->" + y     + "/t" + temp + "/t/n");  }  BufferedWriter bw = null;  try {   FileWriter fw = new FileWriter("Asr.txt");

       bw = new BufferedWriter(fw);

       bw.write("最小支持度 minsup = " + minsup);   bw.newLine();   bw.write("最小置信度 minconf = " + minconf);   bw.newLine();   bw.write("产生关联规则如下: ");   bw.newLine();

       bw.write(sb.toString());   // bw.newLine();

       if (bw != null)    bw.close();  } catch (Exception e) {   e.printStackTrace();  }

     }

     public void subGen(String s) {  String x = "", y = "";  for (int i = 1; i < (1 << s.length()) - 1; i++) {   for (int j = 0; j < s.length(); j++) {    if (((1 << j) & i) != 0) {     x += s.charAt(j);    }   }

       for (int j = 0; j < s.length(); j++) {    if (((1 << j) & (~i)) != 0) {

         y += s.charAt(j);

        }   }   if (count_sup(x + y) / count_sup(x) >= minconf) {    ruleMap.put(x, y);   }   x = "";   y = "";

      } }

     public void ruleGen() {  String s;  Iterator iterator = maxFrequency.iterator();  while (iterator.hasNext()) {   s = (String) iterator.next();   subGen(s);  } }

     // for test public void print1() {  Iterator temp = candidateSet[0].iterator();  while (temp.hasNext())   System.out.println(temp.next()); }

     // for test public void print2() {  Iterator temp = frequencySet[0].iterator();  while (temp.hasNext())   System.out.println((String) temp.next()); }

     // for test public void print3() {  canditate_gen(1);  frequent_gen(2);  Iterator temp = candidateSet[1].iterator();  Iterator temp1 = frequencySet[1].iterator();  while (temp.hasNext())   System.out.println("候选" + (String) temp.next());  while (temp1.hasNext())   System.out.println("频繁" + (String) temp1.next()); }

     public void print_canditate() {

      for (int i = 0; i < frequencySet[0].size(); i++) {   Iterator ix = candidateSet[i].iterator();   Iterator iy = frequencySet[i].iterator();   System.out.print("候选集" + (i + 1) + ":");   while (ix.hasNext()) {    System.out.print((String) ix.next() + "/t");    //System.out.print(toDigit((String) ix.next()) + "/t");   }   System.out.print("/n" + "频繁集" + (i + 1) + ":");   while (iy.hasNext()) {    System.out.print((String) iy.next() + "/t");    //System.out.print(toDigit((String) iy.next()) + "/t");   }   System.out.println();  } }

      private String toDigit(String str) {  if (str != null) {   StringBuffer temp = new StringBuffer();

       for (int i = 0; i < str.length(); i++) {    char c = str.charAt(i);    temp.append(((int) c - 65) + " ");   }

       return temp.toString();  } else {   return null;  }

     }

     public String[] getTrans_set() {  return transSet; }

     public void setTrans_set(String[] transSet) {  transSet = transSet; }

     public double getMinsup() {  return minsup; }

     public void setMinsup(double minsup) {  this.minsup = minsup; }

     public double getMinconf() {  return minconf; }

     public void setMinconf(double minconf) {  this.minconf = minconf; }

     public void run() {  int k = 1;

      item1_gen();

      do {   k++;   canditate_gen(k);   frequent_gen(k);  } while (!is_frequent_empty(k));  frequencyIndex = k - 1;  print_canditate();  maxfrequent_gen();  print_maxfrequent();  ruleGen();  rulePrint();

     }  public static void main(String[] args) {  Apriori ap = new Apriori();  ap.run(); }    

    }


    最新回复(0)