Apriori algorithm source code
In computer science and data mining, Apriori is a classic algorithm for learning association rules. Apriori is designed to operate on databases containing transactions (for example, collections of items bought by customers, or details of a website frequentation). Other algorithms are designed for finding association rules in data having no transactions (Winepi and Minepi), or having no timestamps (DNA sequencing). More in Wikipedia.
I found this code on the kdkeys website:
using System; using System.Collections.Generic; using System.Text; using System.Collections; using System.IO; class Program { static void Main(string[] args) { string file = @"big_test.csv"; string sup = "6"; if (args.Length > 0) { file = args[0]; } if (args.Length == 2) { sup = args[1]; } double support = double.Parse(sup); CSVReader cr = new CSVReader(); ItemSet data = cr.Read(file); Program p = new Program(); ItemSet a = p.apriori(data, support); for (int i = 0; i < a.Count; i++) { ItemSet cur = (ItemSet)a.arrIdea; for (int j = 0; j < cur.Count; j++) { ItemSet now = (ItemSet)cur.arr[j]; for(int k=0;k< data.Count; i++) { cur = (ItemSet)data.arrIdea; for (int j = 0; j < cur.Count; j++) { cd = (DataItem)cur.arr[j]; for (int n = 0; n < result.Count; n++) { set = (ItemSet)result.arrNo; td = (DataItem)set.arr[0]; if (cd.Id == td.Id) { set.ICount++; flag = false; break; } flag = true; } if (flag) { newset = new ItemSet(); newset.Add(cd); result.Add(newset); newset.ICount = 1; } } } ItemSet finalResult = new ItemSet(); for (int i = 0; i < result.Count; i++) { ItemSet con = (ItemSet)result.arrIdea; if (con.ICount >= support) { finalResult.Add(con); } } //finalResult.Sort(); return finalResult; } private ItemSet apriori(ItemSet data, double support) { ItemSet result = new ItemSet(); ItemSet li = new ItemSet(); ItemSet conList = new ItemSet(); ItemSet subConList = new ItemSet(); ItemSet subDataList = new ItemSet(); ItemSet CurList = null; ItemSet subList = null; int k = 2; li.Add(new ItemSet()); li.Add(this.FindOneColSet(data, support)); while (((ItemSet)li.arr[k - 1]).Count != 0) { Console.WriteLine(k - 1); conList = AprioriGenerate((ItemSet)li.arr[k - 1], k - 1, support); for (int i = 0; i < data.Count; i++) { subDataList = SubSet((ItemSet)data.arrIdea, k); for (int j = 0; j < subDataList.Count; j++) { subList = (ItemSet)subDataList.arr[j]; for (int n = 0; n < conList.Count; n++) { ((ItemSet)subDataList.arr[j]).Sort(); ((ItemSet)conList.arrNo).Sort(); CurList = (ItemSet)conList.arrNo; if (subList.Equals(CurList)) { ((ItemSet)conList.arrNo).ICount++; } } } } li.Add(new ItemSet()); for (int i = 0; i < conList.Count; i++) { ItemSet con = (ItemSet)conList.arrIdea; if (con.ICount >= support) { ((ItemSet)li.arr[k]).Add(con); } } k++; } //for (int j = 0; j < li.Count; j++) //{ // for (int h = 0; h < li.Count; h++) // { // if (((ItemSet)li.arr[j]).Equals((ItemSet)li.arrCool)) // { // li.arr.RemoveAt(j); // li.Count = li.arr.Count; // } // } //} for (int i = 0; i < li.Count; i++) { result.Add((ItemSet)li.arrIdea); } return result; } private ItemSet AprioriGenerate(ItemSet li, int k, double support) { ItemSet curList = null; ItemSet durList = null; ItemSet candi = null; ItemSet result = new ItemSet(); for (int i = 0; i < li.Count; i++) { for (int j = 0; j < li.Count; j++) { bool flag = true; curList = (ItemSet)li.arrIdea; durList = (ItemSet)li.arr[j]; for (int n = 2; n < k; n++) { if (((DataItem)curList.arr[n - 2]).Id == ((DataItem)durList.arr[n - 2]).Id) { flag = true; } else { flag = false; break; } } if (flag && ((DataItem)curList.arr[k - 1]).Id < ((DataItem)durList.arr[k - 1]).Id) { flag = true; } else { flag = false; } if (flag) { candi = new ItemSet(); for (int m = 0; m < k; m++) { candi.Add((DataItem)durList.arr[m]); } candi.Add((DataItem)curList.arr[k - 1]); if (HasInFrequentSubset(candi, li, k)) { candi.Clear(); } else { result.Add(candi); } } } } return result; } private bool HasInFrequentSubset(ItemSet candidate, ItemSet li, int k) { ItemSet subSet = SubSet(candidate, k); ItemSet curList = null; ItemSet liCurList = null; for (int i = 0; i < subSet.Count; i++) { curList = (ItemSet)subSet.arrIdea; for (int j = 0; j < li.Count; j++) { liCurList = (ItemSet)li.arr[j]; if (liCurList.Equals(curList)) { return false; } } } return true; ; } //???? private ItemSet SubSet(ItemSet set) { ItemSet subSet = new ItemSet(); ItemSet itemSet = new ItemSet(); //???2n?? int num = 1 << set.Count; int bit; int mask = 0; ; for (int i = 0; i < num; i++) { itemSet = new ItemSet(); for (int j = 0; j < set.Count; j++) { //mask?i?????????? mask = 1 << j; bit = i & mask; if (bit > 0) { itemSet.Add((ItemSet)set.arr[j]); } } if (itemSet.Count > 0) { subSet.Add(itemSet); } } return subSet; } //???? private ItemSet SubSet(ItemSet set, int t) { ItemSet subSet = new ItemSet(); ItemSet itemSet = new ItemSet(); //???2n?? int num = 1 << set.Count; int bit; int mask = 0; ; for (int i = 0; i < num; i++) { itemSet = new ItemSet(); for (int j = 0; j < set.Count; j++) { //mask?i?????????? mask = 1 << j; bit = i & mask; if (bit > 0) { itemSet.Add((DataItem)set.arr[j]); } } if (itemSet.Count == t) { subSet.Add(itemSet); } } return subSet; } } public class DataItem { public int Id; public string ItemName; public void Add(string item,int id) { ItemName=item; Id=id; } } public class ItemSet { public int Count=0; public int ICount=0; public ArrayList arr = new ArrayList(); public void Add(ItemSet input) { arr.Add(input); Count++; } public void Add(string input) { arr.Add(input); Count++; } public void Add(DataItem input) { arr.Add(input); Count++; } public void Sort() { DataItem temp = null; for (int i = 0; i < this.Count-1; i++) { for (int j = i+1; j < this.Count; j++) { if (((DataItem)this.arrIdea).Id > ((DataItem)this.arr[j]).Id) { temp = (DataItem)this.arrIdea; this.arrIdea = this.arr[j]; this.arr[j] = temp; } } } } public bool Equals(ItemSet input) { if ((input.arr == null) || !(input.arr.GetType().Equals(this.arr.GetType()))) { return false; } else if (input.arr.Count != this.arr.Count) { return false; } else { for (int i = 0; i < arr.Count; i++) { if (((DataItem)arrIdea).ItemName != ((DataItem)input.arrIdea).ItemName) return false; } return true; } } public void Clear() { arr.Clear(); Count = 0; ICount = 0; } } public class CSVReader { public ItemSet Read(string file) { StreamReader csvfile = new StreamReader(file); ItemSet General = new ItemSet(); ItemSet items =new ItemSet(); DataItem set = new DataItem(); string Line=""; string temp = ""; int start = 0; int id = 0; int tcn=0; Line = csvfile.ReadLine(); while (!csvfile.EndOfStream) { Line = csvfile.ReadLine(); tcn++; items = new ItemSet(); while (Line.IndexOf(",") != -1) { set = new DataItem(); temp = Line.Substring(0, Line.IndexOf(",")); Line = Line.Substring(Line.IndexOf(",") + 1); set.Add(temp,id); items.Add(set); id++; start = Line.IndexOf(","); } temp = Line; set.Add(temp,id); items.Add(set); id = 0; temp = ""; start = 0; General.Add(items); } //General.Add(items); Console.WriteLine(General.Count); return General; } }
I don't test it yet but will do it soon and back with results.