Apriori algorithm source code

воскресенье, 2 августа 2009, Александр Краковецкий

In computer science and data mining, Apriori is a classic algorithm for learning association rules. Apriori is designed to operate on databases containing transactions (for example, collections of items bought by customers, or details of a website frequentation). Other algorithms are designed for finding association rules in data having no transactions (Winepi and Minepi), or having no timestamps (DNA sequencing). More in Wikipedia.

I found this code on the kdkeys website:

using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.IO;


class Program
{
    static void Main(string[] args)
    {
        string file = @"big_test.csv";
        string sup = "6";
       
        if (args.Length > 0)
        {
            file = args[0];

        }
        if (args.Length == 2)
        {
            sup = args[1];

        }


        double support = double.Parse(sup);

        CSVReader cr = new CSVReader();
        ItemSet data = cr.Read(file);


       

        Program p = new Program();
        ItemSet a = p.apriori(data, support);
        for (int i = 0; i < a.Count; i++)
        {
            ItemSet cur = (ItemSet)a.arrIdea;
            for (int j = 0; j < cur.Count; j++)
            {
                ItemSet now = (ItemSet)cur.arr[j];
                for(int k=0;k< data.Count; i++)
        {
            cur = (ItemSet)data.arrIdea;
            for (int j = 0; j < cur.Count; j++)
            {
                cd = (DataItem)cur.arr[j];
                for (int n = 0; n < result.Count; n++)
                {
                   
                    set = (ItemSet)result.arrNo;
                    td = (DataItem)set.arr[0];
                    if (cd.Id == td.Id)
                    {
                        set.ICount++;
                        flag = false;
                        break;

                    }
                    flag = true;
                }
                if (flag)
                {
                    newset = new ItemSet();
                    newset.Add(cd);
                    result.Add(newset);
                    newset.ICount = 1;
                }
            }

 

        }
        ItemSet finalResult = new ItemSet();
        for (int i = 0; i < result.Count; i++)
        {
            ItemSet con = (ItemSet)result.arrIdea;
            if (con.ICount >= support)
            {

                finalResult.Add(con);
            }


        }
        //finalResult.Sort();   
        return finalResult;


    }


    private ItemSet apriori(ItemSet data, double support)
    {

        ItemSet result = new ItemSet();
        ItemSet li = new ItemSet();
        ItemSet conList = new ItemSet();
        ItemSet subConList = new ItemSet();
        ItemSet subDataList = new ItemSet();
        ItemSet CurList = null;
        ItemSet subList = null;
        int k = 2;
        li.Add(new ItemSet());
        li.Add(this.FindOneColSet(data, support));

        while (((ItemSet)li.arr[k - 1]).Count != 0)
        {
            Console.WriteLine(k - 1);
            conList = AprioriGenerate((ItemSet)li.arr[k - 1], k - 1, support);
            for (int i = 0; i < data.Count; i++)
            {
                subDataList = SubSet((ItemSet)data.arrIdea, k);
                for (int j = 0; j < subDataList.Count; j++)
                {
                    subList = (ItemSet)subDataList.arr[j];
                    for (int n = 0; n < conList.Count; n++)
                    {
                        ((ItemSet)subDataList.arr[j]).Sort();
                        ((ItemSet)conList.arrNo).Sort();
                        CurList = (ItemSet)conList.arrNo;
                        if (subList.Equals(CurList))
                        {
                            ((ItemSet)conList.arrNo).ICount++;

                        }
                    }

                }

            }

            li.Add(new ItemSet());
            for (int i = 0; i < conList.Count; i++)
            {
                ItemSet con = (ItemSet)conList.arrIdea;
                if (con.ICount >= support)
                {

                    ((ItemSet)li.arr[k]).Add(con);
                }


            }

            k++;
        }
        //for (int j = 0; j < li.Count; j++)
        //{
        //    for (int h = 0; h < li.Count; h++)
        //    {
        //        if (((ItemSet)li.arr[j]).Equals((ItemSet)li.arrCool))
        //        {
        //            li.arr.RemoveAt(j);
        //            li.Count = li.arr.Count;
        //        }
        //    }
        //}
        for (int i = 0; i < li.Count; i++)
        {
            result.Add((ItemSet)li.arrIdea);
        }
        return result;

 

    }

    private ItemSet AprioriGenerate(ItemSet li, int k, double support)
    {

        ItemSet curList = null;
        ItemSet durList = null;
        ItemSet candi = null;
        ItemSet result = new ItemSet();
        for (int i = 0; i < li.Count; i++)
        {
            for (int j = 0; j < li.Count; j++)
            {
                bool flag = true;
                curList = (ItemSet)li.arrIdea;
                durList = (ItemSet)li.arr[j];
                for (int n = 2; n < k; n++)
                {

                    if (((DataItem)curList.arr[n - 2]).Id == ((DataItem)durList.arr[n - 2]).Id)
                    {

                        flag = true;

                    }
                    else
                    {
                       
                        flag = false;
                        break;


                    }


                }

                if (flag && ((DataItem)curList.arr[k - 1]).Id < ((DataItem)durList.arr[k - 1]).Id)
                {

                    flag = true;
                }
                else
                {
                    flag = false;
                }
                if (flag)
                {
                    candi = new ItemSet();


                    for (int m = 0; m < k; m++)
                    {
                        candi.Add((DataItem)durList.arr[m]);

                    }
                    candi.Add((DataItem)curList.arr[k - 1]);

 

 

                    if (HasInFrequentSubset(candi, li, k))
                    {
                        candi.Clear();

                    }
                    else
                    {
                        result.Add(candi);
                    }
                }

            }
        }
        return result;

    }

 

    private bool HasInFrequentSubset(ItemSet candidate, ItemSet li, int k)
    {
        ItemSet subSet = SubSet(candidate, k);
        ItemSet curList = null;
        ItemSet liCurList = null;

        for (int i = 0; i < subSet.Count; i++)
        {
            curList = (ItemSet)subSet.arrIdea;
            for (int j = 0; j < li.Count; j++)
            {

                liCurList = (ItemSet)li.arr[j];
                if (liCurList.Equals(curList))
                {
                    return false;

                }

            }
        }
        return true; ;
    }
    //????   
    private ItemSet SubSet(ItemSet set)
    {
        ItemSet subSet = new ItemSet();

        ItemSet itemSet = new ItemSet();
        //???2n??   
        int num = 1 << set.Count;

        int bit;
        int mask = 0; ;
        for (int i = 0; i < num; i++)
        {
            itemSet = new ItemSet();
            for (int j = 0; j < set.Count; j++)
            {
                //mask?i??????????   
                mask = 1 << j;
                bit = i & mask;
                if (bit > 0)
                {

                    itemSet.Add((ItemSet)set.arr[j]);

                }
            }
            if (itemSet.Count > 0)
            {
                subSet.Add(itemSet);
            }


        }

 

        return subSet;
    }

 

    //????   
    private ItemSet SubSet(ItemSet set, int t)
    {
        ItemSet subSet = new ItemSet();

        ItemSet itemSet = new ItemSet();
        //???2n??   
        int num = 1 << set.Count;

        int bit;
        int mask = 0; ;
        for (int i = 0; i < num; i++)
        {
            itemSet = new ItemSet();
            for (int j = 0; j < set.Count; j++)
            {
                //mask?i??????????   
                mask = 1 << j;
                bit = i & mask;
                if (bit > 0)
                {

                    itemSet.Add((DataItem)set.arr[j]);

                }
            }
            if (itemSet.Count == t)
            {
                subSet.Add(itemSet);
            }


        }

 

        return subSet;
    }
}

public class DataItem
{
    public int Id;
    public string ItemName;
    public void Add(string item,int id)
    {
        ItemName=item;
        Id=id;
    }
}

public class ItemSet
{
    public int Count=0;
    public int ICount=0;
    public ArrayList arr = new ArrayList();
    public void Add(ItemSet input)
    {
        arr.Add(input);
        Count++;
    }
    public void Add(string input)
    {
        arr.Add(input);
        Count++;
    }
    public void Add(DataItem input)
    {
        arr.Add(input);
        Count++;
    }
    public void Sort()
    {
        DataItem temp = null;
        for (int i = 0; i < this.Count-1; i++)
        {
            for (int j = i+1; j < this.Count; j++)
            {
                if (((DataItem)this.arrIdea).Id > ((DataItem)this.arr[j]).Id)
                {
                    temp = (DataItem)this.arrIdea;
                    this.arrIdea = this.arr[j];
                    this.arr[j] = temp;
                }
            }
        }
    }
    public bool Equals(ItemSet input)
    {
        if ((input.arr == null) || !(input.arr.GetType().Equals(this.arr.GetType())))
        {
            return false;
        }
        else if (input.arr.Count != this.arr.Count)
        {
            return false;
        }
        else
        {
            for (int i = 0; i < arr.Count; i++)
            {
                if (((DataItem)arrIdea).ItemName != ((DataItem)input.arrIdea).ItemName)
                    return false;
            }
            return true;
        }
    }
    public void Clear()
    {
        arr.Clear();
        Count = 0;
        ICount = 0;
    }
}

public class CSVReader
{
    public ItemSet Read(string file)
    {
        StreamReader csvfile = new StreamReader(file);
        ItemSet General = new ItemSet();
        ItemSet items =new ItemSet();
        DataItem set = new DataItem();
        string Line="";
        string temp = "";
        int start = 0;
        int id = 0;
        int tcn=0;
        Line = csvfile.ReadLine();
       
        while (!csvfile.EndOfStream)
        {
           
            Line = csvfile.ReadLine();
            tcn++;
            items = new ItemSet();

            while (Line.IndexOf(",") != -1)
            {
                set = new DataItem();
                temp = Line.Substring(0, Line.IndexOf(","));
               
                Line = Line.Substring(Line.IndexOf(",") + 1);
                set.Add(temp,id);
                items.Add(set);
                id++;
                start = Line.IndexOf(",");
            }
            temp = Line;
            set.Add(temp,id);
            items.Add(set);
            id = 0;
            temp = "";
            start = 0;
            General.Add(items);
        }
        //General.Add(items);
        Console.WriteLine(General.Count);
        return General;
    }
}

I don't test it yet but will do it soon and back with results.


Ищите нас в интернетах!

Комментарии

Свежие вакансии