Input: TD: Transaction (Document) Database; ms: minimum support threshold; |
Output: FIS: frequent itemsets; inFIS: infrequent itemsets; |
(1) initialize FIS = Φ; inFIS = Φ; |
(2) temp1 = top(N) IDF terms; /* get all 1-itemsets being in top-N IDF items */ |
(2.1) $FIS_1 = \{A \mid A \in temp_1 \text{ and } \mathrm{support}(A) \geq ms\}$; /* get frequent 1-itemsets */ |
(2.2) $inFIS_1 = temp_1 - FIS_1$; /* all infrequent 1-itemsets */ |
$k = 2$; /* initialize for itemsets greater than 1 */ |
(3) while ($temp_{k-1} \neq \Phi$) do begin |
(3.1) $C_k = \mathrm{generate}(temp_{k-1}, ms)$; /* candidate $k$-itemsets */ |
(3.2) for each transaction $t \in TD$ |
do begin /* scan database TD*/ |
$C_t = \mathrm{subset}(C_k, t)$; /* get temp candidates in transaction $t$ */ |
for each candidate $c \in C_t$ |
c.count++; /* increase count of itemsets if it exists in transaction */ |
end; |
(3.3) $c.\mathrm{support} = c.\mathrm{count} / \lvert TD \rvert$; /* calculate support of candidate $k$-itemset */ |
(3.4) $temp_k = \{c \mid c \in C_k \text{ and } c.\mathrm{count} \geq 1\}$; /* add to temp $k$-itemsets */ |
(4) $FIS_k = \{A \mid A \in temp_k \text{ and } A.\mathrm{support} \geq ms\}$; /* add to frequent $k$-itemsets if supp($A$) is at least minsupp */ |
(5) $inFIS_k = temp_k - FIS_k$; /* add to infrequent $k$-itemsets those having support less than minsupp */ |
(6) FIS = $\bigcup_k FIS_k$; /* add generated frequent $k$-itemsets to FIS */ |
(7) inFIS = $\bigcup_k inFIS_k$; /* add generated infrequent $k$-itemsets to inFIS */ |
(8) k ++; /* increment itemset size by 1 */ |
end; |
(9) return FIS and inFIS; |