% Apply HAC (Hierarchical Agglomerative Clustering) Algorithm to merge |
the clusters whose centers are not far away from each other % |
() declare vectors (dynamic arrays) Tmp_ClAvg, Tmp_TotIns and Tmp_ClMax |
% Tmp_ClAvg (temporarily) stores the mean values of the clusters; |
Tmp_TotIns stores the total number of instances of each cluster; and |
Tmp_ClMax stores the highest-valued instance of each cluster % |
() for i = 0 to k − 1 do % k is the current number of clusters % |
() Tmp_ClAvg.Add(Clus_Avg[i]); % Add() function is used to add a |
new element at the end of the vector % |
() Clus_Avg[i] = 0; Clus_Max[i] = 0; Clus_Prob[i] = 0; |
end for |
() for i = 0 to k − 1 do % k is the current number of clusters % |
() var temp1 = −1; var temp2 = −1; |
() if i = 0 then |
() temp1 = 0; temp2 = Pos[0]; |
() else if i < k − 1 then |
() temp1 = Pos[i − 1] + 1; temp2 = Pos[i]; |
() else |
() temp1 = Pos[i − 1] + 1; temp2 = N − 1; |
() Tmp_TotIns.Add(temp2 − temp1 + 1); |
() Tmp_ClMax.Add(Time_Array[temp2]); |
end for |
() var clMrg1 = 0; var clMrg2 = 0; % used to store the index |
numbers of the clusters that have the lowest pairwise distance % |
() var repeat_HAC = true; % repeat until certain conditions are met % |
() do % apply the single-linkage HAC algorithm % |
() var shortDist = maximum 64-bit integer value; |
() for i = 0 to k − 2 do % find the two (adjacent) clusters that are most similar; k is the current number of clusters % |
() if (Tmp_ClAvg[i + 1] − Tmp_ClAvg[i]) < shortDist then |
() shortDist = Tmp_ClAvg[i + 1] − Tmp_ClAvg[i]; |
() clMrg1 = i; clMrg2 = i + 1; |
end for |
() if shortDist < (user-defined) similarity threshold value then |
% merge the two clusters and calculate the mean, the total number of instances |
and the highest-valued instance of the resulting cluster % |
() var tmp1 = Tmp_ClAvg[clMrg1] × Tmp_TotIns[clMrg1]; |
() var tmp2 = Tmp_ClAvg[clMrg2] × Tmp_TotIns[clMrg2]; |
() var tmp3 = Tmp_TotIns[clMrg1] + Tmp_TotIns[clMrg2]; |
() Tmp_ClAvg[clMrg1] = (tmp1 + tmp2)/tmp3; |
() Tmp_TotIns[clMrg1] = tmp3; |
() Tmp_ClMax[clMrg1] = Tmp_ClMax[clMrg2]; |
% Remove() function deletes an element at the specified position % |
() Tmp_ClAvg.Remove(clMrg2); Tmp_TotIns.Remove(clMrg2); |
() Tmp_ClMax.Remove(clMrg2); k = k − 1; % one cluster fewer after the merge (k: current number of clusters) % |
() else |
() repeat_HAC = false; % all the cluster centers are far away |
from each other − the halting condition of HAC algorithm % |
() if k = 1 then repeat_HAC = false; % single cluster (k: current number of clusters) % |
() while repeat_HAC ≠ false % termination condition % |
% store the average and maximum transition times back into Clus_Avg |
and Clus_Max, respectively; also calculate the probability values and store |
them in Clus_Prob (note: k is the resulting number of clusters) % |
() var tmp1 = k; var tmp2 = 0; |
() for i = 0 to tmp1 − 1 do |
% remove the clusters that have very low support (i.e., outliers) % |
() if (Tmp_TotIns[i]) > (user-defined) probability threshold value then |
() Clus_Avg[tmp2] = Tmp_ClAvg[i]; Clus_Max[tmp2] = Tmp_ClMax[i]; |
() Clus_Prob[tmp2] = Tmp_TotIns[i]/N; tmp2 = tmp2 + 1; |
() else k = k − 1; % decrease the total number of clusters k % |
end for |