| % input: |
| %   monolingual textual corpus |
| % output: |
| %   graph of terms and documents |
| % NOTE(review): weight, k and q are used below but never assigned in this |
| % listing; they must already exist in the workspace -- confirm with caller. |
| % read the corpus, parse it, and execute the morphological step, |
| % weight the terms and obtain the term-document matrix |
| doc_file = 'files/document.txt'; |
| stopword_file = 'files/stopword_en.txt'; |
| A = doc_term_mat(doc_file, stopword_file, weight); |
| % apply the SVD decomposition |
| [W, Sigma, Y] = svd(A); |
| % obtain the components of A_k by the SVD (leading k columns / k-by-k block) |
| % NOTE(review): W_k, Sigma_k and Y_k are not referenced again in this listing. |
| W_k = W(:, 1:k); |
| Sigma_k = Sigma(1:k, 1:k); |
| Y_k = Y(:, 1:k); |
| % apply the TULVD |
| [U, L, V] = TULV(A); |
| % obtain the components of A_k by the TULVD |
| % NOTE(review): TULV receives no rank argument here; presumably it already |
| % returns the truncated (rank-k) factors -- confirm against TULV. |
| U_k = U; L_k = L; V_k = V; |
| % find term and document vectors in k-space |
| % (no trailing semicolons from here on, so each result is echoed) |
| term_vec = U_k * L_k |
| doc_vec = L_k * transpose(V_k) |
| % represent the query q in k-space |
| query_vec = query(q, U_k, L_k) |
| % find semantic relationship in the corpus using cosine similarity |
| semantic_sim = cosine_sim(query_vec, doc_vec) |