Research Article
Method of Profanity Detection Using Word Embedding and LSTM
Table 1
Separation of the onset, nucleus, and coda.
| >>> def run (s): |
| >>> consonant_ord_list = [ord (char) for char in Korean consonants] | >>> choseong_list = [char for char in Korean consonants] | >>> jungseong_list = [char for char in Korean vowels] | >>> jongseong_list = [char for char in Korean consonants and double consonants] | >>> result = [] | >>> for char in s: | >>> if ord (char) == 32: | >>> result.append (char) | >>> elif 48 <= ord (char) <= 57: | >>> result.append (char) | >>> elif consonant_list.count (char) == 0: | >>> character_code = ord (char) | >>> if (55203 < character_code or character_code < 44032): | >>> continue | >>> code = 44032 | >>> choseong_index = (character_code - code)//21//28 | >>> jungseong_index = (character_code - code - (choseong_index * 21 * 28))//28 | >>> jongseong_index = character_code - code - (choseong_index * 21 * 28) - (jungseong_index * 28) | >>> result.append (choseong_list [choseong_index]) | >>> result.append (jungseong_list [jungseong_index]) | >>> result.append (jongseong_list [jongseong_index]) | >>> else: | >>> choseong_index = consonant_ord_list.index (ord (char)) | >>> result.append (choseong_list [choseong_index]) | >>> result.append ("-") | >>> result.append ("-") | >>> return "".join (result) |
|
|