From b0d544eb9034c8c0ccfe5d6a39b9552b84a03bcf Mon Sep 17 00:00:00 2001
From: Tom Smeding
Date: Fri, 5 Jul 2024 20:58:14 +0200
Subject: Hiragana and katakana

---
 japanese.vim | 289 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 239 insertions(+), 50 deletions(-)

diff --git a/japanese.vim b/japanese.vim
index 621cd17..62f0726 100644
--- a/japanese.vim
+++ b/japanese.vim
@@ -1,9 +1,18 @@
+" Convention: If a variable holds a 1-based coordinate, its name is suffixed
+" with '_1'.
+
+if !hlexists("JapaneseVimFragment")
+  hi JapaneseVimFragment cterm=underline gui=underline
+endif
+
+" Insert a:text at the cursor and leave the cursor just after it.
+" Works in byte offsets throughout (col(), strpart(), strlen(), cursor()).
 function s:append_at_cursor(text)
-  let y = line(".")
-  let x = charcol(".")
-  let ln = getline(".")
-  call setline(".", strcharpart(ln, 0, x - 1) . a:text . strcharpart(ln, x - 1))
-  call setcursorcharpos(y, x + strcharlen(a:text))
+  const y_1 = line(".")
+  const xbytes = col(".") - 1
+  const ln = getline(".")
+  call setline(".", strpart(ln, 0, xbytes) .. a:text .. strpart(ln, xbytes))
+  call cursor(y_1, xbytes + 1 + strlen(a:text))
 endfunction
 
 function s:isvowel(c)
@@ -11,69 +20,249 @@ function s:isvowel(c)
 endfunction
 
 function s:isconsonant(c)
-  return strcharlen(a:c) == 1 && stridx("kgsztdnhmyrw", a:c) != -1
+  return strcharlen(a:c) == 1 && stridx("kgsztdnhbpmyrw", a:c) != -1
 endfunction
 
-let s:punctuation = {
-  \ '.': "。",
-  \ ',': "、",
-  \ '(': "(",
-  \ ')': ")",
-  \ '{': "{",
-  \ '}': "}",
-  \ '[': "「",
-  \ ']': "」",
-  \ '/': "・",
-  \ '~': "〜",
-  \ ':': ":",
-  \ '!': "!",
-  \ '?': "?"
+const s:nokana_digraph = ["yi", "ye", "wu"]
+
+const s:punctuation = {
+  \ ".": "。",
+  \ ",": "、",
+  \ "(": "(",
+  \ ")": ")",
+  \ "{": "{",
+  \ "}": "}",
+  \ "[": "「",
+  \ "]": "」",
+  \ "/": "・",
+  \ "~": "〜",
+  \ ":": ":",
+  \ "!": "!",
+  \ "?": "?",
   \ }
 
-function s:renderkana(s)
+" const s:hiragana_string = "かきくけこがぎぐげごさしすせそざじずぜぞたちつてとだぢづでどなにぬねのはひふへほばびぶべぼぱぴぷぺぽまみむめもやゆよらりるれろわゐゑを"
+" const s:katakana_string = "カキクケコガギグゲゴサシスセソザジズゼゾタチツテトダヂヅデドナニヌネノハヒフヘホバビブベボパピプペポマミムメモヤユヨラリルレロワヰヱヲ"
+
+" Convert the romaji string a:s to kana, scanning left to right.
+" kanamode: 1 for hiragana, 2 for katakana; any other value throws (for
+" non-empty a:s). Characters that match no rule are copied through unchanged.
+function s:renderkana(s, kanamode)
   if len(a:s) == 0
     return ""
   endif
 
+  if a:kanamode == 1
+    const hira = 1
+  elseif a:kanamode == 2
+    const hira = 0
+  else
+    throw "Invalid kanamode " .. a:kanamode
+  endif
+
   " echom "renderkana:" a:s strcharpart(a:s, 0, 1) s:isvowel(strcharpart(a:s, 0, 1))
 
-  let c0 = strcharpart(a:s, 0, 1)
-  let c1 = strcharpart(a:s, 1, 1)
-  let tail1 = strcharpart(a:s, 1)
-  let tail2 = strcharpart(a:s, 2)
-  if s:isvowel(c0)
-    return digraph_get(c0 . "5") . s:renderkana(tail1)
-  elseif s:isconsonant(c0) && s:isvowel(c1)
-    return digraph_get(c0 . c1) . s:renderkana(tail2)
-  elseif c0 ==# "-"
-    return "ー" . s:renderkana(tail1)
-  elseif has_key(s:punctuation, c0)
-    return get(s:punctuation, c0) . s:renderkana(tail1)
-  else
-    return c0 . s:renderkana(tail1)
+  let result = ""
+  let i = 0
+  while i < strcharlen(a:s)
+    let c0 = strcharpart(a:s, i, 1)
+    let c1 = strcharpart(a:s, i+1, 1)
+    let c2 = strcharpart(a:s, i+2, 1)
+
+    " echom "render:" a:s i c0
+
+    " vowel kana
+    if s:isvowel(c0)
+      let result ..= hira ? digraph_get(c0 .. "5") : digraph_get(toupper(c0) .. "6")
+      let i += 1
+    " consonant-vowel kana
+    elseif s:isconsonant(c0) && s:isvowel(c1) && index(s:nokana_digraph, c0 .. c1) == -1
+      let result ..= hira ? digraph_get(c0 .. c1) : digraph_get(toupper(c0) .. c1)
+      let i += 2
+    " glide
+    elseif s:isconsonant(c0) && c1 ==# "y" && s:isvowel(c2)
+      let result ..= hira ? digraph_get(c0 .. "i") : digraph_get(toupper(c0) .. "i")
+      let result ..= hira ? digraph_get("y" .. toupper(c2)) : digraph_get("Y" .. toupper(c2))
+      let i += 3
+    " normal n
+    elseif c0 ==# "n" && c1 ==# "n"
+      let result ..= (hira ? "ん" : "ン")
+      let i += 2
+    " implicit n
+    elseif c0 ==# "n" && c1 !=# "y" && s:isconsonant(c1)
+      let result ..= (hira ? "ん" : "ン")
+      let i += 1
+    " geminated consonant (sokuon)
+    elseif s:isconsonant(c0) && s:isconsonant(c1)
+      let result ..= (hira ? "っ" : "ッ")
+      let i += 1
+    " long vowel mark (choonpu)
+    elseif c0 ==# "-"
+      let result ..= "ー"
+      let i += 1
+    " punctuation
+    elseif has_key(s:punctuation, c0)
+      let result ..= get(s:punctuation, c0)
+      let i += 1
+    " fallback
+    else
+      let result ..= c0
+      let i += 1
+    endif
+  endwhile
+
+  return result
+endfunction
+
+" matchid: ID of the match() region
+" y_1: 1-based line number of the region
+" x1_1: 1-based x of start of region, inclusive
+" x2_1: 1-based x of end of region, exclusive
+" kanamode: 1 for hiragana, 2 for katakana
+function s:make_region(matchid, y_1, x1_1, x2_1, kanamode)
+  return #{matchid: a:matchid,
+         \ y_1: a:y_1,
+         \ x1_1: a:x1_1,
+         \ x2_1: a:x2_1,
+         \ kanamode: a:kanamode,
+         \ }
+endfunction
+
+" The fragment currently being typed and formatted.
+" Dictionary as returned by s:make_region.
+let s:cur_region = v:null
+
+" Drop the active region (if any) and remove its highlight match.
+function s:terminate_region()
+  if s:cur_region isnot v:null
+    call matchdelete(s:cur_region.matchid)
+    let s:cur_region = v:null
   endif
 endfunction
 
-function s:jpkey(key)
-  " call s:insert_at_cursor("key " . a:key)
-  let y = line(".")
-  let x = charcol(".") - 1
-  let fullline = getline(".")
+" Handle one typed key in insert mode. The key, together with up to two
+" preceding characters of the active region (leading non-ASCII ones are
+" skipped), is rendered as kana in place; the region is created or updated.
+function s:handle_keypress(key)
+  " call s:insert_at_cursor("key " .. a:key)
+  const y_1 = line(".")
+  const x = charcol(".") - 1
+  const fullline = getline(".")
 
-  let fragment = strcharpart(fullline, max([0, x - 2]), x) . a:key
+  " If the user moved away from the region, start a new region here
+  if s:cur_region isnot v:null && (y_1 != s:cur_region.y_1 || x + 1 < s:cur_region.x1_1 || x + 1 > s:cur_region.x2_1)
+    call s:terminate_region()
+  endif
 
-  " call s:append_at_cursor("<" . fragment . ">")
-  while len(fragment) > 0 && strgetchar(fragment, 0) > 127
-    let fragment = strcharpart(fragment, 1)
+  " If the line became shorter, make sure the region doesn't extend past the end of the line
+  if s:cur_region isnot v:null
+    let s:cur_region.x2_1 = min([strcharlen(fullline) + 1, s:cur_region.x2_1])
+  endif
+
+  const cur_region_start = s:cur_region is v:null ? x : s:cur_region.x1_1 - 1
+  let input_start = max([cur_region_start, x - 2])
+  let input = strcharpart(fullline, input_start, x - input_start) .. a:key
+
+  while len(input) > 0 && strgetchar(input, 0) > 127
+    let input = strcharpart(input, 1)
+    let input_start += 1
   endwhile
 
-  " call s:append_at_cursor("<" . fragment . ">")
-  let result = s:renderkana(fragment)
-  " echom result
-  call setline(".", strcharpart(fullline, 0, x + 1 - strcharlen(fragment)) . result . strcharpart(fullline, x))
-  call setcursorcharpos(y, x + 1 - strcharlen(fragment) + strcharlen(result) + 1)
+  const input_extra_chars = x - input_start
+
+  const result = s:renderkana(input, s:cur_region is v:null ? 1 : s:cur_region.kanamode)
+
+  " echom y_1 x s:cur_region input result
+
+  call setline(".", strcharpart(fullline, 0, x - input_extra_chars) .. result .. strcharpart(fullline, x))
+  const newx_1 = x + 1 - input_extra_chars + strcharlen(result)
+  call setcursorcharpos(y_1, newx_1)
+
+  if s:cur_region is v:null
+    const startcol = x - input_extra_chars
+    const matchid = matchaddpos("JapaneseVimFragment", [[y_1, strlen(strcharpart(fullline, 0, startcol)) + 1, strlen(result)]])
+    let s:cur_region = s:make_region(matchid, y_1, startcol + 1, newx_1, 1)
+  else
+    call matchdelete(s:cur_region.matchid)
+    let s:cur_region.x2_1 += strcharlen(result) - input_extra_chars
+    const reglen = len(strcharpart(getline("."), s:cur_region.x1_1 - 1, s:cur_region.x2_1 - s:cur_region.x1_1))
+    let s:cur_region.matchid = matchaddpos("JapaneseVimFragment", [[s:cur_region.y_1, strlen(strcharpart(getline("."), 0, s:cur_region.x1_1 - 1)) + 1, reglen]])
+  endif
+endfunction
+
+" Convert the kana inside the active region to hiragana (tokanamode == 1) or
+" katakana (tokanamode == 2). Sounds the bell if there is no active region.
+function s:convert_region_kana(tokanamode)
+  if s:cur_region is v:null
+    " Sound a bell
+    execute "normal! \<Esc>"
+    return
+  endif
+
+  const ln = getline(s:cur_region.y_1)
+  const s = strcharpart(ln, s:cur_region.x1_1 - 1, s:cur_region.x2_1 - s:cur_region.x1_1)
+  let result = ""
+  " TODO: this conversion is not complete, but it gets the most important kana.
+  for c in s
+    let n = char2nr(c)
+    " hiragana -> katakana
+    if a:tokanamode == 2 && strcharlen(c) == 1 && 0x3041 <= n && n <= 0x3096
+      let result ..= nr2char(n + (0x30A0 - 0x3040))
+    " katakana -> hiragana
+    elseif a:tokanamode == 1 && strcharlen(c) == 1 && 0x30A1 <= n && n <= 0x30F6
+      let result ..= nr2char(n + (0x3040 - 0x30A0))
+    else
+      let result ..= c
+    endif
+  endfor
+
+  call setline(s:cur_region.y_1, strcharpart(ln, 0, s:cur_region.x1_1 - 1) .. result .. strcharpart(ln, s:cur_region.x2_1 - 1))
+
+  let s:cur_region.kanamode = a:tokanamode
+endfunction
+
+" End the active region and leave insert mode.
+function s:handle_escape()
+  call s:terminate_region()
+  stopinsert
+endfunction
+
+" End the active region and insert a literal space.
+function s:handle_space()
+  call s:terminate_region()
+  call s:append_at_cursor(" ")
+endfunction
+
+" End the active region if there is one; otherwise break the line at the cursor.
+function s:handle_enter()
+  if s:cur_region isnot v:null
+    call s:terminate_region()
+  else
+    " Simulate normal enter
+    const y_1 = line(".")
+    const xbytes = col(".") - 1
+    const ln = getline(".")
+    call setline(".", strpart(ln, 0, xbytes))
+    call append(y_1, strpart(ln, xbytes))
+    call cursor(y_1 + 1, 1)
+  endif
+endfunction
+
+" Switch the active region to hiragana.
+function s:handle_hiragana()
+  call s:convert_region_kana(1)
+endfunction
+
+" Switch the active region to katakana.
+function s:handle_katagana()
+  call s:convert_region_kana(2)
 endfunction
 
 for c in "abcdefghijklmnopqrstuvwxyz-.,(){}[]/~:!?"
-  execute "inoremap " . c . " call jpkey('" . c . "')"
+  execute "inoremap " .. c .. " call handle_keypress(\"" .. c .. "\")"
 endfor
+inoremap call handle_escape()
+inoremap call handle_space()
+inoremap call handle_enter()
+inoremap call handle_hiragana()
+inoremap call handle_katagana()
-- 
cgit v1.2.3-70-g09d2