aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Smeding <tom@tomsmeding.com>2024-07-05 20:58:14 +0200
committerTom Smeding <tom@tomsmeding.com>2024-07-05 20:58:14 +0200
commitb0d544eb9034c8c0ccfe5d6a39b9552b84a03bcf (patch)
tree7f9abafd3fea9db5cb5a902e0da68380e57c2e76
parent04944f4de1b2020464ef506cdaf33a1ea65068a9 (diff)
Hiragana and katakana
-rw-r--r--japanese.vim273
1 files changed, 223 insertions, 50 deletions
diff --git a/japanese.vim b/japanese.vim
index 621cd17..62f0726 100644
--- a/japanese.vim
+++ b/japanese.vim
@@ -1,9 +1,16 @@
+" Convention: If a variable holds a 1-based coordinate, its name is suffixed
+" with '_1'.
+
+" Define the underline highlight used for the fragment being typed, but only
+" when no JapaneseVimFragment highlight group exists yet.
+if hlexists("JapaneseVimFragment") == 0
+ highlight JapaneseVimFragment cterm=underline gui=underline
+endif
+
+" Insert a:text at the cursor position in the current line and leave the
+" cursor just after the inserted text.
function s:append_at_cursor(text)
- let y = line(".")
- let x = charcol(".")
- let ln = getline(".")
- call setline(".", strcharpart(ln, 0, x - 1) . a:text . strcharpart(ln, x - 1))
- call setcursorcharpos(y, x + strcharlen(a:text))
+ const y_1 = line(".")
+ " 0-based byte offset of the cursor; col(), strpart(), strlen() and cursor() all count bytes.
+ const xbytes = col(".") - 1
+ const ln = getline(".")
+ call setline(".", strpart(ln, 0, xbytes) .. a:text .. strpart(ln, xbytes))
+ call cursor(y_1, xbytes + 1 + strlen(a:text))
endfunction
function s:isvowel(c)
@@ -11,69 +18,235 @@ function s:isvowel(c)
endfunction
+" Return true when a:c is exactly one character and that character is one of
+" the romaji consonant letters in the list below.
function s:isconsonant(c)
- return strcharlen(a:c) == 1 && stridx("kgsztdnhmyrw", a:c) != -1
+ return strcharlen(a:c) == 1 && stridx("kgsztdnhbpmyrw", a:c) != -1
endfunction
-let s:punctuation = {
- \ '.': "。",
- \ ',': "、",
- \ '(': "(",
- \ ')': ")",
- \ '{': "{",
- \ '}': "}",
- \ '[': "「",
- \ ']': "」",
- \ '/': "・",
- \ '~': "〜",
- \ ':': ":",
- \ '!': "!",
- \ '?': "?"
+" Consonant+vowel pairs that s:renderkana must not look up as a kana digraph
+" (presumably because no kana exists for them -- verify against the digraph table).
+const s:nokana_digraph = ["yi", "ye", "wu"]
+
+" Replacement text for punctuation keys typed in Japanese mode.
+" NOTE(review): as shown here, ( ) { } : ! ? map to themselves; presumably the
+" values are meant to be full-width forms -- confirm against the original file.
+const s:punctuation = {
+ \ ".": "。",
+ \ ",": "、",
+ \ "(": "(",
+ \ ")": ")",
+ \ "{": "{",
+ \ "}": "}",
+ \ "[": "「",
+ \ "]": "」",
+ \ "/": "・",
+ \ "~": "〜",
+ \ ":": ":",
+ \ "!": "!",
+ \ "?": "?",
\ }
-function s:renderkana(s)
+" const s:hiragana_string = "かきくけこがぎぐげごさしすせそざじずぜぞたちつてとだぢづでどなにぬねのはひふへほばびぶべぼぱぴぷぺぽまみむめもやゆよらりるれろわゐゑを"
+" const s:katakana_string = "カキクケコガギグゲゴサシスセソザジズゼゾタチツテトダヂヅデドナニヌネノハヒフヘホバビブベボパピプペポマミムメモヤユヨラリルレロワヰヱヲ"
+
+" Convert the romaji string a:s to kana and return the result.
+" kanamode: 1 renders hiragana, 2 renders katakana; any other value throws.
+" Characters that do not form a kana (or punctuation) are copied unchanged.
+" An empty a:s yields "".
+function s:renderkana(s, kanamode)
if len(a:s) == 0
return ""
endif
+ if a:kanamode == 1
+ const hira = 1
+ elseif a:kanamode == 2
+ const hira = 0
+ else
+ throw "Invalid kanamode " . a:kanamode
+ endif
+
" echom "renderkana:" a:s strcharpart(a:s, 0, 1) s:isvowel(strcharpart(a:s, 0, 1))
- let c0 = strcharpart(a:s, 0, 1)
- let c1 = strcharpart(a:s, 1, 1)
- let tail1 = strcharpart(a:s, 1)
- let tail2 = strcharpart(a:s, 2)
- if s:isvowel(c0)
- return digraph_get(c0 . "5") . s:renderkana(tail1)
- elseif s:isconsonant(c0) && s:isvowel(c1)
- return digraph_get(c0 . c1) . s:renderkana(tail2)
- elseif c0 ==# "-"
- return "ー" . s:renderkana(tail1)
- elseif has_key(s:punctuation, c0)
- return get(s:punctuation, c0) . s:renderkana(tail1)
- else
- return c0 . s:renderkana(tail1)
+ let result = ""
+ let i = 0
+ while i < strcharlen(a:s)
+ let c0 = strcharpart(a:s, i, 1)
+ let c1 = strcharpart(a:s, i+1, 1)
+ let c2 = strcharpart(a:s, i+2, 1)
+
+ " echom "render:" a:s i c0
+
+ " vowel kana
+ if s:isvowel(c0)
+ let result ..= hira ? digraph_get(c0 .. "5") : digraph_get(toupper(c0) .. "6")
+ let i += 1
+ " consonant-vowel kana
+ elseif s:isconsonant(c0) && s:isvowel(c1) && index(s:nokana_digraph, c0 .. c1) == -1
+ let result ..= hira ? digraph_get(c0 .. c1) : digraph_get(toupper(c0) .. c1)
+ let i += 2
+ " glide: consonant + i kana followed by a small ya/yu/yo (digraphs yA/YA etc.)
+ elseif s:isconsonant(c0) && c1 ==# "y" && s:isvowel(c2)
+ let result ..= hira ? digraph_get(c0 .. "i") : digraph_get(toupper(c0) .. "i")
+ let result ..= hira ? digraph_get("y" .. toupper(c2)) : digraph_get("Y" .. toupper(c2))
+ let i += 3
+ " normal n
+ elseif c0 ==# "n" && c1 ==# "n"
+ let result ..= (hira ? "ん" : "ン")
+ let i += 2
+ " implicit n
+ elseif c0 ==# "n" && c1 !=# "y" && s:isconsonant(c1)
+ let result ..= (hira ? "ん" : "ン")
+ let i += 1
+ " geminated consonant (sokuon): only a doubled consonant, so that a pending glide such as "ky" is left alone until its vowel is typed
+ elseif s:isconsonant(c0) && c0 ==# c1
+ let result ..= (hira ? "っ" : "ッ")
+ let i += 1
+ " long vowel mark
+ elseif c0 ==# "-"
+ let result ..= "ー"
+ let i += 1
+ " punctuation
+ elseif has_key(s:punctuation, c0)
+ let result ..= get(s:punctuation, c0)
+ let i += 1
+ " fallback
+ else
+ let result ..= c0
+ let i += 1
+ endif
+ endwhile
+
+ return result
+endfunction
+
+" matchid: ID of the match() region
+" y_1: 1-based line number of the region
+" x1_1: 1-based x of start of region, inclusive
+" x2_1: 1-based x of end of region, exclusive
+" kanamode: 1 for hiragana, 2 for katakana
+function s:make_region(matchid, y_1, x1_1, x2_1, kanamode)
+ " Assemble the region dictionary field by field; keys mirror the argument names.
+ let region = {}
+ let region.matchid = a:matchid
+ let region.y_1 = a:y_1
+ let region.x1_1 = a:x1_1
+ let region.x2_1 = a:x2_1
+ let region.kanamode = a:kanamode
+ return region
+endfunction
+
+" The fragment currently being typed and formatted.
+" Dictionary as returned by s:make_region.
+let s:cur_region = v:null
+
+" End the current fragment region, if there is one: delete its highlight
+" match and reset s:cur_region to v:null. Does nothing when no region is active.
+function s:terminate_region()
+ if s:cur_region isnot v:null
+ call matchdelete(s:cur_region.matchid)
+ let s:cur_region = v:null
endif
endfunction
-function s:jpkey(key)
- " call s:insert_at_cursor("key " . a:key)
- let y = line(".")
- let x = charcol(".") - 1
- let fullline = getline(".")
+" Handle one typed romaji/punctuation key (a:key): re-render the key together
+" with up to two preceding characters of the current region as kana, put the
+" result in the line, move the cursor after it, and create or extend the
+" highlighted fragment region.
+function s:handle_keypress(key)
+ " call s:insert_at_cursor("key " .. a:key)
+ const y_1 = line(".")
+ " 0-based character index of the cursor
+ const x = charcol(".") - 1
+ const fullline = getline(".")
- let fragment = strcharpart(fullline, max([0, x - 2]), x) . a:key
+ " If the user moved away from the region, start a new region here
+ if s:cur_region isnot v:null && (y_1 != s:cur_region.y_1 || x + 1 < s:cur_region.x1_1 || x + 1 > s:cur_region.x2_1)
+ call s:terminate_region()
+ endif
- " call s:append_at_cursor("<" . fragment . ">")
- while len(fragment) > 0 && strgetchar(fragment, 0) > 127
- let fragment = strcharpart(fragment, 1)
+ " If the line became shorter, make sure the region doesn't extend past the end of the line
+ if s:cur_region isnot v:null
+ let s:cur_region.x2_1 = min([strcharlen(fullline) + 1, s:cur_region.x2_1])
+ endif
+
+ " Take at most two characters before the cursor, never reaching before the
+ " start of the region (with no region, nothing before the cursor is taken).
+ const cur_region_start = s:cur_region is v:null ? x : s:cur_region.x1_1 - 1
+ let input_start = max([cur_region_start, x - 2])
+ let input = strcharpart(fullline, input_start, x - input_start) .. a:key
+
+ " Drop leading characters with a code point above 127 (presumably kana that
+ " were already rendered) so only pending ASCII input is re-rendered.
+ while len(input) > 0 && strgetchar(input, 0) > 127
+ let input = strcharpart(input, 1)
+ let input_start += 1
endwhile
- " call s:append_at_cursor("<" . fragment . ">")
- let result = s:renderkana(fragment)
- " echom result
- call setline(".", strcharpart(fullline, 0, x + 1 - strcharlen(fragment)) . result . strcharpart(fullline, x))
- call setcursorcharpos(y, x + 1 - strcharlen(fragment) + strcharlen(result) + 1)
+ " Number of existing characters before the cursor that are replaced by the rendering
+ const input_extra_chars = x - input_start
+
+ " A new region starts in hiragana mode (1); an existing one keeps its mode.
+ const result = s:renderkana(input, s:cur_region is v:null ? 1 : s:cur_region.kanamode)
+
+ " echom y_1 x s:cur_region input result
+
+ call setline(".", strcharpart(fullline, 0, x - input_extra_chars) .. result .. strcharpart(fullline, x))
+ const newx_1 = x + 1 - input_extra_chars + strcharlen(result)
+ call setcursorcharpos(y_1, newx_1)
+
+ " (Re)create the highlight. matchaddpos() takes a byte column and byte length,
+ " hence the strlen()/len() conversions from character offsets.
+ if s:cur_region is v:null
+ const startcol = x - input_extra_chars
+ const matchid = matchaddpos("JapaneseVimFragment", [[y_1, strlen(strcharpart(fullline, 0, startcol)) + 1, strlen(result)]])
+ let s:cur_region = s:make_region(matchid, y_1, startcol + 1, newx_1, 1)
+ else
+ call matchdelete(s:cur_region.matchid)
+ " Grow (or shrink) the region by the net change in character count
+ let s:cur_region.x2_1 += strcharlen(result) - input_extra_chars
+ const reglen = len(strcharpart(getline("."), s:cur_region.x1_1 - 1, s:cur_region.x2_1 - s:cur_region.x1_1))
+ let s:cur_region.matchid = matchaddpos("JapaneseVimFragment", [[s:cur_region.y_1, strlen(strcharpart(getline("."), 0, s:cur_region.x1_1 - 1)) + 1, reglen]])
+ endif
+endfunction
+
+" Rewrite the text of the current region in the requested script
+" (a:tokanamode: 1 = hiragana, 2 = katakana) and record the new mode on the
+" region. Without an active region only a bell is sounded.
+function s:convert_region_kana(tokanamode)
+ if s:cur_region is v:null
+ " Sound a bell
+ execute "normal! \<Esc>"
+ return
+ endif
+
+ " Split the region's line into the text before, inside and after the region.
+ const lnum = s:cur_region.y_1
+ const text = getline(lnum)
+ const head = strcharpart(text, 0, s:cur_region.x1_1 - 1)
+ const body = strcharpart(text, s:cur_region.x1_1 - 1, s:cur_region.x2_1 - s:cur_region.x1_1)
+ const tail = strcharpart(text, s:cur_region.x2_1 - 1)
+
+ " TODO: this conversion is not complete, but it gets the most important kana.
+ let pieces = []
+ for ch in body
+ let code = char2nr(ch)
+ let single = strcharlen(ch) == 1
+ if single && a:tokanamode == 2 && code >= 0x3041 && code <= 0x3096
+ " hiragana -> katakana: shift up by the distance between the two blocks
+ call add(pieces, nr2char(code + (0x30A0 - 0x3040)))
+ elseif single && a:tokanamode == 1 && code >= 0x30A1 && code <= 0x30F6
+ " katakana -> hiragana: shift down by the same distance
+ call add(pieces, nr2char(code + (0x3040 - 0x30A0)))
+ else
+ call add(pieces, ch)
+ endif
+ endfor
+
+ call setline(lnum, head .. join(pieces, "") .. tail)
+
+ let s:cur_region.kanamode = a:tokanamode
+endfunction
+
+" Handle <Esc>: end the current fragment region (if any), then leave insert mode.
+function s:handle_escape()
+ call s:terminate_region()
+ stopinsert
+endfunction
+
+" Handle <Space>: end the current fragment region (if any), then insert a
+" literal space at the cursor.
+function s:handle_space()
+ call s:terminate_region()
+ call s:append_at_cursor(" ")
+endfunction
+
+" Handle <CR>. With an active fragment region, Enter only ends the region and
+" inserts no newline; otherwise the current line is split at the cursor and
+" the cursor moves to the start of the new line.
+function s:handle_enter()
+ if s:cur_region isnot v:null
+ call s:terminate_region()
+ else
+ " Simulate normal enter
+ const y_1 = line(".")
+ const xbytes = col(".") - 1
+ const ln = getline(".")
+ call setline(".", strpart(ln, 0, xbytes))
+ call append(y_1, strpart(ln, xbytes))
+ " Use column 1: a column of 0 makes cursor() keep the current column
+ " instead of going to the start of the new line.
+ call cursor(y_1 + 1, 1)
+ endif
+endfunction
+
+" Convert the current fragment region to hiragana (kanamode 1).
+function s:handle_hiragana()
+ call s:convert_region_kana(1)
+endfunction
+
+" Convert the current fragment region to katakana (kanamode 2).
+" NOTE(review): the name is spelled "katagana"; the <C-k> mapping refers to it
+" by this name, so both must change together if it is ever renamed.
+function s:handle_katagana()
+ call s:convert_region_kana(2)
endfunction
+" Route every romaji letter and supported punctuation key through
+" s:handle_keypress() in insert mode, for this buffer only.
for c in "abcdefghijklmnopqrstuvwxyz-.,(){}[]/~:!?"
- execute "inoremap <buffer> <silent> " . c . " <Cmd>call <SID>jpkey('" . c . "')<CR>"
+ execute "inoremap <buffer> <silent> " .. c .. " <Cmd>call <SID>handle_keypress(\"" .. c .. "\")<CR>"
endfor
+" Keys that end the current fragment (<Esc>, <Space>, <CR>) or convert it to
+" hiragana (<C-h>) / katakana (<C-k>).
+" NOTE(review): <C-h> and <C-k> replace Vim's built-in insert-mode CTRL-H
+" (delete previous character) and CTRL-K (digraph entry) in this buffer.
+inoremap <buffer> <silent> <Esc> <Cmd>call <SID>handle_escape()<CR>
+inoremap <buffer> <silent> <Space> <Cmd>call <SID>handle_space()<CR>
+inoremap <buffer> <silent> <CR> <Cmd>call <SID>handle_enter()<CR>
+inoremap <buffer> <silent> <C-h> <Cmd>call <SID>handle_hiragana()<CR>
+inoremap <buffer> <silent> <C-k> <Cmd>call <SID>handle_katagana()<CR>