76 lines
2.6 KiB
Text
76 lines
2.6 KiB
Text
/-
|
|
Copyright (c) 2020 Marc Huisinga. All rights reserved.
|
|
Released under Apache 2.0 license as described in the file LICENSE.
|
|
|
|
Authors: Marc Huisinga, Wojciech Nawrocki
|
|
-/
|
|
import Init.Data.String
|
|
import Init.Data.Array
|
|
import Lean.Data.Lsp.Basic
|
|
import Lean.Data.Position
|
|
|
|
/-! LSP uses UTF-16 for indexing, so we need to provide some primitives
|
|
to interact with Lean strings using UTF-16 indices. -/
|
|
|
|
namespace Char
|
|
|
|
def utf16Size (c : Char) : UInt32 :=
|
|
if c.val ≤ 0xFFFF then 1 else 2
|
|
|
|
end Char
|
|
|
|
namespace String
|
|
|
|
private def csize (c : Char) : Nat :=
|
|
c.utf16Size.toNat
|
|
|
|
def utf16Length (s : String) : Nat :=
|
|
s.foldr (fun c acc => csize c + acc) 0
|
|
|
|
private def codepointPosToUtf16PosFromAux (s : String) : Nat → Pos → Nat → Nat
|
|
| 0, utf8pos, utf16pos => utf16pos
|
|
| cp+1, utf8pos, utf16pos => codepointPosToUtf16PosFromAux cp (s.next utf8pos) (utf16pos + csize (s.get utf8pos))
|
|
|
|
/-- Computes the UTF-16 offset of the `n`-th Unicode codepoint
|
|
in the substring of `s` starting at UTF-8 offset `off`.
|
|
Yes, this is actually useful.-/
|
|
def codepointPosToUtf16PosFrom (s : String) (n : Nat) (off : Pos) : Nat :=
|
|
codepointPosToUtf16PosFromAux s n off 0
|
|
|
|
def codepointPosToUtf16Pos (s : String) (pos : Nat) : Nat :=
|
|
codepointPosToUtf16PosFrom s pos 0
|
|
|
|
private partial def utf16PosToCodepointPosFromAux (s : String) : Nat → Pos → Nat → Nat
|
|
| 0, utf8pos, cp => cp
|
|
| utf16pos, utf8pos, cp => utf16PosToCodepointPosFromAux (utf16pos - csize (s.get utf8pos)) (s.next utf8pos) (cp + 1)
|
|
|
|
/-- Computes the position of the Unicode codepoint at UTF-16 offset
|
|
`utf16pos` in the substring of `s` starting at UTF-8 offset `off`. -/
|
|
def utf16PosToCodepointPosFrom (s : String) (utf16pos : Nat) (off : Pos) : Nat :=
|
|
utf16PosToCodepointPosFromAux s utf16pos off 0
|
|
|
|
def utf16PosToCodepointPos (s : String) (pos : Nat) : Nat :=
|
|
utf16PosToCodepointPosFrom s pos 0
|
|
|
|
/-- Starting at `utf8pos`, finds the UTF-8 offset of the `p`-th codepoint. -/
|
|
def codepointPosToUtf8PosFrom (s : String) : String.Pos → Nat → String.Pos
|
|
| utf8pos, 0 => utf8pos
|
|
| utf8pos, p+1 => codepointPosToUtf8PosFrom (s.next utf8pos) p
|
|
|
|
end String
|
|
|
|
namespace Lean
|
|
namespace FileMap
|
|
|
|
/-- Computes an UTF-8 offset into `text.source`
|
|
from an LSP-style 0-indexed (ln, col) position. -/
|
|
def lspPosToUtf8Pos (text : FileMap) (pos : Lsp.Position) : String.Pos :=
|
|
let colPos := text.positions.get! (pos.line);
|
|
let chr := text.source.utf16PosToCodepointPosFrom pos.character colPos;
|
|
text.source.codepointPosToUtf8PosFrom colPos chr
|
|
|
|
def leanPosToLspPos (text : FileMap) : Lean.Position → Lsp.Position
|
|
| ⟨ln, col⟩ => ⟨ln-1, text.source.codepointPosToUtf16PosFrom col (text.positions.get! $ ln - 1)⟩
|
|
|
|
end FileMap
|
|
end Lean
|