/- Copyright (c) 2020 Marc Huisinga. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Authors: Marc Huisinga, Wojciech Nawrocki -/ import Init.Data.String import Init.Data.Array import Lean.Data.Lsp.Basic import Lean.Data.Position /-! LSP uses UTF-16 for indexing, so we need to provide some primitives to interact with Lean strings using UTF-16 indices. -/ namespace Char def utf16Size (c : Char) : UInt32 := if c.val ≤ 0xFFFF then 1 else 2 end Char namespace String private def csize (c : Char) : Nat := c.utf16Size.toNat def utf16Length (s : String) : Nat := s.foldr (fun c acc => csize c + acc) 0 private def codepointPosToUtf16PosFromAux (s : String) : Nat → Pos → Nat → Nat | 0, utf8pos, utf16pos => utf16pos | cp+1, utf8pos, utf16pos => codepointPosToUtf16PosFromAux cp (s.next utf8pos) (utf16pos + csize (s.get utf8pos)) /-- Computes the UTF-16 offset of the `n`-th Unicode codepoint in the substring of `s` starting at UTF-8 offset `off`. Yes, this is actually useful.-/ def codepointPosToUtf16PosFrom (s : String) (n : Nat) (off : Pos) : Nat := codepointPosToUtf16PosFromAux s n off 0 def codepointPosToUtf16Pos (s : String) (pos : Nat) : Nat := codepointPosToUtf16PosFrom s pos 0 private partial def utf16PosToCodepointPosFromAux (s : String) : Nat → Pos → Nat → Nat | 0, utf8pos, cp => cp | utf16pos, utf8pos, cp => utf16PosToCodepointPosFromAux (utf16pos - csize (s.get utf8pos)) (s.next utf8pos) (cp + 1) /-- Computes the position of the Unicode codepoint at UTF-16 offset `utf16pos` in the substring of `s` starting at UTF-8 offset `off`. -/ def utf16PosToCodepointPosFrom (s : String) (utf16pos : Nat) (off : Pos) : Nat := utf16PosToCodepointPosFromAux s utf16pos off 0 def utf16PosToCodepointPos (s : String) (pos : Nat) : Nat := utf16PosToCodepointPosFrom s pos 0 /-- Starting at `utf8pos`, finds the UTF-8 offset of the `p`-th codepoint. -/ def codepointPosToUtf8PosFrom (s : String) : String.Pos → Nat → String.Pos | utf8pos, 0 => utf8pos | utf8pos, p+1 => codepointPosToUtf8PosFrom (s.next utf8pos) p end String namespace Lean namespace FileMap /-- Computes an UTF-8 offset into `text.source` from an LSP-style 0-indexed (ln, col) position. -/ def lspPosToUtf8Pos (text : FileMap) (pos : Lsp.Position) : String.Pos := let colPos := text.positions.get! (pos.line); let chr := text.source.utf16PosToCodepointPosFrom pos.character colPos; text.source.codepointPosToUtf8PosFrom colPos chr def leanPosToLspPos (text : FileMap) : Lean.Position → Lsp.Position | ⟨ln, col⟩ => ⟨ln-1, text.source.codepointPosToUtf16PosFrom col (text.positions.get! $ ln - 1)⟩ end FileMap end Lean