diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 1f2567cfdf..94cffd92df 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -12,8 +12,11 @@ universes u structure String := (data : List Char) -/-- A character position in an UTF-8 encoded string. -To represent codepoint positions, use a plain `Nat`. -/ +/-- A byte position in a `String`. Internally, `String`s are UTF-8 encoded. +Codepoint positions (counting the Unicode codepoints rather than bytes) +are represented by plain `Nat`s instead. +Indexing a `String` by a byte position is constant-time, while codepoint +positions need to be translated internally to byte positions in linear-time. -/ abbrev String.Pos := Nat structure Substring := diff --git a/src/Lean/Data/Lsp/Utf16.lean b/src/Lean/Data/Lsp/Utf16.lean index d0d041760e..b4db02772f 100644 --- a/src/Lean/Data/Lsp/Utf16.lean +++ b/src/Lean/Data/Lsp/Utf16.lean @@ -72,12 +72,5 @@ text.source.codepointPosToUtf8PosFrom colPos chr def leanPosToLspPos (text : FileMap) : Lean.Position → Lsp.Position | ⟨ln, col⟩ => ⟨ln-1, text.source.codepointPosToUtf16PosFrom col (text.positions.get! $ ln - 1)⟩ -def replaceLspRange (text : FileMap) (r : Lsp.Range) (newText : String) : FileMap := -let start := text.lspPosToUtf8Pos r.start; -let «end» := text.lspPosToUtf8Pos r.«end»; -let pre := text.source.extract 0 start; -let post := text.source.extract «end» text.source.bsize; -(pre ++ newText ++ post).toFileMap - end FileMap end Lean diff --git a/src/Lean/Server.lean b/src/Lean/Server.lean index 91421ebe36..c56fa1a2ce 100644 --- a/src/Lean/Server.lean +++ b/src/Lean/Server.lean @@ -130,17 +130,23 @@ writeLspNotification "textDocument/publishDiagnostics" def handleDidOpen (p : DidOpenTextDocumentParams) : ServerM Unit := do let doc := p.textDocument; --- LSP is EOL agnostic, so we first split on "\r\n" to avoid possibly redundant line breaks. -let splitOnEOLs (s : String) : List String := (do - line ← s.splitOn "\r\n"; - line ← line.splitOn "\n"; - line ← line.splitOn "\r"; - pure line); -let text := ("\n".intercalate $ splitOnEOLs doc.text).toFileMap; +-- NOTE(WN): `toFileMap` marks line beginnings as immediately following +-- "\n", which should be enough to handle both LF and CRLF correctly. +-- This is because LSP always refers to characters by (line, column), +-- so if we get the line number correct it shouldn't matter that there +-- is a CR there. +let text := doc.text.toFileMap; (msgLog, newDoc) ← monadLift $ compileDocument doc.version text; updateOpenDocuments doc.uri newDoc; sendDiagnostics doc.uri newDoc msgLog +private def replaceLspRange (text : FileMap) (r : Lsp.Range) (newText : String) : FileMap := +let start := text.lspPosToUtf8Pos r.start; +let «end» := text.lspPosToUtf8Pos r.«end»; +let pre := text.source.extract 0 start; +let post := text.source.extract «end» text.source.bsize; +(pre ++ newText ++ post).toFileMap + def handleDidChange (p : DidChangeTextDocumentParams) : ServerM Unit := do let docId := p.textDocument; let changes := p.contentChanges; @@ -152,7 +158,7 @@ else changes.forM $ fun change => match change with | TextDocumentContentChangeEvent.rangeChange (range : Range) (newText : String) => do let startOff := oldDoc.text.lspPosToUtf8Pos range.start; - let newDocText := oldDoc.text.replaceLspRange range newText; + let newDocText := replaceLspRange oldDoc.text range newText; (msgLog, newDoc) ← monadLift $ updateDocument oldDoc startOff newVersion newDocText; updateOpenDocuments docId.uri newDoc;