120 lines
4.6 KiB
Text
120 lines
4.6 KiB
Text
/-
Copyright (c) 2020 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
-/
|
|
module
|
|
|
|
prelude
|
|
import all Init.Data.ByteArray.Basic
|
|
public import Init.Data.String.Basic
|
|
import all Init.Data.String.Basic
|
|
import Init.Data.String.Search
|
|
import Init.Data.String.Termination
|
|
|
|
public section
|
|
|
|
namespace String
|
|
|
|
/--
Forwards to `ByteArray.utf8DecodeChar?` with the same byte array `a` and index `i`.

Deprecated alias; call `ByteArray.utf8DecodeChar?` directly instead.
-/
@[deprecated ByteArray.utf8DecodeChar? (since := "2025-10-01")]
abbrev utf8DecodeChar? (a : ByteArray) (i : Nat) : Option Char :=
  ByteArray.utf8DecodeChar? a i
|
|
|
|
/--
Returns `true` when the bytes in `a` form a valid UTF-8 encoding of a string.

Deprecated alias; call `ByteArray.validateUTF8` directly instead.
-/
@[deprecated ByteArray.validateUTF8 (since := "2025-10-01")]
abbrev validateUTF8 (a : ByteArray) : Bool :=
  ByteArray.validateUTF8 a
|
|
|
|
/--
Computes the indentation width that `removeLeadingSpaces` strips from `s`.

The first line is skipped: scanning starts just after the first `'\n'`, and the result is `0` when
no such position is found. From there, the leading `' '` and `'\t'` characters of every line are
counted (both count as one), and the smallest count among the lines that contain some other
character is returned. Lines made only of spaces and tabs do not contribute. The running minimum
starts at `s.length`, so that value is returned when no later line contributes.
-/
private def findLeadingSpacesSize (s : String) : Nat :=
  let it := s.startPos
  -- Move to the first position after the first newline, i.e. the start of the second line.
  let it := it.find? (· == '\n') |>.bind String.Pos.next?
  match it with
  | some it => consumeSpaces it 0 s.length
  | none => 0
where
  -- Measures the indentation of the current line. `curr` is the number of spaces/tabs seen so far
  -- on this line and `min` is the smallest indentation recorded on earlier lines.
  consumeSpaces {s : String} (it : s.Pos) (curr min : Nat) : Nat :=
    if h : it.IsAtEnd then min
    else if it.get h == ' ' || it.get h == '\t' then consumeSpaces (it.next h) (curr + 1) min
    -- A line made only of whitespace does not count. The position after its `'\n'` is already the
    -- start of the next line, so that line must be measured right away; handing it to
    -- `findNextLine` would skip over it without counting its indentation.
    else if it.get h == '\n' then consumeSpaces (it.next h) 0 min
    else findNextLine (it.next h) (Nat.min curr min)
  termination_by it
  -- Skips the rest of the current line, then resumes measuring at the start of the next one.
  findNextLine {s : String} (it : s.Pos) (min : Nat) : Nat :=
    if h : it.IsAtEnd then min
    else if it.get h == '\n' then consumeSpaces (it.next h) 0 min
    else findNextLine (it.next h) min
  termination_by it
|
|
|
|
/--
Rebuilds `s` with up to `n` leading `' '`/`'\t'` characters dropped from the start of every line.

A line that has fewer than `n` leading whitespace characters loses only the ones it has; all
remaining characters, including each `'\n'`, are copied to the result unchanged.
-/
private def removeNumLeadingSpaces (n : Nat) (s : String) : String :=
  stripIndent n s.startPos ""
where
  -- Drops at most `budget` leading spaces/tabs at `pos`, then hands the line over to `copyLine`.
  stripIndent (budget : Nat) {s : String} (pos : s.Pos) (out : String) : String :=
    match budget with
    | 0 => copyLine pos out
    | rest + 1 =>
      if h : pos.IsAtEnd then out
      else if pos.get h == ' ' || pos.get h == '\t' then stripIndent rest (pos.next h) out
      else copyLine pos out
  termination_by (pos, 1)
  -- Copies characters through the end of the line; after a newline, stripping restarts with the
  -- full width `n` for the next line.
  copyLine {s : String} (pos : s.Pos) (out : String) : String :=
    if h : pos.IsAtEnd then out
    else if pos.get h == '\n' then stripIndent n (pos.next h) (out.push '\n')
    else copyLine (pos.next h) (out.push (pos.get h))
  termination_by (pos, 0)
|
|
|
|
/--
De-indents the lines of a string uniformly: the same amount of leading whitespace is removed from
every line, chosen so that the least-indented line ends up with no leading whitespace.

The amount to remove is found by counting the leading space (`' '`) and tab (`'\t'`) characters on
each line after the first one that also contains non-whitespace characters, and taking the
smallest count. Tabs and spaces are not distinguished; each counts as one character.

That many leading whitespace characters are then removed from the start of each line. The first
line's own indentation plays no part in choosing the amount, but its leading whitespace is removed
like that of any other line.

Examples:
* `"Here:\n  fun x =>\n    x + 1".removeLeadingSpaces = "Here:\nfun x =>\n  x + 1"`
* `"Here:\n\t\tfun x =>\n\t  \tx + 1".removeLeadingSpaces = "Here:\nfun x =>\n \tx + 1"`
* `"Here:\n\t\tfun x =>\n \n\t  \tx + 1".removeLeadingSpaces = "Here:\nfun x =>\n\n \tx + 1"`
-/
def removeLeadingSpaces (s : String) : String :=
  match findLeadingSpacesSize s with
  -- nothing to strip: return the input itself rather than rebuilding it
  | 0 => s
  | n => removeNumLeadingSpaces n s
|
|
|
|
/--
Normalizes line endings by rewriting every `\r\n` pair as a single `\n`. A `\r` that is not
immediately followed by `\n` is copied through unchanged; the presence of such isolated `\r`
characters is not checked.

This is an optimized version of `String.replace text "\r\n" "\n"`.
-/
def crlfToLf (text : String) : String :=
  go "" 0 0
where
  -- `acc` is the output built so far, `accStop` is where the not-yet-copied part of `text`
  -- begins, and `pos` is the current scan position.
  go (acc : String) (accStop pos : String.Pos.Raw) : String :=
    if h : pos.atEnd text then
      -- `accStop` only leaves `0` once a `\r\n` has been seen, so `accStop = 0` means `acc` is
      -- still empty and `text` can be returned as is.
      if accStop ≠ 0 then acc ++ accStop.extract text pos else text
    else
      let ch := pos.get' text h
      let after := pos.next' text h
      if hCrlf : ¬ after.atEnd text ∧ ch == '\r' ∧ after.get text == '\n' then
        -- Flush the pending segment up to (not including) the `\r`. The next segment starts at
        -- `after`, which is the `\n`, so the `\n` is kept and only the `\r` is dropped.
        let flushed := acc ++ accStop.extract text pos
        go flushed after (after.next' text hCrlf.1)
      else
        go acc accStop after
  termination_by text.utf8ByteSize - pos.byteIdx
  decreasing_by
    decreasing_with
      change text.utf8ByteSize - ((pos.next text).next text).byteIdx < text.utf8ByteSize - pos.byteIdx
      have k := Nat.gt_of_not_le <| mt decide_eq_true h
      exact Nat.sub_lt_sub_left k (Nat.lt_trans (String.Pos.Raw.lt_next text pos) (String.Pos.Raw.lt_next _ _))
    decreasing_with
      change text.utf8ByteSize - (pos.next text).byteIdx < text.utf8ByteSize - pos.byteIdx
      have k := Nat.gt_of_not_le <| mt decide_eq_true h
      exact Nat.sub_lt_sub_left k (String.Pos.Raw.lt_next _ _)
|
|
|
|
end String
|