From 5b1aac7b8fb88478bc7ab9c5f6ee06ac40f78fda Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Thu, 6 Oct 2022 17:45:21 -0700 Subject: [PATCH] fix: avoid nontermination on non-utf8 input This is not a perfect solution, but ensures the non-termination does not happen. The changes also make it easier to prove termination in the future. TODO: validate UTF8 input? closes #1690 --- src/Init/Data/String/Basic.lean | 16 ++++++++-------- tests/lean/1690.lean | 1 + tests/lean/1690.lean.expected.out | 2 ++ 3 files changed, 11 insertions(+), 8 deletions(-) create mode 100644 tests/lean/1690.lean create mode 100644 tests/lean/1690.lean.expected.out diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 5ec8e36eec..f69d520243 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -115,7 +115,7 @@ def atEnd : (@& String) → (@& Pos) → Bool framework and wellfounded recursion support -/ partial def posOfAux (s : String) (c : Char) (stopPos : Pos) (pos : Pos) : Pos := - if pos == stopPos then pos + if pos >= stopPos then pos else if s.get pos == c then pos else posOfAux s c stopPos (s.next pos) @@ -132,7 +132,7 @@ def revPosOf (s : String) (c : Char) : Option Pos := else revPosOfAux s c (s.prev s.endPos) partial def findAux (s : String) (p : Char → Bool) (stopPos : Pos) (pos : Pos) : Pos := - if pos == stopPos then pos + if pos >= stopPos then pos else if p (s.get pos) then pos else findAux s p stopPos (s.next pos) @@ -155,7 +155,7 @@ abbrev Pos.min (p₁ p₂ : Pos) : Pos := partial def firstDiffPos (a b : String) : Pos := let stopPos := a.endPos.min b.endPos let rec loop (i : Pos) : Pos := - if i == stopPos || a.get i != b.get i then i + if i >= stopPos || a.get i != b.get i then i else loop (a.next i) loop 0 @@ -300,7 +300,7 @@ def prevn : Iterator → Nat → Iterator end Iterator partial def offsetOfPosAux (s : String) (pos : Pos) (i : Pos) (offset : Nat) : Nat := - if i == pos || s.atEnd i then + if i >= pos || s.atEnd i then offset else offsetOfPosAux s pos (s.next i) (offset+1) @@ -310,7 +310,7 @@ def offsetOfPos (s : String) (pos : Pos) : Nat := @[specialize] partial def foldlAux {α : Type u} (f : α → Char → α) (s : String) (stopPos : Pos) (i : Pos) (a : α) : α := let rec loop (i : Pos) (a : α) := - if i == stopPos then a + if i >= stopPos then a else loop (s.next i) (f a (s.get i)) loop i a @@ -319,7 +319,7 @@ def offsetOfPos (s : String) (pos : Pos) : Nat := @[specialize] partial def foldrAux {α : Type u} (f : Char → α → α) (a : α) (s : String) (stopPos : Pos) (i : Pos) : α := let rec loop (i : Pos) := - if i == stopPos then a + if i >= stopPos then a else f (s.get i) (loop (s.next i)) loop i @@ -328,7 +328,7 @@ def offsetOfPos (s : String) (pos : Pos) : Nat := @[specialize] partial def anyAux (s : String) (stopPos : Pos) (p : Char → Bool) (i : Pos) : Bool := let rec loop (i : Pos) := - if i == stopPos then false + if i >= stopPos then false else if p (s.get i) then true else loop (s.next i) loop i @@ -498,7 +498,7 @@ def contains (s : Substring) (c : Char) : Bool := s.any (fun a => a == c) @[specialize] private partial def takeWhileAux (s : String) (stopPos : String.Pos) (p : Char → Bool) (i : String.Pos) : String.Pos := - if i == stopPos then i + if i >= stopPos then i else if p (s.get i) then takeWhileAux s stopPos p (s.next i) else i diff --git a/tests/lean/1690.lean b/tests/lean/1690.lean new file mode 100644 index 0000000000..1910ff82cf --- /dev/null +++ b/tests/lean/1690.lean @@ -0,0 +1 @@ +def foo := \ No newline at end of file diff --git a/tests/lean/1690.lean.expected.out b/tests/lean/1690.lean.expected.out new file mode 100644 index 0000000000..e5427ba8f3 --- /dev/null +++ b/tests/lean/1690.lean.expected.out @@ -0,0 +1,2 @@ +1690.lean:1:10-1:11: error: unknown identifier ' + '