This PR addresses a missing check in the module system where private names that remain in the public environment map for technical reasons (e.g. inductive constructors generated by the kernel and relied on by the code generator) accidentally were accessible in the public scope.
208 lines
6 KiB
Text
208 lines
6 KiB
Text
/-
|
||
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
|
||
Released under Apache 2.0 license as described in the file LICENSE.
|
||
Author: Sebastian Ullrich, Leonardo de Moura, Joachim Breitner
|
||
|
||
A string trie data structure, used for tokenizing the Lean language
|
||
-/
|
||
module
|
||
|
||
prelude
|
||
public import Lean.Data.Format
|
||
public import Init.Data.Option.Coe
|
||
|
||
public section
|
||
|
||
namespace Lean
|
||
namespace Data
|
||
|
||
/-
|
||
## Implementation notes
|
||
|
||
Tries have typically many nodes with small degree, where a linear scan
|
||
through the (compact) `ByteArray` is faster than using binary search or
|
||
search trees like `Std.TreeMap`.
|
||
|
||
Moreover, many nodes have degree 1, which justifies the special case `Node1`
|
||
constructor.
|
||
|
||
The code would be a bit less repetitive if we used something like the following
|
||
```
|
||
mutual
|
||
@[expose] def Trie α := Option α × ByteAssoc α
|
||
|
||
inductive ByteAssoc α where
|
||
| leaf : Trie α
|
||
| node1 : UInt8 → Trie α → Trie α
|
||
| node : ByteArray → Array (Trie α) → Trie α
|
||
end
|
||
```
|
||
but that would come at the cost of extra indirections.
|
||
-/
|
||
|
||
/-- A Trie is a key-value store where the keys are of type `String`,
|
||
and the internal structure is a tree that branches on the bytes of the string. -/
|
||
inductive Trie (α : Type) where
|
||
| leaf : Option α → Trie α
|
||
| node1 : Option α → UInt8 → Trie α → Trie α
|
||
| node : Option α → ByteArray → Array (Trie α) → Trie α
|
||
|
||
namespace Trie
|
||
variable {α : Type}
|
||
|
||
/-- The empty `Trie` -/
|
||
def empty : Trie α := leaf none
|
||
|
||
instance : EmptyCollection (Trie α) :=
|
||
⟨empty⟩
|
||
|
||
instance : Inhabited (Trie α) where
|
||
default := empty
|
||
|
||
/-- Insert or update the value at a the given key `s`. -/
|
||
partial def upsert (t : Trie α) (s : String) (f : Option α → α) : Trie α :=
|
||
let rec insertEmpty (i : Nat) : Trie α :=
|
||
if h : i < s.utf8ByteSize then
|
||
let c := s.getUtf8Byte i h
|
||
let t := insertEmpty (i + 1)
|
||
node1 none c t
|
||
else
|
||
leaf (f .none)
|
||
let rec loop
|
||
| i, leaf v =>
|
||
if h : i < s.utf8ByteSize then
|
||
let c := s.getUtf8Byte i h
|
||
let t := insertEmpty (i + 1)
|
||
node1 v c t
|
||
else
|
||
leaf (f v)
|
||
| i, node1 v c' t' =>
|
||
if h : i < s.utf8ByteSize then
|
||
let c := s.getUtf8Byte i h
|
||
if c == c'
|
||
then node1 v c' (loop (i + 1) t')
|
||
else
|
||
let t := insertEmpty (i + 1)
|
||
node v (.mk #[c, c']) #[t, t']
|
||
else
|
||
node1 (f v) c' t'
|
||
| i, node v cs ts =>
|
||
if h : i < s.utf8ByteSize then
|
||
let c := s.getUtf8Byte i h
|
||
match cs.findIdx? (· == c) with
|
||
| none =>
|
||
let t := insertEmpty (i + 1)
|
||
node v (cs.push c) (ts.push t)
|
||
| some idx =>
|
||
node v cs (ts.modify idx (loop (i + 1)))
|
||
else
|
||
node (f v) cs ts
|
||
loop 0 t
|
||
|
||
/-- Inserts a value at a the given key `s`, overriding an existing value if present. -/
|
||
partial def insert (t : Trie α) (s : String) (val : α) : Trie α :=
|
||
upsert t s (fun _ => val)
|
||
|
||
/-- Looks up a value at the given key `s`. -/
|
||
partial def find? (t : Trie α) (s : String) : Option α :=
|
||
let rec loop
|
||
| i, leaf val =>
|
||
if i < s.utf8ByteSize then
|
||
none
|
||
else
|
||
val
|
||
| i, node1 val c' t' =>
|
||
if h : i < s.utf8ByteSize then
|
||
let c := s.getUtf8Byte i h
|
||
if c == c'
|
||
then loop (i + 1) t'
|
||
else none
|
||
else
|
||
val
|
||
| i, node val cs ts =>
|
||
if h : i < s.utf8ByteSize then
|
||
let c := s.getUtf8Byte i h
|
||
match cs.findIdx? (· == c) with
|
||
| none => none
|
||
| some idx => loop (i + 1) ts[idx]!
|
||
else
|
||
val
|
||
loop 0 t
|
||
|
||
/-- Returns an `Array` of all values in the trie, in no particular order. -/
|
||
partial def values (t : Trie α) : Array α := go t |>.run #[] |>.2
|
||
where
|
||
go : Trie α → StateM (Array α) Unit
|
||
| leaf a? => do
|
||
if let some a := a? then
|
||
modify (·.push a)
|
||
| node1 a? _ t' => do
|
||
if let some a := a? then
|
||
modify (·.push a)
|
||
go t'
|
||
| node a? _ ts => do
|
||
if let some a := a? then
|
||
modify (·.push a)
|
||
ts.forM fun t' => go t'
|
||
|
||
/-- Returns all values whose key have the given string `pre` as a prefix, in no particular order. -/
|
||
partial def findPrefix (t : Trie α) (pre : String) : Array α := go t 0
|
||
where
|
||
go (t : Trie α) (i : Nat) : Array α :=
|
||
if h : i < pre.utf8ByteSize then
|
||
let c := pre.getUtf8Byte i h
|
||
match t with
|
||
| leaf _val => .empty
|
||
| node1 _val c' t' =>
|
||
if c == c'
|
||
then go t' (i + 1)
|
||
else .empty
|
||
| node _val cs ts =>
|
||
match cs.findIdx? (· == c) with
|
||
| none => .empty
|
||
| some idx => go ts[idx]! (i + 1)
|
||
else
|
||
t.values
|
||
|
||
/-- Find the longest _key_ in the trie that is contained in the given string `s` at position `i`,
|
||
and return the associated value. -/
|
||
partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) : Option α :=
|
||
let rec loop
|
||
| leaf v, _, res =>
|
||
if v.isSome then v else res
|
||
| node1 v c' t', i, res =>
|
||
let res := if v.isSome then v else res
|
||
if h : i < s.utf8ByteSize then
|
||
let c := s.getUtf8Byte i h
|
||
if c == c'
|
||
then loop t' (i + 1) res
|
||
else res
|
||
else
|
||
res
|
||
| node v cs ts, i, res =>
|
||
let res := if v.isSome then v else res
|
||
if h : i < s.utf8ByteSize then
|
||
let c := s.getUtf8Byte i h
|
||
match cs.findIdx? (· == c) with
|
||
| none => res
|
||
| some idx => loop ts[idx]! (i + 1) res
|
||
else
|
||
res
|
||
loop t i.byteIdx none
|
||
|
||
private partial def toStringAux {α : Type} : Trie α → List Format
|
||
| leaf _ => []
|
||
| node1 _ c t =>
|
||
[ format (repr c), Format.group $ Format.nest 4 $ flip Format.joinSep Format.line $ toStringAux t ]
|
||
| node _ cs ts =>
|
||
List.flatten $ List.zipWith (fun c t =>
|
||
[ format (repr c), (Format.group $ Format.nest 4 $ flip Format.joinSep Format.line $ toStringAux t) ]
|
||
) cs.toList ts.toList
|
||
|
||
instance {α : Type} : ToString (Trie α) where
|
||
toString t := private (flip Format.joinSep Format.line $ toStringAux t).pretty
|
||
|
||
end Trie
|
||
|
||
end Data
|
||
end Lean
|