lean4-htt/src/Lean/Data/Trie.lean
2020-10-27 16:11:48 -07:00

92 lines
2.6 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich, Leonardo de Moura
Trie for tokenizing the Lean language
-/
import Lean.Data.Format
namespace Lean
namespace Parser
open Std (RBNode RBNode.leaf RBNode.singleton RBNode.find RBNode.insert)
inductive Trie (α : Type)
| Node : Option α → RBNode Char (fun _ => Trie α) → Trie α
namespace Trie
variables {α : Type}
def empty : Trie α :=
⟨none, RBNode.leaf⟩
instance : HasEmptyc (Trie α) :=
⟨empty⟩
instance : Inhabited (Trie α) :=
⟨Node none RBNode.leaf⟩
partial def insert (t : Trie α) (s : String) (val : α) : Trie α :=
let rec insertEmpty (i : String.Pos) : Trie α :=
match s.atEnd i with
| true => Trie.Node (some val) RBNode.leaf
| false =>
let c := s.get i
let t := insertEmpty (s.next i)
Trie.Node none (RBNode.singleton c t)
let rec loop
| Trie.Node v m, i =>
match s.atEnd i with
| true => Trie.Node (some val) m -- overrides old value
| false =>
let c := s.get i
let i := s.next i
let t := match RBNode.find Char.lt m c with
| none => insertEmpty i
| some t => loop t i
Trie.Node v (RBNode.insert Char.lt m c t)
loop t 0
partial def find? (t : Trie α) (s : String) : Option α :=
let rec loop
| Trie.Node val m, i =>
match s.atEnd i with
| true => val
| false =>
let c := s.get i
let i := s.next i
match RBNode.find Char.lt m c with
| none => none
| some t => loop t i
loop t 0
private def updtAcc (v : Option α) (i : String.Pos) (acc : String.Pos × Option α) : String.Pos × Option α :=
match v, acc with
| some v, (j, w) => (i, some v) -- we pattern match on `acc` to enable memory reuse
| none, acc => acc
partial def matchPrefix (s : String) (t : Trie α) (i : String.Pos) : String.Pos × Option α :=
let rec loop
| Trie.Node v m, i, acc =>
match s.atEnd i with
| true => updtAcc v i acc
| false =>
let acc := updtAcc v i acc
let c := s.get i
let i := s.next i
match RBNode.find Char.lt m c with
| some t => loop t i acc
| none => acc
loop t i (i, none)
private partial def toStringAux {α : Type} : Trie α → List Format
| Trie.Node val map => map.fold (fun Fs c t =>
format (repr c) :: (Format.group $ Format.nest 2 $ flip Format.joinSep Format.line $ toStringAux t) :: Fs) []
instance {α : Type} : ToString (Trie α) :=
⟨fun t => (flip Format.joinSep Format.line $ toStringAux t).pretty⟩
end Trie
end Parser
end Lean