From 44cdb1fc5682551240640ab45eef980bbb2d8979 Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Thu, 14 Mar 2019 08:38:08 -0700 Subject: [PATCH] chore(tests/playground/flat_parser): add new `file_map` Goal: minimize the number of constructor object allocations. They derailed me when I was looking for performance bottlenecks in the flat_parser. --- tests/playground/flat_parser.lean | 49 ++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/tests/playground/flat_parser.lean b/tests/playground/flat_parser.lean index ffb284c00d..03238f127a 100644 --- a/tests/playground/flat_parser.lean +++ b/tests/playground/flat_parser.lean @@ -6,6 +6,44 @@ open string open parser (syntax syntax.missing) open parser (trie token_map) +abbreviation pos := string.utf8_pos + +/-- A precomputed cache for quickly mapping char offsets to positions. -/ +structure file_map := +(offsets : array nat) +(lines : array nat) + +namespace file_map +private def from_string_aux (s : string) : nat → nat → nat → pos → array nat → array nat → file_map +| 0 offset line i offsets lines := ⟨offsets.push offset, lines.push line⟩ +| (k+1) offset line i offsets lines := + if s.utf8_at_end i then ⟨offsets.push offset, lines.push line⟩ + else let c := s.utf8_get i in + let i := s.utf8_next i in + let offset := offset + 1 in + if c = '\n' + then from_string_aux k offset (line+1) i (offsets.push offset) (lines.push (line+1)) + else from_string_aux k offset line i offsets lines + +def from_string (s : string) : file_map := +from_string_aux s s.length 0 1 0 (array.nil.push 0) (array.nil.push 1) + +/- Remark: `offset is in [(offsets.get b), (offsets.get e)]` and `b < e` -/ +private def to_position_aux (offsets : array nat) (lines : array nat) (offset : nat) : nat → nat → nat → position +| 0 b e := ⟨offset, 1⟩ -- unreachable +| (k+1) b e := + let offset_b := offsets.read' b in + if e = b + 1 then ⟨offset - offset_b, lines.read' b⟩ + else let m := (b + e) / 2 in + let offset_m := offsets.read' m in + if offset = offset_m then ⟨0, lines.read' m⟩ + else if offset > offset_m then to_position_aux k m e + else to_position_aux k b m + +def to_position : file_map → nat → position +| ⟨offsets, lines⟩ offset := to_position_aux offsets lines offset offsets.size 0 (offsets.size-1) +end file_map + structure token_config := («prefix» : string) (lbp : nat := 0) @@ -23,8 +61,6 @@ structure parser_config extends frontend_config := structure parser_state := (messages : message_log) -abbreviation pos := string.utf8_pos - structure token_cache_entry := (start_pos stop_pos : pos) (tk : syntax) @@ -207,7 +243,7 @@ def dummy_parser_core : parser_core := def test_parser {α : Type} (x : parser_m α) (input : string) : string := let r := x { cmd_parser := dummy_parser_core, term_parser := λ _, dummy_parser_core } - { filename := "test", input := input, file_map := lean.file_map.from_string input, tokens := lean.parser.trie.mk } + { filename := "test", input := input, file_map := file_map.from_string input, tokens := lean.parser.trie.mk } (mk_result_ok 0 {} {messages := message_log.empty}) in match r with | result.ok _ i _ _ _ := "Ok at " ++ to_string i @@ -253,9 +289,14 @@ def mk_big_string : nat → string → string section open lean.flat_parser +-- set_option pp.binder_types false +-- set_option pp.implicit true +-- set_option trace.compiler.boxed true + def flat_p : parser_m unit := many1 (str "--" *> take_until (= '\n') *> any *> pure ()) +-- #exit end section @@ -266,7 +307,7 @@ open lean.parser.monad_parsec def test_parsec (p : parser unit) (input : string) : string := let ps : lean.flat_parser.rec_parsers := { cmd_parser := lean.flat_parser.dummy_parser_core, term_parser := λ _, lean.flat_parser.dummy_parser_core } in -let cfg : lean.flat_parser.parser_config := { filename := "test", input := input, file_map := lean.file_map.from_string input, tokens := lean.parser.trie.mk } in +let cfg : lean.flat_parser.parser_config := { filename := "test", input := input, file_map := lean.flat_parser.file_map.from_string input, tokens := lean.parser.trie.mk } in let r := p ps cfg input.mk_iterator {} in match r with | (parsec.result.ok _ it _, _) := "OK at " ++ to_string it.offset