From 224fdc7a7883d20a919bb68fcc48784d8640643c Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Thu, 10 May 2018 13:10:42 -0700 Subject: [PATCH] refactor(library/init/lean/ir): platform dependent IR Motivation: in 64-bit machines, we can store boxed uint32 values as a tagged pointer. In 32-bit machines, we need to allocated an object (like Haskell) to store the uint32 value. So, the generated bytecode is quite different in each platform. This change also allow us to simplify the IR. Example: we don't need the type `sizet` anymore. Impact: To be able to bootstrap in both platforms, we will have to store two versions of the generated code: 32 and 64 versions. In principle, we only need to store the 64-bit version, and use cross-compilation to build the 32-bit version. --- library/init/data/to_string.lean | 3 +++ library/init/lean/format.lean | 1 + library/init/lean/ir/extract_cpp.lean | 10 +++------- library/init/lean/ir/format.lean | 6 ------ library/init/lean/ir/ir.lean | 13 +++---------- library/init/lean/ir/parser.lean | 21 ++++++++++----------- tests/lean/parser1.lean | 6 +++--- tests/lean/run/parser_ir1.lean | 16 ++++++++-------- 8 files changed, 31 insertions(+), 45 deletions(-) diff --git a/library/init/data/to_string.lean b/library/init/data/to_string.lean index 568a57e1b1..07db575213 100644 --- a/library/init/data/to_string.lean +++ b/library/init/data/to_string.lean @@ -61,6 +61,9 @@ instance : has_to_string uint32 := instance : has_to_string uint64 := ⟨λ n, to_string n.to_nat⟩ +instance : has_to_string usize := +⟨λ n, to_string n.to_nat⟩ + instance {α : Type u} [has_to_string α] : has_to_string (option α) := ⟨λ o, match o with | none := "none" | (some a) := "(some " ++ to_string a ++ ")"⟩ diff --git a/library/init/lean/format.lean b/library/init/lean/format.lean index 87499f1444..484a51bb1c 100644 --- a/library/init/lean/format.lean +++ b/library/init/lean/format.lean @@ -136,6 +136,7 @@ instance nat_has_to_format : has_to_format nat := ⟨λ n, to_string n⟩ instance uint16_has_to_format : has_to_format uint16 := ⟨λ n, to_string n⟩ instance uint32_has_to_format : has_to_format uint32 := ⟨λ n, to_string n⟩ instance uint64_has_to_format : has_to_format uint64 := ⟨λ n, to_string n⟩ +instance usize_has_to_format : has_to_format usize := ⟨λ n, to_string n⟩ instance format_has_to_string : has_to_string format := ⟨pretty⟩ diff --git a/library/init/lean/ir/extract_cpp.lean b/library/init/lean/ir/extract_cpp.lean index 55af467604..56f6f991dd 100644 --- a/library/init/lean/ir/extract_cpp.lean +++ b/library/init/lean/ir/extract_cpp.lean @@ -126,10 +126,6 @@ def emit_call_lhs : list var → extract_m unit def emit_type_size (ty : type) : extract_m unit := emit "sizeof" >> paren(emit_type ty) -def emit_sizet : list (nat × type) → extract_m unit -| [] := emit 0 -| ((n, ty)::ss) := emit n >> emit "*" >> emit_type_size ty >> emit " + " >> emit_sizet ss - /-- Emit `op(x)` -/ def emit_op_x (op : string) (x : var) : extract_m unit := emit op >> paren (emit_var x) @@ -234,11 +230,11 @@ ins.decorate_error $ | (instr.unop x t op y) := emit_unop x t op y | (instr.binop x t op y z) := emit_binop x t op y z | (instr.call xs f ys) := emit_call_lhs xs >> emit_fnid f >> paren(emit_var_list ys) - | (instr.cnstr o t n sz) := emit_var o >> emit " := lean::alloc_cnstr" >> paren(emit t <+> emit n <+> emit_sizet sz) + | (instr.cnstr o t n sz) := emit_var o >> emit " := lean::alloc_cnstr" >> paren(emit t <+> emit n <+> emit sz) | (instr.set o i x) := emit "lean::set_cnstr_obj" >> paren (emit_var o <+> emit i <+> emit_var x) | (instr.get x o i) := emit_var x >> emit " := lean::cnstr_obj" >> paren(emit_var o <+> emit i) - | (instr.sset o d x) := emit "lean::set_cnstr_scalar" >> paren(emit_var o <+> emit_sizet d <+> emit_var x) - | (instr.sget x t o d) := emit_var x >> emit " := lean::cnstr_scalar" >> emit_template_param t >> paren(emit_var o <+> emit_sizet d) + | (instr.sset o d x) := emit "lean::set_cnstr_scalar" >> paren(emit_var o <+> emit d <+> emit_var x) + | (instr.sget x t o d) := emit_var x >> emit " := lean::cnstr_scalar" >> emit_template_param t >> paren(emit_var o <+> emit d) | (instr.closure x f ys) := return () -- TODO | (instr.apply x ys) := return () -- TODO | (instr.array a sz c) := emit_var a >> emit " := lean::alloc_array" >> paren(emit_var sz <+> emit_var c) diff --git a/library/init/lean/ir/format.lean b/library/init/lean/ir/format.lean index c49f45bb0c..c7745eaf39 100644 --- a/library/init/lean/ir/format.lean +++ b/library/init/lean/ir/format.lean @@ -92,12 +92,6 @@ def sizet_entry.to_format : nat × type → format | (1, ty) := to_fmt ty | (n, ty) := to_fmt n ++ ":" ++ to_fmt ty -def sizet.to_format (s : sizet) : format := -sbracket $ join_sep (s.map sizet_entry.to_format) ("," ++ line) - -instance sizet.has_to_format : has_to_format sizet := ⟨sizet.to_format⟩ -instance sizet.has_to_string : has_to_string sizet := ⟨pretty ∘ to_fmt⟩ - def instr.to_format : instr → format | (instr.lit x ty lit) := to_fmt x ++ " : " ++ to_fmt ty ++ " := " ++ to_fmt lit | (instr.unop x ty op y) := to_fmt x ++ " : " ++ to_fmt ty ++ " := " ++ to_fmt op ++ " " ++ to_fmt y diff --git a/library/init/lean/ir/ir.lean b/library/init/lean/ir/ir.lean index 0cbff904fd..e995a81790 100644 --- a/library/init/lean/ir/ir.lean +++ b/library/init/lean/ir/ir.lean @@ -81,13 +81,6 @@ def tag := uint16 def var := name def fnid := name def blockid := name -/-- -`sizet` is used to represent object size and offset. -An element `[(n_1, t_1), ..., (n_k, t_k)]` represents -the size/offset `n_1 * sizeof(t_1) + ... + n_k * sizeof(t_k)`. -We use this type to be able to generate platform independent -code because the size of some types are platform dependent (e.g., `object` and `usize`) -/ -def sizet := list (nat × type) instance var_has_lt : has_lt var := (name.has_lt_quick : has_lt name) instance blockid_has_lt : has_lt blockid := (name.has_lt_quick : has_lt name) @@ -116,11 +109,11 @@ inductive instr | binop (x : var) (ty : type) (op : binop) (y z : var) -- x : ty := op y z | call (xs : list var) (f : fnid) (ys : list var) -- Function call: xs := f ys /- Constructor objects -/ -| cnstr (o : var) (tag : tag) (nobjs : uint16) (ssz : sizet) -- Create constructor object +| cnstr (o : var) (tag : tag) (nobjs : uint16) (ssz : usize) -- Create constructor object | set (o : var) (i : uint16) (x : var) -- Set object field: set o i x | get (x : var) (o : var) (i : uint16) -- Get object field: x := get o i -| sset (o : var) (d : sizet) (v : var) -- Set scalar field: sset o d v -| sget (x : var) (ty : type) (o : var) (d : sizet) -- Get scalar field: x : ty := sget o d +| sset (o : var) (d : usize) (v : var) -- Set scalar field: sset o d v +| sget (x : var) (ty : type) (o : var) (d : usize) -- Get scalar field: x : ty := sget o d /- Closures -/ | closure (x : var) (f : fnid) (ys : list var) -- Create closure: x := closure f ys | apply (x : var) (ys : list var) -- Apply closure: x := apply ys diff --git a/library/init/lean/ir/parser.lean b/library/init/lean/ir/parser.lean index 828142b56e..1d836009a9 100644 --- a/library/init/lean/ir/parser.lean +++ b/library/init/lean/ir/parser.lean @@ -89,6 +89,13 @@ try (do p ← pos, return $ uint16.of_nat n) "uint16" +def parse_usize : parser usize := +try (do p ← pos, + n ← lexeme num, + when (n ≥ usize_sz) $ unexpected_at "big numeral, it does not fit in an usize" p, + return $ usize.of_nat n) + "usize" + def identifier : parser name := try (do p ← pos, n ← lean.parser.identifier, @@ -113,19 +120,11 @@ do xs ← many1 parse_var, ys ← many parse_var, return $ instr.call ([x] ++ xs) fid ys -def parse_sizet_entry : parser (nat × type) := -(prod.mk 1 <$> parse_type) -<|> -(prod.mk <$> (lexeme num "numeral") <*> (symbol ":" >> parse_type)) - -def parse_sizet : parser sizet := -symbol "[" >> sep_by parse_sizet_entry (symbol ",") <* symbol "]" - def parse_typed_assignment (x : var) : parser instr := do symbol ":", ty ← parse_type, symbol ":=", - (keyword "sget" >> instr.sget x ty <$> parse_var <*> parse_sizet) + (keyword "sget" >> instr.sget x ty <$> parse_var <*> parse_usize) <|> (instr.unop x ty <$> parse_unop <*> parse_var) <|> (instr.binop x ty <$> parse_binop <*> parse_var <*> parse_var) <|> (instr.lit x ty <$> parse_literal) @@ -136,7 +135,7 @@ do symbol ":=", <|> (keyword "apply" >> instr.apply x <$> many1 parse_var) <|> (keyword "get" >> instr.get x <$> parse_var <*> parse_uint16) <|> (keyword "call" >> instr.call [x] <$> parse_fnid <*> many parse_var) -<|> (keyword "cnstr" >> instr.cnstr x <$> parse_uint16 <*> parse_uint16 <*> parse_sizet) +<|> (keyword "cnstr" >> instr.cnstr x <$> parse_uint16 <*> parse_uint16 <*> parse_usize) <|> (keyword "array" >> instr.array x <$> parse_var <*> parse_var) <|> (keyword "sarray" >> instr.sarray x <$> parse_type <*> parse_var <*> parse_var) @@ -150,7 +149,7 @@ def parse_instr : parser instr := (keyword "array_write" >> instr.array_write <$> parse_var <*> parse_var <*> parse_var) <|> (keyword "array_push" >> instr.array_push <$> parse_var <*> parse_var) <|> (keyword "set" >> instr.set <$> parse_var <*> parse_uint16 <*> parse_var) -<|> (keyword "sset" >> instr.sset <$> parse_var <*> parse_sizet <*> parse_var) +<|> (keyword "sset" >> instr.sset <$> parse_var <*> parse_usize <*> parse_var) <|> (instr.unary <$> parse_unins <*> parse_var) <|> parse_assignment diff --git a/tests/lean/parser1.lean b/tests/lean/parser1.lean index c3bc9a5d37..67b2e202dc 100644 --- a/tests/lean/parser1.lean +++ b/tests/lean/parser1.lean @@ -104,11 +104,11 @@ do cmd ← lean.ir.parse_instr, #eval test parse_instr_pp "x : bool := and z y" "x : bool := and z y" #eval test parse_instr_pp "x y := call f z w" "x y := call f z w" #eval test parse_instr_pp "x := call f z w" "x := call f z w" -#eval test parse_instr_pp "o := cnstr 0 3 []" "o := cnstr 0 3 []" +#eval test parse_instr_pp "o := cnstr 0 3 0" "o := cnstr 0 3 0" #eval test parse_instr_pp "set o 0 x" "set o 0 x" #eval test parse_instr_pp "x := get o 0" "x := get o 0" -#eval test parse_instr_pp "sset o [object] x" "sset o [object] x" -#eval test parse_instr_pp "x : bool := sget o [object, 2:usize]" "x : bool := sget o [object, 2:usize]" +#eval test parse_instr_pp "sset o 8 x" "sset o 8 x" +#eval test parse_instr_pp "x : bool := sget o 24" "x : bool := sget o 24" #eval test parse_instr_pp "x := closure f a" "x := closure f a" #eval test parse_instr_pp "x := closure f a b" "x := closure f a b" #eval test parse_instr_pp "x := apply f a" "x := apply f a" diff --git a/tests/lean/run/parser_ir1.lean b/tests/lean/run/parser_ir1.lean index fd5ac3d9c6..4de7a5f638 100644 --- a/tests/lean/run/parser_ir1.lean +++ b/tests/lean/run/parser_ir1.lean @@ -53,13 +53,13 @@ do (except.ok d) ← return $ parse (whitespace >> parse_def) s, def IR3 := " def mk_struct (d1 : object) (d2 : uint32) (d3 : usize) (d4 : uint32) (d5 : bool) (d6 : bool) : object := main: - o := cnstr 0 1 [object, 2:uint32, usize, bool]; + o := cnstr 0 1 18; set o 0 d1; - sset o [object] d3; - sset o [object, usize] d2; - sset o [object, usize, uint32] d4; - sset o [object, usize, 2:uint32] d5; - sset o [object, usize, 2:uint32, bool] d6; + sset o 8 d3; + sset o 16 d2; + sset o 20 d4; + sset o 24 d5; + sset o 25 d6; ret o; " #eval show_result (whitespace >> parse_def) IR3 @@ -76,8 +76,8 @@ do (except.ok d) ← return $ parse (whitespace >> parse_def) s, def IR4 := " def swap (d1 : object) (d2 : object) : object object := main: - r1 := cnstr 0 2 []; - r2 := cnstr 0 2 []; + r1 := cnstr 0 2 0; + r2 := cnstr 0 2 0; set r1 0 d1; set r1 1 d2; inc d1;