c-parser: provide AARCH64 setup

The setup for L4V_ARCH=AARCH64 is identical to RISCV64, i.e. same word length, encoding, and endianness. The setup includes the standalone parser used for compile and preprocess checks in the seL4 repo. Signed-off-by: Gerwin Klein <gerwin.klein@proofcraft.systems>
2022-02-03 15:50:22 +11:00 · 2022-02-03 15:50:22 +11:00 · ffcaff6af9
parent d3ecd0e451
commit ffcaff6af9
9 changed files with 431 additions and 4 deletions
--- a/tools/c-parser/.gitignore
+++ b/tools/c-parser/.gitignore
@ -14,6 +14,7 @@ globalmakevars.local
 /standalone-parser/table.ML
 /standalone-parser/ARM/
 /standalone-parser/ARM_HYP/
+/standalone-parser/AARCH64/
 /standalone-parser/RISCV64/
 /standalone-parser/X64/

--- a/tools/c-parser/INSTALL.md
+++ b/tools/c-parser/INSTALL.md
@ -15,6 +15,7 @@ The C parser supports multiple target architectures:

    - ARM
    - ARM_HYP
+    - AARCH64
    - X64
    - RISCV64

--- a/tools/c-parser/mkrelease
+++ b/tools/c-parser/mkrelease
@ -38,7 +38,7 @@ fi
 # Get the directory that this script is running in.
 CPARSER_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 TOPLEVEL_DIR=$( git -C ${CPARSER_DIR} rev-parse --show-toplevel)
-RELEASE_ARCHS='ARM ARM_HYP X64 RISCV64'
+RELEASE_ARCHS='ARM ARM_HYP AARCH64 X64 RISCV64'
 pushd "${TOPLEVEL_DIR}"

 # Ensure that our working directory is clean.
--- a/tools/c-parser/standalone-parser/Makefile
+++ b/tools/c-parser/standalone-parser/Makefile
@ -17,21 +17,26 @@ STP_INCLUDED=true

 ARM_DIR=$(STP_PFX)/ARM
 ARM_HYP_DIR=$(STP_PFX)/ARM_HYP
+AARCH64_DIR=$(STP_PFX)/AARCH64
 X64_DIR=$(STP_PFX)/X64
 RISCV64_DIR=$(STP_PFX)/RISCV64
-ARCH_DIRS=$(ARM_DIR) $(ARM_HYP_DIR) $(X64_DIR) $(RISCV64_DIR)
+ARCH_DIRS=$(ARM_DIR) $(ARM_HYP_DIR) $(AARCH64_DIR) $(X64_DIR) $(RISCV64_DIR)

 STPARSER_ARM=$(ARM_DIR)/c-parser
 STPARSER_ARM_HYP=$(ARM_HYP_DIR)/c-parser
+STPARSER_AARCH64=$(AARCH64_DIR)/c-parser
 STPARSER_X64=$(X64_DIR)/c-parser
 STPARSER_RISCV64=$(RISCV64_DIR)/c-parser
-STPARSERS=$(STPARSER_ARM) $(STPARSER_ARM_HYP) $(STPARSER_X64) $(STPARSER_RISCV64)
+STPARSERS=$(STPARSER_ARM) $(STPARSER_ARM_HYP) $(STPARSER_AARCH64) \
+          $(STPARSER_X64) $(STPARSER_RISCV64)

 TOKENIZER_ARM=$(ARM_DIR)/tokenizer
 TOKENIZER_ARM_HYP=$(ARM_HYP_DIR)/tokenizer
+TOKENIZER_AARCH64=$(AARCH64_DIR)/tokenizer
 TOKENIZER_X64=$(X64_DIR)/tokenizer
 TOKENIZER_RISCV64=$(RISCV64_DIR)/tokenizer
-TOKENIZERS=$(TOKENIZER_ARM) $(TOKENIZER_ARM_HYP) $(TOKENIZER_X64) $(TOKENIZER_RISCV64)
+TOKENIZERS=$(TOKENIZER_ARM) $(TOKENIZER_ARM_HYP) $(TOKENIZER_AARCH64) \
+           $(TOKENIZER_X64) $(TOKENIZER_RISCV64)

 .PHONY: all cparser_tools stp_all standalone-cparser standalone-tokenizer

@ -58,16 +63,19 @@ ifeq ($(SML_COMPILER),mlton)

 ARM_MLB_PATH := -mlb-path-var 'L4V_ARCH ARM'
 ARM_HYP_MLB_PATH := -mlb-path-var 'L4V_ARCH ARM_HYP'
+AARCH64_MLB_PATH := -mlb-path-var 'L4V_ARCH AARCH64'
 X64_MLB_PATH := -mlb-path-var 'L4V_ARCH X64'
 RISCV64_MLB_PATH := -mlb-path-var 'L4V_ARCH RISCV64'

 PARSER_DEPS_ARM := $(shell mlton $(ARM_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb)
 PARSER_DEPS_ARM_HYP := $(shell mlton $(ARM_HYP_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb)
+PARSER_DEPS_AARCH64 := $(shell mlton $(AARCH64_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb)
 PARSER_DEPS_X64 := $(shell mlton $(X64_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb)
 PARSER_DEPS_RISCV64 := $(shell mlton $(RISCV64_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb)

 TOKENIZER_DEPS_ARM := $(shell mlton $(ARM_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb)
 TOKENIZER_DEPS_ARM_HYP := $(shell mlton $(ARM_HYP_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb)
+TOKENIZER_DEPS_AARCH64 := $(shell mlton $(AARCH64_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb)
 TOKENIZER_DEPS_X64 := $(shell mlton $(X64_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb)
 TOKENIZER_DEPS_RISCV64 := $(shell mlton $(RISCV64_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb)

@ -77,6 +85,9 @@ $(STPARSER_ARM): $(PARSER_DEPS_ARM) | $(ARM_DIR)
 $(STPARSER_ARM_HYP): $(PARSER_DEPS_ARM_HYP) | $(ARM_HYP_DIR)
 	mlton $(ARM_HYP_MLB_PATH) -output $@ $<

+$(STPARSER_AARCH64): $(PARSER_DEPS_AARCH64) | $(AARCH64_DIR)
+	mlton $(AARCH64_MLB_PATH) -output $@ $<
+
 $(STPARSER_X64): $(PARSER_DEPS_X64) | $(X64_DIR)
 	mlton $(X64_MLB_PATH) -output $@ $<

@ -89,6 +100,9 @@ $(TOKENIZER_ARM): $(TOKENIZER_DEPS_ARM) | $(ARM_DIR)
 $(TOKENIZER_ARM_HYP): $(TOKENIZER_DEPS_ARM_HYP) | $(ARM_HYP_DIR)
 	mlton $(ARM_HYP_MLB_PATH) -output $@ $<

+$(TOKENIZER_AARCH64): $(TOKENIZER_DEPS_AARCH64) | $(AARCH64_DIR)
+	mlton $(AARCH64_MLB_PATH) -output $@ $<
+
 $(TOKENIZER_X64): $(TOKENIZER_DEPS_X64) | $(X64_DIR)
 	mlton $(X64_MLB_PATH) -output $@ $<

--- a/tools/c-parser/testfiles/AARCH64/imports/MachineWords.thy
+++ b/tools/c-parser/testfiles/AARCH64/imports/MachineWords.thy
@ -0,0 +1,20 @@
+(*
+ * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *)
+
+theory MachineWords
+imports "CParser.CTranslation"
+begin
+
+type_synonym machine_word_len = "64"
+
+type_synonym machine_word = "machine_word_len word"
+
+abbreviation "machine_word_bytes \<equiv> 8 :: nat"
+
+lemma of_nat_machine_word_bytes[simp]: "of_nat machine_word_bytes = 8"
+  by simp
+
+end
--- a/tools/c-parser/umm_heap/AARCH64/Addr_Type.thy
+++ b/tools/c-parser/umm_heap/AARCH64/Addr_Type.thy
@ -0,0 +1,38 @@
+(*
+ * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *)
+
+(* License: BSD, terms see file ./LICENSE *)
+
+theory Addr_Type
+imports "Word_Lib.WordSetup"
+begin
+
+type_synonym addr_bitsize = "64"
+type_synonym addr = "addr_bitsize word"
+definition addr_bitsize :: nat where "addr_bitsize \<equiv> 64"
+definition addr_align :: nat where "addr_align \<equiv> 3"
+declare addr_align_def[simp]
+
+definition addr_card :: nat where
+  "addr_card \<equiv> card (UNIV::addr set)"
+
+
+
+declare addr_bitsize_def[simp]
+
+lemma addr_card:
+  "addr_card = 2^addr_bitsize"
+  by (simp add: addr_card_def card_word)
+
+lemma len_of_addr_card:
+  "2 ^ len_of TYPE(addr_bitsize) = addr_card"
+  by (simp add: addr_card)
+
+lemma of_nat_addr_card [simp]:
+  "of_nat addr_card = (0::addr)"
+  by (simp add: addr_card)
+
+end
--- a/tools/c-parser/umm_heap/AARCH64/ArchArraysMemInstance.thy
+++ b/tools/c-parser/umm_heap/AARCH64/ArchArraysMemInstance.thy
@ -0,0 +1,269 @@
+(*
+ * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *)
+
+theory ArchArraysMemInstance
+imports ArraysMemInstance
+begin
+
+(* Showing arrays are in mem_type requires maximum sizes for objects,
+   and maximum counts for elements *)
+class array_outer_max_size = mem_type +
+  assumes array_outer_max_size_ax: "size_of TYPE('a::c_type) < 2 ^ 26"
+
+class array_max_count = finite +
+  assumes array_max_count_ax: "CARD ('a) <= 2 ^ 20"
+
+instance array :: (array_outer_max_size, array_max_count) mem_type
+apply intro_classes
+apply simp
+apply (subgoal_tac "addr_card = 2 ^ (addr_bitsize - 26) * 2 ^ 26")
+  apply (erule ssubst)
+  apply (rule less_le_trans[where y = "card (UNIV::'b set) * 2 ^ 26"])
+    apply (rule mult_less_mono2)
+      apply (rule array_outer_max_size_ax)
+    apply simp
+  apply (rule mult_le_mono1)
+    apply (rule le_trans[where j = "2 ^ 20"])
+      apply (rule array_max_count_ax)
+    apply simp
+  apply simp
+apply (simp add: addr_card)
+done
+
+class array_inner_max_size = array_outer_max_size +
+  assumes array_inner_max_size_ax: "size_of TYPE('a::c_type) < 2 ^ 6"
+
+instance array :: (array_inner_max_size, array_max_count) array_outer_max_size
+apply intro_classes
+apply simp
+  apply (rule order_less_le_trans)
+   apply (rule mult_le_less_imp_less)
+    apply (rule array_max_count_ax)
+   apply (rule array_inner_max_size_ax)
+  apply simp
+   apply simp
+  apply simp
+  done
+
+instance word :: (len8) array_outer_max_size
+apply intro_classes
+apply(simp add: size_of_def)
+apply(subgoal_tac "len_of TYPE('a) \<le> 128")
+ apply simp
+apply(rule len8_width)
+done
+
+instance word :: (len8) array_inner_max_size
+apply intro_classes
+apply(simp add: size_of_def)
+apply(subgoal_tac "len_of TYPE('a) \<le> 128")
+ apply simp
+apply(rule len8_width)
+done
+
+instance ptr :: (c_type) array_outer_max_size
+apply intro_classes
+apply (simp add: size_of_def)
+done
+
+instance ptr :: (c_type) array_inner_max_size
+apply intro_classes
+apply (simp add: size_of_def)
+done
+
+class lt19 = finite +
+  assumes lt19_ax: "CARD ('a) < 2 ^ 19"
+class lt18 = lt19 +
+  assumes lt18_ax: "CARD ('a) < 2 ^ 18"
+class lt17 = lt18 +
+  assumes lt17_ax: "CARD ('a) < 2 ^ 17"
+class lt16 = lt17 +
+  assumes lt16_ax: "CARD ('a) < 2 ^ 16"
+class lt15 = lt16 +
+  assumes lt15_ax: "CARD ('a) < 2 ^ 15"
+class lt14 = lt15 +
+  assumes lt14_ax: "CARD ('a) < 2 ^ 14"
+class lt13 = lt14 +
+  assumes lt13_ax: "CARD ('a) < 2 ^ 13"
+class lt12 = lt13 +
+  assumes lt12_ax: "CARD ('a) < 2 ^ 12"
+class lt11 = lt12 +
+  assumes lt11_ax: "CARD ('a) < 2 ^ 11"
+class lt10 = lt11 +
+  assumes lt10_ax: "CARD ('a) < 2 ^ 10"
+class lt9 = lt10 +
+  assumes lt9_ax: "CARD ('a) < 2 ^ 9"
+class lt8 = lt9 +
+  assumes lt8_ax: "CARD ('a) < 2 ^ 8"
+class lt7 = lt8 +
+  assumes lt7_ax: "CARD ('a) < 2 ^ 7"
+class lt6 = lt7 +
+  assumes lt6_ax: "CARD ('a) < 2 ^ 6"
+class lt5 = lt6 +
+  assumes lt5_ax: "CARD ('a) < 2 ^ 5"
+class lt4 = lt5 +
+  assumes lt4_ax: "CARD ('a) < 2 ^ 4"
+class lt3 = lt4 +
+  assumes lt3_ax: "CARD ('a) < 2 ^ 3"
+class lt2 = lt3 +
+  assumes lt2_ax: "CARD ('a) < 2 ^ 2"
+class lt1 = lt2 +
+  assumes lt1_ax: "CARD ('a) < 2 ^ 1"
+
+instance bit0 :: (lt19) array_max_count
+  using lt19_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt19) array_max_count
+  using lt19_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt18) lt19
+  using lt18_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt18) lt19
+  using lt18_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt17) lt18
+  using lt17_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt17) lt18
+  using lt17_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt16) lt17
+  using lt16_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt16) lt17
+  using lt16_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt15) lt16
+  using lt15_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt15) lt16
+  using lt15_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt14) lt15
+  using lt14_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt14) lt15
+  using lt14_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt13) lt14
+  using lt13_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt13) lt14
+  using lt13_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt12) lt13
+  using lt12_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt12) lt13
+  using lt12_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt11) lt12
+  using lt11_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt11) lt12
+  using lt11_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt10) lt11
+  using lt10_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt10) lt11
+  using lt10_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt9) lt10
+  using lt9_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt9) lt10
+  using lt9_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt8) lt9
+  using lt8_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt8) lt9
+  using lt8_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt7) lt8
+  using lt7_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt7) lt8
+  using lt7_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt6) lt7
+  using lt6_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt6) lt7
+  using lt6_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt5) lt6
+  using lt5_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt5) lt6
+  using lt5_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt4) lt5
+  using lt4_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt4) lt5
+  using lt4_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt3) lt4
+  using lt3_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt3) lt4
+  using lt3_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt2) lt3
+  using lt2_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt2) lt3
+  using lt2_ax[where 'a='a] by intro_classes simp
+
+instance bit0 :: (lt1) lt2
+  using lt1_ax[where 'a='a] by intro_classes simp
+
+instance bit1 :: (lt1) lt2
+  using lt1_ax[where 'a='a] by intro_classes simp
+
+instance num1 :: lt1
+  by (intro_classes, simp_all)
+
+(* don't understand why this also seems to be necessary *)
+instance num1 :: array_max_count
+  by (intro_classes, simp)
+
+(* introduce hackish handling of 8192 type by making a copy of the type
+   under a constructor, and then manually showing that it is an instance of
+   array_max_count *)
+datatype array_max_count_ty = array_max_count_ty "1048576"
+
+(* ML c-parser code also needs to know at which array size to use this type *)
+ML \<open>
+  structure ArchArrayMaxCount = struct
+    val array_max_count = 1048576
+  end
+\<close>
+
+lemma univ_array_max_count_ty:
+  "(UNIV::array_max_count_ty set) = image array_max_count_ty (UNIV::1048576 set)"
+  apply (simp add: set_eq_iff image_iff)
+  apply (rule_tac allI)
+  apply (rule_tac array_max_count_ty.induct)
+  apply simp
+  done
+
+instance "array_max_count_ty" :: finite
+  apply intro_classes
+  apply (simp add: univ_array_max_count_ty)
+  done
+
+lemma card_array_max_count_ty[simp]: "CARD(array_max_count_ty) = CARD(1048576)"
+  apply (simp add: univ_array_max_count_ty card_image inj_on_def)
+  done
+
+instance "array_max_count_ty" :: array_max_count
+  by intro_classes simp
+
+end
--- a/tools/c-parser/umm_heap/AARCH64/TargetNumbers.ML
+++ b/tools/c-parser/umm_heap/AARCH64/TargetNumbers.ML
@ -0,0 +1,62 @@
+(*
+ * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *)
+
+structure ImplementationNumbers : IMPLEMENTATION_NUMBERS =
+struct
+
+open IntInf
+
+fun exp (n, ex) = if ex = 0 then 1
+                  else let
+                      val q = ex div 2
+                      val r = if ex mod 2 = 1 then n else 1
+                      val qex = exp (n, q)
+                    in
+                      qex * qex * r
+                    end
+
+val boolWidth = fromInt 8
+val charWidth = 8
+val shortWidth = 16
+val intWidth = 32
+val longWidth = 64
+val llongWidth = 64
+val ptrWidth : int = 64
+val ptr_t = BaseCTypes.Long
+val CHAR_BIT : int = 8
+
+fun umax width = exp(2, width) - 1
+fun max width = exp(2, width - 1) - 1
+fun min width = ~(exp(2, width - 1))
+
+val UCHAR_MAX = umax charWidth
+val USHORT_MAX = umax shortWidth
+val UINT_MAX = umax intWidth
+val ULONG_MAX = umax longWidth
+val ULLONG_MAX = umax llongWidth
+
+val SCHAR_MAX = max intWidth
+val SHORT_MAX = max shortWidth
+val INT_MAX = max intWidth
+val LONG_MAX = max longWidth
+val LLONG_MAX = max llongWidth
+
+val SCHAR_MIN = min charWidth
+val SHORT_MIN = min shortWidth
+val INT_MIN = min intWidth
+val LONG_MIN = min longWidth
+val LLONG_MIN = min llongWidth
+
+val char_signedp = false
+
+val CHAR_MAX = UCHAR_MAX
+val CHAR_MIN = IntInf.fromInt 0
+
+open CharLitUtil
+
+val charliteral_conversion = uchar_conv {umax=UCHAR_MAX}
+
+end
--- a/tools/c-parser/umm_heap/AARCH64/Word_Mem_Encoding.thy
+++ b/tools/c-parser/umm_heap/AARCH64/Word_Mem_Encoding.thy
@ -0,0 +1,22 @@
+(*
+ * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *)
+
+theory Word_Mem_Encoding
+  imports Vanilla32_Preliminaries
+begin
+
+(* Little-endian encoding *)
+definition
+  word_tag :: "'a::len8 word itself \<Rightarrow> 'a word typ_info"
+where
+  "word_tag (w::'a::len8 word itself) \<equiv>
+    TypDesc (TypScalar (len_of TYPE('a) div 8)
+                       (len_exp TYPE('a))
+                       \<lparr> field_access = \<lambda>v bs. (rev o word_rsplit) v,
+                         field_update = \<lambda>bs v. (word_rcat (rev bs)::'a::len8 word) \<rparr>)
+            (''word'' @  nat_to_bin_string (len_of TYPE('a)))"
+
+end