diff --git a/tools/c-parser/.gitignore b/tools/c-parser/.gitignore index 5707690eb..f5cf504c5 100644 --- a/tools/c-parser/.gitignore +++ b/tools/c-parser/.gitignore @@ -14,6 +14,7 @@ globalmakevars.local /standalone-parser/table.ML /standalone-parser/ARM/ /standalone-parser/ARM_HYP/ +/standalone-parser/AARCH64/ /standalone-parser/RISCV64/ /standalone-parser/X64/ diff --git a/tools/c-parser/INSTALL.md b/tools/c-parser/INSTALL.md index c70f4f9e5..0fb46808f 100644 --- a/tools/c-parser/INSTALL.md +++ b/tools/c-parser/INSTALL.md @@ -15,6 +15,7 @@ The C parser supports multiple target architectures: - ARM - ARM_HYP + - AARCH64 - X64 - RISCV64 diff --git a/tools/c-parser/mkrelease b/tools/c-parser/mkrelease index d3c55ac51..234d5d95b 100755 --- a/tools/c-parser/mkrelease +++ b/tools/c-parser/mkrelease @@ -38,7 +38,7 @@ fi # Get the directory that this script is running in. CPARSER_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) TOPLEVEL_DIR=$( git -C ${CPARSER_DIR} rev-parse --show-toplevel) -RELEASE_ARCHS='ARM ARM_HYP X64 RISCV64' +RELEASE_ARCHS='ARM ARM_HYP AARCH64 X64 RISCV64' pushd "${TOPLEVEL_DIR}" # Ensure that our working directory is clean. diff --git a/tools/c-parser/standalone-parser/Makefile b/tools/c-parser/standalone-parser/Makefile index 06143cd80..ec008e30e 100644 --- a/tools/c-parser/standalone-parser/Makefile +++ b/tools/c-parser/standalone-parser/Makefile @@ -17,21 +17,26 @@ STP_INCLUDED=true ARM_DIR=$(STP_PFX)/ARM ARM_HYP_DIR=$(STP_PFX)/ARM_HYP +AARCH64_DIR=$(STP_PFX)/AARCH64 X64_DIR=$(STP_PFX)/X64 RISCV64_DIR=$(STP_PFX)/RISCV64 -ARCH_DIRS=$(ARM_DIR) $(ARM_HYP_DIR) $(X64_DIR) $(RISCV64_DIR) +ARCH_DIRS=$(ARM_DIR) $(ARM_HYP_DIR) $(AARCH64_DIR) $(X64_DIR) $(RISCV64_DIR) STPARSER_ARM=$(ARM_DIR)/c-parser STPARSER_ARM_HYP=$(ARM_HYP_DIR)/c-parser +STPARSER_AARCH64=$(AARCH64_DIR)/c-parser STPARSER_X64=$(X64_DIR)/c-parser STPARSER_RISCV64=$(RISCV64_DIR)/c-parser -STPARSERS=$(STPARSER_ARM) $(STPARSER_ARM_HYP) $(STPARSER_X64) $(STPARSER_RISCV64) +STPARSERS=$(STPARSER_ARM) $(STPARSER_ARM_HYP) $(STPARSER_AARCH64) \ + $(STPARSER_X64) $(STPARSER_RISCV64) TOKENIZER_ARM=$(ARM_DIR)/tokenizer TOKENIZER_ARM_HYP=$(ARM_HYP_DIR)/tokenizer +TOKENIZER_AARCH64=$(AARCH64_DIR)/tokenizer TOKENIZER_X64=$(X64_DIR)/tokenizer TOKENIZER_RISCV64=$(RISCV64_DIR)/tokenizer -TOKENIZERS=$(TOKENIZER_ARM) $(TOKENIZER_ARM_HYP) $(TOKENIZER_X64) $(TOKENIZER_RISCV64) +TOKENIZERS=$(TOKENIZER_ARM) $(TOKENIZER_ARM_HYP) $(TOKENIZER_AARCH64) \ + $(TOKENIZER_X64) $(TOKENIZER_RISCV64) .PHONY: all cparser_tools stp_all standalone-cparser standalone-tokenizer @@ -58,16 +63,19 @@ ifeq ($(SML_COMPILER),mlton) ARM_MLB_PATH := -mlb-path-var 'L4V_ARCH ARM' ARM_HYP_MLB_PATH := -mlb-path-var 'L4V_ARCH ARM_HYP' +AARCH64_MLB_PATH := -mlb-path-var 'L4V_ARCH AARCH64' X64_MLB_PATH := -mlb-path-var 'L4V_ARCH X64' RISCV64_MLB_PATH := -mlb-path-var 'L4V_ARCH RISCV64' PARSER_DEPS_ARM := $(shell mlton $(ARM_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb) PARSER_DEPS_ARM_HYP := $(shell mlton $(ARM_HYP_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb) +PARSER_DEPS_AARCH64 := $(shell mlton $(AARCH64_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb) PARSER_DEPS_X64 := $(shell mlton $(X64_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb) PARSER_DEPS_RISCV64 := $(shell mlton $(RISCV64_MLB_PATH) -stop f $(STP_PFX)/c-parser.mlb) TOKENIZER_DEPS_ARM := $(shell mlton $(ARM_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb) TOKENIZER_DEPS_ARM_HYP := $(shell mlton $(ARM_HYP_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb) +TOKENIZER_DEPS_AARCH64 := $(shell mlton $(AARCH64_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb) TOKENIZER_DEPS_X64 := $(shell mlton $(X64_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb) TOKENIZER_DEPS_RISCV64 := $(shell mlton $(RISCV64_MLB_PATH) -stop f $(STP_PFX)/tokenizer.mlb) @@ -77,6 +85,9 @@ $(STPARSER_ARM): $(PARSER_DEPS_ARM) | $(ARM_DIR) $(STPARSER_ARM_HYP): $(PARSER_DEPS_ARM_HYP) | $(ARM_HYP_DIR) mlton $(ARM_HYP_MLB_PATH) -output $@ $< +$(STPARSER_AARCH64): $(PARSER_DEPS_AARCH64) | $(AARCH64_DIR) + mlton $(AARCH64_MLB_PATH) -output $@ $< + $(STPARSER_X64): $(PARSER_DEPS_X64) | $(X64_DIR) mlton $(X64_MLB_PATH) -output $@ $< @@ -89,6 +100,9 @@ $(TOKENIZER_ARM): $(TOKENIZER_DEPS_ARM) | $(ARM_DIR) $(TOKENIZER_ARM_HYP): $(TOKENIZER_DEPS_ARM_HYP) | $(ARM_HYP_DIR) mlton $(ARM_HYP_MLB_PATH) -output $@ $< +$(TOKENIZER_AARCH64): $(TOKENIZER_DEPS_AARCH64) | $(AARCH64_DIR) + mlton $(AARCH64_MLB_PATH) -output $@ $< + $(TOKENIZER_X64): $(TOKENIZER_DEPS_X64) | $(X64_DIR) mlton $(X64_MLB_PATH) -output $@ $< diff --git a/tools/c-parser/testfiles/AARCH64/imports/MachineWords.thy b/tools/c-parser/testfiles/AARCH64/imports/MachineWords.thy new file mode 100644 index 000000000..27b52a717 --- /dev/null +++ b/tools/c-parser/testfiles/AARCH64/imports/MachineWords.thy @@ -0,0 +1,20 @@ +(* + * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) + * + * SPDX-License-Identifier: BSD-2-Clause + *) + +theory MachineWords +imports "CParser.CTranslation" +begin + +type_synonym machine_word_len = "64" + +type_synonym machine_word = "machine_word_len word" + +abbreviation "machine_word_bytes \ 8 :: nat" + +lemma of_nat_machine_word_bytes[simp]: "of_nat machine_word_bytes = 8" + by simp + +end diff --git a/tools/c-parser/umm_heap/AARCH64/Addr_Type.thy b/tools/c-parser/umm_heap/AARCH64/Addr_Type.thy new file mode 100644 index 000000000..e78f50f3d --- /dev/null +++ b/tools/c-parser/umm_heap/AARCH64/Addr_Type.thy @@ -0,0 +1,38 @@ +(* + * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) + * + * SPDX-License-Identifier: BSD-2-Clause + *) + +(* License: BSD, terms see file ./LICENSE *) + +theory Addr_Type +imports "Word_Lib.WordSetup" +begin + +type_synonym addr_bitsize = "64" +type_synonym addr = "addr_bitsize word" +definition addr_bitsize :: nat where "addr_bitsize \ 64" +definition addr_align :: nat where "addr_align \ 3" +declare addr_align_def[simp] + +definition addr_card :: nat where + "addr_card \ card (UNIV::addr set)" + + + +declare addr_bitsize_def[simp] + +lemma addr_card: + "addr_card = 2^addr_bitsize" + by (simp add: addr_card_def card_word) + +lemma len_of_addr_card: + "2 ^ len_of TYPE(addr_bitsize) = addr_card" + by (simp add: addr_card) + +lemma of_nat_addr_card [simp]: + "of_nat addr_card = (0::addr)" + by (simp add: addr_card) + +end diff --git a/tools/c-parser/umm_heap/AARCH64/ArchArraysMemInstance.thy b/tools/c-parser/umm_heap/AARCH64/ArchArraysMemInstance.thy new file mode 100644 index 000000000..8eb2a8ffb --- /dev/null +++ b/tools/c-parser/umm_heap/AARCH64/ArchArraysMemInstance.thy @@ -0,0 +1,269 @@ +(* + * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) + * + * SPDX-License-Identifier: BSD-2-Clause + *) + +theory ArchArraysMemInstance +imports ArraysMemInstance +begin + +(* Showing arrays are in mem_type requires maximum sizes for objects, + and maximum counts for elements *) +class array_outer_max_size = mem_type + + assumes array_outer_max_size_ax: "size_of TYPE('a::c_type) < 2 ^ 26" + +class array_max_count = finite + + assumes array_max_count_ax: "CARD ('a) <= 2 ^ 20" + +instance array :: (array_outer_max_size, array_max_count) mem_type +apply intro_classes +apply simp +apply (subgoal_tac "addr_card = 2 ^ (addr_bitsize - 26) * 2 ^ 26") + apply (erule ssubst) + apply (rule less_le_trans[where y = "card (UNIV::'b set) * 2 ^ 26"]) + apply (rule mult_less_mono2) + apply (rule array_outer_max_size_ax) + apply simp + apply (rule mult_le_mono1) + apply (rule le_trans[where j = "2 ^ 20"]) + apply (rule array_max_count_ax) + apply simp + apply simp +apply (simp add: addr_card) +done + +class array_inner_max_size = array_outer_max_size + + assumes array_inner_max_size_ax: "size_of TYPE('a::c_type) < 2 ^ 6" + +instance array :: (array_inner_max_size, array_max_count) array_outer_max_size +apply intro_classes +apply simp + apply (rule order_less_le_trans) + apply (rule mult_le_less_imp_less) + apply (rule array_max_count_ax) + apply (rule array_inner_max_size_ax) + apply simp + apply simp + apply simp + done + +instance word :: (len8) array_outer_max_size +apply intro_classes +apply(simp add: size_of_def) +apply(subgoal_tac "len_of TYPE('a) \ 128") + apply simp +apply(rule len8_width) +done + +instance word :: (len8) array_inner_max_size +apply intro_classes +apply(simp add: size_of_def) +apply(subgoal_tac "len_of TYPE('a) \ 128") + apply simp +apply(rule len8_width) +done + +instance ptr :: (c_type) array_outer_max_size +apply intro_classes +apply (simp add: size_of_def) +done + +instance ptr :: (c_type) array_inner_max_size +apply intro_classes +apply (simp add: size_of_def) +done + +class lt19 = finite + + assumes lt19_ax: "CARD ('a) < 2 ^ 19" +class lt18 = lt19 + + assumes lt18_ax: "CARD ('a) < 2 ^ 18" +class lt17 = lt18 + + assumes lt17_ax: "CARD ('a) < 2 ^ 17" +class lt16 = lt17 + + assumes lt16_ax: "CARD ('a) < 2 ^ 16" +class lt15 = lt16 + + assumes lt15_ax: "CARD ('a) < 2 ^ 15" +class lt14 = lt15 + + assumes lt14_ax: "CARD ('a) < 2 ^ 14" +class lt13 = lt14 + + assumes lt13_ax: "CARD ('a) < 2 ^ 13" +class lt12 = lt13 + + assumes lt12_ax: "CARD ('a) < 2 ^ 12" +class lt11 = lt12 + + assumes lt11_ax: "CARD ('a) < 2 ^ 11" +class lt10 = lt11 + + assumes lt10_ax: "CARD ('a) < 2 ^ 10" +class lt9 = lt10 + + assumes lt9_ax: "CARD ('a) < 2 ^ 9" +class lt8 = lt9 + + assumes lt8_ax: "CARD ('a) < 2 ^ 8" +class lt7 = lt8 + + assumes lt7_ax: "CARD ('a) < 2 ^ 7" +class lt6 = lt7 + + assumes lt6_ax: "CARD ('a) < 2 ^ 6" +class lt5 = lt6 + + assumes lt5_ax: "CARD ('a) < 2 ^ 5" +class lt4 = lt5 + + assumes lt4_ax: "CARD ('a) < 2 ^ 4" +class lt3 = lt4 + + assumes lt3_ax: "CARD ('a) < 2 ^ 3" +class lt2 = lt3 + + assumes lt2_ax: "CARD ('a) < 2 ^ 2" +class lt1 = lt2 + + assumes lt1_ax: "CARD ('a) < 2 ^ 1" + +instance bit0 :: (lt19) array_max_count + using lt19_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt19) array_max_count + using lt19_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt18) lt19 + using lt18_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt18) lt19 + using lt18_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt17) lt18 + using lt17_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt17) lt18 + using lt17_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt16) lt17 + using lt16_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt16) lt17 + using lt16_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt15) lt16 + using lt15_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt15) lt16 + using lt15_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt14) lt15 + using lt14_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt14) lt15 + using lt14_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt13) lt14 + using lt13_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt13) lt14 + using lt13_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt12) lt13 + using lt12_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt12) lt13 + using lt12_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt11) lt12 + using lt11_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt11) lt12 + using lt11_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt10) lt11 + using lt10_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt10) lt11 + using lt10_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt9) lt10 + using lt9_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt9) lt10 + using lt9_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt8) lt9 + using lt8_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt8) lt9 + using lt8_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt7) lt8 + using lt7_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt7) lt8 + using lt7_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt6) lt7 + using lt6_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt6) lt7 + using lt6_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt5) lt6 + using lt5_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt5) lt6 + using lt5_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt4) lt5 + using lt4_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt4) lt5 + using lt4_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt3) lt4 + using lt3_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt3) lt4 + using lt3_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt2) lt3 + using lt2_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt2) lt3 + using lt2_ax[where 'a='a] by intro_classes simp + +instance bit0 :: (lt1) lt2 + using lt1_ax[where 'a='a] by intro_classes simp + +instance bit1 :: (lt1) lt2 + using lt1_ax[where 'a='a] by intro_classes simp + +instance num1 :: lt1 + by (intro_classes, simp_all) + +(* don't understand why this also seems to be necessary *) +instance num1 :: array_max_count + by (intro_classes, simp) + +(* introduce hackish handling of 8192 type by making a copy of the type + under a constructor, and then manually showing that it is an instance of + array_max_count *) +datatype array_max_count_ty = array_max_count_ty "1048576" + +(* ML c-parser code also needs to know at which array size to use this type *) +ML \ + structure ArchArrayMaxCount = struct + val array_max_count = 1048576 + end +\ + +lemma univ_array_max_count_ty: + "(UNIV::array_max_count_ty set) = image array_max_count_ty (UNIV::1048576 set)" + apply (simp add: set_eq_iff image_iff) + apply (rule_tac allI) + apply (rule_tac array_max_count_ty.induct) + apply simp + done + +instance "array_max_count_ty" :: finite + apply intro_classes + apply (simp add: univ_array_max_count_ty) + done + +lemma card_array_max_count_ty[simp]: "CARD(array_max_count_ty) = CARD(1048576)" + apply (simp add: univ_array_max_count_ty card_image inj_on_def) + done + +instance "array_max_count_ty" :: array_max_count + by intro_classes simp + +end diff --git a/tools/c-parser/umm_heap/AARCH64/TargetNumbers.ML b/tools/c-parser/umm_heap/AARCH64/TargetNumbers.ML new file mode 100644 index 000000000..eba235e84 --- /dev/null +++ b/tools/c-parser/umm_heap/AARCH64/TargetNumbers.ML @@ -0,0 +1,62 @@ +(* + * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) + * + * SPDX-License-Identifier: BSD-2-Clause + *) + +structure ImplementationNumbers : IMPLEMENTATION_NUMBERS = +struct + +open IntInf + +fun exp (n, ex) = if ex = 0 then 1 + else let + val q = ex div 2 + val r = if ex mod 2 = 1 then n else 1 + val qex = exp (n, q) + in + qex * qex * r + end + +val boolWidth = fromInt 8 +val charWidth = 8 +val shortWidth = 16 +val intWidth = 32 +val longWidth = 64 +val llongWidth = 64 +val ptrWidth : int = 64 +val ptr_t = BaseCTypes.Long +val CHAR_BIT : int = 8 + +fun umax width = exp(2, width) - 1 +fun max width = exp(2, width - 1) - 1 +fun min width = ~(exp(2, width - 1)) + +val UCHAR_MAX = umax charWidth +val USHORT_MAX = umax shortWidth +val UINT_MAX = umax intWidth +val ULONG_MAX = umax longWidth +val ULLONG_MAX = umax llongWidth + +val SCHAR_MAX = max intWidth +val SHORT_MAX = max shortWidth +val INT_MAX = max intWidth +val LONG_MAX = max longWidth +val LLONG_MAX = max llongWidth + +val SCHAR_MIN = min charWidth +val SHORT_MIN = min shortWidth +val INT_MIN = min intWidth +val LONG_MIN = min longWidth +val LLONG_MIN = min llongWidth + +val char_signedp = false + +val CHAR_MAX = UCHAR_MAX +val CHAR_MIN = IntInf.fromInt 0 + +open CharLitUtil + +val charliteral_conversion = uchar_conv {umax=UCHAR_MAX} + +end diff --git a/tools/c-parser/umm_heap/AARCH64/Word_Mem_Encoding.thy b/tools/c-parser/umm_heap/AARCH64/Word_Mem_Encoding.thy new file mode 100644 index 000000000..2190af612 --- /dev/null +++ b/tools/c-parser/umm_heap/AARCH64/Word_Mem_Encoding.thy @@ -0,0 +1,22 @@ +(* + * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) + * + * SPDX-License-Identifier: BSD-2-Clause + *) + +theory Word_Mem_Encoding + imports Vanilla32_Preliminaries +begin + +(* Little-endian encoding *) +definition + word_tag :: "'a::len8 word itself \ 'a word typ_info" +where + "word_tag (w::'a::len8 word itself) \ + TypDesc (TypScalar (len_of TYPE('a) div 8) + (len_exp TYPE('a)) + \ field_access = \v bs. (rev o word_rsplit) v, + field_update = \bs v. (word_rcat (rev bs)::'a::len8 word) \) + (''word'' @ nat_to_bin_string (len_of TYPE('a)))" + +end