Movatterモバイル変換


[0]ホーム

URL:


This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v2 16/28] arm64/sve: Probe SVE capabilities and usable vector lengths


This patch uses the cpufeatures framework to determine common SVE
capabilities and vector lengths, and configures the runtime SVE
support code appropriately.

ZCR_ELx is not really a feature register, but it is convenient to
use it as a template for recording the maximum vector length
supported by a CPU, using the LEN field.  This field is similar to
a feature field in that it is a contiguous bitfield for which we
want to determine the minimum system-wide value.  This patch adds
ZCR as a pseudo-register in cpuinfo/cpufeatures, with appropriate
custom code to populate it.  Finding the minimum supported value of
the LEN field is left to the cpufeatures framework in the usual
way.

The meaning of ID_AA64ZFR0_EL1 is not architecturally defined yet,
so for now we just require it to be zero.

Note that much of this code is dormant and SVE still won't be used
yet, since system_supports_sve() remains hardwired to false.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>

---

Changes since v1
----------------

Requested by Alex Bennée:

* Thin out BUG_ON()s:
  Redundant BUG_ON()s and ones that just check invariants are removed.
  Important sanity-checks are migrated to WARN_ON()s, with some
  minimal best-effort patch-up code.

Other changes related to Alex Bennée's comments:

* Migrate away from magic numbers for converting VL to VQ.

Requested by Suzuki Poulose:

* Make sve_vq_map __ro_after_init.

Other changes related to Suzuki Poulose's comments:

* Rely on cpufeatures for not attempting to update the vq map after boot.
---
 arch/arm64/include/asm/cpu.h        |   4 ++
 arch/arm64/include/asm/cpufeature.h |  29 ++++++++++
 arch/arm64/include/asm/fpsimd.h     |  10 ++++
 arch/arm64/kernel/cpufeature.c      |  50 +++++++++++++++++
 arch/arm64/kernel/cpuinfo.c         |   6 ++
 arch/arm64/kernel/fpsimd.c          | 106 +++++++++++++++++++++++++++++++++++-
 6 files changed, 202 insertions(+), 3 deletions(-)

diff --git 
a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.hindex 889226b..8839227 100644--- a/arch/arm64/include/asm/cpu.h+++ b/arch/arm64/include/asm/cpu.h@@ -41,6 +41,7 @@ struct cpuinfo_arm64 { u64reg_id_aa64mmfr2; u64reg_id_aa64pfr0; u64reg_id_aa64pfr1;+u64reg_id_aa64zfr0;  u32reg_id_dfr0; u32reg_id_isar0;@@ -59,6 +60,9 @@ struct cpuinfo_arm64 { u32reg_mvfr0; u32reg_mvfr1; u32reg_mvfr2;++/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */+u64reg_zcr; };  DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.hindex 4ea3441..d98e7ba 100644--- a/arch/arm64/include/asm/cpufeature.h+++ b/arch/arm64/include/asm/cpufeature.h@@ -10,7 +10,9 @@ #define __ASM_CPUFEATURE_H  #include <asm/cpucaps.h>+#include <asm/fpsimd.h> #include <asm/hwcap.h>+#include <asm/sigcontext.h> #include <asm/sysreg.h>  /*@@ -223,6 +225,13 @@ static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) return val == ID_AA64PFR0_EL0_32BIT_64BIT; } +static inline bool id_aa64pfr0_sve(u64 pfr0)+{+u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);++return val > 0;+}+ void __init setup_cpu_features(void);  void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,@@ -267,6 +276,26 @@ static inline bool system_supports_sve(void) return false; } +/*+ * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE+ * vector length.+ * Use only if SVE is present.  
This function clobbers the SVE vector length.+ */+static u64 __maybe_unused read_zcr_features(void)+{+u64 zcr;+unsigned int vq_max;++write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);++zcr = read_sysreg_s(SYS_ZCR_EL1);+zcr &= ~(u64)ZCR_ELx_LEN_MASK;+vq_max = sve_vq_from_vl(sve_get_vl());+zcr |= vq_max - 1;++return zcr;+}+ #endif /* __ASSEMBLY__ */  #endifdiff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.hindex 32c8e19..6c22624 100644--- a/arch/arm64/include/asm/fpsimd.h+++ b/arch/arm64/include/asm/fpsimd.h@@ -92,12 +92,22 @@ extern void fpsimd_dup_sve(struct task_struct *dst, extern int sve_set_vector_length(struct task_struct *task,  unsigned long vl, unsigned long flags); +extern void __init sve_init_vq_map(void);+extern void sve_update_vq_map(void);+extern int sve_verify_vq_map(void);+extern void __init sve_setup(void);+ #else /* ! CONFIG_ARM64_SVE */  static void __maybe_unused sve_alloc(struct task_struct *task) { } static void __maybe_unused fpsimd_release_thread(struct task_struct *task) { } static void __maybe_unused fpsimd_dup_sve(struct task_struct *dst,   struct task_struct const *src) { }+static void __maybe_unused sve_init_vq_map(void) { }+static void __maybe_unused sve_update_vq_map(void) { }+static int __maybe_unused sve_verify_vq_map(void) { return 0; }+static void __maybe_unused sve_setup(void) { }+ #endif /* ! 
CONFIG_ARM64_SVE */  /* For use by EFI runtime services calls only */diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.cindex 43ba8df..c30bb6b 100644--- a/arch/arm64/kernel/cpufeature.c+++ b/arch/arm64/kernel/cpufeature.c@@ -27,6 +27,7 @@ #include <asm/cpu.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h>+#include <asm/fpsimd.h> #include <asm/mmu_context.h> #include <asm/processor.h> #include <asm/sysreg.h>@@ -283,6 +284,12 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_zcr[] = {+ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,+ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0),/* LEN */+ARM64_FTR_END,+};+ /*  * Common ftr bits for a 32bit register with all hidden, strict  * attributes, with 4bit feature fields and a default safe value of@@ -349,6 +356,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz),+ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz),  /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),@@ -363,6 +371,9 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), +/* Op1 = 0, CRn = 1, CRm = 2 */+ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),+ /* Op1 = 3, CRn = 0, CRm = 0 */ { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),@@ -500,6 +511,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);+init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);  if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);@@ -520,6 +532,10 @@ void __init 
init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); } +if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {+init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);+sve_init_vq_map();+} }  static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)@@ -623,6 +639,9 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,       info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); +taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,+      info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);+ /*  * If we have AArch32, we care about 32-bit features for compat.  * If the system doesn't support AArch32, don't update them.@@ -670,6 +689,14 @@ void update_cpu_features(int cpu, info->reg_mvfr2, boot->reg_mvfr2); } +if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {+taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,+info->reg_zcr, boot->reg_zcr);++if (!sys_caps_initialised)+sve_update_vq_map();+}+ /*  * Mismatched CPU features are a recipe for disaster. Don't even  * pretend to support them.@@ -1097,6 +1124,23 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps) } } +static void verify_sve_features(void)+{+u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);+u64 zcr = read_zcr_features();++unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;+unsigned int len = zcr & ZCR_ELx_LEN_MASK;++if (len < safe_len || sve_verify_vq_map()) {+pr_crit("CPU%d: SVE: required vector length(s) missing\n",+smp_processor_id());+cpu_die_early();+}++/* Add checks on other ZCR bits here if necessary */+}+ /*  * Run through the enabled system capabilities and enable() it on this CPU.  
* The capabilities were decided based on the available CPUs at the boot time.@@ -1110,8 +1154,12 @@ static void verify_local_cpu_capabilities(void) verify_local_cpu_errata_workarounds(); verify_local_cpu_features(arm64_features); verify_local_elf_hwcaps(arm64_elf_hwcaps);+ if (system_supports_32bit_el0()) verify_local_elf_hwcaps(compat_elf_hwcaps);++if (system_supports_sve())+verify_sve_features(); }  void check_local_cpu_capabilities(void)@@ -1189,6 +1237,8 @@ void __init setup_cpu_features(void) if (system_supports_32bit_el0()) setup_elf_hwcaps(compat_elf_hwcaps); +sve_setup();+ /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.cindex 3118859..be260e8 100644--- a/arch/arm64/kernel/cpuinfo.c+++ b/arch/arm64/kernel/cpuinfo.c@@ -19,6 +19,7 @@ #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/cpufeature.h>+#include <asm/fpsimd.h>  #include <linux/bitops.h> #include <linux/bug.h>@@ -326,6 +327,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);+info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);  /* Update the 32bit ID registers only if AArch32 is implemented */ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {@@ -348,6 +350,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_mvfr2 = read_cpuid(MVFR2_EL1); } +if (IS_ENABLED(CONFIG_ARM64_SVE) &&+    id_aa64pfr0_sve(info->reg_id_aa64pfr0))+info->reg_zcr = read_zcr_features();+ cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.cindex 713476e..cea05a7 100644--- a/arch/arm64/kernel/fpsimd.c+++ b/arch/arm64/kernel/fpsimd.c@@ -110,19 +110,19 @@ static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);  /* Default VL for tasks that don't set it 
explicitly: */-static int sve_default_vl = SVE_VL_MIN;+static int sve_default_vl = -1;  #ifdef CONFIG_ARM64_SVE  /* Maximum supported vector length across all CPUs (initially poisoned) */ int __ro_after_init sve_max_vl = -1; /* Set of available vector lengths, as vq_to_bit(vq): */-static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);+static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);  #else /* ! CONFIG_ARM64_SVE */  /* Dummy declaration for code that will be optimised out: */-extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);+extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);  #endif /* ! CONFIG_ARM64_SVE */ @@ -387,6 +387,103 @@ int sve_set_vector_length(struct task_struct *task, return 0; } +static unsigned long *sve_alloc_vq_map(void)+{+return kzalloc(BITS_TO_LONGS(SVE_VQ_MAX) * sizeof(unsigned long),+       GFP_KERNEL);+}++static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))+{+unsigned int vq, vl;+unsigned long zcr;++zcr = ZCR_ELx_LEN_MASK;+zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;++for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {+write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */+vl = sve_get_vl();+vq = sve_vq_from_vl(vl); /* skip intervening lengths */+set_bit(vq_to_bit(vq), map);+}+}++void __init sve_init_vq_map(void)+{+sve_probe_vqs(sve_vq_map);+}++/*+ * If we haven't committed to the set of supported VQs yet, filter out+ * those not supported by the current CPU.+ */+void sve_update_vq_map(void)+{+unsigned long *map;++map = sve_alloc_vq_map();+sve_probe_vqs(map);+bitmap_and(sve_vq_map, sve_vq_map, map, SVE_VQ_MAX);+kfree(map);+}++/* Check whether the current CPU supports all VQs in the committed set */+int sve_verify_vq_map(void)+{+int ret = 0;+unsigned long *map = sve_alloc_vq_map();++sve_probe_vqs(map);+bitmap_andnot(map, sve_vq_map, map, SVE_VQ_MAX);+if (!bitmap_empty(map, SVE_VQ_MAX)) {+pr_warn("SVE: cpu%d: Required vector length(s) missing\n",+smp_processor_id());+ret = -EINVAL;+}++kfree(map);++return ret;+}++void __init 
sve_setup(void)+{+u64 zcr;++if (!system_supports_sve())+return;++/*+ * The SVE architecture mandates support for 128-bit vectors,+ * so sve_vq_map must have at least SVE_VQ_MIN set.+ * If something went wrong, at least try to patch it up:+ */+if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))+set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);++zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);+sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);++/*+ * Sanity-check that the max VL we determined through CPU features+ * corresponds properly to sve_vq_map.  If not, do our best:+ */+if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))+sve_max_vl = find_supported_vector_length(sve_max_vl);++/*+ * For the default VL, pick the maximum supported value <= 64.+ * VL == 64 is guaranteed not to grow the signal frame.+ */+sve_default_vl = find_supported_vector_length(64);++pr_info("SVE: maximum available vector length %u bytes per vector\n",+sve_max_vl);+pr_info("SVE: default vector length %u bytes per vector\n",+sve_default_vl);+}+ void fpsimd_release_thread(struct task_struct *dead_task) { sve_free(dead_task);@@ -502,6 +599,9 @@ void fpsimd_flush_thread(void)  * This is where we ensure that all user tasks have a valid  * vector length configured: no kernel task can become a user  * task without an exec and hence a call to this function.+ * By the time the first call to this function is made, all+ * early hardware probing is complete, so sve_default_vl+ * should be valid.  * If a bug causes this to go wrong, we make some noise and  * try to fudge thread.sve_vl to a safe value here.  */-- 2.1.4

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:[Date Prev] [Date Next][Thread Prev] [Thread Next]

[8]ページ先頭

©2009-2026 Movatter.jp