diff --git a/headers/private/kernel/arch/x86/arch_cpu.h b/headers/private/kernel/arch/x86/arch_cpu.h index cb17d5c57f..69b1d5a302 100644 --- a/headers/private/kernel/arch/x86/arch_cpu.h +++ b/headers/private/kernel/arch/x86/arch_cpu.h @@ -102,6 +102,12 @@ #ifndef _ASSEMBLER +typedef struct x86_mtrr_info { + uint64 base; + uint64 size; + uint8 type; +} x86_mtrr_info; + typedef struct x86_optimized_functions { void (*memcpy)(void* dest, const void* source, size_t count); void* memcpy_end; @@ -118,6 +124,7 @@ typedef struct x86_cpu_module_info { uint8 type); status_t (*get_mtrr)(uint32 index, uint64* _base, uint64* _length, uint8* _type); + void (*set_mtrrs)(const x86_mtrr_info* infos, uint32 count); void (*get_optimized_functions)(x86_optimized_functions* functions); } x86_cpu_module_info; @@ -283,6 +290,7 @@ uint32 x86_count_mtrrs(void); void x86_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type); status_t x86_get_mtrr(uint32 index, uint64* _base, uint64* _length, uint8* _type); +void x86_set_mtrrs(const x86_mtrr_info* infos, uint32 count); bool x86_check_feature(uint32 feature, enum x86_feature_type type); void* x86_get_double_fault_stack(int32 cpu, size_t* _size); int32 x86_double_fault_get_cpu(void); diff --git a/src/add-ons/kernel/cpu/x86/amd.cpp b/src/add-ons/kernel/cpu/x86/amd.cpp index 5a5ec656d9..8d3cd18c70 100644 --- a/src/add-ons/kernel/cpu/x86/amd.cpp +++ b/src/add-ons/kernel/cpu/x86/amd.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2005-2006, Haiku, Inc. + * Copyright 2005-2009, Haiku, Inc. * Distributed under the terms of the MIT License. * * Authors: @@ -42,6 +42,13 @@ amd_init(void) } +static void +amd_set_mtrrs(const x86_mtrr_info* infos, uint32 count) +{ + generic_set_mtrrs(infos, count, generic_count_mtrrs()); +} + + static status_t amd_stdops(int32 op, ...) 
{ @@ -68,4 +75,5 @@ x86_cpu_module_info gAMDModule = { generic_set_mtrr, generic_get_mtrr, + amd_set_mtrrs }; diff --git a/src/add-ons/kernel/cpu/x86/generic_x86.cpp b/src/add-ons/kernel/cpu/x86/generic_x86.cpp index 253f5a0c5a..4d462746c8 100644 --- a/src/add-ons/kernel/cpu/x86/generic_x86.cpp +++ b/src/add-ons/kernel/cpu/x86/generic_x86.cpp @@ -1,9 +1,10 @@ /* - * Copyright 2005-2007, Haiku, Inc. + * Copyright 2005-2009, Haiku, Inc. * Distributed under the terms of the MIT License. * * Authors: * Axel Dörfler, axeld@pinc-software.de + * Ingo Weinhold, ingo_weinhold@gmx.de */ @@ -15,6 +16,7 @@ #include #include #include +#include //#define TRACE_MTRR @@ -66,52 +68,8 @@ mtrr_type_to_string(uint8 type) #endif // TRACE_MTRR -uint32 -generic_count_mtrrs(void) -{ - if (!x86_check_feature(IA32_FEATURE_MTRR, FEATURE_COMMON) - || !x86_check_feature(IA32_FEATURE_MSR, FEATURE_COMMON)) - return 0; - - mtrr_capabilities capabilities(x86_read_msr(IA32_MSR_MTRR_CAPABILITIES)); - TRACE("CPU has %u variable range MTRRs.\n", - (uint8)capabilities.variable_ranges); - return capabilities.variable_ranges; -} - - -void -generic_init_mtrrs(uint32 count) -{ - if (count == 0) - return; - - // disable and clear all MTRRs - // (we leave the fixed MTRRs as is) - // TODO: check if the fixed MTRRs are set on all CPUs identically? 
- TRACE("generic_init_mtrrs(count = %ld)\n", count); - - x86_write_msr(IA32_MSR_MTRR_DEFAULT_TYPE, - x86_read_msr(IA32_MSR_MTRR_DEFAULT_TYPE) & ~IA32_MTRR_ENABLE); - - for (uint32 i = count; i-- > 0;) { - if (x86_read_msr(IA32_MSR_MTRR_PHYSICAL_MASK_0 + i * 2) & IA32_MTRR_VALID_RANGE) - x86_write_msr(IA32_MSR_MTRR_PHYSICAL_MASK_0 + i * 2, 0); - } - - // but turn on variable MTRR functionality - - // we need to ensure that the default type is uncacheable, otherwise - // clearing the mtrrs could result in ranges that aren't supposed to be - // cacheable to become cacheable due to the default type - - x86_write_msr(IA32_MSR_MTRR_DEFAULT_TYPE, - (x86_read_msr(IA32_MSR_MTRR_DEFAULT_TYPE) & ~0xff) | IA32_MTRR_ENABLE); -} - - -void -generic_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type) +static void +set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type) { uint64 mask = length - 1; mask = ~mask & gPhysicalMask; @@ -139,8 +97,60 @@ generic_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type) // reset base as well x86_write_msr(IA32_MSR_MTRR_PHYSICAL_BASE_0 + index, 0); } +} - TRACE("mtrrs now:\n"); + +// #pragma mark - + + +uint32 +generic_count_mtrrs(void) +{ + if (!x86_check_feature(IA32_FEATURE_MTRR, FEATURE_COMMON) + || !x86_check_feature(IA32_FEATURE_MSR, FEATURE_COMMON)) + return 0; + + mtrr_capabilities capabilities(x86_read_msr(IA32_MSR_MTRR_CAPABILITIES)); + TRACE("CPU %ld has %u variable range MTRRs.\n", smp_get_current_cpu(), + (uint8)capabilities.variable_ranges); + return capabilities.variable_ranges; +} + + +void +generic_init_mtrrs(uint32 count) +{ + if (count == 0) + return; + + // If MTRRs are enabled, we leave everything as is (save for, possibly, the + // default, which we set below), so that we can benefit from the BIOS's + // setup until we've installed our own. If MTRRs are disabled, we clear + // all registers and enable MTRRs. 
+ // (we leave the fixed MTRRs as is) + // TODO: check if the fixed MTRRs are set on all CPUs identically? + TRACE("generic_init_mtrrs(count = %ld)\n", count); + + uint64 defaultType = x86_read_msr(IA32_MSR_MTRR_DEFAULT_TYPE); + if ((defaultType & IA32_MTRR_ENABLE) == 0) { + for (uint32 i = 0; i < count; i++) + set_mtrr(i, 0, 0, 0); + } + + // Turn on variable MTRR functionality. + // We need to ensure that the default type is uncacheable, otherwise + // clearing the mtrrs could result in ranges that aren't supposed to be + // cacheable to become cacheable due to the default type. + x86_write_msr(IA32_MSR_MTRR_DEFAULT_TYPE, + (defaultType & ~0xff) | IA32_MTRR_ENABLE); +} + + +void +generic_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type) +{ + set_mtrr(index, base, length, type); + TRACE("[cpu %ld] mtrrs now:\n", smp_get_current_cpu()); generic_dump_mtrrs(generic_count_mtrrs()); } @@ -162,6 +172,34 @@ generic_get_mtrr(uint32 index, uint64 *_base, uint64 *_length, uint8 *_type) } +void +generic_set_mtrrs(const x86_mtrr_info* infos, uint32 count, uint32 maxCount) +{ + // check count + if (maxCount == 0) + return; + + if (count > maxCount) + count = maxCount; + + // disable MTRRs + uint64 defaultType = x86_read_msr(IA32_MSR_MTRR_DEFAULT_TYPE) + & ~IA32_MTRR_ENABLE; + x86_write_msr(IA32_MSR_MTRR_DEFAULT_TYPE, defaultType); + + // set the given MTRRs + for (uint32 i = 0; i < count; i++) + set_mtrr(i, infos[i].base, infos[i].size, infos[i].type); + + // clear the other MTRRs + for (uint32 i = count; i < maxCount; i++) + set_mtrr(i, 0, 0, 0); + + // re-enable MTRRs + x86_write_msr(IA32_MSR_MTRR_DEFAULT_TYPE, defaultType | IA32_MTRR_ENABLE); +} + + status_t generic_mtrr_compute_physical_mask(void) { @@ -180,8 +218,8 @@ generic_mtrr_compute_physical_mask(void) gPhysicalMask = ((1ULL << bits) - 1) & ~(B_PAGE_SIZE - 1); - TRACE("CPU has %ld physical address bits, physical mask is %016Lx\n", - bits, gPhysicalMask); + TRACE("CPU %ld has %ld physical address bits, 
physical mask is %016Lx\n", + smp_get_current_cpu(), bits, gPhysicalMask); return B_OK; } @@ -194,12 +232,13 @@ generic_dump_mtrrs(uint32 count) if (count == 0) return; + int cpu = smp_get_current_cpu(); uint64 defaultType = x86_read_msr(IA32_MSR_MTRR_DEFAULT_TYPE); - TRACE("MTRRs are %sabled\n", + TRACE("[cpu %d] MTRRs are %sabled\n", cpu, (defaultType & IA32_MTRR_ENABLE) != 0 ? "en" : "dis"); - TRACE("default type is %u %s\n", + TRACE("[cpu %d] default type is %u %s\n", cpu, (uint8)defaultType, mtrr_type_to_string(defaultType)); - TRACE("fixed range MTRRs are %sabled\n", + TRACE("[cpu %d] fixed range MTRRs are %sabled\n", cpu, (defaultType & IA32_MTRR_ENABLE_FIXED) != 0 ? "en" : "dis"); for (uint32 i = 0; i < count; i++) { @@ -207,10 +246,10 @@ generic_dump_mtrrs(uint32 count) uint64 length; uint8 type; if (generic_get_mtrr(i, &base, &length, &type) == B_OK) { - TRACE("%lu: base: 0x%Lx; length: 0x%Lx; type: %u %s\n", - i, base, length, type, mtrr_type_to_string(type)); + TRACE("[cpu %d] %lu: base: 0x%Lx; length: 0x%Lx; type: %u %s\n", + cpu, i, base, length, type, mtrr_type_to_string(type)); } else - TRACE("%lu: empty\n", i); + TRACE("[cpu %d] %lu: empty\n", cpu, i); } #endif // TRACE_MTRR } diff --git a/src/add-ons/kernel/cpu/x86/generic_x86.h b/src/add-ons/kernel/cpu/x86/generic_x86.h index 7793e67ea4..54af254158 100644 --- a/src/add-ons/kernel/cpu/x86/generic_x86.h +++ b/src/add-ons/kernel/cpu/x86/generic_x86.h @@ -12,6 +12,8 @@ #include +struct x86_mtrr_info; + extern uint64 gPhysicalMask; #ifdef __cplusplus @@ -23,6 +25,8 @@ extern void generic_init_mtrrs(uint32 count); extern void generic_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type); extern status_t generic_get_mtrr(uint32 index, uint64 *_base, uint64 *_length, uint8 *_type); +extern void generic_set_mtrrs(const struct x86_mtrr_info* infos, + uint32 count, uint32 maxCount); extern status_t generic_mtrr_compute_physical_mask(void); extern void generic_dump_mtrrs(uint32 count); diff --git 
a/src/add-ons/kernel/cpu/x86/intel.cpp b/src/add-ons/kernel/cpu/x86/intel.cpp index 8281ed2045..5ed793139e 100644 --- a/src/add-ons/kernel/cpu/x86/intel.cpp +++ b/src/add-ons/kernel/cpu/x86/intel.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2005, Haiku, Inc. + * Copyright 2005-2009, Haiku, Inc. * Distributed under the terms of the MIT License. * * Authors: @@ -37,6 +37,13 @@ intel_init(void) } +static void +intel_set_mtrrs(const x86_mtrr_info* infos, uint32 count) +{ + generic_set_mtrrs(infos, count, generic_count_mtrrs()); +} + + static status_t intel_stdops(int32 op, ...) { @@ -63,4 +70,5 @@ x86_cpu_module_info gIntelModule = { generic_set_mtrr, generic_get_mtrr, + intel_set_mtrrs }; diff --git a/src/add-ons/kernel/cpu/x86/via.cpp b/src/add-ons/kernel/cpu/x86/via.cpp index d1afbd23f4..8e02c25984 100644 --- a/src/add-ons/kernel/cpu/x86/via.cpp +++ b/src/add-ons/kernel/cpu/x86/via.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2005, Haiku, Inc. + * Copyright 2005-2009, Haiku, Inc. * Distributed under the terms of the MIT License. 
* * Authors: @@ -16,7 +16,6 @@ static uint32 via_count_mtrrs(void) { - cpuid_info cpuInfo; if (!x86_check_feature(IA32_FEATURE_MTRR, FEATURE_COMMON)) return 0; @@ -32,6 +31,13 @@ via_init_mtrrs(void) } +static void +via_set_mtrrs(const x86_mtrr_info* infos, uint32 count) +{ + generic_set_mtrrs(infos, count, via_count_mtrrs()); +} + + static status_t via_init(void) { @@ -76,4 +82,5 @@ x86_cpu_module_info gVIAModule = { generic_set_mtrr, generic_get_mtrr, + via_set_mtrrs }; diff --git a/src/system/kernel/arch/x86/arch_cpu.cpp b/src/system/kernel/arch/x86/arch_cpu.cpp index ab6e64d16e..af31887fa1 100644 --- a/src/system/kernel/arch/x86/arch_cpu.cpp +++ b/src/system/kernel/arch/x86/arch_cpu.cpp @@ -69,6 +69,11 @@ struct set_mtrr_parameter { uint8 type; }; +struct set_mtrrs_parameter { + const x86_mtrr_info* infos; + uint32 count; +}; + extern "C" void reboot(void); // from arch_x86.S @@ -172,6 +177,25 @@ set_mtrr(void *_parameter, int cpu) } +static void +set_mtrrs(void* _parameter, int cpu) +{ + set_mtrrs_parameter* parameter = (set_mtrrs_parameter*)_parameter; + + // wait until all CPUs have arrived here + smp_cpu_rendezvous(&sCpuRendezvous, cpu); + + disable_caches(); + + sCpuModule->set_mtrrs(parameter->infos, parameter->count); + + enable_caches(); + + // wait until all CPUs have arrived here + smp_cpu_rendezvous(&sCpuRendezvous2, cpu); +} + + static void init_mtrrs(void *_unused, int cpu) { @@ -222,6 +246,18 @@ x86_get_mtrr(uint32 index, uint64 *_base, uint64 *_length, uint8 *_type) } +void +x86_set_mtrrs(const x86_mtrr_info* infos, uint32 count) +{ + struct set_mtrrs_parameter parameter; + parameter.infos = infos; + parameter.count = count; + + sCpuRendezvous = sCpuRendezvous2 = 0; + call_all_cpus(&set_mtrrs, ¶meter); +} + + extern "C" void init_sse(void) { diff --git a/src/system/kernel/arch/x86/arch_vm.cpp b/src/system/kernel/arch/x86/arch_vm.cpp index 71058a1efd..93d790f36b 100644 --- a/src/system/kernel/arch/x86/arch_vm.cpp +++ 
b/src/system/kernel/arch/x86/arch_vm.cpp @@ -1,6 +1,7 @@ /* - * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. + * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de. * Copyright 2008, Jérôme Duval. + * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de. * Distributed under the terms of the MIT License. * * Copyright 2001, Travis Geiselbrecht. All rights reserved. @@ -44,111 +45,359 @@ #endif -#define kMaxMemoryTypeRegisters 32 +static const uint32 kMaxMemoryTypeRanges = 32; +static const uint32 kMaxMemoryTypeRegisters = 32; +static const uint64 kMinMemoryTypeRangeSize = 1 << 12; + + +struct memory_type_range_analysis_info { + uint64 size; + uint32 rangesNeeded; + uint32 subtractiveRangesNeeded; + uint64 bestSubtractiveRange; +}; + +struct memory_type_range_analysis { + uint64 base; + uint64 size; + uint32 type; + uint32 rangesNeeded; + uint64 endRange; + memory_type_range_analysis_info left; + memory_type_range_analysis_info right; +}; + +struct memory_type_range { + uint64 base; + uint64 size; + uint32 type; + area_id area; +}; + void *gDmaAddress; -static uint32 sMemoryTypeBitmap; -static int32 sMemoryTypeIDs[kMaxMemoryTypeRegisters]; +static memory_type_range sMemoryTypeRanges[kMaxMemoryTypeRanges]; +static uint32 sMemoryTypeRangeCount; + +static memory_type_range_analysis sMemoryTypeRangeAnalysis[ + kMaxMemoryTypeRanges]; + +static x86_mtrr_info sMemoryTypeRegisters[kMaxMemoryTypeRegisters]; static uint32 sMemoryTypeRegisterCount; -static spinlock sMemoryTypeLock; +static uint32 sMemoryTypeRegistersUsed; + +static mutex sMemoryTypeLock = MUTEX_INITIALIZER("memory type ranges"); -static int32 -allocate_mtrr(void) +static void +set_mtrrs() { - InterruptsSpinLocker _(&sMemoryTypeLock); + x86_set_mtrrs(sMemoryTypeRegisters, sMemoryTypeRegistersUsed); - // find free bit - - for (uint32 index = 0; index < sMemoryTypeRegisterCount; index++) { - if (sMemoryTypeBitmap & (1UL << index)) - continue; - - sMemoryTypeBitmap |= 1UL << index; - return 
index; +#ifdef TRACE_MTRR_ARCH_VM + TRACE_MTRR("set MTRRs to:\n"); + for (uint32 i = 0; i < sMemoryTypeRegistersUsed; i++) { + const x86_mtrr_info& info = sMemoryTypeRegisters[i]; + TRACE_MTRR(" mtrr: %2lu: base: %#9llx, size: %#9llx, type: %u\n", + i, info.base, info.size, info.type); } - - return -1; +#endif } static void -free_mtrr(int32 index) +add_used_mtrr(uint64 base, uint64 size, uint32 type) { - InterruptsSpinLocker _(&sMemoryTypeLock); + ASSERT(sMemoryTypeRegistersUsed < sMemoryTypeRegisterCount); - sMemoryTypeBitmap &= ~(1UL << index); + x86_mtrr_info& info = sMemoryTypeRegisters[sMemoryTypeRegistersUsed++]; + info.base = base; + info.size = size; + info.type = type; } -#if 0 -/*! - Checks if the provided range overlaps an existing mtrr range - If it actually extends an existing range, extendedIndex is filled -*/ -static bool -is_memory_overlapping(uint64 base, uint64 length, int32 *extendedIndex) +static void +analyze_range(memory_type_range_analysis& analysis, uint64 previousEnd, + uint64 nextBase) { - *extendedIndex = -1; - for (uint32 index = 0; index < sMemoryTypeRegisterCount; index++) { - if (sMemoryTypeBitmap & (1UL << index)) { - uint64 b,l; - uint8 t; - x86_get_mtrr(index, &b, &l, &t); + uint64 base = analysis.base; + uint64 size = analysis.size; - // check first for write combining extensions - if (base <= b - && (base + length) >= (b + l) - && t == IA32_MTR_WRITE_COMBINING) { - *extendedIndex = index; - return true; + memory_type_range_analysis_info& left = analysis.left; + memory_type_range_analysis_info& right = analysis.right; + + uint32 leftSubtractiveRangesNeeded = 2; + int32 leftBestSubtractiveRangeDifference = 0; + uint32 leftBestSubtractivePositiveRangesNeeded = 0; + uint32 leftBestSubtractiveRangesNeeded = 0; + + uint32 rightSubtractiveRangesNeeded = 2; + int32 rightBestSubtractiveRangeDifference = 0; + uint32 rightBestSubtractivePositiveRangesNeeded = 0; + uint32 rightBestSubtractiveRangesNeeded = 0; + + uint64 range = 
kMinMemoryTypeRangeSize; + + while (size > 0) { + if ((base & range) != 0) { + left.rangesNeeded++; + + bool replaceBestSubtractive = false; + int32 rangeDifference = (int32)left.rangesNeeded + - (int32)leftSubtractiveRangesNeeded; + if (left.bestSubtractiveRange == 0 + || leftBestSubtractiveRangeDifference < rangeDifference) { + // check for intersection with previous range + replaceBestSubtractive + = previousEnd == 0 || base - range >= previousEnd; } - if ((base >= b && base < (b + l)) - || ((base + length) > b - && (base + length) <= (b + l))) - return true; + + if (replaceBestSubtractive) { + leftBestSubtractiveRangeDifference = rangeDifference; + leftBestSubtractiveRangesNeeded + = leftSubtractiveRangesNeeded; + left.bestSubtractiveRange = range; + leftBestSubtractivePositiveRangesNeeded = 0; + } else + leftBestSubtractivePositiveRangesNeeded++; + + left.size += range; + base += range; + size -= range; + } else if (left.bestSubtractiveRange > 0) + leftSubtractiveRangesNeeded++; + + if ((size & range) != 0) { + right.rangesNeeded++; + + bool replaceBestSubtractive = false; + int32 rangeDifference = (int32)right.rangesNeeded + - (int32)rightSubtractiveRangesNeeded; + if (right.bestSubtractiveRange == 0 + || rightBestSubtractiveRangeDifference < rangeDifference) { + // check for intersection with next range + replaceBestSubtractive + = nextBase == 0 || base + size + range <= nextBase; + } + + if (replaceBestSubtractive) { + rightBestSubtractiveRangeDifference = rangeDifference; + rightBestSubtractiveRangesNeeded + = rightSubtractiveRangesNeeded; + right.bestSubtractiveRange = range; + rightBestSubtractivePositiveRangesNeeded = 0; + } else + rightBestSubtractivePositiveRangesNeeded++; + + right.size += range; + size -= range; + } else if (right.bestSubtractiveRange > 0) + rightSubtractiveRangesNeeded++; + + range <<= 1; + } + + analysis.endRange = range; + + // If a subtractive setup doesn't have any advantages, don't use it. 
+ // Also compute analysis.rangesNeeded. + if (leftBestSubtractiveRangesNeeded + + leftBestSubtractivePositiveRangesNeeded >= left.rangesNeeded) { + left.bestSubtractiveRange = 0; + left.subtractiveRangesNeeded = 0; + analysis.rangesNeeded = left.rangesNeeded; + } else { + left.subtractiveRangesNeeded = leftBestSubtractiveRangesNeeded + + leftBestSubtractivePositiveRangesNeeded; + analysis.rangesNeeded = left.subtractiveRangesNeeded; + } + + if (rightBestSubtractiveRangesNeeded + + rightBestSubtractivePositiveRangesNeeded >= right.rangesNeeded) { + right.bestSubtractiveRange = 0; + right.subtractiveRangesNeeded = 0; + analysis.rangesNeeded += right.rangesNeeded; + } else { + right.subtractiveRangesNeeded = rightBestSubtractiveRangesNeeded + + rightBestSubtractivePositiveRangesNeeded; + analysis.rangesNeeded += right.subtractiveRangesNeeded; + } +} + +static void +compute_mtrrs(const memory_type_range_analysis& analysis) +{ + const memory_type_range_analysis_info& left = analysis.left; + const memory_type_range_analysis_info& right = analysis.right; + + // generate a setup for the left side + if (left.rangesNeeded > 0) { + uint64 base = analysis.base; + uint64 size = left.size; + uint64 range = analysis.endRange; + uint64 rangeEnd = base + size; + bool subtractive = false; + while (size > 0) { + if (range == left.bestSubtractiveRange) { + base = rangeEnd - 2 * range; + add_used_mtrr(base, range, analysis.type); + subtractive = true; + break; + } + + if ((size & range) != 0) { + rangeEnd -= range; + add_used_mtrr(rangeEnd, range, analysis.type); + size -= range; + } + + range >>= 1; + } + + if (subtractive) { + uint64 shortestRange = range; + while (size > 0) { + if ((size & range) != 0) { + shortestRange = range; + size -= range; + } else { + add_used_mtrr(base, range, IA32_MTR_UNCACHED); + base += range; + } + + range >>= 1; + } + + add_used_mtrr(base, shortestRange, IA32_MTR_UNCACHED); } } - return false; -} -#endif // 0 + // generate a setup for the right side + 
if (right.rangesNeeded > 0) { + uint64 base = analysis.base + left.size; + uint64 size = right.size; + uint64 range = analysis.endRange; + bool subtractive = false; + while (size > 0) { + if (range == right.bestSubtractiveRange) { + add_used_mtrr(base, range * 2, analysis.type); + subtractive = true; + break; + } -static uint64 -nearest_power(uint64 value) -{ - uint64 power = 1UL << 12; - // 12 bits is the smallest supported alignment/length + if ((size & range) != 0) { + add_used_mtrr(base, range, analysis.type); + base += range; + size -= range; + } - while (value > power) - power <<= 1; + range >>= 1; + } - return power; -} + if (subtractive) { + uint64 rangeEnd = base + range * 2; + uint64 shortestRange = range; + while (size > 0) { + if ((size & range) != 0) { + shortestRange = range; + size -= range; + } else { + rangeEnd -= range; + add_used_mtrr(rangeEnd, range, IA32_MTR_UNCACHED); + } + range >>= 1; + } -static void -nearest_powers(uint64 value, uint64 *lower, uint64 *upper) -{ - uint64 power = 1UL << 12; - *lower = power; - // 12 bits is the smallest supported alignment/length - - while (value >= power) { - *lower = power; - power <<= 1; + rangeEnd -= shortestRange; + add_used_mtrr(rangeEnd, shortestRange, IA32_MTR_UNCACHED); + } } - - *upper = power; } static status_t -set_memory_type(int32 id, uint64 base, uint64 length, uint32 type) +update_mttrs() { - int32 index = -1; + // Transfer the range array to the analysis array, dropping all uncachable + // ranges (that's the default anyway) and joining adjacent ranges with the + // same type. 
+ memory_type_range_analysis* ranges = sMemoryTypeRangeAnalysis; + uint32 rangeCount = 0; + { + uint32 previousRangeType = IA32_MTR_UNCACHED; + uint64 previousRangeEnd = 0; + for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) { + if (sMemoryTypeRanges[i].type != IA32_MTR_UNCACHED) { + uint64 rangeEnd = sMemoryTypeRanges[i].base + + sMemoryTypeRanges[i].size; + if (previousRangeType == sMemoryTypeRanges[i].type + && previousRangeEnd >= sMemoryTypeRanges[i].base) { + // the range overlaps/continues the previous range -- just + // enlarge that one + if (rangeEnd > previousRangeEnd) + previousRangeEnd = rangeEnd; + ranges[rangeCount - 1].size = previousRangeEnd + - ranges[rangeCount - 1].base; + } else { + // add the new range + memset(&ranges[rangeCount], 0, sizeof(ranges[rangeCount])); + ranges[rangeCount].base = sMemoryTypeRanges[i].base; + ranges[rangeCount].size = sMemoryTypeRanges[i].size; + ranges[rangeCount].type = sMemoryTypeRanges[i].type; + previousRangeEnd = rangeEnd; + previousRangeType = sMemoryTypeRanges[i].type; + rangeCount++; + } + } + } + } + // analyze the ranges + uint32 registersNeeded = 0; + uint64 previousEnd = 0; + for (uint32 i = 0; i < rangeCount; i++) { + memory_type_range_analysis& range = ranges[i]; + uint64 nextBase = i + 1 < rangeCount ? 
ranges[i + 1].base : 0; + analyze_range(range, previousEnd, nextBase); + registersNeeded += range.rangesNeeded; + previousEnd = range.base + range.size; + } + + // fail when we need more registers than we have + if (registersNeeded > sMemoryTypeRegisterCount) + return B_BUSY; + + sMemoryTypeRegistersUsed = 0; + + for (uint32 i = 0; i < rangeCount; i++) { + memory_type_range_analysis& range = ranges[i]; + compute_mtrrs(range); + } + + set_mtrrs(); + + return B_OK; +} + + +static void +remove_memory_type_range_locked(uint32 index) +{ + sMemoryTypeRangeCount--; + if (index < sMemoryTypeRangeCount) { + memmove(sMemoryTypeRanges + index, sMemoryTypeRanges + index + 1, + (sMemoryTypeRangeCount - index) * sizeof(memory_type_range)); + } +} + + +static status_t +add_memory_type_range(area_id areaID, uint64 base, uint64 size, uint32 type) +{ + // translate the type if (type == 0) return B_OK; @@ -172,148 +421,124 @@ set_memory_type(int32 id, uint64 base, uint64 length, uint32 type) return B_BAD_VALUE; } - if (sMemoryTypeRegisterCount == 0) - return B_NOT_SUPPORTED; + TRACE_MTRR("add_memory_type_range(%ld, %#llx, %#llx, %lu)\n", areaID, base, + size, type); -#if 0 - // check if it overlaps - if (type == IA32_MTR_WRITE_COMBINING - && is_memory_overlapping(base, length, &index)) { - if (index < 0) { - dprintf("allocate MTRR failed, it overlaps an existing MTRR slot\n"); + // base and size must at least be aligned to the minimum range size + if (((base | size) & (kMinMemoryTypeRangeSize - 1)) != 0) { + dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory base or " + "size not minimally aligned!\n", areaID, base, size, type); + return B_BAD_VALUE; + } + + MutexLocker locker(sMemoryTypeLock); + + if (sMemoryTypeRangeCount == kMaxMemoryTypeRanges) { + dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Out of " + "memory ranges!\n", areaID, base, size, type); + return B_BUSY; + } + + // iterate through the existing ranges and check for clashes + bool 
foundInsertionIndex = false; + uint32 index = 0; + for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) { + const memory_type_range& range = sMemoryTypeRanges[i]; + if (range.base > base) { + if (range.base - base < size && range.type != type) { + dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory " + "range intersects with existing one (%#llx, %#llx, %lu).\n", + areaID, base, size, type, range.base, range.size, + range.type); + return B_BAD_VALUE; + } + + // found the insertion index + if (!foundInsertionIndex) { + index = i; + foundInsertionIndex = true; + } + break; + } else if (base - range.base < range.size && range.type != type) { + dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory " + "range intersects with existing one (%#llx, %#llx, %lu).\n", + areaID, base, size, type, range.base, range.size, range.type); return B_BAD_VALUE; } - // we replace an existing write-combining mtrr with a bigger one at the index position - } -#endif - - // length must be a power of 2; just round it up to the next value - length = nearest_power(length); - - if (length + base <= base) { - // 4GB overflow - return B_BAD_VALUE; } - // base must be aligned to the length - if (base & (length - 1)) - return B_BAD_VALUE; + if (!foundInsertionIndex) + index = sMemoryTypeRangeCount; - if (index < 0) - index = allocate_mtrr(); - if (index < 0) - return B_ERROR; - - TRACE_MTRR("allocate MTRR slot %ld, base = %Lx, length = %Lx, type=0x%lx\n", - index, base, length, type); - - sMemoryTypeIDs[index] = id; - x86_set_mtrr(index, base, length, type); - - return B_OK; -} - - -#define MTRR_MAX_SOLUTIONS 5 // usually MTRR count is eight, keep a few for other needs -#define MTRR_MIN_SIZE 0x80000 // 512 KB -static int64 sSolutions[MTRR_MAX_SOLUTIONS]; -static int32 sSolutionCount; -static int64 sPropositions[MTRR_MAX_SOLUTIONS]; - - -/*! Find the nearest powers of two for a value, save current iteration, - then make recursives calls for the remaining values. 
- It uses at most MTRR_MAX_SOLUTIONS levels of recursion because - only that count of MTRR registers are available to map the memory. -*/ -static void -find_nearest(uint64 value, int iteration) -{ - int i; - uint64 down, up; - TRACE_MTRR("find_nearest %Lx %d\n", value, iteration); - if (iteration > (MTRR_MAX_SOLUTIONS - 1) || (iteration + 1) >= sSolutionCount) { - if (sSolutionCount > MTRR_MAX_SOLUTIONS) { - // no solutions yet, save something - for (i=0; i 0); + + dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): update_mtrrs() " + "failed.\n", areaID, base, size, type); + remove_memory_type_range_locked(index); + return error; +} + + +static void +remove_memory_type_range(area_id areaID) +{ + MutexLocker locker(sMemoryTypeLock); + + for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) { + if (sMemoryTypeRanges[i].area == areaID) { + TRACE_MTRR("remove_memory_type_range(%ld, %#llx, %#llxd)\n", + areaID, sMemoryTypeRanges[i].base, sMemoryTypeRanges[i].size); + remove_memory_type_range_locked(i); + update_mttrs(); + // TODO: It's actually possible that this call fails, since + // compute_mtrrs() joins ranges and removing one might cause a + // previously joined big simple range to be split into several + // ranges (or just make it more complicated). 
+ return; } } } @@ -371,8 +596,6 @@ arch_vm_init_end(kernel_args *args) status_t arch_vm_init_post_modules(kernel_args *args) { -// void *cookie; - // the x86 CPU modules are now accessible sMemoryTypeRegisterCount = x86_count_mtrrs(); @@ -383,17 +606,10 @@ arch_vm_init_post_modules(kernel_args *args) if (sMemoryTypeRegisterCount > kMaxMemoryTypeRegisters) sMemoryTypeRegisterCount = kMaxMemoryTypeRegisters; - // init memory type ID table - - for (uint32 i = 0; i < sMemoryTypeRegisterCount; i++) { - sMemoryTypeIDs[i] = -1; - } - // set the physical memory ranges to write-back mode - for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { - set_memory_write_back(-1, args->physical_memory_range[i].start, - args->physical_memory_range[i].size); + add_memory_type_range(-1, args->physical_memory_range[i].start, + args->physical_memory_range[i].size, B_MTR_WB); } return B_OK; @@ -433,22 +649,10 @@ arch_vm_supports_protection(uint32 protection) void arch_vm_unset_memory_type(struct vm_area *area) { - uint32 index; - if (area->memory_type == 0) return; - // find index for area ID - - for (index = 0; index < sMemoryTypeRegisterCount; index++) { - if (sMemoryTypeIDs[index] == area->id) { - x86_set_mtrr(index, 0, 0, 0); - - sMemoryTypeIDs[index] = -1; - free_mtrr(index); - break; - } - } + remove_memory_type_range(area->id); } @@ -457,5 +661,5 @@ arch_vm_set_memory_type(struct vm_area *area, addr_t physicalBase, uint32 type) { area->memory_type = type >> MEMORY_TYPE_SHIFT; - return set_memory_type(area->id, physicalBase, area->size, type); + return add_memory_type_range(area->id, physicalBase, area->size, type); }