在ARMv8/ARMv9架构中,TLB(Translation Lookaside Buffer)作为地址转换的缓存机制,对系统性能至关重要。当页表内容发生变化时,必须及时使TLB中对应的缓存项失效,以保证内存访问的正确性。AArch64架构提供了一套精细控制的TLB失效指令集,允许开发者根据不同的场景需求选择最合适的失效方式。
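AArch64的TLB失效操作主要分为以下几类:全部失效(如TLBIOp_ALL)、按虚拟地址失效(如TLBIOp_VA)、按虚拟地址范围失效(如TLBIOp_RVAA)、按中间物理地址(IPA)的stage 2失效(如TLBIOp_IPAS2),以及按VMID对stage 1和stage 2整体失效(如TLBIOp_VMALLS12)。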
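每个TLB失效操作都通过一组参数精确控制其作用范围:安全状态(security)、转换机制(regime)、虚拟机标识(vmid)、广播域(broadcast_in)、页表层级(level)、内存属性(attr),以及携带地址、ASID、TTL等字段的寄存器操作数(Xt)。下面以两个函数为例: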
// AArch64_TLBIP_RVAA
// ==================
// Builds a TLBIRecord for a TLBIOp_RVAA operation from the 128-bit operand Xt,
// applies it with TLBI(), and then hands it to BroadcastTLBI() unless the
// resulting broadcast scope is Broadcast_NSH.
//
//   security, regime, vmid, level, attr
//                : copied unchanged into the record.
//   broadcast_in : requested broadcast scope; a local copy may be changed from
//                  Broadcast_OSH to Broadcast_OSHnISH before broadcasting.
//   Xt           : operand. Bits [38:37] supply ttl[1:0], bit [32] is consulted
//                  when FEAT_TLBID is implemented, bits [15:0] are passed to
//                  TLBIDomains(), and the whole value is decoded by TLBIPRange().
//
// No ASID is written to the record by this function.
func AArch64_TLBIP_RVAA(security : SecurityState, regime : Regime, vmid : bits(16),
                        broadcast_in : Broadcast, level : TLBILevel, attr : TLBIMemAttr,
                        Xt : bits(128))
begin
    assert PSTATE.EL IN {EL3, EL2, EL1};

    // Parameters are read-only, so work on a mutable copy of the scope.
    var broadcast : Broadcast = broadcast_in;
    var r : TLBIRecord;
    r.op = TLBIOp_RVAA;
    r.from_aarch64 = TRUE;
    r.security = security;
    r.regime = regime;
    r.vmid = vmid;
    r.use_vmid = UseVMID(regime);
    r.level = level;
    r.attr = attr;
    // Only the low two bits of ttl are populated for this operation.
    r.ttl[1:0] = Xt[38:37];

    // With FEAT_TLBID implemented and Xt[32] set, d64 is unconditional and d128
    // depends on ttl[1:0] being '00'; otherwise the two roles are swapped.
    if IsFeatureImplemented(FEAT_TLBID) && Xt[32] == '1' then
        r.d64 = TRUE;
        r.d128 = r.ttl[1:0] == '00';
    else
        r.d64 = r.ttl[1:0] == '00';
        r.d128 = TRUE;
    end;

    // TLBIPRange() yields the translation granule and the start/end addresses,
    // plus a validity flag. When the flag is FALSE nothing is invalidated or
    // broadcast.
    var valid : boolean;
    (valid, r.tg, r.address, r.end_address) = TLBIPRange(regime, Xt);
    if !valid then return; end;

    // Apply the invalidation through TLBI() before any broadcast.
    TLBI(r);

    let domains : bits(16) = TLBIDomains(broadcast, Xt[15:0]);
    // An Outer Shareable request is switched to Broadcast_OSHnISH when either
    // helper check returns TRUE.
    if (broadcast == Broadcast_OSH &&
          (IsBroadcast_OSHnISH(regime) || OSHDomainExceedsNIS(domains))) then
        broadcast = Broadcast_OSHnISH;
    end;

    if broadcast != Broadcast_NSH then BroadcastTLBI(broadcast, r, domains); end;
    return;
end;
// AArch64_TLBIP_VA
// ================
// Builds a TLBIRecord for a TLBIOp_VA operation from the 128-bit operand Xt,
// applies it with TLBI(), and then hands it to BroadcastTLBI() unless the
// resulting broadcast scope is Broadcast_NSH.
//
//   security, regime, vmid, level, attr
//                : copied unchanged into the record.
//   broadcast_in : requested broadcast scope; a local copy may be changed from
//                  Broadcast_OSH to Broadcast_OSHnISH before broadcasting.
//   Xt           : operand. Bits [107:64] are the address with its low 12 bits
//                  dropped, bits [63:48] the ASID, bits [47:44] the TTL, bit [32]
//                  is consulted when FEAT_TLBID is implemented, and bits [15:0]
//                  are passed to TLBIDomains().
func AArch64_TLBIP_VA(security : SecurityState, regime : Regime, vmid : bits(16),
                      broadcast_in : Broadcast, level : TLBILevel, attr : TLBIMemAttr,
                      Xt : bits(128))
begin
    assert PSTATE.EL IN {EL3, EL2, EL1};

    // Parameters are read-only, so work on a mutable copy of the scope.
    var broadcast : Broadcast = broadcast_in;
    var r : TLBIRecord;
    r.op = TLBIOp_VA;
    r.from_aarch64 = TRUE;
    r.security = security;
    r.regime = regime;
    r.vmid = vmid;
    r.use_vmid = UseVMID(regime);
    r.level = level;
    r.attr = attr;
    r.asid = Xt[63:48];
    r.ttl = Xt[47:44];
    // Re-append twelve zero bits to the 44-bit field, then widen to 64 bits.
    r.address = ZeroExtend{64}(Xt[107:64] :: Zeros{12});

    // With FEAT_TLBID implemented and Xt[32] set, d64 is unconditional and d128
    // depends on the top two TTL bits being '00'; otherwise the roles are swapped.
    if IsFeatureImplemented(FEAT_TLBID) && Xt[32] == '1' then
        r.d64 = TRUE;
        r.d128 = r.ttl == '00xx';
    else
        r.d64 = r.ttl == '00xx';
        r.d128 = TRUE;
    end;

    // Apply the invalidation through TLBI() before any broadcast.
    TLBI(r);

    let domains : bits(16) = TLBIDomains(broadcast, Xt[15:0]);
    // NOTE(review): unlike AArch64_TLBIP_RVAA, no OSHDomainExceedsNIS(domains)
    // check is made here; confirm against the architecture pseudocode.
    if broadcast == Broadcast_OSH && IsBroadcast_OSHnISH(regime) then
        broadcast = Broadcast_OSHnISH;
    end;

    if broadcast != Broadcast_NSH then BroadcastTLBI(broadcast, r, domains); end;
    return;
end;
TLB失效操作的核心是TLBIMatch函数,它决定了哪些TLB条目应该被失效:
// TLBIMatch
// =========
// Decides whether the TLB entry `tlb_entry` is selected by the invalidation
// request `tlbi`, and returns TRUE when it is.
//
// Only the TLBIOp_ALL arm is shown in this excerpt; the arms for the other
// operation types are elided.
func TLBIMatch(tlbi : TLBIRecord, tlb_entry : TLBRecord) => boolean
begin
    var match : boolean;

    // Mask with the low `blocksize` bits set, widened to 64 bits.
    let entry_block_mask : bits(64) = ZeroExtend{}(Ones{tlb_entry.blocksize});
    // Last and first input address of the block that contains the entry.
    // Not read by the TLBIOp_ALL arm; presumably used by the elided arms.
    var entry_end_address : bits(64) = tlb_entry.context.ia OR entry_block_mask;
    var entry_start_address : bits(64) = tlb_entry.context.ia AND NOT entry_block_mask;

    case tlbi.op of
        when TLBIOp_ALL =>
            // For an AArch64 request, Regime_EL20 and Regime_EL2 are treated as
            // interchangeable between the request and the entry.
            let relax_regime : boolean = (tlbi.from_aarch64 &&
                                          tlbi.regime IN {Regime_EL20, Regime_EL2} &&
                                          tlb_entry.context.regime IN {Regime_EL20, Regime_EL2});
            // Security state must be equal; regime must be equal or relaxed.
            match = (tlbi.security == tlb_entry.context.ss &&
                     (tlbi.regime == tlb_entry.context.regime || relax_regime));
        // Matching logic for the other operation types ...
    end;

    // A request with attribute TLBI_ExcludeXS never matches an entry whose
    // context.xs is '1', whatever the case above decided.
    if tlbi.attr == TLBI_ExcludeXS && tlb_entry.context.xs == '1' then
        match = FALSE;
    end;

    return match;
end;
在虚拟化环境中,TLB管理更加复杂,涉及stage1和stage2两级转换:
// AArch64_TLBI_IPAS2
// ==================
// Builds a TLBIRecord for a TLBIOp_IPAS2 operation from the 64-bit operand Xt,
// applies it with TLBI(), and then hands it to BroadcastTLBI() unless the
// resulting broadcast scope is Broadcast_NSH.
//
//   security     : copied into the record and also selects the IPA space.
//   regime, vmid, level, attr
//                : copied unchanged into the record.
//   broadcast_in : requested broadcast scope; a local copy may be changed from
//                  Broadcast_OSH to Broadcast_OSHnISH before broadcasting.
//   Xt           : operand. Bits [39:0] are the address with its low 12 bits
//                  dropped, bits [47:44] the TTL, and bit [63] selects the IPA
//                  space when security is SS_Secure.
func AArch64_TLBI_IPAS2(security : SecurityState, regime : Regime, vmid : bits(16),
                        broadcast_in : Broadcast, level : TLBILevel, attr : TLBIMemAttr,
                        Xt : bits(64))
begin
    // Only EL3 and EL2 are accepted here (EL1 is not).
    assert PSTATE.EL IN {EL3, EL2};

    // Parameters are read-only, so work on a mutable copy of the scope.
    var broadcast : Broadcast = broadcast_in;
    var r : TLBIRecord;
    r.op = TLBIOp_IPAS2;
    r.from_aarch64 = TRUE;
    r.security = security;
    r.regime = regime;
    r.vmid = vmid;
    // The VMID is always used for this operation.
    r.use_vmid = TRUE;
    r.level = level;
    r.attr = attr;
    r.ttl = Xt[47:44];
    // Re-append twelve zero bits to the 40-bit field, then widen to 64 bits.
    r.address = ZeroExtend{64}(Xt[39:0] :: Zeros{12});
    // d64 is unconditional; d128 depends on the top two TTL bits being '00'.
    r.d64 = TRUE;
    r.d128 = r.ttl == '00xx';

    // Select the IPA space from the security state. Only in SS_Secure does the
    // operand choose, through Xt[63], between PAS_NonSecure and PAS_Secure.
    case security of
        when SS_NonSecure =>
            r.ipaspace = PAS_NonSecure;
        when SS_Secure =>
            r.ipaspace = if Xt[63] == '1' then PAS_NonSecure else PAS_Secure;
        when SS_Realm =>
            r.ipaspace = PAS_Realm;
        otherwise =>
            unreachable;
    end;

    // Apply the invalidation through TLBI() before any broadcast.
    TLBI(r);

    if broadcast == Broadcast_OSH && IsBroadcast_OSHnISH(regime) then
        broadcast = Broadcast_OSHnISH;
    end;

    // No domains value is computed for this operation: Xt[15:0] belongs to the
    // address field, and BroadcastTLBI() is called without a domains argument.
    if broadcast != Broadcast_NSH then BroadcastTLBI(broadcast, r); end;
    return;
end;
// AArch64_TLBI_VMALLS12
// =====================
// Builds a TLBIRecord for a TLBIOp_VMALLS12 operation, applies it with TLBI(),
// and then hands it to BroadcastTLBI() unless the resulting broadcast scope is
// Broadcast_NSH.
//
//   security, regime, vmid, attr
//                : copied unchanged into the record.
//   broadcast_in : requested broadcast scope; a local copy may be changed from
//                  Broadcast_OSH to Broadcast_OSHnISH before broadcasting.
//   Xt           : operand; only bits [15:0] are read, as input to TLBIDomains().
//
// The record carries no address, ASID or TTL; the level is fixed to TLBILevel_Any.
func AArch64_TLBI_VMALLS12(security : SecurityState, regime : Regime,
                           vmid : bits(16), broadcast_in : Broadcast,
                           attr : TLBIMemAttr, Xt : bits(64))
begin
    // Only EL3 and EL2 are accepted here (EL1 is not).
    assert PSTATE.EL IN {EL3, EL2};

    // Parameters are read-only, so work on a mutable copy of the scope.
    var broadcast : Broadcast = broadcast_in;
    var r : TLBIRecord;
    r.op = TLBIOp_VMALLS12;
    r.from_aarch64 = TRUE;
    r.security = security;
    r.regime = regime;
    r.level = TLBILevel_Any;
    r.vmid = vmid;
    // The VMID is always used for this operation.
    r.use_vmid = TRUE;
    r.attr = attr;

    // Apply the invalidation through TLBI() before any broadcast.
    TLBI(r);

    let domains : bits(16) = TLBIDomains(broadcast, Xt[15:0]);
    // An Outer Shareable request is switched to Broadcast_OSHnISH when either
    // helper check returns TRUE.
    if (broadcast == Broadcast_OSH &&
          (IsBroadcast_OSHnISH(regime) || OSHDomainExceedsNIS(domains))) then
        broadcast = Broadcast_OSHnISH;
    end;

    if broadcast != Broadcast_NSH then BroadcastTLBI(broadcast, r, domains); end;
    return;
end;
在实际系统设计中,TLB失效操作的性能影响不容忽视。以下是几个关键的优化考虑:
AArch64提供了多种广播域选项:不广播(Non-shareable,仅作用于当前PE)、Inner Shareable(广播到内部共享域内的所有PE)和Outer Shareable(广播到外部共享域内的所有PE)。
选择适当的广播域可以显著减少核间同步开销。
相比单条目失效,范围失效(如TLBI_RVAA)可以一次性失效大量TLB条目,减少总指令数。但需要注意:
合理使用ASID和VMID可以:
当怀疑TLB失效不彻底时,可以:
对于TLB失效导致的性能下降:
在虚拟化场景中:
提示:在调试TLB问题时,ARM架构提供的TRBE(Trace Buffer Extension)可以记录TLB失效事件,是强大的调试工具。
根据实际项目经验,总结以下TLB管理的最佳实践:
在ARMv8.4及以上版本中,新增的TLBID功能允许更精细地控制TLB失效的广播范围,可以进一步降低多核系统中TLB一致性维护的开销。