From: Xiao Guangrong on
'walk_addr' is out of mmu_lock's protection, so while we handle 'fetch',
then guest's mapping has modifited by other vcpu's write path, such as
invlpg, pte_write and other fetch path

Fixed by checking all level's mapping

Signed-off-by: Xiao Guangrong <xiaoguangrong(a)cn.fujitsu.com>
---
arch/x86/kvm/paging_tmpl.h | 73 ++++++++++++++++++++++++++------------------
1 files changed, 43 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 19f0077..f58a5c4 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -300,7 +300,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
int *ptwrite, pfn_t pfn)
{
unsigned access = gw->pt_access;
- struct kvm_mmu_page *sp;
+ struct kvm_mmu_page *sp = NULL;
u64 spte, *sptep = NULL;
int direct;
gfn_t table_gfn;
@@ -319,22 +319,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
direct_access &= ~ACC_WRITE_MASK;

for_each_shadow_entry(vcpu, addr, iterator) {
+ bool nonpresent = false, last_mapping = false;
+
level = iterator.level;
sptep = iterator.sptep;
- if (iterator.level == hlevel) {
- mmu_set_spte(vcpu, sptep, access,
- gw->pte_access & access,
- user_fault, write_fault,
- dirty, ptwrite, level,
- gw->gfn, pfn, false, true);
- break;
+
+ if (level == hlevel) {
+ last_mapping = true;
+ goto check_set_spte;
}

- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) {
- struct kvm_mmu_page *child;
+ if (is_large_pte(*sptep)) {
+ drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
+ }

- if (level != gw->level)
- continue;
+ if (is_shadow_present_pte(*sptep) && level == gw->level) {
+ struct kvm_mmu_page *child;

/*
* For the direct sp, if the guest pte's dirty bit
@@ -344,19 +345,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
* a new sp with the correct access.
*/
child = page_header(*sptep & PT64_BASE_ADDR_MASK);
- if (child->role.access == direct_access)
- continue;
-
- mmu_page_remove_parent_pte(child, sptep);
- __set_spte(sptep, shadow_trap_nonpresent_pte);
- kvm_flush_remote_tlbs(vcpu->kvm);
+ if (child->role.access != direct_access) {
+ mmu_page_remove_parent_pte(child, sptep);
+ __set_spte(sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
+ }
}

- if (is_large_pte(*sptep)) {
- drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte);
- kvm_flush_remote_tlbs(vcpu->kvm);
- }
+ if (is_shadow_present_pte(*sptep))
+ goto check_set_spte;

+ nonpresent = true;
if (level <= gw->level) {
direct = 1;
access = direct_access;
@@ -374,22 +373,36 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
}
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
direct, access, sptep);
- if (!direct) {
+check_set_spte:
+ if (level >= gw->level) {
r = kvm_read_guest_atomic(vcpu->kvm,
- gw->pte_gpa[level - 2],
+ gw->pte_gpa[level - 1],
&curr_pte, sizeof(curr_pte));
- if (r || curr_pte != gw->ptes[level - 2]) {
- kvm_mmu_put_page(sp, sptep);
+ if (r || curr_pte != gw->ptes[level - 1]) {
+ if (nonpresent)
+ kvm_mmu_put_page(sp, sptep);
kvm_release_pfn_clean(pfn);
sptep = NULL;
break;
}
}

- spte = __pa(sp->spt)
- | PT_PRESENT_MASK | PT_ACCESSED_MASK
- | PT_WRITABLE_MASK | PT_USER_MASK;
- *sptep = spte;
+ if (nonpresent) {
+ spte = __pa(sp->spt)
+ | PT_PRESENT_MASK | PT_ACCESSED_MASK
+ | PT_WRITABLE_MASK | PT_USER_MASK;
+ *sptep = spte;
+ continue;
+ }
+
+ if (last_mapping) {
+ mmu_set_spte(vcpu, sptep, access,
+ gw->pte_access & access,
+ user_fault, write_fault,
+ dirty, ptwrite, level,
+ gw->gfn, pfn, false, true);
+ break;
+ }
}

return sptep;
--
1.6.1.2


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/