From 9594a4986192f99c01a7c0a1779b5ac0eff8e208 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:53:25 +0900 Subject: KVM: MMU: Use __gfn_to_rmap() to clean up kvm_handle_hva() We can treat every level uniformly. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 28c8fbcc676..2beb95b57cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1281,14 +1281,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; gfn_t gfn = memslot->base_gfn + gfn_offset; - ret = handler(kvm, &memslot->rmap[gfn_offset], data); + ret = 0; - for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { - struct kvm_lpage_info *linfo; + for (j = PT_PAGE_TABLE_LEVEL; + j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { + unsigned long *rmapp; - linfo = lpage_info_slot(gfn, memslot, - PT_DIRECTORY_LEVEL + j); - ret |= handler(kvm, &linfo->rmap_pde, data); + rmapp = __gfn_to_rmap(gfn, j, memslot); + ret |= handler(kvm, rmapp, data); } trace_kvm_age_page(hva, memslot, ret); retval |= ret; -- cgit v1.2.3-70-g09d2 From d19a748b1c42b133e9263e9023c1d162efa6f4ad Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:54:30 +0900 Subject: KVM: Introduce hva_to_gfn_memslot() for kvm_handle_hva() This restricts hva handling in mmu code and makes it easier to extend kvm_handle_hva() so that it can treat a range of addresses later in this patch series. Signed-off-by: Takuya Yoshikawa Cc: Alexander Graf Cc: Paul Mackerras Signed-off-by: Marcelo Tosatti --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 +++--- arch/x86/kvm/mmu.c | 3 +-- include/linux/kvm_host.h | 8 ++++++++ 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d03eb6f7b05..37037553fe6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -772,10 +772,10 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, end = start + (memslot->npages << PAGE_SHIFT); if (hva >= start && hva < end) { - gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + gfn_t gfn = hva_to_gfn_memslot(hva, memslot); + gfn_t gfn_offset = gfn - memslot->base_gfn; - ret = handler(kvm, &memslot->rmap[gfn_offset], - memslot->base_gfn + gfn_offset); + ret = handler(kvm, &memslot->rmap[gfn_offset], gfn); retval |= ret; } } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2beb95b57cb..170a632d9d3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1278,8 +1278,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, end = start + (memslot->npages << PAGE_SHIFT); if (hva >= start && hva < end) { - gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; - gfn_t gfn = memslot->base_gfn + gfn_offset; + gfn_t gfn = hva_to_gfn_memslot(hva, memslot); ret = 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e3c86f8c86c..6f6c18a03c5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -740,6 +740,14 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); } +static inline gfn_t +hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) +{ + gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT; + + return slot->base_gfn + gfn_offset; +} + static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { -- cgit v1.2.3-70-g09d2 From 84504ef38673fa021b3d8f3da2b79cf878b33315 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:55:48 +0900 Subject: KVM: MMU: Make kvm_handle_hva() handle range of addresses When guest's memory is backed by THP pages, MMU notifier needs to call kvm_unmap_hva(), which in turn leads to kvm_handle_hva(), in a loop to invalidate a range of pages which constitute one huge page: for each page for each memslot if page is in memslot unmap using rmap This means although every page in that range is expected to be found in the same memslot, we are forced to check unrelated memslots many times. If the guest has more memslots, the situation will become worse. Furthermore, if the range does not include any pages in the guest's memory, the loop over the pages will just consume extra time. This patch, together with the following patches, solves this problem by introducing kvm_handle_hva_range() which makes the loop look like this: for each memslot for each page in memslot unmap using rmap In this new processing, the actual work is converted to a loop over rmap which is much more cache friendly than before. Signed-off-by: Takuya Yoshikawa Cc: Alexander Graf Cc: Paul Mackerras Signed-off-by: Marcelo Tosatti --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 36 ++++++++++++++++++++++++------- arch/x86/kvm/mmu.c | 42 ++++++++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 37037553fe6..1a470bc2876 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, goto out_put; } -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - int (*handler)(struct kvm *kvm, unsigned long *rmapp, - unsigned long gfn)) +static int kvm_handle_hva_range(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + unsigned long *rmapp, + unsigned long gfn)) { int ret; int retval = 0; @@ -767,12 +770,22 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { - unsigned long start = memslot->userspace_addr; - unsigned long end; + unsigned long hva_start, hva_end; + gfn_t gfn, gfn_end; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + /* + * {gfn(page) | page intersects with [hva_start, hva_end)} = + * {gfn, gfn+1, ..., gfn_end-1}. + */ + gfn = hva_to_gfn_memslot(hva_start, memslot); + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - end = start + (memslot->npages << PAGE_SHIFT); - if (hva >= start && hva < end) { - gfn_t gfn = hva_to_gfn_memslot(hva, memslot); + for (; gfn < gfn_end; ++gfn) { gfn_t gfn_offset = gfn - memslot->base_gfn; ret = handler(kvm, &memslot->rmap[gfn_offset], gfn); @@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, return retval; } +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + unsigned long gfn)) +{ + return kvm_handle_hva_range(kvm, hva, hva + 1, handler); +} + static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long gfn) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 170a632d9d3..7235b0c9587 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1259,10 +1259,13 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, return 0; } -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - unsigned long data, - int (*handler)(struct kvm *kvm, unsigned long *rmapp, - unsigned long data)) +static int kvm_handle_hva_range(struct kvm *kvm, + unsigned long start, + unsigned long end, + unsigned long data, + int (*handler)(struct kvm *kvm, + unsigned long *rmapp, + unsigned long data)) { int j; int ret; @@ -1273,13 +1276,22 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { - unsigned long start = memslot->userspace_addr; - unsigned long end; + unsigned long hva_start, hva_end; + gfn_t gfn, gfn_end; - end = start + (memslot->npages << PAGE_SHIFT); - if (hva >= start && hva < end) { - gfn_t gfn = hva_to_gfn_memslot(hva, memslot); + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + /* + * {gfn(page) | page intersects with [hva_start, hva_end)} = + * {gfn, gfn+1, ..., gfn_end-1}. + */ + gfn = hva_to_gfn_memslot(hva_start, memslot); + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); + for (; gfn < gfn_end; ++gfn) { ret = 0; for (j = PT_PAGE_TABLE_LEVEL; @@ -1289,7 +1301,9 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, rmapp = __gfn_to_rmap(gfn, j, memslot); ret |= handler(kvm, rmapp, data); } - trace_kvm_age_page(hva, memslot, ret); + trace_kvm_age_page(memslot->userspace_addr + + (gfn - memslot->base_gfn) * PAGE_SIZE, + memslot, ret); retval |= ret; } } @@ -1297,6 +1311,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, return retval; } +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + unsigned long data, + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + unsigned long data)) +{ + return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); +} + int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); -- cgit v1.2.3-70-g09d2 From b3ae2096974b12c3af2ad1a4e7716b084949867f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:56:33 +0900 Subject: KVM: Introduce kvm_unmap_hva_range() for kvm_mmu_notifier_invalidate_range_start() When we tested KVM under memory pressure, with THP enabled on the host, we noticed that MMU notifier took a long time to invalidate huge pages. Since the invalidation was done with mmu_lock held, it not only wasted the CPU but also made the host harder to respond. This patch mitigates this by using kvm_handle_hva_range(). Signed-off-by: Takuya Yoshikawa Cc: Alexander Graf Cc: Paul Mackerras Signed-off-by: Marcelo Tosatti --- arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 +++++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 5 +++++ virt/kvm/kvm_main.c | 3 +-- 5 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 50ea12fd7bf..572ad014126 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -52,6 +52,8 @@ struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end); extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 1a470bc2876..3c635c0616b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -870,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) return 0; } +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + if (kvm->arch.using_mmu_notifiers) + kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); + return 0; +} + static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long gfn) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a3e9409e90b..d4aab865606 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -944,6 +944,7 @@ extern bool kvm_rebooting; #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_age_hva(struct kvm *kvm, unsigned long hva); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7235b0c9587..d2855f895fd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1324,6 +1324,11 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); } +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); +} + void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b3ce91c623e..e2b1a159e5d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -332,8 +332,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * count is also read inside the mmu_lock critical section. */ kvm->mmu_notifier_count++; - for (; start < end; start += PAGE_SIZE) - need_tlb_flush |= kvm_unmap_hva(kvm, start); + need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) -- cgit v1.2.3-70-g09d2 From 77d11309b3a10e1ce112058ec2c9b7b979bcf311 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:57:17 +0900 Subject: KVM: Separate rmap_pde from kvm_lpage_info->write_count This makes it possible to loop over rmap_pde arrays in the same way as we do over rmap so that we can optimize kvm_handle_hva_range() easily in the following patch. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 6 +++--- arch/x86/kvm/x86.c | 11 +++++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d4aab865606..4f98da9243f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -500,11 +500,11 @@ struct kvm_vcpu_arch { }; struct kvm_lpage_info { - unsigned long rmap_pde; int write_count; }; struct kvm_arch_memory_slot { + unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d2855f895fd..3b3f5ae5da6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -960,13 +960,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, struct kvm_memory_slot *slot) { - struct kvm_lpage_info *linfo; + unsigned long idx; if (likely(level == PT_PAGE_TABLE_LEVEL)) return &slot->rmap[gfn - slot->base_gfn]; - linfo = lpage_info_slot(gfn, slot, level); - return &linfo->rmap_pde; + idx = gfn_to_index(gfn, slot->base_gfn, level); + return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx]; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59b59508ff0..829b4e97255 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6314,6 +6314,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, int i; for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) { + kvm_kvfree(free->arch.rmap_pde[i]); + free->arch.rmap_pde[i] = NULL; + } if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { kvm_kvfree(free->arch.lpage_info[i]); free->arch.lpage_info[i] = NULL; @@ -6333,6 +6337,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) lpages = gfn_to_index(slot->base_gfn + npages - 1, slot->base_gfn, level) + 1; + slot->arch.rmap_pde[i] = + kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i])); + if (!slot->arch.rmap_pde[i]) + goto out_free; + slot->arch.lpage_info[i] = kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); if (!slot->arch.lpage_info[i]) @@ -6361,7 +6370,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) out_free: for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + kvm_kvfree(slot->arch.rmap_pde[i]); kvm_kvfree(slot->arch.lpage_info[i]); + slot->arch.rmap_pde[i] = NULL; slot->arch.lpage_info[i] = NULL; } return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 048212d0bc0b1769a4bbecd7ace8c8d237577d1b Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:57:59 +0900 Subject: KVM: MMU: Add memslot parameter to hva handlers This is needed to push trace_kvm_age_page() into kvm_age_rmapp() in the following patch. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3b3f5ae5da6..dfd7a9a3115 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1200,7 +1200,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, - unsigned long data) + struct kvm_memory_slot *slot, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1218,7 +1218,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, - unsigned long data) + struct kvm_memory_slot *slot, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1265,6 +1265,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, + struct kvm_memory_slot *slot, unsigned long data)) { int j; @@ -1299,7 +1300,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, unsigned long *rmapp; rmapp = __gfn_to_rmap(gfn, j, memslot); - ret |= handler(kvm, rmapp, data); + ret |= handler(kvm, rmapp, memslot, data); } trace_kvm_age_page(memslot->userspace_addr + (gfn - memslot->base_gfn) * PAGE_SIZE, @@ -1314,6 +1315,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, + struct kvm_memory_slot *slot, unsigned long data)) { return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); @@ -1335,7 +1337,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) } static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - unsigned long data) + struct kvm_memory_slot *slot, unsigned long data) { u64 *sptep; struct rmap_iterator uninitialized_var(iter); @@ -1350,7 +1352,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, * out actively used pages or breaking up actively used hugepages. */ if (!shadow_accessed_mask) - return kvm_unmap_rmapp(kvm, rmapp, data); + return kvm_unmap_rmapp(kvm, rmapp, slot, data); for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { @@ -1367,7 +1369,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - unsigned long data) + struct kvm_memory_slot *slot, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1405,7 +1407,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); + kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); kvm_flush_remote_tlbs(vcpu->kvm); } -- cgit v1.2.3-70-g09d2 From f395302e09ef783b8f82d1160510a95aa8c66dbc Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:58:48 +0900 Subject: KVM: MMU: Push trace_kvm_age_page() into kvm_age_rmapp() This restricts the tracing to page aging and makes it possible to optimize kvm_handle_hva_range() further in the following patch. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index dfd7a9a3115..58adec38448 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1269,8 +1269,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, unsigned long data)) { int j; - int ret; - int retval = 0; + int ret = 0; struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -1293,8 +1292,6 @@ static int kvm_handle_hva_range(struct kvm *kvm, gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); for (; gfn < gfn_end; ++gfn) { - ret = 0; - for (j = PT_PAGE_TABLE_LEVEL; j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { unsigned long *rmapp; @@ -1302,14 +1299,10 @@ static int kvm_handle_hva_range(struct kvm *kvm, rmapp = __gfn_to_rmap(gfn, j, memslot); ret |= handler(kvm, rmapp, memslot, data); } - trace_kvm_age_page(memslot->userspace_addr + - (gfn - memslot->base_gfn) * PAGE_SIZE, - memslot, ret); - retval |= ret; } } - return retval; + return ret; } static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, @@ -1351,8 +1344,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, * This has some overhead, but not as much as the cost of swapping * out actively used pages or breaking up actively used hugepages. */ - if (!shadow_accessed_mask) - return kvm_unmap_rmapp(kvm, rmapp, slot, data); + if (!shadow_accessed_mask) { + young = kvm_unmap_rmapp(kvm, rmapp, slot, data); + goto out; + } for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { @@ -1364,7 +1359,9 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, (unsigned long *)sptep); } } - +out: + /* @data has hva passed to kvm_age_hva(). */ + trace_kvm_age_page(data, slot, young); return young; } @@ -1413,7 +1410,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); + return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -- cgit v1.2.3-70-g09d2 From bcd3ef58283a471d6b65855b83f78bd39eb55391 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:59:33 +0900 Subject: KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range() When we invalidate a THP page, we call the handler with the same rmap_pde argument 512 times in the following loop: for each guest page in the range for each level unmap using rmap This patch avoids these extra handler calls by changing the loop order like this: for each level for each rmap in the range unmap using rmap With the preceding patches in the patch series, this made THP page invalidation more than 5 times faster on our x86 host: the host became more responsive during swapping the guest's memory as a result. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 58adec38448..a5d6ef785b7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1277,7 +1277,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, kvm_for_each_memslot(memslot, slots) { unsigned long hva_start, hva_end; - gfn_t gfn, gfn_end; + gfn_t gfn_start, gfn_end; hva_start = max(start, memslot->userspace_addr); hva_end = min(end, memslot->userspace_addr + @@ -1286,19 +1286,27 @@ static int kvm_handle_hva_range(struct kvm *kvm, continue; /* * {gfn(page) | page intersects with [hva_start, hva_end)} = - * {gfn, gfn+1, ..., gfn_end-1}. + * {gfn_start, gfn_start+1, ..., gfn_end-1}. */ - gfn = hva_to_gfn_memslot(hva_start, memslot); + gfn_start = hva_to_gfn_memslot(hva_start, memslot); gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - for (; gfn < gfn_end; ++gfn) { - for (j = PT_PAGE_TABLE_LEVEL; - j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { - unsigned long *rmapp; + for (j = PT_PAGE_TABLE_LEVEL; + j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { + unsigned long idx, idx_end; + unsigned long *rmapp; - rmapp = __gfn_to_rmap(gfn, j, memslot); - ret |= handler(kvm, rmapp, memslot, data); - } + /* + * {idx(page_j) | page_j intersects with + * [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}. + */ + idx = gfn_to_index(gfn_start, memslot->base_gfn, j); + idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j); + + rmapp = __gfn_to_rmap(gfn_start, j, memslot); + + for (; idx <= idx_end; ++idx) + ret |= handler(kvm, rmapp++, memslot, data); } } -- cgit v1.2.3-70-g09d2 From 2f5f6ad9390c1ebbf738d130dbfe80b60eaa167e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 8 Aug 2011 16:57:47 -0400 Subject: ftrace: Pass ftrace_ops as third parameter to function trace callback Currently the function trace callback receives only the ip and parent_ip of the function that it traced. It would be more powerful to also return the ops that registered the function as well. This allows the same function to act differently depending on what ftrace_ops registered it. Link: http://lkml.kernel.org/r/20120612225424.267254552@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 4 ++ arch/x86/kernel/entry_64.S | 1 + include/linux/ftrace.h | 16 +++++- kernel/trace/ftrace.c | 101 ++++++++++++++++++++++++++------------ kernel/trace/trace_event_perf.c | 3 +- kernel/trace/trace_events.c | 3 +- kernel/trace/trace_functions.c | 9 ++-- kernel/trace/trace_irqsoff.c | 3 +- kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_selftest.c | 15 ++++-- kernel/trace/trace_stack.c | 2 +- 11 files changed, 113 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b0767bc0874..783b107eacb 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -32,6 +32,10 @@ #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64) +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7d65133b51b..2b4f94c5dc6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -79,6 +79,7 @@ ENTRY(ftrace_caller) MCOUNT_SAVE_FRAME + leaq function_trace_op, %rdx movq 0x38(%rsp), %rdi movq 8(%rbp), %rsi subq $MCOUNT_INSN_SIZE, %rdi diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 55e6d63d46d..2d596411988 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -18,6 +18,15 @@ #include +/* + * If the arch supports passing the variable contents of + * function_trace_op as the third parameter back from the + * mcount call, then the arch should define this as 1. + */ +#ifndef ARCH_SUPPORTS_FTRACE_OPS +#define ARCH_SUPPORTS_FTRACE_OPS 0 +#endif + struct module; struct ftrace_hash; @@ -29,7 +38,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_ops; + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are @@ -163,7 +175,7 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } -extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op); #else /* !CONFIG_FUNCTION_TRACER */ /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b4f20fba09f..4f2ab9352a6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -64,12 +64,19 @@ #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) +static struct ftrace_ops ftrace_list_end __read_mostly = { + .func = ftrace_stub, +}; + /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; /* Quick disabling of function tracer. */ -int function_trace_stop; +int function_trace_stop __read_mostly; + +/* Current function tracing op */ +struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; /* List for set_ftrace_pid's pids. */ LIST_HEAD(ftrace_pids); @@ -86,10 +93,6 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops ftrace_list_end __read_mostly = { - .func = ftrace_stub, -}; - static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; @@ -100,8 +103,14 @@ ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; -static void -ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op); +#else +/* See comment below, where ftrace_ops_list_func is defined */ +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); +#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) +#endif /* * Traverse the ftrace_global_list, invoking all entries. The reason that we @@ -112,29 +121,29 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); * * Silly Alpha and silly pointer-speculation compiler optimizations! */ -static void ftrace_global_list_func(unsigned long ip, - unsigned long parent_ip) +static void +ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { - struct ftrace_ops *op; - if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) return; trace_recursion_set(TRACE_GLOBAL_BIT); op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { - op->func(ip, parent_ip); + op->func(ip, parent_ip, op); op = rcu_dereference_raw(op->next); /*see above*/ }; trace_recursion_clear(TRACE_GLOBAL_BIT); } -static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip); + ftrace_pid_function(ip, parent_ip, op); } static void set_ftrace_pid_function(ftrace_func_t func) @@ -163,12 +172,13 @@ void clear_ftrace_function(void) * For those archs that do not test ftrace_trace_stop in their * mcount call site, we need to do it from C. */ -static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { if (function_trace_stop) return; - __ftrace_trace_function(ip, parent_ip); + __ftrace_trace_function(ip, parent_ip, op); } #endif @@ -230,15 +240,24 @@ static void update_ftrace_function(void) /* * If we are at the end of the list and this ops is - * not dynamic, then have the mcount trampoline call - * the function directly + * not dynamic and the arch supports passing ops, then have the + * mcount trampoline call the function directly. */ if (ftrace_ops_list == &ftrace_list_end || (ftrace_ops_list->next == &ftrace_list_end && - !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC))) + !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && + ARCH_SUPPORTS_FTRACE_OPS)) { + /* Set the ftrace_ops that the arch callback uses */ + if (ftrace_ops_list == &global_ops) + function_trace_op = ftrace_global_list; + else + function_trace_op = ftrace_ops_list; func = ftrace_ops_list->func; - else + } else { + /* Just use the default ftrace_ops */ + function_trace_op = &ftrace_list_end; func = ftrace_ops_list_func; + } #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST ftrace_trace_function = func; @@ -773,7 +792,8 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) } static void -function_profile_call(unsigned long ip, unsigned long parent_ip) +function_profile_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; @@ -803,7 +823,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) #ifdef CONFIG_FUNCTION_GRAPH_TRACER static int profile_graph_entry(struct ftrace_graph_ent *trace) { - function_profile_call(trace->func, 0); + function_profile_call(trace->func, 0, NULL); return 1; } @@ -2790,8 +2810,8 @@ static int __init ftrace_mod_cmd_init(void) } device_initcall(ftrace_mod_cmd_init); -static void -function_trace_probe_call(unsigned long ip, unsigned long parent_ip) +static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct ftrace_func_probe *entry; struct hlist_head *hhd; @@ -3942,10 +3962,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) #endif /* CONFIG_DYNAMIC_FTRACE */ static void -ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) +ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { - struct ftrace_ops *op; - if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) return; @@ -3959,7 +3978,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) while (op != &ftrace_list_end) { if (!ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip)) - op->func(ip, parent_ip); + op->func(ip, parent_ip, op); op = rcu_dereference_raw(op->next); }; @@ -3971,8 +3990,9 @@ static struct ftrace_ops control_ops = { .func = ftrace_ops_control_func, }; -static void -ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) +static inline void +__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ignored) { struct ftrace_ops *op; @@ -3988,13 +4008,32 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) op = rcu_dereference_raw(ftrace_ops_list); while (op != &ftrace_list_end) { if (ftrace_ops_test(op, ip)) - op->func(ip, parent_ip); + op->func(ip, parent_ip, op); op = rcu_dereference_raw(op->next); }; preempt_enable_notrace(); trace_recursion_clear(TRACE_INTERNAL_BIT); } +/* + * Some archs only support passing ip and parent_ip. Even though + * the list function ignores the op parameter, we do not want any + * C side effects, where a function is called without the caller + * sending a third parameter. + */ +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) +{ + __ftrace_ops_list_func(ip, parent_ip, NULL); +} +#else +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) +{ + __ftrace_ops_list_func(ip, parent_ip, NULL); +} +#endif + static void clear_ftrace_swapper(void) { struct task_struct *p; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index fee3752ae8f..a872a9a298a 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -258,7 +258,8 @@ EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); #ifdef CONFIG_FUNCTION_TRACER static void -perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) +perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops) { struct ftrace_entry *entry; struct hlist_head *head; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 29111da1d10..88daa5177bf 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1681,7 +1681,8 @@ static __init void event_trace_self_tests(void) static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); static void -function_test_events_call(unsigned long ip, unsigned long parent_ip) +function_test_events_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct ring_buffer_event *event; struct ring_buffer *buffer; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c7b0c6a7db0..fceb7a9aa06 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -48,7 +48,8 @@ static void function_trace_start(struct trace_array *tr) } static void -function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -75,7 +76,8 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) } static void -function_trace_call(unsigned long ip, unsigned long parent_ip) +function_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -106,7 +108,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } static void -function_stack_trace_call(unsigned long ip, unsigned long parent_ip) +function_stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 99d20e92036..2862c77f95d 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -136,7 +136,8 @@ static int func_prolog_dec(struct trace_array *tr, * irqsoff uses its own tracer function to keep the overhead down: */ static void -irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) +irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ff791ea48b5..0caf4f5da56 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -108,7 +108,7 @@ out_enable: * wakeup uses its own tracer function to keep the overhead down: */ static void -wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) +wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 288541f977f..9ae40c823af 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -103,35 +103,40 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) static int trace_selftest_test_probe1_cnt; static void trace_selftest_test_probe1_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_probe1_cnt++; } static int trace_selftest_test_probe2_cnt; static void trace_selftest_test_probe2_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_probe2_cnt++; } static int trace_selftest_test_probe3_cnt; static void trace_selftest_test_probe3_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_probe3_cnt++; } static int trace_selftest_test_global_cnt; static void trace_selftest_test_global_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_global_cnt++; } static int trace_selftest_test_dyn_cnt; static void trace_selftest_test_dyn_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op) { trace_selftest_test_dyn_cnt++; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index d4545f49242..e20006d5fb6 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -111,7 +111,7 @@ static inline void check_stack(void) } static void -stack_trace_call(unsigned long ip, unsigned long parent_ip) +stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op) { int cpu; -- cgit v1.2.3-70-g09d2 From 28fb5dfa783c25dbeeb25a72663f8066a3a517f5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Aug 2011 22:00:55 -0400 Subject: ftrace/x86_32: Push ftrace_ops in as 3rd parameter to function tracer Add support of passing the current ftrace_ops into the 3rd parameter of the callback to the function tracer. Link: http://lkml.kernel.org/r/20120612225424.942411318@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 2 +- arch/x86/kernel/entry_32.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 783b107eacb..b3bb1f3f277 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -32,7 +32,7 @@ #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ -#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64) +#ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f2883747..e3e17a0b7ff 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1111,6 +1111,7 @@ ENTRY(ftrace_caller) pushl %edx movl 0xc(%esp), %eax movl 0x4(%ebp), %edx + leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call -- cgit v1.2.3-70-g09d2 From 08f6fba503111e0336f2b4d6915a4a18f9b60e51 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 30 Apr 2012 16:20:23 -0400 Subject: ftrace/x86: Add separate function to save regs Add a way to have different functions calling different trampolines. If a ftrace_ops wants regs saved on the return, then have only the functions with ops registered to save regs. Functions registered by other ops would not be affected, unless the functions overlap. If one ftrace_ops registered functions A, B and C and another ops registered fucntions to save regs on A, and D, then only functions A and D would be saving regs. Function B and C would work as normal. Although A is registered by both ops: normal and saves regs; this is fine as saving the regs is needed to satisfy one of the ops that calls it but the regs are ignored by the other ops function. x86_64 implements the full regs saving, and i386 just passes a NULL for regs to satisfy the ftrace_ops passing. Where an arch must supply both regs and ftrace_ops parameters, even if regs is just NULL. It is OK for an arch to pass NULL regs. All function trace users that require regs passing must add the flag FTRACE_OPS_FL_SAVE_REGS when registering the ftrace_ops. If the arch does not support saving regs then the ftrace_ops will fail to register. The flag FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED may be set that will prevent the ftrace_ops from failing to register. In this case, the handler may either check if regs is not NULL or check if ARCH_SUPPORTS_FTRACE_SAVE_REGS. If the arch supports passing regs it will set this macro and pass regs for ops that request them. All other archs will just pass NULL. Link: Link: http://lkml.kernel.org/r/20120711195745.107705970@goodmis.org Cc: Alexander van Heukelum Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 47 +++++++++++-------- arch/x86/kernel/entry_32.S | 4 +- arch/x86/kernel/entry_64.S | 94 +++++++++++++++++++++++++++++++++---- arch/x86/kernel/ftrace.c | 77 ++++++++++++++++++++++++++++-- include/linux/ftrace.h | 107 +++++++++++++++++++++++++++++++++++++++--- kernel/trace/ftrace.c | 91 +++++++++++++++++++++++++++++++---- 6 files changed, 373 insertions(+), 47 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b3bb1f3f277..a8475014c4a 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,27 +3,33 @@ #ifdef __ASSEMBLY__ - .macro MCOUNT_SAVE_FRAME - /* taken from glibc */ - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) + /* skip is set if the stack was already partially adjusted */ + .macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. + */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) .endm - .macro MCOUNT_RESTORE_FRAME - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp + .macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp .endm #endif @@ -34,6 +40,9 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 +#ifdef CONFIG_X86_64 +#define ARCH_SUPPORTS_FTRACE_SAVE_REGS +#endif #endif #ifndef __ASSEMBLY__ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index e3e17a0b7ff..5da11d1b85a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,7 +1109,8 @@ ENTRY(ftrace_caller) pushl %eax pushl %ecx pushl %edx - movl 0xc(%esp), %eax + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax movl 0x4(%ebp), %edx leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax @@ -1118,6 +1119,7 @@ ENTRY(ftrace_caller) ftrace_call: call ftrace_stub + addl $4,%esp /* skip NULL pointer */ popl %edx popl %ecx popl %eax diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2b4f94c5dc6..52bda2e8f7a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -73,21 +73,34 @@ ENTRY(mcount) retq END(mcount) +/* skip is set if stack has been adjusted */ +.macro ftrace_caller_setup skip=0 + MCOUNT_SAVE_FRAME \skip + + /* Load the ftrace_ops into the 3rd parameter */ + leaq function_trace_op, %rdx + + /* Load ip into the first parameter */ + movq RIP(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi + /* Load the parent_ip into the second parameter */ + movq 8(%rbp), %rsi +.endm + ENTRY(ftrace_caller) + /* Check if tracing was disabled (quick check) */ cmpl $0, function_trace_stop jne ftrace_stub - MCOUNT_SAVE_FRAME - - leaq function_trace_op, %rdx - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup + /* regs go into 4th parameter (but make it NULL) */ + movq $0, %rcx GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -98,6 +111,71 @@ GLOBAL(ftrace_stub) retq END(ftrace_caller) +ENTRY(ftrace_regs_caller) + /* Save the current flags before compare (in SS location)*/ + pushfq + + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* skip=8 to skip flags saved in SS */ + ftrace_caller_setup 8 + + /* Save the rest of pt_regs */ + movq %r15, R15(%rsp) + movq %r14, R14(%rsp) + movq %r13, R13(%rsp) + movq %r12, R12(%rsp) + movq %r11, R11(%rsp) + movq %r10, R10(%rsp) + movq %rbp, RBP(%rsp) + movq %rbx, RBX(%rsp) + /* Copy saved flags */ + movq SS(%rsp), %rcx + movq %rcx, EFLAGS(%rsp) + /* Kernel segments */ + movq $__KERNEL_DS, %rcx + movq %rcx, SS(%rsp) + movq $__KERNEL_CS, %rcx + movq %rcx, CS(%rsp) + /* Stack - skipping return address */ + leaq SS+16(%rsp), %rcx + movq %rcx, RSP(%rsp) + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + /* Copy flags back to SS, to restore them */ + movq EFLAGS(%rsp), %rax + movq %rax, SS(%rsp) + + /* restore the rest of pt_regs */ + movq R15(%rsp), %r15 + movq R14(%rsp), %r14 + movq R13(%rsp), %r13 + movq R12(%rsp), %r12 + movq R10(%rsp), %r10 + movq RBP(%rsp), %rbp + movq RBX(%rsp), %rbx + + /* skip=8 to skip flags saved in SS */ + MCOUNT_RESTORE_FRAME 8 + + /* Restore flags */ + popfq + + jmp ftrace_return +ftrace_restore_flags: + popfq + jmp ftrace_stub + +END(ftrace_regs_caller) + + #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) cmpl $0, function_trace_stop @@ -120,7 +198,7 @@ GLOBAL(ftrace_stub) trace: MCOUNT_SAVE_FRAME - movq 0x38(%rsp), %rdi + movq RIP(%rsp), %rdi movq 8(%rbp), %rsi subq $MCOUNT_INSN_SIZE, %rdi @@ -141,7 +219,7 @@ ENTRY(ftrace_graph_caller) MCOUNT_SAVE_FRAME leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi + movq RIP(%rsp), %rsi movq (%rbp), %rdx subq $MCOUNT_INSN_SIZE, %rsi diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e..b90eb1a1307 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,6 +206,23 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +/* + * Should never be called: + * As it is only called by __ftrace_replace_code() which is called by + * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() + * which is called to turn mcount into nops or nops into function calls + * but not to convert a function from not using regs to one that uses + * regs, which ftrace_modify_call() is for. + */ +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + WARN_ON(1); + return -EINVAL; +} +#endif + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -220,6 +237,16 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + } +#endif + atomic_dec(&modifying_ftrace_code); return ret; @@ -299,6 +326,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } +/* + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + */ +static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/* + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + */ +static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; @@ -306,7 +359,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -316,6 +369,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + ftrace_addr = get_ftrace_old_addr(rec); + /* fall through */ case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); @@ -360,13 +417,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we make just create * a disaster. */ - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); + nop = ftrace_call_replace(ip, ftrace_addr); + + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) + goto update; + + /* Check both ftrace_addr and ftrace_old_addr */ + ftrace_addr = get_ftrace_old_addr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } + update: return probe_kernel_write((void *)ip, &nop[0], 1); } @@ -405,12 +470,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); @@ -455,12 +522,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e4202881fb0..ab39990cc43 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -71,12 +71,28 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * could be controled by following calls: * ftrace_function_local_enable * ftrace_function_local_disable + * SAVE_REGS - The ftrace_ops wants regs saved at each function called + * and passed to the callback. If this flag is set, but the + * architecture does not support passing regs + * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the + * ftrace_ops will fail to register, unless the next flag + * is set. + * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the + * handler can handle an arch that does not save regs + * (the handler tests if regs == NULL), then it can set + * this flag instead. It will not fail registering the ftrace_ops + * but, the regs field will be NULL if the arch does not support + * passing regs to the handler. + * Note, if this flag is set, the SAVE_REGS flag will automatically + * get set upon registering the ftrace_ops, if the arch supports it. */ enum { - FTRACE_OPS_FL_ENABLED = 1 << 0, - FTRACE_OPS_FL_GLOBAL = 1 << 1, - FTRACE_OPS_FL_DYNAMIC = 1 << 2, - FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, + FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_SAVE_REGS = 1 << 4, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, }; struct ftrace_ops { @@ -254,12 +270,31 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); +/* + * The dyn_ftrace record's flags field is split into two parts. + * the first part which is '0-FTRACE_REF_MAX' is a counter of + * the number of callbacks that have registered the function that + * the dyn_ftrace descriptor represents. + * + * The second part is a mask: + * ENABLED - the function is being traced + * REGS - the record wants the function to save regs + * REGS_EN - the function is set up to save regs. + * + * When a new ftrace_ops is registered and wants a function to save + * pt_regs, the rec->flag REGS is set. When the function has been + * set up to save regs, the REG_EN flag is set. Once a function + * starts saving regs it will do so until all ftrace_ops are removed + * from tracing that function. + */ enum { - FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_ENABLED = (1UL << 29), + FTRACE_FL_REGS = (1UL << 30), + FTRACE_FL_REGS_EN = (1UL << 31) }; -#define FTRACE_FL_MASK (0x3UL << 30) -#define FTRACE_REF_MAX ((1 << 30) - 1) +#define FTRACE_FL_MASK (0x7UL << 29) +#define FTRACE_REF_MAX ((1UL << 29) - 1) struct dyn_ftrace { union { @@ -290,9 +325,23 @@ enum { FTRACE_STOP_FUNC_RET = (1 << 4), }; +/* + * The FTRACE_UPDATE_* enum is used to pass information back + * from the ftrace_update_record() and ftrace_test_record() + * functions. These are called by the code update routines + * to find out what is to be done for a given function. + * + * IGNORE - The function is already what we want it to be + * MAKE_CALL - Start tracing the function + * MODIFY_CALL - Stop saving regs for the function + * MODIFY_CALL_REGS - Start saving regs for the function + * MAKE_NOP - Stop tracing the function + */ enum { FTRACE_UPDATE_IGNORE, FTRACE_UPDATE_MAKE_CALL, + FTRACE_UPDATE_MODIFY_CALL, + FTRACE_UPDATE_MODIFY_CALL_REGS, FTRACE_UPDATE_MAKE_NOP, }; @@ -344,7 +393,9 @@ extern int ftrace_dyn_arch_init(void *data); extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); +extern void ftrace_regs_caller(void); extern void ftrace_call(void); +extern void ftrace_regs_call(void); extern void mcount_call(void); void ftrace_modify_all_code(int command); @@ -352,6 +403,15 @@ void ftrace_modify_all_code(int command); #ifndef FTRACE_ADDR #define FTRACE_ADDR ((unsigned long)ftrace_caller) #endif + +#ifndef FTRACE_REGS_ADDR +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) +#else +# define FTRACE_REGS_ADDR FTRACE_ADDR +#endif +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); @@ -407,6 +467,39 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +/** + * ftrace_modify_call - convert from one addr to another (no nop) + * @rec: the mcount call site record + * @old_addr: the address expected to be currently called to + * @addr: the address to change to + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should be a caller to @old_addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr); +#else +/* Should never be called */ +static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return -EINVAL; +} +#endif + /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6ff07ad0ede..c55f7e27461 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -314,6 +314,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) return -EINVAL; +#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS + /* + * If the ftrace_ops specifies SAVE_REGS, then it only can be used + * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. + * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant. + */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && + !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)) + return -EINVAL; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) + ops->flags |= FTRACE_OPS_FL_SAVE_REGS; +#endif + if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; @@ -1515,6 +1529,12 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags++; if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) return; + /* + * If any ops wants regs saved for this function + * then all ops will get saved regs. + */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + rec->flags |= FTRACE_FL_REGS; } else { if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) return; @@ -1606,18 +1626,59 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (enable && (rec->flags & ~FTRACE_FL_MASK)) flag = FTRACE_FL_ENABLED; + /* + * If enabling and the REGS flag does not match the REGS_EN, then + * do not ignore this record. Set flags to fail the compare against + * ENABLED. + */ + if (flag && + (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN))) + flag |= FTRACE_FL_REGS; + /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) return FTRACE_UPDATE_IGNORE; if (flag) { - if (update) + /* Save off if rec is being enabled (for return value) */ + flag ^= rec->flags & FTRACE_FL_ENABLED; + + if (update) { rec->flags |= FTRACE_FL_ENABLED; - return FTRACE_UPDATE_MAKE_CALL; + if (flag & FTRACE_FL_REGS) { + if (rec->flags & FTRACE_FL_REGS) + rec->flags |= FTRACE_FL_REGS_EN; + else + rec->flags &= ~FTRACE_FL_REGS_EN; + } + } + + /* + * If this record is being updated from a nop, then + * return UPDATE_MAKE_CALL. + * Otherwise, if the EN flag is set, then return + * UPDATE_MODIFY_CALL_REGS to tell the caller to convert + * from the non-save regs, to a save regs function. + * Otherwise, + * return UPDATE_MODIFY_CALL to tell the caller to convert + * from the save regs, to a non-save regs function. + */ + if (flag & FTRACE_FL_ENABLED) + return FTRACE_UPDATE_MAKE_CALL; + else if (rec->flags & FTRACE_FL_REGS_EN) + return FTRACE_UPDATE_MODIFY_CALL_REGS; + else + return FTRACE_UPDATE_MODIFY_CALL; } - if (update) - rec->flags &= ~FTRACE_FL_ENABLED; + if (update) { + /* If there's no more users, clear all flags */ + if (!(rec->flags & ~FTRACE_FL_MASK)) + rec->flags = 0; + else + /* Just disable the record (keep REGS state) */ + rec->flags &= ~FTRACE_FL_ENABLED; + } return FTRACE_UPDATE_MAKE_NOP; } @@ -1652,13 +1713,17 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { + unsigned long ftrace_old_addr; unsigned long ftrace_addr; int ret; - ftrace_addr = (unsigned long)FTRACE_ADDR; - ret = ftrace_update_record(rec, enable); + if (rec->flags & FTRACE_FL_REGS) + ftrace_addr = (unsigned long)FTRACE_REGS_ADDR; + else + ftrace_addr = (unsigned long)FTRACE_ADDR; + switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; @@ -1668,6 +1733,15 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) case FTRACE_UPDATE_MAKE_NOP: return ftrace_make_nop(NULL, rec, ftrace_addr); + + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + if (rec->flags & FTRACE_FL_REGS) + ftrace_old_addr = (unsigned long)FTRACE_ADDR; + else + ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR; + + return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } return -1; /* unknow ftrace bug */ @@ -2421,8 +2495,9 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) - seq_printf(m, " (%ld)", - rec->flags & ~FTRACE_FL_MASK); + seq_printf(m, " (%ld)%s", + rec->flags & ~FTRACE_FL_MASK, + rec->flags & FTRACE_FL_REGS ? " R" : ""); seq_printf(m, "\n"); return 0; -- cgit v1.2.3-70-g09d2 From 4de72395ff4cf48e23b61986dbc90b99a7c4ed97 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 Jun 2012 20:00:11 -0400 Subject: ftrace/x86: Add save_regs for i386 function calls Add saving full regs for function tracing on i386. The saving of regs was influenced by patches sent out by Masami Hiramatsu. Link: Link: http://lkml.kernel.org/r/20120711195745.379060003@goodmis.org Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 2 -- arch/x86/kernel/entry_32.S | 68 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/ftrace.c | 4 --- 3 files changed, 68 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index a8475014c4a..a6cae0c1720 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -40,10 +40,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 -#ifdef CONFIG_X86_64 #define ARCH_SUPPORTS_FTRACE_SAVE_REGS #endif -#endif #ifndef __ASSEMBLY__ extern void mcount(void); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5da11d1b85a..46caa5649a5 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1123,6 +1123,7 @@ ftrace_call: popl %edx popl %ecx popl %eax +ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -1134,6 +1135,73 @@ ftrace_stub: ret END(ftrace_caller) +ENTRY(ftrace_regs_caller) + pushf /* push flags before compare (in cs location) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* + * i386 does not save SS and ESP when coming from kernel. + * Instead, to get sp, ®s->sp is used (see ptrace.h). + * Unfortunately, that means eflags must be at the same location + * as the current return ip is. We move the return ip into the + * ip location, and move flags into the return ip location. + */ + pushl 4(%esp) /* save return ip into ip slot */ + subl $MCOUNT_INSN_SIZE, (%esp) /* Adjust ip */ + + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS,13*4(%esp) + + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + movl 0x4(%ebp), %edx /* Load parent ip (2cd parameter) */ + lea (%esp), %ecx + pushl %ecx /* Save pt_regs as 4th parameter */ + leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + addl $MCOUNT_INSN_SIZE, %eax + movl %eax, 14*4(%esp) /* Put return ip back for ret */ + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret + +ftrace_restore_flags: + popf + jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b90eb1a1307..1d414029f1d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,7 +206,6 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS /* * Should never be called: * As it is only called by __ftrace_replace_code() which is called by @@ -221,7 +220,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, WARN_ON(1); return -EINVAL; } -#endif int ftrace_update_ftrace_func(ftrace_func_t func) { @@ -237,7 +235,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); @@ -245,7 +242,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) new = ftrace_call_replace(ip, (unsigned long)func); ret = ftrace_modify_code(ip, old, new); } -#endif atomic_dec(&modifying_ftrace_code); -- cgit v1.2.3-70-g09d2 From 9d3c92af47d853d4e31ee971dba7bc086275b7b3 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:50:48 +0800 Subject: KVM: x86: remove unnecessary mark_page_dirty fix: [ 132.474633] 3.5.0-rc1+ #50 Not tainted [ 132.474634] ------------------------------- [ 132.474635] include/linux/kvm_host.h:369 suspicious rcu_dereference_check() usage! [ 132.474636] [ 132.474636] other info that might help us debug this: [ 132.474636] [ 132.474638] [ 132.474638] rcu_scheduler_active = 1, debug_locks = 1 [ 132.474640] 1 lock held by qemu-kvm/2832: [ 132.474657] #0: (&vcpu->mutex){+.+.+.}, at: [] vcpu_load+0x1e/0x91 [kvm] [ 132.474658] [ 132.474658] stack backtrace: [ 132.474660] Pid: 2832, comm: qemu-kvm Not tainted 3.5.0-rc1+ #50 [ 132.474661] Call Trace: [ 132.474665] [] lockdep_rcu_suspicious+0xfc/0x105 [ 132.474675] [] kvm_memslots+0x6d/0x75 [kvm] [ 132.474683] [] gfn_to_memslot+0x14/0x4c [kvm] [ 132.474693] [] mark_page_dirty+0x17/0x2a [kvm] [ 132.474706] [] kvm_arch_vcpu_ioctl+0xbcf/0xc07 [kvm] Actually, we do not write vcpu->arch.time at this time, mark_page_dirty should be removed. Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 829b4e97255..ecc71dde4bb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2632,7 +2632,6 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) if (!vcpu->arch.time_page) return -EINVAL; src->flags |= PVCLOCK_GUEST_STOPPED; - mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT); kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); return 0; } -- cgit v1.2.3-70-g09d2 From 86fde74cf5b829627b37ca86322acfdd99b524b8 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:52:52 +0800 Subject: KVM: MMU: track the refcount when unmap the page It will trigger a WARN_ON if the page has been freed but it is still used in mmu, it can help us to detect mm bug early Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a5d6ef785b7..685a4855738 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep) return 0; pfn = spte_to_pfn(old_spte); + + /* + * KVM does not hold the refcount of the page used by + * kvm mmu, before reclaiming the page, we should + * unmap it from mmu first. + */ + WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn))); + if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) -- cgit v1.2.3-70-g09d2 From 903816fa4d016e20ec71a1a97700cfcdda115580 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:54:11 +0800 Subject: KVM: using get_fault_pfn to get the fault pfn Using get_fault_pfn to cleanup the code Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 6 ++---- include/linux/kvm_host.h | 5 +---- virt/kvm/kvm_main.c | 13 ++++--------- 3 files changed, 7 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 685a4855738..f85cc21ae95 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2513,10 +2513,8 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, unsigned long hva; slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); - if (!slot) { - get_page(fault_page); - return page_to_pfn(fault_page); - } + if (!slot) + return get_fault_pfn(); hva = gfn_to_hva_memslot(slot, gfn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6f6c18a03c5..1a7f838d30c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -383,15 +383,11 @@ id_to_memslot(struct kvm_memslots *slots, int id) static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } extern struct page *bad_page; -extern struct page *fault_page; - extern pfn_t bad_pfn; -extern pfn_t fault_pfn; int is_error_page(struct page *page); int is_error_pfn(pfn_t pfn); int is_hwpoison_pfn(pfn_t pfn); -int is_fault_pfn(pfn_t pfn); int is_noslot_pfn(pfn_t pfn); int is_invalid_pfn(pfn_t pfn); int kvm_is_error_hva(unsigned long addr); @@ -441,6 +437,7 @@ void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); void kvm_set_pfn_accessed(pfn_t pfn); void kvm_get_pfn(pfn_t pfn); +pfn_t get_fault_pfn(void); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e2b1a159e5d..0fbbf2d2160 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -103,8 +103,8 @@ static bool largepages_enabled = true; static struct page *hwpoison_page; static pfn_t hwpoison_pfn; -struct page *fault_page; -pfn_t fault_pfn; +static struct page *fault_page; +static pfn_t fault_pfn; inline int kvm_is_mmio_pfn(pfn_t pfn) { @@ -949,12 +949,6 @@ int is_hwpoison_pfn(pfn_t pfn) } EXPORT_SYMBOL_GPL(is_hwpoison_pfn); -int is_fault_pfn(pfn_t pfn) -{ - return pfn == fault_pfn; -} -EXPORT_SYMBOL_GPL(is_fault_pfn); - int is_noslot_pfn(pfn_t pfn) { return pfn == bad_pfn; @@ -1038,11 +1032,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -static pfn_t get_fault_pfn(void) +pfn_t get_fault_pfn(void) { get_page(fault_page); return fault_pfn; } +EXPORT_SYMBOL_GPL(get_fault_pfn); int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) -- cgit v1.2.3-70-g09d2 From d566104853361cc377c61f70e41c1ad3d44b86c6 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:56:16 +0800 Subject: KVM: remove the unused parameter of gfn_to_pfn_memslot The parameter, 'kvm', is not used in gfn_to_pfn_memslot, we can happily remove it Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/powerpc/kvm/e500_tlb.c | 2 +- arch/x86/kvm/mmu.c | 2 +- include/linux/kvm_host.h | 5 ++--- virt/kvm/iommu.c | 10 +++++----- virt/kvm/kvm_main.c | 15 +++++++-------- 5 files changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index c510fc96130..c8f6c582674 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (likely(!pfnmap)) { unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); - pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); + pfn = gfn_to_pfn_memslot(slot, gfn); if (is_error_pfn(pfn)) { printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", (long)gfn); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f85cc21ae95..4f77f7ac6d2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2518,7 +2518,7 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, hva = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn_atomic(vcpu->kvm, hva); + return hva_to_pfn_atomic(hva); } static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e8d13a072d2..db9aa917840 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -418,15 +418,14 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr); +pfn_t hva_to_pfn_atomic(unsigned long addr); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, bool write_fault, bool *writable); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); -pfn_t gfn_to_pfn_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn); +pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index e9fff9830bf..c03f1fb2670 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, - gfn_t gfn, unsigned long size) +static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, + unsigned long size) { gfn_t end_gfn; pfn_t pfn; - pfn = gfn_to_pfn_memslot(kvm, slot, gfn); + pfn = gfn_to_pfn_memslot(slot, gfn); end_gfn = gfn + (size >> PAGE_SHIFT); gfn += 1; @@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, return pfn; while (gfn < end_gfn) - gfn_to_pfn_memslot(kvm, slot, gfn++); + gfn_to_pfn_memslot(slot, gfn++); return pfn; } @@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) * Pin all pages we are about to map in memory. This is * important because we unmap and unpin in 4kb steps later. */ - pfn = kvm_pin_pages(kvm, slot, gfn, page_size); + pfn = kvm_pin_pages(slot, gfn, page_size); if (is_error_pfn(pfn)) { gfn += 1; continue; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f955eee92aa..68dda513cd7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1062,8 +1062,8 @@ static inline int check_user_page_hwpoison(unsigned long addr) return rc == -EHWPOISON; } -static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, - bool *async, bool write_fault, bool *writable) +static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, + bool write_fault, bool *writable) { struct page *page[1]; int npages = 0; @@ -1143,9 +1143,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, return pfn; } -pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) +pfn_t hva_to_pfn_atomic(unsigned long addr) { - return hva_to_pfn(kvm, addr, true, NULL, true, NULL); + return hva_to_pfn(addr, true, NULL, true, NULL); } EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); @@ -1163,7 +1163,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, return page_to_pfn(bad_page); } - return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); + return hva_to_pfn(addr, atomic, async, write_fault, writable); } pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) @@ -1192,11 +1192,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, } EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); -pfn_t gfn_to_pfn_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn) +pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { unsigned long addr = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn(kvm, addr, false, NULL, true, NULL); + return hva_to_pfn(addr, false, NULL, true, NULL); } int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, -- cgit v1.2.3-70-g09d2 From 0fa060714753ef65aef294b3462efb0212520933 Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Thu, 28 Jun 2012 15:16:19 +0800 Subject: KVM: VMX: Fix typos Signed-off-by: Guo Chao Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c39b60707e0..2300e5319ed 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1343,7 +1343,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) guest_efer = vmx->vcpu.arch.efer; /* - * NX is emulated; LMA and LME handled by hardware; SCE meaninless + * NX is emulated; LMA and LME handled by hardware; SCE meaningless * outside long mode */ ignore_bits = EFER_NX | EFER_SCE; @@ -3261,7 +3261,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, * qemu binaries. * IA32 arch specifies that at the time of processor reset the * "Accessed" bit in the AR field of segment registers is 1. And qemu - * is setting it to 0 in the usedland code. This causes invalid guest + * is setting it to 0 in the userland code. This causes invalid guest * state vmexit when "unrestricted guest" mode is turned on. * Fix for this setup issue in cpu_reset is being pushed in the qemu * tree. Newer qemu binaries with that qemu fix would not need this @@ -4446,7 +4446,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xc1; } -/* called to set cr0 as approriate for a mov-to-cr0 exit. */ +/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { if (to_vmx(vcpu)->nested.vmxon && -- cgit v1.2.3-70-g09d2 From c5ec2e56d0a654797ee43a31001738ccd05eef0b Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Thu, 28 Jun 2012 15:16:43 +0800 Subject: KVM: SVM: Fix typos Signed-off-by: Guo Chao Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index baead950d6c..687d0c30e55 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2063,7 +2063,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) if (svm->nested.intercept & 1ULL) { /* * The #vmexit can't be emulated here directly because this - * code path runs with irqs and preemtion disabled. A + * code path runs with irqs and preemption disabled. A * #vmexit emulation might sleep. Only signal request for * the #vmexit here. */ @@ -2409,7 +2409,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { /* * This function merges the msr permission bitmaps of kvm and the - * nested vmcb. It is omptimized in that it only merges the parts where + * nested vmcb. It is optimized in that it only merges the parts where * the kvm msr permission bitmap may contain zero bits */ int i; -- cgit v1.2.3-70-g09d2 From 4a9699807c491740c4dfe7b6a06703e1d262e802 Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Thu, 28 Jun 2012 15:17:27 +0800 Subject: KVM: x86: Fix typos in x86.c Signed-off-by: Guo Chao Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecc71dde4bb..3d9d08edbf2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1093,7 +1093,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) * For each generation, we track the original measured * nanosecond time, offset, and write, so if TSCs are in * sync, we can match exact offset, and if not, we can match - * exact software computaion in compute_guest_tsc() + * exact software computation in compute_guest_tsc() * * These values are tracked in kvm->arch.cur_xxx variables. */ @@ -1500,7 +1500,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; - /* Bits 2:5 are resrved, Should be zero */ + /* Bits 2:5 are reserved, Should be zero */ if (data & 0x3c) return 1; @@ -1723,7 +1723,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) * Ignore all writes to this no longer documented MSR. * Writes are only relevant for old K7 processors, * all pre-dating SVM, but a recommended workaround from - * AMD for these chips. It is possible to speicify the + * AMD for these chips. It is possible to specify the * affected processor models on the command line, hence * the need to ignore the workaround. */ @@ -4491,7 +4491,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) /* * if emulation was due to access to shadowed page table - * and it failed try to unshadow page and re-entetr the + * and it failed try to unshadow page and re-enter the * guest to let CPU execute the instruction. */ if (kvm_mmu_unprotect_page_virt(vcpu, gva)) @@ -5587,7 +5587,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) /* * We are here if userspace calls get_regs() in the middle of * instruction emulation. Registers state needs to be copied - * back from emulation context to vcpu. Usrapace shouldn't do + * back from emulation context to vcpu. Userspace shouldn't do * that usually, but some bad designed PV devices (vmware * backdoor interface) need this to work */ @@ -6116,7 +6116,7 @@ int kvm_arch_hardware_enable(void *garbage) * as we reset last_host_tsc on all VCPUs to stop this from being * called multiple times (one for each physical CPU bringup). * - * Platforms with unnreliable TSCs don't have to deal with this, they + * Platforms with unreliable TSCs don't have to deal with this, they * will be compensated by the logic in vcpu_load, which sets the TSC to * catchup mode. This will catchup all VCPUs to real time, but cannot * guarantee that they stay in perfect synchronization. @@ -6391,7 +6391,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, map_flags = MAP_SHARED | MAP_ANONYMOUS; /*To keep backward compatibility with older userspace, - *x86 needs to hanlde !user_alloc case. + *x86 needs to handle !user_alloc case. */ if (!user_alloc) { if (npages && !old.rmap) { -- cgit v1.2.3-70-g09d2 From fc0586807dc4e307da6d3ba4ed5c927b6d27276c Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Thu, 28 Jun 2012 15:19:51 +0800 Subject: KVM: x86: Fix typos in emulate.c Signed-off-by: Guo Chao Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 97d9a9914ba..85b611e13e8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -642,7 +642,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) goto bad; } else { - /* exapand-down segment */ + /* expand-down segment */ if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) goto bad; lim = desc.d ? 0xffffffff : 0xffff; @@ -1383,7 +1383,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, err_code = selector & 0xfffc; err_vec = GP_VECTOR; - /* can't load system descriptor into segment selecor */ + /* can't load system descriptor into segment selector */ if (seg <= VCPU_SREG_GS && !seg_desc.s) goto exception; @@ -2398,7 +2398,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* - * Now load segment descriptors. If fault happenes at this stage + * Now load segment descriptors. If fault happens at this stage * it is handled in a context of new task */ ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); @@ -2640,7 +2640,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, * * 1. jmp/call/int to task gate: Check against DPL of the task gate * 2. Exception/IRQ/iret: No check is performed - * 3. jmp/call to TSS: Check agains DPL of the TSS + * 3. jmp/call to TSS: Check against DPL of the TSS */ if (reason == TASK_SWITCH_GATE) { if (idt_index != -1) { @@ -2681,7 +2681,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; /* set back link to prev task only if NT bit is set in eflags - note that old_tss_sel is not used afetr this point */ + note that old_tss_sel is not used after this point */ if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) old_tss_sel = 0xffff; -- cgit v1.2.3-70-g09d2 From bbbda79510b6e5d399fae76bdb9b999286eb1b59 Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Thu, 28 Jun 2012 15:20:58 +0800 Subject: KVM: x86: Fix typos in cpuid.c Signed-off-by: Guo Chao Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0595f1397b7..b496da684bd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } case 7: { entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - /* Mask ebx against host capbability word 9 */ + /* Mask ebx against host capability word 9 */ if (index == 0) { entry->ebx &= kvm_supported_word9_x86_features; cpuid_mask(&entry->ebx, 9); -- cgit v1.2.3-70-g09d2 From d5b0b5b196b474ef26f46f2036c8fbaa8cca2cf5 Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Thu, 28 Jun 2012 15:22:57 +0800 Subject: KVM: x86: Fix typos in lapic.c Signed-off-by: Guo Chao Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ce878788a39..fff7173f6a7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -719,7 +719,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, { unsigned char alignment = offset & 0xf; u32 result; - /* this bitmask has a bit cleared for each reserver register */ + /* this bitmask has a bit cleared for each reserved register */ static const u64 rmask = 0x43ff01ffffffe70cULL; if ((alignment + len) > 4) { @@ -792,7 +792,7 @@ static void start_apic_timer(struct kvm_lapic *apic) atomic_set(&apic->lapic_timer.pending, 0); if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { - /* lapic timer in oneshot or peroidic mode */ + /* lapic timer in oneshot or periodic mode */ now = apic->lapic_timer.timer.base->get_time(); apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->divide_count; -- cgit v1.2.3-70-g09d2 From c7a7062fa00db7dc66280a72cd9dad0f3595bc66 Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Thu, 28 Jun 2012 15:23:08 +0800 Subject: KVM: x86: Fix typos in pmu.c Signed-off-by: Guo Chao Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 2e88438ffd8..db206a46e0d 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -1,5 +1,5 @@ /* - * Kernel-based Virtual Machine -- Performane Monitoring Unit support + * Kernel-based Virtual Machine -- Performance Monitoring Unit support * * Copyright 2011 Red Hat, Inc. and/or its affiliates. * -- cgit v1.2.3-70-g09d2 From 93b6547e2219784b2df790353e083e0bdbebd2c2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 19 Jul 2012 13:55:53 +0300 Subject: KVM: switch to symbolic name for irq_states size Use PIC_NUM_PINS instead of hard-coded 16 for pic pins. Signed-off-by: Michael S. Tsirkin Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/irq.h | 2 +- virt/kvm/irq_comm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2086f2bfba3..2d03568e949 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -70,7 +70,7 @@ struct kvm_pic { struct kvm_io_device dev_slave; struct kvm_io_device dev_eclr; void (*ack_notifier)(void *opaque, int irq); - unsigned long irq_states[16]; + unsigned long irq_states[PIC_NUM_PINS]; }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a6a0365475e..22aae8fd146 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -343,11 +343,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, switch (ue->u.irqchip.irqchip) { case KVM_IRQCHIP_PIC_MASTER: e->set = kvm_set_pic_irq; - max_pin = 16; + max_pin = PIC_NUM_PINS; break; case KVM_IRQCHIP_PIC_SLAVE: e->set = kvm_set_pic_irq; - max_pin = 16; + max_pin = PIC_NUM_PINS; delta = 8; break; case KVM_IRQCHIP_IOAPIC: -- cgit v1.2.3-70-g09d2 From f2a743473194a1ad44a85f8b63aeef9d63e5bf47 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Wed, 18 Jul 2012 19:07:32 +0530 Subject: KVM: Add config to support ple or cpu relax optimzation Suggested-by: Avi Kivity Signed-off-by: Raghavendra K T Reviewed-by: Marcelo Tosatti Reviewed-by: Rik van Riel Tested-by: Christian Borntraeger # on s390x Signed-off-by: Avi Kivity --- arch/s390/kvm/Kconfig | 1 + arch/x86/kvm/Kconfig | 1 + virt/kvm/Kconfig | 3 +++ 3 files changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 78eb9847008..a6e2677724e 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM depends on HAVE_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a28f338843e..45c044f0fff 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -37,6 +37,7 @@ config KVM select TASK_DELAY_ACCT select PERF_EVENTS select HAVE_KVM_MSI + select HAVE_KVM_CPU_RELAX_INTERCEPT ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 28694f4a913..d01b24b72c6 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -21,3 +21,6 @@ config KVM_ASYNC_PF config HAVE_KVM_MSI bool + +config HAVE_KVM_CPU_RELAX_INTERCEPT + bool -- cgit v1.2.3-70-g09d2 From 3b2bd2f800ba9488d9ad493216a0c07d71055b56 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 26 Jul 2012 11:57:43 +0800 Subject: KVM: MMU: use kvm_release_pfn_clean to release pfn The current code depends on the fact that fault_page is the normal page, however, we will use the error code instead of these dummy pages in the later patch, so we use kvm_release_pfn_clean to release pfn which will release the error code properly Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 24199344359..a9a20528e70 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3275,7 +3275,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, if (!async) return false; /* *pfn has correct page already */ - put_page(pfn_to_page(*pfn)); + kvm_release_pfn_clean(*pfn); if (!prefault && can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(gva, gfn); -- cgit v1.2.3-70-g09d2 From f23b070e662ca55a8fdaaa28537af06cab664499 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 26 Jul 2012 13:12:22 +0800 Subject: KVM: x86 emulator: simplify read_emulated No need split mmio read region into 8-bits pieces since we do it in emulator_read_write_onepage Changelog: Add a WARN_ON to check read-cache overflow Acked-by: Marcelo Tosatti Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 85b611e13e8..2c5d1e65d9d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1166,24 +1166,21 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, int rc; struct read_cache *mc = &ctxt->mem_read; - while (size) { - int n = min(size, 8u); - size -= n; - if (mc->pos < mc->end) - goto read_cached; - - rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n, - &ctxt->exception); - if (rc != X86EMUL_CONTINUE) - return rc; - mc->end += n; + if (mc->pos < mc->end) + goto read_cached; - read_cached: - memcpy(dest, mc->data + mc->pos, n); - mc->pos += n; - dest += n; - addr += n; - } + WARN_ON((mc->end + size) >= sizeof(mc->data)); + + rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size, + &ctxt->exception); + if (rc != X86EMUL_CONTINUE) + return rc; + + mc->end += size; + +read_cached: + memcpy(dest, mc->data + mc->pos, size); + mc->pos += size; return X86EMUL_CONTINUE; } -- cgit v1.2.3-70-g09d2 From 99245b507dc3b1b2815d6a6cb4e94a6b7018a24b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 25 Jul 2012 15:49:42 +0300 Subject: KVM: x86 emulator: drop unneeded call to get_segment() setup_syscalls_segments() calls get_segment() and than overwrites all but one of the structure fields and this one should also be overwritten anyway, so we can drop call to get_segment() and avoid a couple of vmreads on vmx. Also drop zeroing ss/cs structures since most of the fields are set anyway. Just set those that were not set explicitly. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2c5d1e65d9d..10f0136f50c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2035,12 +2035,6 @@ static void setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct desc_struct *cs, struct desc_struct *ss) { - u16 selector; - - memset(cs, 0, sizeof(struct desc_struct)); - ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); - memset(ss, 0, sizeof(struct desc_struct)); - cs->l = 0; /* will be adjusted later */ set_desc_base(cs, 0); /* flat segment */ cs->g = 1; /* 4kb granularity */ @@ -2050,6 +2044,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, cs->dpl = 0; /* will be adjusted later */ cs->p = 1; cs->d = 1; + cs->avl = 0; set_desc_base(ss, 0); /* flat segment */ set_desc_limit(ss, 0xfffff); /* 4GB limit */ @@ -2059,6 +2054,8 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, ss->d = 1; /* 32bit stack segment */ ss->dpl = 0; ss->p = 1; + ss->l = 0; + ss->avl = 0; } static bool vendor_intel(struct x86_emulate_ctxt *ctxt) -- cgit v1.2.3-70-g09d2 From 23d43cf998275bc97437931c0cdee1df2c1aa3ca Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 24 Jul 2012 08:51:20 -0400 Subject: KVM: Move KVM_IRQ_LINE to arch-generic code Handle KVM_IRQ_LINE and KVM_IRQ_LINE_STATUS in the generic kvm_vm_ioctl() function and call into kvm_vm_ioctl_irq_line(). This is even more relevant when KVM/ARM also uses this ioctl. Signed-off-by: Christoffer Dall Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 33 ++++++++++----------------------- arch/x86/kvm/x86.c | 33 ++++++++++----------------------- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 23 +++++++++++++++++++++++ 4 files changed, 44 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bd77cb507c1..eac65380bd2 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -924,6 +924,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) +{ + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level); + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -963,29 +973,6 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; } break; - case KVM_IRQ_LINE_STATUS: - case KVM_IRQ_LINE: { - struct kvm_irq_level irq_event; - - r = -EFAULT; - if (copy_from_user(&irq_event, argp, sizeof irq_event)) - goto out; - r = -ENXIO; - if (irqchip_in_kernel(kvm)) { - __s32 status; - status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event.irq, irq_event.level); - if (ioctl == KVM_IRQ_LINE_STATUS) { - r = -EFAULT; - irq_event.status = status; - if (copy_to_user(argp, &irq_event, - sizeof irq_event)) - goto out; - } - r = 0; - } - break; - } case KVM_GET_IRQCHIP: { /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ struct kvm_irqchip chip; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3d9d08edbf2..b6379e55ee2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3165,6 +3165,16 @@ out: return r; } +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) +{ + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level); + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3271,29 +3281,6 @@ long kvm_arch_vm_ioctl(struct file *filp, create_pit_unlock: mutex_unlock(&kvm->slots_lock); break; - case KVM_IRQ_LINE_STATUS: - case KVM_IRQ_LINE: { - struct kvm_irq_level irq_event; - - r = -EFAULT; - if (copy_from_user(&irq_event, argp, sizeof irq_event)) - goto out; - r = -ENXIO; - if (irqchip_in_kernel(kvm)) { - __s32 status; - status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event.irq, irq_event.level); - if (ioctl == KVM_IRQ_LINE_STATUS) { - r = -EFAULT; - irq_event.status = status; - if (copy_to_user(argp, &irq_event, - sizeof irq_event)) - goto out; - } - r = 0; - } - break; - } case KVM_GET_IRQCHIP: { /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ struct kvm_irqchip *chip; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4e60d3695e4..dbc65f9d6a2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -498,6 +498,7 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc); +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index de89497fe4c..bcf973ec98f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2148,6 +2148,29 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_send_userspace_msi(kvm, &msi); break; } +#endif +#ifdef __KVM_HAVE_IRQ_LINE + case KVM_IRQ_LINE_STATUS: + case KVM_IRQ_LINE: { + struct kvm_irq_level irq_event; + + r = -EFAULT; + if (copy_from_user(&irq_event, argp, sizeof irq_event)) + goto out; + + r = kvm_vm_ioctl_irq_line(kvm, &irq_event); + if (r) + goto out; + + r = -EFAULT; + if (ioctl == KVM_IRQ_LINE_STATUS) { + if (copy_to_user(argp, &irq_event, sizeof irq_event)) + goto out; + } + + r = 0; + break; + } #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); -- cgit v1.2.3-70-g09d2 From 4a4541a40e1fe145c72c4b959fac524a5600d9fb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 22 Jul 2012 17:41:00 +0300 Subject: KVM: Don't update PPR on any APIC read The current code will update the PPR on almost any APIC read; however that's only required if we read the PPR. kvm_update_ppr() shows up in some profiles, albeit with a low usage (~1%). This should reduce it further (it will still be called during interrupt processing). Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index fff7173f6a7..ad7fff7ad13 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -696,12 +696,14 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) val = apic_get_tmcct(apic); break; - + case APIC_PROCPRI: + apic_update_ppr(apic); + val = apic_get_reg(apic, offset); + break; case APIC_TASKPRI: report_tpr_access(apic, false); /* fall thru */ default: - apic_update_ppr(apic); val = apic_get_reg(apic, offset); break; } -- cgit v1.2.3-70-g09d2 From e4ea3f6b1bf3d489674a3660db652636e50186f9 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 19 Jul 2012 13:04:47 -0400 Subject: ftrace/x86_32: Simplify parameter setup for ftrace_regs_caller The final position of the stack after saving regs and setting up the parameters for ftrace_regs_call, is the position of the pt_regs needed for the 4th parameter. Instead of saving it into a temporary reg and pushing the reg, simply push the stack pointer. Link: http://lkml.kernel.org/r/1342702344.12353.16.camel@gandalf.stny.rr.com Reviewed-by: Masami Hiramatsu Signed-off-by: Uros Bizjak Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_32.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 46caa5649a5..4dc301709e7 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1169,10 +1169,9 @@ ENTRY(ftrace_regs_caller) movl $__KERNEL_CS,13*4(%esp) movl 12*4(%esp), %eax /* Load ip (1st parameter) */ - movl 0x4(%ebp), %edx /* Load parent ip (2cd parameter) */ - lea (%esp), %ecx - pushl %ecx /* Save pt_regs as 4th parameter */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ GLOBAL(ftrace_regs_call) call ftrace_stub -- cgit v1.2.3-70-g09d2 From 5767cfeaa9ec7b67c802143394f3ad9f8b174eb8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 3 Jul 2012 16:16:09 -0400 Subject: ftrace/x86: Remove function_trace_stop check from graph caller The graph caller is called by the mcount callers, which already does the check against the function_trace_stop variable. No reason to check it again. Link: http://lkml.kernel.org/r/20120711195745.588538769@goodmis.org Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_32.S | 3 --- arch/x86/kernel/entry_64.S | 3 --- 2 files changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 4dc301709e7..061ac17ee97 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1241,9 +1241,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 52bda2e8f7a..38308fa7d7e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -213,9 +213,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - MCOUNT_SAVE_FRAME leaq 8(%rbp), %rdi -- cgit v1.2.3-70-g09d2 From e52538965119319447c0800c534da73142c27be2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jun 2012 19:28:38 +0900 Subject: kprobes/x86: ftrace based optimization for x86 Add function tracer based kprobe optimization support handlers on x86. This allows kprobes to use function tracer for probing on mcount call. Link: http://lkml.kernel.org/r/20120605102838.27845.26317.stgit@localhost.localdomain Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Ananth N Mavinakayanahalli Cc: "Frank Ch. Eigler" Cc: Andrew Morton Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu [ Updated to new port of ftrace save regs functions ] Signed-off-by: Steven Rostedt --- arch/x86/include/asm/kprobes.h | 1 + arch/x86/kernel/kprobes.c | 48 ++++++++++++++++++++++++++++++++++++++++++ include/linux/kprobes.h | 2 +- kernel/kprobes.c | 2 +- 4 files changed, 51 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 54788253915..d3ddd17405d 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,6 +27,7 @@ #include #define __ARCH_WANT_KPROBES_INSN_SLOT +#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b..47ae1023a93 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1052,6 +1052,54 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +#ifdef KPROBES_CAN_USE_FTRACE +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + regs->ip += sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler) + p->pre_handler(p, regs); + + if (unlikely(p->post_handler)) { + /* Emulate singlestep as if there is a 5byte nop */ + regs->ip = ip + MCOUNT_INSN_SIZE; + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); + regs->ip = ip; /* Recover for next callback */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} +#endif + int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index aa0d05e852e..23755ba42ab 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -318,7 +318,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif /* CONFIG_OPTPROBES */ #ifdef KPROBES_CAN_USE_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct pt_regs *regs); + struct ftrace_ops *ops, struct pt_regs *regs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); #endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 69c16efc315..35b4315d84f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -921,7 +921,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) #ifdef KPROBES_CAN_USE_FTRACE static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { - .regs_func = kprobe_ftrace_handler, + .func = kprobe_ftrace_handler, .flags = FTRACE_OPS_FL_SAVE_REGS, }; static int kprobe_ftrace_enabled; -- cgit v1.2.3-70-g09d2 From e9d90d472da97e1b1560bffb89578ba082c88a69 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 26 Jul 2012 18:01:50 +0300 Subject: KVM: Remove internal timer abstraction kvm_timer_fn(), the sole inhabitant of timer.c, is only used by lapic.c. Move it there to make it easier to hack on it. struct kvm_timer is a thin wrapper around hrtimer, and only adds obfuscation. Move near its two users (with different names) to prepare for simplification. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/Makefile | 2 +- arch/x86/kvm/i8254.c | 8 ++++---- arch/x86/kvm/i8254.h | 18 +++++++++++++++++- arch/x86/kvm/kvm_timer.h | 18 ------------------ arch/x86/kvm/lapic.c | 30 +++++++++++++++++++++++++++++- arch/x86/kvm/lapic.h | 17 ++++++++++++++++- arch/x86/kvm/timer.c | 47 ----------------------------------------------- 7 files changed, 67 insertions(+), 73 deletions(-) delete mode 100644 arch/x86/kvm/kvm_timer.h delete mode 100644 arch/x86/kvm/timer.c (limited to 'arch/x86') diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 4f579e8dcac..04d30401c5c 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o timer.o cpuid.o pmu.o + i8254.o cpuid.o pmu.o kvm-intel-y += vmx.o kvm-amd-y += svm.o diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index adba28f88d1..1d8e75702d9 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -272,14 +272,14 @@ static void destroy_pit_timer(struct kvm_pit *pit) flush_kthread_work(&pit->expired); } -static bool kpit_is_periodic(struct kvm_timer *ktimer) +static bool kpit_is_periodic(struct kvm_pit_timer *ktimer) { struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state, pit_timer); return ps->is_periodic; } -static struct kvm_timer_ops kpit_ops = { +static struct kvm_pit_timer_ops kpit_ops = { .is_periodic = kpit_is_periodic, }; @@ -322,7 +322,7 @@ static void pit_do_work(struct kthread_work *work) static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) { - struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); + struct kvm_pit_timer *ktimer = container_of(data, struct kvm_pit_timer, timer); struct kvm_pit *pt = ktimer->kvm->arch.vpit; if (ktimer->reinject || !atomic_read(&ktimer->pending)) { @@ -340,7 +340,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) { struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; - struct kvm_timer *pt = &ps->pit_timer; + struct kvm_pit_timer *pt = &ps->pit_timer; s64 interval; if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index fdf40425ea1..3351816e8b3 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -21,10 +21,26 @@ struct kvm_kpit_channel_state { ktime_t count_load_time; }; +struct kvm_pit_timer { + struct hrtimer timer; + s64 period; /* unit: ns */ + u32 timer_mode_mask; + u64 tscdeadline; + atomic_t pending; /* accumulated triggered timers */ + bool reinject; + struct kvm_pit_timer_ops *t_ops; + struct kvm *kvm; + struct kvm_vcpu *vcpu; +}; + +struct kvm_pit_timer_ops { + bool (*is_periodic)(struct kvm_pit_timer *); +}; + struct kvm_kpit_state { struct kvm_kpit_channel_state channels[3]; u32 flags; - struct kvm_timer pit_timer; + struct kvm_pit_timer pit_timer; bool is_periodic; u32 speaker_data_on; struct mutex lock; diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h deleted file mode 100644 index 497dbaa366d..00000000000 --- a/arch/x86/kvm/kvm_timer.h +++ /dev/null @@ -1,18 +0,0 @@ - -struct kvm_timer { - struct hrtimer timer; - s64 period; /* unit: ns */ - u32 timer_mode_mask; - u64 tscdeadline; - atomic_t pending; /* accumulated triggered timers */ - bool reinject; - struct kvm_timer_ops *t_ops; - struct kvm *kvm; - struct kvm_vcpu *vcpu; -}; - -struct kvm_timer_ops { - bool (*is_periodic)(struct kvm_timer *); -}; - -enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ad7fff7ad13..61ed32cd17c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1262,6 +1262,34 @@ static const struct kvm_io_device_ops apic_mmio_ops = { .write = apic_mmio_write, }; +static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) +{ + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); + struct kvm_vcpu *vcpu = ktimer->vcpu; + wait_queue_head_t *q = &vcpu->wq; + + /* + * There is a race window between reading and incrementing, but we do + * not care about potentially losing timer events in the !reinject + * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked + * in vcpu_enter_guest. + */ + if (ktimer->reinject || !atomic_read(&ktimer->pending)) { + atomic_inc(&ktimer->pending); + /* FIXME: this code should not know anything about vcpus */ + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + } + + if (waitqueue_active(q)) + wake_up_interruptible(q); + + if (ktimer->t_ops->is_periodic(ktimer)) { + hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); + return HRTIMER_RESTART; + } else + return HRTIMER_NORESTART; +} + int kvm_create_lapic(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic; @@ -1285,7 +1313,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - apic->lapic_timer.timer.function = kvm_timer_fn; + apic->lapic_timer.timer.function = apic_timer_fn; apic->lapic_timer.t_ops = &lapic_timer_ops; apic->lapic_timer.kvm = vcpu->kvm; apic->lapic_timer.vcpu = vcpu; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 4af5405ae1e..d7251c92ed4 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -2,10 +2,25 @@ #define __KVM_X86_LAPIC_H #include "iodev.h" -#include "kvm_timer.h" #include +struct kvm_timer { + struct hrtimer timer; + s64 period; /* unit: ns */ + u32 timer_mode_mask; + u64 tscdeadline; + atomic_t pending; /* accumulated triggered timers */ + bool reinject; + struct kvm_timer_ops *t_ops; + struct kvm *kvm; + struct kvm_vcpu *vcpu; +}; + +struct kvm_timer_ops { + bool (*is_periodic)(struct kvm_timer *); +}; + struct kvm_lapic { unsigned long base_address; struct kvm_io_device dev; diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c deleted file mode 100644 index 6b85cc647f3..00000000000 --- a/arch/x86/kvm/timer.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run virtual - * machines without emulation or binary translation. - * - * timer support - * - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include -#include -#include -#include -#include "kvm_timer.h" - -enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) -{ - struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); - struct kvm_vcpu *vcpu = ktimer->vcpu; - wait_queue_head_t *q = &vcpu->wq; - - /* - * There is a race window between reading and incrementing, but we do - * not care about potentially losing timer events in the !reinject - * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked - * in vcpu_enter_guest. - */ - if (ktimer->reinject || !atomic_read(&ktimer->pending)) { - atomic_inc(&ktimer->pending); - /* FIXME: this code should not know anything about vcpus */ - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); - } - - if (waitqueue_active(q)) - wake_up_interruptible(q); - - if (ktimer->t_ops->is_periodic(ktimer)) { - hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); - return HRTIMER_RESTART; - } else - return HRTIMER_NORESTART; -} -- cgit v1.2.3-70-g09d2 From 2a6eac9638a92b61de04bac4233d8ca665ae96af Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 26 Jul 2012 18:01:51 +0300 Subject: KVM: Simplify kvm_timer 'reinject' is never initialized 't_ops' only serves as indirection to lapic_is_periodic; call that directly instead 'kvm' is never used 'vcpu' can be derived via container_of Remove these fields. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 18 +++++------------- arch/x86/kvm/lapic.h | 8 -------- 2 files changed, 5 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 61ed32cd17c..0cd431c85d3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1214,10 +1214,8 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu) *---------------------------------------------------------------------- */ -static bool lapic_is_periodic(struct kvm_timer *ktimer) +static bool lapic_is_periodic(struct kvm_lapic *apic) { - struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, - lapic_timer); return apic_lvtt_period(apic); } @@ -1253,10 +1251,6 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) kvm_apic_local_deliver(apic, APIC_LVT0); } -static struct kvm_timer_ops lapic_timer_ops = { - .is_periodic = lapic_is_periodic, -}; - static const struct kvm_io_device_ops apic_mmio_ops = { .read = apic_mmio_read, .write = apic_mmio_write, @@ -1265,7 +1259,8 @@ static const struct kvm_io_device_ops apic_mmio_ops = { static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) { struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); - struct kvm_vcpu *vcpu = ktimer->vcpu; + struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); + struct kvm_vcpu *vcpu = apic->vcpu; wait_queue_head_t *q = &vcpu->wq; /* @@ -1274,7 +1269,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked * in vcpu_enter_guest. */ - if (ktimer->reinject || !atomic_read(&ktimer->pending)) { + if (!atomic_read(&ktimer->pending)) { atomic_inc(&ktimer->pending); /* FIXME: this code should not know anything about vcpus */ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); @@ -1283,7 +1278,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) if (waitqueue_active(q)) wake_up_interruptible(q); - if (ktimer->t_ops->is_periodic(ktimer)) { + if (lapic_is_periodic(apic)) { hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); return HRTIMER_RESTART; } else @@ -1314,9 +1309,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); apic->lapic_timer.timer.function = apic_timer_fn; - apic->lapic_timer.t_ops = &lapic_timer_ops; - apic->lapic_timer.kvm = vcpu->kvm; - apic->lapic_timer.vcpu = vcpu; apic->base_address = APIC_DEFAULT_PHYS_BASE; vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index d7251c92ed4..166766fffd9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -11,14 +11,6 @@ struct kvm_timer { u32 timer_mode_mask; u64 tscdeadline; atomic_t pending; /* accumulated triggered timers */ - bool reinject; - struct kvm_timer_ops *t_ops; - struct kvm *kvm; - struct kvm_vcpu *vcpu; -}; - -struct kvm_timer_ops { - bool (*is_periodic)(struct kvm_timer *); }; struct kvm_lapic { -- cgit v1.2.3-70-g09d2 From 9d9d2239bdecd525ce3eb6cbfe4abb925c98208c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 26 Jul 2012 18:01:52 +0300 Subject: KVM: Simplify kvm_pit_timer 'timer_mode_mask' is unused 'tscdeadline' is unused 't_ops' only adds needless indirection 'vcpu' is unused Remove. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8254.c | 14 +------------- arch/x86/kvm/i8254.h | 8 -------- 2 files changed, 1 insertion(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 1d8e75702d9..a9e187a5b19 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -272,17 +272,6 @@ static void destroy_pit_timer(struct kvm_pit *pit) flush_kthread_work(&pit->expired); } -static bool kpit_is_periodic(struct kvm_pit_timer *ktimer) -{ - struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state, - pit_timer); - return ps->is_periodic; -} - -static struct kvm_pit_timer_ops kpit_ops = { - .is_periodic = kpit_is_periodic, -}; - static void pit_do_work(struct kthread_work *work) { struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); @@ -330,7 +319,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) queue_kthread_work(&pt->worker, &pt->expired); } - if (ktimer->t_ops->is_periodic(ktimer)) { + if (pt->pit_state.is_periodic) { hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); return HRTIMER_RESTART; } else @@ -357,7 +346,6 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) ps->is_periodic = is_period; pt->timer.function = pit_timer_fn; - pt->t_ops = &kpit_ops; pt->kvm = ps->pit->kvm; atomic_set(&pt->pending, 0); diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 3351816e8b3..c9bbcb889c4 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -24,17 +24,9 @@ struct kvm_kpit_channel_state { struct kvm_pit_timer { struct hrtimer timer; s64 period; /* unit: ns */ - u32 timer_mode_mask; - u64 tscdeadline; atomic_t pending; /* accumulated triggered timers */ bool reinject; - struct kvm_pit_timer_ops *t_ops; struct kvm *kvm; - struct kvm_vcpu *vcpu; -}; - -struct kvm_pit_timer_ops { - bool (*is_periodic)(struct kvm_pit_timer *); }; struct kvm_kpit_state { -- cgit v1.2.3-70-g09d2 From 26ef19242f6e4d747a61b5fd8da72343838864e4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 26 Jul 2012 18:01:53 +0300 Subject: KVM: fold kvm_pit_timer into kvm_kpit_state One structure nests inside the other, providing no value at all. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8254.c | 52 +++++++++++++++++++++++++--------------------------- arch/x86/kvm/i8254.h | 14 +++++--------- arch/x86/kvm/x86.c | 2 +- 3 files changed, 31 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index a9e187a5b19..11300d2fa71 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -108,7 +108,7 @@ static s64 __kpit_elapsed(struct kvm *kvm) ktime_t remaining; struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; - if (!ps->pit_timer.period) + if (!ps->period) return 0; /* @@ -120,9 +120,9 @@ static s64 __kpit_elapsed(struct kvm *kvm) * itself with the initial count and continues counting * from there. */ - remaining = hrtimer_get_remaining(&ps->pit_timer.timer); - elapsed = ps->pit_timer.period - ktime_to_ns(remaining); - elapsed = mod_64(elapsed, ps->pit_timer.period); + remaining = hrtimer_get_remaining(&ps->timer); + elapsed = ps->period - ktime_to_ns(remaining); + elapsed = mod_64(elapsed, ps->period); return elapsed; } @@ -238,12 +238,12 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) int value; spin_lock(&ps->inject_lock); - value = atomic_dec_return(&ps->pit_timer.pending); + value = atomic_dec_return(&ps->pending); if (value < 0) /* spurious acks can be generated if, for example, the * PIC is being reset. Handle it gracefully here */ - atomic_inc(&ps->pit_timer.pending); + atomic_inc(&ps->pending); else if (value > 0) /* in this case, we had multiple outstanding pit interrupts * that we needed to inject. Reinject @@ -261,14 +261,14 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) if (!kvm_vcpu_is_bsp(vcpu) || !pit) return; - timer = &pit->pit_state.pit_timer.timer; + timer = &pit->pit_state.timer; if (hrtimer_cancel(timer)) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } static void destroy_pit_timer(struct kvm_pit *pit) { - hrtimer_cancel(&pit->pit_state.pit_timer.timer); + hrtimer_cancel(&pit->pit_state.timer); flush_kthread_work(&pit->expired); } @@ -311,16 +311,16 @@ static void pit_do_work(struct kthread_work *work) static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) { - struct kvm_pit_timer *ktimer = container_of(data, struct kvm_pit_timer, timer); - struct kvm_pit *pt = ktimer->kvm->arch.vpit; + struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer); + struct kvm_pit *pt = ps->kvm->arch.vpit; - if (ktimer->reinject || !atomic_read(&ktimer->pending)) { - atomic_inc(&ktimer->pending); + if (ps->reinject || !atomic_read(&ps->pending)) { + atomic_inc(&ps->pending); queue_kthread_work(&pt->worker, &pt->expired); } - if (pt->pit_state.is_periodic) { - hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); + if (ps->is_periodic) { + hrtimer_add_expires_ns(&ps->timer, ps->period); return HRTIMER_RESTART; } else return HRTIMER_NORESTART; @@ -329,7 +329,6 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) { struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; - struct kvm_pit_timer *pt = &ps->pit_timer; s64 interval; if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) @@ -340,18 +339,18 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) pr_debug("create pit timer, interval is %llu nsec\n", interval); /* TODO The new value only affected after the retriggered */ - hrtimer_cancel(&pt->timer); + hrtimer_cancel(&ps->timer); flush_kthread_work(&ps->pit->expired); - pt->period = interval; + ps->period = interval; ps->is_periodic = is_period; - pt->timer.function = pit_timer_fn; - pt->kvm = ps->pit->kvm; + ps->timer.function = pit_timer_fn; + ps->kvm = ps->pit->kvm; - atomic_set(&pt->pending, 0); + atomic_set(&ps->pending, 0); ps->irq_ack = 1; - hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), + hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); } @@ -627,7 +626,7 @@ void kvm_pit_reset(struct kvm_pit *pit) } mutex_unlock(&pit->pit_state.lock); - atomic_set(&pit->pit_state.pit_timer.pending, 0); + atomic_set(&pit->pit_state.pending, 0); pit->pit_state.irq_ack = 1; } @@ -636,7 +635,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); if (!mask) { - atomic_set(&pit->pit_state.pit_timer.pending, 0); + atomic_set(&pit->pit_state.pending, 0); pit->pit_state.irq_ack = 1; } } @@ -694,12 +693,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) pit_state = &pit->pit_state; pit_state->pit = pit; - hrtimer_init(&pit_state->pit_timer.timer, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); pit_state->irq_ack_notifier.gsi = 0; pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); - pit_state->pit_timer.reinject = true; + pit_state->reinject = true; mutex_unlock(&pit->pit_state.lock); kvm_pit_reset(pit); @@ -749,7 +747,7 @@ void kvm_free_pit(struct kvm *kvm) kvm_unregister_irq_ack_notifier(kvm, &kvm->arch.vpit->pit_state.irq_ack_notifier); mutex_lock(&kvm->arch.vpit->pit_state.lock); - timer = &kvm->arch.vpit->pit_state.pit_timer.timer; + timer = &kvm->arch.vpit->pit_state.timer; hrtimer_cancel(timer); flush_kthread_work(&kvm->arch.vpit->expired); kthread_stop(kvm->arch.vpit->worker_task); diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index c9bbcb889c4..dd1b16b611b 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -21,19 +21,15 @@ struct kvm_kpit_channel_state { ktime_t count_load_time; }; -struct kvm_pit_timer { - struct hrtimer timer; - s64 period; /* unit: ns */ - atomic_t pending; /* accumulated triggered timers */ - bool reinject; - struct kvm *kvm; -}; - struct kvm_kpit_state { struct kvm_kpit_channel_state channels[3]; u32 flags; - struct kvm_pit_timer pit_timer; bool is_periodic; + s64 period; /* unit: ns */ + struct hrtimer timer; + atomic_t pending; /* accumulated triggered timers */ + bool reinject; + struct kvm *kvm; u32 speaker_data_on; struct mutex lock; struct kvm_pit *pit; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b6379e55ee2..3a53bcc24f2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3082,7 +3082,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, if (!kvm->arch.vpit) return -ENXIO; mutex_lock(&kvm->arch.vpit->pit_state.lock); - kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; + kvm->arch.vpit->pit_state.reinject = control->pit_reinject; mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } -- cgit v1.2.3-70-g09d2 From 7af6c2456851eb08d51d95de38ae8302994031e9 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 11 Jul 2012 14:20:51 +0300 Subject: crypto: arch/x86 - cleanup - remove unneeded crypto_alg.cra_list initializations Initialization of cra_list is currently mixed, most ciphers initialize this field and most shashes do not. Initialization however is not needed at all since cra_list is initialized/overwritten in __crypto_register_alg() with list_add(). Therefore perform cleanup to remove all unneeded initializations of this field in 'arch/x86/crypto/'. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/aes_glue.c | 1 - arch/x86/crypto/aesni-intel_glue.c | 5 +---- arch/x86/crypto/blowfish_glue.c | 4 ---- arch/x86/crypto/camellia_glue.c | 6 ------ arch/x86/crypto/ghash-clmulni-intel_glue.c | 2 -- arch/x86/crypto/salsa20_glue.c | 1 - arch/x86/crypto/serpent_avx_glue.c | 10 ---------- arch/x86/crypto/serpent_sse2_glue.c | 10 ---------- arch/x86/crypto/twofish_avx_glue.c | 10 ---------- arch/x86/crypto/twofish_glue.c | 1 - arch/x86/crypto/twofish_glue_3way.c | 5 ----- 11 files changed, 1 insertion(+), 54 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 59b37deb8c8..aafe8ce0d65 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -40,7 +40,6 @@ static struct crypto_alg aes_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 34fdcff4d2c..648347a0577 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1118,7 +1118,7 @@ MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); static int __init aesni_init(void) { - int err, i; + int err; if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; @@ -1127,9 +1127,6 @@ static int __init aesni_init(void) if (err) return err; - for (i = 0; i < ARRAY_SIZE(aesni_algs); i++) - INIT_LIST_HEAD(&aesni_algs[i].cra_list); - return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); } diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 7967474de8f..50ec333b70e 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -367,7 +367,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_ctxsize = sizeof(struct bf_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list), .cra_u = { .cipher = { .cia_min_keysize = BF_MIN_KEY_SIZE, @@ -387,7 +386,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, @@ -407,7 +405,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, @@ -428,7 +425,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index eeb2b3b743e..7a74d7bb326 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -1601,7 +1601,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_ctxsize = sizeof(struct camellia_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list), .cra_u = { .cipher = { .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1621,7 +1620,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1641,7 +1639,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1662,7 +1659,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1683,7 +1679,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -1707,7 +1702,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index b4bf0a63b52..6759dd1135b 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -150,7 +150,6 @@ static struct shash_alg ghash_alg = { .cra_blocksize = GHASH_BLOCK_SIZE, .cra_ctxsize = sizeof(struct ghash_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), }, }; @@ -288,7 +287,6 @@ static struct ahash_alg ghash_async_alg = { .cra_blocksize = GHASH_BLOCK_SIZE, .cra_type = &crypto_ahash_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), .cra_init = ghash_async_init_tfm, .cra_exit = ghash_async_exit_tfm, }, diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index bccb76d8098..a3a3c0205c1 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -97,7 +97,6 @@ static struct crypto_alg alg = { .cra_ctxsize = sizeof(struct salsa20_ctx), .cra_alignmask = 3, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), .cra_u = { .blkcipher = { .setkey = setkey, diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index b36bdac237e..3f543a04cf1 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -390,7 +390,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -410,7 +409,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -430,7 +428,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -451,7 +448,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -475,7 +471,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE * 2, @@ -496,7 +491,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -518,7 +512,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -541,7 +534,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -565,7 +557,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -590,7 +581,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index d679c8675f4..9107a9908c4 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -393,7 +393,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -413,7 +412,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -433,7 +431,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -454,7 +451,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -478,7 +474,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE * 2, @@ -499,7 +494,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -521,7 +515,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -544,7 +537,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -568,7 +560,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -593,7 +584,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 782b67ddaf6..e7708b5442e 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -378,7 +378,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -398,7 +397,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -418,7 +416,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -439,7 +436,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), .cra_exit = lrw_twofish_exit_tfm, .cra_u = { .blkcipher = { @@ -463,7 +459,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE * 2, @@ -484,7 +479,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -506,7 +500,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -529,7 +522,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -553,7 +545,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -578,7 +569,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 359ae084275..0a520230350 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -70,7 +70,6 @@ static struct crypto_alg alg = { .cra_ctxsize = sizeof(struct twofish_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = TF_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 15f9347316c..aa3eb358b7e 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -342,7 +342,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -362,7 +361,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -383,7 +381,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -404,7 +401,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), .cra_exit = lrw_twofish_exit_tfm, .cra_u = { .blkcipher = { @@ -426,7 +422,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE * 2, -- cgit v1.2.3-70-g09d2 From 4d6d6a2c850f89bc9283d02519cb536baba72032 Mon Sep 17 00:00:00 2001 From: Johannes Goetzfried Date: Wed, 11 Jul 2012 19:37:37 +0200 Subject: crypto: cast5 - add x86_64/avx assembler implementation This patch adds a x86_64/avx assembler implementation of the Cast5 block cipher. The implementation processes sixteen blocks in parallel (four 4 block chunk AVX operations). The table-lookups are done in general-purpose registers. For small blocksizes the functions from the generic module are called. A good performance increase is provided for blocksizes greater or equal to 128B. Patch has been tested with tcrypt and automated filesystem tests. Tcrypt benchmark results: Intel Core i5-2500 CPU (fam:6, model:42, step:7) cast5-avx-x86_64 vs. cast5-generic 64bit key: size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec 16B 0.99x 0.99x 1.00x 1.00x 1.02x 1.01x 64B 1.00x 1.00x 0.98x 1.00x 1.01x 1.02x 256B 2.03x 2.01x 0.95x 2.11x 2.12x 2.13x 1024B 2.30x 2.24x 0.95x 2.29x 2.35x 2.35x 8192B 2.31x 2.27x 0.95x 2.31x 2.39x 2.39x 128bit key: size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec 16B 0.99x 0.99x 1.00x 1.00x 1.01x 1.01x 64B 1.00x 1.00x 0.98x 1.01x 1.02x 1.01x 256B 2.17x 2.13x 0.96x 2.19x 2.19x 2.19x 1024B 2.29x 2.32x 0.95x 2.34x 2.37x 2.38x 8192B 2.35x 2.32x 0.95x 2.35x 2.39x 2.39x Signed-off-by: Johannes Goetzfried Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 322 ++++++++++++++++++ arch/x86/crypto/cast5_avx_glue.c | 530 ++++++++++++++++++++++++++++++ crypto/Kconfig | 14 + crypto/testmgr.c | 60 ++++ 5 files changed, 928 insertions(+) create mode 100644 arch/x86/crypto/cast5-avx-x86_64-asm_64.S create mode 100644 arch/x86/crypto/cast5_avx_glue.c (limited to 'arch/x86') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e908e5de82d..565e82b0014 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o +obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o @@ -32,6 +33,7 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o +cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S new file mode 100644 index 00000000000..94693c877e3 --- /dev/null +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -0,0 +1,322 @@ +/* + * Cast5 Cipher 16-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "cast5-avx-x86_64-asm_64.S" +.text + +.extern cast5_s1 +.extern cast5_s2 +.extern cast5_s3 +.extern cast5_s4 + +/* structure of crypto context */ +#define km 0 +#define kr (16*4) +#define rr ((16*4)+16) + +/* s-boxes */ +#define s1 cast5_s1 +#define s2 cast5_s2 +#define s3 cast5_s3 +#define s4 cast5_s4 + +/********************************************************************** + 16-way AVX cast5 + **********************************************************************/ +#define CTX %rdi + +#define RL1 %xmm0 +#define RR1 %xmm1 +#define RL2 %xmm2 +#define RR2 %xmm3 +#define RL3 %xmm4 +#define RR3 %xmm5 +#define RL4 %xmm6 +#define RR4 %xmm7 + +#define RX %xmm8 + +#define RKM %xmm9 +#define RKRF %xmm10 +#define RKRR %xmm11 + +#define RTMP %xmm12 +#define RMASK %xmm13 +#define R32 %xmm14 + +#define RID1 %rax +#define RID1b %al +#define RID2 %rbx +#define RID2b %bl + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RFS1 %r8 +#define RFS1d %r8d +#define RFS2 %r9 +#define RFS2d %r9d +#define RFS3 %r10 +#define RFS3d %r10d + + +#define lookup_32bit(src, dst, op1, op2, op3) \ + movb src ## bl, RID1b; \ + movb src ## bh, RID2b; \ + movl s1(, RID1, 4), dst ## d; \ + op1 s2(, RID2, 4), dst ## d; \ + shrq $16, src; \ + movb src ## bl, RID1b; \ + movb src ## bh, RID2b; \ + op2 s3(, RID1, 4), dst ## d; \ + op3 s4(, RID2, 4), dst ## d; + +#define F(a, x, op0, op1, op2, op3) \ + op0 a, RKM, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ + vpor RTMP, x, x; \ + \ + vpshufb RMASK, x, x; \ + vmovq x, RGI1; \ + vpsrldq $8, x, x; \ + vmovq x, RGI2; \ + \ + lookup_32bit(RGI1, RFS1, op1, op2, op3); \ + shrq $16, RGI1; \ + lookup_32bit(RGI1, RFS2, op1, op2, op3); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + \ + lookup_32bit(RGI2, RFS1, op1, op2, op3); \ + shrq $16, RGI2; \ + lookup_32bit(RGI2, RFS3, op1, op2, op3); \ + shlq $32, RFS3; \ + orq RFS1, RFS3; \ + \ + vmovq RFS2, x; \ + vpinsrq $1, RFS3, x, x; + +#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl) +#define F2(b, x) F(b, x, vpxor, subl, addl, xorl) +#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl) + +#define subround(a, b, x, n, f) \ + F ## f(b, x); \ + vpxor a, x, a; + +#define round(l, r, n, f) \ + vbroadcastss (km+(4*n))(CTX), RKM; \ + vpinsrb $0, (kr+n)(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + subround(l ## 1, r ## 1, RX, n, f); \ + subround(l ## 2, r ## 2, RX, n, f); \ + subround(l ## 3, r ## 3, RX, n, f); \ + subround(l ## 4, r ## 4, RX, n, f); + + +#define transpose_2x4(x0, x1, t0, t1) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t1; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; + +#define inpack_blocks(in, x0, x1, t0, t1) \ + vmovdqu (0*4*4)(in), x0; \ + vmovdqu (1*4*4)(in), x1; \ + vpshufb RMASK, x0, x0; \ + vpshufb RMASK, x1, x1; \ + \ + transpose_2x4(x0, x1, t0, t1) + +#define outunpack_blocks(out, x0, x1, t0, t1) \ + transpose_2x4(x0, x1, t0, t1) \ + \ + vpshufb RMASK, x0, x0; \ + vpshufb RMASK, x1, x1; \ + vmovdqu x0, (0*4*4)(out); \ + vmovdqu x1, (1*4*4)(out); + +#define outunpack_xor_blocks(out, x0, x1, t0, t1) \ + transpose_2x4(x0, x1, t0, t1) \ + \ + vpshufb RMASK, x0, x0; \ + vpshufb RMASK, x1, x1; \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); + +.align 16 +.Lbswap_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.L32_mask: + .byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0 + +.align 16 +.global __cast5_enc_blk_16way +.type __cast5_enc_blk_16way,@function; + +__cast5_enc_blk_16way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pushq %rbx; + pushq %rcx; + + vmovdqu .Lbswap_mask, RMASK; + vmovdqu .L32_mask, R32; + vpxor RKRF, RKRF, RKRF; + + inpack_blocks(%rdx, RL1, RR1, RTMP, RX); + leaq (2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL2, RR2, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + inpack_blocks(%rax, RL3, RR3, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + inpack_blocks(%rax, RL4, RR4, RTMP, RX); + + xorq RID1, RID1; + xorq RID2, RID2; + + round(RL, RR, 0, 1); + round(RR, RL, 1, 2); + round(RL, RR, 2, 3); + round(RR, RL, 3, 1); + round(RL, RR, 4, 2); + round(RR, RL, 5, 3); + round(RL, RR, 6, 1); + round(RR, RL, 7, 2); + round(RL, RR, 8, 3); + round(RR, RL, 9, 1); + round(RL, RR, 10, 2); + round(RR, RL, 11, 3); + + movb rr(CTX), %al; + testb %al, %al; + jnz __skip_enc; + + round(RL, RR, 12, 1); + round(RR, RL, 13, 2); + round(RL, RR, 14, 3); + round(RR, RL, 15, 1); + +__skip_enc: + popq %rcx; + popq %rbx; + + testb %cl, %cl; + jnz __enc_xor16; + + outunpack_blocks(%rsi, RR1, RL1, RTMP, RX); + leaq (2*4*4)(%rsi), %rax; + outunpack_blocks(%rax, RR2, RL2, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + outunpack_blocks(%rax, RR3, RL3, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + outunpack_blocks(%rax, RR4, RL4, RTMP, RX); + + ret; + +__enc_xor16: + outunpack_xor_blocks(%rsi, RR1, RL1, RTMP, RX); + leaq (2*4*4)(%rsi), %rax; + outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX); + + ret; + +.align 16 +.global cast5_dec_blk_16way +.type cast5_dec_blk_16way,@function; + +cast5_dec_blk_16way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + pushq %rbx; + + vmovdqu .Lbswap_mask, RMASK; + vmovdqu .L32_mask, R32; + vpxor RKRF, RKRF, RKRF; + + inpack_blocks(%rdx, RL1, RR1, RTMP, RX); + leaq (2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL2, RR2, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + inpack_blocks(%rax, RL3, RR3, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + inpack_blocks(%rax, RL4, RR4, RTMP, RX); + + xorq RID1, RID1; + xorq RID2, RID2; + + movb rr(CTX), %al; + testb %al, %al; + jnz __skip_dec; + + round(RL, RR, 15, 1); + round(RR, RL, 14, 3); + round(RL, RR, 13, 2); + round(RR, RL, 12, 1); + +__skip_dec: + round(RL, RR, 11, 3); + round(RR, RL, 10, 2); + round(RL, RR, 9, 1); + round(RR, RL, 8, 3); + round(RL, RR, 7, 2); + round(RR, RL, 6, 1); + round(RL, RR, 5, 3); + round(RR, RL, 4, 2); + round(RL, RR, 3, 1); + round(RR, RL, 2, 3); + round(RL, RR, 1, 2); + round(RR, RL, 0, 1); + + popq %rbx; + + outunpack_blocks(%rsi, RR1, RL1, RTMP, RX); + leaq (2*4*4)(%rsi), %rax; + outunpack_blocks(%rax, RR2, RL2, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + outunpack_blocks(%rax, RR3, RL3, RTMP, RX); + leaq (2*4*4)(%rax), %rax; + outunpack_blocks(%rax, RR4, RL4, RTMP, RX); + + ret; diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c new file mode 100644 index 00000000000..445aab06387 --- /dev/null +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -0,0 +1,530 @@ +/* + * Glue Code for the AVX assembler implemention of the Cast5 Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CAST5_PARALLEL_BLOCKS 16 + +asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast5_enc_blk_16way(ctx, dst, src, false); +} + +static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast5_enc_blk_16way(ctx, dst, src, true); +} + +static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + cast5_dec_blk_16way(ctx, dst, src); +} + + +static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, + NULL, fpu_enabled, nbytes); +} + +static inline void cast5_fpu_end(bool fpu_enabled) +{ + return glue_fpu_end(fpu_enabled); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + bool enc) +{ + bool fpu_enabled = false; + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + if (enc) + cast5_enc_blk_xway(ctx, wdst, wsrc); + else + cast5_dec_blk_xway(ctx, wdst, wsrc); + + wsrc += bsize * CAST5_PARALLEL_BLOCKS; + wdst += bsize * CAST5_PARALLEL_BLOCKS; + nbytes -= bsize * CAST5_PARALLEL_BLOCKS; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (enc) + __cast5_encrypt(ctx, wdst, wsrc); + else + __cast5_decrypt(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, false); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv ^= *iv; + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[CAST5_PARALLEL_BLOCKS - 1]; + u64 last_iv; + int i; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1); + src -= CAST5_PARALLEL_BLOCKS - 1; + dst -= CAST5_PARALLEL_BLOCKS - 1; + + for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) + ivs[i] = src[i]; + + cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) + *(dst + (i + 1)) ^= *(ivs + i); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes)) { + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + return err; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[CAST5_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + __cast5_encrypt(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, CAST5_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); + __be64 ctrblocks[CAST5_PARALLEL_BLOCKS]; + int i; + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + /* create ctrblks for parallel encrypt */ + for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + ctrblocks[i] = cpu_to_be64(ctrblk++); + } + + cast5_enc_blk_xway_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += CAST5_PARALLEL_BLOCKS; + dst += CAST5_PARALLEL_BLOCKS; + nbytes -= bsize * CAST5_PARALLEL_BLOCKS; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + ctrblocks[0] = cpu_to_be64(ctrblk++); + + __cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + *dst ^= ctrblocks[0]; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + *(__be64 *)walk->iv = cpu_to_be64(ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + + +static struct crypto_alg cast5_algs[6] = { { + .cra_name = "__ecb-cast5-avx", + .cra_driver_name = "__driver-ecb-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-cast5-avx", + .cra_driver_name = "__driver-cbc-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-cast5-avx", + .cra_driver_name = "__driver-ctr-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "ecb(cast5)", + .cra_driver_name = "ecb-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(cast5)", + .cra_driver_name = "cbc-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(cast5)", + .cra_driver_name = "ctr-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +} }; + +static int __init cast5_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + pr_info("AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + pr_info("AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); +} + +static void __exit cast5_exit(void) +{ + crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); +} + +module_init(cast5_init); +module_exit(cast5_exit); + +MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("cast5"); diff --git a/crypto/Kconfig b/crypto/Kconfig index a3238051b03..cda97fcaa82 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -692,6 +692,20 @@ config CRYPTO_CAST5 The CAST5 encryption algorithm (synonymous with CAST-128) is described in RFC2144. +config CRYPTO_CAST5_AVX_X86_64 + tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)" + depends on X86 && 64BIT + select CRYPTO_ALGAPI + select CRYPTO_CRYPTD + select CRYPTO_ABLK_HELPER_X86 + select CRYPTO_CAST5 + help + The CAST5 encryption algorithm (synonymous with CAST-128) is + described in RFC2144. + + This module provides the Cast5 cipher algorithm that processes + sixteen blocks parallel using the AVX instruction set. + config CRYPTO_CAST6 tristate "CAST6 (CAST-256) cipher algorithm" select CRYPTO_ALGAPI diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 7a91e540563..def0f430b66 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1534,6 +1534,21 @@ static int alg_test_null(const struct alg_test_desc *desc, /* Please keep this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { + .alg = "__cbc-cast5-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } + }, { .alg = "__cbc-serpent-avx", .test = alg_test_null, .suite = { @@ -1594,6 +1609,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "__driver-cbc-cast5-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } }, { .alg = "__driver-cbc-serpent-avx", .test = alg_test_null, @@ -1655,6 +1685,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "__driver-ecb-cast5-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } }, { .alg = "__driver-ecb-serpent-avx", .test = alg_test_null, @@ -1951,6 +1996,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "cryptd(__driver-ecb-cast5-avx)", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } }, { .alg = "cryptd(__driver-ecb-serpent-avx)", .test = alg_test_null, -- cgit v1.2.3-70-g09d2 From 4ea1277d301eb776e321684cd4ea95116b4e8847 Mon Sep 17 00:00:00 2001 From: Johannes Goetzfried Date: Wed, 11 Jul 2012 19:38:57 +0200 Subject: crypto: cast6 - add x86_64/avx assembler implementation This patch adds a x86_64/avx assembler implementation of the Cast6 block cipher. The implementation processes eight blocks in parallel (two 4 block chunk AVX operations). The table-lookups are done in general-purpose registers. For small blocksizes the functions from the generic module are called. A good performance increase is provided for blocksizes greater or equal to 128B. Patch has been tested with tcrypt and automated filesystem tests. Tcrypt benchmark results: Intel Core i5-2500 CPU (fam:6, model:42, step:7) cast6-avx-x86_64 vs. cast6-generic 128bit key: (lrw:256bit) (xts:256bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 0.97x 1.00x 1.01x 1.01x 0.99x 0.97x 0.98x 1.01x 0.96x 0.98x 64B 0.98x 0.99x 1.02x 1.01x 0.99x 1.00x 1.01x 0.99x 1.00x 0.99x 256B 1.77x 1.84x 0.99x 1.85x 1.77x 1.77x 1.70x 1.74x 1.69x 1.72x 1024B 1.93x 1.95x 0.99x 1.96x 1.93x 1.93x 1.84x 1.85x 1.89x 1.87x 8192B 1.91x 1.95x 0.99x 1.97x 1.95x 1.91x 1.86x 1.87x 1.93x 1.90x 256bit key: (lrw:384bit) (xts:512bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 0.97x 0.99x 1.02x 1.01x 0.98x 0.99x 1.00x 1.00x 0.98x 0.98x 64B 0.98x 0.99x 1.01x 1.00x 1.00x 1.00x 1.01x 1.01x 0.97x 1.00x 256B 1.77x 1.83x 1.00x 1.86x 1.79x 1.78x 1.70x 1.76x 1.71x 1.69x 1024B 1.92x 1.95x 0.99x 1.96x 1.93x 1.93x 1.83x 1.86x 1.89x 1.87x 8192B 1.94x 1.95x 0.99x 1.97x 1.95x 1.95x 1.87x 1.87x 1.93x 1.91x Signed-off-by: Johannes Goetzfried Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 335 +++++++++++++++ arch/x86/crypto/cast6_avx_glue.c | 648 ++++++++++++++++++++++++++++++ crypto/Kconfig | 17 + crypto/testmgr.c | 60 +++ 5 files changed, 1062 insertions(+) create mode 100644 arch/x86/crypto/cast6-avx-x86_64-asm_64.S create mode 100644 arch/x86/crypto/cast6_avx_glue.c (limited to 'arch/x86') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 565e82b0014..5bacb4a226a 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o +obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o @@ -34,6 +35,7 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o +cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S new file mode 100644 index 00000000000..d258ce0d2e0 --- /dev/null +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -0,0 +1,335 @@ +/* + * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "cast6-avx-x86_64-asm_64.S" +.text + +.extern cast6_s1 +.extern cast6_s2 +.extern cast6_s3 +.extern cast6_s4 + +/* structure of crypto context */ +#define km 0 +#define kr (12*4*4) + +/* s-boxes */ +#define s1 cast6_s1 +#define s2 cast6_s2 +#define s3 cast6_s3 +#define s4 cast6_s4 + +/********************************************************************** + 8-way AVX cast6 + **********************************************************************/ +#define CTX %rdi + +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 + +#define RA2 %xmm4 +#define RB2 %xmm5 +#define RC2 %xmm6 +#define RD2 %xmm7 + +#define RX %xmm8 + +#define RKM %xmm9 +#define RKRF %xmm10 +#define RKRR %xmm11 + +#define RTMP %xmm12 +#define RMASK %xmm13 +#define R32 %xmm14 + +#define RID1 %rax +#define RID1b %al +#define RID2 %rbx +#define RID2b %bl + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RFS1 %r8 +#define RFS1d %r8d +#define RFS2 %r9 +#define RFS2d %r9d +#define RFS3 %r10 +#define RFS3d %r10d + + +#define lookup_32bit(src, dst, op1, op2, op3) \ + movb src ## bl, RID1b; \ + movb src ## bh, RID2b; \ + movl s1(, RID1, 4), dst ## d; \ + op1 s2(, RID2, 4), dst ## d; \ + shrq $16, src; \ + movb src ## bl, RID1b; \ + movb src ## bh, RID2b; \ + op2 s3(, RID1, 4), dst ## d; \ + op3 s4(, RID2, 4), dst ## d; + +#define F(a, x, op0, op1, op2, op3) \ + op0 a, RKM, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ + vpor RTMP, x, x; \ + \ + vpshufb RMASK, x, x; \ + vmovq x, RGI1; \ + vpsrldq $8, x, x; \ + vmovq x, RGI2; \ + \ + lookup_32bit(RGI1, RFS1, op1, op2, op3); \ + shrq $16, RGI1; \ + lookup_32bit(RGI1, RFS2, op1, op2, op3); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + \ + lookup_32bit(RGI2, RFS1, op1, op2, op3); \ + shrq $16, RGI2; \ + lookup_32bit(RGI2, RFS3, op1, op2, op3); \ + shlq $32, RFS3; \ + orq RFS1, RFS3; \ + \ + vmovq RFS2, x; \ + vpinsrq $1, RFS3, x, x; + +#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl) +#define F2(b, x) F(b, x, vpxor, subl, addl, xorl) +#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl) + +#define qop(in, out, x, f) \ + F ## f(in ## 1, x); \ + vpxor out ## 1, x, out ## 1; \ + F ## f(in ## 2, x); \ + vpxor out ## 2, x, out ## 2; \ + +#define Q(n) \ + vbroadcastss (km+(4*(4*n+0)))(CTX), RKM; \ + vpinsrb $0, (kr+(4*n+0))(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + qop(RD, RC, RX, 1); \ + \ + vbroadcastss (km+(4*(4*n+1)))(CTX), RKM; \ + vpinsrb $0, (kr+(4*n+1))(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + qop(RC, RB, RX, 2); \ + \ + vbroadcastss (km+(4*(4*n+2)))(CTX), RKM; \ + vpinsrb $0, (kr+(4*n+2))(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + qop(RB, RA, RX, 3); \ + \ + vbroadcastss (km+(4*(4*n+3)))(CTX), RKM; \ + vpinsrb $0, (kr+(4*n+3))(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + qop(RA, RD, RX, 1); + +#define QBAR(n) \ + vbroadcastss (km+(4*(4*n+3)))(CTX), RKM; \ + vpinsrb $0, (kr+(4*n+3))(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + qop(RA, RD, RX, 1); \ + \ + vbroadcastss (km+(4*(4*n+2)))(CTX), RKM; \ + vpinsrb $0, (kr+(4*n+2))(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + qop(RB, RA, RX, 3); \ + \ + vbroadcastss (km+(4*(4*n+1)))(CTX), RKM; \ + vpinsrb $0, (kr+(4*n+1))(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + qop(RC, RB, RX, 2); \ + \ + vbroadcastss (km+(4*(4*n+0)))(CTX), RKM; \ + vpinsrb $0, (kr+(4*n+0))(CTX), RKRF, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + qop(RD, RC, RX, 1); + + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ + vmovdqu (0*4*4)(in), x0; \ + vmovdqu (1*4*4)(in), x1; \ + vmovdqu (2*4*4)(in), x2; \ + vmovdqu (3*4*4)(in), x3; \ + vpshufb RMASK, x0, x0; \ + vpshufb RMASK, x1, x1; \ + vpshufb RMASK, x2, x2; \ + vpshufb RMASK, x3, x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpshufb RMASK, x0, x0; \ + vpshufb RMASK, x1, x1; \ + vpshufb RMASK, x2, x2; \ + vpshufb RMASK, x3, x3; \ + vmovdqu x0, (0*4*4)(out); \ + vmovdqu x1, (1*4*4)(out); \ + vmovdqu x2, (2*4*4)(out); \ + vmovdqu x3, (3*4*4)(out); + +#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpshufb RMASK, x0, x0; \ + vpshufb RMASK, x1, x1; \ + vpshufb RMASK, x2, x2; \ + vpshufb RMASK, x3, x3; \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor (2*4*4)(out), x2, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor (3*4*4)(out), x3, x3; \ + vmovdqu x3, (3*4*4)(out); + +.align 16 +.Lbswap_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.L32_mask: + .byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0 + +.align 16 +.global __cast6_enc_blk_8way +.type __cast6_enc_blk_8way,@function; + +__cast6_enc_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pushq %rbx; + pushq %rcx; + + vmovdqu .Lbswap_mask, RMASK; + vmovdqu .L32_mask, R32; + vpxor RKRF, RKRF, RKRF; + + leaq (4*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + + xorq RID1, RID1; + xorq RID2, RID2; + + Q(0); + Q(1); + Q(2); + Q(3); + Q(4); + Q(5); + QBAR(6); + QBAR(7); + QBAR(8); + QBAR(9); + QBAR(10); + QBAR(11); + + popq %rcx; + popq %rbx; + + leaq (4*4*4)(%rsi), %rax; + + testb %cl, %cl; + jnz __enc_xor8; + + outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + + ret; + +__enc_xor8: + outunpack_xor_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM); + outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + + ret; + +.align 16 +.global cast6_dec_blk_8way +.type cast6_dec_blk_8way,@function; + +cast6_dec_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + pushq %rbx; + + vmovdqu .Lbswap_mask, RMASK; + vmovdqu .L32_mask, R32; + vpxor RKRF, RKRF, RKRF; + + leaq (4*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + + xorq RID1, RID1; + xorq RID2, RID2; + + Q(11); + Q(10); + Q(9); + Q(8); + Q(7); + Q(6); + QBAR(5); + QBAR(4); + QBAR(3); + QBAR(2); + QBAR(1); + QBAR(0); + + popq %rbx; + + leaq (4*4*4)(%rsi), %rax; + outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + + ret; diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c new file mode 100644 index 00000000000..15e5f85a501 --- /dev/null +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -0,0 +1,648 @@ +/* + * Glue Code for the AVX assembler implemention of the Cast6 Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CAST6_PARALLEL_BLOCKS 8 + +asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast6_enc_blk_8way(ctx, dst, src, false); +} + +static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast6_enc_blk_8way(ctx, dst, src, true); +} + +static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + cast6_dec_blk_8way(ctx, dst, src); +} + + +static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) +{ + u128 ivs[CAST6_PARALLEL_BLOCKS - 1]; + unsigned int j; + + for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++) + ivs[j] = src[j]; + + cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++) + u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); +} + +static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) +{ + be128 ctrblk; + + u128_to_be128(&ctrblk, iv); + u128_inc(iv); + + __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); + u128_xor(dst, src, (u128 *)&ctrblk); +} + +static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, + u128 *iv) +{ + be128 ctrblks[CAST6_PARALLEL_BLOCKS]; + unsigned int i; + + for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblks[i], iv); + u128_inc(iv); + } + + cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); +} + +static const struct common_glue_ctx cast6_enc = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) } + } } +}; + +static const struct common_glue_ctx cast6_ctr = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) } + } } +}; + +static const struct common_glue_ctx cast6_dec = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) } + } } +}; + +static const struct common_glue_ctx cast6_dec_cbc = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) } + }, { + .num_blocks = 1, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) } + } } +}; + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes); +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc, + dst, src, nbytes); +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src, + nbytes); +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes); +} + +static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS, + NULL, fpu_enabled, nbytes); +} + +static inline void cast6_fpu_end(bool fpu_enabled) +{ + glue_fpu_end(fpu_enabled); +} + +struct crypt_priv { + struct cast6_ctx *ctx; + bool fpu_enabled; +}; + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAST6_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { + cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __cast6_encrypt(ctx->ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAST6_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { + cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __cast6_decrypt(ctx->ctx, srcdst, srcdst); +} + +struct cast6_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct cast6_ctx cast6_ctx; +}; + +static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE, + &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->cast6_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->cast6_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct cast6_xts_ctx { + struct cast6_ctx tweak_ctx; + struct cast6_ctx crypt_ctx; +}; + +static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static struct crypto_alg cast6_algs[10] = { { + .cra_name = "__ecb-cast6-avx", + .cra_driver_name = "__driver-ecb-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-cast6-avx", + .cra_driver_name = "__driver-cbc-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-cast6-avx", + .cra_driver_name = "__driver-ctr-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-cast6-avx", + .cra_driver_name = "__driver-lrw-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE + + CAST6_BLOCK_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE + + CAST6_BLOCK_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = lrw_cast6_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-cast6-avx", + .cra_driver_name = "__driver-xts-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE * 2, + .max_keysize = CAST6_MAX_KEY_SIZE * 2, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = xts_cast6_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "ecb(cast6)", + .cra_driver_name = "ecb-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(cast6)", + .cra_driver_name = "cbc-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(cast6)", + .cra_driver_name = "ctr-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}, { + .cra_name = "lrw(cast6)", + .cra_driver_name = "lrw-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE + + CAST6_BLOCK_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE + + CAST6_BLOCK_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "xts(cast6)", + .cra_driver_name = "xts-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE * 2, + .max_keysize = CAST6_MAX_KEY_SIZE * 2, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +} }; + +static int __init cast6_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + pr_info("AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + pr_info("AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); +} + +static void __exit cast6_exit(void) +{ + crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); +} + +module_init(cast6_init); +module_exit(cast6_exit); + +MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("cast6"); diff --git a/crypto/Kconfig b/crypto/Kconfig index cda97fcaa82..fe8ed62efe2 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -713,6 +713,23 @@ config CRYPTO_CAST6 The CAST6 encryption algorithm (synonymous with CAST-256) is described in RFC2612. +config CRYPTO_CAST6_AVX_X86_64 + tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)" + depends on X86 && 64BIT + select CRYPTO_ALGAPI + select CRYPTO_CRYPTD + select CRYPTO_ABLK_HELPER_X86 + select CRYPTO_GLUE_HELPER_X86 + select CRYPTO_CAST6 + select CRYPTO_LRW + select CRYPTO_XTS + help + The CAST6 encryption algorithm (synonymous with CAST-256) is + described in RFC2612. + + This module provides the Cast6 cipher algorithm that processes + eight blocks parallel using the AVX instruction set. + config CRYPTO_DES tristate "DES and Triple DES EDE cipher algorithms" select CRYPTO_ALGAPI diff --git a/crypto/testmgr.c b/crypto/testmgr.c index cff3c1c3f83..575b57c3244 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1548,6 +1548,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "__cbc-cast6-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } }, { .alg = "__cbc-serpent-avx", .test = alg_test_null, @@ -1624,6 +1639,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "__driver-cbc-cast6-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } }, { .alg = "__driver-cbc-serpent-avx", .test = alg_test_null, @@ -1700,6 +1730,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "__driver-ecb-cast6-avx", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } }, { .alg = "__driver-ecb-serpent-avx", .test = alg_test_null, @@ -2026,6 +2071,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "cryptd(__driver-ecb-cast6-avx)", + .test = alg_test_null, + .suite = { + .cipher = { + .enc = { + .vecs = NULL, + .count = 0 + }, + .dec = { + .vecs = NULL, + .count = 0 + } + } + } }, { .alg = "cryptd(__driver-ecb-serpent-avx)", .test = alg_test_null, -- cgit v1.2.3-70-g09d2 From e115676e042f4d9268c6b6d8cb7dc962aa6cfd7d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 1 Aug 2012 17:01:42 +0300 Subject: KVM: x86: update KVM_SAVE_MSRS_BEGIN to correct value When MSR_KVM_PV_EOI_EN was added to msrs_to_save array KVM_SAVE_MSRS_BEGIN was not updated accordingly. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3a53bcc24f2..a87c82aa319 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -806,7 +806,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); * kvm-specific. Those are put in the beginning of the list. */ -#define KVM_SAVE_MSRS_BEGIN 9 +#define KVM_SAVE_MSRS_BEGIN 10 static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, -- cgit v1.2.3-70-g09d2 From ea22571c8fd912f28e2525f7112bbb84b474ff3a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2012 13:59:37 -0400 Subject: x86: mce: Disable preemption when calling raise_local() raise_mce() has a code path which does not disable preemption when the raise_local() is called. The per cpu variable access in raise_local() depends on preemption being disabled to be functional. So that code path was either never tested or never tested with CONFIG_DEBUG_PREEMPT enabled. Add the missing preempt_disable/enable() pair around the call. Signed-off-by: Thomas Gleixner Signed-off-by: Chen Gong Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index fc4beb39357..753746f6dbd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -194,7 +194,11 @@ static void raise_mce(struct mce *m) put_online_cpus(); } else #endif + { + preempt_disable(); raise_local(); + preempt_enable(); + } } /* Error injection interface */ -- cgit v1.2.3-70-g09d2 From b5975917a3e5f93b5d1c95561aab0aa44327baea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2012 13:59:38 -0400 Subject: x86: mce: Serialize mce injection raise_mce() fiddles with global state, but lacks any kind of serialization. Add a mutex around the raise_mce() call, so concurrent writers do not stomp on each other toes. Signed-off-by: Thomas Gleixner Signed-off-by: Chen Gong Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 753746f6dbd..ddc72f83933 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) } static cpumask_var_t mce_inject_cpumask; +static DEFINE_MUTEX(mce_inject_mutex); static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { @@ -229,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, * so do it a jiffie or two later everywhere. */ schedule_timeout(2); + + mutex_lock(&mce_inject_mutex); raise_mce(&m); + mutex_unlock(&mce_inject_mutex); return usize; } -- cgit v1.2.3-70-g09d2 From 26c3c283c5b08dd250279c06ba3ab5b094bbacc3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2012 13:59:39 -0400 Subject: x86: mce: Split timer init Split timer init function into the init and the start part, so the start part can replace the open coded version in CPU_DOWN_FAILED. Signed-off-by: Thomas Gleixner Signed-off-by: Chen Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5e095f873e3..bd3a5e850f4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1557,23 +1557,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) } } -static void __mcheck_cpu_init_timer(void) +static void mce_start_timer(unsigned int cpu, struct timer_list *t) { - struct timer_list *t = &__get_cpu_var(mce_timer); unsigned long iv = check_interval * HZ; - setup_timer(t, mce_timer_fn, smp_processor_id()); + __this_cpu_write(mce_next_interval, iv); - if (mce_ignore_ce) + if (mce_ignore_ce || !iv) return; - __this_cpu_write(mce_next_interval, iv); - if (!iv) - return; t->expires = round_jiffies(jiffies + iv); add_timer_on(t, smp_processor_id()); } +static void __mcheck_cpu_init_timer(void) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned int cpu = smp_processor_id(); + + setup_timer(t, mce_timer_fn, cpu); + mce_start_timer(cpu, t); +} + /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { @@ -2277,12 +2282,8 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - if (!mce_ignore_ce && check_interval) { - t->expires = round_jiffies(jiffies + - per_cpu(mce_next_interval, cpu)); - add_timer_on(t, cpu); - } smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); + mce_start_timer(cpu, t); break; case CPU_POST_DEAD: /* intentionally ignoring frozen here */ -- cgit v1.2.3-70-g09d2 From 1a65f970d10ace7a1e399f9061a65679c0ae57d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2012 13:59:40 -0400 Subject: x86: mce: Remove the frozen cases in the hotplug code No point in having double cases if we can simply mask the FROZEN bit out. Signed-off-by: Thomas Gleixner Signed-off-by: Chen Gong Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index bd3a5e850f4..b4dde1527ed 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2262,34 +2262,32 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) unsigned int cpu = (unsigned long)hcpu; struct timer_list *t = &per_cpu(mce_timer, cpu); - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: mce_device_create(cpu); if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); break; case CPU_DEAD: - case CPU_DEAD_FROZEN: if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); mce_device_remove(cpu); break; case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: del_timer_sync(t); smp_call_function_single(cpu, mce_disable_cpu, &action, 1); break; case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); mce_start_timer(cpu, t); break; - case CPU_POST_DEAD: + } + + if (action == CPU_POST_DEAD) { /* intentionally ignoring frozen here */ cmci_rediscover(cpu); - break; } + return NOTIFY_OK; } -- cgit v1.2.3-70-g09d2 From aab2eb7a38e0e510874acca01838f5badbca6c7e Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 1 Aug 2012 18:01:10 +0900 Subject: KVM: Stop checking rmap to see if slot is being created Instead, check npages consistently. This helps to make rmap architecture specific in a later patch. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3ca90d74711..abc039d7842 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6385,7 +6385,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, *x86 needs to handle !user_alloc case. */ if (!user_alloc) { - if (npages && !old.rmap) { + if (npages && !old.npages) { unsigned long userspace_addr; userspace_addr = vm_mmap(NULL, 0, @@ -6413,7 +6413,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; - if (!user_alloc && !old.user_alloc && old.rmap && !npages) { + if (!user_alloc && !old.user_alloc && old.npages && !npages) { int ret; ret = vm_munmap(old.userspace_addr, -- cgit v1.2.3-70-g09d2 From 65fbe37c42ed75604c9a770639209dcee162ebe7 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 1 Aug 2012 18:02:01 +0900 Subject: KVM: MMU: Use gfn_to_rmap() instead of directly reading rmap array This helps to make rmap architecture specific in a later patch. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 3 ++- arch/x86/kvm/mmu_audit.c | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a9a20528e70..ee768bb2367 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1181,7 +1181,8 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, unsigned long *rmapp; while (mask) { - rmapp = &slot->rmap[gfn_offset + __ffs(mask)]; + rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); /* clear the first set bit */ diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 7d7d0b9e23e..ca403f9bb0f 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -190,7 +190,6 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) { - struct kvm_memory_slot *slot; unsigned long *rmapp; u64 *sptep; struct rmap_iterator iter; @@ -198,8 +197,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) if (sp->role.direct || sp->unsync || sp->role.invalid) return; - slot = gfn_to_memslot(kvm, sp->gfn); - rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; + rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL); for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { -- cgit v1.2.3-70-g09d2 From d89cc617b954aff4030fce178f7d86f59aaf713d Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 1 Aug 2012 18:03:28 +0900 Subject: KVM: Push rmap into kvm_arch_memory_slot Two reasons: - x86 can integrate rmap and rmap_pde and remove heuristics in __gfn_to_rmap(). - Some architectures do not need rmap. Since rmap is one of the most memory consuming stuff in KVM, ppc'd better restrict the allocation to Book3S HV. Signed-off-by: Takuya Yoshikawa Acked-by: Paul Mackerras Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 ++-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 4 +-- arch/powerpc/kvm/powerpc.c | 8 ++++++ arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 5 +--- arch/x86/kvm/x86.c | 55 +++++++++++++++++++++---------------- include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 11 +------- 9 files changed, 49 insertions(+), 44 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 572ad014126..a29e0918172 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -221,6 +221,7 @@ struct revmap_entry { #define KVMPPC_GOT_PAGE 0x80 struct kvm_arch_memory_slot { + unsigned long *rmap; }; struct kvm_arch { diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 3c635c0616b..d95d11322a1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -705,7 +705,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; - rmap = &memslot->rmap[gfn - memslot->base_gfn]; + rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; lock_rmap(rmap); /* Check if we might have been invalidated; let the guest retry if so */ @@ -788,7 +788,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, for (; gfn < gfn_end; ++gfn) { gfn_t gfn_offset = gfn - memslot->base_gfn; - ret = handler(kvm, &memslot->rmap[gfn_offset], gfn); + ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn); retval |= ret; } } @@ -1036,7 +1036,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) unsigned long *rmapp, *map; preempt_disable(); - rmapp = memslot->rmap; + rmapp = memslot->arch.rmap; map = memslot->dirty_bitmap; for (i = 0; i < memslot->npages; ++i) { if (kvm_test_clear_dirty(kvm, rmapp)) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 5c70d19494f..56ac1a5d991 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -84,7 +84,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return; - rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]); + rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); lock_rmap(rmap); head = *rmap & KVMPPC_RMAP_INDEX; @@ -180,7 +180,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, if (!slot_is_aligned(memslot, psize)) return H_PARAMETER; slot_fn = gfn - memslot->base_gfn; - rmap = &memslot->rmap[slot_fn]; + rmap = &memslot->arch.rmap[slot_fn]; if (!kvm->arch.using_mmu_notifiers) { physp = kvm->arch.slot_phys[memslot->id]; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 87f4dc88607..879b14a6140 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -302,10 +302,18 @@ long kvm_arch_dev_ioctl(struct file *filp, void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { + if (!dont || free->arch.rmap != dont->arch.rmap) { + vfree(free->arch.rmap); + free->arch.rmap = NULL; + } } int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) { + slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); + if (!slot->arch.rmap) + return -ENOMEM; + return 0; } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 48e71318846..1309e69b57f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -504,7 +504,7 @@ struct kvm_lpage_info { }; struct kvm_arch_memory_slot { - unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1]; + unsigned long *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ee768bb2367..aa9a987ddef 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -970,11 +970,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, { unsigned long idx; - if (likely(level == PT_PAGE_TABLE_LEVEL)) - return &slot->rmap[gfn - slot->base_gfn]; - idx = gfn_to_index(gfn, slot->base_gfn, level); - return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx]; + return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index abc039d7842..ebf2109318e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6303,14 +6303,18 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, { int i; - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) { - kvm_kvfree(free->arch.rmap_pde[i]); - free->arch.rmap_pde[i] = NULL; + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { + if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { + kvm_kvfree(free->arch.rmap[i]); + free->arch.rmap[i] = NULL; } - if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { - kvm_kvfree(free->arch.lpage_info[i]); - free->arch.lpage_info[i] = NULL; + if (i == 0) + continue; + + if (!dont || free->arch.lpage_info[i - 1] != + dont->arch.lpage_info[i - 1]) { + kvm_kvfree(free->arch.lpage_info[i - 1]); + free->arch.lpage_info[i - 1] = NULL; } } } @@ -6319,28 +6323,30 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) { int i; - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { unsigned long ugfn; int lpages; - int level = i + 2; + int level = i + 1; lpages = gfn_to_index(slot->base_gfn + npages - 1, slot->base_gfn, level) + 1; - slot->arch.rmap_pde[i] = - kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i])); - if (!slot->arch.rmap_pde[i]) + slot->arch.rmap[i] = + kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i])); + if (!slot->arch.rmap[i]) goto out_free; + if (i == 0) + continue; - slot->arch.lpage_info[i] = - kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); - if (!slot->arch.lpage_info[i]) + slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages * + sizeof(*slot->arch.lpage_info[i - 1])); + if (!slot->arch.lpage_info[i - 1]) goto out_free; if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->arch.lpage_info[i][0].write_count = 1; + slot->arch.lpage_info[i - 1][0].write_count = 1; if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->arch.lpage_info[i][lpages - 1].write_count = 1; + slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1; ugfn = slot->userspace_addr >> PAGE_SHIFT; /* * If the gfn and userspace address are not aligned wrt each @@ -6352,18 +6358,21 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) unsigned long j; for (j = 0; j < lpages; ++j) - slot->arch.lpage_info[i][j].write_count = 1; + slot->arch.lpage_info[i - 1][j].write_count = 1; } } return 0; out_free: - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - kvm_kvfree(slot->arch.rmap_pde[i]); - kvm_kvfree(slot->arch.lpage_info[i]); - slot->arch.rmap_pde[i] = NULL; - slot->arch.lpage_info[i] = NULL; + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { + kvm_kvfree(slot->arch.rmap[i]); + slot->arch.rmap[i] = NULL; + if (i == 0) + continue; + + kvm_kvfree(slot->arch.lpage_info[i - 1]); + slot->arch.lpage_info[i - 1] = NULL; } return -ENOMEM; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dbc65f9d6a2..3c16f0f1fe3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -213,7 +213,6 @@ struct kvm_memory_slot { gfn_t base_gfn; unsigned long npages; unsigned long flags; - unsigned long *rmap; unsigned long *dirty_bitmap; struct kvm_arch_memory_slot arch; unsigned long userspace_addr; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bcf973ec98f..14ec567816a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -550,16 +550,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) static void kvm_free_physmem_slot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - if (!dont || free->rmap != dont->rmap) - vfree(free->rmap); - if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); kvm_arch_free_memslot(free, dont); free->npages = 0; - free->rmap = NULL; } void kvm_free_physmem(struct kvm *kvm) @@ -768,11 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (npages && !old.npages) { new.user_alloc = user_alloc; new.userspace_addr = mem->userspace_addr; -#ifndef CONFIG_S390 - new.rmap = vzalloc(npages * sizeof(*new.rmap)); - if (!new.rmap) - goto out_free; -#endif /* not defined CONFIG_S390 */ + if (kvm_arch_create_memslot(&new, npages)) goto out_free; } @@ -831,7 +823,6 @@ int __kvm_set_memory_region(struct kvm *kvm, /* actual memory is freed via old in kvm_free_physmem_slot below */ if (!npages) { - new.rmap = NULL; new.dirty_bitmap = NULL; memset(&new.arch, 0, sizeof(new.arch)); } -- cgit v1.2.3-70-g09d2 From 6c8ee57be9350c5c2cafdd6a99d0462d528676e2 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:37:54 +0800 Subject: KVM: introduce KVM_PFN_ERR_FAULT After that, the exported and un-inline function, get_fault_pfn, can be removed Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- include/linux/kvm_host.h | 3 ++- virt/kvm/kvm_main.c | 12 +++--------- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index aa9a987ddef..9cf90c8d584 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2512,7 +2512,7 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); if (!slot) - return get_fault_pfn(); + return KVM_PFN_ERR_FAULT; hva = gfn_to_hva_memslot(slot, gfn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3c16f0f1fe3..ef5554f4748 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -48,6 +48,8 @@ #define KVM_MAX_MMIO_FRAGMENTS \ (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS) +#define KVM_PFN_ERR_FAULT (-EFAULT) + /* * vcpu->requests bit members */ @@ -443,7 +445,6 @@ void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); void kvm_set_pfn_accessed(pfn_t pfn); void kvm_get_pfn(pfn_t pfn); -pfn_t get_fault_pfn(void); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 14ec567816a..ef0491645a1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -939,12 +939,6 @@ static pfn_t get_bad_pfn(void) return -ENOENT; } -pfn_t get_fault_pfn(void) -{ - return -EFAULT; -} -EXPORT_SYMBOL_GPL(get_fault_pfn); - static pfn_t get_hwpoison_pfn(void) { return -EHWPOISON; @@ -1115,7 +1109,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, struct vm_area_struct *vma; if (atomic) - return get_fault_pfn(); + return KVM_PFN_ERR_FAULT; down_read(¤t->mm->mmap_sem); if (npages == -EHWPOISON || @@ -1127,7 +1121,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, vma = find_vma_intersection(current->mm, addr, addr+1); if (vma == NULL) - pfn = get_fault_pfn(); + pfn = KVM_PFN_ERR_FAULT; else if ((vma->vm_flags & VM_PFNMAP)) { pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; @@ -1135,7 +1129,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, } else { if (async && (vma->vm_flags & VM_WRITE)) *async = true; - pfn = get_fault_pfn(); + pfn = KVM_PFN_ERR_FAULT; } up_read(¤t->mm->mmap_sem); } else -- cgit v1.2.3-70-g09d2 From e6c1502b3f933ace20c711ce34ab696f5a67086d Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:38:36 +0800 Subject: KVM: introduce KVM_PFN_ERR_HWPOISON Then, get_hwpoison_pfn and is_hwpoison_pfn can be removed Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 13 +------------ 3 files changed, 3 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9cf90c8d584..d3cdf69da51 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2649,7 +2649,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) { kvm_release_pfn_clean(pfn); - if (is_hwpoison_pfn(pfn)) { + if (pfn == KVM_PFN_ERR_HWPOISON) { kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ef5554f4748..840f44a096c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -49,6 +49,7 @@ (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS) #define KVM_PFN_ERR_FAULT (-EFAULT) +#define KVM_PFN_ERR_HWPOISON (-EHWPOISON) /* * vcpu->requests bit members @@ -395,7 +396,6 @@ extern struct page *bad_page; int is_error_page(struct page *page); int is_error_pfn(pfn_t pfn); -int is_hwpoison_pfn(pfn_t pfn); int is_noslot_pfn(pfn_t pfn); int is_invalid_pfn(pfn_t pfn); int kvm_is_error_hva(unsigned long addr); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ef0491645a1..7fce2d5787a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -939,17 +939,6 @@ static pfn_t get_bad_pfn(void) return -ENOENT; } -static pfn_t get_hwpoison_pfn(void) -{ - return -EHWPOISON; -} - -int is_hwpoison_pfn(pfn_t pfn) -{ - return pfn == -EHWPOISON; -} -EXPORT_SYMBOL_GPL(is_hwpoison_pfn); - int is_noslot_pfn(pfn_t pfn) { return pfn == -ENOENT; @@ -1115,7 +1104,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, if (npages == -EHWPOISON || (!async && check_user_page_hwpoison(addr))) { up_read(¤t->mm->mmap_sem); - return get_hwpoison_pfn(); + return KVM_PFN_ERR_HWPOISON; } vma = find_vma_intersection(current->mm, addr, addr+1); -- cgit v1.2.3-70-g09d2 From cb9aaa30b133574b646d9d4766ef08a843211393 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:42:10 +0800 Subject: KVM: do not release the error pfn After commit a2766325cf9f9, the error pfn is replaced by the error code, it need not be released anymore [ The patch has been compiling tested for powerpc ] Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/powerpc/kvm/e500_tlb.c | 1 - arch/x86/kvm/mmu.c | 7 +++---- arch/x86/kvm/mmu_audit.c | 4 +--- arch/x86/kvm/paging_tmpl.h | 8 ++------ virt/kvm/iommu.c | 1 - virt/kvm/kvm_main.c | 14 ++++++++------ 6 files changed, 14 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index c8f6c582674..09ce5ac128f 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -524,7 +524,6 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (is_error_pfn(pfn)) { printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", (long)gfn); - kvm_release_pfn_clean(pfn); return; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d3cdf69da51..9651c2cd000 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2496,7 +2496,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, rmap_recycle(vcpu, sptep, gfn); } } - kvm_release_pfn_clean(pfn); + + if (!is_error_pfn(pfn)) + kvm_release_pfn_clean(pfn); } static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) @@ -2648,7 +2650,6 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) { - kvm_release_pfn_clean(pfn); if (pfn == KVM_PFN_ERR_HWPOISON) { kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); return 0; @@ -3273,8 +3274,6 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, if (!async) return false; /* *pfn has correct page already */ - kvm_release_pfn_clean(*pfn); - if (!prefault && can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(gva, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index ca403f9bb0f..daff69e2115 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -116,10 +116,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn); - if (is_error_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + if (is_error_pfn(pfn)) return; - } hpa = pfn << PAGE_SHIFT; if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index bb7cf01cae7..bf8c42bf50f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -370,10 +370,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true); pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); - if (mmu_invalid_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + if (mmu_invalid_pfn(pfn)) return; - } /* * we call mmu_set_spte() with host_writable = true because that @@ -448,10 +446,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, gfn = gpte_to_gfn(gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, pte_access & ACC_WRITE_MASK); - if (mmu_invalid_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + if (mmu_invalid_pfn(pfn)) break; - } mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, NULL, PT_PAGE_TABLE_LEVEL, gfn, diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 6a67bea4019..037cb6730e6 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -107,7 +107,6 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) */ pfn = kvm_pin_pages(slot, gfn, page_size); if (is_error_pfn(pfn)) { - kvm_release_pfn_clean(pfn); gfn += 1; continue; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 93d3c6e063c..eafba99d107 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,9 +102,6 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { - if (is_error_pfn(pfn)) - return false; - if (pfn_valid(pfn)) { int reserved; struct page *tail = pfn_to_page(pfn); @@ -1165,10 +1162,13 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); static struct page *kvm_pfn_to_page(pfn_t pfn) { - WARN_ON(kvm_is_mmio_pfn(pfn)); + if (is_error_pfn(pfn)) + return KVM_ERR_PTR_BAD_PAGE; - if (is_error_pfn(pfn) || kvm_is_mmio_pfn(pfn)) + if (kvm_is_mmio_pfn(pfn)) { + WARN_ON(1); return KVM_ERR_PTR_BAD_PAGE; + } return pfn_to_page(pfn); } @@ -1193,7 +1193,9 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) + WARN_ON(is_error_pfn(pfn)); + + if (!kvm_is_mmio_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); -- cgit v1.2.3-70-g09d2 From 32cad84f44d186654492f1a50a1424c8906ccbd9 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:42:52 +0800 Subject: KVM: do not release the error page After commit a2766325cf9f9, the error page is replaced by the error code, it need not be released anymore [ The patch has been compiling tested for powerpc ] Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/powerpc/kvm/44x_tlb.c | 1 - arch/powerpc/kvm/book3s_pr.c | 4 +--- arch/x86/kvm/svm.c | 1 - arch/x86/kvm/vmx.c | 5 ++--- arch/x86/kvm/x86.c | 9 +++------ include/linux/kvm_host.h | 2 +- virt/kvm/async_pf.c | 4 ++-- virt/kvm/kvm_main.c | 5 +++-- 8 files changed, 12 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 33aa715dab2..5dd3ab46997 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -319,7 +319,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, if (is_error_page(new_page)) { printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", (unsigned long long)gfn); - kvm_release_page_clean(new_page); return; } hpaddr = page_to_phys(new_page); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index a1baec340f7..05c28f59f77 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -242,10 +242,8 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) int i; hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (is_error_page(hpage)) { - kvm_release_page_clean(hpage); + if (is_error_page(hpage)) return; - } hpage_offset = pte->raddr & ~PAGE_MASK; hpage_offset &= ~0xFFFULL; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 687d0c30e55..31be4a55744 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2105,7 +2105,6 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) return kmap(page); error: - kvm_release_page_clean(page); kvm_inject_gp(&svm->vcpu, 0); return NULL; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d6e4cbc42b8..cc8ad983692 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -596,10 +596,9 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) { struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT); - if (is_error_page(page)) { - kvm_release_page_clean(page); + if (is_error_page(page)) return NULL; - } + return page; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ebf2109318e..7953a9e7cb1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1639,10 +1639,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); - if (is_error_page(vcpu->arch.time_page)) { - kvm_release_page_clean(vcpu->arch.time_page); + if (is_error_page(vcpu->arch.time_page)) vcpu->arch.time_page = NULL; - } + break; } case MSR_KVM_ASYNC_PF_EN: @@ -3945,10 +3944,8 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, goto emul_write; page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - if (is_error_page(page)) { - kvm_release_page_clean(page); + if (is_error_page(page)) goto emul_write; - } kaddr = kmap_atomic(page); kaddr += offset_in_page(gpa); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ce7c32950f4..07226f820e6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -457,7 +457,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); -void kvm_release_pfn_dirty(pfn_t); +void kvm_release_pfn_dirty(pfn_t pfn); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); void kvm_set_pfn_accessed(pfn_t pfn); diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 56f55339189..ea475cd0351 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -111,7 +111,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.done.next, typeof(*work), link); list_del(&work->link); - if (work->page) + if (!is_error_page(work->page)) kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } @@ -138,7 +138,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->queue); vcpu->async_pf.queued--; - if (work->page) + if (!is_error_page(work->page)) kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eafba99d107..a2e85af847c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1186,8 +1186,9 @@ EXPORT_SYMBOL_GPL(gfn_to_page); void kvm_release_page_clean(struct page *page) { - if (!is_error_page(page)) - kvm_release_pfn_clean(page_to_pfn(page)); + WARN_ON(is_error_page(page)); + + kvm_release_pfn_clean(page_to_pfn(page)); } EXPORT_SYMBOL_GPL(kvm_release_page_clean); -- cgit v1.2.3-70-g09d2 From 8a5a87d9b7aef24bcdba763d8ee14982477b0a2e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Aug 2012 15:58:26 +0300 Subject: KVM: clean up kvm_(set|get)_apic_base kvm_get_apic_base() needlessly checks irqchip_in_kernel although it does the same no matter what result of the check is. kvm_set_apic_base() also checks for irqchip_in_kernel, but kvm_lapic_set_base() can handle this case. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7953a9e7cb1..3cafbb12ae0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -246,20 +246,14 @@ static void drop_user_return_notifiers(void *ignore) u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) { - if (irqchip_in_kernel(vcpu->kvm)) - return vcpu->arch.apic_base; - else - return vcpu->arch.apic_base; + return vcpu->arch.apic_base; } EXPORT_SYMBOL_GPL(kvm_get_apic_base); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) { /* TODO: reserve bits check */ - if (irqchip_in_kernel(vcpu->kvm)) - kvm_lapic_set_base(vcpu, data); - else - vcpu->arch.apic_base = data; + kvm_lapic_set_base(vcpu, data); } EXPORT_SYMBOL_GPL(kvm_set_apic_base); -- cgit v1.2.3-70-g09d2 From 5dbc8f3fed0b4cb04dfb276150294f21c5f0fc66 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Aug 2012 15:58:27 +0300 Subject: KVM: use kvm_lapic_set_base() to change apic_base Do not change apic_base directly. Use kvm_lapic_set_base() instead. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0cd431c85d3..49f4ac047f6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1185,7 +1185,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); if (kvm_vcpu_is_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + kvm_lapic_set_base(vcpu, + vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); vcpu->arch.pv_eoi.msr_val = 0; apic_update_ppr(apic); @@ -1310,8 +1311,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) HRTIMER_MODE_ABS); apic->lapic_timer.timer.function = apic_timer_fn; - apic->base_address = APIC_DEFAULT_PHYS_BASE; - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; + kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE); kvm_lapic_reset(vcpu); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); @@ -1380,8 +1380,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - apic->base_address = vcpu->arch.apic_base & - MSR_IA32_APICBASE_BASE; + kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); kvm_apic_set_version(vcpu); apic_update_ppr(apic); -- cgit v1.2.3-70-g09d2 From 6aed64a8a440360be875f31eabeacaddb83ef25f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Aug 2012 15:58:28 +0300 Subject: KVM: mark apic enabled on start up According to SDM apic is enabled on start up. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 49f4ac047f6..c3f14fe51e9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1311,7 +1311,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) HRTIMER_MODE_ABS); apic->lapic_timer.timer.function = apic_timer_fn; - kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE); + kvm_lapic_set_base(vcpu, + APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); kvm_lapic_reset(vcpu); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); -- cgit v1.2.3-70-g09d2 From c5cc421ba3219b90f11d151bc55f1608c12830fa Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Aug 2012 15:58:30 +0300 Subject: KVM: use jump label to optimize checking for HW enabled APIC in APIC_BASE MSR Usually all APICs are HW enabled so the check can be optimized out. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 29 ++++++++++++++++++++++++++++- arch/x86/kvm/lapic.h | 1 + arch/x86/kvm/x86.c | 1 + 3 files changed, 30 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c3f14fe51e9..5b46cab044a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "kvm_cache_regs.h" #include "irq.h" #include "trace.h" @@ -117,9 +118,13 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap) return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } +struct static_key_deferred apic_hw_disabled __read_mostly; + static inline int apic_hw_enabled(struct kvm_lapic *apic) { - return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; + if (static_key_false(&apic_hw_disabled.key)) + return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; + return MSR_IA32_APICBASE_ENABLE; } static inline int apic_sw_enabled(struct kvm_lapic *apic) @@ -1055,6 +1060,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); + if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) + static_key_slow_dec_deferred(&apic_hw_disabled); + if (vcpu->arch.apic->regs) free_page((unsigned long)vcpu->arch.apic->regs); @@ -1125,6 +1133,14 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) return; } + /* update jump label if enable bit changes */ + if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) { + if (value & MSR_IA32_APICBASE_ENABLE) + static_key_slow_dec_deferred(&apic_hw_disabled); + else + static_key_slow_inc(&apic_hw_disabled.key); + } + if (!kvm_vcpu_is_bsp(apic->vcpu)) value &= ~MSR_IA32_APICBASE_BSP; @@ -1311,6 +1327,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) HRTIMER_MODE_ABS); apic->lapic_timer.timer.function = apic_timer_fn; + /* + * APIC is created enabled. This will prevent kvm_lapic_set_base from + * thinking that APIC satet has changed. + */ + vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); @@ -1598,3 +1619,9 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, addr); } + +void kvm_lapic_init(void) +{ + /* do not patch jump label more than once per second */ + jump_label_rate_limit(&apic_hw_disabled, HZ); +} diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 166766fffd9..73fa299b68e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -78,4 +78,5 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) } int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); +void kvm_lapic_init(void); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3cafbb12ae0..29fa18d27e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4903,6 +4903,7 @@ int kvm_arch_init(void *opaque) if (cpu_has_xsave) host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + kvm_lapic_init(); return 0; out: -- cgit v1.2.3-70-g09d2 From f8c1ea103947038b7197bdd4c8451886a58af0c0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Aug 2012 15:58:31 +0300 Subject: KVM: use jump label to optimize checking for SW enabled apic in spurious interrupt register Usually all APICs are SW enabled so the check can be optimized out. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5b46cab044a..7f77e96ac5e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -127,9 +127,24 @@ static inline int apic_hw_enabled(struct kvm_lapic *apic) return MSR_IA32_APICBASE_ENABLE; } -static inline int apic_sw_enabled(struct kvm_lapic *apic) +struct static_key_deferred apic_sw_disabled __read_mostly; + +static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) +{ + if ((apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { + if (val & APIC_SPIV_APIC_ENABLED) + static_key_slow_dec_deferred(&apic_sw_disabled); + else + static_key_slow_inc(&apic_sw_disabled.key); + } + apic_set_reg(apic, APIC_SPIV, val); +} + +static inline int apic_sw_enabled(struct kvm_lapic *apic) { - return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; + if (static_key_false(&apic_sw_disabled.key)) + return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; + return APIC_SPIV_APIC_ENABLED; } static inline int apic_enabled(struct kvm_lapic *apic) @@ -918,7 +933,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) u32 mask = 0x3ff; if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) mask |= APIC_SPIV_DIRECTED_EOI; - apic_set_reg(apic, APIC_SPIV, val & mask); + apic_set_spiv(apic, val & mask); if (!(val & APIC_SPIV_APIC_ENABLED)) { int i; u32 lvt_val; @@ -1055,18 +1070,23 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); void kvm_free_lapic(struct kvm_vcpu *vcpu) { + struct kvm_lapic *apic = vcpu->arch.apic; + if (!vcpu->arch.apic) return; - hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); + hrtimer_cancel(&apic->lapic_timer.timer); if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) static_key_slow_dec_deferred(&apic_hw_disabled); - if (vcpu->arch.apic->regs) - free_page((unsigned long)vcpu->arch.apic->regs); + if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) + static_key_slow_dec_deferred(&apic_sw_disabled); - kfree(vcpu->arch.apic); + if (apic->regs) + free_page((unsigned long)apic->regs); + + kfree(apic); } /* @@ -1182,7 +1202,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_set_reg(apic, APIC_DFR, 0xffffffffU); - apic_set_reg(apic, APIC_SPIV, 0xff); + apic_set_spiv(apic, 0xff); apic_set_reg(apic, APIC_TASKPRI, 0); apic_set_reg(apic, APIC_LDR, 0); apic_set_reg(apic, APIC_ESR, 0); @@ -1335,6 +1355,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); + static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_lapic_reset(vcpu); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); @@ -1404,6 +1425,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); kvm_apic_set_version(vcpu); + apic_set_spiv(apic, apic_get_reg(apic, APIC_SPIV)); apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); @@ -1624,4 +1646,5 @@ void kvm_lapic_init(void) { /* do not patch jump label more than once per second */ jump_label_rate_limit(&apic_hw_disabled, HZ); + jump_label_rate_limit(&apic_sw_disabled, HZ); } -- cgit v1.2.3-70-g09d2 From 54e9818f3903902a4ea3046035739b8770880092 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Aug 2012 15:58:32 +0300 Subject: KVM: use jump label to optimize checking for in kernel local apic presence Usually all vcpus have local apic pointer initialized, so the check may be completely skipped. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 62 ++++++++++++++++++++++++++++------------------------ arch/x86/kvm/x86.c | 7 +++++- arch/x86/kvm/x86.h | 1 + 3 files changed, 41 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7f77e96ac5e..650379ba73a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -152,6 +152,13 @@ static inline int apic_enabled(struct kvm_lapic *apic) return apic_sw_enabled(apic) && apic_hw_enabled(apic); } +static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu) +{ + if (static_key_false(&kvm_no_apic_vcpu)) + return vcpu->arch.apic; + return true; +} + #define LVT_MASK \ (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) @@ -204,7 +211,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *feat; u32 v = APIC_VERSION; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!vcpu_has_lapic(vcpu)) return; feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); @@ -305,7 +312,6 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; int highest_irr; /* This may race with setting of irr in __apic_accept_irq() and @@ -313,9 +319,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) * will cause vmexit immediately and the value will be recalculated * on the next vmentry. */ - if (!apic) + if (!vcpu_has_lapic(vcpu)) return 0; - highest_irr = apic_find_highest_irr(apic); + highest_irr = apic_find_highest_irr(vcpu->arch.apic); return highest_irr; } @@ -1061,9 +1067,7 @@ static int apic_mmio_write(struct kvm_io_device *this, void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; - - if (apic) + if (vcpu_has_lapic(vcpu)) apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); @@ -1098,10 +1102,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) - return 0; - if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + apic_lvtt_period(apic)) return 0; return apic->lapic_timer.tscdeadline; @@ -1110,10 +1113,9 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) - return; - if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + apic_lvtt_period(apic)) return; hrtimer_cancel(&apic->lapic_timer.timer); @@ -1125,20 +1127,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) + if (!vcpu_has_lapic(vcpu)) return; + apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | (apic_get_reg(apic, APIC_TASKPRI) & 4)); } u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; u64 tpr; - if (!apic) + if (!vcpu_has_lapic(vcpu)) return 0; - tpr = (u64) apic_get_reg(apic, APIC_TASKPRI); + + tpr = (u64) apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); return (tpr & 0xf0) >> 4; } @@ -1237,7 +1240,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) bool kvm_apic_present(struct kvm_vcpu *vcpu) { - return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic); + return vcpu_has_lapic(vcpu) && apic_hw_enabled(vcpu->arch.apic); } int kvm_lapic_enabled(struct kvm_vcpu *vcpu) @@ -1258,10 +1261,11 @@ static bool lapic_is_periodic(struct kvm_lapic *apic) int apic_has_pending_timer(struct kvm_vcpu *vcpu) { - struct kvm_lapic *lapic = vcpu->arch.apic; + struct kvm_lapic *apic = vcpu->arch.apic; - if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) - return atomic_read(&lapic->lapic_timer.pending); + if (vcpu_has_lapic(vcpu) && apic_enabled(apic) && + apic_lvt_enabled(apic, APIC_LVTT)) + return atomic_read(&apic->lapic_timer.pending); return 0; } @@ -1371,7 +1375,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; int highest_irr; - if (!apic || !apic_enabled(apic)) + if (!vcpu_has_lapic(vcpu) || !apic_enabled(apic)) return -1; apic_update_ppr(apic); @@ -1399,7 +1403,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic && atomic_read(&apic->lapic_timer.pending) > 0) { + if (!vcpu_has_lapic(vcpu)) + return; + + if (atomic_read(&apic->lapic_timer.pending) > 0) { if (kvm_apic_local_deliver(apic, APIC_LVTT)) atomic_dec(&apic->lapic_timer.pending); } @@ -1439,13 +1446,12 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; struct hrtimer *timer; - if (!apic) + if (!vcpu_has_lapic(vcpu)) return; - timer = &apic->lapic_timer.timer; + timer = &vcpu->arch.apic->lapic_timer.timer; if (hrtimer_cancel(timer)) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } @@ -1602,7 +1608,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!vcpu_has_lapic(vcpu)) return 1; /* if this is ICR write vector before command */ @@ -1616,7 +1622,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) struct kvm_lapic *apic = vcpu->arch.apic; u32 low, high = 0; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!vcpu_has_lapic(vcpu)) return 1; if (apic_reg_read(apic, reg, 4, &low)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 29fa18d27e6..8ebf65c349e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6152,6 +6152,8 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); } +struct static_key kvm_no_apic_vcpu __read_mostly; + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { struct page *page; @@ -6184,7 +6186,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) r = kvm_create_lapic(vcpu); if (r < 0) goto fail_mmu_destroy; - } + } else + static_key_slow_inc(&kvm_no_apic_vcpu); vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, GFP_KERNEL); @@ -6224,6 +6227,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvm_mmu_destroy(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); free_page((unsigned long)vcpu->arch.pio_data); + if (!irqchip_in_kernel(vcpu->kvm)) + static_key_slow_dec(&kvm_no_apic_vcpu); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3d1134ddb88..2b5219c12ac 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -124,4 +124,5 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, extern u64 host_xcr0; +extern struct static_key kvm_no_apic_vcpu; #endif -- cgit v1.2.3-70-g09d2 From c48f14966cc41957d88c66dfe49a439e708ab7b8 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 5 Aug 2012 15:58:33 +0300 Subject: KVM: inline kvm_apic_present() and kvm_lapic_enabled() Those functions are used during interrupt injection. When inlined they become nops on the fast path. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 143 +++++++++++++++++++-------------------------------- arch/x86/kvm/lapic.h | 45 +++++++++++++++- 2 files changed, 96 insertions(+), 92 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 650379ba73a..333c27fa6e9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -73,11 +73,6 @@ static unsigned int min_timer_period_us = 500; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); -static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) -{ - return *((u32 *) (apic->regs + reg_off)); -} - static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) { *((u32 *) (apic->regs + reg_off)) = val; @@ -119,19 +114,11 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap) } struct static_key_deferred apic_hw_disabled __read_mostly; - -static inline int apic_hw_enabled(struct kvm_lapic *apic) -{ - if (static_key_false(&apic_hw_disabled.key)) - return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; - return MSR_IA32_APICBASE_ENABLE; -} - struct static_key_deferred apic_sw_disabled __read_mostly; static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) { - if ((apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { + if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { if (val & APIC_SPIV_APIC_ENABLED) static_key_slow_dec_deferred(&apic_sw_disabled); else @@ -140,23 +127,9 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) apic_set_reg(apic, APIC_SPIV, val); } -static inline int apic_sw_enabled(struct kvm_lapic *apic) -{ - if (static_key_false(&apic_sw_disabled.key)) - return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; - return APIC_SPIV_APIC_ENABLED; -} - static inline int apic_enabled(struct kvm_lapic *apic) { - return apic_sw_enabled(apic) && apic_hw_enabled(apic); -} - -static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu) -{ - if (static_key_false(&kvm_no_apic_vcpu)) - return vcpu->arch.apic; - return true; + return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); } #define LVT_MASK \ @@ -168,34 +141,34 @@ static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu) static inline int kvm_apic_id(struct kvm_lapic *apic) { - return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff; + return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) { - return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); + return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); } static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) { - return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; + return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; } static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) { - return ((apic_get_reg(apic, APIC_LVTT) & + return ((kvm_apic_get_reg(apic, APIC_LVTT) & apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); } static inline int apic_lvtt_period(struct kvm_lapic *apic) { - return ((apic_get_reg(apic, APIC_LVTT) & + return ((kvm_apic_get_reg(apic, APIC_LVTT) & apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); } static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) { - return ((apic_get_reg(apic, APIC_LVTT) & + return ((kvm_apic_get_reg(apic, APIC_LVTT) & apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_TSCDEADLINE); } @@ -211,7 +184,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *feat; u32 v = APIC_VERSION; - if (!vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu)) return; feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); @@ -319,7 +292,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) * will cause vmexit immediately and the value will be recalculated * on the next vmentry. */ - if (!vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu)) return 0; highest_irr = apic_find_highest_irr(vcpu->arch.apic); @@ -404,8 +377,8 @@ static void apic_update_ppr(struct kvm_lapic *apic) u32 tpr, isrv, ppr, old_ppr; int isr; - old_ppr = apic_get_reg(apic, APIC_PROCPRI); - tpr = apic_get_reg(apic, APIC_TASKPRI); + old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI); + tpr = kvm_apic_get_reg(apic, APIC_TASKPRI); isr = apic_find_highest_isr(apic); isrv = (isr != -1) ? isr : 0; @@ -441,13 +414,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) u32 logical_id; if (apic_x2apic_mode(apic)) { - logical_id = apic_get_reg(apic, APIC_LDR); + logical_id = kvm_apic_get_reg(apic, APIC_LDR); return logical_id & mda; } - logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR)); + logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR)); - switch (apic_get_reg(apic, APIC_DFR)) { + switch (kvm_apic_get_reg(apic, APIC_DFR)) { case APIC_DFR_FLAT: if (logical_id & mda) result = 1; @@ -459,7 +432,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) break; default: apic_debug("Bad DFR vcpu %d: %08x\n", - apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); + apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR)); break; } @@ -617,7 +590,7 @@ static int apic_set_eoi(struct kvm_lapic *apic) apic_clear_isr(vector, apic); apic_update_ppr(apic); - if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && + if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { int trigger_mode; if (apic_test_vector(vector, apic->regs + APIC_TMR)) @@ -632,8 +605,8 @@ static int apic_set_eoi(struct kvm_lapic *apic) static void apic_send_ipi(struct kvm_lapic *apic) { - u32 icr_low = apic_get_reg(apic, APIC_ICR); - u32 icr_high = apic_get_reg(apic, APIC_ICR2); + u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); + u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2); struct kvm_lapic_irq irq; irq.vector = icr_low & APIC_VECTOR_MASK; @@ -668,7 +641,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) ASSERT(apic != NULL); /* if initial count is 0, current count should also be 0 */ - if (apic_get_reg(apic, APIC_TMICT) == 0) + if (kvm_apic_get_reg(apic, APIC_TMICT) == 0) return 0; remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); @@ -724,13 +697,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) break; case APIC_PROCPRI: apic_update_ppr(apic); - val = apic_get_reg(apic, offset); + val = kvm_apic_get_reg(apic, offset); break; case APIC_TASKPRI: report_tpr_access(apic, false); /* fall thru */ default: - val = apic_get_reg(apic, offset); + val = kvm_apic_get_reg(apic, offset); break; } @@ -782,7 +755,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) { - return apic_hw_enabled(apic) && + return kvm_apic_hw_enabled(apic) && addr >= apic->base_address && addr < apic->base_address + LAPIC_MMIO_LENGTH; } @@ -805,7 +778,7 @@ static void update_divide_count(struct kvm_lapic *apic) { u32 tmp1, tmp2, tdcr; - tdcr = apic_get_reg(apic, APIC_TDCR); + tdcr = kvm_apic_get_reg(apic, APIC_TDCR); tmp1 = tdcr & 0xf; tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; apic->divide_count = 0x1 << (tmp2 & 0x7); @@ -822,7 +795,7 @@ static void start_apic_timer(struct kvm_lapic *apic) if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { /* lapic timer in oneshot or periodic mode */ now = apic->lapic_timer.timer.base->get_time(); - apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) + apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->divide_count; if (!apic->lapic_timer.period) @@ -854,7 +827,7 @@ static void start_apic_timer(struct kvm_lapic *apic) "timer initial count 0x%x, period %lldns, " "expire @ 0x%016" PRIx64 ".\n", __func__, APIC_BUS_CYCLE_NS, ktime_to_ns(now), - apic_get_reg(apic, APIC_TMICT), + kvm_apic_get_reg(apic, APIC_TMICT), apic->lapic_timer.period, ktime_to_ns(ktime_add_ns(now, apic->lapic_timer.period))); @@ -886,7 +859,7 @@ static void start_apic_timer(struct kvm_lapic *apic) static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) { - int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); + int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0)); if (apic_lvt_nmi_mode(lvt0_val)) { if (!nmi_wd_enabled) { @@ -937,7 +910,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_SPIV: { u32 mask = 0x3ff; - if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) + if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) mask |= APIC_SPIV_DIRECTED_EOI; apic_set_spiv(apic, val & mask); if (!(val & APIC_SPIV_APIC_ENABLED)) { @@ -945,7 +918,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) u32 lvt_val; for (i = 0; i < APIC_LVT_NUM; i++) { - lvt_val = apic_get_reg(apic, + lvt_val = kvm_apic_get_reg(apic, APIC_LVTT + 0x10 * i); apic_set_reg(apic, APIC_LVTT + 0x10 * i, lvt_val | APIC_LVT_MASKED); @@ -974,7 +947,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVT1: case APIC_LVTERR: /* TODO: Check vector */ - if (!apic_sw_enabled(apic)) + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; @@ -983,12 +956,12 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; case APIC_LVTT: - if ((apic_get_reg(apic, APIC_LVTT) & + if ((kvm_apic_get_reg(apic, APIC_LVTT) & apic->lapic_timer.timer_mode_mask) != (val & apic->lapic_timer.timer_mode_mask)) hrtimer_cancel(&apic->lapic_timer.timer); - if (!apic_sw_enabled(apic)) + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); apic_set_reg(apic, APIC_LVTT, val); @@ -1067,7 +1040,7 @@ static int apic_mmio_write(struct kvm_io_device *this, void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) { - if (vcpu_has_lapic(vcpu)) + if (kvm_vcpu_has_lapic(vcpu)) apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); @@ -1084,7 +1057,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) static_key_slow_dec_deferred(&apic_hw_disabled); - if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) + if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) static_key_slow_dec_deferred(&apic_sw_disabled); if (apic->regs) @@ -1103,7 +1076,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) return 0; @@ -1114,7 +1087,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) return; @@ -1127,21 +1100,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu)) return; apic_set_tpr(apic, ((cr8 & 0x0f) << 4) - | (apic_get_reg(apic, APIC_TASKPRI) & 4)); + | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); } u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) { u64 tpr; - if (!vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu)) return 0; - tpr = (u64) apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); + tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); return (tpr & 0xf0) >> 4; } @@ -1238,16 +1211,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) vcpu->arch.apic_base, apic->base_address); } -bool kvm_apic_present(struct kvm_vcpu *vcpu) -{ - return vcpu_has_lapic(vcpu) && apic_hw_enabled(vcpu->arch.apic); -} - -int kvm_lapic_enabled(struct kvm_vcpu *vcpu) -{ - return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic); -} - /* *---------------------------------------------------------------------- * timer interface @@ -1263,7 +1226,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (vcpu_has_lapic(vcpu) && apic_enabled(apic) && + if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT)) return atomic_read(&apic->lapic_timer.pending); @@ -1272,10 +1235,10 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) { - u32 reg = apic_get_reg(apic, lvt_type); + u32 reg = kvm_apic_get_reg(apic, lvt_type); int vector, mode, trig_mode; - if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { + if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; @@ -1375,23 +1338,23 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; int highest_irr; - if (!vcpu_has_lapic(vcpu) || !apic_enabled(apic)) + if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic)) return -1; apic_update_ppr(apic); highest_irr = apic_find_highest_irr(apic); if ((highest_irr == -1) || - ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) + ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI))) return -1; return highest_irr; } int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) { - u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); + u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0); int r = 0; - if (!apic_hw_enabled(vcpu->arch.apic)) + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) r = 1; if ((lvt0 & APIC_LVT_MASKED) == 0 && GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) @@ -1403,7 +1366,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu)) return; if (atomic_read(&apic->lapic_timer.pending) > 0) { @@ -1432,7 +1395,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); kvm_apic_set_version(vcpu); - apic_set_spiv(apic, apic_get_reg(apic, APIC_SPIV)); + apic_set_spiv(apic, kvm_apic_get_reg(apic, APIC_SPIV)); apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); @@ -1448,7 +1411,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) { struct hrtimer *timer; - if (!vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu)) return; timer = &vcpu->arch.apic->lapic_timer.timer; @@ -1549,7 +1512,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; + tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff; max_irr = apic_find_highest_irr(apic); if (max_irr < 0) max_irr = 0; @@ -1608,7 +1571,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu)) return 1; /* if this is ICR write vector before command */ @@ -1622,7 +1585,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) struct kvm_lapic *apic = vcpu->arch.apic; u32 low, high = 0; - if (!vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu)) return 1; if (apic_reg_read(apic, reg, 4, &low)) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 73fa299b68e..2ad9caa06f9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -55,8 +55,6 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); -int kvm_lapic_enabled(struct kvm_vcpu *vcpu); -bool kvm_apic_present(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); @@ -79,4 +77,47 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); void kvm_lapic_init(void); + +static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off) +{ + return *((u32 *) (apic->regs + reg_off)); +} + +extern struct static_key kvm_no_apic_vcpu; + +static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu) +{ + if (static_key_false(&kvm_no_apic_vcpu)) + return vcpu->arch.apic; + return true; +} + +extern struct static_key_deferred apic_hw_disabled; + +static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic) +{ + if (static_key_false(&apic_hw_disabled.key)) + return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; + return MSR_IA32_APICBASE_ENABLE; +} + +extern struct static_key_deferred apic_sw_disabled; + +static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic) +{ + if (static_key_false(&apic_sw_disabled.key)) + return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; + return APIC_SPIV_APIC_ENABLED; +} + +static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); +} + +static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) +{ + return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); +} + #endif -- cgit v1.2.3-70-g09d2 From a9ad773e0dd833651f0831020a0ea0265c29f2ea Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 6 Aug 2012 19:00:36 +0200 Subject: x86, cpu: Fixup tlb_flushall_shift formatting The TLB characteristics appeared like this in dmesg: [ 0.065817] Last level iTLB entries: 4KB 512, 2MB 1024, 4MB 512 [ 0.065817] Last level dTLB entries: 4KB 1024, 2MB 1024, 4MB 512 [ 0.065817] tlb_flushall_shift is 0xffffffff where tlb_flushall_shift is actually -1 but dumped as a hex number. However, the Kconfig option CONFIG_DEBUG_TLBFLUSH and the rest of the code treats this as a signed decimal and states "If you set it to -1, the code flushes the whole TLB unconditionally." So, fix its formatting in accordance with the other references to it. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344272439-29080-2-git-send-email-bp@amd64.org Acked-by: Alex Shi Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 46d8786d655..d239977f361 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -474,7 +474,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ - "tlb_flushall_shift is 0x%x\n", + "tlb_flushall_shift: %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], -- cgit v1.2.3-70-g09d2 From 5b556332c3ab19e6375836d35ca658776e9ba0f6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 6 Aug 2012 19:00:37 +0200 Subject: x86, cpu: Push TLB detection CPUID check down Push the max CPUID leaf check into the ->detect_tlb function and remove general test case from the generic path. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344272439-29080-3-git-send-email-bp@amd64.org Acked-by: Alex Shi Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 3 +-- arch/x86/kernel/cpu/intel.c | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d239977f361..080f4a737e3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -940,8 +940,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - if (boot_cpu_data.cpuid_level >= 2) - cpu_detect_tlb(&boot_cpu_data); + cpu_detect_tlb(&boot_cpu_data); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0a4ce2980a5..198e019a531 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) int i, j, n; unsigned int regs[4]; unsigned char *desc = (unsigned char *)regs; + + if (c->cpuid_level < 2) + return; + /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; -- cgit v1.2.3-70-g09d2 From b46882e4c4de4813947fce940fe74af794a1eb72 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 6 Aug 2012 19:00:38 +0200 Subject: x86, cpu: Add AMD TLB size detection Read I- and DTLB entries count from CPUID on AMD. Handle all the different family-specific cases. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344272439-29080-4-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9d92e19039f..bcd200839c9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -737,6 +737,59 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, } #endif +static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) +{ + u32 ebx, eax, ecx, edx; + u16 mask = 0xfff; + + if (c->x86 < 0xf) + return; + + if (c->extended_cpuid_level < 0x80000006) + return; + + cpuid(0x80000006, &eax, &ebx, &ecx, &edx); + + tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; + tlb_lli_4k[ENTRIES] = ebx & mask; + + /* + * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB + * characteristics from the CPUID function 0x80000005 instead. + */ + if (c->x86 == 0xf) { + cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + mask = 0xff; + } + + /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ + if (!((eax >> 16) & mask)) { + u32 a, b, c, d; + + cpuid(0x80000005, &a, &b, &c, &d); + tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; + } else { + tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; + } + + /* a 4M entry uses two 2M entries */ + tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + + /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ + if (!(eax & mask)) { + /* Erratum 658 */ + if (c->x86 == 0x15 && c->x86_model <= 0x1f) { + tlb_lli_2m[ENTRIES] = 1024; + } else { + cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + tlb_lli_2m[ENTRIES] = eax & 0xff; + } + } else + tlb_lli_2m[ENTRIES] = eax & mask; + + tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; +} + static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, @@ -756,6 +809,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_size_cache = amd_size_cache, #endif .c_early_init = early_init_amd, + .c_detect_tlb = cpu_detect_tlb_amd, .c_bsp_init = bsp_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, -- cgit v1.2.3-70-g09d2 From 057237bb35a605d795fd787868a1088705f26ee5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 6 Aug 2012 19:00:39 +0200 Subject: x86, cpu: Preset default tlb_flushall_shift on AMD Run the mprotect.c microbenchmark on all our families >= K8 and preset the flushall shift variable accordingly. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344272439-29080-5-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bcd200839c9..f7e98a2c0d1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -737,6 +737,17 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, } #endif +static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) +{ + if (!cpu_has_invlpg) + return; + + tlb_flushall_shift = 5; + + if (c->x86 <= 0x11) + tlb_flushall_shift = 4; +} + static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) { u32 ebx, eax, ecx, edx; @@ -788,6 +799,8 @@ static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) tlb_lli_2m[ENTRIES] = eax & mask; tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + cpu_set_tlb_flushall_shift(c); } static const struct cpu_dev __cpuinitconst amd_cpu_dev = { -- cgit v1.2.3-70-g09d2 From 64eb0620296f924d5fded755c5ed95fb73649e06 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 8 Aug 2012 15:24:36 +0300 Subject: KVM: correctly detect APIC SW state in kvm_apic_post_state_restore() For apic_set_spiv() to track APIC SW state correctly it needs to see previous and next values of the spurious vector register, but currently memset() overwrite the old value before apic_set_spiv() get a chance to do tracking. Fix it by calling apic_set_spiv() before overwriting old value. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 7 +++++-- arch/x86/kvm/lapic.h | 3 ++- arch/x86/kvm/x86.c | 3 +-- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 333c27fa6e9..18d149d8020 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1389,13 +1389,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) return vector; } -void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) +void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) { struct kvm_lapic *apic = vcpu->arch.apic; kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); + /* set SPIV separately to get count of SW disabled APICs right */ + apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); + memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); kvm_apic_set_version(vcpu); - apic_set_spiv(apic, kvm_apic_get_reg(apic, APIC_SPIV)); apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 2ad9caa06f9..615a8b03016 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -54,7 +54,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); -void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); +void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8ebf65c349e..91a595827de 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2348,8 +2348,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); - kvm_apic_post_state_restore(vcpu); + kvm_apic_post_state_restore(vcpu, s); update_cr8_intercept(vcpu); return 0; -- cgit v1.2.3-70-g09d2 From 4670a300a2169e1e922593c5d35b0cdaee112901 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 9 Aug 2012 10:59:21 -0700 Subject: x86/mce: Make cmci_discover() quiet cmci_discover() works out which machine check banks support CMCI, and which of those are shared by multiple logical processors. It uses this information to ensure that exactly one cpu is designated the owner of each bank so that when interrupts are broadcast to multiple cpus, only one of them will look in a shared bank to log the error and clear the bank. At boot time cmci_discover() performs this task silently. But during certain cpu hotplug operations it prints out a set of summary lines like this: CPU 35 MCA banks CMCI:0 CMCI:1 CMCI:3 CMCI:5 CMCI:6 CMCI:7 CMCI:8 CMCI:9 CMCI:10 CMCI:11 CPU 1 MCA banks CMCI:0 CMCI:1 CMCI:3 CPU 39 MCA banks CMCI:0 CMCI:1 CMCI:3 CPU 38 MCA banks CMCI:0 CMCI:1 CMCI:3 CPU 32 MCA banks CMCI:0 CMCI:1 CMCI:3 CPU 37 MCA banks CMCI:0 CMCI:1 CMCI:3 CPU 36 MCA banks CMCI:0 CMCI:1 CMCI:3 CPU 34 MCA banks CMCI:0 CMCI:1 CMCI:3 The value of these messages seems very low. A user might painstakingly cross-check against the data sheet for a processor to ensure that all CMCI supported banks are correctly reported, but this seems improbable. If users really wanted to do this, we should print the information at boot time too. Remove the messages. Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce_intel.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 38e49bc95ff..59648e48a14 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -65,24 +65,15 @@ static void intel_threshold_interrupt(void) mce_notify_irq(); } -static void print_update(char *type, int *hdr, int num) -{ - if (*hdr == 0) - printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); - *hdr = 1; - printk(KERN_CONT " %s:%d", type, num); -} - /* * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks * on this CPU. Use the algorithm recommended in the SDM to discover shared * banks. */ -static void cmci_discover(int banks, int boot) +static void cmci_discover(int banks) { unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); unsigned long flags; - int hdr = 0; int i; raw_spin_lock_irqsave(&cmci_discover_lock, flags); @@ -96,8 +87,7 @@ static void cmci_discover(int banks, int boot) /* Already owned by someone else? */ if (val & MCI_CTL2_CMCI_EN) { - if (test_and_clear_bit(i, owned) && !boot) - print_update("SHD", &hdr, i); + clear_bit(i, owned); __clear_bit(i, __get_cpu_var(mce_poll_banks)); continue; } @@ -109,16 +99,13 @@ static void cmci_discover(int banks, int boot) /* Did the enable bit stick? -- the bank supports CMCI */ if (val & MCI_CTL2_CMCI_EN) { - if (!test_and_set_bit(i, owned) && !boot) - print_update("CMCI", &hdr, i); + set_bit(i, owned); __clear_bit(i, __get_cpu_var(mce_poll_banks)); } else { WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); - if (hdr) - printk(KERN_CONT "\n"); } /* @@ -186,7 +173,7 @@ void cmci_rediscover(int dying) continue; /* Recheck banks in case CPUs don't all have the same */ if (cmci_supported(&banks)) - cmci_discover(banks, 0); + cmci_discover(banks); } set_cpus_allowed_ptr(current, old); @@ -200,7 +187,7 @@ void cmci_reenable(void) { int banks; if (cmci_supported(&banks)) - cmci_discover(banks, 0); + cmci_discover(banks); } static void intel_init_cmci(void) @@ -211,7 +198,7 @@ static void intel_init_cmci(void) return; mce_threshold_vector = intel_threshold_interrupt; - cmci_discover(banks, 1); + cmci_discover(banks); /* * For CPU #0 this runs with still disabled APIC, but that's * ok because only the vector is set up. We still do another -- cgit v1.2.3-70-g09d2 From 55babd8f41f122f5f4c7cebf520c766c983282c6 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Thu, 9 Aug 2012 11:44:51 -0700 Subject: x86/mce: Add CMCI poll mode On Intel systems corrected machine check interrupts (CMCI) may be sent to multiple logical processors; possibly to all processors on the affected socket (SDM Volume 3B "15.5.1 CMCI Local APIC Interface"). This means that a persistent error (such as a stuck bit in ECC memory) may cause a storm of interrupts that greatly hinders or prevents forward progress (probably on many processors). To solve this we keep track of the rate at which each processor sees CMCI. If we exceed a threshold, we disable CMCI delivery and switch to polling the machine check banks. If the storm subsides (none of the affected processors see any more errors for a complete poll interval) we re-enable CMCI. [Tony: Added console messages when storm begins/ends and increased storm threshold from 5 to 15 so we have a few more logged entries before we disable interrupts and start dropping reports] Signed-off-by: Chen Gong Signed-off-by: Thomas Gleixner Tested-by: Chen Gong Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce-internal.h | 12 ++++ arch/x86/kernel/cpu/mcheck/mce.c | 47 +++++++++++-- arch/x86/kernel/cpu/mcheck/mce_intel.c | 108 +++++++++++++++++++++++++++++- 3 files changed, 160 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index ed44c8a6585..6a05c1d327a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -28,6 +28,18 @@ extern int mce_ser; extern struct mce_bank *mce_banks; +#ifdef CONFIG_X86_MCE_INTEL +unsigned long mce_intel_adjust_timer(unsigned long interval); +void mce_intel_cmci_poll(void); +void mce_intel_hcpu_update(unsigned long cpu); +#else +# define mce_intel_adjust_timer mce_adjust_timer_default +static inline void mce_intel_cmci_poll(void) { } +static inline void mce_intel_hcpu_update(unsigned long cpu) { } +#endif + +void mce_timer_kick(unsigned long interval); + #ifdef CONFIG_ACPI_APEI int apei_write_mce(struct mce *m); ssize_t apei_read_mce(struct mce *m, u64 *record_id); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b4dde1527ed..8c1beea6cab 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1260,6 +1260,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); +static unsigned long mce_adjust_timer_default(unsigned long interval) +{ + return interval; +} + +static unsigned long (*mce_adjust_timer)(unsigned long interval) = + mce_adjust_timer_default; + static void mce_timer_fn(unsigned long data) { struct timer_list *t = &__get_cpu_var(mce_timer); @@ -1270,6 +1278,7 @@ static void mce_timer_fn(unsigned long data) if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); + mce_intel_cmci_poll(); } /* @@ -1277,14 +1286,38 @@ static void mce_timer_fn(unsigned long data) * polling interval, otherwise increase the polling interval. */ iv = __this_cpu_read(mce_next_interval); - if (mce_notify_irq()) + if (mce_notify_irq()) { iv = max(iv / 2, (unsigned long) HZ/100); - else + } else { iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); + iv = mce_adjust_timer(iv); + } __this_cpu_write(mce_next_interval, iv); + /* Might have become 0 after CMCI storm subsided */ + if (iv) { + t->expires = jiffies + iv; + add_timer_on(t, smp_processor_id()); + } +} - t->expires = jiffies + iv; - add_timer_on(t, smp_processor_id()); +/* + * Ensure that the timer is firing in @interval from now. + */ +void mce_timer_kick(unsigned long interval) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned long when = jiffies + interval; + unsigned long iv = __this_cpu_read(mce_next_interval); + + if (timer_pending(t)) { + if (time_before(when, t->expires)) + mod_timer_pinned(t, when); + } else { + t->expires = round_jiffies(when); + add_timer_on(t, smp_processor_id()); + } + if (interval < iv) + __this_cpu_write(mce_next_interval, interval); } /* Must not be called in IRQ context where del_timer_sync() can deadlock */ @@ -1548,6 +1581,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: mce_intel_feature_init(c); + mce_adjust_timer = mce_intel_adjust_timer; break; case X86_VENDOR_AMD: mce_amd_feature_init(c); @@ -1559,7 +1593,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) static void mce_start_timer(unsigned int cpu, struct timer_list *t) { - unsigned long iv = check_interval * HZ; + unsigned long iv = mce_adjust_timer(check_interval * HZ); __this_cpu_write(mce_next_interval, iv); @@ -2272,10 +2306,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); mce_device_remove(cpu); + mce_intel_hcpu_update(cpu); break; case CPU_DOWN_PREPARE: - del_timer_sync(t); smp_call_function_single(cpu, mce_disable_cpu, &action, 1); + del_timer_sync(t); break; case CPU_DOWN_FAILED: smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 59648e48a14..098386fed48 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -15,6 +15,8 @@ #include #include +#include "mce-internal.h" + /* * Support for Intel Correct Machine Check Interrupts. This allows * the CPU to raise an interrupt when a corrected machine check happened. @@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); */ static DEFINE_RAW_SPINLOCK(cmci_discover_lock); -#define CMCI_THRESHOLD 1 +#define CMCI_THRESHOLD 1 +#define CMCI_POLL_INTERVAL (30 * HZ) +#define CMCI_STORM_INTERVAL (1 * HZ) +#define CMCI_STORM_THRESHOLD 15 + +static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); +static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt); +static DEFINE_PER_CPU(unsigned int, cmci_storm_state); + +enum { + CMCI_STORM_NONE, + CMCI_STORM_ACTIVE, + CMCI_STORM_SUBSIDED, +}; + +static atomic_t cmci_storm_on_cpus; static int cmci_supported(int *banks) { @@ -53,6 +70,93 @@ static int cmci_supported(int *banks) return !!(cap & MCG_CMCI_P); } +void mce_intel_cmci_poll(void) +{ + if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) + return; + machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); +} + +void mce_intel_hcpu_update(unsigned long cpu) +{ + if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE) + atomic_dec(&cmci_storm_on_cpus); + + per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; +} + +unsigned long mce_intel_adjust_timer(unsigned long interval) +{ + int r; + + if (interval < CMCI_POLL_INTERVAL) + return interval; + + switch (__this_cpu_read(cmci_storm_state)) { + case CMCI_STORM_ACTIVE: + /* + * We switch back to interrupt mode once the poll timer has + * silenced itself. That means no events recorded and the + * timer interval is back to our poll interval. + */ + __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); + r = atomic_sub_return(1, &cmci_storm_on_cpus); + if (r == 0) + pr_notice("CMCI storm subsided: switching to interrupt mode\n"); + /* FALLTHROUGH */ + + case CMCI_STORM_SUBSIDED: + /* + * We wait for all cpus to go back to SUBSIDED + * state. When that happens we switch back to + * interrupt mode. + */ + if (!atomic_read(&cmci_storm_on_cpus)) { + __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); + cmci_reenable(); + cmci_recheck(); + } + return CMCI_POLL_INTERVAL; + default: + /* + * We have shiny weather. Let the poll do whatever it + * thinks. + */ + return interval; + } +} + +static bool cmci_storm_detect(void) +{ + unsigned int cnt = __this_cpu_read(cmci_storm_cnt); + unsigned long ts = __this_cpu_read(cmci_time_stamp); + unsigned long now = jiffies; + int r; + + if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE) + return true; + + if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) { + cnt++; + } else { + cnt = 1; + __this_cpu_write(cmci_time_stamp, now); + } + __this_cpu_write(cmci_storm_cnt, cnt); + + if (cnt <= CMCI_STORM_THRESHOLD) + return false; + + cmci_clear(); + __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); + r = atomic_add_return(1, &cmci_storm_on_cpus); + mce_timer_kick(CMCI_POLL_INTERVAL); + + if (r == 1) + pr_notice("CMCI storm detected: switching to poll mode\n"); + return true; +} + /* * The interrupt handler. This is called on every event. * Just call the poller directly to log any events. @@ -61,6 +165,8 @@ static int cmci_supported(int *banks) */ static void intel_threshold_interrupt(void) { + if (cmci_storm_detect()) + return; machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); mce_notify_irq(); } -- cgit v1.2.3-70-g09d2 From c5e63197db519bae1c33e41ea0342a50f39e7a93 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:36 +0200 Subject: perf: Unified API to record selective sets of arch registers This brings a new API to help the selective dump of registers on event sampling, and its implementation for x86 arch. Added HAVE_PERF_REGS config option to determine if the architecture provides perf registers ABI. The information about desired registers will be passed in u64 mask. It's up to the architecture to map the registers into the mask bits. For the x86 arch implementation, both 32 and 64 bit registers bits are defined within single enum to ensure 64 bit system can provide register dump for compat task if needed in the future. Original-patch-by: Frederic Weisbecker [ Added missing linux/errno.h include ] Signed-off-by: Jiri Olsa Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/Kconfig | 6 +++ arch/x86/Kconfig | 1 + arch/x86/include/asm/perf_regs.h | 33 +++++++++++++++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/perf_regs.c | 90 ++++++++++++++++++++++++++++++++++++++++ include/linux/perf_regs.h | 19 +++++++++ 6 files changed, 151 insertions(+) create mode 100644 arch/x86/include/asm/perf_regs.h create mode 100644 arch/x86/kernel/perf_regs.c create mode 100644 include/linux/perf_regs.h (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index 72f2fa189cc..68d827b7ae8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -222,6 +222,12 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_PERF_REGS + bool + help + Support selective register dumps for perf events. This includes + bit-mapping of each registers and a unique architecture id. + config HAVE_ARCH_JUMP_LABEL bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..3fab6ec9edc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_PERF_REGS select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h new file mode 100644 index 00000000000..3f2207bfd17 --- /dev/null +++ b/arch/x86/include/asm/perf_regs.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_PERF_REGS_H +#define _ASM_X86_PERF_REGS_H + +enum perf_event_x86_regs { + PERF_REG_X86_AX, + PERF_REG_X86_BX, + PERF_REG_X86_CX, + PERF_REG_X86_DX, + PERF_REG_X86_SI, + PERF_REG_X86_DI, + PERF_REG_X86_BP, + PERF_REG_X86_SP, + PERF_REG_X86_IP, + PERF_REG_X86_FLAGS, + PERF_REG_X86_CS, + PERF_REG_X86_SS, + PERF_REG_X86_DS, + PERF_REG_X86_ES, + PERF_REG_X86_FS, + PERF_REG_X86_GS, + PERF_REG_X86_R8, + PERF_REG_X86_R9, + PERF_REG_X86_R10, + PERF_REG_X86_R11, + PERF_REG_X86_R12, + PERF_REG_X86_R13, + PERF_REG_X86_R14, + PERF_REG_X86_R15, + + PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, + PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, +}; +#endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8215e5652d9..8d7a619718b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 00000000000..3d6923528b1 --- /dev/null +++ b/arch/x86/kernel/perf_regs.c @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_32 +#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX +#else +#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX +#endif + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { + PT_REGS_OFFSET(PERF_REG_X86_AX, ax), + PT_REGS_OFFSET(PERF_REG_X86_BX, bx), + PT_REGS_OFFSET(PERF_REG_X86_CX, cx), + PT_REGS_OFFSET(PERF_REG_X86_DX, dx), + PT_REGS_OFFSET(PERF_REG_X86_SI, si), + PT_REGS_OFFSET(PERF_REG_X86_DI, di), + PT_REGS_OFFSET(PERF_REG_X86_BP, bp), + PT_REGS_OFFSET(PERF_REG_X86_SP, sp), + PT_REGS_OFFSET(PERF_REG_X86_IP, ip), + PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), + PT_REGS_OFFSET(PERF_REG_X86_CS, cs), + PT_REGS_OFFSET(PERF_REG_X86_SS, ss), +#ifdef CONFIG_X86_32 + PT_REGS_OFFSET(PERF_REG_X86_DS, ds), + PT_REGS_OFFSET(PERF_REG_X86_ES, es), + PT_REGS_OFFSET(PERF_REG_X86_FS, fs), + PT_REGS_OFFSET(PERF_REG_X86_GS, gs), +#else + /* + * The pt_regs struct does not store + * ds, es, fs, gs in 64 bit mode. + */ + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, +#endif +#ifdef CONFIG_X86_64 + PT_REGS_OFFSET(PERF_REG_X86_R8, r8), + PT_REGS_OFFSET(PERF_REG_X86_R9, r9), + PT_REGS_OFFSET(PERF_REG_X86_R10, r10), + PT_REGS_OFFSET(PERF_REG_X86_R11, r11), + PT_REGS_OFFSET(PERF_REG_X86_R12, r12), + PT_REGS_OFFSET(PERF_REG_X86_R13, r13), + PT_REGS_OFFSET(PERF_REG_X86_R14, r14), + PT_REGS_OFFSET(PERF_REG_X86_R15, r15), +#endif +}; + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset))) + return 0; + + return regs_get_register(regs, pt_regs_offset[idx]); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) + +#ifdef CONFIG_X86_32 +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} +#else /* CONFIG_X86_64 */ +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + if (mask & REG_NOSUPPORT) + return -EINVAL; + + return 0; +} +#endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h new file mode 100644 index 00000000000..a2f1a98f783 --- /dev/null +++ b/include/linux/perf_regs.h @@ -0,0 +1,19 @@ +#ifndef _LINUX_PERF_REGS_H +#define _LINUX_PERF_REGS_H + +#ifdef CONFIG_HAVE_PERF_REGS +#include +u64 perf_reg_value(struct pt_regs *regs, int idx); +int perf_reg_validate(u64 mask); +#else +static inline u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + return 0; +} + +static inline int perf_reg_validate(u64 mask) +{ + return mask ? -ENOSYS : 0; +} +#endif /* CONFIG_HAVE_PERF_REGS */ +#endif /* _LINUX_PERF_REGS_H */ -- cgit v1.2.3-70-g09d2 From 4018994f3d8785275ef0e7391b75c3462c029e56 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:37 +0200 Subject: perf: Add ability to attach user level registers dump to sample Introducing PERF_SAMPLE_REGS_USER sample type bit to trigger the dump of user level registers on sample. Registers we want to dump are specified by sample_regs_user bitmask. Only user level registers are dumped at the moment. Meaning the register values of the user space context as it was before the user entered the kernel for whatever reason (syscall, irq, exception, or a PMI happening in userspace). The layout of the sample_regs_user bitmap is described in asm/perf_regs.h for archs that support register dump. This is going to be useful to bring Dwarf CFI based stack unwinding on top of samples. Original-patch-by: Frederic Weisbecker [ Dump registers ABI specification. ] Signed-off-by: Jiri Olsa Suggested-by: Stephane Eranian Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/perf_regs.c | 15 +++++++++++ include/linux/perf_event.h | 35 +++++++++++++++++++++--- include/linux/perf_regs.h | 6 +++++ kernel/events/core.c | 66 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 3d6923528b1..c5a3e5cfe07 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include #include @@ -71,6 +73,11 @@ int perf_reg_validate(u64 mask) return 0; } + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} #else /* CONFIG_X86_64 */ #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ (1ULL << PERF_REG_X86_ES) | \ @@ -87,4 +94,12 @@ int perf_reg_validate(u64 mask) return 0; } + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_IA32)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} #endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7602ccb3f40..3d4d84745f0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -130,8 +130,9 @@ enum perf_event_sample_format { PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_BRANCH_STACK = 1U << 11, + PERF_SAMPLE_REGS_USER = 1U << 12, - PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 13, /* non-ABI */ }; /* @@ -162,6 +163,15 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_KERNEL|\ PERF_SAMPLE_BRANCH_HV) +/* + * Values to determine ABI of the registers dump. + */ +enum perf_sample_regs_abi { + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, +}; + /* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: @@ -194,6 +204,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -271,7 +282,13 @@ struct perf_event_attr { __u64 bp_len; __u64 config2; /* extension of config1 */ }; - __u64 branch_sample_type; /* enum branch_sample_type */ + __u64 branch_sample_type; /* enum perf_branch_sample_type */ + + /* + * Defines set of user regs to dump on samples. + * See asm/perf_regs.h for details. + */ + __u64 sample_regs_user; }; /* @@ -548,6 +565,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * }; */ PERF_RECORD_SAMPLE = 9, @@ -609,6 +629,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -654,6 +675,11 @@ struct perf_branch_stack { struct perf_branch_entry entries[0]; }; +struct perf_regs_user { + __u64 abi; + struct pt_regs *regs; +}; + struct task_struct; /* @@ -1133,6 +1159,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; + struct perf_regs_user regs_user; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -1142,7 +1169,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->addr = addr; data->raw = NULL; data->br_stack = NULL; - data->period = period; + data->period = period; + data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; + data->regs_user.regs = NULL; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a2f1a98f783..3c73d5fe18b 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -5,6 +5,7 @@ #include u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); +u64 perf_reg_abi(struct task_struct *task); #else static inline u64 perf_reg_value(struct pt_regs *regs, int idx) { @@ -15,5 +16,10 @@ static inline int perf_reg_validate(u64 mask) { return mask ? -ENOSYS : 0; } + +static inline u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_NONE; +} #endif /* CONFIG_HAVE_PERF_REGS */ #endif /* _LINUX_PERF_REGS_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index b7935fcec7d..d3ce97525b9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3756,6 +3756,37 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); +static void +perf_output_sample_regs(struct perf_output_handle *handle, + struct pt_regs *regs, u64 mask) +{ + int bit; + + for_each_set_bit(bit, (const unsigned long *) &mask, + sizeof(mask) * BITS_PER_BYTE) { + u64 val; + + val = perf_reg_value(regs, bit); + perf_output_put(handle, val); + } +} + +static void perf_sample_regs_user(struct perf_regs_user *regs_user, + struct pt_regs *regs) +{ + if (!user_mode(regs)) { + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + regs_user->regs = regs; + regs_user->abi = perf_reg_abi(current); + } +} + static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) @@ -4016,6 +4047,23 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_put(handle, nr); } } + + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi = data->regs_user.abi; + + /* + * If there are no regs to dump, notice it through + * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE). + */ + perf_output_put(handle, abi); + + if (abi) { + u64 mask = event->attr.sample_regs_user; + perf_output_sample_regs(handle, + data->regs_user.regs, + mask); + } + } } void perf_prepare_sample(struct perf_event_header *header, @@ -4067,6 +4115,20 @@ void perf_prepare_sample(struct perf_event_header *header, } header->size += size; } + + if (sample_type & PERF_SAMPLE_REGS_USER) { + /* regs dump ABI info */ + int size = sizeof(u64); + + perf_sample_regs_user(&data->regs_user, regs); + + if (data->regs_user.regs) { + u64 mask = event->attr.sample_regs_user; + size += hweight64(mask) * sizeof(u64); + } + + header->size += size; + } } static void perf_event_output(struct perf_event *event, @@ -6142,6 +6204,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } } + + if (attr->sample_type & PERF_SAMPLE_REGS_USER) + ret = perf_reg_validate(attr->sample_regs_user); + out: return ret; -- cgit v1.2.3-70-g09d2 From 91d7753a45f8525dc75b6be01e427dc1c378dc16 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Aug 2012 15:20:38 +0200 Subject: perf: Factor __output_copy to be usable with specific copy function Adding a generic way to use __output_copy function with specific copy function via DEFINE_PERF_OUTPUT_COPY macro. Using this to add new __output_copy_user function, that provides output copy from user pointers. For x86 the copy_from_user_nmi function is used and __copy_from_user_inatomic for the rest of the architectures. This new function will be used in user stack dump on sample, coming in next patches. Signed-off-by: Jiri Olsa Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-4-git-send-email-jolsa@redhat.com Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/perf_event.h | 2 ++ include/linux/perf_event.h | 2 +- kernel/events/internal.h | 62 ++++++++++++++++++++++++++------------- kernel/events/ring_buffer.c | 4 +-- 4 files changed, 46 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index cb4e43bce98..4fabcdf1cfa 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { } static inline void amd_pmu_disable_virt(void) { } #endif +#define arch_perf_out_copy_user copy_from_user_nmi + #endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3d4d84745f0..d41394a1af3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1319,7 +1319,7 @@ static inline bool has_branch_stack(struct perf_event *event) extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); -extern void perf_output_copy(struct perf_output_handle *handle, +extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index a096c19f2c2..7fd5408493d 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -2,6 +2,7 @@ #define _KERNEL_EVENTS_INTERNAL_H #include +#include /* Buffer handling */ @@ -76,30 +77,49 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); } -static inline void -__output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) +#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ +static inline unsigned int \ +func_name(struct perf_output_handle *handle, \ + const void *buf, unsigned int len) \ +{ \ + unsigned long size, written; \ + \ + do { \ + size = min_t(unsigned long, handle->size, len); \ + \ + written = memcpy_func(handle->addr, buf, size); \ + \ + len -= written; \ + handle->addr += written; \ + buf += written; \ + handle->size -= written; \ + if (!handle->size) { \ + struct ring_buffer *rb = handle->rb; \ + \ + handle->page++; \ + handle->page &= rb->nr_pages - 1; \ + handle->addr = rb->data_pages[handle->page]; \ + handle->size = PAGE_SIZE << page_order(rb); \ + } \ + } while (len && written == size); \ + \ + return len; \ +} + +static inline int memcpy_common(void *dst, const void *src, size_t n) { - do { - unsigned long size = min_t(unsigned long, handle->size, len); - - memcpy(handle->addr, buf, size); - - len -= size; - handle->addr += size; - buf += size; - handle->size -= size; - if (!handle->size) { - struct ring_buffer *rb = handle->rb; - - handle->page++; - handle->page &= rb->nr_pages - 1; - handle->addr = rb->data_pages[handle->page]; - handle->size = PAGE_SIZE << page_order(rb); - } - } while (len); + memcpy(dst, src, n); + return n; } +DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) + +#ifndef arch_perf_out_copy_user +#define arch_perf_out_copy_user __copy_from_user_inatomic +#endif + +DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) + /* Callchain handling */ extern struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 6ddaba43fb7..b4c2ad3dee7 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -182,10 +182,10 @@ out: return -ENOSPC; } -void perf_output_copy(struct perf_output_handle *handle, +unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len) { - __output_copy(handle, buf, len); + return __output_copy(handle, buf, len); } void perf_output_end(struct perf_output_handle *handle) -- cgit v1.2.3-70-g09d2 From c5ebcedb566ef17bda7b02686e0d658a7bb42ee7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Aug 2012 15:20:40 +0200 Subject: perf: Add ability to attach user stack dump to sample Introducing PERF_SAMPLE_STACK_USER sample type bit to trigger the dump of the user level stack on sample. The size of the dump is specified by sample_stack_user value. Being able to dump parts of the user stack, starting from the stack pointer, will be useful to make a post mortem dwarf CFI based stack unwinding. Added HAVE_PERF_USER_STACK_DUMP config option to determine if the architecture provides user stack dump on perf event samples. This needs access to the user stack pointer which is not unified across architectures. Enabling this for x86 architecture. Signed-off-by: Jiri Olsa Original-patch-by: Frederic Weisbecker Cc: "Frank Ch. Eigler" Cc: Arun Sharma Cc: Benjamin Redelings Cc: Corey Ashford Cc: Cyrill Gorcunov Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: Tom Zanussi Cc: Ulrich Drepper Link: http://lkml.kernel.org/r/1344345647-11536-6-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/Kconfig | 7 +++ arch/x86/Kconfig | 1 + include/linux/perf_event.h | 18 +++++- kernel/events/core.c | 150 ++++++++++++++++++++++++++++++++++++++++++++- kernel/events/internal.h | 16 +++++ 5 files changed, 190 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index 68d827b7ae8..2a83a3f6a61 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -228,6 +228,13 @@ config HAVE_PERF_REGS Support selective register dumps for perf events. This includes bit-mapping of each registers and a unique architecture id. +config HAVE_PERF_USER_STACK_DUMP + bool + help + Support user stack dumps for perf event samples. This needs + access to the user stack pointer which is not unified across + architectures. + config HAVE_ARCH_JUMP_LABEL bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3fab6ec9edc..a2d19ee750c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -61,6 +61,7 @@ config X86 select PERF_EVENTS select HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8a73f75beb1..d1d25f6a5e2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -131,8 +131,9 @@ enum perf_event_sample_format { PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_BRANCH_STACK = 1U << 11, PERF_SAMPLE_REGS_USER = 1U << 12, + PERF_SAMPLE_STACK_USER = 1U << 13, - PERF_SAMPLE_MAX = 1U << 13, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ }; /* @@ -205,6 +206,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ #define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ +#define PERF_ATTR_SIZE_VER4 96 /* add: sample_stack_user */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -289,6 +291,14 @@ struct perf_event_attr { * See asm/perf_regs.h for details. */ __u64 sample_regs_user; + + /* + * Defines size of the user stack to dump on samples. + */ + __u32 sample_stack_user; + + /* Align to u64. */ + __u32 __reserved_2; }; /* @@ -568,6 +578,10 @@ enum perf_event_type { * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * }; */ PERF_RECORD_SAMPLE = 9, @@ -1160,6 +1174,7 @@ struct perf_sample_data { struct perf_raw_record *raw; struct perf_branch_stack *br_stack; struct perf_regs_user regs_user; + u64 stack_user_size; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -1172,6 +1187,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->period = period; data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; data->regs_user.regs = NULL; + data->stack_user_size = 0; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/kernel/events/core.c b/kernel/events/core.c index d3ce97525b9..2ba890450d1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "internal.h" @@ -3787,6 +3788,101 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user, } } +/* + * Get remaining task size from user stack pointer. + * + * It'd be better to take stack vma map and limit this more + * precisly, but there's no way to get it safely under interrupt, + * so using TASK_SIZE as limit. + */ +static u64 perf_ustack_task_size(struct pt_regs *regs) +{ + unsigned long addr = perf_user_stack_pointer(regs); + + if (!addr || addr >= TASK_SIZE) + return 0; + + return TASK_SIZE - addr; +} + +static u16 +perf_sample_ustack_size(u16 stack_size, u16 header_size, + struct pt_regs *regs) +{ + u64 task_size; + + /* No regs, no stack pointer, no dump. */ + if (!regs) + return 0; + + /* + * Check if we fit in with the requested stack size into the: + * - TASK_SIZE + * If we don't, we limit the size to the TASK_SIZE. + * + * - remaining sample size + * If we don't, we customize the stack size to + * fit in to the remaining sample size. + */ + + task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs)); + stack_size = min(stack_size, (u16) task_size); + + /* Current header size plus static size and dynamic size. */ + header_size += 2 * sizeof(u64); + + /* Do we fit in with the current stack dump size? */ + if ((u16) (header_size + stack_size) < header_size) { + /* + * If we overflow the maximum size for the sample, + * we customize the stack dump size to fit in. + */ + stack_size = USHRT_MAX - header_size - sizeof(u64); + stack_size = round_up(stack_size, sizeof(u64)); + } + + return stack_size; +} + +static void +perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, + struct pt_regs *regs) +{ + /* Case of a kernel thread, nothing to dump */ + if (!regs) { + u64 size = 0; + perf_output_put(handle, size); + } else { + unsigned long sp; + unsigned int rem; + u64 dyn_size; + + /* + * We dump: + * static size + * - the size requested by user or the best one we can fit + * in to the sample max size + * data + * - user stack dump data + * dynamic size + * - the actual dumped size + */ + + /* Static size. */ + perf_output_put(handle, dump_size); + + /* Data. */ + sp = perf_user_stack_pointer(regs); + rem = __output_copy_user(handle, (void *) sp, dump_size); + dyn_size = dump_size - rem; + + perf_output_skip(handle, rem); + + /* Dynamic size. */ + perf_output_put(handle, dyn_size); + } +} + static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) @@ -4064,6 +4160,11 @@ void perf_output_sample(struct perf_output_handle *handle, mask); } } + + if (sample_type & PERF_SAMPLE_STACK_USER) + perf_output_sample_ustack(handle, + data->stack_user_size, + data->regs_user.regs); } void perf_prepare_sample(struct perf_event_header *header, @@ -4129,6 +4230,35 @@ void perf_prepare_sample(struct perf_event_header *header, header->size += size; } + + if (sample_type & PERF_SAMPLE_STACK_USER) { + /* + * Either we need PERF_SAMPLE_STACK_USER bit to be allways + * processed as the last one or have additional check added + * in case new sample type is added, because we could eat + * up the rest of the sample size. + */ + struct perf_regs_user *uregs = &data->regs_user; + u16 stack_size = event->attr.sample_stack_user; + u16 size = sizeof(u64); + + if (!uregs->abi) + perf_sample_regs_user(uregs, regs); + + stack_size = perf_sample_ustack_size(stack_size, header->size, + uregs->regs); + + /* + * If there is something to dump, add space for the dump + * itself and for the field that tells the dynamic size, + * which is how many have been actually dumped. + */ + if (stack_size) + size += sizeof(u64) + stack_size; + + data->stack_user_size = stack_size; + header->size += size; + } } static void perf_event_output(struct perf_event *event, @@ -6205,8 +6335,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, } } - if (attr->sample_type & PERF_SAMPLE_REGS_USER) + if (attr->sample_type & PERF_SAMPLE_REGS_USER) { ret = perf_reg_validate(attr->sample_regs_user); + if (ret) + return ret; + } + + if (attr->sample_type & PERF_SAMPLE_STACK_USER) { + if (!arch_perf_have_user_stack_dump()) + return -ENOSYS; + + /* + * We have __u32 type for the size, but so far + * we can only use __u16 as maximum due to the + * __u16 sample size limit. + */ + if (attr->sample_stack_user >= USHRT_MAX) + ret = -EINVAL; + else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) + ret = -EINVAL; + } out: return ret; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index ce7bdfc1d04..d56a64c99a8 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -158,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx) recursion[rctx]--; } +#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP +static inline bool arch_perf_have_user_stack_dump(void) +{ + return true; +} + +#define perf_user_stack_pointer(regs) user_stack_pointer(regs) +#else +static inline bool arch_perf_have_user_stack_dump(void) +{ + return false; +} + +#define perf_user_stack_pointer(regs) 0 +#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */ + #endif /* _KERNEL_EVENTS_INTERNAL_H */ -- cgit v1.2.3-70-g09d2 From 51d59c6b422f3f95940ae4e5b42f165595906aee Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 3 Aug 2012 15:57:49 -0300 Subject: KVM: x86: fix pvclock guest stopped flag reporting kvm_guest_time_update unconditionally clears hv_clock.flags field, so the notification never reaches the guest. Fix it by allowing PVCLOCK_GUEST_STOPPED to passthrough. Reviewed-by: Eric B Munson Reviewed-by: Amit Shah Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1309e69b57f..fc0e752e756 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -420,6 +420,8 @@ struct kvm_vcpu_arch { unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; + /* set guest stopped flag in pvclock flags field */ + bool pvclock_set_guest_stopped_request; struct { u64 msr_val; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 91a595827de..fb0d93788bf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1134,6 +1134,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) unsigned long this_tsc_khz; s64 kernel_ns, max_kernel_ns; u64 tsc_timestamp; + u8 pvclock_flags; /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); @@ -1215,7 +1216,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_kernel_ns = kernel_ns; vcpu->last_guest_tsc = tsc_timestamp; - vcpu->hv_clock.flags = 0; + + pvclock_flags = 0; + if (vcpu->pvclock_set_guest_stopped_request) { + pvclock_flags |= PVCLOCK_GUEST_STOPPED; + vcpu->pvclock_set_guest_stopped_request = false; + } + + vcpu->hv_clock.flags = pvclock_flags; /* * The interface expects us to write an even number signaling that the @@ -2624,10 +2632,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, */ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) { - struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock; if (!vcpu->arch.time_page) return -EINVAL; - src->flags |= PVCLOCK_GUEST_STOPPED; + vcpu->arch.pvclock_set_guest_stopped_request = true; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); return 0; } -- cgit v1.2.3-70-g09d2 From e423ca155d3f5f16b46e30de9c818875b1fd617d Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Tue, 7 Aug 2012 13:10:13 +0530 Subject: KVM: Correct vmrun to vmcall typo Signed-off-by: Raghavendra K T Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_para.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2f7712e08b1..20f5697888b 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -116,7 +116,7 @@ static inline bool kvm_check_and_clear_guest_paused(void) */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" -/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun +/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the * instructions are guaranteed to be supported. * -- cgit v1.2.3-70-g09d2 From 2a7921b7a033d5cfc176690d6297c82846c582b2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 12 Aug 2012 16:12:29 +0300 Subject: KVM: VMX: restore MSR_IA32_DEBUGCTLMSR after VMEXIT MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT. Restore it to the correct value. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cc8ad983692..d0f4bec9fc6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6222,6 +6222,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long debugctlmsr; if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -6261,6 +6262,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_set_interrupt_shadow(vcpu, 0); atomic_switch_perf_msrs(vmx); + debugctlmsr = get_debugctlmsr(); vmx->__launched = vmx->loaded_vmcs->launched; asm( @@ -6362,6 +6364,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ + if (debugctlmsr) + update_debugctlmsr(debugctlmsr); + #ifndef CONFIG_X86_64 /* * The sysexit path does not restore ds/es, so we must set them to -- cgit v1.2.3-70-g09d2 From dbcb4e798072d114fe68813f39a9efd239ab99c0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 13 Aug 2012 15:38:22 +0300 Subject: KVM: VMX: Advertize RDTSC exiting to nested guests All processors that support VMX have that feature, and guests (Xen) depend on it. As we already implement it, advertize it to the guest. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d0f4bec9fc6..13e0296cea4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1990,7 +1990,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) #endif CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | - CPU_BASED_RDPMC_EXITING | + CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the -- cgit v1.2.3-70-g09d2 From 28a6fdabb3ea775d3d707afd9d2728b3ced2c34d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 14 Aug 2012 19:20:28 +0300 Subject: KVM: x86: drop parameter validation in ioapic/pic We validate irq pin number when routing is setup, so code handling illegal irq # in pic and ioapic on each injection is never called. Drop it, replace with BUG_ON to catch out of bounds access bugs. Signed-off-by: Michael S. Tsirkin Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8259.c | 18 +++++++++--------- virt/kvm/ioapic.c | 37 +++++++++++++++++++------------------ 2 files changed, 28 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index e498b18f010..90c84f947d4 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -190,17 +190,17 @@ void kvm_pic_update_irq(struct kvm_pic *s) int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level) { - int ret = -1; + int ret, irq_level; + + BUG_ON(irq < 0 || irq >= PIC_NUM_PINS); pic_lock(s); - if (irq >= 0 && irq < PIC_NUM_PINS) { - int irq_level = __kvm_irq_line_state(&s->irq_states[irq], - irq_source_id, level); - ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); - pic_update_irq(s); - trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, - s->pics[irq >> 3].imr, ret == 0); - } + irq_level = __kvm_irq_line_state(&s->irq_states[irq], + irq_source_id, level); + ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); + pic_update_irq(s); + trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, + s->pics[irq >> 3].imr, ret == 0); pic_unlock(s); return ret; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index ef61d529a6c..cfb7e4d52dc 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -197,28 +197,29 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, u32 old_irr; u32 mask = 1 << irq; union kvm_ioapic_redirect_entry entry; - int ret = 1; + int ret, irq_level; + + BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); spin_lock(&ioapic->lock); old_irr = ioapic->irr; - if (irq >= 0 && irq < IOAPIC_NUM_PINS) { - int irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], - irq_source_id, level); - entry = ioapic->redirtbl[irq]; - irq_level ^= entry.fields.polarity; - if (!irq_level) - ioapic->irr &= ~mask; - else { - int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); - ioapic->irr |= mask; - if ((edge && old_irr != ioapic->irr) || - (!edge && !entry.fields.remote_irr)) - ret = ioapic_service(ioapic, irq); - else - ret = 0; /* report coalesced interrupt */ - } - trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); + irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], + irq_source_id, level); + entry = ioapic->redirtbl[irq]; + irq_level ^= entry.fields.polarity; + if (!irq_level) { + ioapic->irr &= ~mask; + ret = 1; + } else { + int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + ioapic->irr |= mask; + if ((edge && old_irr != ioapic->irr) || + (!edge && !entry.fields.remote_irr)) + ret = ioapic_service(ioapic, irq); + else + ret = 0; /* report coalesced interrupt */ } + trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); spin_unlock(&ioapic->lock); return ret; -- cgit v1.2.3-70-g09d2 From 8fbe6a541f50eeec5e3e49bd92db23ade9496673 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 15 Aug 2012 16:00:40 +0200 Subject: KVM guest: disable stealtime on reboot to avoid mem corruption else, host continues to update stealtime after reboot, which can corrupt e.g. initramfs area. found when tracking down initramfs unpack error on initial reboot (with qemu-kvm -smp 2, no problem with single-core). Signed-off-by: Florian Westphal Signed-off-by: Marcelo Tosatti --- arch/x86/kernel/kvm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c1d61ee4b4f..1596cc8fd79 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); + kvm_disable_steal_time(); } static int kvm_pv_reboot_notify(struct notifier_block *nb, -- cgit v1.2.3-70-g09d2 From 023af608254add7ba037cd634cc5f2fb21ff6420 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 22 Jul 2012 18:18:37 +0300 Subject: crypto: aesni_intel - improve lrw and xts performance by utilizing parallel AES-NI hardware pipelines Use parallel LRW and XTS encryption facilities to better utilize AES-NI hardware pipelines and gain extra performance. Tcrypt benchmark results (async), old vs new ratios: Intel Core i5-2450M CPU (fam: 6, model: 42, step: 7) aes:128bit lrw:256bit xts:256bit size lrw-enc lrw-dec xts-dec xts-dec 16B 0.99x 1.00x 1.22x 1.19x 64B 1.38x 1.50x 1.58x 1.61x 256B 2.04x 2.02x 2.27x 2.29x 1024B 2.56x 2.54x 2.89x 2.92x 8192B 2.85x 2.99x 3.40x 3.23x aes:192bit lrw:320bit xts:384bit size lrw-enc lrw-dec xts-dec xts-dec 16B 1.08x 1.08x 1.16x 1.17x 64B 1.48x 1.54x 1.59x 1.65x 256B 2.18x 2.17x 2.29x 2.28x 1024B 2.67x 2.67x 2.87x 3.05x 8192B 2.93x 2.84x 3.28x 3.33x aes:256bit lrw:348bit xts:512bit size lrw-enc lrw-dec xts-dec xts-dec 16B 1.07x 1.07x 1.18x 1.19x 64B 1.56x 1.56x 1.70x 1.71x 256B 2.22x 2.24x 2.46x 2.46x 1024B 2.76x 2.77x 3.13x 3.05x 8192B 2.99x 3.05x 3.40x 3.30x Cc: Huang Ying Signed-off-by: Jussi Kivilinna Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 253 ++++++++++++++++++++++++++++++++----- crypto/Kconfig | 2 + 2 files changed, 220 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 648347a0577..7c04d0da709 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -41,18 +44,10 @@ #define HAS_CTR #endif -#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) -#define HAS_LRW -#endif - #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) #define HAS_PCBC #endif -#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) -#define HAS_XTS -#endif - /* This data is stored at the end of the crypto_tfm struct. * It's a type of per "session" data storage location. * This needs to be 16 byte aligned. @@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data { #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) #define RFC4106_HASH_SUBKEY_SIZE 16 +struct aesni_lrw_ctx { + struct lrw_table_ctx lrw_table; + u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; +}; + +struct aesni_xts_ctx { + u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; + u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; +}; + asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, @@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) #endif #endif -#ifdef HAS_LRW -static int ablk_lrw_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))"); -} -#endif - #ifdef HAS_PCBC static int ablk_pcbc_init(struct crypto_tfm *tfm) { @@ -412,12 +410,160 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm) } #endif -#ifdef HAS_XTS -static int ablk_xts_init(struct crypto_tfm *tfm) +static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) { - return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))"); + aesni_ecb_enc(ctx, blks, blks, nbytes); +} + +static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) +{ + aesni_ecb_dec(ctx, blks, blks, nbytes); +} + +static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key, + keylen - AES_BLOCK_SIZE); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE); +} + +static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm) +{ + struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), + .crypt_fn = lrw_xts_encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = lrw_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), + .crypt_fn = lrw_xts_decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = lrw_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2, + keylen / 2); +} + + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), + .tweak_fn = XTS_TWEAK_CAST(aesni_enc), + .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), + .crypt_fn = lrw_xts_encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = xts_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), + .tweak_fn = XTS_TWEAK_CAST(aesni_enc), + .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), + .crypt_fn = lrw_xts_decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = xts_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; } -#endif #ifdef CONFIG_X86_64 static int rfc4106_init(struct crypto_tfm *tfm) @@ -1035,10 +1181,10 @@ static struct crypto_alg aesni_algs[] = { { }, #endif #endif -#ifdef HAS_LRW +#ifdef HAS_PCBC }, { - .cra_name = "lrw(aes)", - .cra_driver_name = "lrw-aes-aesni", + .cra_name = "pcbc(aes)", + .cra_driver_name = "pcbc-aes-aesni", .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, @@ -1046,12 +1192,12 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_lrw_init, + .cra_init = ablk_pcbc_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, .setkey = ablk_set_key, .encrypt = ablk_encrypt, @@ -1059,10 +1205,50 @@ static struct crypto_alg aesni_algs[] = { { }, }, #endif -#ifdef HAS_PCBC }, { - .cra_name = "pcbc(aes)", - .cra_driver_name = "pcbc-aes-aesni", + .cra_name = "__lrw-aes-aesni", + .cra_driver_name = "__driver-lrw-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesni_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_exit = lrw_aesni_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = lrw_aesni_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-aes-aesni", + .cra_driver_name = "__driver-xts-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesni_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aesni_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "lrw(aes)", + .cra_driver_name = "lrw-aes-aesni", .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, @@ -1070,20 +1256,18 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_pcbc_init, + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, + .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, .ivsize = AES_BLOCK_SIZE, .setkey = ablk_set_key, .encrypt = ablk_encrypt, .decrypt = ablk_decrypt, }, }, -#endif -#ifdef HAS_XTS }, { .cra_name = "xts(aes)", .cra_driver_name = "xts-aes-aesni", @@ -1094,7 +1278,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_xts_init, + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { @@ -1106,7 +1290,6 @@ static struct crypto_alg aesni_algs[] = { { .decrypt = ablk_decrypt, }, }, -#endif } }; diff --git a/crypto/Kconfig b/crypto/Kconfig index cbcc0e2eeda..213fb37be51 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -564,6 +564,8 @@ config CRYPTO_AES_NI_INTEL select CRYPTO_CRYPTD select CRYPTO_ABLK_HELPER_X86 select CRYPTO_ALGAPI + select CRYPTO_LRW + select CRYPTO_XTS help Use Intel AES-NI instructions for AES algorithm. -- cgit v1.2.3-70-g09d2 From 0570a365a6b8ccbfe7baa459de2b7396ddf2de90 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 19 Aug 2012 19:06:43 +0200 Subject: x86, boot: Remove obsolete and unused constant RAMDISK The named constant RAMDISK is unused. It used to set the (obsolete) kernel boot header field ram_size, but its usage for that purpose got dropped in commit 5e47c478b0b69bc9bc3ba544e4b1ca3268f98fef ("x86: remove zImage support"). Now remove this constant too. Signed-off-by: Paul Bolle Link: http://lkml.kernel.org/r/1345396003.1771.9.camel@x61.thuisdomein Signed-off-by: H. Peter Anvin --- arch/x86/boot/header.S | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b4e15dd6786..2a017441b8b 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -32,10 +32,6 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define SVGA_MODE ASK_VGA #endif -#ifndef RAMDISK -#define RAMDISK 0 -#endif - #ifndef ROOT_RDONLY #define ROOT_RDONLY 1 #endif -- cgit v1.2.3-70-g09d2 From ece3234a77ebcd5bbeea6b829c9798328d290cae Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 13 Aug 2012 22:23:33 +0200 Subject: x86: dt: Use linear irq domain for ioapic(s) The former conversion to irq_domain_add_legacy() did not fully work since we miss the irq decs for NR_IRQS_LEGACY+. Ideally we could use irq_domain_add_simple() or the no-map variant (and program the virq <-> line mapping directly into ioapic) but this would require a different irq lookup in "do_IRQ()" and won't work with ACPI without changes. So this is probably easiest for everyone. Tested-by: Thierry Reding Signed-off-by: Sebastian Andrzej Siewior Cc: Grant Likely Link: http://lkml.kernel.org/r/20120813202304.GA3529@breakpoint.cc Signed-off-by: Thomas Gleixner --- arch/x86/kernel/devicetree.c | 51 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced4a87..b1581527a23 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { .xlate = ioapic_xlate, }; +static void dt_add_ioapic_domain(unsigned int ioapic_num, + struct device_node *np) +{ + struct irq_domain *id; + struct mp_ioapic_gsi *gsi_cfg; + int ret; + int num; + + gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); + num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; + + id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, + (void *)ioapic_num); + BUG_ON(!id); + if (gsi_cfg->gsi_base == 0) { + /* + * The first NR_IRQS_LEGACY irq descs are allocated in + * early_irq_init() and need just a mapping. The + * remaining irqs need both. All of them are preallocated + * and assigned so we can keep the 1:1 mapping which the ioapic + * is having. + */ + ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); + if (ret) + pr_err("Error mapping legacy IRQs: %d\n", ret); + + if (num > NR_IRQS_LEGACY) { + ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, + NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); + if (ret) + pr_err("Error creating mapping for the " + "remaining IRQs: %d\n", ret); + } + irq_set_default_host(id); + } else { + ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); + if (ret) + pr_err("Error creating IRQ mapping: %d\n", ret); + } +} + static void __init ioapic_add_ofnode(struct device_node *np) { struct resource r; @@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) for (i = 0; i < nr_ioapics; i++) { if (r.start == mpc_ioapic_addr(i)) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; - - gsi_cfg = mp_ioapic_gsi_routing(i); - - id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, - &ioapic_irq_domain_ops, - (void*)i); - BUG_ON(!id); + dt_add_ioapic_domain(i, np); return; } } -- cgit v1.2.3-70-g09d2 From d3a8009b1731abb7026a840d1c2701f877f9429f Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Mon, 6 Aug 2012 22:13:00 +0800 Subject: x86/irq/i8259: Fix incorrect comment Signed-off-by: Yuanhan Liu Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8259.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 36d1853e91a..9a5c460404d 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -263,7 +263,7 @@ static void i8259A_shutdown(void) * out of. */ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ } static struct syscore_ops i8259_syscore_ops = { -- cgit v1.2.3-70-g09d2 From 8e3d9d061b5d132217629e7b5635ff0c02488e65 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 10:57:42 +0800 Subject: KVM: x86: fix possible infinite loop caused by reexecute_instruction Currently, we reexecute all unhandleable instructions if they do not access on the mmio, however, it can not work if host map the readonly memory to guest. If the instruction try to write this kind of memory, it will fault again when guest retry it, then we will goto a infinite loop: retry instruction -> write #PF -> emulation fail -> retry instruction -> ... Fix it by retrying the instruction only when it faults on the writable memory Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fb0d93788bf..704680d0fa3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4473,6 +4473,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) { gpa_t gpa; + pfn_t pfn; if (tdp_enabled) return false; @@ -4490,8 +4491,17 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) if (gpa == UNMAPPED_GVA) return true; /* let cpu generate fault */ - if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT))) + /* + * Do not retry the unhandleable instruction if it faults on the + * readonly host memory, otherwise it will goto a infinite loop: + * retry instruction -> write #PF -> emulation fail -> retry + * instruction -> ... + */ + pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); + if (!is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); return true; + } return false; } -- cgit v1.2.3-70-g09d2 From 037d92dc5d4691ae7cf44699c55ca83b1b441992 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 10:59:12 +0800 Subject: KVM: introduce gfn_to_pfn_memslot_atomic It can instead of hva_to_pfn_atomic Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 5 +---- include/linux/kvm_host.h | 3 ++- virt/kvm/kvm_main.c | 14 ++++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9651c2cd000..5548971ae80 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2510,15 +2510,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) { struct kvm_memory_slot *slot; - unsigned long hva; slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); if (!slot) return KVM_PFN_ERR_FAULT; - hva = gfn_to_hva_memslot(slot, gfn); - - return hva_to_pfn_atomic(hva); + return gfn_to_pfn_memslot_atomic(slot, gfn); } static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d4bd4d41e35..52c86e4f6d8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -462,7 +462,6 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -pfn_t hva_to_pfn_atomic(unsigned long addr); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, bool write_fault, bool *writable); @@ -470,6 +469,8 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); + void kvm_release_pfn_dirty(pfn_t pfn); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7b94d70a323..543f9b7e5aa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1102,12 +1102,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, return pfn; } -pfn_t hva_to_pfn_atomic(unsigned long addr) -{ - return hva_to_pfn(addr, true, NULL, true, NULL); -} -EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); - static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, bool write_fault, bool *writable) { @@ -1155,6 +1149,14 @@ pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) return hva_to_pfn(addr, false, NULL, true, NULL); } +pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) +{ + unsigned long addr = gfn_to_hva_memslot(slot, gfn); + + return hva_to_pfn(addr, true, NULL, true, NULL); +} +EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); + int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, int nr_pages) { -- cgit v1.2.3-70-g09d2 From 4d8b81abc47b83a1939e59df2fdb0e98dfe0eedd Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 11:02:51 +0800 Subject: KVM: introduce readonly memslot In current code, if we map a readonly memory space from host to guest and the page is not currently mapped in the host, we will get a fault pfn and async is not allowed, then the vm will crash We introduce readonly memory region to map ROM/ROMD to the guest, read access is happy for readonly memslot, write access on readonly memslot will cause KVM_EXIT_MMIO exit Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 10 ++-- arch/x86/include/asm/kvm.h | 1 + arch/x86/kvm/mmu.c | 9 ++++ arch/x86/kvm/x86.c | 1 + include/linux/kvm.h | 6 ++- include/linux/kvm_host.h | 7 +-- virt/kvm/kvm_main.c | 96 +++++++++++++++++++++++++++++++-------- 7 files changed, 102 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index bf33aaa4c59..b91bfd43f00 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -857,7 +857,8 @@ struct kvm_userspace_memory_region { }; /* for kvm_memory_region::flags */ -#define KVM_MEM_LOG_DIRTY_PAGES 1UL +#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) +#define KVM_MEM_READONLY (1UL << 1) This ioctl allows the user to create or modify a guest physical memory slot. When changing an existing slot, it may be moved in the guest @@ -873,9 +874,12 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr be identical. This allows large pages in the guest to be backed by large pages in the host. -The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which +The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which instructs kvm to keep track of writes to memory within the slot. See -the KVM_GET_DIRTY_LOG ioctl. +the KVM_GET_DIRTY_LOG ioctl. Another flag is KVM_MEM_READONLY when the +KVM_CAP_READONLY_MEM capability, it indicates the guest memory is read-only, +that means, guest is only allowed to read it. Writes will be posted to +userspace as KVM_EXIT_MMIO exits. When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory region are automatically reflected into the guest. For example, an mmap() diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 246617efd67..521bf252e34 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -25,6 +25,7 @@ #define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_XSAVE #define __KVM_HAVE_XCRS +#define __KVM_HAVE_READONLY_MEM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5548971ae80..8e312a2e141 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2647,6 +2647,15 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) { + /* + * Do not cache the mmio info caused by writing the readonly gfn + * into the spte otherwise read access on readonly gfn also can + * caused mmio page fault and treat it as mmio access. + * Return 1 to tell kvm to emulate it. + */ + if (pfn == KVM_PFN_ERR_RO_FAULT) + return 1; + if (pfn == KVM_PFN_ERR_HWPOISON) { kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 704680d0fa3..42bbf4187d2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_GET_TSC_KHZ: case KVM_CAP_PCI_2_3: case KVM_CAP_KVMCLOCK_CTRL: + case KVM_CAP_READONLY_MEM: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2de335d7f63..d808694673f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -106,7 +106,8 @@ struct kvm_userspace_memory_region { * other bits are reserved for kvm internal use which are defined in * include/linux/kvm_host.h. */ -#define KVM_MEM_LOG_DIRTY_PAGES 1UL +#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) +#define KVM_MEM_READONLY (1UL << 1) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -621,6 +622,9 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_S390_COW 79 #define KVM_CAP_PPC_ALLOC_HTAB 80 +#ifdef __KVM_HAVE_READONLY_MEM +#define KVM_CAP_READONLY_MEM 81 +#endif #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a913ac709a9..5972c9845dd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -465,6 +465,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); +unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); @@ -792,12 +793,6 @@ hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) return slot->base_gfn + gfn_offset; } -static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, - gfn_t gfn) -{ - return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; -} - static inline gpa_t gfn_to_gpa(gfn_t gfn) { return (gpa_t)gfn << PAGE_SHIFT; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e3e1658c491..3416f8a31f6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -680,7 +680,13 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) { - if (mem->flags & ~KVM_MEM_LOG_DIRTY_PAGES) + u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; + +#ifdef KVM_CAP_READONLY_MEM + valid_flags |= KVM_MEM_READONLY; +#endif + + if (mem->flags & ~valid_flags) return -EINVAL; return 0; @@ -973,18 +979,45 @@ out: return size; } -static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, - gfn_t *nr_pages) +static bool memslot_is_readonly(struct kvm_memory_slot *slot) +{ + return slot->flags & KVM_MEM_READONLY; +} + +static unsigned long __gfn_to_hva_memslot(struct kvm_memory_slot *slot, + gfn_t gfn) +{ + return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; +} + +static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, + gfn_t *nr_pages, bool write) { if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return KVM_HVA_ERR_BAD; + if (memslot_is_readonly(slot) && write) + return KVM_HVA_ERR_RO_BAD; + if (nr_pages) *nr_pages = slot->npages - (gfn - slot->base_gfn); - return gfn_to_hva_memslot(slot, gfn); + return __gfn_to_hva_memslot(slot, gfn); } +static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, + gfn_t *nr_pages) +{ + return __gfn_to_hva_many(slot, gfn, nr_pages, true); +} + +unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, + gfn_t gfn) +{ + return gfn_to_hva_many(slot, gfn, NULL); +} +EXPORT_SYMBOL_GPL(gfn_to_hva_memslot); + unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); @@ -997,7 +1030,7 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); */ static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) { - return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); + return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); } static int kvm_read_hva(void *data, void __user *hva, int len) @@ -1106,6 +1139,17 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, return npages; } +static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) +{ + if (unlikely(!(vma->vm_flags & VM_READ))) + return false; + + if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) + return false; + + return true; +} + /* * Pin guest page in memory and return its pfn. * @addr: host virtual address which maps memory to the guest @@ -1130,8 +1174,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, /* we can do it either atomically or asynchronously, not both */ BUG_ON(atomic && async); - BUG_ON(!write_fault && !writable); - if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn)) return pfn; @@ -1158,7 +1200,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, vma->vm_pgoff; BUG_ON(!kvm_is_mmio_pfn(pfn)); } else { - if (async && (vma->vm_flags & VM_WRITE)) + if (async && vma_is_valid(vma, write_fault)) *async = true; pfn = KVM_PFN_ERR_FAULT; } @@ -1167,19 +1209,40 @@ exit: return pfn; } +static pfn_t +__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, + bool *async, bool write_fault, bool *writable) +{ + unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); + + if (addr == KVM_HVA_ERR_RO_BAD) + return KVM_PFN_ERR_RO_FAULT; + + if (kvm_is_error_hva(addr)) + return KVM_PFN_ERR_BAD; + + /* Do not map writable pfn in the readonly memslot. */ + if (writable && memslot_is_readonly(slot)) { + *writable = false; + writable = NULL; + } + + return hva_to_pfn(addr, atomic, async, write_fault, + writable); +} + static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, bool write_fault, bool *writable) { - unsigned long addr; + struct kvm_memory_slot *slot; if (async) *async = false; - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return KVM_PFN_ERR_BAD; + slot = gfn_to_memslot(kvm, gfn); - return hva_to_pfn(addr, atomic, async, write_fault, writable); + return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault, + writable); } pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) @@ -1210,15 +1273,12 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { - unsigned long addr = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn(addr, false, NULL, true, NULL); + return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); } pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) { - unsigned long addr = gfn_to_hva_memslot(slot, gfn); - - return hva_to_pfn(addr, true, NULL, true, NULL); + return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); -- cgit v1.2.3-70-g09d2 From 4dbf32c30f7e5379588a692c7bf80b05d9ef8026 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jul 2012 19:05:53 +0200 Subject: x86, microcode: Save an indentation level in reload_for_cpu Invert the uci->valid check so that the later block can be aligned on the first indentation level of the function. No functional change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-3-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_core.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 4873e62db6a..63a95686502 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -276,19 +276,18 @@ static struct platform_device *microcode_pdev; static int reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ustate; int err = 0; - if (uci->valid) { - enum ucode_state ustate; - - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - } + if (!uci->valid) + return err; + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); + if (ustate == UCODE_OK) + apply_microcode_on_target(cpu); + else + if (ustate == UCODE_ERROR) + err = -EINVAL; return err; } -- cgit v1.2.3-70-g09d2 From bb9d3e473d5b324907e15dff4e54410b28ea50e2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 3 Aug 2012 15:26:50 +0200 Subject: x86, microcode: Drop uci->mc check on resume path Remove the uci->mc check on the cpu resume path because the low-level drivers do that anyway. More importantly, though, this fixes a contrived and obscure but still important case. Imagine the following: * boot machine, no new microcode in /lib/firmware * a subset of the CPUs is offlined * in the meantime, user puts new fresh microcode container into /lib/firmware and reloads it by doing $ echo 1 > /sys/devices/system/cpu/microcode/reload * offlined cores come back online and they don't get the newer microcode applied due to this check. Later patches take care of the issue on AMD. While at it, cleanup code around it. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-4-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_core.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 63a95686502..706a5c9b8eb 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -369,13 +369,10 @@ static void microcode_fini_cpu(int cpu) static enum ucode_state microcode_resume_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->mc) - return UCODE_NFOUND; - pr_debug("CPU%d updated upon resume\n", cpu); - apply_microcode_on_target(cpu); + + if (apply_microcode_on_target(cpu)) + return UCODE_ERROR; return UCODE_OK; } @@ -404,14 +401,11 @@ static enum ucode_state microcode_init_cpu(int cpu) static enum ucode_state microcode_update_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; if (uci->valid) - ustate = microcode_resume_cpu(cpu); - else - ustate = microcode_init_cpu(cpu); + return microcode_resume_cpu(cpu); - return ustate; + return microcode_init_cpu(cpu); } static int mc_device_add(struct device *dev, struct subsys_interface *sif) -- cgit v1.2.3-70-g09d2 From 09c3f0d883300c8fc2bb62e9a70cf89a2ada9a80 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jul 2012 20:15:10 +0200 Subject: x86, microcode: Cleanup cpu hotplug notifier callback Mask out CPU_TASKS_FROZEN bit so that all _FROZEN cases can be dropped. Also, add some more comments as to why CPU_ONLINE falls through to CPU_DOWN_FAILED (no break), and for the CPU_DEAD case. Realign debug printks better. Idea blatantly stolen from a tglx patch: http://marc.info/?l=linux-kernel&m=134267779513862 Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-5-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_core.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 706a5c9b8eb..dcde544e012 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -470,34 +470,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) struct device *dev; dev = get_cpu_device(cpu); - switch (action) { + + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: microcode_update_cpu(cpu); - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: pr_debug("CPU%d added\n", cpu); + /* + * "break" is missing on purpose here because we want to fall + * through in order to create the sysfs group. + */ + + case CPU_DOWN_FAILED: if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); break; + case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&dev->kobj, &mc_attr_group); pr_debug("CPU%d removed\n", cpu); break; /* + * case CPU_DEAD: + * * When a CPU goes offline, don't free up or invalidate the copy of * the microcode in kernel memory, so that we can reuse it when the * CPU comes back online without unnecessarily requesting the userspace * for it again. */ - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; } + + /* The CPU refused to come up during a system resume */ + if (action == CPU_UP_CANCELED_FROZEN) + microcode_fini_cpu(cpu); + return NOTIFY_OK; } -- cgit v1.2.3-70-g09d2 From e43f6e67ec1c142550860bbe0b51166c5ee4cac8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 1 Aug 2012 19:17:01 +0200 Subject: x86, microcode: Straighten out Kconfig text Update and clarify Kconfig help text along with menu names. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-6-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..1ccccc6efb3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -982,25 +982,25 @@ config X86_REBOOTFIXUPS Say N otherwise. config MICROCODE - tristate "/dev/cpu/microcode - microcode support" + tristate "CPU microcode loading support" select FW_LOADER ---help--- + If you say Y here, you will be able to update the microcode on certain Intel and AMD processors. The Intel support is for the - IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, - Pentium 4, Xeon etc. The AMD support is for family 0x10 and - 0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra. - You will obviously need the actual microcode binary data itself - which is not shipped with the Linux kernel. + IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, + Xeon etc. The AMD support is for families 0x10 and later. You will + obviously need the actual microcode binary data itself which is not + shipped with the Linux kernel. This option selects the general module only, you need to select at least one vendor specific module as well. - To compile this driver as a module, choose M here: the - module will be called microcode. + To compile this driver as a module, choose M here: the module + will be called microcode. config MICROCODE_INTEL - bool "Intel microcode patch loading support" + bool "Intel microcode loading support" depends on MICROCODE default MICROCODE select FW_LOADER @@ -1013,7 +1013,7 @@ config MICROCODE_INTEL . config MICROCODE_AMD - bool "AMD microcode patch loading support" + bool "AMD microcode loading support" depends on MICROCODE select FW_LOADER ---help--- -- cgit v1.2.3-70-g09d2 From e7e632f5ba240fbc313c49ed6559681ea57534e9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jul 2012 14:12:21 +0200 Subject: x86, microcode, AMD: Remove useless get_ucode_data wrapper get_ucode_data was a trivial memcpy wrapper. Remove it so as not to obfuscate code unnecessarily with no obvious gain. No functional change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-7-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/microcode.h | 6 ------ arch/x86/kernel/microcode_amd.c | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 4ebe157bf73..8813be60099 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -49,12 +49,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void) #ifdef CONFIG_MICROCODE_AMD extern struct microcode_ops * __init init_amd_microcode(void); extern void __exit exit_amd_microcode(void); - -static inline void get_ucode_data(void *to, const u8 *from, size_t n) -{ - memcpy(to, from, n); -} - #else static inline struct microcode_ops * __init init_amd_microcode(void) { diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 82746f942cd..94213387a3d 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -183,7 +183,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr, memset(patch, 0, PAGE_SIZE); /* all looks ok, get the binary patch */ - get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); + memcpy(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); return actual_size; } @@ -238,7 +238,7 @@ static int install_equiv_cpu_table(const u8 *buf) return -ENOMEM; } - get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); + memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); /* add header length */ return size + CONTAINER_HDR_SZ; -- cgit v1.2.3-70-g09d2 From 685ca6d797af9d41164dd64dd60145d4946fc152 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 20 Jun 2012 16:17:51 +0200 Subject: x86, microcode, AMD: Check before applying a patch Make sure we're actually applying a microcode patch to a core which really needs it. This brings only a very very very minor slowdown on F10: 0.032218828 sec vs 0.056010626 sec with this patch. And small speedup on F15: 0.487089449 sec vs 0.180551162 sec (from perf output). Also, fixup comments while at it. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-8-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_amd.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 94213387a3d..8fdf7d99b80 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -202,11 +202,18 @@ static int apply_microcode_amd(int cpu) if (mc_amd == NULL) return 0; - wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); - /* get patch id after patching */ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* check current patch id and patch's id for match */ + /* need to apply patch? */ + if (rev >= mc_amd->hdr.patch_id) { + c->microcode = rev; + return 0; + } + + wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); + + /* verify patch application was successful */ + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev != mc_amd->hdr.patch_id) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); -- cgit v1.2.3-70-g09d2 From 5f5b747282c6cc57b91baba37f76de27398b9e60 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 25 Jul 2012 20:06:54 +0200 Subject: x86, microcode, AMD: Read CPUID(1).EAX on the correct cpu Read the CPUID(1).EAX leaf at the correct cpu and use it to search the equivalence table for matching microcode patch. No functionality change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-9-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_amd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 8fdf7d99b80..25d34b17748 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -82,6 +82,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); + csig->sig = cpuid_eax(0x00000001); csig->rev = c->microcode; pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); @@ -118,16 +119,15 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size, return patch_size; } -static u16 find_equiv_id(void) +static u16 find_equiv_id(unsigned int cpu) { - unsigned int current_cpu_id, i = 0; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int i = 0; BUG_ON(equiv_cpu_table == NULL); - current_cpu_id = cpuid_eax(0x00000001); - while (equiv_cpu_table[i].installed_cpu != 0) { - if (current_cpu_id == equiv_cpu_table[i].installed_cpu) + if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) return equiv_cpu_table[i].equiv_cpu; i++; @@ -150,7 +150,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr, patch_size = *(u32 *)(ucode_ptr + 4); *current_size = patch_size + SECTION_HDR_SIZE; - equiv_cpu_id = find_equiv_id(); + equiv_cpu_id = find_equiv_id(cpu); if (!equiv_cpu_id) return 0; -- cgit v1.2.3-70-g09d2 From 48e30685caa8bdc4b8d4417d8ac31db59689742c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jul 2012 15:51:00 +0200 Subject: x86, microcode: Add a refresh firmware flag to ->request_microcode_fw This is done in preparation for teaching the ucode driver to either load a new ucode patches container from userspace or use an already cached version. No functionality change in this patch. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-10-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/microcode.h | 4 ++-- arch/x86/kernel/microcode_amd.c | 3 ++- arch/x86/kernel/microcode_core.c | 11 ++++++----- arch/x86/kernel/microcode_intel.c | 3 ++- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 8813be60099..43d921b4752 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -15,8 +15,8 @@ struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); - enum ucode_state (*request_microcode_fw) (int cpu, - struct device *device); + enum ucode_state (*request_microcode_fw) (int cpu, struct device *, + bool refresh_fw); void (*microcode_fini_cpu) (int cpu); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 25d34b17748..94ecdaa2405 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -330,7 +330,8 @@ out: * * These might be larger than 2K. */ -static enum ucode_state request_microcode_amd(int cpu, struct device *device) +static enum ucode_state request_microcode_amd(int cpu, struct device *device, + bool refresh_fw) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; const struct firmware *fw; diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index dcde544e012..9420972e98f 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -282,7 +282,7 @@ static int reload_for_cpu(int cpu) if (!uci->valid) return err; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); if (ustate == UCODE_OK) apply_microcode_on_target(cpu); else @@ -377,7 +377,7 @@ static enum ucode_state microcode_resume_cpu(int cpu) return UCODE_OK; } -static enum ucode_state microcode_init_cpu(int cpu) +static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) { enum ucode_state ustate; @@ -388,7 +388,8 @@ static enum ucode_state microcode_init_cpu(int cpu) if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, + refresh_fw); if (ustate == UCODE_OK) { pr_debug("CPU%d updated upon init\n", cpu); @@ -405,7 +406,7 @@ static enum ucode_state microcode_update_cpu(int cpu) if (uci->valid) return microcode_resume_cpu(cpu); - return microcode_init_cpu(cpu); + return microcode_init_cpu(cpu, false); } static int mc_device_add(struct device *dev, struct subsys_interface *sif) @@ -421,7 +422,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) if (err) return err; - if (microcode_init_cpu(cpu) == UCODE_ERROR) + if (microcode_init_cpu(cpu, true) == UCODE_ERROR) return -EINVAL; return err; diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0327e2b3c40..3544aed3933 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } -static enum ucode_state request_microcode_fw(int cpu, struct device *device) +static enum ucode_state request_microcode_fw(int cpu, struct device *device, + bool refresh_fw) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); -- cgit v1.2.3-70-g09d2 From c96d2c0905cc48e34f2b37b775b59932c416b343 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 1 Aug 2012 14:55:01 +0200 Subject: x86, microcode, AMD: Add reverse equiv table search We search the equivalence table using the CPUID(1) signature of the CPU in order to get the equivalence ID of the patch which we need to apply. Add a function which does the reverse - it will be needed in later patches. While at it, pull the other equiv table function up in the file so that it can be used by other functionality without forward declarations. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-11-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_amd.c | 46 +++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 94ecdaa2405..03ed5af7053 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -78,6 +78,36 @@ static struct equiv_cpu_entry *equiv_cpu_table; /* page-sized ucode patch buffer */ void *patch; +static u16 find_equiv_id(unsigned int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int i = 0; + + BUG_ON(equiv_cpu_table == NULL); + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) + return equiv_cpu_table[i].equiv_cpu; + + i++; + } + return 0; +} + +static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) +{ + int i = 0; + + BUG_ON(!equiv_cpu_table); + + while (equiv_cpu_table[i].equiv_cpu != 0) { + if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) + return equiv_cpu_table[i].installed_cpu; + i++; + } + return 0; +} + static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -119,22 +149,6 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size, return patch_size; } -static u16 find_equiv_id(unsigned int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - int i = 0; - - BUG_ON(equiv_cpu_table == NULL); - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; - } - return 0; -} - /* * we signal a good patch is found by returning its size > 0 */ -- cgit v1.2.3-70-g09d2 From a3eb3b4da106a23b5d41bf915726172e31654a65 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 1 Aug 2012 15:38:18 +0200 Subject: x86, microcode, AMD: Add a small, per-family patches cache This is a trivial cache which collects all ucode patches for the current family of CPUs on the system. If a newer patch appears due to the container file being updated in userspace, we replace our cached version with the new one. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-12-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_amd.c | 67 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 03ed5af7053..cacdc9a5ee4 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -78,12 +78,22 @@ static struct equiv_cpu_entry *equiv_cpu_table; /* page-sized ucode patch buffer */ void *patch; +struct ucode_patch { + struct list_head plist; + void *data; + u32 patch_id; + u16 equiv_cpu; +}; + +static LIST_HEAD(pcache); + static u16 find_equiv_id(unsigned int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; int i = 0; - BUG_ON(equiv_cpu_table == NULL); + if (!equiv_cpu_table) + return 0; while (equiv_cpu_table[i].installed_cpu != 0) { if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) @@ -108,6 +118,61 @@ static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) return 0; } +/* + * a small, trivial cache of per-family ucode patches + */ +static struct ucode_patch *cache_find_patch(u16 equiv_cpu) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) + if (p->equiv_cpu == equiv_cpu) + return p; + return NULL; +} + +static void update_cache(struct ucode_patch *new_patch) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) { + if (p->equiv_cpu == new_patch->equiv_cpu) { + if (p->patch_id >= new_patch->patch_id) + /* we already have the latest patch */ + return; + + list_replace(&p->plist, &new_patch->plist); + kfree(p->data); + kfree(p); + return; + } + } + /* no patch found, add it */ + list_add_tail(&new_patch->plist, &pcache); +} + +static void free_cache(void) +{ + struct ucode_patch *p; + + list_for_each_entry_reverse(p, &pcache, plist) { + __list_del(p->plist.prev, p->plist.next); + kfree(p->data); + kfree(p); + } +} + +static struct ucode_patch *find_patch(unsigned int cpu) +{ + u16 equiv_id; + + equiv_id = find_equiv_id(cpu); + if (!equiv_id) + return NULL; + + return cache_find_patch(equiv_id); +} + static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); -- cgit v1.2.3-70-g09d2 From 2efb05e8e9fa3510044e007b90263c73b6a83f84 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 1 Aug 2012 16:16:13 +0200 Subject: x86, microcode, AMD: Rewrite patch application procedure Limit the access to userspace only on the BSP where we load the container, verify the patches in it and put them in the patch cache. Then, at application time, we lookup the correct patch in the cache and use it. When we need to reload the userspace container, we do that over the reload interface: echo 1 > /sys/devices/system/cpu/microcode/reload which reloads (a possibly newer) container from userspace and applies then the newest patches from there. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1344361461-10076-13-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_amd.c | 236 ++++++++++++++++++++-------------------- 1 file changed, 121 insertions(+), 115 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index cacdc9a5ee4..5511216b443 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -75,9 +75,6 @@ struct microcode_amd { static struct equiv_cpu_entry *equiv_cpu_table; -/* page-sized ucode patch buffer */ -void *patch; - struct ucode_patch { struct list_head plist; void *data; @@ -184,7 +181,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) return 0; } -static unsigned int verify_ucode_size(int cpu, u32 patch_size, +static unsigned int verify_patch_size(int cpu, u32 patch_size, unsigned int size) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -214,73 +211,25 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size, return patch_size; } -/* - * we signal a good patch is found by returning its size > 0 - */ -static int get_matching_microcode(int cpu, const u8 *ucode_ptr, - unsigned int leftover_size, int rev, - unsigned int *current_size) -{ - struct microcode_header_amd *mc_hdr; - unsigned int actual_size, patch_size; - u16 equiv_cpu_id; - - /* size of the current patch we're staring at */ - patch_size = *(u32 *)(ucode_ptr + 4); - *current_size = patch_size + SECTION_HDR_SIZE; - - equiv_cpu_id = find_equiv_id(cpu); - if (!equiv_cpu_id) - return 0; - - /* - * let's look at the patch header itself now - */ - mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); - - if (mc_hdr->processor_rev_id != equiv_cpu_id) - return 0; - - /* ucode might be chipset specific -- currently we don't support this */ - if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { - pr_err("CPU%d: chipset specific code not yet supported\n", - cpu); - return 0; - } - - if (mc_hdr->patch_id <= rev) - return 0; - - /* - * now that the header looks sane, verify its size - */ - actual_size = verify_ucode_size(cpu, patch_size, leftover_size); - if (!actual_size) - return 0; - - /* clear the patch buffer */ - memset(patch, 0, PAGE_SIZE); - - /* all looks ok, get the binary patch */ - memcpy(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); - - return actual_size; -} - static int apply_microcode_amd(int cpu) { - u32 rev, dummy; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - struct microcode_amd *mc_amd = uci->mc; struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_amd *mc_amd; + struct ucode_cpu_info *uci; + struct ucode_patch *p; + u32 rev, dummy; + + BUG_ON(raw_smp_processor_id() != cpu); - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); + uci = ucode_cpu_info + cpu; - if (mc_amd == NULL) + p = find_patch(cpu); + if (!p) return 0; + mc_amd = p->data; + uci->mc = p->data; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); /* need to apply patch? */ @@ -336,61 +285,113 @@ static void free_equiv_cpu_table(void) equiv_cpu_table = NULL; } -static enum ucode_state -generic_load_microcode(int cpu, const u8 *data, size_t size) +static void cleanup(void) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header_amd *mc_hdr = NULL; - unsigned int mc_size, leftover, current_size = 0; + free_equiv_cpu_table(); + free_cache(); +} + +/* + * We return the current size even if some of the checks failed so that + * we can skip over the next patch. If we return a negative value, we + * signal a grave error like a memory allocation has failed and the + * driver cannot continue functioning normally. In such cases, we tear + * down everything we've used up so far and exit. + */ +static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_header_amd *mc_hdr; + struct ucode_patch *patch; + unsigned int patch_size, crnt_size, ret; + u32 proc_fam; + u16 proc_id; + + patch_size = *(u32 *)(fw + 4); + crnt_size = patch_size + SECTION_HDR_SIZE; + mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); + proc_id = mc_hdr->processor_rev_id; + + proc_fam = find_cpu_family_by_equiv_cpu(proc_id); + if (!proc_fam) { + pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); + return crnt_size; + } + + /* check if patch is for the current family */ + proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); + if (proc_fam != c->x86) + return crnt_size; + + if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { + pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", + mc_hdr->patch_id); + return crnt_size; + } + + ret = verify_patch_size(cpu, patch_size, leftover); + if (!ret) { + pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); + return crnt_size; + } + + patch = kzalloc(sizeof(*patch), GFP_KERNEL); + if (!patch) { + pr_err("Patch allocation failure.\n"); + return -EINVAL; + } + + patch->data = kzalloc(patch_size, GFP_KERNEL); + if (!patch->data) { + pr_err("Patch data allocation failure.\n"); + kfree(patch); + return -EINVAL; + } + + /* All looks ok, copy patch... */ + memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); + INIT_LIST_HEAD(&patch->plist); + patch->patch_id = mc_hdr->patch_id; + patch->equiv_cpu = proc_id; + + /* ... and add to cache. */ + update_cache(patch); + + return crnt_size; +} + +static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) +{ + enum ucode_state ret = UCODE_ERROR; + unsigned int leftover; + u8 *fw = (u8 *)data; + int crnt_size = 0; int offset; - const u8 *ucode_ptr = data; - void *new_mc = NULL; - unsigned int new_rev = uci->cpu_sig.rev; - enum ucode_state state = UCODE_ERROR; - offset = install_equiv_cpu_table(ucode_ptr); + offset = install_equiv_cpu_table(data); if (offset < 0) { pr_err("failed to create equivalent cpu table\n"); - goto out; + return ret; } - ucode_ptr += offset; + fw += offset; leftover = size - offset; - if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { + if (*(u32 *)fw != UCODE_UCODE_TYPE) { pr_err("invalid type field in container file section header\n"); - goto free_table; + free_equiv_cpu_table(); + return ret; } while (leftover) { - mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, - new_rev, ¤t_size); - if (mc_size) { - mc_hdr = patch; - new_mc = patch; - new_rev = mc_hdr->patch_id; - goto out_ok; - } + crnt_size = verify_and_add_patch(cpu, fw, leftover); + if (crnt_size < 0) + return ret; - ucode_ptr += current_size; - leftover -= current_size; + fw += crnt_size; + leftover -= crnt_size; } - if (!new_mc) { - state = UCODE_NFOUND; - goto free_table; - } - -out_ok: - uci->mc = new_mc; - state = UCODE_OK; - pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", - cpu, uci->cpu_sig.rev, new_rev); - -free_table: - free_equiv_cpu_table(); - -out: - return state; + return UCODE_OK; } /* @@ -401,7 +402,7 @@ out: * * This legacy file is always smaller than 2K in size. * - * Starting at family 15h they are in family specific firmware files: + * Beginning with family 15h, they are in family-specific firmware files: * * amd-ucode/microcode_amd_fam15h.bin * amd-ucode/microcode_amd_fam16h.bin @@ -413,9 +414,13 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, bool refresh_fw) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; - const struct firmware *fw; - enum ucode_state ret = UCODE_NFOUND; struct cpuinfo_x86 *c = &cpu_data(cpu); + enum ucode_state ret = UCODE_NFOUND; + const struct firmware *fw; + + /* reload ucode container only on the boot cpu */ + if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + return UCODE_OK; if (c->x86 >= 0x15) snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); @@ -431,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, goto fw_release; } - ret = generic_load_microcode(cpu, fw->data, fw->size); + /* free old equiv table */ + free_equiv_cpu_table(); + + ret = load_microcode_amd(cpu, fw->data, fw->size); + if (ret != UCODE_OK) + cleanup(); -fw_release: + fw_release: release_firmware(fw); -out: + out: return ret; } @@ -470,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void) return NULL; } - patch = (void *)get_zeroed_page(GFP_KERNEL); - if (!patch) - return NULL; - return µcode_amd_ops; } void __exit exit_amd_microcode(void) { - free_page((unsigned long)patch); + cleanup(); } -- cgit v1.2.3-70-g09d2 From 90993cdd1800dc6ef9587431a0c625b978584e81 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 16 Aug 2012 17:00:19 -0300 Subject: x86: KVM guest: merge CONFIG_KVM_CLOCK into CONFIG_KVM_GUEST The distinction between CONFIG_KVM_CLOCK and CONFIG_KVM_GUEST is not so clear anymore, as demonstrated by recent bugs caused by poor handling of on/off combinations of these options. Merge CONFIG_KVM_CLOCK into CONFIG_KVM_GUEST. Reported-By: OGAWA Hirofumi Signed-off-by: Marcelo Tosatti --- arch/x86/Kconfig | 21 ++++++++------------- arch/x86/include/asm/kvm_para.h | 4 ++-- arch/x86/kernel/Makefile | 3 +-- arch/x86/kernel/kvm.c | 2 -- arch/x86/kernel/setup.c | 2 +- 5 files changed, 12 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..a42e2e99caa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -573,23 +573,18 @@ config PARAVIRT_TIME_ACCOUNTING source "arch/x86/xen/Kconfig" -config KVM_CLOCK - bool "KVM paravirtualized clock" - select PARAVIRT - select PARAVIRT_CLOCK - ---help--- - Turning on this option will allow you to run a paravirtualized clock - when running over the KVM hypervisor. Instead of relying on a PIT - (or probably other) emulation by the underlying device model, the host - provides the guest with timing infrastructure such as time of day, and - system time - config KVM_GUEST - bool "KVM Guest support" + bool "KVM Guest support (including kvmclock)" + select PARAVIRT select PARAVIRT + select PARAVIRT_CLOCK + default y if PARAVIRT_GUEST ---help--- This option enables various optimizations for running under the KVM - hypervisor. + hypervisor. It includes a paravirtualized clock, so that instead + of relying on a PIT (or probably other) emulation by the + underlying device model, the host provides the guest with + timing infrastructure such as time of day, and system time source "arch/x86/lguest/Kconfig" diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 20f5697888b..eb3e9d85e1f 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -102,14 +102,14 @@ struct kvm_vcpu_pv_apf_data { extern void kvmclock_init(void); extern int kvm_register_clock(char *txt); -#ifdef CONFIG_KVM_CLOCK +#ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else static inline bool kvm_check_and_clear_guest_paused(void) { return false; } -#endif /* CONFIG_KVMCLOCK */ +#endif /* CONFIG_KVM_GUEST */ /* This instruction is vmcall. On non-VT architectures, it will generate a * trap that we will then rewrite to the appropriate instruction. diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8215e5652d9..7203298e0b8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o -obj-$(CONFIG_KVM_GUEST) += kvm.o -obj-$(CONFIG_KVM_CLOCK) += kvmclock.o +obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 1596cc8fd79..b3e5e51bc90 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -397,9 +397,7 @@ void kvm_disable_steal_time(void) #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { -#ifdef CONFIG_KVM_CLOCK WARN_ON(kvm_register_clock("primary cpu clock")); -#endif kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80e1b9..b3386ae3438 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -957,7 +957,7 @@ void __init setup_arch(char **cmdline_p) initmem_init(); memblock_find_dma_reserve(); -#ifdef CONFIG_KVM_CLOCK +#ifdef CONFIG_KVM_GUEST kvmclock_init(); #endif -- cgit v1.2.3-70-g09d2 From 816afe4ff98ee10b1d30fd66361be132a0a5cee6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 6 Aug 2012 17:29:49 +0930 Subject: x86/smp: Don't ever patch back to UP if we unplug cpus We still patch SMP instructions to UP variants if we boot with a single CPU, but not at any other time. In particular, not if we unplug CPUs to return to a single cpu. Paul McKenney points out: mean offline overhead is 6251/48=130.2 milliseconds. If I remove the alternatives_smp_switch() from the offline path [...] the mean offline overhead is 550/42=13.1 milliseconds Basically, we're never going to get those 120ms back, and the code is pretty messy. We get rid of: 1) The "smp-alt-once" boot option. It's actually "smp-alt-boot", the documentation is wrong. It's now the default. 2) The skip_smp_alternatives flag used by suspend. 3) arch_disable_nonboot_cpus_begin() and arch_disable_nonboot_cpus_end() which were only used to set this one flag. Signed-off-by: Rusty Russell Cc: Paul McKenney Cc: Suresh Siddha Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/87vcgwwive.fsf@rustcorp.com.au Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 3 - arch/x86/include/asm/alternative.h | 4 +- arch/x86/kernel/alternative.c | 107 +++++++++--------------------------- arch/x86/kernel/smpboot.c | 20 +------ arch/x86/xen/smp.c | 6 +- kernel/cpu.c | 11 ---- 6 files changed, 32 insertions(+), 119 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ad7e2e5088c..7aef3345f73 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2638,9 +2638,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. smart2= [HW] Format: [,[,...,]] - smp-alt-once [X86-32,SMP] On a hotplug CPU system, only - attempt to substitute SMP alternatives once at boot. - smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port smsc-ircc2.ircc_sir= [HW] SIR base I/O port diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 70780689599..444704c8e18 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); -extern void alternatives_smp_switch(int smp); +extern void alternatives_enable_smp(void); extern int alternatives_text_reserved(void *start, void *end); extern bool skip_smp_alternatives; #else @@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} -static inline void alternatives_smp_switch(int smp) {} +static inline void alternatives_enable_smp(void) {} static inline int alternatives_text_reserved(void *start, void *end) { return 0; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index afb7ff79a29..af1f326a31c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -23,19 +23,6 @@ #define MAX_PATCH_LEN (255-1) -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) @@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, { const s32 *poff; - if (noreplace_smp) - return; - mutex_lock(&text_mutex); for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; @@ -359,7 +343,7 @@ struct smp_alt_module { }; static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); -static int smp_mode = 1; /* protected by smp_alt */ +static bool uniproc_patched = false; /* protected by smp_alt */ void __init_or_module alternatives_smp_module_add(struct module *mod, char *name, @@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, { struct smp_alt_module *smp; - if (noreplace_smp) - return; + mutex_lock(&smp_alt); + if (!uniproc_patched) + goto unlock; - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } + if (num_possible_cpus() == 1) + /* Don't bother remembering, we'll never have to undo it. */ + goto smp_unlock; smp = kzalloc(sizeof(*smp), GFP_KERNEL); if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ + /* we'll run the (safe but slow) SMP code then ... */ + goto unlock; smp->mod = mod; smp->name = name; @@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - mutex_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); +smp_unlock: + alternatives_smp_unlock(locks, locks_end, text, text_end); +unlock: mutex_unlock(&smp_alt); } @@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - if (smp_alt_once || noreplace_smp) - return; - mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - mutex_unlock(&smp_alt); - DPRINTK("%s: %s\n", __func__, item->name); kfree(item); - return; + break; } mutex_unlock(&smp_alt); } -bool skip_smp_alternatives; -void alternatives_smp_switch(int smp) +void alternatives_enable_smp(void) { struct smp_alt_module *mod; @@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp) pr_info("lockdep: fixing up alternatives\n"); #endif - if (noreplace_smp || smp_alt_once || skip_smp_alternatives) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); + /* Why bother if there are no other CPUs? */ + BUG_ON(num_possible_cpus() == 1); mutex_lock(&smp_alt); - /* - * Avoid unnecessary switches because it forces JIT based VMs to - * throw away all cached translations, which can be quite costly. - */ - if (smp == smp_mode) { - /* nothing */ - } else if (smp) { + if (uniproc_patched) { pr_info("switching to SMP code\n"); + BUG_ON(num_online_cpus() != 1); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); - } else { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); + uniproc_patched = false; } - smp_mode = smp; mutex_unlock(&smp_alt); } @@ -540,40 +503,22 @@ void __init alternative_instructions(void) apply_alternatives(__alt_instructions, __alt_instructions_end); - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - #ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - } else { + /* Patch to UP if other cpus not imminent. */ + if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { + uniproc_patched = true; alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - - /* Only switch to UP mode if we don't immediately boot others */ - if (num_present_cpus() == 1 || setup_max_cpus <= 1) - alternatives_smp_switch(0); } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - if (smp_alt_once) + if (!uniproc_patched || num_possible_cpus() == 1) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); +#endif + + apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c5a8c314c0..c80a33bc528 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long boot_error = 0; int timeout; - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); idle->thread.sp = (unsigned long) (((struct pt_regs *) (THREAD_SIZE + task_stack_page(idle))) - 1); @@ -1053,20 +1054,6 @@ out: preempt_enable(); } -void arch_disable_nonboot_cpus_begin(void) -{ - /* - * Avoid the smp alternatives switch during the disable_nonboot_cpus(). - * In the suspend path, we will be back in the SMP mode shortly anyways. - */ - skip_smp_alternatives = true; -} - -void arch_disable_nonboot_cpus_end(void) -{ - skip_smp_alternatives = false; -} - void arch_enable_nonboot_cpus_begin(void) { set_mtrr_aps_delayed_init(); @@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu) if (per_cpu(cpu_state, cpu) == CPU_DEAD) { if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); - - if (1 == num_online_cpus()) - alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index f58dca7a6e5..353c50f1870 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) return rc; if (num_online_cpus() == 1) - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); rc = xen_smp_intr_init(cpu); if (rc) @@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu) unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); - - if (num_online_cpus() == 1) - alternatives_smp_switch(0); } static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 14d32588ccc..f6bfe3e03f6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -439,14 +439,6 @@ EXPORT_SYMBOL_GPL(cpu_up); #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; -void __weak arch_disable_nonboot_cpus_begin(void) -{ -} - -void __weak arch_disable_nonboot_cpus_end(void) -{ -} - int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; @@ -458,7 +450,6 @@ int disable_nonboot_cpus(void) * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); - arch_disable_nonboot_cpus_begin(); printk("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { @@ -474,8 +465,6 @@ int disable_nonboot_cpus(void) } } - arch_disable_nonboot_cpus_end(); - if (!error) { BUG_ON(num_online_cpus() > 1); /* Make sure the CPUs won't be enabled by someone else */ -- cgit v1.2.3-70-g09d2 From d57c5d51a30152f3175d2344cb6395f08bf8ee0c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Feb 2011 13:32:18 -0500 Subject: ftrace/x86: Add support for -mfentry to x86_64 If the kernel is compiled with gcc 4.6.0 which supports -mfentry, then use that instead of mcount. With mcount, frame pointers are forced with the -pg option and we get something like: : 55 push %rbp 48 89 e5 mov %rsp,%rbp 53 push %rbx 41 51 push %r9 e8 fe 6a 39 00 callq ffffffff81483d00 31 c0 xor %eax,%eax 48 89 fb mov %rdi,%rbx 48 89 d7 mov %rdx,%rdi 48 33 73 30 xor 0x30(%rbx),%rsi 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi With -mfentry, frame pointers are no longer forced and the call looks like this: : e8 33 af 37 00 callq ffffffff81461b40 <__fentry__> 53 push %rbx 48 89 fb mov %rdi,%rbx 31 c0 xor %eax,%eax 48 89 d7 mov %rdx,%rdi 41 51 push %r9 48 33 73 30 xor 0x30(%rbx),%rsi 48 f7 c6 ff ff ff f7 test $0xfffffffff7ffffff,%rsi This adds the ftrace hook at the beginning of the function before a frame is set up, and allows the function callbacks to be able to access parameters. As kprobes now can use function tracing (at least on x86) this speeds up the kprobe hooks that are at the beginning of the function. Link: http://lkml.kernel.org/r/20120807194100.130477900@goodmis.org Acked-by: Ingo Molnar Reviewed-by: Masami Hiramatsu Cc: Andi Kleen Signed-off-by: Steven Rostedt --- arch/x86/Kconfig | 1 + arch/x86/include/asm/ftrace.h | 7 ++++++- arch/x86/kernel/entry_64.S | 32 +++++++++++++++++++++++++++----- arch/x86/kernel/x8664_ksyms_64.c | 6 +++++- 4 files changed, 39 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a2d19ee750c..28dd891a0a1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -36,6 +36,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index a6cae0c1720..9a25b522d37 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -35,7 +35,11 @@ #endif #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#ifdef CC_USING_FENTRY +# define MCOUNT_ADDR ((long)(__fentry__)) +#else +# define MCOUNT_ADDR ((long)(mcount)) +#endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ #ifdef CONFIG_DYNAMIC_FTRACE @@ -46,6 +50,7 @@ #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; +extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b7a81dcb736..ed767b747fe 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -68,10 +68,18 @@ .section .entry.text, "ax" #ifdef CONFIG_FUNCTION_TRACER + +#ifdef CC_USING_FENTRY +# define function_hook __fentry__ +#else +# define function_hook mcount +#endif + #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) + +ENTRY(function_hook) retq -END(mcount) +END(function_hook) /* skip is set if stack has been adjusted */ .macro ftrace_caller_setup skip=0 @@ -84,7 +92,11 @@ END(mcount) movq RIP(%rsp), %rdi subq $MCOUNT_INSN_SIZE, %rdi /* Load the parent_ip into the second parameter */ +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif .endm ENTRY(ftrace_caller) @@ -177,7 +189,8 @@ END(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) + +ENTRY(function_hook) cmpl $0, function_trace_stop jne ftrace_stub @@ -199,7 +212,11 @@ trace: MCOUNT_SAVE_FRAME movq RIP(%rsp), %rdi +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function @@ -207,7 +224,7 @@ trace: MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ @@ -215,9 +232,14 @@ END(mcount) ENTRY(ftrace_graph_caller) MCOUNT_SAVE_FRAME +#ifdef CC_USING_FENTRY + leaq SS+16(%rsp), %rdi + movq $0, %rdx /* No framepointers needed */ +#else leaq 8(%rbp), %rdi - movq RIP(%rsp), %rsi movq (%rbp), %rdx +#endif + movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927..1330dd10295 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -13,9 +13,13 @@ #include #ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ +/* mcount and __fentry__ are defined in assembly */ +#ifdef CC_USING_FENTRY +EXPORT_SYMBOL(__fentry__); +#else EXPORT_SYMBOL(mcount); #endif +#endif EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); -- cgit v1.2.3-70-g09d2 From e3e45c01ae690e65f2650e5288b9af802e95a136 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 24 Aug 2012 15:34:34 +0200 Subject: perf/x86: Fix microcode revision check for SNB-PEBS The following patch makes the microcode update code path actually invoke the perf_check_microcode() function and thus potentially renabling SNB PEBS. By default, CONFIG_MICROCODE_OLD_INTERFACE is forced to Y in arch/x86/Kconfig. There is no way to disable this. That means that the code path used in arch/x86/kernel/microcode_core.c did not include the call to perf_check_microcode(). Thus, even though the microcode was updated to a version that fixes the SNB PEBS problem, perf_event would still return EOPNOTSUPP when enabling precise sampling. This patch simply adds a call to perf_check_microcode() in the call path used when OLD_INTERFACE=y. Signed-off-by: Stephane Eranian Acked-by: Borislav Petkov Cc: peterz@infradead.org Cc: andi@firstfloor.org Link: http://lkml.kernel.org/r/20120824133434.GA8014@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 4873e62db6a..9e5bcf1e237 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, if (do_microcode_update(buf, len) == 0) ret = (ssize_t)len; + if (ret > 0) + perf_check_microcode(); + mutex_unlock(µcode_mutex); put_online_cpus(); -- cgit v1.2.3-70-g09d2 From dd856efafe6097a5c9104725c2bca74430423db8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 27 Aug 2012 23:46:17 +0300 Subject: KVM: x86 emulator: access GPRs on demand Instead of populating the entire register file, read in registers as they are accessed, and write back only the modified ones. This saves a VMREAD and VMWRITE on Intel (for rsp, since it is not usually used during emulation), and a two 128-byte copies for the registers. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_emulate.h | 20 ++- arch/x86/kvm/emulate.c | 299 +++++++++++++++++++++++-------------- arch/x86/kvm/x86.c | 45 +++--- 3 files changed, 220 insertions(+), 144 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c764f43b71c..282aee5d6ac 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -85,6 +85,19 @@ struct x86_instruction_info { #define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ struct x86_emulate_ops { + /* + * read_gpr: read a general purpose register (rax - r15) + * + * @reg: gpr number. + */ + ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg); + /* + * write_gpr: write a general purpose register (rax - r15) + * + * @reg: gpr number. + * @val: value to write. + */ + void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val); /* * read_std: Read bytes of standard (non-emulated/special) memory. * Used for descriptor reading. @@ -281,8 +294,10 @@ struct x86_emulate_ctxt { bool rip_relative; unsigned long _eip; struct operand memop; + u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */ + u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */ /* Fields above regs are cleared together. */ - unsigned long regs[NR_VCPU_REGS]; + unsigned long _regs[NR_VCPU_REGS]; struct operand *memopp; struct fetch_cache fetch; struct read_cache io_read; @@ -394,4 +409,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); +void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); +void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); + #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e8fb6c5c6c0..5e27ba53261 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -202,6 +202,42 @@ struct gprefix { #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a #define EFLG_RESERVED_ONE_MASK 2 +static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + if (!(ctxt->regs_valid & (1 << nr))) { + ctxt->regs_valid |= 1 << nr; + ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); + } + return ctxt->_regs[nr]; +} + +static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + ctxt->regs_valid |= 1 << nr; + ctxt->regs_dirty |= 1 << nr; + return &ctxt->_regs[nr]; +} + +static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + reg_read(ctxt, nr); + return reg_write(ctxt, nr); +} + +static void writeback_registers(struct x86_emulate_ctxt *ctxt) +{ + unsigned reg; + + for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16) + ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]); +} + +static void invalidate_registers(struct x86_emulate_ctxt *ctxt) +{ + ctxt->regs_dirty = 0; + ctxt->regs_valid = 0; +} + /* * Instruction emulation: * Most instructions are emulated directly via a fragment of inline assembly @@ -374,8 +410,8 @@ struct gprefix { #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ do { \ unsigned long _tmp; \ - ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX]; \ - ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX]; \ + ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \ + ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \ \ __asm__ __volatile__ ( \ _PRE_EFLAGS("0", "5", "1") \ @@ -494,7 +530,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) { - masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc); + masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc); } static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) @@ -786,14 +822,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, * pointer into the block that addresses the relevant register. * @highbyte_regs specifies whether to decode AH,CH,DH,BH. */ -static void *decode_register(u8 modrm_reg, unsigned long *regs, +static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, int highbyte_regs) { void *p; - p = ®s[modrm_reg]; if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) - p = (unsigned char *)®s[modrm_reg & 3] + 1; + p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; + else + p = reg_rmw(ctxt, modrm_reg); return p; } @@ -982,10 +1019,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, op->type = OP_REG; if (ctxt->d & ByteOp) { - op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); + op->addr.reg = decode_register(ctxt, reg, highbyte_regs); op->bytes = 1; } else { - op->addr.reg = decode_register(reg, ctxt->regs, 0); + op->addr.reg = decode_register(ctxt, reg, 0); op->bytes = ctxt->op_bytes; } fetch_register_operand(op); @@ -1020,8 +1057,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if (ctxt->modrm_mod == 3) { op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; - op->addr.reg = decode_register(ctxt->modrm_rm, - ctxt->regs, ctxt->d & ByteOp); + op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp); if (ctxt->d & Sse) { op->type = OP_XMM; op->bytes = 16; @@ -1042,10 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->type = OP_MEM; if (ctxt->ad_bytes == 2) { - unsigned bx = ctxt->regs[VCPU_REGS_RBX]; - unsigned bp = ctxt->regs[VCPU_REGS_RBP]; - unsigned si = ctxt->regs[VCPU_REGS_RSI]; - unsigned di = ctxt->regs[VCPU_REGS_RDI]; + unsigned bx = reg_read(ctxt, VCPU_REGS_RBX); + unsigned bp = reg_read(ctxt, VCPU_REGS_RBP); + unsigned si = reg_read(ctxt, VCPU_REGS_RSI); + unsigned di = reg_read(ctxt, VCPU_REGS_RDI); /* 16-bit ModR/M decode. */ switch (ctxt->modrm_mod) { @@ -1102,17 +1138,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) modrm_ea += insn_fetch(s32, ctxt); else { - modrm_ea += ctxt->regs[base_reg]; + modrm_ea += reg_read(ctxt, base_reg); adjust_modrm_seg(ctxt, base_reg); } if (index_reg != 4) - modrm_ea += ctxt->regs[index_reg] << scale; + modrm_ea += reg_read(ctxt, index_reg) << scale; } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { if (ctxt->mode == X86EMUL_MODE_PROT64) ctxt->rip_relative = 1; } else { base_reg = ctxt->modrm_rm; - modrm_ea += ctxt->regs[base_reg]; + modrm_ea += reg_read(ctxt, base_reg); adjust_modrm_seg(ctxt, base_reg); } switch (ctxt->modrm_mod) { @@ -1250,10 +1286,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (rc->pos == rc->end) { /* refill pio read ahead */ unsigned int in_page, n; unsigned int count = ctxt->rep_prefix ? - address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1; + address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; in_page = (ctxt->eflags & EFLG_DF) ? - offset_in_page(ctxt->regs[VCPU_REGS_RDI]) : - PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]); + offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : + PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, count); if (n == 0) @@ -1533,7 +1569,7 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) struct segmented_address addr; rsp_increment(ctxt, -bytes); - addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); + addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt); addr.seg = VCPU_SREG_SS; return segmented_write(ctxt, addr, data, bytes); @@ -1552,7 +1588,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, int rc; struct segmented_address addr; - addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); + addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt); addr.seg = VCPU_SREG_SS; rc = segmented_read(ctxt, addr, dest, len); if (rc != X86EMUL_CONTINUE) @@ -1620,26 +1656,28 @@ static int em_enter(struct x86_emulate_ctxt *ctxt) int rc; unsigned frame_size = ctxt->src.val; unsigned nesting_level = ctxt->src2.val & 31; + ulong rbp; if (nesting_level) return X86EMUL_UNHANDLEABLE; - rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt)); + rbp = reg_read(ctxt, VCPU_REGS_RBP); + rc = push(ctxt, &rbp, stack_size(ctxt)); if (rc != X86EMUL_CONTINUE) return rc; - assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP], + assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP), stack_mask(ctxt)); - assign_masked(&ctxt->regs[VCPU_REGS_RSP], - ctxt->regs[VCPU_REGS_RSP] - frame_size, + assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), + reg_read(ctxt, VCPU_REGS_RSP) - frame_size, stack_mask(ctxt)); return X86EMUL_CONTINUE; } static int em_leave(struct x86_emulate_ctxt *ctxt) { - assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP], + assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP), stack_mask(ctxt)); - return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes); + return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes); } static int em_push_sreg(struct x86_emulate_ctxt *ctxt) @@ -1667,13 +1705,13 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) static int em_pusha(struct x86_emulate_ctxt *ctxt) { - unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP]; + unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP); int rc = X86EMUL_CONTINUE; int reg = VCPU_REGS_RAX; while (reg <= VCPU_REGS_RDI) { (reg == VCPU_REGS_RSP) ? - (ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]); + (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg)); rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) @@ -1702,7 +1740,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) --reg; } - rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes); + rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) break; --reg; @@ -1710,7 +1748,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) return rc; } -int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) +static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) { struct x86_emulate_ops *ops = ctxt->ops; int rc; @@ -1759,11 +1797,22 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) return rc; } +int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) +{ + int rc; + + invalidate_registers(ctxt); + rc = __emulate_int_real(ctxt, irq); + if (rc == X86EMUL_CONTINUE) + writeback_registers(ctxt); + return rc; +} + static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq) { switch(ctxt->mode) { case X86EMUL_MODE_REAL: - return emulate_int_real(ctxt, irq); + return __emulate_int_real(ctxt, irq); case X86EMUL_MODE_VM86: case X86EMUL_MODE_PROT16: case X86EMUL_MODE_PROT32: @@ -1970,14 +2019,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) { u64 old = ctxt->dst.orig_val64; - if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) || - ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) { - ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0); - ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32); + if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || + ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { + *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); + *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); ctxt->eflags &= ~EFLG_ZF; } else { - ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) | - (u32) ctxt->regs[VCPU_REGS_RBX]; + ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | + (u32) reg_read(ctxt, VCPU_REGS_RBX); ctxt->eflags |= EFLG_ZF; } @@ -2013,7 +2062,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. */ ctxt->src.orig_val = ctxt->src.val; - ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; + ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); emulate_2op_SrcV(ctxt, "cmp"); if (ctxt->eflags & EFLG_ZF) { @@ -2022,7 +2071,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) } else { /* Failure: write the value we saw to EAX. */ ctxt->dst.type = OP_REG; - ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; + ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); } return X86EMUL_CONTINUE; } @@ -2159,10 +2208,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip; + *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip; if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 - ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; + *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF; ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? @@ -2241,7 +2290,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) ctxt->_eip = msr_data; ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); - ctxt->regs[VCPU_REGS_RSP] = msr_data; + *reg_write(ctxt, VCPU_REGS_RSP) = msr_data; return X86EMUL_CONTINUE; } @@ -2291,8 +2340,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ctxt->_eip = ctxt->regs[VCPU_REGS_RDX]; - ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX]; + ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); + *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); return X86EMUL_CONTINUE; } @@ -2361,14 +2410,14 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, { tss->ip = ctxt->_eip; tss->flag = ctxt->eflags; - tss->ax = ctxt->regs[VCPU_REGS_RAX]; - tss->cx = ctxt->regs[VCPU_REGS_RCX]; - tss->dx = ctxt->regs[VCPU_REGS_RDX]; - tss->bx = ctxt->regs[VCPU_REGS_RBX]; - tss->sp = ctxt->regs[VCPU_REGS_RSP]; - tss->bp = ctxt->regs[VCPU_REGS_RBP]; - tss->si = ctxt->regs[VCPU_REGS_RSI]; - tss->di = ctxt->regs[VCPU_REGS_RDI]; + tss->ax = reg_read(ctxt, VCPU_REGS_RAX); + tss->cx = reg_read(ctxt, VCPU_REGS_RCX); + tss->dx = reg_read(ctxt, VCPU_REGS_RDX); + tss->bx = reg_read(ctxt, VCPU_REGS_RBX); + tss->sp = reg_read(ctxt, VCPU_REGS_RSP); + tss->bp = reg_read(ctxt, VCPU_REGS_RBP); + tss->si = reg_read(ctxt, VCPU_REGS_RSI); + tss->di = reg_read(ctxt, VCPU_REGS_RDI); tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); @@ -2384,14 +2433,14 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, ctxt->_eip = tss->ip; ctxt->eflags = tss->flag | 2; - ctxt->regs[VCPU_REGS_RAX] = tss->ax; - ctxt->regs[VCPU_REGS_RCX] = tss->cx; - ctxt->regs[VCPU_REGS_RDX] = tss->dx; - ctxt->regs[VCPU_REGS_RBX] = tss->bx; - ctxt->regs[VCPU_REGS_RSP] = tss->sp; - ctxt->regs[VCPU_REGS_RBP] = tss->bp; - ctxt->regs[VCPU_REGS_RSI] = tss->si; - ctxt->regs[VCPU_REGS_RDI] = tss->di; + *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax; + *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx; + *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx; + *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx; + *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp; + *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp; + *reg_write(ctxt, VCPU_REGS_RSI) = tss->si; + *reg_write(ctxt, VCPU_REGS_RDI) = tss->di; /* * SDM says that segment selectors are loaded before segment @@ -2476,14 +2525,14 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->cr3 = ctxt->ops->get_cr(ctxt, 3); tss->eip = ctxt->_eip; tss->eflags = ctxt->eflags; - tss->eax = ctxt->regs[VCPU_REGS_RAX]; - tss->ecx = ctxt->regs[VCPU_REGS_RCX]; - tss->edx = ctxt->regs[VCPU_REGS_RDX]; - tss->ebx = ctxt->regs[VCPU_REGS_RBX]; - tss->esp = ctxt->regs[VCPU_REGS_RSP]; - tss->ebp = ctxt->regs[VCPU_REGS_RBP]; - tss->esi = ctxt->regs[VCPU_REGS_RSI]; - tss->edi = ctxt->regs[VCPU_REGS_RDI]; + tss->eax = reg_read(ctxt, VCPU_REGS_RAX); + tss->ecx = reg_read(ctxt, VCPU_REGS_RCX); + tss->edx = reg_read(ctxt, VCPU_REGS_RDX); + tss->ebx = reg_read(ctxt, VCPU_REGS_RBX); + tss->esp = reg_read(ctxt, VCPU_REGS_RSP); + tss->ebp = reg_read(ctxt, VCPU_REGS_RBP); + tss->esi = reg_read(ctxt, VCPU_REGS_RSI); + tss->edi = reg_read(ctxt, VCPU_REGS_RDI); tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); @@ -2505,14 +2554,14 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, ctxt->eflags = tss->eflags | 2; /* General purpose registers */ - ctxt->regs[VCPU_REGS_RAX] = tss->eax; - ctxt->regs[VCPU_REGS_RCX] = tss->ecx; - ctxt->regs[VCPU_REGS_RDX] = tss->edx; - ctxt->regs[VCPU_REGS_RBX] = tss->ebx; - ctxt->regs[VCPU_REGS_RSP] = tss->esp; - ctxt->regs[VCPU_REGS_RBP] = tss->ebp; - ctxt->regs[VCPU_REGS_RSI] = tss->esi; - ctxt->regs[VCPU_REGS_RDI] = tss->edi; + *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax; + *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx; + *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx; + *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx; + *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp; + *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp; + *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi; + *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi; /* * SDM says that segment selectors are loaded before segment @@ -2727,14 +2776,17 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, { int rc; + invalidate_registers(ctxt); ctxt->_eip = ctxt->eip; ctxt->dst.type = OP_NONE; rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason, has_error_code, error_code); - if (rc == X86EMUL_CONTINUE) + if (rc == X86EMUL_CONTINUE) { ctxt->eip = ctxt->_eip; + writeback_registers(ctxt); + } return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } @@ -2744,8 +2796,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, { int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; - register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes); - op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]); + register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); + op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); op->addr.mem.seg = seg; } @@ -2921,7 +2973,7 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.type = OP_REG; ctxt->dst.bytes = ctxt->src.bytes; - ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; + ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1); return X86EMUL_CONTINUE; @@ -2932,8 +2984,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) u64 tsc = 0; ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); - ctxt->regs[VCPU_REGS_RAX] = (u32)tsc; - ctxt->regs[VCPU_REGS_RDX] = tsc >> 32; + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc; + *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32; return X86EMUL_CONTINUE; } @@ -2941,10 +2993,10 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt) { u64 pmc; - if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc)) + if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc)) return emulate_gp(ctxt, 0); - ctxt->regs[VCPU_REGS_RAX] = (u32)pmc; - ctxt->regs[VCPU_REGS_RDX] = pmc >> 32; + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc; + *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32; return X86EMUL_CONTINUE; } @@ -2986,9 +3038,9 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt) { u64 msr_data; - msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] - | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); - if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) + msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX) + | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32); + if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -2998,11 +3050,11 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt) { u64 msr_data; - if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) + if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data)) return emulate_gp(ctxt, 0); - ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; - ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data; + *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32; return X86EMUL_CONTINUE; } @@ -3182,8 +3234,8 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt) static int em_loop(struct x86_emulate_ctxt *ctxt) { - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); - if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) && + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); + if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) jmp_rel(ctxt, ctxt->src.val); @@ -3192,7 +3244,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt) static int em_jcxz(struct x86_emulate_ctxt *ctxt) { - if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) jmp_rel(ctxt, ctxt->src.val); return X86EMUL_CONTINUE; @@ -3280,20 +3332,20 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) { u32 eax, ebx, ecx, edx; - eax = ctxt->regs[VCPU_REGS_RAX]; - ecx = ctxt->regs[VCPU_REGS_RCX]; + eax = reg_read(ctxt, VCPU_REGS_RAX); + ecx = reg_read(ctxt, VCPU_REGS_RCX); ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); - ctxt->regs[VCPU_REGS_RAX] = eax; - ctxt->regs[VCPU_REGS_RBX] = ebx; - ctxt->regs[VCPU_REGS_RCX] = ecx; - ctxt->regs[VCPU_REGS_RDX] = edx; + *reg_write(ctxt, VCPU_REGS_RAX) = eax; + *reg_write(ctxt, VCPU_REGS_RBX) = ebx; + *reg_write(ctxt, VCPU_REGS_RCX) = ecx; + *reg_write(ctxt, VCPU_REGS_RDX) = edx; return X86EMUL_CONTINUE; } static int em_lahf(struct x86_emulate_ctxt *ctxt) { - ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL; - ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8; + *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; + *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8; return X86EMUL_CONTINUE; } @@ -3450,7 +3502,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) static int check_svme_pa(struct x86_emulate_ctxt *ctxt) { - u64 rax = ctxt->regs[VCPU_REGS_RAX]; + u64 rax = reg_read(ctxt, VCPU_REGS_RAX); /* Valid physical address? */ if (rax & 0xffff000000000000ULL) @@ -3472,7 +3524,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) static int check_rdpmc(struct x86_emulate_ctxt *ctxt) { u64 cr4 = ctxt->ops->get_cr(ctxt, 4); - u64 rcx = ctxt->regs[VCPU_REGS_RCX]; + u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || (rcx > 3)) @@ -3930,7 +3982,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, case OpAcc: op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; - op->addr.reg = &ctxt->regs[VCPU_REGS_RAX]; + op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); fetch_register_operand(op); op->orig_val = op->val; break; @@ -3938,19 +3990,19 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); + register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); op->addr.mem.seg = VCPU_SREG_ES; op->val = 0; break; case OpDX: op->type = OP_REG; op->bytes = 2; - op->addr.reg = &ctxt->regs[VCPU_REGS_RDX]; + op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); fetch_register_operand(op); break; case OpCL: op->bytes = 1; - op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff; + op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff; break; case OpImmByte: rc = decode_imm(ctxt, op, 1, true); @@ -3981,7 +4033,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); + register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); op->addr.mem.seg = seg_override(ctxt); op->val = 0; break; @@ -4287,6 +4339,7 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); } + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { struct x86_emulate_ops *ops = ctxt->ops; @@ -4371,7 +4424,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if (ctxt->rep_prefix && (ctxt->d & String)) { /* All REP prefixes have the same first termination condition */ - if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) { + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { ctxt->eip = ctxt->_eip; goto done; } @@ -4444,7 +4497,7 @@ special_insn: ctxt->dst.val = ctxt->src.addr.mem.ea; break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ - if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) + if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX)) break; rc = em_xchg(ctxt); break; @@ -4472,7 +4525,7 @@ special_insn: rc = em_grp2(ctxt); break; case 0xd2 ... 0xd3: /* Grp2 */ - ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; + ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX); rc = em_grp2(ctxt); break; case 0xe9: /* jmp rel */ @@ -4527,14 +4580,14 @@ writeback: if (ctxt->rep_prefix && (ctxt->d & String)) { struct read_cache *r = &ctxt->io_read; - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); if (!string_insn_completed(ctxt)) { /* * Re-enter guest when pio read ahead buffer is empty * or, if it is not used, after each 1024 iteration. */ - if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) && + if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) && (r->end == 0 || r->end != r->pos)) { /* * Reset read cache. Usually happens before @@ -4542,6 +4595,7 @@ writeback: * we have to do it here. */ ctxt->mem_read.end = 0; + writeback_registers(ctxt); return EMULATION_RESTART; } goto done; /* skip rip writeback */ @@ -4556,6 +4610,9 @@ done: if (rc == X86EMUL_INTERCEPTED) return EMULATION_INTERCEPTED; + if (rc == X86EMUL_CONTINUE) + writeback_registers(ctxt); + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; twobyte_insn: @@ -4628,3 +4685,13 @@ twobyte_insn: cannot_emulate: return EMULATION_FAILED; } + +void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt) +{ + invalidate_registers(ctxt); +} + +void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt) +{ + writeback_registers(ctxt); +} diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 42bbf4187d2..e00050ce7a6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4313,7 +4313,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); } +static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) +{ + return kvm_register_read(emul_to_vcpu(ctxt), reg); +} + +static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val) +{ + kvm_register_write(emul_to_vcpu(ctxt), reg, val); +} + static struct x86_emulate_ops emulate_ops = { + .read_gpr = emulator_read_gpr, + .write_gpr = emulator_write_gpr, .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, .fetch = kvm_fetch_guest_virt, @@ -4348,14 +4360,6 @@ static struct x86_emulate_ops emulate_ops = { .get_cpuid = emulator_get_cpuid, }; -static void cache_all_regs(struct kvm_vcpu *vcpu) -{ - kvm_register_read(vcpu, VCPU_REGS_RAX); - kvm_register_read(vcpu, VCPU_REGS_RSP); - kvm_register_read(vcpu, VCPU_REGS_RIP); - vcpu->arch.regs_dirty = ~0; -} - static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) { u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); @@ -4382,12 +4386,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) kvm_queue_exception(vcpu, ctxt->exception.vector); } -static void init_decode_cache(struct x86_emulate_ctxt *ctxt, - const unsigned long *regs) +static void init_decode_cache(struct x86_emulate_ctxt *ctxt) { memset(&ctxt->twobyte, 0, - (void *)&ctxt->regs - (void *)&ctxt->twobyte); - memcpy(ctxt->regs, regs, sizeof(ctxt->regs)); + (void *)&ctxt->_regs - (void *)&ctxt->twobyte); ctxt->fetch.start = 0; ctxt->fetch.end = 0; @@ -4402,14 +4404,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; int cs_db, cs_l; - /* - * TODO: fix emulate.c to use guest_read/write_register - * instead of direct ->regs accesses, can save hundred cycles - * on Intel for instructions that don't read/change RSP, for - * for example. - */ - cache_all_regs(vcpu); - kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ctxt->eflags = kvm_get_rflags(vcpu); @@ -4421,7 +4415,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) X86EMUL_MODE_PROT16; ctxt->guest_mode = is_guest_mode(vcpu); - init_decode_cache(ctxt, vcpu->arch.regs); + init_decode_cache(ctxt); vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } @@ -4441,7 +4435,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) return EMULATE_FAIL; ctxt->eip = ctxt->_eip; - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); @@ -4599,7 +4592,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, changes registers values during IO operation */ if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { vcpu->arch.emulate_regs_need_sync_from_vcpu = false; - memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs); + emulator_invalidate_register_cache(ctxt); } restart: @@ -4637,7 +4630,6 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); } else @@ -5591,8 +5583,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) * that usually, but some bad designed PV devices (vmware * backdoor interface) need this to work */ - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); + emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; } regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); @@ -5723,6 +5714,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; int ret; + unsigned reg; init_emulate_ctxt(vcpu); @@ -5732,7 +5724,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, if (ret) return EMULATE_FAIL; - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); -- cgit v1.2.3-70-g09d2 From baa7e81e325bbb6ddebd7680ac1068859244b61d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:06:58 +0300 Subject: KVM: VMX: Separate saving pre-realmode state from setting segments Commit b246dd5df139 ("KVM: VMX: Fix KVM_SET_SREGS with big real mode segments") moved fix_rmode_seg() to vmx_set_segment(), so that it is applied not just on transitions to real mode, but also on KVM_SET_SREGS (migration). However fix_rmode_seg() not only munges the vmcs segments, it also sets up the save area for us to restore when returning to protected mode or to return in vmx_get_segment(). Move saving the segment into a new function, save_rmode_seg(), and call it just during the transition. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 13e0296cea4..4e49caf9224 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2768,7 +2768,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) return kvm->arch.tss_addr; } -static void fix_rmode_seg(int seg, struct kvm_save_segment *save) +static void save_rmode_seg(int seg, struct kvm_save_segment *save) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -2776,6 +2776,12 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save) save->base = vmcs_readl(sf->base); save->limit = vmcs_read32(sf->limit); save->ar = vmcs_read32(sf->ar_bytes); +} + +static void fix_rmode_seg(int seg, struct kvm_save_segment *save) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + vmcs_write16(sf->selector, save->base >> 4); vmcs_write32(sf->base, save->base & 0xffff0); vmcs_write32(sf->limit, 0xffff); @@ -2798,6 +2804,12 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx->emulation_required = 1; vmx->rmode.vm86_active = 1; + save_rmode_seg(VCPU_SREG_TR, &vmx->rmode.tr); + save_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); + save_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); + save_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); + save_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); + /* * Very old userspace does not call KVM_SET_TSS_ADDR before entering * vcpu. Call it here with phys address pointing 16M below 4G. @@ -2812,14 +2824,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); - vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); - vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); - - vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); - - vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); flags = vmcs_readl(GUEST_RFLAGS); -- cgit v1.2.3-70-g09d2 From 72fbefec26841699fee9ad0b050624aeb43d5bae Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:06:59 +0300 Subject: KVM: VMX: Fix incorrect lookup of segment S flag in fix_pmode_dataseg() fix_pmode_dataseg() looks up S in ->base instead of ->ar_bytes. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4e49caf9224..1d93079432b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2693,11 +2693,11 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } -static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) +static void fix_pmode_dataseg(int seg, struct kvm_segment *save) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) { + if (vmcs_readl(sf->base) == save->base && (save->ar_bytes & AR_S_MASK)) { vmcs_write16(sf->selector, save->selector); vmcs_writel(sf->base, save->base); vmcs_write32(sf->limit, save->limit); -- cgit v1.2.3-70-g09d2 From f5f7b2fe3bf849b58c8144729aba78b8e29e1e4c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:00 +0300 Subject: KVM: VMX: Use kvm_segment to save protected-mode segments when entering realmode Instead of using struct kvm_save_segment, use struct kvm_segment, which is what the other APIs use. This leads to some simplification. We replace save_rmode_seg() with a call to vmx_save_segment(). Since this depends on rmode.vm86_active, we move the call to before setting the flag. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 85 +++++++++++++++--------------------------------------- 1 file changed, 24 insertions(+), 61 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1d93079432b..7e95ff68b9d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -405,16 +405,16 @@ struct vcpu_vmx { struct { int vm86_active; ulong save_rflags; + struct kvm_segment segs[8]; + } rmode; + struct { + u32 bitmask; /* 4 bits per segment (1 bit per field) */ struct kvm_save_segment { u16 selector; unsigned long base; u32 limit; u32 ar; - } tr, es, ds, fs, gs; - } rmode; - struct { - u32 bitmask; /* 4 bits per segment (1 bit per field) */ - struct kvm_save_segment seg[8]; + } seg[8]; } segment_cache; int vpid; bool emulation_required; @@ -2693,15 +2693,12 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } -static void fix_pmode_dataseg(int seg, struct kvm_segment *save) +static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - if (vmcs_readl(sf->base) == save->base && (save->ar_bytes & AR_S_MASK)) { - vmcs_write16(sf->selector, save->selector); - vmcs_writel(sf->base, save->base); - vmcs_write32(sf->limit, save->limit); - vmcs_write32(sf->ar_bytes, save->ar); + if (vmcs_readl(sf->base) == save->base && save->s) { + vmx_set_segment(vcpu, save, seg); } else { u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK) << AR_DPL_SHIFT; @@ -2719,10 +2716,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); - vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); - vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); - vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); - vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); + vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); flags = vmcs_readl(GUEST_RFLAGS); flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; @@ -2737,10 +2731,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu) if (emulate_invalid_guest_state) return; - fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es); - fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds); - fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); - fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); + fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); + fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); + fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); + fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); vmx_segment_cache_clear(vmx); @@ -2768,17 +2762,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) return kvm->arch.tss_addr; } -static void save_rmode_seg(int seg, struct kvm_save_segment *save) -{ - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - - save->selector = vmcs_read16(sf->selector); - save->base = vmcs_readl(sf->base); - save->limit = vmcs_read32(sf->limit); - save->ar = vmcs_read32(sf->ar_bytes); -} - -static void fix_rmode_seg(int seg, struct kvm_save_segment *save) +static void fix_rmode_seg(int seg, struct kvm_segment *save) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -2801,14 +2785,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu) if (enable_unrestricted_guest) return; + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); + vmx->emulation_required = 1; vmx->rmode.vm86_active = 1; - save_rmode_seg(VCPU_SREG_TR, &vmx->rmode.tr); - save_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); - save_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); - save_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); - save_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); /* * Very old userspace does not call KVM_SET_TSS_ADDR before entering @@ -3118,7 +3103,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_save_segment *save; u32 ar; if (vmx->rmode.vm86_active @@ -3126,27 +3110,15 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS || seg == VCPU_SREG_GS) && !emulate_invalid_guest_state) { - switch (seg) { - case VCPU_SREG_TR: save = &vmx->rmode.tr; break; - case VCPU_SREG_ES: save = &vmx->rmode.es; break; - case VCPU_SREG_DS: save = &vmx->rmode.ds; break; - case VCPU_SREG_FS: save = &vmx->rmode.fs; break; - case VCPU_SREG_GS: save = &vmx->rmode.gs; break; - default: BUG(); - } - var->selector = save->selector; - var->base = save->base; - var->limit = save->limit; - ar = save->ar; + *var = vmx->rmode.segs[seg]; if (seg == VCPU_SREG_TR || var->selector == vmx_read_guest_seg_selector(vmx, seg)) - goto use_saved_rmode_seg; + return; } var->base = vmx_read_guest_seg_base(vmx, seg); var->limit = vmx_read_guest_seg_limit(vmx, seg); var->selector = vmx_read_guest_seg_selector(vmx, seg); ar = vmx_read_guest_seg_ar(vmx, seg); -use_saved_rmode_seg: if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; var->type = ar & 15; @@ -3235,10 +3207,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { vmcs_write16(sf->selector, var->selector); - vmx->rmode.tr.selector = var->selector; - vmx->rmode.tr.base = var->base; - vmx->rmode.tr.limit = var->limit; - vmx->rmode.tr.ar = vmx_segment_access_rights(var); + vmx->rmode.segs[VCPU_SREG_TR] = *var; return; } vmcs_writel(sf->base, var->base); @@ -3289,16 +3258,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_readl(GUEST_CS_BASE) >> 4); break; case VCPU_SREG_ES: - fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); - break; case VCPU_SREG_DS: - fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); - break; case VCPU_SREG_GS: - fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); - break; case VCPU_SREG_FS: - fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); + fix_rmode_seg(seg, &vmx->rmode.segs[seg]); break; case VCPU_SREG_SS: vmcs_write16(GUEST_SS_SELECTOR, -- cgit v1.2.3-70-g09d2 From c865c43de66dc973865bda337022f03b6e16c8df Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:01 +0300 Subject: KVM: VMX: Retain limit and attributes when entering protected mode Real processors don't change segment limits and attributes while in real mode. Mimic that behaviour. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7e95ff68b9d..88eeb405560 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2696,14 +2696,14 @@ static __exit void hardware_unsetup(void) static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + struct kvm_segment tmp = *save; - if (vmcs_readl(sf->base) == save->base && save->s) { - vmx_set_segment(vcpu, save, seg); - } else { - u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK) - << AR_DPL_SHIFT; - vmcs_write32(sf->ar_bytes, 0x93 | dpl); + if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { + tmp.base = vmcs_readl(sf->base); + tmp.selector = vmcs_read16(sf->selector); + tmp.s = 1; } + vmx_set_segment(vcpu, &tmp, seg); } static void enter_pmode(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From 495e116684cebc5ae625916aba37fc07f345707b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:02 +0300 Subject: KVM: VMX: Allow real mode emulation using vm86 with dpl=0 Real mode is always entered from protected mode with dpl=0. Since the dpl doesn't affect execution, and we already override it to 3 in the vmcs (as vmx requires), we can allow execution in that state. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 88eeb405560..4811d91759a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3317,7 +3317,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) return false; if (var.limit != 0xffff) return false; - if (ar != 0xf3) + if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3) return false; return true; -- cgit v1.2.3-70-g09d2 From e2a610d7fc3e285af8061ff071761752255d95f6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:03 +0300 Subject: KVM: VMX: Allow vm86 virtualization of big real mode Usually, big real mode uses large (4GB) segments. Currently we don't virtualize this; if any segment has a limit other than 0xffff, we emulate. But if we set the vmx-visible limit to 0xffff, we can use vm86 to virtualize real mode; if an access overruns the segment limit, the guest will #GP, which we will trap and forward to the emulator. This results in significantly faster execution, and less risk of hitting an unemulated instruction. If the limit is less than 0xffff, we retain the existing behaviour. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4811d91759a..fd21eb45466 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3315,7 +3315,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) if (var.base != (var.selector << 4)) return false; - if (var.limit != 0xffff) + if (var.limit < 0xffff) return false; if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3) return false; -- cgit v1.2.3-70-g09d2 From 03ebebeb1ff5d1d6209fd8df4ffc9204df82bd55 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:04 +0300 Subject: KVM: x86 emulator: Leave segment limit and attributs alone in real mode When loading a segment in real mode, only the base and selector must be modified. The limit needs to be left alone, otherwise big real mode users will hit a #GP due to limit checking (currently this is suppressed because we don't check limits in real mode). Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5e27ba53261..f8b27cd2a6c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1388,19 +1388,15 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ ulong desc_addr; int ret; + u16 dummy; memset(&seg_desc, 0, sizeof seg_desc); if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) || ctxt->mode == X86EMUL_MODE_REAL) { /* set real mode segment descriptor */ + ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); set_desc_base(&seg_desc, selector << 4); - set_desc_limit(&seg_desc, 0xffff); - seg_desc.type = 3; - seg_desc.p = 1; - seg_desc.s = 1; - if (ctxt->mode == X86EMUL_MODE_VM86) - seg_desc.dpl = 3; goto load; } -- cgit v1.2.3-70-g09d2 From a5625189f6810ef79ced53989c794acfa10d3370 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:05 +0300 Subject: KVM: x86 emulator: Check segment limits in real mode too Segment limits are verified in real mode, not just protected mode. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f8b27cd2a6c..5b1c701cd6d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -668,8 +668,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, la = seg_base(ctxt, addr.seg) + addr.ea; switch (ctxt->mode) { - case X86EMUL_MODE_REAL: - break; case X86EMUL_MODE_PROT64: if (((signed long)la << 16) >> 16 != la) return emulate_gp(ctxt, 0); @@ -699,7 +697,10 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; } cpl = ctxt->ops->cpl(ctxt); - rpl = sel & 3; + if (ctxt->mode == X86EMUL_MODE_REAL) + rpl = 0; + else + rpl = sel & 3; cpl = max(cpl, rpl); if (!(desc.type & 8)) { /* data segment */ -- cgit v1.2.3-70-g09d2 From 0afbe2f8781a812c7e501ec129eff45b21f792af Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:06 +0300 Subject: KVM: x86 emulator: Fix #GP error code during linearization We want the segment selector, nor segment number. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5b1c701cd6d..1451cffd97e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -725,9 +725,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; bad: if (addr.seg == VCPU_SREG_SS) - return emulate_ss(ctxt, addr.seg); + return emulate_ss(ctxt, sel); else - return emulate_gp(ctxt, addr.seg); + return emulate_gp(ctxt, sel); } static int linearize(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3-70-g09d2 From 726364202853f843a97a8ba4a7c3cd91e3aa84b7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:07 +0300 Subject: KVM: VMX: Return real real-mode segment data even if emulate_invalid_guest_state=1 emulate_invalid_guest_state=1 doesn't mean we don't munge the segments in the vmcs; we do. So we need to return the real ones (maintained by vmx_set_segment). Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fd21eb45466..0d6872621ab 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3108,8 +3108,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, if (vmx->rmode.vm86_active && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS - || seg == VCPU_SREG_GS) - && !emulate_invalid_guest_state) { + || seg == VCPU_SREG_GS)) { *var = vmx->rmode.segs[seg]; if (seg == VCPU_SREG_TR || var->selector == vmx_read_guest_seg_selector(vmx, seg)) -- cgit v1.2.3-70-g09d2 From 1390a28b274e2e45f89bac67c435cbcbc5cc0790 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:08 +0300 Subject: KVM: VMX: Preserve segment limit and access rights in real mode While this is undocumented, real processors do not reload the segment limit and access rights when loading a segment register in real mode. Real programs rely on it so we need to comply with this behaviour. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0d6872621ab..6e6421aeca0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3113,6 +3113,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, if (seg == VCPU_SREG_TR || var->selector == vmx_read_guest_seg_selector(vmx, seg)) return; + var->base = vmx_read_guest_seg_base(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + return; } var->base = vmx_read_guest_seg_base(vmx, seg); var->limit = vmx_read_guest_seg_limit(vmx, seg); -- cgit v1.2.3-70-g09d2 From ce5668034752e52e995c8dc625337380099a088a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:09 +0300 Subject: KVM: VMX: Save all segment data in real mode Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6e6421aeca0..62b2d0cf2a7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3216,6 +3216,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); if (vmx->rmode.vm86_active && var->s) { + vmx->rmode.segs[seg] = *var; /* * Hack real-mode segments into vm86 compatibility. */ -- cgit v1.2.3-70-g09d2 From a81aba14dc0ea499f4c218b5db0303b2ea8151d3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Aug 2012 17:07:10 +0300 Subject: KVM: VMX: Ignore segment G and D bits when considering whether we can virtualize We will enter the guest with G and D cleared; as real hardware ignores D in real mode, and G is taken care of by the limit test, we allow more code to run in vm86 mode. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 62b2d0cf2a7..248c2b490e9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3320,7 +3320,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) return false; if (var.limit < 0xffff) return false; - if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3) + if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) return false; return true; -- cgit v1.2.3-70-g09d2 From 9a7819774e4236e8736a074b7e85276967911924 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 30 Aug 2012 17:45:54 -0300 Subject: KVM: x86: remove unused variable from kvm_task_switch() Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e00050ce7a6..20f2266dfb6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5714,7 +5714,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; int ret; - unsigned reg; init_emulate_ctxt(vcpu); -- cgit v1.2.3-70-g09d2 From e09a8417823992700c063f9b8f85119f2d9016ed Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 10 Jul 2012 15:31:30 -0700 Subject: hpet: Remove unused PCI Vendor ID #define HPET_ID_VENDOR_8086 is defined but never used. It would be a redefine of PCI_VENDOR_ID_INTEL if it was ever used. Signed-off-by: Jon Mason Signed-off-by: Jiri Kosina --- arch/x86/include/asm/hpet.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 2c392d663dc..434e2106cc8 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -35,8 +35,6 @@ #define HPET_ID_NUMBER_SHIFT 8 #define HPET_ID_VENDOR_SHIFT 16 -#define HPET_ID_VENDOR_8086 0x8086 - #define HPET_CFG_ENABLE 0x001 #define HPET_CFG_LEGACY 0x002 #define HPET_LEGACY_8254 2 -- cgit v1.2.3-70-g09d2 From ec798660cf72c981ad8eed272487a0fe2b3222f2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 3 Sep 2012 14:47:25 +0300 Subject: KVM: cleanup pic reset kvm_pic_reset() is not used anywhere. Move reset logic from pic_ioport_write() there. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/i8259.c | 52 +++++++++++----------------------------------------- 1 file changed, 11 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 90c84f947d4..848206df096 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -275,23 +275,20 @@ void kvm_pic_reset(struct kvm_kpic_state *s) { int irq, i; struct kvm_vcpu *vcpu; - u8 irr = s->irr, isr = s->imr; + u8 edge_irr = s->irr & ~s->elcr; bool found = false; s->last_irr = 0; - s->irr = 0; + s->irr &= s->elcr; s->imr = 0; - s->isr = 0; s->priority_add = 0; - s->irq_base = 0; - s->read_reg_select = 0; - s->poll = 0; s->special_mask = 0; - s->init_state = 0; - s->auto_eoi = 0; - s->rotate_on_auto_eoi = 0; - s->special_fully_nested_mode = 0; - s->init4 = 0; + s->read_reg_select = 0; + if (!s->init4) { + s->special_fully_nested_mode = 0; + s->auto_eoi = 0; + } + s->init_state = 1; kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) if (kvm_apic_accept_pic_intr(vcpu)) { @@ -304,7 +301,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s) return; for (irq = 0; irq < PIC_NUM_PINS/2; irq++) - if (irr & (1 << irq) || isr & (1 << irq)) + if (edge_irr & (1 << irq)) pic_clear_isr(s, irq); } @@ -316,40 +313,13 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) addr &= 1; if (addr == 0) { if (val & 0x10) { - u8 edge_irr = s->irr & ~s->elcr; - int i; - bool found; - struct kvm_vcpu *vcpu; - s->init4 = val & 1; - s->last_irr = 0; - s->irr &= s->elcr; - s->imr = 0; - s->priority_add = 0; - s->special_mask = 0; - s->read_reg_select = 0; - if (!s->init4) { - s->special_fully_nested_mode = 0; - s->auto_eoi = 0; - } - s->init_state = 1; if (val & 0x02) pr_pic_unimpl("single mode not supported"); if (val & 0x08) pr_pic_unimpl( - "level sensitive irq not supported"); - - kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) - if (kvm_apic_accept_pic_intr(vcpu)) { - found = true; - break; - } - - - if (found) - for (irq = 0; irq < PIC_NUM_PINS/2; irq++) - if (edge_irr & (1 << irq)) - pic_clear_isr(s, irq); + "level sensitive irq not supported"); + kvm_pic_reset(s); } else if (val & 0x08) { if (val & 0x04) s->poll = 1; -- cgit v1.2.3-70-g09d2 From 749c59fd15b2c18dd6c15c353a899fb6ac49b865 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Thu, 30 Aug 2012 11:32:13 +0100 Subject: KVM: PIC: fix use of uninitialised variable. Commit aea218f3cbbc (KVM: PIC: call ack notifiers for irqs that are dropped form irr) used an uninitialised variable to track whether an appropriate apic had been found. This could result in calling the ack notifier incorrectly. Cc: Gleb Natapov Cc: Avi Kivity Signed-off-by: Jamie Iles Signed-off-by: Avi Kivity --- arch/x86/kvm/i8259.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index e498b18f010..9fc9aa7ac70 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -318,7 +318,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) if (val & 0x10) { u8 edge_irr = s->irr & ~s->elcr; int i; - bool found; + bool found = false; struct kvm_vcpu *vcpu; s->init4 = val & 1; -- cgit v1.2.3-70-g09d2 From 3ec18cd8b8f8395d0df604c62ab3bc2cf3a966b4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 20 Aug 2012 11:24:21 +0200 Subject: perf/x86: Enable Intel Cedarview Atom suppport This patch enables perf_events support for Intel Cedarview Atom (model 54) processors. Support includes PEBS and LBR. Tested on my Atom N2600 netbook. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120820092421.GA11284@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 1 + arch/x86/kernel/cpu/perf_event_intel_lbr.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 7f2739e03e7..0d3d63afa76 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2008,6 +2008,7 @@ __init int intel_pmu_init(void) break; case 28: /* Atom */ + case 54: /* Cedariew */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 520b4265fcd..da02e9cc375 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void) * to have an operational LBR which can freeze * on PMU interrupt */ - if (boot_cpu_data.x86_mask < 10) { + if (boot_cpu_data.x86_model == 28 + && boot_cpu_data.x86_mask < 10) { pr_cont("LBR disabled due to erratum"); return; } -- cgit v1.2.3-70-g09d2 From 406eae5d70c1a849f4a050f708a459c2349e9dda Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 2 Sep 2012 22:20:56 -0700 Subject: x86/Kconfig: Update defconfigs to current results of "make savedefconfig" The x86 defconfigs have become somewhat out of date compared to the current result of "make savedefconfig". Update them to the current output, as a prelude to further defconfig changes, to avoid unrelated noise in those further changes. Signed-off-by: Josh Triplett Cc: Randy Dunlap Cc: Suresh Siddha Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/80c8a5fbeaf6cdb72fb78a016013427efee52668.1346649518.git.josh@joshtriplett.org Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 12 ++++-------- arch/x86/configs/x86_64_defconfig | 12 ++++-------- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 119db67dcb0..19034087c1b 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y @@ -231,8 +231,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -243,11 +241,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -280,7 +278,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -299,7 +296,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 76eb2903809..c2c044846c7 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_CALGARY_IOMMU=y CONFIG_NR_CPUS=64 @@ -227,8 +227,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -239,11 +237,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -280,7 +278,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -298,7 +295,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y -- cgit v1.2.3-70-g09d2 From 3fe2cb8f9e9486980ff03d7e020781cfdb028ffa Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 2 Sep 2012 22:21:05 -0700 Subject: x86/Kconfig: Switch to ext4 in defconfigs The current x86 and x86-64 defconfigs do not enable ext4, which most current distributions default to. Switch the defconfigs to ext4, so they will boot on current systems without additional configuration. Signed-off-by: Josh Triplett Cc: Randy Dunlap Cc: Suresh Siddha Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/bd8a359506b7e1287c680823de16d67608ec52fe.1346649518.git.josh@joshtriplett.org Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 7 +++---- arch/x86/configs/x86_64_defconfig | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 19034087c1b..2701b8a8775 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -260,10 +260,9 @@ CONFIG_RTC_CLASS=y CONFIG_DMADEVICES=y CONFIG_EEEPC_LAPTOP=y CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index c2c044846c7..c17614efb98 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -260,10 +260,9 @@ CONFIG_AMD_IOMMU_STATS=y CONFIG_INTEL_IOMMU=y # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set -- cgit v1.2.3-70-g09d2 From b92f885f31db782608c8b73942111238020b1f4a Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 2 Sep 2012 22:21:13 -0700 Subject: x86/Kconfig: Disable CONFIG_CRC_T10DIF in defconfigs CONFIG_CRC_T10DIF explicitly states that it exists only for use by out-of-tree modules; anything in-kernel that needs it selects it. Thus, compile it out by default. Signed-off-by: Josh Triplett Cc: Randy Dunlap Cc: Suresh Siddha Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/3aaff7a0af1320427952d411a21b8ded29747a1f.1346649518.git.josh@joshtriplett.org Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 2701b8a8775..d833bb6d016 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -311,4 +311,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_CRYPTO_AES_586=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index c17614efb98..7ddcd99b240 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -309,4 +309,3 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y -- cgit v1.2.3-70-g09d2 From be2f328d8616c81b86e4013974608776c317bbeb Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 2 Sep 2012 22:21:21 -0700 Subject: x86/Kconfig: Turn off CONFIG_BLK_DEV_RAM The vast majority of systems either use initramfs or mount a root filesystem directly from the kernel. Distros have defaulted to initramfs for years. Only highly specialized systems would use an actual filesystem-image initrd at this point, and such systems don't rely on defconfig anyway. Drop initrd support (and specifically RAM block device support) from the defconfigs. Signed-off-by: Josh Triplett Cc: Randy Dunlap Cc: Suresh Siddha Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2521e983a63595cd7a331236d929577660f89c72.1346649518.git.josh@joshtriplett.org Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 2 -- arch/x86/configs/x86_64_defconfig | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index d833bb6d016..a6533a29222 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7ddcd99b240..18f3cc47ab8 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y -- cgit v1.2.3-70-g09d2 From c206b9dcb1fc41b104db43e98f9b83a8c0c559ae Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sun, 2 Sep 2012 22:21:29 -0700 Subject: x86/Kconfig: Turn off DEBUG_NX_TEST module in defconfigs The x86 defconfigs include exactly one module: test_nx.ko, a special-purpose module which just exists to do evil things like executing code off the stack to see if the kernel has enabled NX support. Anyone who actually uses that module can easily enable it themselves, but the vast majority of kernel builds don't need it; disable it by default. Signed-off-by: Josh Triplett Cc: Randy Dunlap Cc: Suresh Siddha Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Arjan van de Ven Link: http://lkml.kernel.org/r/e72faf875e1172fb1cbec5e6d3cd4122df508a97.1346649518.git.josh@joshtriplett.org Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index a6533a29222..5598547281a 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -298,7 +298,6 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 18f3cc47ab8..671524d0f6c 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -297,7 +297,6 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y -- cgit v1.2.3-70-g09d2 From f00026276ace77dcad1cdf17f696ae4e56e12ee6 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 2 Sep 2012 23:31:40 +0200 Subject: x86: Fix __user annotations in asm/sys_ia32.h Fix the following sparse warnings: sys_ia32.c:293:38: warning: incorrect type in argument 2 (different address spaces) sys_ia32.c:293:38: expected unsigned int [noderef] [usertype] *stat_addr sys_ia32.c:293:38: got unsigned int *stat_addr Ironically, sys_ia32.h was introduced to fix sparse warnings but missed that one. Signed-off-by: Mathias Krause Link: http://lkml.kernel.org/r/1346621506-30857-2-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/ia32/sys_ia32.c | 2 +- arch/x86/include/asm/sys_ia32.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 4540bece094..c5b938d92ea 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -287,7 +287,7 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, return ret; } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3fda9db4881..4ca1c611b55 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -40,7 +40,7 @@ asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, struct old_sigaction32 __user *); asmlinkage long sys32_alarm(unsigned int); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); asmlinkage long sys32_sysfs(int, u32, u32); asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, -- cgit v1.2.3-70-g09d2 From 3d1334064fb365ea8f299874c2b4c46de2bee74d Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 2 Sep 2012 23:31:41 +0200 Subject: x86/vdso: Add __user annotation to VDSO32_SYMBOL The address calculated by VDSO32_SYMBOL() is a pointer into userland. Add the __user annotation to fix related sparse warnings in its users. Signed-off-by: Mathias Krause Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/1346621506-30857-3-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vdso.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index bb0522850b7..fddb53d6391 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -11,7 +11,8 @@ extern const char VDSO32_PRELINK[]; #define VDSO32_SYMBOL(base, name) \ ({ \ extern const char VDSO32_##name[]; \ - (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ + (void __user *)(VDSO32_##name - VDSO32_PRELINK + \ + (unsigned long)(base)); \ }) #endif -- cgit v1.2.3-70-g09d2 From 0ff8fef4eaf252ee13a2d0b175a8c876415bd62a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 2 Sep 2012 23:31:42 +0200 Subject: x86/signals: ia32_signal.c: add __user casts to fix sparse warnings Fix the following sparse warnings by adding appropriate __user casts and annotations: ia32_signal.c:165:38: warning: incorrect type in argument 1 (different address spaces) ia32_signal.c:165:38: expected struct sigaltstack const [noderef] [usertype] * ia32_signal.c:165:38: got struct sigaltstack * [...] Signed-off-by: Mathias Krause Cc: Oleg Nesterov Link: http://lkml.kernel.org/r/1346621506-30857-4-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 673ac9b63d6..452d4dd0a95 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -162,7 +162,8 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, } seg = get_fs(); set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL), + (stack_t __force __user *) &uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) @@ -361,7 +362,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) + void __user **fpstate) { unsigned long sp; @@ -382,7 +383,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, if (used_math()) { sp = sp - sig_xstate_ia32_size; - *fpstate = (struct _fpstate_ia32 *) sp; + *fpstate = (struct _fpstate_ia32 __user *) sp; if (save_i387_xstate_ia32(*fpstate) < 0) return (void __user *) -1L; } @@ -448,7 +449,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, * These are actually not used anymore, but left because some * gdb versions depend on them as a marker. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -529,7 +530,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Not actually used anymore, but left because some gdb * versions need it. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) -- cgit v1.2.3-70-g09d2 From 04d695a6828bca54d53305246545cd1f8a841ac6 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 2 Sep 2012 23:31:43 +0200 Subject: x86/pci/probe_roms: Add missing __iomem annotation to pci_map_biosrom() Stay in sync with the declaration and fix the corresponding sparse warnings. Signed-off-by: Mathias Krause Cc: Dan Williams Link: http://lkml.kernel.org/r/1346621506-30857-5-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/probe_roms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 0bc72e2069e..d5f15c3f7b2 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) return oprom; } -void *pci_map_biosrom(struct pci_dev *pdev) +void __iomem *pci_map_biosrom(struct pci_dev *pdev) { struct resource *oprom = find_oprom(pdev); -- cgit v1.2.3-70-g09d2 From 5c7d03e99cb1ed449328ed9fba0c632944d39e7e Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 2 Sep 2012 23:31:44 +0200 Subject: x86/fpu/xsave: Keep __user annotation in casts Don't remove the __user annotation of the fpstate pointer, but drop the superfluous void * cast instead. This fixes the following sparse warnings: xsave.c:135:15: warning: cast removes address space of expression xsave.c:135:15: warning: incorrect type in argument 1 (different address spaces) xsave.c:135:15: expected void const volatile [noderef] * [...] Signed-off-by: Mathias Krause Cc: Suresh Siddha Link: http://lkml.kernel.org/r/1346621506-30857-6-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/xsave.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 3d3e2070911..9e1a8a7ba6e 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -132,9 +132,9 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf, fx_sw_user->xstate_size > fx_sw_user->extended_size) return -EINVAL; - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + - fx_sw_user->extended_size - - FP_XSTATE_MAGIC2_SIZE)); + err = __get_user(magic2, (__u32 __user *) (fpstate + + fx_sw_user->extended_size - + FP_XSTATE_MAGIC2_SIZE)); if (err) return err; /* -- cgit v1.2.3-70-g09d2 From 2b11afd1ab502d959ae8d6d5812923151b5bc505 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 2 Sep 2012 23:31:45 +0200 Subject: x86/iommu: Drop duplicate const in __IOMMU_INIT It's redundant and makes sparse complain about it. Signed-off-by: Mathias Krause Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1346621506-30857-7-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iommu_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h index f229b13a5f3..bbf8fb2e392 100644 --- a/arch/x86/include/asm/iommu_table.h +++ b/arch/x86/include/asm/iommu_table.h @@ -48,7 +48,7 @@ struct iommu_table_entry { #define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ - static const struct iommu_table_entry const \ + static const struct iommu_table_entry \ __iommu_entry_##_detect __used \ __attribute__ ((unused, __section__(".iommu_table"), \ aligned((sizeof(void *))))) \ -- cgit v1.2.3-70-g09d2 From ae13b7b4e041eccf34fa4dd58581fe1441375578 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 2 Sep 2012 23:31:46 +0200 Subject: x86/iommu: Use NULL instead of plain 0 for __IOMMU_INIT IOMMU_INIT_POST and IOMMU_INIT_POST_FINISH pass the plain value 0 instead of NULL to __IOMMU_INIT. Fix this and make sparse happy by doing so. Signed-off-by: Mathias Krause Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1346621506-30857-8-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iommu_table.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h index bbf8fb2e392..f42a04735a0 100644 --- a/arch/x86/include/asm/iommu_table.h +++ b/arch/x86/include/asm/iommu_table.h @@ -63,10 +63,10 @@ struct iommu_table_entry { * to stop detecting the other IOMMUs after yours has been detected. */ #define IOMMU_INIT_POST(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0) #define IOMMU_INIT_POST_FINISH(detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1) /* * A more sophisticated version of IOMMU_INIT. This variant requires: -- cgit v1.2.3-70-g09d2 From 4454d32749465ffa77d82bc1fdd196d6dedc544b Mon Sep 17 00:00:00 2001 From: Joe Millenbach Date: Sun, 2 Sep 2012 17:38:20 -0700 Subject: x86/kconfig: Remove outdated reference to Intel CPUs in CONFIG_SWIOTLB Deleted the no longer valid example of which x86 CPUs lack a hardware IOMMU, and moved the "If unsure..." statement to a new line to follow the style of surrounding options. Signed-off-by: Joe Millenbach Reviewed-by: Josh Triplett Cc: team-fjord@googlegroups.com Cc: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1346632700-29113-1-git-send-email-jmillenbach@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..50a1d1f9b6d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -746,10 +746,10 @@ config SWIOTLB def_bool y if X86_64 ---help--- Support for software bounce buffers used on x86-64 systems - which don't have a hardware IOMMU (e.g. the current generation - of Intel's x86-64 CPUs). Using this PCI devices which can only - access 32-bits of memory can be used on systems with more than - 3 GB of memory. If unsure, say Y. + which don't have a hardware IOMMU. Using this PCI devices + which can only access 32-bits of memory can be used on systems + with more than 3 GB of memory. + If unsure, say Y. config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) -- cgit v1.2.3-70-g09d2 From 326d07cb30fed2387efccd4bf3bd8e4f28719e9e Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 30 Aug 2012 01:30:13 +0200 Subject: KVM: x86: minor size optimization Some fields can be constified and/or made static to reduce code and data size. Numbers for a 32 bit build: text data bss dec hex filename before: 3351 80 0 3431 d67 cpuid.o after: 3391 0 0 3391 d3f cpuid.o Signed-off-by: Mathias Krause Signed-off-by: Avi Kivity --- arch/x86/kvm/cpuid.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b496da684bd..ec79e773342 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -397,8 +397,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; } case KVM_CPUID_SIGNATURE: { - char signature[12] = "KVMKVMKVM\0\0"; - u32 *sigptr = (u32 *)signature; + static const char signature[12] = "KVMKVMKVM\0\0"; + const u32 *sigptr = (const u32 *)signature; entry->eax = KVM_CPUID_FEATURES; entry->ebx = sigptr[0]; entry->ecx = sigptr[1]; @@ -484,10 +484,10 @@ struct kvm_cpuid_param { u32 func; u32 idx; bool has_leaf_count; - bool (*qualifier)(struct kvm_cpuid_param *param); + bool (*qualifier)(const struct kvm_cpuid_param *param); }; -static bool is_centaur_cpu(struct kvm_cpuid_param *param) +static bool is_centaur_cpu(const struct kvm_cpuid_param *param) { return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; } @@ -498,7 +498,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG, i; u32 func; - static struct kvm_cpuid_param param[] = { + static const struct kvm_cpuid_param param[] = { { .func = 0, .has_leaf_count = true }, { .func = 0x80000000, .has_leaf_count = true }, { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, @@ -517,7 +517,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, r = 0; for (i = 0; i < ARRAY_SIZE(param); i++) { - struct kvm_cpuid_param *ent = ¶m[i]; + const struct kvm_cpuid_param *ent = ¶m[i]; if (ent->qualifier && !ent->qualifier(ent)) continue; -- cgit v1.2.3-70-g09d2 From 89a87c67791b840d815a2028b88cefe6906ed42c Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 30 Aug 2012 01:30:14 +0200 Subject: KVM: x86 emulator: use aligned variants of SSE register ops As the the compiler ensures that the memory operand is always aligned to a 16 byte memory location, use the aligned variant of MOVDQ for read_sse_reg() and write_sse_reg(). Signed-off-by: Mathias Krause Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 64 +++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1451cffd97e..5a0fee1a19c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -909,23 +909,23 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { ctxt->ops->get_fpu(ctxt); switch (reg) { - case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; - case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; - case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; - case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; - case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; - case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; - case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; - case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; + case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; + case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; + case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break; + case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break; + case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break; + case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break; + case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break; + case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break; #ifdef CONFIG_X86_64 - case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; - case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; - case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; - case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; - case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; - case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; - case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; - case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; + case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break; + case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break; + case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break; + case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break; + case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break; + case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break; + case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break; + case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break; #endif default: BUG(); } @@ -937,23 +937,23 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, { ctxt->ops->get_fpu(ctxt); switch (reg) { - case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; - case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; - case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; - case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; - case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; - case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; - case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; - case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; + case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; + case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; + case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break; + case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break; + case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break; + case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break; + case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break; + case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break; #ifdef CONFIG_X86_64 - case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; - case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; - case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; - case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; - case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; - case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; - case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; - case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; + case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break; + case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break; + case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break; + case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break; + case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break; + case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break; + case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break; + case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break; #endif default: BUG(); } -- cgit v1.2.3-70-g09d2 From fd0a0d82083747301f6c8084b4141bb490625016 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 30 Aug 2012 01:30:15 +0200 Subject: KVM: x86 emulator: mark opcode tables const The opcode tables never change at runtime, therefor mark them const. Signed-off-by: Mathias Krause Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5a0fee1a19c..fd06f9d6584 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -161,9 +161,9 @@ struct opcode { u64 intercept : 8; union { int (*execute)(struct x86_emulate_ctxt *ctxt); - struct opcode *group; - struct group_dual *gdual; - struct gprefix *gprefix; + const struct opcode *group; + const struct group_dual *gdual; + const struct gprefix *gprefix; } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -3574,13 +3574,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) -static struct opcode group7_rm1[] = { +static const struct opcode group7_rm1[] = { DI(SrcNone | Priv, monitor), DI(SrcNone | Priv, mwait), N, N, N, N, N, N, }; -static struct opcode group7_rm3[] = { +static const struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), @@ -3591,13 +3591,13 @@ static struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, invlpga, check_svme), }; -static struct opcode group7_rm7[] = { +static const struct opcode group7_rm7[] = { N, DIP(SrcNone, rdtscp, check_rdtsc), N, N, N, N, N, N, }; -static struct opcode group1[] = { +static const struct opcode group1[] = { I(Lock, em_add), I(Lock | PageTable, em_or), I(Lock, em_adc), @@ -3608,11 +3608,11 @@ static struct opcode group1[] = { I(0, em_cmp), }; -static struct opcode group1A[] = { +static const struct opcode group1A[] = { I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, }; -static struct opcode group3[] = { +static const struct opcode group3[] = { I(DstMem | SrcImm, em_test), I(DstMem | SrcImm, em_test), I(DstMem | SrcNone | Lock, em_not), @@ -3623,13 +3623,13 @@ static struct opcode group3[] = { I(SrcMem, em_idiv_ex), }; -static struct opcode group4[] = { +static const struct opcode group4[] = { I(ByteOp | DstMem | SrcNone | Lock, em_grp45), I(ByteOp | DstMem | SrcNone | Lock, em_grp45), N, N, N, N, N, N, }; -static struct opcode group5[] = { +static const struct opcode group5[] = { I(DstMem | SrcNone | Lock, em_grp45), I(DstMem | SrcNone | Lock, em_grp45), I(SrcMem | Stack, em_grp45), @@ -3639,7 +3639,7 @@ static struct opcode group5[] = { I(SrcMem | Stack, em_grp45), N, }; -static struct opcode group6[] = { +static const struct opcode group6[] = { DI(Prot, sldt), DI(Prot, str), II(Prot | Priv | SrcMem16, em_lldt, lldt), @@ -3647,7 +3647,7 @@ static struct opcode group6[] = { N, N, N, N, }; -static struct group_dual group7 = { { +static const struct group_dual group7 = { { II(Mov | DstMem | Priv, em_sgdt, sgdt), II(Mov | DstMem | Priv, em_sidt, sidt), II(SrcMem | Priv, em_lgdt, lgdt), @@ -3664,7 +3664,7 @@ static struct group_dual group7 = { { EXT(0, group7_rm7), } }; -static struct opcode group8[] = { +static const struct opcode group8[] = { N, N, N, N, I(DstMem | SrcImmByte, em_bt), I(DstMem | SrcImmByte | Lock | PageTable, em_bts), @@ -3672,26 +3672,26 @@ static struct opcode group8[] = { I(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; -static struct group_dual group9 = { { +static const struct group_dual group9 = { { N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, }, { N, N, N, N, N, N, N, N, } }; -static struct opcode group11[] = { +static const struct opcode group11[] = { I(DstMem | SrcImm | Mov | PageTable, em_mov), X7(D(Undefined)), }; -static struct gprefix pfx_0f_6f_0f_7f = { +static const struct gprefix pfx_0f_6f_0f_7f = { I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; -static struct gprefix pfx_vmovntpx = { +static const struct gprefix pfx_vmovntpx = { I(0, em_mov), N, N, N, }; -static struct opcode opcode_table[256] = { +static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ I6ALU(Lock, em_add), I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), @@ -3808,7 +3808,7 @@ static struct opcode opcode_table[256] = { D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), }; -static struct opcode twobyte_table[256] = { +static const struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ G(0, group6), GD(0, &group7), N, N, N, I(ImplicitOps | VendorSpecific, em_syscall), -- cgit v1.2.3-70-g09d2 From 0225fb509d51fcf777eb0aa31c304c582e3248fd Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 30 Aug 2012 01:30:16 +0200 Subject: KVM: x86 emulator: constify emulate_ops We never change emulate_ops[] at runtime so it should be r/o. Signed-off-by: Mathias Krause Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 +- arch/x86/kvm/emulate.c | 22 +++++++++++----------- arch/x86/kvm/x86.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 282aee5d6ac..b5bb73aecc0 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -250,7 +250,7 @@ struct read_cache { }; struct x86_emulate_ctxt { - struct x86_emulate_ops *ops; + const struct x86_emulate_ops *ops; /* Register state before/after emulation. */ unsigned long eflags; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fd06f9d6584..663e95881bd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1325,7 +1325,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, u16 selector, struct desc_ptr *dt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; if (selector & 1 << 2) { struct desc_struct desc; @@ -1747,7 +1747,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; int rc; struct desc_ptr dt; gva_t cs_addr; @@ -2129,7 +2129,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; u32 eax, ebx, ecx, edx; /* @@ -2173,7 +2173,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) static int em_syscall(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; @@ -2231,7 +2231,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) static int em_sysenter(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; @@ -2294,7 +2294,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) static int em_sysexit(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; int usermode; @@ -2357,7 +2357,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, u16 port, u16 len) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct tr_seg; u32 base3; int r; @@ -2476,7 +2476,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 tss_selector, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct tss_segment_16 tss_seg; int ret; u32 new_tss_base = get_desc_base(new_desc); @@ -2623,7 +2623,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 tss_selector, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct tss_segment_32 tss_seg; int ret; u32 new_tss_base = get_desc_base(new_desc); @@ -2667,7 +2667,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct curr_tss_desc, next_tss_desc; int ret; u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); @@ -4339,7 +4339,7 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; int saved_dst_type = ctxt->dst.type; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 20f2266dfb6..0dc066f0428 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4323,7 +4323,7 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon kvm_register_write(emul_to_vcpu(ctxt), reg, val); } -static struct x86_emulate_ops emulate_ops = { +static const struct x86_emulate_ops emulate_ops = { .read_gpr = emulator_read_gpr, .write_gpr = emulator_write_gpr, .read_std = kvm_read_guest_virt_system, -- cgit v1.2.3-70-g09d2 From 0fbe9b0b19fb92eee2cf23c23d63d6b3312681e5 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 30 Aug 2012 01:30:17 +0200 Subject: KVM: x86: constify read_write_emulator_ops We never change those, make them r/o. Signed-off-by: Mathias Krause Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0dc066f0428..317241619e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3773,14 +3773,14 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, return X86EMUL_CONTINUE; } -static struct read_write_emulator_ops read_emultor = { +static const struct read_write_emulator_ops read_emultor = { .read_write_prepare = read_prepare, .read_write_emulate = read_emulate, .read_write_mmio = vcpu_mmio_read, .read_write_exit_mmio = read_exit_mmio, }; -static struct read_write_emulator_ops write_emultor = { +static const struct read_write_emulator_ops write_emultor = { .read_write_emulate = write_emulate, .read_write_mmio = write_mmio, .read_write_exit_mmio = write_exit_mmio, @@ -3791,7 +3791,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, unsigned int bytes, struct x86_exception *exception, struct kvm_vcpu *vcpu, - struct read_write_emulator_ops *ops) + const struct read_write_emulator_ops *ops) { gpa_t gpa; int handled, ret; @@ -3840,7 +3840,7 @@ mmio: int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, void *val, unsigned int bytes, struct x86_exception *exception, - struct read_write_emulator_ops *ops) + const struct read_write_emulator_ops *ops) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; -- cgit v1.2.3-70-g09d2 From f1d248315afc55771c3991b934014daa154d05f1 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 30 Aug 2012 01:30:18 +0200 Subject: KVM: x86: more constification Signed-off-by: Mathias Krause Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/x86.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 18d149d8020..07ad628dadb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -198,7 +198,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } -static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { +static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = { LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ LVT_MASK | APIC_MODE_MASK, /* LVTPC */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 317241619e2..666da13c34f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -817,7 +817,7 @@ static u32 msrs_to_save[] = { static unsigned num_msrs_to_save; -static u32 emulated_msrs[] = { +static const u32 emulated_msrs[] = { MSR_IA32_TSCDEADLINE, MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, -- cgit v1.2.3-70-g09d2 From 772e031899fdf3c7636d66aae9b0b57d1aaebb93 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 30 Aug 2012 01:30:19 +0200 Subject: KVM: VMX: constify lookup tables We use vmcs_field_to_offset_table[], kvm_vmx_segment_fields[] and kvm_vmx_exit_handlers[] as lookup tables only -- make them r/o. Signed-off-by: Mathias Krause Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 248c2b490e9..d62b4139a29 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -450,7 +450,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ [number##_HIGH] = VMCS12_OFFSET(name)+4 -static unsigned short vmcs_field_to_offset_table[] = { +static const unsigned short vmcs_field_to_offset_table[] = { FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), FIELD(GUEST_ES_SELECTOR, guest_es_selector), FIELD(GUEST_CS_SELECTOR, guest_cs_selector), @@ -666,7 +666,7 @@ static struct vmx_capability { .ar_bytes = GUEST_##seg##_AR_BYTES, \ } -static struct kvm_vmx_segment_field { +static const struct kvm_vmx_segment_field { unsigned selector; unsigned base; unsigned limit; @@ -2695,7 +2695,7 @@ static __exit void hardware_unsetup(void) static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; struct kvm_segment tmp = *save; if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { @@ -2764,7 +2764,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) static void fix_rmode_seg(int seg, struct kvm_segment *save) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; vmcs_write16(sf->selector, save->base >> 4); vmcs_write32(sf->base, save->base & 0xffff0); @@ -3202,7 +3202,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; vmx_segment_cache_clear(vmx); @@ -3572,7 +3572,7 @@ out: static void seg_setup(int seg) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; unsigned int ar; vmcs_write16(sf->selector, 0); @@ -5655,7 +5655,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) * may resume. Otherwise they set the kvm_run parameter to indicate what needs * to be done to userspace and return 0. */ -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { +static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_EXCEPTION_NMI] = handle_exception, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, -- cgit v1.2.3-70-g09d2 From 09941fbb712655cde9b350852be7a99a6f61a03f Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 30 Aug 2012 01:30:20 +0200 Subject: KVM: SVM: constify lookup tables We never modify direct_access_msrs[], msrpm_ranges[], svm_exit_handlers[] or x86_intercept_map[] at runtime. Mark them r/o. Signed-off-by: Mathias Krause Cc: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 31be4a55744..611c72875fb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -163,7 +163,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); #define MSR_INVALID 0xffffffffU -static struct svm_direct_access_msrs { +static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ bool always; /* True if intercept is always on */ } direct_access_msrs[] = { @@ -400,7 +400,7 @@ struct svm_init_data { int r; }; -static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; +static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) #define MSRS_RANGE_SIZE 2048 @@ -3267,7 +3267,7 @@ static int pause_interception(struct vcpu_svm *svm) return 1; } -static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { +static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = cr_interception, [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, @@ -4068,7 +4068,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) #define POST_MEM(exit) { .exit_code = (exit), \ .stage = X86_ICPT_POST_MEMACCESS, } -static struct __x86_intercept { +static const struct __x86_intercept { u32 exit_code; enum x86_intercept_stage stage; } x86_intercept_map[] = { -- cgit v1.2.3-70-g09d2 From 2df72e9bc4c505d8357012f2924589f3d16f9d44 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 24 Aug 2012 15:54:57 -0300 Subject: KVM: split kvm_arch_flush_shadow Introducing kvm_arch_flush_shadow_memslot, to invalidate the translations of a single memory slot. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 8 +++++++- arch/powerpc/kvm/powerpc.c | 6 +++++- arch/s390/kvm/kvm-s390.c | 7 ++++++- arch/x86/kvm/x86.c | 8 +++++++- include/linux/kvm_host.h | 6 +++++- virt/kvm/kvm_main.c | 8 ++++---- 6 files changed, 34 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index eac65380bd2..8b3a9c0e771 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1613,11 +1613,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; } -void kvm_arch_flush_shadow(struct kvm *kvm) +void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_flush_remote_tlbs(kvm); } +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + kvm_arch_flush_shadow_all(); +} + long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 879b14a6140..4d213b8b0fb 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -334,8 +334,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvmppc_core_commit_memory_region(kvm, mem); } +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} -void kvm_arch_flush_shadow(struct kvm *kvm) +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) { } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e83df7f0fed..ecced9d1898 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -969,7 +969,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; } -void kvm_arch_flush_shadow(struct kvm *kvm) +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) { } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 666da13c34f..37797a090a8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6447,12 +6447,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, spin_unlock(&kvm->mmu_lock); } -void kvm_arch_flush_shadow(struct kvm *kvm) +void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_mmu_zap_all(kvm); kvm_reload_remote_mmus(kvm); } +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + kvm_arch_flush_shadow_all(kvm); +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9c0b3c3ae0a..40791930bc1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -458,7 +458,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, int user_alloc); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); -void kvm_arch_flush_shadow(struct kvm *kvm); +/* flush all memory translations */ +void kvm_arch_flush_shadow_all(struct kvm *kvm); +/* flush memory translations pointing to 'slot' */ +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot); int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, int nr_pages); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6425906d7ce..a4bf05be5fe 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -408,7 +408,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, int idx; idx = srcu_read_lock(&kvm->srcu); - kvm_arch_flush_shadow(kvm); + kvm_arch_flush_shadow_all(kvm); srcu_read_unlock(&kvm->srcu, idx); } @@ -582,7 +582,7 @@ static void kvm_destroy_vm(struct kvm *kvm) #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); #else - kvm_arch_flush_shadow(kvm); + kvm_arch_flush_shadow_all(kvm); #endif kvm_arch_destroy_vm(kvm); kvm_free_physmem(kvm); @@ -814,7 +814,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) * - kvm_is_visible_gfn (mmu_check_roots) */ - kvm_arch_flush_shadow(kvm); + kvm_arch_flush_shadow_memslot(kvm, slot); kfree(old_memslots); } @@ -854,7 +854,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * mmio sptes. */ if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) - kvm_arch_flush_shadow(kvm); + kvm_arch_flush_shadow_all(kvm); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); -- cgit v1.2.3-70-g09d2 From 3b4dc3a031110753b9ba36432dbd21f989fcee56 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 28 Aug 2012 17:43:26 -0300 Subject: KVM: move postcommit flush to x86, as mmio sptes are x86 specific Other arches do not need this. Signed-off-by: Marcelo Tosatti v2: fix incorrect deletion of mmio sptes on gpa move (noticed by Takuya) Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 8 ++++++++ virt/kvm/kvm_main.c | 7 ------- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 37797a090a8..6f6812ec841 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6445,6 +6445,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); kvm_mmu_slot_remove_write_access(kvm, mem->slot); spin_unlock(&kvm->mmu_lock); + /* + * If memory slot is created, or moved, we need to clear all + * mmio sptes. + */ + if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { + kvm_mmu_zap_all(kvm); + kvm_reload_remote_mmus(kvm); + } } void kvm_arch_flush_shadow_all(struct kvm *kvm) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f41ea1262d5..4fe02d90081 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -849,13 +849,6 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); - /* - * If the new memory slot is created, we need to clear all - * mmio sptes. - */ - if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) - kvm_arch_flush_shadow_all(kvm); - kvm_free_physmem_slot(&old, &new); kfree(old_memslots); -- cgit v1.2.3-70-g09d2 From 716d51abff06f48425cef15d78ca6f36093f6dbf Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 3 Sep 2012 15:24:26 +0300 Subject: KVM: Provide userspace IO exit completion callback Current code assumes that IO exit was due to instruction emulation and handles execution back to emulator directly. This patch adds new userspace IO exit completion callback that can be set by any other code that caused IO exit to userspace. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 93 +++++++++++++++++++++++++---------------- 2 files changed, 57 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fc0e752e756..64adb6117e1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -414,6 +414,7 @@ struct kvm_vcpu_arch { struct x86_emulate_ctxt emulate_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; + int (*complete_userspace_io)(struct kvm_vcpu *vcpu); gpa_t time; struct pvclock_vcpu_time_info hv_clock; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6f6812ec841..f91e2c9d7cb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4544,6 +4544,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, return true; } +static int complete_emulated_mmio(struct kvm_vcpu *vcpu); +static int complete_emulated_pio(struct kvm_vcpu *vcpu); + int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, @@ -4614,13 +4617,16 @@ restart: } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; - else + else { writeback = false; + vcpu->arch.complete_userspace_io = complete_emulated_pio; + } r = EMULATE_DO_MMIO; } else if (vcpu->mmio_needed) { if (!vcpu->mmio_is_write) writeback = false; r = EMULATE_DO_MMIO; + vcpu->arch.complete_userspace_io = complete_emulated_mmio; } else if (r == EMULATION_RESTART) goto restart; else @@ -5476,6 +5482,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return r; } +static inline int complete_emulated_io(struct kvm_vcpu *vcpu) +{ + int r; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (r != EMULATE_DONE) + return 0; + return 1; +} + +static int complete_emulated_pio(struct kvm_vcpu *vcpu) +{ + BUG_ON(!vcpu->arch.pio.count); + + return complete_emulated_io(vcpu); +} + /* * Implements the following, as a state machine: * @@ -5492,47 +5516,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) * copy data * exit */ -static int complete_mmio(struct kvm_vcpu *vcpu) +static int complete_emulated_mmio(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; struct kvm_mmio_fragment *frag; - int r; - if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) - return 1; + BUG_ON(!vcpu->mmio_needed); - if (vcpu->mmio_needed) { - /* Complete previous fragment */ - frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; - if (!vcpu->mmio_is_write) - memcpy(frag->data, run->mmio.data, frag->len); - if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { - vcpu->mmio_needed = 0; - if (vcpu->mmio_is_write) - return 1; - vcpu->mmio_read_completed = 1; - goto done; - } - /* Initiate next fragment */ - ++frag; - run->exit_reason = KVM_EXIT_MMIO; - run->mmio.phys_addr = frag->gpa; + /* Complete previous fragment */ + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; + if (!vcpu->mmio_is_write) + memcpy(frag->data, run->mmio.data, frag->len); + if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { + vcpu->mmio_needed = 0; if (vcpu->mmio_is_write) - memcpy(run->mmio.data, frag->data, frag->len); - run->mmio.len = frag->len; - run->mmio.is_write = vcpu->mmio_is_write; - return 0; - - } -done: - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) - return 0; - return 1; + return 1; + vcpu->mmio_read_completed = 1; + return complete_emulated_io(vcpu); + } + /* Initiate next fragment */ + ++frag; + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = frag->gpa; + if (vcpu->mmio_is_write) + memcpy(run->mmio.data, frag->data, frag->len); + run->mmio.len = frag->len; + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->arch.complete_userspace_io = complete_emulated_mmio; + return 0; } + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -5559,9 +5573,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - r = complete_mmio(vcpu); - if (r <= 0) - goto out; + if (unlikely(vcpu->arch.complete_userspace_io)) { + int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; + vcpu->arch.complete_userspace_io = NULL; + r = cui(vcpu); + if (r <= 0) + goto out; + } else + WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); r = __vcpu_run(vcpu); -- cgit v1.2.3-70-g09d2 From 9d1b39a967871b7c69025dba7b7bdaee42871021 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 3 Sep 2012 15:24:27 +0300 Subject: KVM: emulator: make x86 emulation modes enum instead of defines Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 22 ++++++++++------------ arch/x86/kvm/emulate.c | 4 +++- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b5bb73aecc0..e9e5675c0df 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -249,6 +249,15 @@ struct read_cache { unsigned long end; }; +/* Execution mode, passed to the emulator. */ +enum x86emul_mode { + X86EMUL_MODE_REAL, /* Real mode. */ + X86EMUL_MODE_VM86, /* Virtual 8086 mode. */ + X86EMUL_MODE_PROT16, /* 16-bit protected mode. */ + X86EMUL_MODE_PROT32, /* 32-bit protected mode. */ + X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */ +}; + struct x86_emulate_ctxt { const struct x86_emulate_ops *ops; @@ -256,7 +265,7 @@ struct x86_emulate_ctxt { unsigned long eflags; unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ - int mode; + enum x86emul_mode mode; /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; @@ -308,17 +317,6 @@ struct x86_emulate_ctxt { #define REPE_PREFIX 0xf3 #define REPNE_PREFIX 0xf2 -/* Execution mode, passed to the emulator. */ -#define X86EMUL_MODE_REAL 0 /* Real mode. */ -#define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */ -#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ -#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ -#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ - -/* any protected mode */ -#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ - X86EMUL_MODE_PROT64) - /* CPUID vendors */ #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 663e95881bd..5fe06a8fbeb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2268,6 +2268,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) if (msr_data == 0x0) return emulate_gp(ctxt, 0); break; + default: + break; } ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); @@ -4400,7 +4402,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } /* Instruction can only be executed in protected mode */ - if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { + if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { rc = emulate_ud(ctxt); goto done; } -- cgit v1.2.3-70-g09d2 From f3bd64c68a8f1245e3d037f70c6936cd7bb1196b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 3 Sep 2012 15:24:28 +0300 Subject: KVM: emulator: string_addr_inc() cleanup Remove unneeded segment argument. Address structure already has correct segment which was put there during decode. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5fe06a8fbeb..415f903facd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2790,14 +2790,13 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } -static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, - int reg, struct operand *op) +static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, + struct operand *op) { int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); - op->addr.mem.seg = seg; } static int em_das(struct x86_emulate_ctxt *ctxt) @@ -4570,12 +4569,10 @@ writeback: ctxt->dst.type = saved_dst_type; if ((ctxt->d & SrcMask) == SrcSI) - string_addr_inc(ctxt, seg_override(ctxt), - VCPU_REGS_RSI, &ctxt->src); + string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src); if ((ctxt->d & DstMask) == DstDI) - string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI, - &ctxt->dst); + string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst); if (ctxt->rep_prefix && (ctxt->d & String)) { struct read_cache *r = &ctxt->io_read; -- cgit v1.2.3-70-g09d2 From b3356bf0dbb34980620f2f7def7d1b9a0d325225 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 3 Sep 2012 15:24:29 +0300 Subject: KVM: emulator: optimize "rep ins" handling Optimize "rep ins" by allowing emulator to write back more than one datum at a time. Introduce new operand type OP_MEM_STR which tells writeback() that dst contains pointer to an array that should be written back as opposite to just one data element. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 4 +++- arch/x86/kvm/emulate.c | 33 ++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e9e5675c0df..15f960c06ff 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -213,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t; /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; + enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; unsigned int bytes; + unsigned int count; union { unsigned long orig_val; u64 orig_val64; @@ -234,6 +235,7 @@ struct operand { char valptr[sizeof(unsigned long) + 2]; sse128_t vec_val; u64 mm_val; + void *data; }; }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 415f903facd..39171cb307e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1301,8 +1301,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, rc->end = n * size; } - memcpy(dest, rc->data + rc->pos, size); - rc->pos += size; + if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) { + ctxt->dst.data = rc->data + rc->pos; + ctxt->dst.type = OP_MEM_STR; + ctxt->dst.count = (rc->end - rc->pos) / size; + rc->pos = rc->end; + } else { + memcpy(dest, rc->data + rc->pos, size); + rc->pos += size; + } return 1; } @@ -1546,6 +1553,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; break; + case OP_MEM_STR: + rc = segmented_write(ctxt, + ctxt->dst.addr.mem, + ctxt->dst.data, + ctxt->dst.bytes * ctxt->dst.count); + if (rc != X86EMUL_CONTINUE) + return rc; + break; case OP_XMM: write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); break; @@ -2793,7 +2808,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, struct operand *op) { - int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; + int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); @@ -3733,7 +3748,7 @@ static const struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */ + I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */ I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), @@ -3991,6 +4006,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); op->addr.mem.seg = VCPU_SREG_ES; op->val = 0; + op->count = 1; break; case OpDX: op->type = OP_REG; @@ -4034,6 +4050,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); op->addr.mem.seg = seg_override(ctxt); op->val = 0; + op->count = 1; break; case OpImmFAddr: op->type = OP_IMM; @@ -4575,8 +4592,14 @@ writeback: string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst); if (ctxt->rep_prefix && (ctxt->d & String)) { + unsigned int count; struct read_cache *r = &ctxt->io_read; - register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); + if ((ctxt->d & SrcMask) == SrcSI) + count = ctxt->src.count; + else + count = ctxt->dst.count; + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), + -count); if (!string_insn_completed(ctxt)) { /* -- cgit v1.2.3-70-g09d2 From a50abc3b2b469ee80bc0f9ef5b6d457ef72659a9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Sep 2012 20:00:52 +0300 Subject: KVM: use symbolic constant for nr interrupts interrupt_bitmap is KVM_NR_INTERRUPTS bits in size, so just use that instead of hard-coded constants and math. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f91e2c9d7cb..c4d451ed157 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2366,7 +2366,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - if (irq->irq < 0 || irq->irq >= 256) + if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) return -EINVAL; if (irqchip_in_kernel(vcpu->kvm)) return -ENXIO; @@ -5793,7 +5793,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - max_bits = (sizeof sregs->interrupt_bitmap) << 3; + max_bits = KVM_NR_INTERRUPTS; pending_vec = find_first_bit( (const unsigned long *)sregs->interrupt_bitmap, max_bits); if (pending_vec < max_bits) { -- cgit v1.2.3-70-g09d2 From f94a73f8dd5644f45f9d2e3139608ca83b932d93 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 28 Aug 2012 14:24:43 +0300 Subject: crypto: twofish-avx - tune assembler code for more performance Patch replaces 'movb' instructions with 'movzbl' to break false register dependencies and interleaves instructions better for out-of-order scheduling. Tested on Intel Core i5-2450M and AMD FX-8100. tcrypt ECB results: Intel Core i5-2450M: size old-vs-new new-vs-3way old-vs-3way enc dec enc dec enc dec 256 1.12x 1.13x 1.36x 1.37x 1.21x 1.22x 1k 1.14x 1.14x 1.48x 1.49x 1.29x 1.31x 8k 1.14x 1.14x 1.50x 1.52x 1.32x 1.33x AMD FX-8100: size old-vs-new new-vs-3way old-vs-3way enc dec enc dec enc dec 256 1.10x 1.11x 1.01x 1.01x 0.92x 0.91x 1k 1.11x 1.12x 1.08x 1.07x 0.97x 0.96x 8k 1.11x 1.13x 1.10x 1.08x 0.99x 0.97x [v2] - Do instruction interleaving another way to avoid adding new FPU<=>CPU register moves as these cause performance drop on Bulldozer. - Further interleaving improvements for better out-of-order scheduling. Tested-by: Borislav Petkov Cc: Johannes Goetzfried Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 227 +++++++++++++++++----------- 1 file changed, 142 insertions(+), 85 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 35f45574390..1585abb13dd 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -4,6 +4,8 @@ * Copyright (C) 2012 Johannes Goetzfried * * + * Copyright © 2012 Jussi Kivilinna + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -47,16 +49,22 @@ #define RC2 %xmm6 #define RD2 %xmm7 -#define RX %xmm8 -#define RY %xmm9 +#define RX0 %xmm8 +#define RY0 %xmm9 + +#define RX1 %xmm10 +#define RY1 %xmm11 -#define RK1 %xmm10 -#define RK2 %xmm11 +#define RK1 %xmm12 +#define RK2 %xmm13 -#define RID1 %rax -#define RID1b %al -#define RID2 %rbx -#define RID2b %bl +#define RT %xmm14 +#define RR %xmm15 + +#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi #define RGI1 %rdx #define RGI1bl %dl @@ -65,6 +73,13 @@ #define RGI2bl %cl #define RGI2bh %ch +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + #define RGS1 %r8 #define RGS1d %r8d #define RGS2 %r9 @@ -73,89 +88,123 @@ #define RGS3d %r10d -#define lookup_32bit(t0, t1, t2, t3, src, dst) \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ - movl t0(CTX, RID1, 4), dst ## d; \ - xorl t1(CTX, RID2, 4), dst ## d; \ +#define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \ + movzbl src ## bl, RID1d; \ + movzbl src ## bh, RID2d; \ shrq $16, src; \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ + movl t0(CTX, RID1, 4), dst ## d; \ + movl t1(CTX, RID2, 4), RID2d; \ + movzbl src ## bl, RID1d; \ + xorl RID2d, dst ## d; \ + movzbl src ## bh, RID2d; \ + interleave_op(il_reg); \ xorl t2(CTX, RID1, 4), dst ## d; \ xorl t3(CTX, RID2, 4), dst ## d; -#define G(a, x, t0, t1, t2, t3) \ - vmovq a, RGI1; \ - vpsrldq $8, a, x; \ - vmovq x, RGI2; \ +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define G(gi1, gi2, x, t0, t1, t2, t3) \ + lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1); \ + lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2); \ + \ + lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none); \ + shlq $32, RGS2; \ + orq RGS1, RGS2; \ + lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none); \ + shlq $32, RGS1; \ + orq RGS1, RGS3; + +#define round_head_2(a, b, x1, y1, x2, y2) \ + vmovq b ## 1, RGI3; \ + vpextrq $1, b ## 1, RGI4; \ \ - lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ - shrq $16, RGI1; \ - lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ - shlq $32, RGS2; \ - orq RGS1, RGS2; \ + G(RGI1, RGI2, x1, s0, s1, s2, s3); \ + vmovq a ## 2, RGI1; \ + vpextrq $1, a ## 2, RGI2; \ + vmovq RGS2, x1; \ + vpinsrq $1, RGS3, x1, x1; \ \ - lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ - shrq $16, RGI2; \ - lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ - shlq $32, RGS3; \ - orq RGS1, RGS3; \ + G(RGI3, RGI4, y1, s1, s2, s3, s0); \ + vmovq b ## 2, RGI3; \ + vpextrq $1, b ## 2, RGI4; \ + vmovq RGS2, y1; \ + vpinsrq $1, RGS3, y1, y1; \ \ - vmovq RGS2, x; \ - vpinsrq $1, RGS3, x, x; + G(RGI1, RGI2, x2, s0, s1, s2, s3); \ + vmovq RGS2, x2; \ + vpinsrq $1, RGS3, x2, x2; \ + \ + G(RGI3, RGI4, y2, s1, s2, s3, s0); \ + vmovq RGS2, y2; \ + vpinsrq $1, RGS3, y2, y2; -#define encround(a, b, c, d, x, y) \ - G(a, x, s0, s1, s2, s3); \ - G(b, y, s1, s2, s3, s0); \ +#define encround_tail(a, b, c, d, x, y, prerotate) \ vpaddd x, y, x; \ + vpaddd x, RK1, RT;\ + prerotate(b); \ + vpxor RT, c, c; \ vpaddd y, x, y; \ - vpaddd x, RK1, x; \ vpaddd y, RK2, y; \ - vpxor x, c, c; \ - vpsrld $1, c, x; \ + vpsrld $1, c, RT; \ vpslld $(32 - 1), c, c; \ - vpor c, x, c; \ - vpslld $1, d, x; \ - vpsrld $(32 - 1), d, d; \ - vpor d, x, d; \ - vpxor d, y, d; - -#define decround(a, b, c, d, x, y) \ - G(a, x, s0, s1, s2, s3); \ - G(b, y, s1, s2, s3, s0); \ + vpor c, RT, c; \ + vpxor d, y, d; \ + +#define decround_tail(a, b, c, d, x, y, prerotate) \ vpaddd x, y, x; \ + vpaddd x, RK1, RT;\ + prerotate(a); \ + vpxor RT, c, c; \ vpaddd y, x, y; \ vpaddd y, RK2, y; \ vpxor d, y, d; \ vpsrld $1, d, y; \ vpslld $(32 - 1), d, d; \ vpor d, y, d; \ - vpslld $1, c, y; \ - vpsrld $(32 - 1), c, c; \ - vpor c, y, c; \ - vpaddd x, RK1, x; \ - vpxor x, c, c; - -#define encrypt_round(n, a, b, c, d) \ - vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ - vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ - encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ - encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); - -#define decrypt_round(n, a, b, c, d) \ - vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ - vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ - decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ - decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); + +#define rotate_1l(x) \ + vpslld $1, x, RR; \ + vpsrld $(32 - 1), x, x; \ + vpor x, RR, x; + +#define preload_rgi(c) \ + vmovq c, RGI1; \ + vpextrq $1, c, RGI2; + +#define encrypt_round(n, a, b, c, d, preload, prerotate) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + round_head_2(a, b, RX0, RY0, RX1, RY1); \ + encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ + preload(c ## 1); \ + encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); + +#define decrypt_round(n, a, b, c, d, preload, prerotate) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + round_head_2(a, b, RX0, RY0, RX1, RY1); \ + decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ + preload(c ## 1); \ + decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); #define encrypt_cycle(n) \ - encrypt_round((2*n), RA, RB, RC, RD); \ - encrypt_round(((2*n) + 1), RC, RD, RA, RB); + encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); + +#define encrypt_cycle_last(n) \ + encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy); #define decrypt_cycle(n) \ - decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ - decrypt_round((2*n), RA, RB, RC, RD); + decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ + decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); +#define decrypt_cycle_last(n) \ + decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ + decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy); #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ vpunpckldq x1, x0, t0; \ @@ -216,17 +265,20 @@ __twofish_enc_blk_8way: * %rcx: bool, if true: xor output */ + pushq %rbp; pushq %rbx; pushq %rcx; vmovdqu w(CTX), RK1; leaq (4*4*4)(%rdx), %rax; - inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); - inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + preload_rgi(RA1); + rotate_1l(RD1); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); + rotate_1l(RD2); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; encrypt_cycle(0); encrypt_cycle(1); @@ -235,26 +287,27 @@ __twofish_enc_blk_8way: encrypt_cycle(4); encrypt_cycle(5); encrypt_cycle(6); - encrypt_cycle(7); + encrypt_cycle_last(7); vmovdqu (w+4*4)(CTX), RK1; popq %rcx; popq %rbx; + popq %rbp; - leaq (4*4*4)(%rsi), %rax; + leaq (4*4*4)(%r11), %rax; testb %cl, %cl; jnz __enc_xor8; - outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; __enc_xor8: - outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; @@ -269,16 +322,19 @@ twofish_dec_blk_8way: * %rdx: src */ + pushq %rbp; pushq %rbx; vmovdqu (w+4*4)(CTX), RK1; leaq (4*4*4)(%rdx), %rax; - inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + preload_rgi(RC1); + rotate_1l(RA1); + inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); + rotate_1l(RA2); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; decrypt_cycle(7); decrypt_cycle(6); @@ -287,14 +343,15 @@ twofish_dec_blk_8way: decrypt_cycle(3); decrypt_cycle(2); decrypt_cycle(1); - decrypt_cycle(0); + decrypt_cycle_last(0); vmovdqu (w)(CTX), RK1; popq %rbx; + popq %rbp; - leaq (4*4*4)(%rsi), %rax; - outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); - outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + leaq (4*4*4)(%r11), %rax; + outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); ret; -- cgit v1.2.3-70-g09d2 From ddaea7869d29beb9e0042c96ea52c9cca2afd68a Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 28 Aug 2012 14:24:49 +0300 Subject: crypto: cast5-avx - tune assembler code for more performance Patch replaces 'movb' instructions with 'movzbl' to break false register dependencies, interleaves instructions better for out-of-order scheduling and merges constant 16-bit rotation with round-key variable rotation. tcrypt ECB results (128bit key): Intel Core i5-2450M: size old-vs-new new-vs-generic old-vs-generic enc dec enc dec enc dec 256 1.18x 1.18x 2.45x 2.47x 2.08x 2.10x 1k 1.20x 1.20x 2.73x 2.73x 2.28x 2.28x 8k 1.20x 1.19x 2.73x 2.73x 2.28x 2.29x [v2] - Do instruction interleaving another way to avoid adding new FPU<=>CPU register moves as these cause performance drop on Bulldozer. - Improvements to round-key variable rotation handling. - Further interleaving improvements for better out-of-order scheduling. Cc: Johannes Goetzfried Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 266 ++++++++++++++++++------------ 1 file changed, 160 insertions(+), 106 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 94693c877e3..a41a3aaba22 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -4,6 +4,8 @@ * Copyright (C) 2012 Johannes Goetzfried * * + * Copyright © 2012 Jussi Kivilinna + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -22,7 +24,6 @@ */ .file "cast5-avx-x86_64-asm_64.S" -.text .extern cast5_s1 .extern cast5_s2 @@ -57,17 +58,19 @@ #define RX %xmm8 #define RKM %xmm9 -#define RKRF %xmm10 -#define RKRR %xmm11 +#define RKR %xmm10 +#define RKRF %xmm11 +#define RKRR %xmm12 + +#define R32 %xmm13 +#define R1ST %xmm14 -#define RTMP %xmm12 -#define RMASK %xmm13 -#define R32 %xmm14 +#define RTMP %xmm15 -#define RID1 %rax -#define RID1b %al -#define RID2 %rbx -#define RID2b %bl +#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi #define RGI1 %rdx #define RGI1bl %dl @@ -76,6 +79,13 @@ #define RGI2bl %cl #define RGI2bh %ch +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + #define RFS1 %r8 #define RFS1d %r8d #define RFS2 %r9 @@ -84,60 +94,84 @@ #define RFS3d %r10d -#define lookup_32bit(src, dst, op1, op2, op3) \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ +#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + shrq $16, src; \ movl s1(, RID1, 4), dst ## d; \ op1 s2(, RID2, 4), dst ## d; \ - shrq $16, src; \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + interleave_op(il_reg); \ op2 s3(, RID1, 4), dst ## d; \ op3 s4(, RID2, 4), dst ## d; -#define F(a, x, op0, op1, op2, op3) \ +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define F_head(a, x, gi1, gi2, op0) \ op0 a, RKM, x; \ - vpslld RKRF, x, RTMP; \ - vpsrld RKRR, x, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ vpor RTMP, x, x; \ \ - vpshufb RMASK, x, x; \ - vmovq x, RGI1; \ - vpsrldq $8, x, x; \ - vmovq x, RGI2; \ - \ - lookup_32bit(RGI1, RFS1, op1, op2, op3); \ - shrq $16, RGI1; \ - lookup_32bit(RGI1, RFS2, op1, op2, op3); \ - shlq $32, RFS2; \ - orq RFS1, RFS2; \ + vmovq x, gi1; \ + vpextrq $1, x, gi2; + +#define F_tail(a, x, gi1, gi2, op1, op2, op3) \ + lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ + lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ \ - lookup_32bit(RGI2, RFS1, op1, op2, op3); \ - shrq $16, RGI2; \ - lookup_32bit(RGI2, RFS3, op1, op2, op3); \ - shlq $32, RFS3; \ - orq RFS1, RFS3; \ + lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ + shlq $32, RFS1; \ + orq RFS1, RFS3; \ \ - vmovq RFS2, x; \ + vmovq RFS2, x; \ vpinsrq $1, RFS3, x, x; -#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl) -#define F2(b, x) F(b, x, vpxor, subl, addl, xorl) -#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl) +#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ + F_head(b1, RX, RGI1, RGI2, op0); \ + F_head(b2, RX, RGI3, RGI4, op0); \ + \ + F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ + F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ + \ + vpxor a1, RX, a1; \ + vpxor a2, RTMP, a2; + +#define F1_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) +#define F2_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) +#define F3_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) -#define subround(a, b, x, n, f) \ - F ## f(b, x); \ - vpxor a, x, a; +#define subround(a1, b1, a2, b2, f) \ + F ## f ## _2(a1, b1, a2, b2); #define round(l, r, n, f) \ vbroadcastss (km+(4*n))(CTX), RKM; \ - vpinsrb $0, (kr+n)(CTX), RKRF, RKRF; \ + vpand R1ST, RKR, RKRF; \ vpsubq RKRF, R32, RKRR; \ - subround(l ## 1, r ## 1, RX, n, f); \ - subround(l ## 2, r ## 2, RX, n, f); \ - subround(l ## 3, r ## 3, RX, n, f); \ - subround(l ## 4, r ## 4, RX, n, f); + vpsrldq $1, RKR, RKR; \ + subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \ + subround(l ## 3, r ## 3, l ## 4, r ## 4, f); + +#define enc_preload_rkr() \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor kr(CTX), RKR, RKR; +#define dec_preload_rkr() \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor kr(CTX), RKR, RKR; \ + vpshufb .Lbswap128_mask, RKR, RKR; #define transpose_2x4(x0, x1, t0, t1) \ vpunpckldq x1, x0, t0; \ @@ -146,37 +180,47 @@ vpunpcklqdq t1, t0, x0; \ vpunpckhqdq t1, t0, x1; -#define inpack_blocks(in, x0, x1, t0, t1) \ +#define inpack_blocks(in, x0, x1, t0, t1, rmask) \ vmovdqu (0*4*4)(in), x0; \ vmovdqu (1*4*4)(in), x1; \ - vpshufb RMASK, x0, x0; \ - vpshufb RMASK, x1, x1; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ \ transpose_2x4(x0, x1, t0, t1) -#define outunpack_blocks(out, x0, x1, t0, t1) \ +#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \ transpose_2x4(x0, x1, t0, t1) \ \ - vpshufb RMASK, x0, x0; \ - vpshufb RMASK, x1, x1; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ vmovdqu x0, (0*4*4)(out); \ vmovdqu x1, (1*4*4)(out); -#define outunpack_xor_blocks(out, x0, x1, t0, t1) \ +#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \ transpose_2x4(x0, x1, t0, t1) \ \ - vpshufb RMASK, x0, x0; \ - vpshufb RMASK, x1, x1; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ vpxor (0*4*4)(out), x0, x0; \ vmovdqu x0, (0*4*4)(out); \ vpxor (1*4*4)(out), x1, x1; \ vmovdqu x1, (1*4*4)(out); +.data + .align 16 .Lbswap_mask: .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.L16_mask: + .byte 16, 16, 16, 16 .L32_mask: - .byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0 + .byte 32, 0, 0, 0 +.Lfirst_mask: + .byte 0x1f, 0, 0, 0 + +.text .align 16 .global __cast5_enc_blk_16way @@ -190,23 +234,24 @@ __cast5_enc_blk_16way: * %rcx: bool, if true: xor output */ + pushq %rbp; pushq %rbx; pushq %rcx; - vmovdqu .Lbswap_mask, RMASK; - vmovdqu .L32_mask, R32; - vpxor RKRF, RKRF, RKRF; + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + enc_preload_rkr(); - inpack_blocks(%rdx, RL1, RR1, RTMP, RX); - leaq (2*4*4)(%rdx), %rax; - inpack_blocks(%rax, RL2, RR2, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - inpack_blocks(%rax, RL3, RR3, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - inpack_blocks(%rax, RL4, RR4, RTMP, RX); + leaq 1*(2*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM); + inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; round(RL, RR, 0, 1); round(RR, RL, 1, 2); @@ -221,8 +266,8 @@ __cast5_enc_blk_16way: round(RL, RR, 10, 2); round(RR, RL, 11, 3); - movb rr(CTX), %al; - testb %al, %al; + movzbl rr(CTX), %eax; + testl %eax, %eax; jnz __skip_enc; round(RL, RR, 12, 1); @@ -233,28 +278,30 @@ __cast5_enc_blk_16way: __skip_enc: popq %rcx; popq %rbx; + popq %rbp; + + vmovdqa .Lbswap_mask, RKM; + leaq 1*(2*4*4)(%r11), %rax; testb %cl, %cl; jnz __enc_xor16; - outunpack_blocks(%rsi, RR1, RL1, RTMP, RX); - leaq (2*4*4)(%rsi), %rax; - outunpack_blocks(%rax, RR2, RL2, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - outunpack_blocks(%rax, RR3, RL3, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - outunpack_blocks(%rax, RR4, RL4, RTMP, RX); + outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM); + outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%r11), %rax; + outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%r11), %rax; + outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM); ret; __enc_xor16: - outunpack_xor_blocks(%rsi, RR1, RL1, RTMP, RX); - leaq (2*4*4)(%rsi), %rax; - outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX); + outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM); + outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%r11), %rax; + outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%r11), %rax; + outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM); ret; @@ -269,25 +316,26 @@ cast5_dec_blk_16way: * %rdx: src */ + pushq %rbp; pushq %rbx; - vmovdqu .Lbswap_mask, RMASK; - vmovdqu .L32_mask, R32; - vpxor RKRF, RKRF, RKRF; + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + dec_preload_rkr(); - inpack_blocks(%rdx, RL1, RR1, RTMP, RX); - leaq (2*4*4)(%rdx), %rax; - inpack_blocks(%rax, RL2, RR2, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - inpack_blocks(%rax, RL3, RR3, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - inpack_blocks(%rax, RL4, RR4, RTMP, RX); + leaq 1*(2*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM); + inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; - movb rr(CTX), %al; - testb %al, %al; + movzbl rr(CTX), %eax; + testl %eax, %eax; jnz __skip_dec; round(RL, RR, 15, 1); @@ -295,7 +343,7 @@ cast5_dec_blk_16way: round(RL, RR, 13, 2); round(RR, RL, 12, 1); -__skip_dec: +__dec_tail: round(RL, RR, 11, 3); round(RR, RL, 10, 2); round(RL, RR, 9, 1); @@ -309,14 +357,20 @@ __skip_dec: round(RL, RR, 1, 2); round(RR, RL, 0, 1); + vmovdqa .Lbswap_mask, RKM; popq %rbx; + popq %rbp; - outunpack_blocks(%rsi, RR1, RL1, RTMP, RX); - leaq (2*4*4)(%rsi), %rax; - outunpack_blocks(%rax, RR2, RL2, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - outunpack_blocks(%rax, RR3, RL3, RTMP, RX); - leaq (2*4*4)(%rax), %rax; - outunpack_blocks(%rax, RR4, RL4, RTMP, RX); + leaq 1*(2*4*4)(%r11), %rax; + outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM); + outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%r11), %rax; + outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%r11), %rax; + outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM); ret; + +__skip_dec: + vpsrldq $4, RKR, RKR; + jmp __dec_tail; -- cgit v1.2.3-70-g09d2 From c09220e1bc97d83cae445cab8dcb057fabd62361 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 28 Aug 2012 14:24:54 +0300 Subject: crypto: cast6-avx - tune assembler code for more performance Patch replaces 'movb' instructions with 'movzbl' to break false register dependencies, interleaves instructions better for out-of-order scheduling and merges constant 16-bit rotation with round-key variable rotation. tcrypt ECB results: Intel Core i5-2450M: size old-vs-new new-vs-generic old-vs-generic enc dec enc dec enc dec 256 1.13x 1.19x 2.05x 2.17x 1.82x 1.82x 1k 1.18x 1.21x 2.26x 2.33x 1.93x 1.93x 8k 1.19x 1.19x 2.32x 2.33x 1.95x 1.95x [v2] - Do instruction interleaving another way to avoid adding new FPU<=>CPU register moves as these cause performance drop on Bulldozer. - Improvements to round-key variable rotation handling. - Further interleaving improvements for better out-of-order scheduling. Cc: Johannes Goetzfried Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 276 ++++++++++++++++++------------ 1 file changed, 162 insertions(+), 114 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index d258ce0d2e0..218d283772f 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -4,6 +4,8 @@ * Copyright (C) 2012 Johannes Goetzfried * * + * Copyright © 2012 Jussi Kivilinna + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -22,7 +24,6 @@ */ .file "cast6-avx-x86_64-asm_64.S" -.text .extern cast6_s1 .extern cast6_s2 @@ -54,20 +55,21 @@ #define RC2 %xmm6 #define RD2 %xmm7 -#define RX %xmm8 +#define RX %xmm8 #define RKM %xmm9 -#define RKRF %xmm10 -#define RKRR %xmm11 +#define RKR %xmm10 +#define RKRF %xmm11 +#define RKRR %xmm12 +#define R32 %xmm13 +#define R1ST %xmm14 -#define RTMP %xmm12 -#define RMASK %xmm13 -#define R32 %xmm14 +#define RTMP %xmm15 -#define RID1 %rax -#define RID1b %al -#define RID2 %rbx -#define RID2b %bl +#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi #define RGI1 %rdx #define RGI1bl %dl @@ -76,6 +78,13 @@ #define RGI2bl %cl #define RGI2bh %ch +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + #define RFS1 %r8 #define RFS1d %r8d #define RFS2 %r9 @@ -84,95 +93,106 @@ #define RFS3d %r10d -#define lookup_32bit(src, dst, op1, op2, op3) \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ +#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + shrq $16, src; \ movl s1(, RID1, 4), dst ## d; \ op1 s2(, RID2, 4), dst ## d; \ - shrq $16, src; \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + interleave_op(il_reg); \ op2 s3(, RID1, 4), dst ## d; \ op3 s4(, RID2, 4), dst ## d; -#define F(a, x, op0, op1, op2, op3) \ +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define F_head(a, x, gi1, gi2, op0) \ op0 a, RKM, x; \ - vpslld RKRF, x, RTMP; \ - vpsrld RKRR, x, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ vpor RTMP, x, x; \ \ - vpshufb RMASK, x, x; \ - vmovq x, RGI1; \ - vpsrldq $8, x, x; \ - vmovq x, RGI2; \ - \ - lookup_32bit(RGI1, RFS1, op1, op2, op3); \ - shrq $16, RGI1; \ - lookup_32bit(RGI1, RFS2, op1, op2, op3); \ - shlq $32, RFS2; \ - orq RFS1, RFS2; \ + vmovq x, gi1; \ + vpextrq $1, x, gi2; + +#define F_tail(a, x, gi1, gi2, op1, op2, op3) \ + lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ + lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ \ - lookup_32bit(RGI2, RFS1, op1, op2, op3); \ - shrq $16, RGI2; \ - lookup_32bit(RGI2, RFS3, op1, op2, op3); \ - shlq $32, RFS3; \ - orq RFS1, RFS3; \ + lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ + shlq $32, RFS1; \ + orq RFS1, RFS3; \ \ - vmovq RFS2, x; \ + vmovq RFS2, x; \ vpinsrq $1, RFS3, x, x; -#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl) -#define F2(b, x) F(b, x, vpxor, subl, addl, xorl) -#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl) +#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ + F_head(b1, RX, RGI1, RGI2, op0); \ + F_head(b2, RX, RGI3, RGI4, op0); \ + \ + F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ + F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ + \ + vpxor a1, RX, a1; \ + vpxor a2, RTMP, a2; + +#define F1_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) +#define F2_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) +#define F3_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) -#define qop(in, out, x, f) \ - F ## f(in ## 1, x); \ - vpxor out ## 1, x, out ## 1; \ - F ## f(in ## 2, x); \ - vpxor out ## 2, x, out ## 2; \ +#define qop(in, out, f) \ + F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2); + +#define get_round_keys(nn) \ + vbroadcastss (km+(4*(nn)))(CTX), RKM; \ + vpand R1ST, RKR, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + vpsrldq $1, RKR, RKR; #define Q(n) \ - vbroadcastss (km+(4*(4*n+0)))(CTX), RKM; \ - vpinsrb $0, (kr+(4*n+0))(CTX), RKRF, RKRF; \ - vpsubq RKRF, R32, RKRR; \ - qop(RD, RC, RX, 1); \ + get_round_keys(4*n+0); \ + qop(RD, RC, 1); \ \ - vbroadcastss (km+(4*(4*n+1)))(CTX), RKM; \ - vpinsrb $0, (kr+(4*n+1))(CTX), RKRF, RKRF; \ - vpsubq RKRF, R32, RKRR; \ - qop(RC, RB, RX, 2); \ + get_round_keys(4*n+1); \ + qop(RC, RB, 2); \ \ - vbroadcastss (km+(4*(4*n+2)))(CTX), RKM; \ - vpinsrb $0, (kr+(4*n+2))(CTX), RKRF, RKRF; \ - vpsubq RKRF, R32, RKRR; \ - qop(RB, RA, RX, 3); \ + get_round_keys(4*n+2); \ + qop(RB, RA, 3); \ \ - vbroadcastss (km+(4*(4*n+3)))(CTX), RKM; \ - vpinsrb $0, (kr+(4*n+3))(CTX), RKRF, RKRF; \ - vpsubq RKRF, R32, RKRR; \ - qop(RA, RD, RX, 1); + get_round_keys(4*n+3); \ + qop(RA, RD, 1); #define QBAR(n) \ - vbroadcastss (km+(4*(4*n+3)))(CTX), RKM; \ - vpinsrb $0, (kr+(4*n+3))(CTX), RKRF, RKRF; \ - vpsubq RKRF, R32, RKRR; \ - qop(RA, RD, RX, 1); \ + get_round_keys(4*n+3); \ + qop(RA, RD, 1); \ \ - vbroadcastss (km+(4*(4*n+2)))(CTX), RKM; \ - vpinsrb $0, (kr+(4*n+2))(CTX), RKRF, RKRF; \ - vpsubq RKRF, R32, RKRR; \ - qop(RB, RA, RX, 3); \ + get_round_keys(4*n+2); \ + qop(RB, RA, 3); \ \ - vbroadcastss (km+(4*(4*n+1)))(CTX), RKM; \ - vpinsrb $0, (kr+(4*n+1))(CTX), RKRF, RKRF; \ - vpsubq RKRF, R32, RKRR; \ - qop(RC, RB, RX, 2); \ + get_round_keys(4*n+1); \ + qop(RC, RB, 2); \ \ - vbroadcastss (km+(4*(4*n+0)))(CTX), RKM; \ - vpinsrb $0, (kr+(4*n+0))(CTX), RKRF, RKRF; \ - vpsubq RKRF, R32, RKRR; \ - qop(RD, RC, RX, 1); + get_round_keys(4*n+0); \ + qop(RD, RC, 1); + +#define shuffle(mask) \ + vpshufb mask, RKR, RKR; +#define preload_rkr(n, do_mask, mask) \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor (kr+n*16)(CTX), RKR, RKR; \ + do_mask(mask); #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ vpunpckldq x1, x0, t0; \ @@ -185,37 +205,37 @@ vpunpcklqdq x3, t2, x2; \ vpunpckhqdq x3, t2, x3; -#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ +#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \ vmovdqu (0*4*4)(in), x0; \ vmovdqu (1*4*4)(in), x1; \ vmovdqu (2*4*4)(in), x2; \ vmovdqu (3*4*4)(in), x3; \ - vpshufb RMASK, x0, x0; \ - vpshufb RMASK, x1, x1; \ - vpshufb RMASK, x2, x2; \ - vpshufb RMASK, x3, x3; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) -#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ +#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ \ - vpshufb RMASK, x0, x0; \ - vpshufb RMASK, x1, x1; \ - vpshufb RMASK, x2, x2; \ - vpshufb RMASK, x3, x3; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ vmovdqu x0, (0*4*4)(out); \ vmovdqu x1, (1*4*4)(out); \ vmovdqu x2, (2*4*4)(out); \ vmovdqu x3, (3*4*4)(out); -#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ +#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ \ - vpshufb RMASK, x0, x0; \ - vpshufb RMASK, x1, x1; \ - vpshufb RMASK, x2, x2; \ - vpshufb RMASK, x3, x3; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ vpxor (0*4*4)(out), x0, x0; \ vmovdqu x0, (0*4*4)(out); \ vpxor (1*4*4)(out), x1, x1; \ @@ -225,11 +245,29 @@ vpxor (3*4*4)(out), x3, x3; \ vmovdqu x3, (3*4*4)(out); +.data + .align 16 .Lbswap_mask: .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_enc_Q_Q_QBAR_QBAR: + .byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_enc_QBAR_QBAR_QBAR_QBAR: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_dec_Q_Q_Q_Q: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 +.Lrkr_dec_Q_Q_QBAR_QBAR: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 +.Lrkr_dec_QBAR_QBAR_QBAR_QBAR: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.L16_mask: + .byte 16, 16, 16, 16 .L32_mask: - .byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0 + .byte 32, 0, 0, 0 +.Lfirst_mask: + .byte 0x1f, 0, 0, 0 + +.text .align 16 .global __cast6_enc_blk_8way @@ -243,28 +281,31 @@ __cast6_enc_blk_8way: * %rcx: bool, if true: xor output */ + pushq %rbp; pushq %rbx; pushq %rcx; - vmovdqu .Lbswap_mask, RMASK; - vmovdqu .L32_mask, R32; - vpxor RKRF, RKRF, RKRF; + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; leaq (4*4*4)(%rdx), %rax; - inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM); - inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; + preload_rkr(0, dummy, none); Q(0); Q(1); Q(2); Q(3); + preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR); Q(4); Q(5); QBAR(6); QBAR(7); + preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR); QBAR(8); QBAR(9); QBAR(10); @@ -272,20 +313,22 @@ __cast6_enc_blk_8way: popq %rcx; popq %rbx; + popq %rbp; - leaq (4*4*4)(%rsi), %rax; + vmovdqa .Lbswap_mask, RKM; + leaq (4*4*4)(%r11), %rax; testb %cl, %cl; jnz __enc_xor8; - outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM); - outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; __enc_xor8: - outunpack_xor_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM); - outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; @@ -300,36 +343,41 @@ cast6_dec_blk_8way: * %rdx: src */ + pushq %rbp; pushq %rbx; - vmovdqu .Lbswap_mask, RMASK; - vmovdqu .L32_mask, R32; - vpxor RKRF, RKRF, RKRF; + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; leaq (4*4*4)(%rdx), %rax; - inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM); - inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; + preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q); Q(11); Q(10); Q(9); Q(8); + preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR); Q(7); Q(6); QBAR(5); QBAR(4); + preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR); QBAR(3); QBAR(2); QBAR(1); QBAR(0); popq %rbx; + popq %rbp; - leaq (4*4*4)(%rsi), %rax; - outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM); - outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM); + vmovdqa .Lbswap_mask, RKM; + leaq (4*4*4)(%r11), %rax; + outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; -- cgit v1.2.3-70-g09d2 From 1ffb72a39a92c1a03b3c732280e924c02c509cd3 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 28 Aug 2012 16:46:59 +0300 Subject: crypto: camellia-x86_64 - fix sparse warnings (constant is so big) Fix "constant 0xXXXXXXXXXXXXXXXX is so big it's unsigned long" sparse warnings. Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia_glue.c | 1376 +++++++++++++++++++-------------------- 1 file changed, 688 insertions(+), 688 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 7a74d7bb326..42ffd2bbab5 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -92,715 +92,715 @@ static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* camellia sboxes */ const u64 camellia_sp10011110[256] = { - 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00, - 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700, - 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400, - 0x8500008585858500, 0x5700005757575700, 0x3500003535353500, - 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00, - 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00, - 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500, - 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100, - 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00, - 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500, - 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600, - 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00, - 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00, - 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000, - 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00, - 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00, - 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900, - 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700, - 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900, - 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600, - 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00, - 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00, - 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00, - 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00, - 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00, - 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100, - 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00, - 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200, - 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600, - 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700, - 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100, - 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700, - 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00, - 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00, - 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200, - 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600, - 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200, - 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400, - 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300, - 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100, - 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800, - 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00, - 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00, - 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00, - 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200, - 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00, - 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000, - 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200, - 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000, - 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700, - 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00, - 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00, - 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00, - 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00, - 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00, - 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300, - 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600, - 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00, - 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200, - 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600, - 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600, - 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600, - 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300, - 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00, - 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700, - 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00, - 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900, - 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600, - 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400, - 0x5900005959595900, 0x7800007878787800, 0x9800009898989800, - 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700, - 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00, - 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00, - 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200, - 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200, - 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500, - 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00, - 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900, - 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00, - 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400, - 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300, - 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900, - 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500, - 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400, - 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000, - 0x9e00009e9e9e9e00, + 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, + 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, + 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, + 0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL, + 0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL, + 0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL, + 0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL, + 0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL, + 0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL, + 0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL, + 0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL, + 0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL, + 0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL, + 0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL, + 0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL, + 0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL, + 0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL, + 0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL, + 0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL, + 0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL, + 0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL, + 0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL, + 0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL, + 0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL, + 0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL, + 0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL, + 0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL, + 0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL, + 0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL, + 0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL, + 0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL, + 0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL, + 0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL, + 0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL, + 0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL, + 0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL, + 0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL, + 0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL, + 0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL, + 0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL, + 0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL, + 0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL, + 0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL, + 0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL, + 0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL, + 0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL, + 0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL, + 0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL, + 0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL, + 0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL, + 0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL, + 0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL, + 0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL, + 0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL, + 0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL, + 0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL, + 0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL, + 0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL, + 0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL, + 0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL, + 0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL, + 0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL, + 0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL, + 0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL, + 0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL, + 0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL, + 0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL, + 0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL, + 0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL, + 0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL, + 0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL, + 0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL, + 0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL, + 0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL, + 0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL, + 0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL, + 0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL, + 0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL, + 0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL, + 0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL, + 0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL, + 0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL, + 0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL, + 0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL, + 0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL, + 0x9e00009e9e9e9e00ULL, }; const u64 camellia_sp22000222[256] = { - 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858, - 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e, - 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9, - 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a, - 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d, - 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf, - 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a, - 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242, - 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e, - 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca, - 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d, - 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f, - 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e, - 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060, - 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc, - 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434, - 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272, - 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e, - 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3, - 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad, - 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8, - 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a, - 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7, - 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a, - 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656, - 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363, - 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf, - 0x9898000000989898, 0x9797000000979797, 0x8585000000858585, - 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec, - 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f, - 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3, - 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf, - 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474, - 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636, - 0x2222000000222222, 0x3838000000383838, 0x6464000000646464, - 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c, - 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5, - 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888, - 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787, - 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323, - 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1, - 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9, - 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555, - 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa, - 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4, - 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6, - 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1, - 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5, - 0x2020000000202020, 0x8989000000898989, 0x0000000000000000, - 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef, - 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515, - 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5, - 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb, - 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8, - 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b, - 0x9494000000949494, 0x2121000000212121, 0x6666000000666666, - 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed, - 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe, - 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4, - 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c, - 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d, - 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d, - 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626, - 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c, - 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f, - 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc, - 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252, - 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d, - 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969, - 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131, - 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf, - 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575, - 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757, - 0x8484000000848484, 0x1111000000111111, 0x4545000000454545, - 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4, - 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa, - 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959, - 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292, - 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878, - 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949, - 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7, - 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393, - 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a, - 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9, - 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101, - 0x3d3d0000003d3d3d, + 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, + 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, + 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, + 0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL, + 0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL, + 0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL, + 0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL, + 0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL, + 0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL, + 0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL, + 0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL, + 0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL, + 0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL, + 0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL, + 0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL, + 0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL, + 0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL, + 0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL, + 0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL, + 0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL, + 0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL, + 0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL, + 0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL, + 0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL, + 0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL, + 0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL, + 0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL, + 0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL, + 0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL, + 0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL, + 0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL, + 0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL, + 0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL, + 0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL, + 0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL, + 0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL, + 0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL, + 0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL, + 0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL, + 0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL, + 0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL, + 0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL, + 0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL, + 0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL, + 0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL, + 0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL, + 0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL, + 0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL, + 0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL, + 0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL, + 0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL, + 0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL, + 0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL, + 0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL, + 0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL, + 0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL, + 0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL, + 0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL, + 0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL, + 0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL, + 0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL, + 0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL, + 0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL, + 0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL, + 0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL, + 0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL, + 0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL, + 0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL, + 0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL, + 0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL, + 0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL, + 0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL, + 0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL, + 0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL, + 0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL, + 0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL, + 0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL, + 0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL, + 0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL, + 0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL, + 0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL, + 0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL, + 0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL, + 0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL, + 0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL, + 0x3d3d0000003d3d3dULL, }; const u64 camellia_sp03303033[256] = { - 0x0038380038003838, 0x0041410041004141, 0x0016160016001616, - 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393, - 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272, - 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a, - 0x0075750075007575, 0x0006060006000606, 0x0057570057005757, - 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7, - 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2, - 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090, - 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7, - 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2, - 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343, - 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7, - 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f, - 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818, - 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f, - 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d, - 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c, - 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3, - 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec, - 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b, - 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636, - 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686, - 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd, - 0x0066660066006666, 0x0058580058005858, 0x0096960096009696, - 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595, - 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8, - 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef, - 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161, - 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b, - 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb, - 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8, - 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb, - 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d, - 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d, - 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919, - 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b, - 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979, - 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222, - 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1, - 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8, - 0x0012120012001212, 0x0004040004000404, 0x0074740074007474, - 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e, - 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555, - 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe, - 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131, - 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad, - 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070, - 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969, - 0x0008080008000808, 0x0062620062006262, 0x0000000000000000, - 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb, - 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545, - 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d, - 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee, - 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e, - 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6, - 0x0025250025002525, 0x0048480048004848, 0x0099990099009999, - 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b, - 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf, - 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929, - 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313, - 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363, - 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b, - 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989, - 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717, - 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3, - 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737, - 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494, - 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b, - 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a, - 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c, - 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3, - 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d, - 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5, - 0x0021210021002121, 0x0044440044004444, 0x0051510051005151, - 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939, - 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa, - 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656, - 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4, - 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e, - 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252, - 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9, - 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4, - 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a, - 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a, - 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040, - 0x004f4f004f004f4f, + 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, + 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, + 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, + 0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL, + 0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL, + 0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL, + 0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL, + 0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL, + 0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL, + 0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL, + 0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL, + 0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL, + 0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL, + 0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL, + 0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL, + 0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL, + 0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL, + 0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL, + 0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL, + 0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL, + 0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL, + 0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL, + 0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL, + 0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL, + 0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL, + 0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL, + 0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL, + 0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL, + 0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL, + 0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL, + 0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL, + 0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL, + 0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL, + 0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL, + 0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL, + 0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL, + 0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL, + 0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL, + 0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL, + 0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL, + 0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL, + 0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL, + 0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL, + 0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL, + 0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL, + 0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL, + 0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL, + 0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL, + 0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL, + 0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL, + 0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL, + 0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL, + 0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL, + 0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL, + 0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL, + 0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL, + 0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL, + 0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL, + 0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL, + 0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL, + 0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL, + 0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL, + 0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL, + 0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL, + 0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL, + 0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL, + 0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL, + 0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL, + 0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL, + 0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL, + 0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL, + 0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL, + 0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL, + 0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL, + 0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL, + 0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL, + 0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL, + 0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL, + 0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL, + 0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL, + 0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL, + 0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL, + 0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL, + 0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL, + 0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL, + 0x004f4f004f004f4fULL, }; const u64 camellia_sp00444404[256] = { - 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3, - 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057, - 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023, - 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5, - 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d, - 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af, - 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e, - 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b, - 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5, - 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a, - 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b, - 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0, - 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0, - 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb, - 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d, - 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004, - 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de, - 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c, - 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe, - 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a, - 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060, - 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0, - 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054, - 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064, - 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3, - 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6, - 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087, - 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090, - 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d, - 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8, - 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081, - 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063, - 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f, - 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9, - 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078, - 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071, - 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088, - 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9, - 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036, - 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1, - 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb, - 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad, - 0x0000777777770077, 0x0000808080800080, 0x0000828282820082, - 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5, - 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c, - 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093, - 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e, - 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd, - 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb, - 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f, - 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1, - 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d, - 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056, - 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066, - 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012, - 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099, - 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e, - 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031, - 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058, - 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c, - 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018, - 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2, - 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008, - 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050, - 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089, - 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095, - 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4, - 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db, - 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f, - 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002, - 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067, - 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2, - 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037, - 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b, - 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079, - 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e, - 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd, - 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a, - 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025, - 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa, - 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee, - 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068, - 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028, - 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1, - 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7, - 0x00009e9e9e9e009e, + 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, + 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, + 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, + 0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL, + 0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL, + 0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL, + 0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL, + 0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL, + 0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL, + 0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL, + 0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL, + 0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL, + 0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL, + 0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL, + 0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL, + 0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL, + 0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL, + 0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL, + 0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL, + 0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL, + 0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL, + 0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL, + 0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL, + 0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL, + 0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL, + 0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL, + 0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL, + 0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL, + 0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL, + 0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL, + 0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL, + 0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL, + 0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL, + 0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL, + 0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL, + 0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL, + 0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL, + 0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL, + 0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL, + 0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL, + 0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL, + 0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL, + 0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL, + 0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL, + 0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL, + 0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL, + 0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL, + 0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL, + 0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL, + 0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL, + 0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL, + 0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL, + 0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL, + 0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL, + 0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL, + 0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL, + 0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL, + 0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL, + 0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL, + 0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL, + 0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL, + 0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL, + 0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL, + 0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL, + 0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL, + 0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL, + 0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL, + 0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL, + 0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL, + 0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL, + 0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL, + 0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL, + 0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL, + 0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL, + 0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL, + 0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL, + 0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL, + 0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL, + 0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL, + 0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL, + 0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL, + 0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL, + 0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL, + 0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL, + 0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL, + 0x00009e9e9e9e009eULL, }; const u64 camellia_sp02220222[256] = { - 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858, - 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e, - 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9, - 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a, - 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d, - 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf, - 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a, - 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242, - 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e, - 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca, - 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d, - 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f, - 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e, - 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060, - 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc, - 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434, - 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272, - 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e, - 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3, - 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad, - 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8, - 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a, - 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7, - 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a, - 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656, - 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363, - 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf, - 0x0098989800989898, 0x0097979700979797, 0x0085858500858585, - 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec, - 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f, - 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3, - 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf, - 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474, - 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636, - 0x0022222200222222, 0x0038383800383838, 0x0064646400646464, - 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c, - 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5, - 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888, - 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787, - 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323, - 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1, - 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9, - 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555, - 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa, - 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4, - 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6, - 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1, - 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5, - 0x0020202000202020, 0x0089898900898989, 0x0000000000000000, - 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef, - 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515, - 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5, - 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb, - 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8, - 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b, - 0x0094949400949494, 0x0021212100212121, 0x0066666600666666, - 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed, - 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe, - 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4, - 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c, - 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d, - 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d, - 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626, - 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c, - 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f, - 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc, - 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252, - 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d, - 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969, - 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131, - 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf, - 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575, - 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757, - 0x0084848400848484, 0x0011111100111111, 0x0045454500454545, - 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4, - 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa, - 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959, - 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292, - 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878, - 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949, - 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7, - 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393, - 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a, - 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9, - 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101, - 0x003d3d3d003d3d3d, + 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, + 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, + 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, + 0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL, + 0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL, + 0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL, + 0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL, + 0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL, + 0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL, + 0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL, + 0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL, + 0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL, + 0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL, + 0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL, + 0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL, + 0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL, + 0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL, + 0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL, + 0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL, + 0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL, + 0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL, + 0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL, + 0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL, + 0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL, + 0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL, + 0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL, + 0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL, + 0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL, + 0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL, + 0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL, + 0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL, + 0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL, + 0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL, + 0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL, + 0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL, + 0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL, + 0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL, + 0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL, + 0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL, + 0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL, + 0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL, + 0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL, + 0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL, + 0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL, + 0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL, + 0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL, + 0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL, + 0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL, + 0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL, + 0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL, + 0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL, + 0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL, + 0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL, + 0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL, + 0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL, + 0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL, + 0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL, + 0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL, + 0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL, + 0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL, + 0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL, + 0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL, + 0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL, + 0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL, + 0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL, + 0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL, + 0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL, + 0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL, + 0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL, + 0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL, + 0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL, + 0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL, + 0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL, + 0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL, + 0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL, + 0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL, + 0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL, + 0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL, + 0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL, + 0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL, + 0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL, + 0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL, + 0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL, + 0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL, + 0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL, + 0x003d3d3d003d3d3dULL, }; const u64 camellia_sp30333033[256] = { - 0x3800383838003838, 0x4100414141004141, 0x1600161616001616, - 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393, - 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272, - 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a, - 0x7500757575007575, 0x0600060606000606, 0x5700575757005757, - 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7, - 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2, - 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090, - 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7, - 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2, - 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343, - 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7, - 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f, - 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818, - 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f, - 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d, - 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c, - 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3, - 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec, - 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b, - 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636, - 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686, - 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd, - 0x6600666666006666, 0x5800585858005858, 0x9600969696009696, - 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595, - 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8, - 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef, - 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161, - 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b, - 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb, - 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8, - 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb, - 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d, - 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d, - 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919, - 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b, - 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979, - 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222, - 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1, - 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8, - 0x1200121212001212, 0x0400040404000404, 0x7400747474007474, - 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e, - 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555, - 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe, - 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131, - 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad, - 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070, - 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969, - 0x0800080808000808, 0x6200626262006262, 0x0000000000000000, - 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb, - 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545, - 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d, - 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee, - 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e, - 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6, - 0x2500252525002525, 0x4800484848004848, 0x9900999999009999, - 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b, - 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf, - 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929, - 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313, - 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363, - 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b, - 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989, - 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717, - 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3, - 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737, - 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494, - 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b, - 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a, - 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c, - 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3, - 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d, - 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5, - 0x2100212121002121, 0x4400444444004444, 0x5100515151005151, - 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939, - 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa, - 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656, - 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4, - 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e, - 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252, - 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9, - 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4, - 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a, - 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a, - 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040, - 0x4f004f4f4f004f4f, + 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, + 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, + 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, + 0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL, + 0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL, + 0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL, + 0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL, + 0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL, + 0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL, + 0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL, + 0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL, + 0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL, + 0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL, + 0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL, + 0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL, + 0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL, + 0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL, + 0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL, + 0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL, + 0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL, + 0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL, + 0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL, + 0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL, + 0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL, + 0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL, + 0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL, + 0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL, + 0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL, + 0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL, + 0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL, + 0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL, + 0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL, + 0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL, + 0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL, + 0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL, + 0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL, + 0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL, + 0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL, + 0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL, + 0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL, + 0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL, + 0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL, + 0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL, + 0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL, + 0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL, + 0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL, + 0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL, + 0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL, + 0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL, + 0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL, + 0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL, + 0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL, + 0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL, + 0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL, + 0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL, + 0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL, + 0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL, + 0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL, + 0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL, + 0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL, + 0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL, + 0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL, + 0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL, + 0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL, + 0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL, + 0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL, + 0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL, + 0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL, + 0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL, + 0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL, + 0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL, + 0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL, + 0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL, + 0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL, + 0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL, + 0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL, + 0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL, + 0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL, + 0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL, + 0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL, + 0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL, + 0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL, + 0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL, + 0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL, + 0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL, + 0x4f004f4f4f004f4fULL, }; const u64 camellia_sp44044404[256] = { - 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3, - 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057, - 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023, - 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5, - 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d, - 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af, - 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e, - 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b, - 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5, - 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a, - 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b, - 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0, - 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0, - 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb, - 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d, - 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004, - 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de, - 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c, - 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe, - 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a, - 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060, - 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0, - 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054, - 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064, - 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3, - 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6, - 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087, - 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090, - 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d, - 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8, - 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081, - 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063, - 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f, - 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9, - 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078, - 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071, - 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088, - 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9, - 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036, - 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1, - 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb, - 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad, - 0x7777007777770077, 0x8080008080800080, 0x8282008282820082, - 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5, - 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c, - 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093, - 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e, - 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd, - 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb, - 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f, - 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1, - 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d, - 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056, - 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066, - 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012, - 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099, - 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e, - 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031, - 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058, - 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c, - 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018, - 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2, - 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008, - 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050, - 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089, - 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095, - 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4, - 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db, - 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f, - 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002, - 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067, - 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2, - 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037, - 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b, - 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079, - 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e, - 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd, - 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a, - 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025, - 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa, - 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee, - 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068, - 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028, - 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1, - 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7, - 0x9e9e009e9e9e009e, + 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, + 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, + 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, + 0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL, + 0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL, + 0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL, + 0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL, + 0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL, + 0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL, + 0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL, + 0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL, + 0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL, + 0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL, + 0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL, + 0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL, + 0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL, + 0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL, + 0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL, + 0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL, + 0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL, + 0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL, + 0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL, + 0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL, + 0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL, + 0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL, + 0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL, + 0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL, + 0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL, + 0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL, + 0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL, + 0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL, + 0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL, + 0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL, + 0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL, + 0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL, + 0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL, + 0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL, + 0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL, + 0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL, + 0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL, + 0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL, + 0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL, + 0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL, + 0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL, + 0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL, + 0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL, + 0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL, + 0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL, + 0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL, + 0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL, + 0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL, + 0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL, + 0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL, + 0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL, + 0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL, + 0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL, + 0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL, + 0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL, + 0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL, + 0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL, + 0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL, + 0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL, + 0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL, + 0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL, + 0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL, + 0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL, + 0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL, + 0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL, + 0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL, + 0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL, + 0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL, + 0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL, + 0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL, + 0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL, + 0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL, + 0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL, + 0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL, + 0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL, + 0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL, + 0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL, + 0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL, + 0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL, + 0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL, + 0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL, + 0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL, + 0x9e9e009e9e9e009eULL, }; const u64 camellia_sp11101110[256] = { - 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00, - 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700, - 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400, - 0x8585850085858500, 0x5757570057575700, 0x3535350035353500, - 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00, - 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00, - 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500, - 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100, - 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00, - 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500, - 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600, - 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00, - 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00, - 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000, - 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00, - 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00, - 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900, - 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700, - 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900, - 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600, - 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00, - 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00, - 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00, - 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00, - 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00, - 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100, - 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00, - 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200, - 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600, - 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700, - 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100, - 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700, - 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00, - 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00, - 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200, - 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600, - 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200, - 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400, - 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300, - 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100, - 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800, - 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00, - 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00, - 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00, - 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200, - 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00, - 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000, - 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200, - 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000, - 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700, - 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00, - 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00, - 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00, - 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00, - 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00, - 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300, - 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600, - 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00, - 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200, - 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600, - 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600, - 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600, - 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300, - 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00, - 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700, - 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00, - 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900, - 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600, - 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400, - 0x5959590059595900, 0x7878780078787800, 0x9898980098989800, - 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700, - 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00, - 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00, - 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200, - 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200, - 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500, - 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00, - 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900, - 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00, - 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400, - 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300, - 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900, - 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500, - 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400, - 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000, - 0x9e9e9e009e9e9e00, + 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, + 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, + 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, + 0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL, + 0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL, + 0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL, + 0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL, + 0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL, + 0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL, + 0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL, + 0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL, + 0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL, + 0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL, + 0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL, + 0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL, + 0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL, + 0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL, + 0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL, + 0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL, + 0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL, + 0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL, + 0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL, + 0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL, + 0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL, + 0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL, + 0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL, + 0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL, + 0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL, + 0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL, + 0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL, + 0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL, + 0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL, + 0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL, + 0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL, + 0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL, + 0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL, + 0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL, + 0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL, + 0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL, + 0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL, + 0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL, + 0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL, + 0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL, + 0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL, + 0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL, + 0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL, + 0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL, + 0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL, + 0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL, + 0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL, + 0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL, + 0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL, + 0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL, + 0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL, + 0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL, + 0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL, + 0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL, + 0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL, + 0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL, + 0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL, + 0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL, + 0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL, + 0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL, + 0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL, + 0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL, + 0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL, + 0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL, + 0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL, + 0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL, + 0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL, + 0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL, + 0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL, + 0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL, + 0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL, + 0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL, + 0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL, + 0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL, + 0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL, + 0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL, + 0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL, + 0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL, + 0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL, + 0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL, + 0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL, + 0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL, + 0x9e9e9e009e9e9e00ULL, }; /* key constants */ -- cgit v1.2.3-70-g09d2 From d4c9dbc61fe0ca042b835c6f234af12fa5f18310 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 7 Sep 2012 07:54:52 +0100 Subject: x86/mm: Fix range check in tlbflush debugfs interface Since the shift count settable there is used for shifting values of type "unsigned long", its value must not match or exceed BITS_PER_LONG (otherwise the shift operations are undefined). Similarly, the value must not be negative (but -1 must be permitted, as that's the value used to distinguish the case of the fine grained flushing being disabled). Signed-off-by: Jan Beulich Cc: Alex Shi Link: http://lkml.kernel.org/r/5049B65C020000780009990C@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 613cd83e8c0..a085c560b4a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -320,7 +320,7 @@ static ssize_t tlbflush_write_file(struct file *file, if (kstrtos8(buf, 0, &shift)) return -EINVAL; - if (shift > 64) + if (shift < -1 || shift >= BITS_PER_LONG) return -EINVAL; tlb_flushall_shift = shift; -- cgit v1.2.3-70-g09d2 From 4f97704555672f9ab48ca623561e96a9430bec9a Mon Sep 17 00:00:00 2001 From: "Ren, Yongjie" Date: Fri, 7 Sep 2012 07:36:59 +0000 Subject: KVM: x86: Check INVPCID feature bit in EBX of leaf 7 Checks and operations on the INVPCID feature bit should use EBX of CPUID leaf 7 instead of ECX. Signed-off-by: Junjie Mao Signed-off-by: Yongjie Ren Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c00f03de1b7..002b4a566e2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6575,7 +6575,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) /* Exposing INVPCID only when PCID is exposed */ best = kvm_find_cpuid_entry(vcpu, 0x7, 0); if (vmx_invpcid_supported() && - best && (best->ecx & bit(X86_FEATURE_INVPCID)) && + best && (best->ebx & bit(X86_FEATURE_INVPCID)) && guest_cpuid_has_pcid(vcpu)) { exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; vmcs_write32(SECONDARY_VM_EXEC_CONTROL, @@ -6585,7 +6585,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); if (best) - best->ecx &= ~bit(X86_FEATURE_INVPCID); + best->ebx &= ~bit(X86_FEATURE_INVPCID); } } -- cgit v1.2.3-70-g09d2 From 05194cfc547528a01d655262c49da3b1311c4b11 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 9 Sep 2012 11:12:04 -0700 Subject: x86, cpufeature: Add feature bit for SMAP Add CPUID feature bit for Supervisor Mode Access Prevention (SMAP). Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-ethzcr5nipikl6hd5q8ssepq@git.kernel.org --- arch/x86/include/asm/cpufeature.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 6b7ee5ff682..633b6176cf6 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -209,6 +209,7 @@ #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) -- cgit v1.2.3-70-g09d2 From 3dc9a633f8a65b39c5897874138027328bfb0a94 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 4 Sep 2012 08:28:02 +0000 Subject: acpi-cpufreq: Add support for modern AMD CPUs The programming model for P-states on modern AMD CPUs is very similar to that of Intel and VIA. It makes sense to consolidate this support into one driver rather than duplicating functionality between two of them. This patch adds support for AMDs with hardware P-state control to acpi-cpufreq. Signed-off-by: Matthew Garrett Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/msr-index.h | 2 ++ drivers/cpufreq/Kconfig.x86 | 3 ++- drivers/cpufreq/acpi-cpufreq.c | 43 ++++++++++++++++++++++++++++++++++------ 3 files changed, 41 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 957ec87385a..1e1f3eb5863 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -248,6 +248,8 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 78ff7ee4895..8d12e378a7e 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -23,7 +23,8 @@ config X86_ACPI_CPUFREQ help This driver adds a CPUFreq driver which utilizes the ACPI Processor Performance States. - This driver also supports Intel Enhanced Speedstep. + This driver also supports Intel Enhanced Speedstep and newer + AMD CPUs. To compile this driver as a module, choose M here: the module will be called acpi-cpufreq. diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 56c6c6b4eb4..067a61f06bb 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -54,10 +54,12 @@ MODULE_LICENSE("GPL"); enum { UNDEFINED_CAPABLE = 0, SYSTEM_INTEL_MSR_CAPABLE, + SYSTEM_AMD_MSR_CAPABLE, SYSTEM_IO_CAPABLE, }; #define INTEL_MSR_RANGE (0xffff) +#define AMD_MSR_RANGE (0x7) struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; @@ -82,6 +84,13 @@ static int check_est_cpu(unsigned int cpuid) return cpu_has(cpu, X86_FEATURE_EST); } +static int check_amd_hwpstate_cpu(unsigned int cpuid) +{ + struct cpuinfo_x86 *cpu = &cpu_data(cpuid); + + return cpu_has(cpu, X86_FEATURE_HW_PSTATE); +} + static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) { struct acpi_processor_performance *perf; @@ -101,7 +110,11 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) int i; struct acpi_processor_performance *perf; - msr &= INTEL_MSR_RANGE; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + msr &= AMD_MSR_RANGE; + else + msr &= INTEL_MSR_RANGE; + perf = data->acpi_data; for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { @@ -115,6 +128,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) { switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: + case SYSTEM_AMD_MSR_CAPABLE: return extract_msr(val, data); case SYSTEM_IO_CAPABLE: return extract_io(val, data); @@ -150,6 +164,7 @@ static void do_drv_read(void *_cmd) switch (cmd->type) { case SYSTEM_INTEL_MSR_CAPABLE: + case SYSTEM_AMD_MSR_CAPABLE: rdmsr(cmd->addr.msr.reg, cmd->val, h); break; case SYSTEM_IO_CAPABLE: @@ -174,6 +189,9 @@ static void do_drv_write(void *_cmd) lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); wrmsr(cmd->addr.msr.reg, lo, hi); break; + case SYSTEM_AMD_MSR_CAPABLE: + wrmsr(cmd->addr.msr.reg, cmd->val, 0); + break; case SYSTEM_IO_CAPABLE: acpi_os_write_port((acpi_io_address)cmd->addr.io.port, cmd->val, @@ -217,6 +235,10 @@ static u32 get_cur_val(const struct cpumask *mask) cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; + case SYSTEM_AMD_MSR_CAPABLE: + cmd.type = SYSTEM_AMD_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_AMD_PERF_STATUS; + break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; @@ -326,6 +348,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, cmd.addr.msr.reg = MSR_IA32_PERF_CTL; cmd.val = (u32) perf->states[next_perf_state].control; break; + case SYSTEM_AMD_MSR_CAPABLE: + cmd.type = SYSTEM_AMD_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_AMD_PERF_CTL; + cmd.val = (u32) perf->states[next_perf_state].control; + break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; cmd.addr.io.port = perf->control_register.address; @@ -580,12 +607,16 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) break; case ACPI_ADR_SPACE_FIXED_HARDWARE: pr_debug("HARDWARE addr space\n"); - if (!check_est_cpu(cpu)) { - result = -ENODEV; - goto err_unreg; + if (check_est_cpu(cpu)) { + data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + break; } - data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; - break; + if (check_amd_hwpstate_cpu(cpu)) { + data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE; + break; + } + result = -ENODEV; + goto err_unreg; default: pr_debug("Unknown addr space %d\n", (u32) (perf->control_register.space_id)); -- cgit v1.2.3-70-g09d2 From f594065faf4f9067c2283a34619fc0714e79a98d Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 4 Sep 2012 08:28:06 +0000 Subject: ACPI: Add fixups for AMD P-state figures Some AMD systems may round the frequencies in ACPI tables to 100MHz boundaries. We can obtain the real frequencies from MSRs, so add a quirk to fix these frequencies up on AMD systems. Signed-off-by: Matthew Garrett Signed-off-by: Andre Przywara Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/msr-index.h | 1 + drivers/acpi/processor_perflib.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1e1f3eb5863..fbee9714d9a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -248,6 +248,7 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PERF_CTL 0xc0010062 diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index a093dc163a4..836bfe06904 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -324,6 +324,34 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr) return result; } +#ifdef CONFIG_X86 +/* + * Some AMDs have 50MHz frequency multiples, but only provide 100MHz rounding + * in their ACPI data. Calculate the real values and fix up the _PSS data. + */ +static void amd_fixup_frequency(struct acpi_processor_px *px, int i) +{ + u32 hi, lo, fid, did; + int index = px->control & 0x00000007; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + + if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) + || boot_cpu_data.x86 == 0x11) { + rdmsr(MSR_AMD_PSTATE_DEF_BASE + index, lo, hi); + fid = lo & 0x3f; + did = (lo >> 6) & 7; + if (boot_cpu_data.x86 == 0x10) + px->core_frequency = (100 * (fid + 0x10)) >> did; + else + px->core_frequency = (100 * (fid + 8)) >> did; + } +} +#else +static void amd_fixup_frequency(struct acpi_processor_px *px, int i) {}; +#endif + static int acpi_processor_get_performance_states(struct acpi_processor *pr) { int result = 0; @@ -379,6 +407,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) goto end; } + amd_fixup_frequency(px, i); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n", i, -- cgit v1.2.3-70-g09d2 From 92b5265d38f6a4d33e9d43974f176f18547687d6 Mon Sep 17 00:00:00 2001 From: "Liu, Jinsong" Date: Mon, 10 Sep 2012 06:55:39 +0800 Subject: KVM: Depend on HIGH_RES_TIMERS KVM lapic timer and tsc deadline timer based on hrtimer, setting a leftmost node to rb tree and then do hrtimer reprogram. If hrtimer not configured as high resolution, hrtimer_enqueue_reprogram do nothing and then make kvm lapic timer and tsc deadline timer fail. Signed-off-by: Liu, Jinsong Signed-off-by: Avi Kivity --- arch/x86/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 45c044f0fff..586f0005980 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -20,6 +20,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + depends on HIGH_RES_TIMERS # for device assignment: depends on PCI # for TASKSTATS/TASK_DELAY_ACCT: -- cgit v1.2.3-70-g09d2 From 7de5bdc96c372ab875408c86e0099958dba89f56 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 7 Sep 2012 14:15:03 +0800 Subject: KVM: MMU: remove unnecessary check Checking the return of kvm_mmu_get_page is unnecessary since it is guaranteed by memory cache Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 399c177212b..aa0b469ee07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2616,11 +2616,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - if (!sp) { - pgprintk("nonpaging_map: ENOMEM\n"); - kvm_release_pfn_clean(pfn); - return -ENOMEM; - } mmu_spte_set(iterator.sptep, __pa(sp->spt) -- cgit v1.2.3-70-g09d2 From 4484141a94f4a5afea6ebc0b2abba0aa1b0ae9d1 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 7 Sep 2012 14:14:20 +0800 Subject: KVM: fix error paths for failed gfn_to_page() calls This bug was triggered: [ 4220.198458] BUG: unable to handle kernel paging request at fffffffffffffffe [ 4220.203907] IP: [] put_page+0xf/0x34 ...... [ 4220.237326] Call Trace: [ 4220.237361] [] kvm_arch_destroy_vm+0xf9/0x101 [kvm] [ 4220.237382] [] kvm_put_kvm+0xcc/0x127 [kvm] [ 4220.237401] [] kvm_vcpu_release+0x18/0x1c [kvm] [ 4220.237407] [] __fput+0x111/0x1ed [ 4220.237411] [] ____fput+0xe/0x10 [ 4220.237418] [] task_work_run+0x5d/0x88 [ 4220.237424] [] do_exit+0x2bf/0x7ca The test case: printf(fmt, ##args); \ exit(-1);} while (0) static int create_vm(void) { int sys_fd, vm_fd; sys_fd = open("/dev/kvm", O_RDWR); if (sys_fd < 0) die("open /dev/kvm fail.\n"); vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0); if (vm_fd < 0) die("KVM_CREATE_VM fail.\n"); return vm_fd; } static int create_vcpu(int vm_fd) { int vcpu_fd; vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); if (vcpu_fd < 0) die("KVM_CREATE_VCPU ioctl.\n"); printf("Create vcpu.\n"); return vcpu_fd; } static void *vcpu_thread(void *arg) { int vm_fd = (int)(long)arg; create_vcpu(vm_fd); return NULL; } int main(int argc, char *argv[]) { pthread_t thread; int vm_fd; (void)argc; (void)argv; vm_fd = create_vm(); pthread_create(&thread, NULL, vcpu_thread, (void *)(long)vm_fd); printf("Exit.\n"); return 0; } It caused by release kvm->arch.ept_identity_map_addr which is the error page. The parent thread can send KILL signal to the vcpu thread when it was exiting which stops faulting pages and potentially allocating memory. So gfn_to_pfn/gfn_to_page may fail at this time Fixed by checking the page before it is used Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 19 ++++++++++++++++--- arch/x86/kvm/x86.c | 13 ++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 002b4a566e2..b1eb202ee76 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3619,6 +3619,7 @@ static void seg_setup(int seg) static int alloc_apic_access_page(struct kvm *kvm) { + struct page *page; struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; @@ -3633,7 +3634,13 @@ static int alloc_apic_access_page(struct kvm *kvm) if (r) goto out; - kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); + page = gfn_to_page(kvm, 0xfee00); + if (is_error_page(page)) { + r = -EFAULT; + goto out; + } + + kvm->arch.apic_access_page = page; out: mutex_unlock(&kvm->slots_lock); return r; @@ -3641,6 +3648,7 @@ out: static int alloc_identity_pagetable(struct kvm *kvm) { + struct page *page; struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; @@ -3656,8 +3664,13 @@ static int alloc_identity_pagetable(struct kvm *kvm) if (r) goto out; - kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, - kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); + page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); + if (is_error_page(page)) { + r = -EFAULT; + goto out; + } + + kvm->arch.ept_identity_pagetable = page; out: mutex_unlock(&kvm->slots_lock); return r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 148ed666e31..2966c847d48 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5113,17 +5113,20 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) !kvm_event_needs_reinjection(vcpu); } -static void vapic_enter(struct kvm_vcpu *vcpu) +static int vapic_enter(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct page *page; if (!apic || !apic->vapic_addr) - return; + return 0; page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); + if (is_error_page(page)) + return -EFAULT; vcpu->arch.apic->vapic_page = page; + return 0; } static void vapic_exit(struct kvm_vcpu *vcpu) @@ -5430,7 +5433,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - vapic_enter(vcpu); + r = vapic_enter(vcpu); + if (r) { + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + return r; + } r = 1; while (r > 0) { -- cgit v1.2.3-70-g09d2 From 280050cc81ccb2e06e4061228ee34c0cc86b1560 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Sep 2012 22:48:33 +0200 Subject: x86 bpf_jit: support MOD operation commit b6069a9570 (filter: add MOD operation) added generic support for modulus operation in BPF. This patch brings JIT support for x86_64 Signed-off-by: Eric Dumazet Cc: Andi Kleen Cc: George Bakos Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 33643a8bcbb..106c5782912 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -280,6 +280,31 @@ void bpf_jit_compile(struct sk_filter *fp) } EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + seen |= SEEN_XREG; + EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ + if (pc_ret0 > 0) { + /* addrs[pc_ret0 - 1] is start address of target + * (addrs[i] - 6) is the address following this jmp + * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long) + */ + EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - + (addrs[i] - 6)); + } else { + EMIT_COND_JMP(X86_JNE, 2 + 5); + CLEAR_A(); + EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */ + } + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT2(0xf7, 0xf3); /* div %ebx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ + EMIT2(0xf7, 0xf1); /* div %ecx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ EMIT(K, 4); -- cgit v1.2.3-70-g09d2 From 2fc136eecd0c647a6b13fcd00d0c41a1a28f35a5 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 12 Sep 2012 12:44:30 +0100 Subject: xen/m2p: do not reuse kmap_op->dev_bus_addr If the caller passes a valid kmap_op to m2p_add_override, we use kmap_op->dev_bus_addr to store the original mfn, but dev_bus_addr is part of the interface with Xen and if we are batching the hypercalls it might not have been written by the hypervisor yet. That means that later on Xen will write to it and we'll think that the original mfn is actually what Xen has written to it. Rather than "stealing" struct members from kmap_op, keep using page->index to store the original mfn and add another parameter to m2p_remove_override to get the corresponding kmap_op instead. It is now responsibility of the caller to keep track of which kmap_op corresponds to a particular page in the m2p_override (gntdev, the only user of this interface that passes a valid kmap_op, is already doing that). CC: stable@kernel.org Reported-and-Tested-By: Sander Eikelenboom Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 3 ++- arch/x86/xen/p2m.c | 27 +++++++++++---------------- drivers/block/xen-blkback/blkback.c | 2 +- drivers/xen/gntdev.c | 5 +++-- drivers/xen/grant-table.c | 6 ++++-- include/xen/grant_table.h | 3 ++- 6 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 93971e841dd..472b9b78301 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); -extern int m2p_remove_override(struct page *page, bool clear_pte); +extern int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 76ba0e97e53..72213da605f 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, xen_mc_issue(PARAVIRT_LAZY_MMU); } - /* let's use dev_bus_addr to record the old mfn instead */ - kmap_op->dev_bus_addr = page->index; - page->index = (unsigned long) kmap_op; } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); @@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); -int m2p_remove_override(struct page *page, bool clear_pte) +int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long mfn; @@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) WARN_ON(!PagePrivate(page)); ClearPagePrivate(page); - if (clear_pte) { - struct gnttab_map_grant_ref *map_op = - (struct gnttab_map_grant_ref *) page->index; - set_phys_to_machine(pfn, map_op->dev_bus_addr); + set_phys_to_machine(pfn, page->index); + if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; struct gnttab_unmap_grant_ref *unmap_op; @@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte) * issued. In this case handle is going to -1 because * it hasn't been modified yet. */ - if (map_op->handle == -1) + if (kmap_op->handle == -1) xen_mc_flush(); /* - * Now if map_op->handle is negative it means that the + * Now if kmap_op->handle is negative it means that the * hypercall actually returned an error. */ - if (map_op->handle == GNTST_general_error) { + if (kmap_op->handle == GNTST_general_error) { printk(KERN_WARNING "m2p_remove_override: " "pfn %lx mfn %lx, failed to modify kernel mappings", pfn, mfn); @@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) mcs = xen_mc_entry( sizeof(struct gnttab_unmap_grant_ref)); unmap_op = mcs.args; - unmap_op->host_addr = map_op->host_addr; - unmap_op->handle = map_op->handle; + unmap_op->host_addr = kmap_op->host_addr; + unmap_op->handle = kmap_op->handle; unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, @@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); __flush_tlb_single(address); - map_op->host_addr = 0; + kmap_op->host_addr = 0; } - } else - set_phys_to_machine(pfn, page->index); + } /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present * somewhere in this domain, even before being added to the diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 73f196ca713..c6decb901e5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -337,7 +337,7 @@ static void xen_blkbk_unmap(struct pending_req *req) invcount++; } - ret = gnttab_unmap_refs(unmap, pages, invcount, false); + ret = gnttab_unmap_refs(unmap, NULL, pages, invcount); BUG_ON(ret); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1ffd03bf8e1..7f124160848 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -314,8 +314,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, - pages, true); + err = gnttab_unmap_refs(map->unmap_ops + offset, + use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset, + pages); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 0bfc1ef1125..006726688ba 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -870,7 +870,8 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, EXPORT_SYMBOL_GPL(gnttab_map_refs); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count, bool clear_pte) + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret; bool lazy = false; @@ -888,7 +889,8 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i], clear_pte); + ret = m2p_remove_override(pages[i], kmap_ops ? + &kmap_ops[i] : NULL); if (ret) return ret; } diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 11e27c3af3c..f19fff8650e 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -187,6 +187,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count, bool clear_pte); + struct gnttab_map_grant_ref *kunmap_ops, + struct page **pages, unsigned int count); #endif /* __ASM_GNTTAB_H__ */ -- cgit v1.2.3-70-g09d2 From ecba9a52acdf20530d561b7634b80c35c308943a Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 5 Sep 2012 19:30:01 +0900 Subject: KVM: x86: lapic: Clean up find_highest_vector() and count_vectors() find_highest_vector() and count_vectors(): - Instead of using magic values, define and use proper macros. find_highest_vector(): - Remove likely() which is there only for historical reasons and not doing correct branch predictions anymore. Using such heuristics to optimize this function is not worth it now. Let CPUs predict things instead. - Stop checking word[0] separately. This was only needed for doing likely() optimization. - Use for loop, not while, to iterate over the register array to make the code clearer. Note that we actually confirmed that the likely() did wrong predictions by inserting debug code. Acked-by: Michael S. Tsirkin Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 07ad628dadb..6f9fd633c88 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -66,6 +66,7 @@ #define APIC_DEST_NOSHORT 0x0 #define APIC_DEST_MASK 0x800 #define MAX_APIC_VECTOR 256 +#define APIC_VECTORS_PER_REG 32 #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) @@ -208,25 +209,30 @@ static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = { static int find_highest_vector(void *bitmap) { - u32 *word = bitmap; - int word_offset = MAX_APIC_VECTOR >> 5; + int vec; + u32 *reg; - while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) - continue; + for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; + vec >= 0; vec -= APIC_VECTORS_PER_REG) { + reg = bitmap + REG_POS(vec); + if (*reg) + return fls(*reg) - 1 + vec; + } - if (likely(!word_offset && !word[0])) - return -1; - else - return fls(word[word_offset << 2]) - 1 + (word_offset << 5); + return -1; } static u8 count_vectors(void *bitmap) { - u32 *word = bitmap; - int word_offset; + int vec; + u32 *reg; u8 count = 0; - for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset) - count += hweight32(word[word_offset << 2]); + + for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) { + reg = bitmap + REG_POS(vec); + count += hweight32(*reg); + } + return count; } -- cgit v1.2.3-70-g09d2 From 35534b201c9f115c68962c095b5a9aad204d025f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 29 Aug 2012 15:01:22 +0200 Subject: perf/x86: Export Sandy Bridge uncore clockticks event in sysfs This patch exports the clockticks event and its encoding to user level. The clockticks event was exported for Nehalem/Westmere but not for Sandy Bridge (client). Given that it uses a special encoding, it needs to be exported to user tools, so users can do: # perf stat -a -C 0 -e uncore_cbox_0/clockticks/ sleep 1 Signed-off-by: Stephane Eranian Acked-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120829130122.GA32336@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0a5571080e7..38e4894165b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static struct uncore_event_desc snb_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + { /* end: all zeroes */ }, +}; + static struct attribute *snb_uncore_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = { .constraints = snb_uncore_cbox_constraints, .ops = &snb_uncore_msr_ops, .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, }; static struct intel_uncore_type *snb_msr_uncores[] = { -- cgit v1.2.3-70-g09d2 From bad9ac2d7f878a31cf1ae8c1ee3768077d222bcb Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 25 Jul 2012 19:12:45 +0200 Subject: perf/x86/ibs: Check syscall attribute flags Current implementation simply ignores attribute flags. Thus, there is no notification to userland of unsupported features. Check syscall's attribute flags to let userland know if a feature is supported by the kernel. This is also needed to distinguish between future kernels what might support a feature. Cc: v3.5.. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120910093018.GO8285@erda.amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd_ibs.c | 12 ++++++++++++ include/linux/perf_event.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 7bfb5bec863..eebd5ffe1bb 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } +static const struct perf_event_attr ibs_notsupp = { + .exclude_user = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + .exclude_idle = 1, + .exclude_host = 1, + .exclude_guest = 1, +}; + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event) if (event->pmu != &perf_ibs->pmu) return -ENOENT; + if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) + return -EINVAL; + if (config & ~perf_ibs->config_mask) return -EINVAL; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 33ed9d605f9..bdb41612bfe 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -274,6 +274,8 @@ struct perf_event_attr { __u64 branch_sample_type; /* enum branch_sample_type */ }; +#define perf_flags(attr) (*(&(attr)->read_format + 1)) + /* * Ioctls that can be done on a perf event fd: */ -- cgit v1.2.3-70-g09d2 From 6eebdda35e6b18d0dddb2a44e34211bd94f0cad6 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 24 Aug 2012 23:58:47 +0400 Subject: x86: Drop unnecessary kernel_eflags variable on 64-bit On 64 bit x86 we save the current eflags in cpu_init for use in ret_from_fork. Strictly speaking reserved bits in EFLAGS should be read as written but in practise it is unlikely that EFLAGS could ever be extended in this way and the kernel alread clears any undefined flags early on. The equivalent 32 bit code simply hard codes 0x0202 as the new EFLAGS. This change makes 64 bit use the same mechanism to setup the initial EFLAGS on fork. Note that 64 bit resets EFLAGS before calling schedule_tail() as opposed to 32 bit which calls schedule_tail() first. Therefore the correct value for EFLAGS has opposite IF bit. Signed-off-by: Ian Campbell Signed-off-by: Cyrill Gorcunov Acked-by: Andi Kleen Acked-by: "H. Peter Anvin" Cc: Brian Gerst Cc: Peter Zijlstra Cc: Pekka Enberg Cc: Andi Kleen Link: http://lkml.kernel.org/r/20120824195847.GA31628@moon Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 - arch/x86/kernel/cpu/common.c | 4 ---- arch/x86/kernel/entry_64.S | 2 +- 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d048cad9bca..9738b39e4eb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); -extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fcc..9961e2e2370 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1116,8 +1116,6 @@ void syscall_init(void) X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } -unsigned long kernel_eflags; - /* * Copies of the original ist values from the tss are only accessed during * debugging, no special alignment required. @@ -1299,8 +1297,6 @@ void __cpuinit cpu_init(void) fpu_init(); xsave_init(); - raw_local_save_flags(kernel_eflags); - if (is_uv_system()) uv_cpu_init(); } diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8c834..b1dac12dc5e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -440,7 +440,7 @@ ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK,TI_flags(%r8) - pushq_cfi kernel_eflags(%rip) + pushq_cfi $0x0002 popfq_cfi # reset kernel eflags call schedule_tail # rdi: 'prev' task parameter -- cgit v1.2.3-70-g09d2 From 73e8f3d7e2cb23614d5115703d76d8e54764b641 Mon Sep 17 00:00:00 2001 From: T Makphaibulchoke Date: Tue, 28 Aug 2012 21:21:43 -0600 Subject: x86/mm/init.c: Fix devmem_is_allowed() off by one Fixing an off-by-one error in devmem_is_allowed(), which allows accesses to physical addresses 0x100000-0x100fff, an extra page past 1MB. Signed-off-by: T Makphaibulchoke Acked-by: H. Peter Anvin Cc: yinghai@kernel.org Cc: tiwai@suse.de Cc: dhowells@redhat.com Link: http://lkml.kernel.org/r/1346210503-14276-1-git-send-email-tmac@hp.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e0e6990723e..ab1f6a93b52 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -319,7 +319,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, */ int devmem_is_allowed(unsigned long pagenr) { - if (pagenr <= 256) + if (pagenr < 256) return 1; if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) return 0; -- cgit v1.2.3-70-g09d2 From 1edfbb4153bd29bcf8d2236676238d5237972be1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 10 Sep 2012 12:04:16 +0100 Subject: x86/64: Adjust types of temporaries used by ffs()/fls()/fls64() The 64-bit special cases of the former two (the thrird one is 64-bit only anyway) don't need to use "long" temporaries, as the result will always fit in a 32-bit variable, and the functions return plain "int". This avoids a few REX prefixes, i.e. minimally reduces code size. Signed-off-by: Jan Beulich Cc: Linus Torvalds Link: http://lkml.kernel.org/r/504DE550020000780009A258@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bitops.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 72f5009deb5..ebaee695394 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -417,10 +417,9 @@ static inline int ffs(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -459,10 +458,9 @@ static inline int fls(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -490,13 +488,13 @@ static inline int fls(int x) #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { - long bitpos = -1; + int bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ - asm("bsrq %1,%0" + asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; -- cgit v1.2.3-70-g09d2 From 5870661c091e827973674cc3469b50c959008c2b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 10 Sep 2012 12:24:43 +0100 Subject: x86: Prefer TZCNT over BFS Following a relatively recent compiler change, make use of the fact that for non-zero input BSF and TZCNT produce the same result, and that CPUs not knowing of TZCNT will treat the instruction as BSF (i.e. ignore what looks like a REP prefix to them). The assumption here is that TZCNT would never have worse performance than BSF. For the moment, only do this when the respective generic-CPU option is selected (as there are no specific-CPU options covering the CPUs supporting TZCNT), and don't do that when size optimization was requested. Signed-off-by: Jan Beulich Cc: Linus Torvalds Link: http://lkml.kernel.org/r/504DEA1B020000780009A277@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bitops.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index ebaee695394..b2af6645ea7 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -347,6 +347,19 @@ static int test_bit(int nr, const volatile unsigned long *addr); ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +#if (defined(CONFIG_X86_GENERIC) || defined(CONFIG_GENERIC_CPU)) \ + && !defined(CONFIG_CC_OPTIMIZE_FOR_SIZE) +/* + * Since BSF and TZCNT have sufficiently similar semantics for the purposes + * for which we use them here, BMI-capable hardware will decode the prefixed + * variant as 'tzcnt ...' and may execute that faster than 'bsf ...', while + * older hardware will ignore the REP prefix and decode it as 'bsf ...'. + */ +# define BSF_PREFIX "rep;" +#else +# define BSF_PREFIX +#endif + /** * __ffs - find first set bit in word * @word: The word to search @@ -355,7 +368,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); */ static inline unsigned long __ffs(unsigned long word) { - asm("bsf %1,%0" + asm(BSF_PREFIX "bsf %1,%0" : "=r" (word) : "rm" (word)); return word; @@ -369,12 +382,14 @@ static inline unsigned long __ffs(unsigned long word) */ static inline unsigned long ffz(unsigned long word) { - asm("bsf %1,%0" + asm(BSF_PREFIX "bsf %1,%0" : "=r" (word) : "r" (~word)); return word; } +#undef BSF_PREFIX + /* * __fls: find last set bit in word * @word: The word to search -- cgit v1.2.3-70-g09d2 From 3120e25efdc0834c88e1c0f8394e2087444f8c19 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 10 Sep 2012 12:41:45 +0100 Subject: x86/Kconfig: Clean up Kconfig defaults The main goal here is to have the resulting .config no carry any options that aren't enabled and can't be (i.e such where the default is "no" and can't be changed), so that if any such option later gets a user visible prompt, the user will actually be prompted on a "make ...oldconfig" rather than keeping the previously invisible option disabled. There's a little bit of other trivial cleanup mixed in here. Signed-off-by: Jan Beulich Cc: Linus Torvalds Link: http://lkml.kernel.org/r/504DEE19020000780009A285@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 50 ++++++++++++++++++++++++++++++-------------------- arch/x86/Kconfig.cpu | 5 +++-- 2 files changed, 33 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..3fb871908ab 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -7,11 +7,13 @@ config 64BIT Say no to build a 32-bit kernel - formerly known as i386 config X86_32 - def_bool !64BIT + def_bool y + depends on !64BIT select CLKSRC_I8253 config X86_64 - def_bool 64BIT + def_bool y + depends on 64BIT select X86_DEV_DMA_OPS ### Arch settings @@ -99,7 +101,8 @@ config X86 select GENERIC_STRNLEN_USER config INSTRUCTION_DECODER - def_bool (KPROBES || PERF_EVENTS || UPROBES) + def_bool y + depends on KPROBES || PERF_EVENTS || UPROBES config OUTPUT_FORMAT string @@ -127,13 +130,15 @@ config SBUS bool config NEED_DMA_MAP_STATE - def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG) + def_bool y + depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG config NEED_SG_DMA_LENGTH def_bool y config GENERIC_ISA_DMA - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config GENERIC_BUG def_bool y @@ -150,13 +155,16 @@ config GENERIC_GPIO bool config ARCH_MAY_HAVE_PC_FDC - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config RWSEM_GENERIC_SPINLOCK - def_bool !X86_XADD + def_bool y + depends on !X86_XADD config RWSEM_XCHGADD_ALGORITHM - def_bool X86_XADD + def_bool y + depends on X86_XADD config GENERIC_CALIBRATE_DELAY def_bool y @@ -752,7 +760,8 @@ config SWIOTLB 3 GB of memory. If unsure, say Y. config IOMMU_HELPER - def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) + def_bool y + depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" @@ -1159,10 +1168,12 @@ config X86_PAE consumes more pagetable space per process. config ARCH_PHYS_ADDR_T_64BIT - def_bool X86_64 || X86_PAE + def_bool y + depends on X86_64 || X86_PAE config ARCH_DMA_ADDR_T_64BIT - def_bool X86_64 || HIGHMEM64G + def_bool y + depends on X86_64 || HIGHMEM64G config DIRECT_GBPAGES bool "Enable 1GB pages for kernel pagetables" if EXPERT @@ -1285,8 +1296,8 @@ config ARCH_SELECT_MEMORY_MODEL depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE - def_bool X86_64 - depends on MEMORY_HOTPLUG + def_bool y + depends on X86_64 && MEMORY_HOTPLUG config ARCH_PROC_KCORE_TEXT def_bool y @@ -1975,7 +1986,6 @@ config PCI_MMCONFIG config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT - default n depends on PCI && EXPERIMENTAL help Read the PCI windows out of the CNB20LE host bridge. This allows @@ -2186,18 +2196,18 @@ config COMPAT depends on IA32_EMULATION || X86_X32 select ARCH_WANT_OLD_COMPAT_IPC +if COMPAT config COMPAT_FOR_U64_ALIGNMENT - def_bool COMPAT - depends on X86_64 + def_bool y config SYSVIPC_COMPAT def_bool y - depends on COMPAT && SYSVIPC + depends on SYSVIPC config KEYS_COMPAT - bool - depends on COMPAT && KEYS - default y + def_bool y + depends on KEYS +endif endmenu diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 706e12e9984..f3b86d0df44 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT default X86_L1_CACHE_SHIFT config X86_CMPXCHG - def_bool X86_64 || (X86_32 && !M386) + def_bool y + depends on X86_64 || (X86_32 && !M386) config X86_L1_CACHE_SHIFT int @@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT config X86_XADD def_bool y - depends on X86_64 || !M386 + depends on !M386 config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" -- cgit v1.2.3-70-g09d2 From a5e37863ab31d78faddff15675c89979792bc0bd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Sep 2012 23:31:00 +0900 Subject: ftrace/x86: Adjust x86 regs.ip as like as x86-64 Adjust x86 regs.ip to ip + MCOUNT_INSN_SIZE as like as on x86-64. This helps us to consolidate codes which use regs->ip on both of x86/x86-64. Link: http://lkml.kernel.org/r/20120905143100.10329.60109.stgit@localhost.localdomain Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_32.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 061ac17ee97..f438a44bf8f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1148,7 +1148,6 @@ ENTRY(ftrace_regs_caller) * ip location, and move flags into the return ip location. */ pushl 4(%esp) /* save return ip into ip slot */ - subl $MCOUNT_INSN_SIZE, (%esp) /* Adjust ip */ pushl $0 /* Load 0 into orig_ax */ pushl %gs @@ -1169,6 +1168,7 @@ ENTRY(ftrace_regs_caller) movl $__KERNEL_CS,13*4(%esp) movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ pushl %esp /* Save pt_regs as 4th parameter */ @@ -1180,7 +1180,6 @@ GLOBAL(ftrace_regs_call) movl 14*4(%esp), %eax /* Move flags back into cs */ movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ movl 12*4(%esp), %eax /* Get return ip from regs->ip */ - addl $MCOUNT_INSN_SIZE, %eax movl %eax, 14*4(%esp) /* Put return ip back for ret */ popl %ebx -- cgit v1.2.3-70-g09d2 From 4b036d54bf849a75d0103b33d92a53f89ecb9315 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Sep 2012 23:31:12 +0900 Subject: kprobes/x86: Fix kprobes to collectly handle IP on ftrace Current kprobe_ftrace_handler expects regs->ip == ip, but it is incorrect (originally on x86-64). Actually, ftrace handler sets regs->ip = ip + MCOUNT_INSN_SIZE. kprobe_ftrace_handler must take care for that. Link: http://lkml.kernel.org/r/20120905143112.10329.72069.stgit@localhost.localdomain Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/kprobes.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 47ae1023a93..f49f60cca40 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1072,7 +1072,8 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (kprobe_running()) { kprobes_inc_nmissed_count(p); } else { - regs->ip += sizeof(kprobe_opcode_t); + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -1080,13 +1081,15 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, p->pre_handler(p, regs); if (unlikely(p->post_handler)) { - /* Emulate singlestep as if there is a 5byte nop */ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ regs->ip = ip + MCOUNT_INSN_SIZE; kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); } __this_cpu_write(current_kprobe, NULL); - regs->ip = ip; /* Recover for next callback */ } end: local_irq_restore(flags); -- cgit v1.2.3-70-g09d2 From 47d5a5f88b9d25d6464c9b60c28f391e84e3ed65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Sep 2012 23:31:18 +0900 Subject: ftrace/x86-64: Allow to change RIP in handlers Allow ftrace handlers to change RIP register (regs->ip) in handlers. This will allow handlers to call another function instead of original function. Link: http://lkml.kernel.org/r/20120905143118.10329.5078.stgit@localhost.localdomain Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/entry_64.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ed767b747fe..e9cc2b32bdf 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -165,6 +165,10 @@ GLOBAL(ftrace_regs_call) movq EFLAGS(%rsp), %rax movq %rax, SS(%rsp) + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, SS+8(%rsp) + /* restore the rest of pt_regs */ movq R15(%rsp), %r15 movq R14(%rsp), %r14 -- cgit v1.2.3-70-g09d2 From c6aaf4d0bb86e2154ea31a33804cec300611255f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Sep 2012 23:31:25 +0900 Subject: kprobes/x86: Fix to support jprobes on ftrace-based kprobe Fix kprobes/x86 to support jprobes on ftrace-based kprobes. Because of -mfentry support of ftrace, ftrace is now put on the beginning of function where jprobes are put. Originally ftrace-based kprobes doesn't support jprobe because it will change regs->ip and ftrace doesn't support changing IP and ftrace itself doesn't conflict jprobe. However, ftrace -mfentry support moves mcount call on the top of functions where jprobes are put. This means that jprobe always conflicts with ftrace-based kprobe and fails. This patch allows ftrace-based kprobes to support jprobes by allowing to modify regs->ip and kprobes breakpoint handler also allows to skip singlestepping because there is a ftrace call (not an original instruction). Link: http://lkml.kernel.org/r/20120905143125.10329.90836.stgit@localhost.localdomain Reported-by: Fengguang Wu Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/kernel/kprobes.c | 42 +++++++++++++++++++++++++++++------------- kernel/kprobes.c | 3 --- 2 files changed, 29 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index f49f60cca40..b7c2a85d192 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,6 +541,8 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb); /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -599,6 +601,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { +#ifdef KPROBES_CAN_USE_FTRACE + if (kprobe_ftrace(p)) { + skip_singlestep(p, regs, kcb); + return 1; + } +#endif setup_singlestep(p, regs, kcb, 0); return 1; } @@ -1053,6 +1061,21 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } #ifdef KPROBES_CAN_USE_FTRACE +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} + /* Ftrace callback handler for kprobes */ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) @@ -1077,19 +1100,12 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (p->pre_handler) - p->pre_handler(p, regs); - - if (unlikely(p->post_handler)) { - /* - * Emulate singlestep (and also recover regs->ip) - * as if there is a 5byte nop - */ - regs->ip = ip + MCOUNT_INSN_SIZE; - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - __this_cpu_write(current_kprobe, NULL); + if (!p->pre_handler || !p->pre_handler(p, regs)) + skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ } end: local_irq_restore(flags); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 35b4315d84f..098f396aa40 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1418,9 +1418,6 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, /* Given address is not on the instruction boundary */ if ((unsigned long)p->addr != ftrace_addr) return -EILSEQ; - /* break_handler (jprobe) can not work with ftrace */ - if (p->break_handler) - return -EINVAL; p->flags |= KPROBE_FLAG_FTRACE; #else /* !KPROBES_CAN_USE_FTRACE */ return -EINVAL; -- cgit v1.2.3-70-g09d2 From bdc1e47217315be14ba04881b0a4c8ecb3ff320c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 20 Aug 2012 12:47:34 +0200 Subject: uprobes/x86: Implement x86 specific arch_uprobe_*_step The arch specific implementation behaves like user_enable_single_step() except that it does not disable single stepping if it was already enabled by ptrace. This allows the debugger to single step over an uprobe. The state of block stepping is not restored. It makes only sense together with TF and if that was enabled then the debugger is notified. Note: this is still not correct. For example, TIF_SINGLESTEP check is not right, the application itself can set X86_EFLAGS_TF. And otoh we leak TIF_SINGLESTEP (set by enable) if the probed insn is "popf". See the next patches, we need the changes in arch/x86/kernel/step.c first. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/include/asm/uprobes.h | 2 ++ arch/x86/kernel/uprobes.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index f3971bbcd1d..cee58624cb3 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -46,6 +46,8 @@ struct arch_uprobe_task { #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif +#define UPROBE_CLEAR_TF (1 << 0) + unsigned int restore_flags; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd42091fa..309a0e02b12 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -41,6 +41,9 @@ /* Adjust the return address of a call insn */ #define UPROBE_FIX_CALL 0x2 +/* Instruction will modify TF, don't change it */ +#define UPROBE_FIX_SETF 0x4 + #define UPROBE_FIX_RIP_AX 0x8000 #define UPROBE_FIX_RIP_CX 0x4000 @@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) insn_get_opcode(insn); /* should be a nop */ switch (OPCODE1(insn)) { + case 0x9d: + /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; case 0xc3: /* ret/lret */ case 0xcb: case 0xc2: @@ -673,3 +680,29 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) } return false; } + +void arch_uprobe_enable_step(struct arch_uprobe *auprobe) +{ + struct uprobe_task *utask = current->utask; + struct arch_uprobe_task *autask = &utask->autask; + + autask->restore_flags = 0; + if (!test_tsk_thread_flag(current, TIF_SINGLESTEP) && + !(auprobe->fixups & UPROBE_FIX_SETF)) + autask->restore_flags |= UPROBE_CLEAR_TF; + /* + * The state of TIF_BLOCKSTEP is not saved. With the TF flag set we + * would to examine the opcode and the flags to make it right. Without + * TF block stepping makes no sense. + */ + user_enable_single_step(current); +} + +void arch_uprobe_disable_step(struct arch_uprobe *auprobe) +{ + struct uprobe_task *utask = current->utask; + struct arch_uprobe_task *autask = &utask->autask; + + if (autask->restore_flags & UPROBE_CLEAR_TF) + user_disable_single_step(current); +} -- cgit v1.2.3-70-g09d2 From 848e8f5f0ad3169560c516fff6471be65f76e69f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 3 Aug 2012 17:31:46 +0200 Subject: ptrace/x86: Introduce set_task_blockstep() helper No functional changes, preparation for the next fix and for uprobes single-step fixes. Move the code playing with TIF_BLOCKSTEP/DEBUGCTLMSR_BTF into the new helper, set_task_blockstep(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/step.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d116148..7a514986ca0 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,6 +157,21 @@ static int enable_single_step(struct task_struct *child) return 1; } +static void set_task_blockstep(struct task_struct *task, bool on) +{ + unsigned long debugctl; + + debugctl = get_debugctlmsr(); + if (on) { + debugctl |= DEBUGCTLMSR_BTF; + set_tsk_thread_flag(task, TIF_BLOCKSTEP); + } else { + debugctl &= ~DEBUGCTLMSR_BTF; + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); + } + update_debugctlmsr(debugctl); +} + /* * Enable single or block step. */ @@ -169,19 +184,10 @@ static void enable_step(struct task_struct *child, bool block) * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (enable_single_step(child) && block) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - set_tsk_thread_flag(child, TIF_BLOCKSTEP); - } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (enable_single_step(child) && block) + set_task_blockstep(child, true); + else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); } void user_enable_single_step(struct task_struct *child) @@ -199,13 +205,8 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); -- cgit v1.2.3-70-g09d2 From 95cf00fa5d5e2a200a2c044c84bde8389a237e02 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 11 Aug 2012 18:06:42 +0200 Subject: ptrace/x86: Partly fix set_task_blockstep()->update_debugctlmsr() logic Afaics the usage of update_debugctlmsr() and TIF_BLOCKSTEP in step.c was always very wrong. 1. update_debugctlmsr() was simply unneeded. The child sleeps TASK_TRACED, __switch_to_xtra(next_p => child) should notice TIF_BLOCKSTEP and set/clear DEBUGCTLMSR_BTF after resume if needed. 2. It is wrong. The state of DEBUGCTLMSR_BTF bit in CPU register should always match the state of current's TIF_BLOCKSTEP bit. 3. Even get_debugctlmsr() + update_debugctlmsr() itself does not look right. Irq can change other bits in MSR_IA32_DEBUGCTLMSR register or the caller can be preempted in between. 4. It is not safe to play with TIF_BLOCKSTEP if task != current. DEBUGCTLMSR_BTF and TIF_BLOCKSTEP should always match each other if the task is running. The tracee is stopped but it can be SIGKILL'ed right before set/clear_tsk_thread_flag(). However, now that uprobes uses user_enable_single_step(current) we can't simply remove update_debugctlmsr(). So this patch adds the additional "task == current" check and disables irqs to avoid the race with interrupts/preemption. Unfortunately this patch doesn't solve the last problem, we need another fix. Probably we should teach ptrace_stop() to set/clear single/block stepping after resume. And afaics there is yet another problem: perf can play with MSR_IA32_DEBUGCTLMSR from nmi, this obviously means that even __switch_to_xtra() has problems. Signed-off-by: Oleg Nesterov --- arch/x86/kernel/step.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 7a514986ca0..f89cdc6ccd5 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -161,6 +161,16 @@ static void set_task_blockstep(struct task_struct *task, bool on) { unsigned long debugctl; + /* + * Ensure irq/preemption can't change debugctl in between. + * Note also that both TIF_BLOCKSTEP and debugctl should + * be changed atomically wrt preemption. + * FIXME: this means that set/clear TIF_BLOCKSTEP is simply + * wrong if task != current, SIGKILL can wakeup the stopped + * tracee and set/clear can play with the running task, this + * can confuse the next __switch_to_xtra(). + */ + local_irq_disable(); debugctl = get_debugctlmsr(); if (on) { debugctl |= DEBUGCTLMSR_BTF; @@ -169,7 +179,9 @@ static void set_task_blockstep(struct task_struct *task, bool on) debugctl &= ~DEBUGCTLMSR_BTF; clear_tsk_thread_flag(task, TIF_BLOCKSTEP); } - update_debugctlmsr(debugctl); + if (task == current) + update_debugctlmsr(debugctl); + local_irq_enable(); } /* -- cgit v1.2.3-70-g09d2 From 9bd1190a11c9d2c59d35cb999b8d170ad52aab5f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 3 Sep 2012 15:24:17 +0200 Subject: uprobes/x86: Do not (ab)use TIF_SINGLESTEP/user_*_single_step() for single-stepping user_enable/disable_single_step() was designed for ptrace, it assumes a single user and does unnecessary and wrong things for uprobes. For example: - arch_uprobe_enable_step() can't trust TIF_SINGLESTEP, an application itself can set X86_EFLAGS_TF which must be preserved after arch_uprobe_disable_step(). - we do not want to set TIF_SINGLESTEP/TIF_FORCED_TF in arch_uprobe_enable_step(), this only makes sense for ptrace. - otoh we leak TIF_SINGLESTEP if arch_uprobe_disable_step() doesn't do user_disable_single_step(), the application will be killed after the next syscall. - arch_uprobe_enable_step() does access_process_vm() we do not need/want. Change arch_uprobe_enable/disable_step() to set/clear X86_EFLAGS_TF directly, this is much simpler and more correct. However, we need to clear TIF_BLOCKSTEP/DEBUGCTLMSR_BTF before executing the probed insn, add set_task_blockstep(false). Note: with or without this patch, there is another (hopefully minor) problem. A probed "pushf" insn can see the wrong X86_EFLAGS_TF set by uprobes. Perhaps we should change _disable to update the stack, or teach arch_uprobe_skip_sstep() to emulate this insn. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/include/asm/processor.h | 2 ++ arch/x86/kernel/step.c | 2 +- arch/x86/kernel/uprobes.c | 32 ++++++++++++++++++-------------- 3 files changed, 21 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d048cad9bca..433d2e5c98a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -759,6 +759,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +extern void set_task_blockstep(struct task_struct *task, bool on); + /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index f89cdc6ccd5..cd3b2438a98 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,7 +157,7 @@ static int enable_single_step(struct task_struct *child) return 1; } -static void set_task_blockstep(struct task_struct *task, bool on) +void set_task_blockstep(struct task_struct *task, bool on) { unsigned long debugctl; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 309a0e02b12..3b4aae68efe 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -683,26 +683,30 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) void arch_uprobe_enable_step(struct arch_uprobe *auprobe) { - struct uprobe_task *utask = current->utask; - struct arch_uprobe_task *autask = &utask->autask; + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); autask->restore_flags = 0; - if (!test_tsk_thread_flag(current, TIF_SINGLESTEP) && - !(auprobe->fixups & UPROBE_FIX_SETF)) + if (!(regs->flags & X86_EFLAGS_TF) && + !(auprobe->fixups & UPROBE_FIX_SETF)) autask->restore_flags |= UPROBE_CLEAR_TF; - /* - * The state of TIF_BLOCKSTEP is not saved. With the TF flag set we - * would to examine the opcode and the flags to make it right. Without - * TF block stepping makes no sense. - */ - user_enable_single_step(current); + + regs->flags |= X86_EFLAGS_TF; + if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) + set_task_blockstep(task, false); } void arch_uprobe_disable_step(struct arch_uprobe *auprobe) { - struct uprobe_task *utask = current->utask; - struct arch_uprobe_task *autask = &utask->autask; - + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); + /* + * The state of TIF_BLOCKSTEP was not saved so we can get an extra + * SIGTRAP if we do not clear TF. We need to examine the opcode to + * make it right. + */ if (autask->restore_flags & UPROBE_CLEAR_TF) - user_disable_single_step(current); + regs->flags &= ~X86_EFLAGS_TF; } -- cgit v1.2.3-70-g09d2 From 3a4664aa8362d9fa9110828f55afa9f9fcd7e484 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 3 Sep 2012 16:05:10 +0200 Subject: uprobes/x86: Xol should send SIGTRAP if X86_EFLAGS_TF was set arch_uprobe_disable_step() correctly preserves X86_EFLAGS_TF and returns to user-mode. But this means the application gets SIGTRAP only after the next insn. This means that UPROBE_CLEAR_TF logic is not really right. _enable should only record the state of X86_EFLAGS_TF, and _disable should check it separately from UPROBE_FIX_SETF. Remove arch_uprobe_task->restore_flags, add ->saved_tf instead, and change enable/disable accordingly. This assumes that the probed insn was not trapped, see the next patch. arch_uprobe_skip_sstep() logic has the same problem, change it to check X86_EFLAGS_TF and send SIGTRAP as well. We will cleanup this all after we fold enable/disable_step into pre/post_hol hooks. Note: send_sig(SIGTRAP) is not actually right, we need send_sigtrap(). But this needs more changes, handle_swbp() does the same and this is equally wrong. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/include/asm/uprobes.h | 3 +-- arch/x86/kernel/uprobes.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index cee58624cb3..d561ff5a3d4 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -46,8 +46,7 @@ struct arch_uprobe_task { #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif -#define UPROBE_CLEAR_TF (1 << 0) - unsigned int restore_flags; + unsigned int saved_tf; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3b4aae68efe..7e993d1f199 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -653,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * Skip these instructions as per the currently known x86 ISA. * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } */ -bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; @@ -681,16 +681,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) return false; } +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + bool ret = __skip_sstep(auprobe, regs); + if (ret && (regs->flags & X86_EFLAGS_TF)) + send_sig(SIGTRAP, current, 0); + return ret; +} + void arch_uprobe_enable_step(struct arch_uprobe *auprobe) { struct task_struct *task = current; struct arch_uprobe_task *autask = &task->utask->autask; struct pt_regs *regs = task_pt_regs(task); - autask->restore_flags = 0; - if (!(regs->flags & X86_EFLAGS_TF) && - !(auprobe->fixups & UPROBE_FIX_SETF)) - autask->restore_flags |= UPROBE_CLEAR_TF; + autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); regs->flags |= X86_EFLAGS_TF; if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) @@ -707,6 +712,8 @@ void arch_uprobe_disable_step(struct arch_uprobe *auprobe) * SIGTRAP if we do not clear TF. We need to examine the opcode to * make it right. */ - if (autask->restore_flags & UPROBE_CLEAR_TF) + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) regs->flags &= ~X86_EFLAGS_TF; } -- cgit v1.2.3-70-g09d2 From d6a00b35e411519d774d978cdf80e4406d01b36b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 8 Sep 2012 18:38:15 +0200 Subject: uprobes/x86: Fix arch_uprobe_disable_step() && UTASK_SSTEP_TRAPPED interaction arch_uprobe_disable_step() should also take UTASK_SSTEP_TRAPPED into account. In this case the probed insn was not executed, we need to clear X86_EFLAGS_TF if it was set by us and that is all. Again, this code will look more clean when we move it into arch_uprobe_post_xol() and arch_uprobe_abort_xol(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 7e993d1f199..9538f00827a 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -706,14 +706,20 @@ void arch_uprobe_disable_step(struct arch_uprobe *auprobe) { struct task_struct *task = current; struct arch_uprobe_task *autask = &task->utask->autask; + bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); struct pt_regs *regs = task_pt_regs(task); /* * The state of TIF_BLOCKSTEP was not saved so we can get an extra * SIGTRAP if we do not clear TF. We need to examine the opcode to * make it right. */ - if (autask->saved_tf) - send_sig(SIGTRAP, task, 0); - else if (!(auprobe->fixups & UPROBE_FIX_SETF)) - regs->flags &= ~X86_EFLAGS_TF; + if (unlikely(trapped)) { + if (!autask->saved_tf) + regs->flags &= ~X86_EFLAGS_TF; + } else { + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + regs->flags &= ~X86_EFLAGS_TF; + } } -- cgit v1.2.3-70-g09d2 From baedbf02b1912225d60dd7403acb4b4e003088b5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 3 Sep 2012 17:02:16 +0200 Subject: uprobes: Make arch_uprobe_task->saved_trap_nr "unsigned int" Make arch_uprobe_task->saved_trap_nr "unsigned int" and move it down after ->saved_scratch_register, this changes sizeof() from 24 to 16. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/include/asm/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index d561ff5a3d4..8ff8be7835a 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -42,10 +42,10 @@ struct arch_uprobe { }; struct arch_uprobe_task { - unsigned long saved_trap_nr; #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif + unsigned int saved_trap_nr; unsigned int saved_tf; }; -- cgit v1.2.3-70-g09d2 From 38cb5ef4473c6f510fae3a00bdac3acd550e3796 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 26 Jul 2012 18:00:27 -0400 Subject: X86: Improve GOP detection in the EFI boot stub We currently use the PCI IO protocol as a proxy for a functional GOP. This is less than ideal, since some platforms will put the GOP on output devices rather than the GPU itself. Move to using the conout protocol. This is not guaranteed per-spec, but is part of the consplitter implementation that causes this problem in the first place and so should be reliable. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 29 ++++++++++++++++------------- arch/x86/boot/compressed/eboot.h | 4 ++++ 2 files changed, 20 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index b3e0227df2c..d5e4044505d 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -276,8 +276,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, nr_gops = size / sizeof(void *); for (i = 0; i < nr_gops; i++) { struct efi_graphics_output_mode_info *info; - efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; - void *pciio; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy; void *h = gop_handle[i]; status = efi_call_phys3(sys_table->boottime->handle_protocol, @@ -285,19 +286,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, if (status != EFI_SUCCESS) continue; - efi_call_phys3(sys_table->boottime->handle_protocol, - h, &pciio_proto, &pciio); + status = efi_call_phys3(sys_table->boottime->handle_protocol, + h, &conout_proto, &dummy); + + if (status == EFI_SUCCESS) + conout_found = true; status = efi_call_phys4(gop->query_mode, gop, gop->mode->mode, &size, &info); - if (status == EFI_SUCCESS && (!first_gop || pciio)) { + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* - * Apple provide GOPs that are not backed by - * real hardware (they're used to handle - * multiple displays). The workaround is to - * search for a GOP implementing the PCIIO - * protocol, and if one isn't found, to just - * fallback to the first GOP. + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. */ width = info->horizontal_resolution; height = info->vertical_resolution; @@ -308,10 +311,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, pixels_per_scan_line = info->pixels_per_scan_line; /* - * Once we've found a GOP supporting PCIIO, + * Once we've found a GOP supporting ConOut, * don't bother looking any further. */ - if (pciio) + if (conout_found) break; first_gop = gop; diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 3b6e15627c5..e5b0a8f91c5 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -14,6 +14,10 @@ #define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) #define EFI_READ_CHUNK_SIZE (1024 * 1024) +#define EFI_CONSOLE_OUT_DEVICE_GUID \ + EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \ + 0x3f, 0xc1, 0x4d) + #define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 #define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 #define PIXEL_BIT_MASK 2 -- cgit v1.2.3-70-g09d2 From 9dead5bbb825d7c25c0400e61de83075046322d0 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 26 Jul 2012 18:00:00 -0400 Subject: efi: Build EFI stub with EFI-appropriate options We can't assume the presence of the red zone while we're still in a boot services environment, so we should build with -fno-red-zone to avoid problems. Change the size of wchar at the same time to make string handling simpler. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index e398bb5d63b..8a84501acb1 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ $(obj)/piggy.o +$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone +$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone + ifeq ($(CONFIG_EFI_STUB), y) VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o endif -- cgit v1.2.3-70-g09d2 From d6cf86d8f23253225fe2a763d627ecf7dfee9dae Mon Sep 17 00:00:00 2001 From: Seiji Aguchi Date: Tue, 24 Jul 2012 13:27:23 +0000 Subject: efi: initialize efi.runtime_version to make query_variable_info/update_capsule workable A value of efi.runtime_version is checked before calling update_capsule()/query_variable_info() as follows. But it isn't initialized anywhere. static efi_status_t virt_efi_query_variable_info(u32 attr, u64 *storage_space, u64 *remaining_space, u64 *max_variable_size) { if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; This patch initializes a value of efi.runtime_version at boot time. Signed-off-by: Seiji Aguchi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92660edaa1e..f55a4ce6dc4 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -890,6 +890,7 @@ void __init efi_enter_virtual_mode(void) * * Call EFI services through wrapper functions. */ + efi.runtime_version = efi_systab.fw_revision; efi.get_time = virt_efi_get_time; efi.set_time = virt_efi_set_time; efi.get_wakeup_time = virt_efi_get_wakeup_time; -- cgit v1.2.3-70-g09d2 From f462ed939de67c20528bc08f11d2fc4f2d59c0d5 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 27 Jul 2012 12:58:53 -0400 Subject: efifb: Skip DMI checks if the bootloader knows what it's doing The majority of the DMI checks in efifb are for cases where the bootloader has provided invalid information. However, on some machines the overrides may do more harm than good due to configuration differences between machines with the same machine identifier. It turns out that it's possible for the bootloader to get the correct information on GOP-based systems, but we can't guarantee that the kernel's being booted with one that's been updated to do so. Add support for a capabilities flag that can be set by the bootloader, and skip the DMI checks in that case. Additionally, set this flag in the UEFI stub code. Signed-off-by: Matthew Garrett Acked-by: Peter Jones Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 2 ++ drivers/video/efifb.c | 4 +++- include/linux/screen_info.h | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index d5e4044505d..bbd83b9cb4d 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -379,6 +379,8 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->rsvd_pos = 0; } + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; + free_handle: efi_call_phys1(sys_table->boottime->free_pool, gop_handle); return status; diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c index b4a632ada40..932abaa58a8 100644 --- a/drivers/video/efifb.c +++ b/drivers/video/efifb.c @@ -553,7 +553,9 @@ static int __init efifb_init(void) int ret; char *option = NULL; - dmi_check_system(dmi_system_table); + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || + !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS)) + dmi_check_system(dmi_system_table); if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) return -ENODEV; diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 899fbb487c9..fb3c5a8fef3 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -68,6 +68,8 @@ struct screen_info { #define VIDEO_FLAGS_NOCURSOR (1 << 0) /* The video mode has no cursor set */ +#define VIDEO_CAPABILITY_SKIP_QUIRKS (1 << 0) + #ifdef __KERNEL__ extern struct screen_info screen_info; -- cgit v1.2.3-70-g09d2 From e9b10953edbccd3744e039ffc060ab2692f17856 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 27 Jul 2012 17:20:49 -0400 Subject: x86, EFI: Calculate the EFI framebuffer size instead of trusting the firmware Seth Forshee reported that his system was reporting that the EFI framebuffer stretched from 0x90010000-0xb0010000 despite the GPU's BAR only covering 0x90000000-0x9ffffff. It's safer to calculate this value from the pixel stride and screen height (values we already depend on) rather than face potential problems with resource allocation later on. Signed-off-by: Matthew Garrett Tested-by: Seth Forshee Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index bbd83b9cb4d..c760e073963 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -331,7 +331,6 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->lfb_width = width; si->lfb_height = height; si->lfb_base = fb_base; - si->lfb_size = fb_size; si->pages = 1; if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { @@ -379,6 +378,8 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->rsvd_pos = 0; } + si->lfb_size = si->lfb_linelength * si->lfb_height; + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; free_handle: -- cgit v1.2.3-70-g09d2 From 83287ea420ced7242a704488aab0fcdcf2ced9ab Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 16 Sep 2012 15:10:57 +0300 Subject: KVM: VMX: Make lto-friendly LTO (link-time optimization) doesn't like local labels to be referred to from a different function, since the two functions may be built in separate compilation units. Use an external variable instead. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d62b4139a29..5faf12ace54 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO); static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; module_param(ple_window, int, S_IRUGO); +extern const ulong vmx_return; + #define NR_AUTOLOAD_MSRS 8 #define VMCS02_POOL_SIZE 1 @@ -3724,8 +3726,7 @@ static void vmx_set_constant_host_state(void) native_store_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ - asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl)); - vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */ + vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); vmcs_write32(HOST_IA32_SYSENTER_CS, low32); @@ -6276,11 +6277,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ /* Enter guest mode */ - "jne .Llaunched \n\t" + "jne 1f \n\t" __ex(ASM_VMX_VMLAUNCH) "\n\t" - "jmp .Lkvm_vmx_return \n\t" - ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" - ".Lkvm_vmx_return: " + "jmp 2f \n\t" + "1: " __ex(ASM_VMX_VMRESUME) "\n\t" + "2: " /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%"R"sp) \n\t" "pop %0 \n\t" @@ -6306,6 +6307,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "pop %%"R"bp; pop %%"R"dx \n\t" "setbe %c[fail](%0) \n\t" + ".pushsection .rodata \n\t" + ".global vmx_return \n\t" + "vmx_return: " _ASM_PTR " 2b \n\t" + ".popsection" : : "c"(vmx), "d"((unsigned long)HOST_RSP), [launched]"i"(offsetof(struct vcpu_vmx, __launched)), [fail]"i"(offsetof(struct vcpu_vmx, fail)), -- cgit v1.2.3-70-g09d2 From b188c81f2e1a188ddda6a3d353e5b546c30a9b90 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 16 Sep 2012 15:10:58 +0300 Subject: KVM: VMX: Make use of asm.h Use macros for bitness-insensitive register names, instead of rolling our own. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 69 ++++++++++++++++++++++++------------------------------ 1 file changed, 30 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5faf12ace54..30bcb953afe 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6184,14 +6184,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host); } -#ifdef CONFIG_X86_64 -#define R "r" -#define Q "q" -#else -#define R "e" -#define Q "l" -#endif - static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6240,30 +6232,30 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ - "push %%"R"dx; push %%"R"bp;" - "push %%"R"cx \n\t" /* placeholder for guest rcx */ - "push %%"R"cx \n\t" - "cmp %%"R"sp, %c[host_rsp](%0) \n\t" + "push %%" _ASM_DX "; push %%" _ASM_BP ";" + "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ + "push %%" _ASM_CX " \n\t" + "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" "je 1f \n\t" - "mov %%"R"sp, %c[host_rsp](%0) \n\t" + "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" "1: \n\t" /* Reload cr2 if changed */ - "mov %c[cr2](%0), %%"R"ax \n\t" - "mov %%cr2, %%"R"dx \n\t" - "cmp %%"R"ax, %%"R"dx \n\t" + "mov %c[cr2](%0), %%" _ASM_AX " \n\t" + "mov %%cr2, %%" _ASM_DX " \n\t" + "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" "je 2f \n\t" - "mov %%"R"ax, %%cr2 \n\t" + "mov %%" _ASM_AX", %%cr2 \n\t" "2: \n\t" /* Check if vmlaunch of vmresume is needed */ "cmpl $0, %c[launched](%0) \n\t" /* Load guest registers. Don't clobber flags. */ - "mov %c[rax](%0), %%"R"ax \n\t" - "mov %c[rbx](%0), %%"R"bx \n\t" - "mov %c[rdx](%0), %%"R"dx \n\t" - "mov %c[rsi](%0), %%"R"si \n\t" - "mov %c[rdi](%0), %%"R"di \n\t" - "mov %c[rbp](%0), %%"R"bp \n\t" + "mov %c[rax](%0), %%" _ASM_AX " \n\t" + "mov %c[rbx](%0), %%" _ASM_BX " \n\t" + "mov %c[rdx](%0), %%" _ASM_DX " \n\t" + "mov %c[rsi](%0), %%" _ASM_SI " \n\t" + "mov %c[rdi](%0), %%" _ASM_DI " \n\t" + "mov %c[rbp](%0), %%" _ASM_BP " \n\t" #ifdef CONFIG_X86_64 "mov %c[r8](%0), %%r8 \n\t" "mov %c[r9](%0), %%r9 \n\t" @@ -6274,7 +6266,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %c[r14](%0), %%r14 \n\t" "mov %c[r15](%0), %%r15 \n\t" #endif - "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ + "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */ /* Enter guest mode */ "jne 1f \n\t" @@ -6283,15 +6275,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "1: " __ex(ASM_VMX_VMRESUME) "\n\t" "2: " /* Save guest registers, load host registers, keep flags */ - "mov %0, %c[wordsize](%%"R"sp) \n\t" + "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" - "mov %%"R"ax, %c[rax](%0) \n\t" - "mov %%"R"bx, %c[rbx](%0) \n\t" - "pop"Q" %c[rcx](%0) \n\t" - "mov %%"R"dx, %c[rdx](%0) \n\t" - "mov %%"R"si, %c[rsi](%0) \n\t" - "mov %%"R"di, %c[rdi](%0) \n\t" - "mov %%"R"bp, %c[rbp](%0) \n\t" + "mov %%" _ASM_AX ", %c[rax](%0) \n\t" + "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" + __ASM_SIZE(pop) " %c[rcx](%0) \n\t" + "mov %%" _ASM_DX ", %c[rdx](%0) \n\t" + "mov %%" _ASM_SI ", %c[rsi](%0) \n\t" + "mov %%" _ASM_DI ", %c[rdi](%0) \n\t" + "mov %%" _ASM_BP ", %c[rbp](%0) \n\t" #ifdef CONFIG_X86_64 "mov %%r8, %c[r8](%0) \n\t" "mov %%r9, %c[r9](%0) \n\t" @@ -6302,10 +6294,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" #endif - "mov %%cr2, %%"R"ax \n\t" - "mov %%"R"ax, %c[cr2](%0) \n\t" + "mov %%cr2, %%" _ASM_AX " \n\t" + "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" - "pop %%"R"bp; pop %%"R"dx \n\t" + "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" @@ -6335,9 +6327,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), [wordsize]"i"(sizeof(ulong)) : "cc", "memory" - , R"ax", R"bx", R"di", R"si" #ifdef CONFIG_X86_64 + , "rax", "rbx", "rdi", "rsi" , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" +#else + , "eax", "ebx", "edi", "esi" #endif ); @@ -6389,9 +6383,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_complete_interrupts(vmx); } -#undef R -#undef Q - static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); -- cgit v1.2.3-70-g09d2 From 7454766f7bead388251aedee35a478356a7f4e72 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 16 Sep 2012 15:10:59 +0300 Subject: KVM: SVM: Make use of asm.h Use macros for bitness-insensitive register names, instead of rolling our own. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 611c72875fb..818fceb3091 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3782,12 +3782,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } -#ifdef CONFIG_X86_64 -#define R "r" -#else -#define R "e" -#endif - static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3814,13 +3808,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); asm volatile ( - "push %%"R"bp; \n\t" - "mov %c[rbx](%[svm]), %%"R"bx \n\t" - "mov %c[rcx](%[svm]), %%"R"cx \n\t" - "mov %c[rdx](%[svm]), %%"R"dx \n\t" - "mov %c[rsi](%[svm]), %%"R"si \n\t" - "mov %c[rdi](%[svm]), %%"R"di \n\t" - "mov %c[rbp](%[svm]), %%"R"bp \n\t" + "push %%" _ASM_BP "; \n\t" + "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" + "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" + "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" + "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" + "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" + "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" #ifdef CONFIG_X86_64 "mov %c[r8](%[svm]), %%r8 \n\t" "mov %c[r9](%[svm]), %%r9 \n\t" @@ -3833,20 +3827,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif /* Enter guest mode */ - "push %%"R"ax \n\t" - "mov %c[vmcb](%[svm]), %%"R"ax \n\t" + "push %%" _ASM_AX " \n\t" + "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" __ex(SVM_VMLOAD) "\n\t" __ex(SVM_VMRUN) "\n\t" __ex(SVM_VMSAVE) "\n\t" - "pop %%"R"ax \n\t" + "pop %%" _ASM_AX " \n\t" /* Save guest registers, load host registers */ - "mov %%"R"bx, %c[rbx](%[svm]) \n\t" - "mov %%"R"cx, %c[rcx](%[svm]) \n\t" - "mov %%"R"dx, %c[rdx](%[svm]) \n\t" - "mov %%"R"si, %c[rsi](%[svm]) \n\t" - "mov %%"R"di, %c[rdi](%[svm]) \n\t" - "mov %%"R"bp, %c[rbp](%[svm]) \n\t" + "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" + "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" + "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" + "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" + "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" + "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" #ifdef CONFIG_X86_64 "mov %%r8, %c[r8](%[svm]) \n\t" "mov %%r9, %c[r9](%[svm]) \n\t" @@ -3857,7 +3851,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" #endif - "pop %%"R"bp" + "pop %%" _ASM_BP : : [svm]"a"(svm), [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), @@ -3878,9 +3872,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) #endif : "cc", "memory" - , R"bx", R"cx", R"dx", R"si", R"di" #ifdef CONFIG_X86_64 + , "rbx", "rcx", "rdx", "rsi", "rdi" , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" +#else + , "ebx", "ecx", "edx", "esi", "edi" #endif ); @@ -3940,8 +3936,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) mark_all_clean(svm->vmcb); } -#undef R - static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); -- cgit v1.2.3-70-g09d2 From 314d9f63f385096580e9e2a06eaa0745d92fe4ac Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 10 Sep 2012 15:53:49 +0800 Subject: perf/x86: Add cpumask for uncore pmu This patch adds a cpumask file to the uncore pmu sysfs directory. The cpumask file contains one active cpu for every socket. Signed-off-by: "Yan, Zheng" Acked-by: Peter Zijlstra Acked-by: Ingo Molnar Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: "Yan, Zheng" Link: http://lkml.kernel.org/r/1347263631-23175-2-git-send-email-zheng.z.yan@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 28 ++++++++++++++++++++++++--- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 6 ++++-- 2 files changed, 29 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0a5571080e7..62ec3e6af7e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2341,6 +2341,27 @@ int uncore_pmu_event_init(struct perf_event *event) return ret; } +static ssize_t uncore_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); + +static struct attribute *uncore_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group uncore_pmu_attr_group = { + .attrs = uncore_pmu_attrs, +}; + static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -2378,8 +2399,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) free_percpu(type->pmus[i].box); kfree(type->pmus); type->pmus = NULL; - kfree(type->attr_groups[1]); - type->attr_groups[1] = NULL; + kfree(type->events_group); + type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) @@ -2431,9 +2452,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->attr_groups[1] = events_group; + type->events_group = events_group; } + type->pmu_group = &uncore_pmu_attr_group; type->pmus = pmus; return 0; fail: diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 5b81c1856aa..e68a4550e95 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -369,10 +369,12 @@ struct intel_uncore_type { struct intel_uncore_pmu *pmus; struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; - const struct attribute_group *attr_groups[3]; + const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] +#define pmu_group attr_groups[0] +#define format_group attr_groups[1] +#define events_group attr_groups[2] struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); -- cgit v1.2.3-70-g09d2 From 9fc77441e5e1bf80b794cc546d2243ee9f4afb75 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 16 Sep 2012 11:50:30 +0300 Subject: KVM: make processes waiting on vcpu mutex killable vcpu mutex can be held for unlimited time so taking it with mutex_lock on an ioctl is wrong: one process could be passed a vcpu fd and call this ioctl on the vcpu used by another process, it will then be unkillable until the owner exits. Call mutex_lock_killable instead and return status. Note: mutex_lock_interruptible would be even nicer, but I am not sure all users are prepared to handle EINTR from these ioctls. They might misinterpret it as an error. Cleanup paths expect a vcpu that can't be used by any userspace so this will always succeed - catch bugs by calling BUG_ON. Catch callers that don't check return state by adding __must_check. Signed-off-by: Michael S. Tsirkin Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 12 +++++++++--- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 10 +++++++--- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c4d451ed157..19047eafa38 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6016,7 +6016,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) int r; vcpu->arch.mtrr_state.have_fixed = 1; - vcpu_load(vcpu); + r = vcpu_load(vcpu); + if (r) + return r; r = kvm_arch_vcpu_reset(vcpu); if (r == 0) r = kvm_mmu_setup(vcpu); @@ -6027,9 +6029,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { + int r; vcpu->arch.apf.msr_val = 0; - vcpu_load(vcpu); + r = vcpu_load(vcpu); + BUG_ON(r); kvm_mmu_unload(vcpu); vcpu_put(vcpu); @@ -6275,7 +6279,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - vcpu_load(vcpu); + int r; + r = vcpu_load(vcpu); + BUG_ON(r); kvm_mmu_unload(vcpu); vcpu_put(vcpu); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 40791930bc1..80bfc880921 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -408,7 +408,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); -void vcpu_load(struct kvm_vcpu *vcpu); +int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4fe02d90081..cc3f6dc506e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -131,11 +131,12 @@ bool kvm_is_mmio_pfn(pfn_t pfn) /* * Switches to specified vcpu, until a matching vcpu_put() */ -void vcpu_load(struct kvm_vcpu *vcpu) +int vcpu_load(struct kvm_vcpu *vcpu) { int cpu; - mutex_lock(&vcpu->mutex); + if (mutex_lock_killable(&vcpu->mutex)) + return -EINTR; if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { /* The thread running this VCPU changed. */ struct pid *oldpid = vcpu->pid; @@ -148,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu) preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); + return 0; } void vcpu_put(struct kvm_vcpu *vcpu) @@ -1891,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp, #endif - vcpu_load(vcpu); + r = vcpu_load(vcpu); + if (r) + return r; switch (ioctl) { case KVM_RUN: r = -EINVAL; -- cgit v1.2.3-70-g09d2 From e57dbaf77f372ac461b5b0b353c65efae9739a00 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 13 Sep 2011 15:23:21 +0200 Subject: x86, mce: Enable MCA support by default MCA is the basic support for hardware error logging and reporting, and it is majorly unwise to run without it so enable machine check software support by default on x86. Signed-off-by: Borislav Petkov Acked-by: Tony Luck --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..3d2d2ef0e16 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -871,6 +871,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS config X86_MCE bool "Machine Check / overheating reporting" + default y ---help--- Machine Check support allows the processor to notify the kernel if it detects a problem (e.g. overheating, data corruption). -- cgit v1.2.3-70-g09d2 From 57639bedd2b8268daa791efcfd0367b9031b057d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 22 May 2012 18:47:38 +0200 Subject: x86, MCE: Remove unused defines Those were sitting there unused since the dawn of time, drop them. Signed-off-by: Borislav Petkov --- arch/x86/include/asm/mce.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a3ac52b29cb..ccaf7c581c8 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -116,19 +116,9 @@ struct mce_log { /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 - -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ -#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9) -#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9) -#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9) -#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9) -#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9) -#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) -#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) - +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) #ifdef __KERNEL__ - extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); -- cgit v1.2.3-70-g09d2 From 050902c011712ad4703038fa4489ec4edd87d396 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 24 Jul 2012 16:05:27 -0700 Subject: x86, signal: Cleanup ifdefs and is_ia32, is_x32 Use config_enabled() to cleanup the definitions of is_ia32/is_x32. Move the function prototypes to the header file to cleanup ifdefs, and move the x32_setup_rt_frame() code around. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1343171129-2747-2-git-send-email-suresh.b.siddha@intel.com Merged in compilation fix from, Link: http://lkml.kernel.org/r/1344544736.8326.17.camel@sbsiddha-desk.sc.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 26 ++++- arch/x86/include/asm/signal.h | 4 + arch/x86/kernel/signal.c | 196 +++++++++++++++--------------------- 3 files changed, 110 insertions(+), 116 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 75f4c6d6a33..6f595435ff9 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -32,7 +33,6 @@ extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, xstateregs_set; - /* * xstateregs_active == fpregs_active. Please refer to the comment * at the definition of fpregs_active. @@ -55,6 +55,22 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft); static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + #define X87_FSW_ES (1 << 7) /* Exception Summary */ static __always_inline __pure bool use_xsaveopt(void) @@ -180,6 +196,11 @@ static inline void fpu_fxsave(struct fpu *fpu) #endif } +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs *regs); + #else /* CONFIG_X86_32 */ /* perform fxrstor iff the processor has extended states, otherwise frstor */ @@ -204,6 +225,9 @@ static inline void fpu_fxsave(struct fpu *fpu) : [fx] "=m" (fpu->state->fxsave)); } +#define ia32_setup_frame __setup_frame +#define ia32_setup_rt_frame __setup_rt_frame + #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 598457cbd0f..323973f4abf 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -31,6 +31,10 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +#ifndef CONFIG_COMPAT +typedef sigset_t compat_sigset_t; +#endif + #else /* Here we must cater to libcs that poke about in kernel headers. */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376..bed431a3816 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -209,24 +209,21 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long sp = regs->sp; int onsigstack = on_sig_stack(sp); -#ifdef CONFIG_X86_64 /* redzone */ - sp -= 128; -#endif /* CONFIG_X86_64 */ + if (config_enabled(CONFIG_X86_64)) + sp -= 128; if (!onsigstack) { /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (current->sas_ss_size) sp = current->sas_ss_sp + current->sas_ss_size; - } else { -#ifdef CONFIG_X86_32 - /* This is the legacy signal stack switching. */ - if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) + } else if (config_enabled(CONFIG_X86_32) && + (regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + /* This is the legacy signal stack switching. */ sp = (unsigned long) ka->sa.sa_restorer; -#endif /* CONFIG_X86_32 */ } } @@ -474,6 +471,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } #endif /* CONFIG_X86_32 */ +static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, + siginfo_t *info, compat_sigset_t *set, + struct pt_regs *regs) +{ +#ifdef CONFIG_X86_X32_ABI + struct rt_sigframe_x32 __user *frame; + void __user *restorer; + int err = 0; + void __user *fpstate = NULL; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + + if (ka->sa.sa_flags & SA_SIGINFO) { + if (copy_siginfo_to_user32(&frame->info, info)) + return -EFAULT; + } + + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + put_user_ex(0, &frame->uc.uc__pad0); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (ka->sa.sa_flags & SA_RESTORER) { + restorer = ka->sa.sa_restorer; + } else { + /* could use a vstub here */ + restorer = NULL; + err |= -EFAULT; + } + put_user_ex(restorer, &frame->pretcode); + } put_user_catch(err); + + if (err) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; + + /* We use the x32 calling convention here... */ + regs->di = sig; + regs->si = (unsigned long) &frame->info; + regs->dx = (unsigned long) &frame->uc; + + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); + + regs->cs = __USER_CS; + regs->ss = __USER_DS; +#endif /* CONFIG_X86_X32_ABI */ + + return 0; +} + #ifdef CONFIG_X86_32 /* * Atomically swap in the new signal mask, and wait for a signal. @@ -612,55 +677,22 @@ static int signr_convert(int sig) return sig; } -#ifdef CONFIG_X86_32 - -#define is_ia32 1 -#define ia32_setup_frame __setup_frame -#define ia32_setup_rt_frame __setup_rt_frame - -#else /* !CONFIG_X86_32 */ - -#ifdef CONFIG_IA32_EMULATION -#define is_ia32 test_thread_flag(TIF_IA32) -#else /* !CONFIG_IA32_EMULATION */ -#define is_ia32 0 -#endif /* CONFIG_IA32_EMULATION */ - -#ifdef CONFIG_X86_X32_ABI -#define is_x32 test_thread_flag(TIF_X32) - -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs); -#else /* !CONFIG_X86_X32_ABI */ -#define is_x32 0 -#endif /* CONFIG_X86_X32_ABI */ - -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs *regs); - -#endif /* CONFIG_X86_32 */ - static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct pt_regs *regs) { int usig = signr_convert(sig); sigset_t *set = sigmask_to_save(); + compat_sigset_t *cset = (compat_sigset_t *) set; /* Set up the stack frame */ - if (is_ia32) { + if (is_ia32_frame()) { if (ka->sa.sa_flags & SA_SIGINFO) - return ia32_setup_rt_frame(usig, ka, info, set, regs); + return ia32_setup_rt_frame(usig, ka, info, cset, regs); else - return ia32_setup_frame(usig, ka, set, regs); -#ifdef CONFIG_X86_X32_ABI - } else if (is_x32) { - return x32_setup_rt_frame(usig, ka, info, - (compat_sigset_t *)set, regs); -#endif + return ia32_setup_frame(usig, ka, cset, regs); + } else if (is_x32_frame()) { + return x32_setup_rt_frame(usig, ka, info, cset, regs); } else { return __setup_rt_frame(sig, ka, info, set, regs); } @@ -824,72 +856,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) } #ifdef CONFIG_X86_X32_ABI -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs) -{ - struct rt_sigframe_x32 __user *frame; - void __user *restorer; - int err = 0; - void __user *fpstate = NULL; - - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - return -EFAULT; - - if (ka->sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user32(&frame->info, info)) - return -EFAULT; - } - - put_user_try { - /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); - put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - put_user_ex(0, &frame->uc.uc__pad0); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - if (ka->sa.sa_flags & SA_RESTORER) { - restorer = ka->sa.sa_restorer; - } else { - /* could use a vstub here */ - restorer = NULL; - err |= -EFAULT; - } - put_user_ex(restorer, &frame->pretcode); - } put_user_catch(err); - - if (err) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; - - /* We use the x32 calling convention here... */ - regs->di = sig; - regs->si = (unsigned long) &frame->info; - regs->dx = (unsigned long) &frame->uc; - - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); - - regs->cs = __USER_CS; - regs->ss = __USER_DS; - - return 0; -} - asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe_x32 __user *frame; -- cgit v1.2.3-70-g09d2 From 0ca5bd0d886578ad0afeceaa83458c0f35cb3c6b Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 24 Jul 2012 16:05:28 -0700 Subject: x86, fpu: Consolidate inline asm routines for saving/restoring fpu state Consolidate x86, x86_64 inline asm routines saving/restoring fpu state using config_enabled(). Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1343171129-2747-3-git-send-email-suresh.b.siddha@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 184 +++++++++++++++--------------------- arch/x86/include/asm/xsave.h | 6 +- arch/x86/kernel/xsave.c | 4 +- 3 files changed, 81 insertions(+), 113 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 6f595435ff9..016acb30fa4 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -97,34 +97,24 @@ static inline void sanitize_i387_state(struct task_struct *tsk) __sanitize_i387_state(tsk); } -#ifdef CONFIG_X86_64 -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) -{ - int err; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxrstorq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "m" (*fx), "0" (0)); -#else - asm volatile("1: rex64/fxrstor (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "R" (fx), "m" (*fx), "0" (0)); -#endif - return err; +#define check_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +static inline int fsave_user(struct i387_fsave_struct __user *fx) +{ + return check_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } static inline int fxsave_user(struct i387_fxsave_struct __user *fx) @@ -140,90 +130,73 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) if (unlikely(err)) return -EFAULT; - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxsaveq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), [fx] "=m" (*fx) - : "0" (0)); -#else - asm volatile("1: rex64/fxsave (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), "=m" (*fx) - : [fx] "R" (fx), "0" (0)); -#endif - if (unlikely(err) && - __clear_user(fx, sizeof(struct i387_fxsave_struct))) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ - return err; + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); + + /* See comment in fpu_fxsave() below. */ + return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } -static inline void fpu_fxsave(struct fpu *fpu) +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { - /* Using "rex64; fxsave %0" is broken because, if the memory operand - uses any extended registers for addressing, a second REX prefix - will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -#ifdef CONFIG_AS_FXSAVEQ - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (fpu->state->fxsave)); -#else - /* Using, as a workaround, the properly prefixed form below isn't - accepted by any binutils version so far released, complaining that - the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). - asm volatile("rex64/fxsave %0" - : "=m" (fpu->state->fxsave)); - This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - asm volatile("rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); -#endif + /* See comment in fpu_fxsave() below. */ + return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); } -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - compat_sigset_t *set, struct pt_regs *regs); - -#else /* CONFIG_X86_32 */ - -/* perform fxrstor iff the processor has extended states, otherwise frstor */ -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +static inline int frstor_checking(struct i387_fsave_struct *fx) { - /* - * The "nop" is needed to make the instructions the same - * length. - */ - alternative_input( - "nop ; frstor %1", - "fxrstor %1", - X86_FEATURE_FXSR, - "m" (*fx)); - - return 0; + return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline void fpu_fxsave(struct fpu *fpu) { - asm volatile("fxsave %[fx]" - : [fx] "=m" (fpu->state->fxsave)); + if (config_enabled(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave)); + else { + /* Using "rex64; fxsave %0" is broken because, if the memory + * operand uses any extended registers for addressing, a second + * REX prefix will be generated (to the assembler, rex64 + * followed by semicolon is a separate instruction), and hence + * the 64-bitness is lost. + * + * Using "fxsaveq %0" would be the ideal choice, but is only + * supported starting with gas 2.16. + * + * Using, as a workaround, the properly prefixed form below + * isn't accepted by any binutils version so far released, + * complaining that the same type of prefix is used twice if + * an extended register is needed for addressing (fix submitted + * to mainline 2005-11-21). + * + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); + * + * This, however, we can work around by forcing the compiler to + * select an addressing mode that doesn't require extended + * registers. + */ + asm volatile( "rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); + } } +#ifdef CONFIG_X86_64 + +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs *regs); + +#else /* CONFIG_X86_32 */ #define ia32_setup_frame __setup_frame #define ia32_setup_rt_frame __setup_rt_frame @@ -272,17 +245,14 @@ static inline int __save_init_fpu(struct task_struct *tsk) return fpu_save_init(&tsk->thread.fpu); } -static inline int fpu_fxrstor_checking(struct fpu *fpu) -{ - return fxrstor_checking(&fpu->state->fxsave); -} - static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) - return fpu_xrstor_checking(fpu); + return fpu_xrstor_checking(&fpu->state->xsave); + else if (use_fxsr()) + return fxrstor_checking(&fpu->state->fxsave); else - return fpu_fxrstor_checking(fpu); + return frstor_checking(&fpu->state->fsave); } static inline int restore_fpu_checking(struct task_struct *tsk) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 8a1b6f9b594..aaac810e861 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -42,9 +42,8 @@ extern int check_for_xstate(struct i387_fxsave_struct __user *buf, void __user *fpstate, struct _fpx_sw_bytes *sw); -static inline int fpu_xrstor_checking(struct fpu *fpu) +static inline int fpu_xrstor_checking(struct xsave_struct *fx) { - struct xsave_struct *fx = &fpu->state->xsave; int err; asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" @@ -84,9 +83,6 @@ static inline int xsave_user(struct xsave_struct __user *buf) : [err] "=r" (err) : "D" (buf), "a" (-1), "d" (-1), "0" (0) : "memory"); - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ return err; } diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 9e1a8a7ba6e..7a3d4df9eaf 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -176,8 +176,10 @@ int save_i387_xstate(void __user *buf) else err = fxsave_user(buf); - if (err) + if (unlikely(err)) { + __clear_user(buf, xstate_size); return err; + } user_fpu_end(); } else { sanitize_i387_state(tsk); -- cgit v1.2.3-70-g09d2 From 72a671ced66db6d1c2bfff1c930a101ac8d08204 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 24 Jul 2012 16:05:29 -0700 Subject: x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels Currently for x86 and x86_32 binaries, fpstate in the user sigframe is copied to/from the fpstate in the task struct. And in the case of signal delivery for x86_64 binaries, if the fpstate is live in the CPU registers, then the live state is copied directly to the user sigframe. Otherwise fpstate in the task struct is copied to the user sigframe. During restore, fpstate in the user sigframe is restored directly to the live CPU registers. Historically, different code paths led to different bugs. For example, x86_64 code path was not preemption safe till recently. Also there is lot of code duplication for support of new features like xsave etc. Unify signal handling code paths for x86 and x86_64 kernels. New strategy is as follows: Signal delivery: Both for 32/64-bit frames, align the core math frame area to 64bytes as needed by xsave (this where the main fpu/extended state gets copied to and excludes the legacy compatibility fsave header for the 32-bit [f]xsave frames). If the state is live, copy the register state directly to the user frame. If not live, copy the state in the thread struct to the user frame. And for 32-bit [f]xsave frames, construct the fsave header separately before the actual [f]xsave area. Signal return: As the 32-bit frames with [f]xstate has an additional 'fsave' header, copy everything back from the user sigframe to the fpstate in the task structure and reconstruct the fxstate from the 'fsave' header (Also user passed pointers may not be correctly aligned for any attempt to directly restore any partial state). At the next fpstate usage, everything will be restored to the live CPU registers. For all the 64-bit frames and the 32-bit fsave frame, restore the state from the user sigframe directly to the live CPU registers. 64-bit signals always restored the math frame directly, so we can expect the math frame pointer to be correctly aligned. For 32-bit fsave frames, there are no alignment requirements, so we can restore the state directly. "lat_sig catch" microbenchmark numbers (for x86, x86_64, x86_32 binaries) are with in the noise range with this change. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1343171129-2747-4-git-send-email-suresh.b.siddha@intel.com [ Merged in compilation fix ] Link: http://lkml.kernel.org/r/1344544736.8326.17.camel@sbsiddha-desk.sc.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/ia32/ia32_signal.c | 9 +- arch/x86/include/asm/fpu-internal.h | 111 +++++---- arch/x86/include/asm/xsave.h | 6 +- arch/x86/kernel/i387.c | 246 +------------------- arch/x86/kernel/process.c | 10 - arch/x86/kernel/ptrace.c | 3 - arch/x86/kernel/signal.c | 15 +- arch/x86/kernel/xsave.c | 432 +++++++++++++++++++++--------------- 8 files changed, 348 insertions(+), 484 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 452d4dd0a95..8c77c64fbd2 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -251,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, get_user_ex(tmp, &sc->fpstate); buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); + err |= restore_xstate_sig(buf, 1); get_user_ex(*pax, &sc->ax); } get_user_catch(err); @@ -382,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, sp = (unsigned long) ka->sa.sa_restorer; if (used_math()) { - sp = sp - sig_xstate_ia32_size; + unsigned long fx_aligned, math_size; + + sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); *fpstate = (struct _fpstate_ia32 __user *) sp; - if (save_i387_xstate_ia32(*fpstate) < 0) + if (save_xstate_sig(*fpstate, (void __user *)fx_aligned, + math_size) < 0) return (void __user *) -1L; } diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 016acb30fa4..4fbb4195bc6 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -22,11 +22,30 @@ #include #include -extern unsigned int sig_xstate_size; +#ifdef CONFIG_X86_64 +# include +# include +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern unsigned int mxcsr_feature_mask; extern void fpu_init(void); DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, xstateregs_get; @@ -39,19 +58,11 @@ extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, */ #define xstateregs_active fpregs_active -extern struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -extern unsigned int sig_xstate_ia32_size; -extern struct _fpx_sw_bytes fx_sw_reserved_ia32; -struct _fpstate_ia32; -struct _xstate_ia32; -extern int save_i387_xstate_ia32(void __user *buf); -extern int restore_i387_xstate_ia32(void __user *buf); -#endif - #ifdef CONFIG_MATH_EMULATION +# define HAVE_HWFP (boot_cpu_data.hard_math) extern void finit_soft_fpu(struct i387_soft_struct *soft); #else +# define HAVE_HWFP 1 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif @@ -119,17 +130,6 @@ static inline int fsave_user(struct i387_fsave_struct __user *fx) static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { - int err; - - /* - * Clear the bytes not touched by the fxsave and reserved - * for the SW usage. - */ - err = __clear_user(&fx->sw_reserved, - sizeof(struct _fpx_sw_bytes)); - if (unlikely(err)) - return -EFAULT; - if (config_enabled(CONFIG_X86_32)) return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); else if (config_enabled(CONFIG_AS_FXSAVEQ)) @@ -189,19 +189,6 @@ static inline void fpu_fxsave(struct fpu *fpu) : [fx] "R" (&fpu->state->fxsave)); } } -#ifdef CONFIG_X86_64 - -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - compat_sigset_t *set, struct pt_regs *regs); - -#else /* CONFIG_X86_32 */ - -#define ia32_setup_frame __setup_frame -#define ia32_setup_rt_frame __setup_rt_frame - -#endif /* CONFIG_X86_64 */ /* * These must be called with preempt disabled. Returns @@ -392,10 +379,28 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) /* * Signal frame handlers... */ -extern int save_i387_xstate(void __user *buf); -extern int restore_i387_xstate(void __user *buf); +extern int save_xstate_sig(void __user *buf, void __user *fx, int size); +extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); + +static inline int xstate_sigframe_size(void) +{ + return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +static inline int restore_xstate_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct i387_fsave_struct); + size += sizeof(struct i387_fsave_struct); + } + + return __restore_xstate_sig(buf, buf_fx, size); +} -static inline void __clear_fpu(struct task_struct *tsk) +static inline void __drop_fpu(struct task_struct *tsk) { if (__thread_has_fpu(tsk)) { /* Ignore delayed exceptions from user space */ @@ -443,11 +448,21 @@ static inline void save_init_fpu(struct task_struct *tsk) preempt_enable(); } -static inline void clear_fpu(struct task_struct *tsk) +static inline void stop_fpu_preload(struct task_struct *tsk) { + tsk->fpu_counter = 0; +} + +static inline void drop_fpu(struct task_struct *tsk) +{ + /* + * Forget coprocessor state.. + */ + stop_fpu_preload(tsk); preempt_disable(); - __clear_fpu(tsk); + __drop_fpu(tsk); preempt_enable(); + clear_used_math(); } /* @@ -511,4 +526,20 @@ static inline void fpu_copy(struct fpu *dst, struct fpu *src) extern void fpu_finit(struct fpu *fpu); +static inline unsigned long +alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, + unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct i387_fsave_struct); + sp -= sizeof(struct i387_fsave_struct); + } + + *size = frame_size; + return sp; +} + #endif diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index aaac810e861..c1d989a1519 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -38,9 +38,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); -extern int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *sw); static inline int fpu_xrstor_checking(struct xsave_struct *fx) { @@ -68,8 +65,7 @@ static inline int xsave_user(struct xsave_struct __user *buf) * Clear the xsave header first, so that reserved fields are * initialized to zero. */ - err = __clear_user(&buf->xsave_hdr, - sizeof(struct xsave_hdr_struct)); + err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); if (unlikely(err)) return -EFAULT; diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f250431fb50..ab6a2e8028a 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -19,20 +19,6 @@ #include #include -#ifdef CONFIG_X86_64 -# include -# include -#else -# define save_i387_xstate_ia32 save_i387_xstate -# define restore_i387_xstate_ia32 restore_i387_xstate -# define _fpstate_ia32 _fpstate -# define _xstate_ia32 _xstate -# define sig_xstate_ia32_size sig_xstate_size -# define fx_sw_reserved_ia32 fx_sw_reserved -# define user_i387_ia32_struct user_i387_struct -# define user32_fxsr_struct user_fxsr_struct -#endif - /* * Were we in an interrupt that interrupted kernel mode? * @@ -113,16 +99,9 @@ void unlazy_fpu(struct task_struct *tsk) } EXPORT_SYMBOL(unlazy_fpu); -#ifdef CONFIG_MATH_EMULATION -# define HAVE_HWFP (boot_cpu_data.hard_math) -#else -# define HAVE_HWFP 1 -#endif - -static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; static void __cpuinit mxcsr_feature_mask_init(void) @@ -454,7 +433,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) * FXSR floating point environment conversions. */ -static void +void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; @@ -491,8 +470,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) memcpy(&to[i], &from[i], sizeof(to[0])); } -static void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) +void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env) { struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; @@ -588,223 +567,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, return ret; } -/* - * Signal frame handlers. - */ - -static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; - - fp->status = fp->swd; - if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) - return -1; - return 1; -} - -static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - struct user_i387_ia32_struct env; - int err = 0; - - convert_from_fxsr(&env, tsk); - if (__copy_to_user(buf, &env, sizeof(env))) - return -1; - - err |= __put_user(fx->swd, &buf->status); - err |= __put_user(X86_FXSR_MAGIC, &buf->magic); - if (err) - return -1; - - if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) - return -1; - return 1; -} - -static int save_i387_xsave(void __user *buf) -{ - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fx = buf; - int err = 0; - - - sanitize_i387_state(tsk); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. - * This will enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware applications can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - if (save_i387_fxsave(fx) < 0) - return -1; - - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, - sizeof(struct _fpx_sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_ia32_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return -1; - - return 1; -} - -int save_i387_xstate_ia32(void __user *buf) -{ - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - struct task_struct *tsk = current; - - if (!used_math()) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) - return -EACCES; - /* - * This will cause a "finit" to be triggered by the next - * attempted FPU operation by the 'current' process. - */ - clear_used_math(); - - if (!HAVE_HWFP) { - return fpregs_soft_get(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) ? -1 : 1; - } - - unlazy_fpu(tsk); - - if (cpu_has_xsave) - return save_i387_xsave(fp); - if (cpu_has_fxsr) - return save_i387_fxsave(fp); - else - return save_i387_fsave(fp); -} - -static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - - return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, - sizeof(struct i387_fsave_struct)); -} - -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, - unsigned int size) -{ - struct task_struct *tsk = current; - struct user_i387_ia32_struct env; - int err; - - err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], - size); - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - if (err || __copy_from_user(&env, buf, sizeof(env))) - return 1; - convert_to_fxsr(tsk, &env); - - return 0; -} - -static int restore_i387_xsave(void __user *buf) -{ - struct _fpx_sw_bytes fx_sw_user; - struct _fpstate_ia32 __user *fx_user = - ((struct _fpstate_ia32 __user *) buf); - struct i387_fxsave_struct __user *fx = - (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; - struct xsave_hdr_struct *xsave_hdr = - ¤t->thread.fpu.state->xsave.xsave_hdr; - u64 mask; - int err; - - if (check_for_xstate(fx, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - - xsave_hdr->xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; - - /* - * Init the state that is not present in the memory layout - * and enabled by the OS. - */ - mask = ~(pcntxt_mask & ~mask); - xsave_hdr->xstate_bv &= mask; - - return err; -fx_only: - /* - * Couldn't find the extended state information in the memory - * layout. Restore the FP/SSE and init the other extended state - * enabled by the OS. - */ - xsave_hdr->xstate_bv = XSTATE_FPSSE; - return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); -} - -int restore_i387_xstate_ia32(void __user *buf) -{ - int err; - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - - if (HAVE_HWFP) - clear_fpu(tsk); - - if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - - return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) - return -EACCES; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (HAVE_HWFP) { - if (cpu_has_xsave) - err = restore_i387_xsave(buf); - else if (cpu_has_fxsr) - err = restore_i387_fxsave(fp, sizeof(struct - i387_fxsave_struct)); - else - err = restore_i387_fsave(fp); - } else { - err = fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) != 0; - } - set_used_math(); - - return err; -} - /* * FPU state for core dumps. * This is only used for a.out dumps now. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ef6a8456f71..30069d1a6a4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -97,16 +97,6 @@ void arch_task_cache_init(void) SLAB_PANIC | SLAB_NOTRACK, NULL); } -static inline void drop_fpu(struct task_struct *tsk) -{ - /* - * Forget coprocessor state.. - */ - tsk->fpu_counter = 0; - clear_fpu(tsk); - clear_used_math(); -} - /* * Free current thread data structures etc.. */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4c6a5c2bf0..861a9d1a463 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1332,9 +1332,6 @@ static const struct user_regset_view user_x86_64_view = { #define genregs32_get genregs_get #define genregs32_set genregs_set -#define user_i387_ia32_struct user_i387_struct -#define user32_fxsr_struct user_fxsr_struct - #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index bed431a3816..e10f96a7e04 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, regs->orig_ax = -1; /* disable syscall checks */ get_user_ex(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); + err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); get_user_ex(*pax, &sc->ax); } get_user_catch(err); @@ -206,7 +206,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { /* Default to using normal stack */ + unsigned long math_size = 0; unsigned long sp = regs->sp; + unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); /* redzone */ @@ -228,10 +230,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } if (used_math()) { - sp -= sig_xstate_size; -#ifdef CONFIG_X86_64 - sp = round_down(sp, 64); -#endif /* CONFIG_X86_64 */ + sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), + &buf_fx, &math_size); *fpstate = (void __user *)sp; } @@ -244,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (onsigstack && !likely(on_sig_stack(sp))) return (void __user *)-1L; - /* save i387 state */ - if (used_math() && save_i387_xstate(*fpstate) < 0) + /* save i387 and extended state */ + if (used_math() && + save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; return (void __user *)sp; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7a3d4df9eaf..0923d27f23d 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -10,9 +10,7 @@ #include #include #include -#ifdef CONFIG_IA32_EMULATION -#include -#endif +#include #include /* @@ -25,11 +23,7 @@ u64 pcntxt_mask; */ static struct xsave_struct *init_xstate_buf; -struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -struct _fpx_sw_bytes fx_sw_reserved_ia32; -#endif - +static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; /* @@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; */ void __sanitize_i387_state(struct task_struct *tsk) { - u64 xstate_bv; - int feature_bit = 0x2; struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; + int feature_bit = 0x2; + u64 xstate_bv; if (!fx) return; @@ -104,215 +98,314 @@ void __sanitize_i387_state(struct task_struct *tsk) * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ -int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw_user) +static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct i387_fxsave_struct) + sizeof(struct xsave_hdr_struct); unsigned int magic2; - int err; - err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], - sizeof(struct _fpx_sw_bytes)); - if (err) - return -EFAULT; + if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) + return -1; - /* - * First Magic check failed. - */ - if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) - return -EINVAL; + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fx_sw->extended_size) + return -1; - /* - * Check for error scenarios. - */ - if (fx_sw_user->xstate_size < min_xstate_size || - fx_sw_user->xstate_size > xstate_size || - fx_sw_user->xstate_size > fx_sw_user->extended_size) - return -EINVAL; - - err = __get_user(magic2, (__u32 __user *) (fpstate + - fx_sw_user->extended_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return err; /* * Check for the presence of second magic word at the end of memory * layout. This detects the case where the user just copied the legacy * fpstate layout with out copying the extended state information * in the memory layout. */ - if (magic2 != FP_XSTATE_MAGIC2) - return -EFAULT; + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) + || magic2 != FP_XSTATE_MAGIC2) + return -1; return 0; } -#ifdef CONFIG_X86_64 /* * Signal frame handlers. */ +static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) +{ + if (use_fxsr()) { + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct user_i387_ia32_struct env; + struct _fpstate_ia32 __user *fp = buf; -int save_i387_xstate(void __user *buf) + convert_from_fxsr(&env, tsk); + + if (__copy_to_user(buf, &env, sizeof(env)) || + __put_user(xsave->i387.swd, &fp->status) || + __put_user(X86_FXSR_MAGIC, &fp->magic)) + return -1; + } else { + struct i387_fsave_struct __user *fp = buf; + u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) + return -1; + } + + return 0; +} + +static inline int save_xstate_epilog(void __user *buf, int ia32_frame) { - struct task_struct *tsk = current; - int err = 0; + struct xsave_struct __user *x = buf; + struct _fpx_sw_bytes *sw_bytes; + u32 xstate_bv; + int err; - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) - return -EACCES; + /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ + sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; + err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - BUG_ON(sig_xstate_size < xstate_size); + if (!use_xsave()) + return err; - if ((unsigned long)buf % 64) - pr_err("%s: bad fpstate %p\n", __func__, buf); + err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - if (!used_math()) - return 0; + /* + * Read the xstate_bv which we copied (directly from the cpu or + * from the state in task struct) to the user buffers. + */ + err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - if (user_has_fpu()) { - if (use_xsave()) - err = xsave_user(buf); - else - err = fxsave_user(buf); + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xstate_bv in the xsave header. + * + * xsave aware apps can change the xstate_bv in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + xstate_bv |= XSTATE_FPSSE; - if (unlikely(err)) { - __clear_user(buf, xstate_size); - return err; - } + err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); + + return err; +} + +static inline int save_user_xstate(struct xsave_struct __user *buf) +{ + int err; + + if (use_xsave()) + err = xsave_user(buf); + else if (use_fxsr()) + err = fxsave_user((struct i387_fxsave_struct __user *) buf); + else + err = fsave_user((struct i387_fsave_struct __user *) buf); + + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + return err; +} + +/* + * Save the fpu, extended register state to the user signal frame. + * + * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save + * state is copied. + * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. + * + * buf == buf_fx for 64-bit frames and 32-bit fsave frame. + * buf != buf_fx for 32-bit frames with fxstate. + * + * If the fpu, extended register state is live, save the state directly + * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, + * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * + * If this is a 32-bit frame with fxstate, put a fsave header before + * the aligned state at 'buf_fx'. + * + * For [f]xsave state, update the SW reserved fields in the [f]xsave frame + * indicating the absence/presence of the extended state to the user. + */ +int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) +{ + struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; + struct task_struct *tsk = current; + int ia32_fxstate = (buf != buf_fx); + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); + + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EACCES; + + if (!HAVE_HWFP) + return fpregs_soft_get(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), NULL, + (struct _fpstate_ia32 __user *) buf) ? -1 : 1; + + if (user_has_fpu()) { + /* Save the live register state to the user directly. */ + if (save_user_xstate(buf_fx)) + return -1; + /* Update the thread's fxstate to save the fsave header. */ + if (ia32_fxstate) + fpu_fxsave(&tsk->thread.fpu); user_fpu_end(); } else { sanitize_i387_state(tsk); - if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, - xstate_size)) + if (__copy_to_user(buf_fx, xsave, xstate_size)) return -1; } - clear_used_math(); /* trigger finit */ + /* Save the fsave header for the 32-bit frames. */ + if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) + return -1; - if (use_xsave()) { - struct _fpstate __user *fx = buf; - struct _xstate __user *x = buf; - u64 xstate_bv; + if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) + return -1; + + drop_fpu(tsk); /* trigger finit */ + + return 0; +} - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); +static inline void +sanitize_restored_xstate(struct task_struct *tsk, + struct user_i387_ia32_struct *ia32_env, + u64 xstate_bv, int fx_only) +{ + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_size - - FP_XSTATE_MAGIC2_SIZE)); + if (use_xsave()) { + /* These bits must be zero. */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; /* - * Read the xstate_bv which we copied (directly from the cpu or - * from the state in task struct) to the user buffers and - * set the FP/SSE bits. + * Init the state that is not present in the memory + * layout and not enabled by the OS. */ - err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); + if (fx_only) + xsave_hdr->xstate_bv = XSTATE_FPSSE; + else + xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); + } + if (use_fxsr()) { /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware apps can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. + * mscsr reserved bits must be masked to zero for security + * reasons. */ - xstate_bv |= XSTATE_FPSSE; - - err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); + xsave->i387.mxcsr &= mxcsr_feature_mask; - if (err) - return err; + convert_to_fxsr(tsk, ia32_env); } - - return 1; } /* - * Restore the extended state if present. Otherwise, restore the FP/SSE - * state. + * Restore the extended state if present. Otherwise, restore the FP/SSE state. */ -static int restore_user_xstate(void __user *buf) +static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) { - struct _fpx_sw_bytes fx_sw_user; - u64 mask; - int err; - - if (((unsigned long)buf % 64) || - check_for_xstate(buf, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - /* - * restore the state passed by the user. - */ - err = xrestore_user(buf, mask); - if (err) - return err; - - /* - * init the state skipped by the user. - */ - mask = pcntxt_mask & ~mask; - if (unlikely(mask)) - xrstor_state(init_xstate_buf, mask); - - return 0; - -fx_only: - /* - * couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); - return fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (use_xsave()) { + if ((unsigned long)buf % 64 || fx_only) { + u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; + xrstor_state(init_xstate_buf, init_bv); + return fxrstor_checking((__force void *) buf); + } else { + u64 init_bv = pcntxt_mask & ~xbv; + if (unlikely(init_bv)) + xrstor_state(init_xstate_buf, init_bv); + return xrestore_user(buf, xbv); + } + } else if (use_fxsr()) { + return fxrstor_checking((__force void *) buf); + } else + return frstor_checking((__force void *) buf); } -/* - * This restores directly out of user space. Exceptions are handled. - */ -int restore_i387_xstate(void __user *buf) +int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) { + int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; - int err = 0; + int state_size = xstate_size; + u64 xstate_bv = 0; + int fx_only = 0; + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); if (!buf) { - if (used_math()) - goto clear; + drop_fpu(tsk); return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) - return -EACCES; + } + + if (!access_ok(VERIFY_READ, buf, size)) + return -EACCES; + + if (!used_math() && init_fpu(tsk)) + return -1; - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; + if (!HAVE_HWFP) { + return fpregs_soft_set(current, NULL, + 0, sizeof(struct user_i387_ia32_struct), + NULL, buf) != 0; } - user_fpu_begin(); - if (use_xsave()) - err = restore_user_xstate(buf); - else - err = fxrstor_checking((__force struct i387_fxsave_struct *) - buf); - if (unlikely(err)) { + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; + if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { + /* + * Couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + state_size = sizeof(struct i387_fxsave_struct); + fx_only = 1; + } else { + state_size = fx_sw_user.xstate_size; + xstate_bv = fx_sw_user.xstate_bv; + } + } + + if (ia32_fxstate) { + /* + * For 32-bit frames with fxstate, copy the user state to the + * thread's fpu state, reconstruct fxstate from the fsave + * header. Sanitize the copied state etc. + */ + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct user_i387_ia32_struct env; + + stop_fpu_preload(tsk); + unlazy_fpu(tsk); + + if (__copy_from_user(xsave, buf_fx, state_size) || + __copy_from_user(&env, buf, sizeof(env))) { + drop_fpu(tsk); + return -1; + } + + sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + } else { /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. + * For 64-bit frames and 32-bit fsave frames, restore the user + * state to the registers directly (with exceptions handled). */ -clear: - clear_fpu(tsk); - clear_used_math(); + user_fpu_begin(); + if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { + drop_fpu(tsk); + return -1; + } } - return err; + + return 0; } -#endif /* * Prepare the SW reserved portion of the fxsave memory layout, indicating @@ -323,31 +416,22 @@ clear: */ static void prepare_fx_sw_frame(void) { - int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + - FP_XSTATE_MAGIC2_SIZE; - - sig_xstate_size = sizeof(struct _fpstate) + size_extended; - -#ifdef CONFIG_IA32_EMULATION - sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; -#endif + int fsave_header_size = sizeof(struct i387_fsave_struct); + int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + if (config_enabled(CONFIG_X86_32)) + size += fsave_header_size; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.extended_size = size; fx_sw_reserved.xstate_bv = pcntxt_mask; fx_sw_reserved.xstate_size = xstate_size; -#ifdef CONFIG_IA32_EMULATION - memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); - fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; -#endif -} -#ifdef CONFIG_X86_64 -unsigned int sig_xstate_size = sizeof(struct _fpstate); -#endif + if (config_enabled(CONFIG_IA32_EMULATION)) { + fx_sw_reserved_ia32 = fx_sw_reserved; + fx_sw_reserved_ia32.extended_size += fsave_header_size; + } +} /* * Enable the extended processor state save/restore feature -- cgit v1.2.3-70-g09d2 From e962591749dfd4df9fea2c530ed7a3cfed50e5aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 24 Aug 2012 14:12:57 -0700 Subject: x86, fpu: drop_fpu() before restoring new state from sigframe No need to save the state with unlazy_fpu(), that is about to get overwritten by the state from the signal frame. Instead use drop_fpu() and continue to restore the new state. Also fold the stop_fpu_preload() into drop_fpu(). Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-2-git-send-email-suresh.b.siddha@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 7 +------ arch/x86/kernel/xsave.c | 8 +++----- 2 files changed, 4 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 4fbb4195bc6..78169d133d4 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -448,17 +448,12 @@ static inline void save_init_fpu(struct task_struct *tsk) preempt_enable(); } -static inline void stop_fpu_preload(struct task_struct *tsk) -{ - tsk->fpu_counter = 0; -} - static inline void drop_fpu(struct task_struct *tsk) { /* * Forget coprocessor state.. */ - stop_fpu_preload(tsk); + tsk->fpu_counter = 0; preempt_disable(); __drop_fpu(tsk); preempt_enable(); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 0923d27f23d..07ddc870640 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -382,16 +382,14 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; struct user_i387_ia32_struct env; - stop_fpu_preload(tsk); - unlazy_fpu(tsk); + drop_fpu(tsk); if (__copy_from_user(xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { - drop_fpu(tsk); + __copy_from_user(&env, buf, sizeof(env))) return -1; - } sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + set_used_math(); } else { /* * For 64-bit frames and 32-bit fsave frames, restore the user -- cgit v1.2.3-70-g09d2 From 377ffbcc536a5a6666dc077395163ab149c02610 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 24 Aug 2012 14:12:58 -0700 Subject: x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig() Few lines below we do drop_fpu() which is more safer. Remove the unnecessary user_fpu_end() in save_xstate_sig(), which allows the drop_fpu() to ignore any pending exceptions from the user-space and drop the current fpu. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-3-git-send-email-suresh.b.siddha@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 17 +++-------------- arch/x86/kernel/xsave.c | 1 - 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 78169d133d4..52202a6b12a 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -412,22 +412,11 @@ static inline void __drop_fpu(struct task_struct *tsk) } /* - * The actual user_fpu_begin/end() functions - * need to be preemption-safe. + * Need to be preemption-safe. * - * NOTE! user_fpu_end() must be used only after you - * have saved the FP state, and user_fpu_begin() must - * be used only immediately before restoring it. - * These functions do not do any save/restore on - * their own. + * NOTE! user_fpu_begin() must be used only immediately before restoring + * it. This function does not do any save/restore on their own. */ -static inline void user_fpu_end(void) -{ - preempt_disable(); - __thread_fpu_end(current); - preempt_enable(); -} - static inline void user_fpu_begin(void) { preempt_disable(); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 07ddc870640..4ac5f2e135b 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -255,7 +255,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) /* Update the thread's fxstate to save the fsave header. */ if (ia32_fxstate) fpu_fxsave(&tsk->thread.fpu); - user_fpu_end(); } else { sanitize_i387_state(tsk); if (__copy_to_user(buf_fx, xsave, xstate_size)) -- cgit v1.2.3-70-g09d2 From 9c1c3fac53378c9782c18f80107965578d7b7167 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 24 Aug 2012 14:12:59 -0700 Subject: x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu() kvm's guest fpu save/restore should be wrapped around kernel_fpu_begin/end(). This will avoid for example taking a DNA in kvm_load_guest_fpu() when it tries to load the fpu immediately after doing unlazy_fpu() on the host side. More importantly this will prevent the host process fpu from being corrupted. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-4-git-send-email-suresh.b.siddha@intel.com Cc: Avi Kivity Signed-off-by: H. Peter Anvin --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 148ed666e31..cf637f572bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5972,7 +5972,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; - unlazy_fpu(current); + kernel_fpu_begin(); fpu_restore_checking(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } @@ -5986,6 +5986,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 0; fpu_save_init(&vcpu->arch.guest_fpu); + kernel_fpu_end(); ++vcpu->stat.fpu_reload; kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); trace_kvm_fpu(0); -- cgit v1.2.3-70-g09d2 From 841e3604d35aa70d399146abdc526d8c89a2c2f5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 24 Aug 2012 14:13:00 -0700 Subject: x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage use kernel_fpu_begin/end() instead of unconditionally accessing cr0 and saving/restoring just the few used xmm/ymm registers. This has some advantages like: * If the task's FPU state is already active, then kernel_fpu_begin() will just save the user-state and avoiding the read/write of cr0. In general, cr0 accesses are much slower. * Manual save/restore of xmm/ymm registers will affect the 'modified' and the 'init' optimizations brought in the by xsaveopt/xrstor infrastructure. * Foward compatibility with future vector register extensions will be a problem if the xmm/ymm registers are manually saved and restored (corrupting the extended state of those vector registers). With this patch, there was no significant difference in the xor throughput using AVX, measured during boot. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-5-git-send-email-suresh.b.siddha@intel.com Cc: Jim Kukunas Cc: NeilBrown Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/xor_32.h | 56 ++++++-------------------------------- arch/x86/include/asm/xor_64.h | 61 +++++++----------------------------------- arch/x86/include/asm/xor_avx.h | 54 +++++++++---------------------------- 3 files changed, 29 insertions(+), 142 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 454570891bd..aabd5850bdb 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = { * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile( \ - "movups %%xmm0,(%0) ;\n\t" \ - "movups %%xmm1,0x10(%0) ;\n\t" \ - "movups %%xmm2,0x20(%0) ;\n\t" \ - "movups %%xmm3,0x30(%0) ;\n\t" \ - : \ - : "r" (xmm_save) \ - : "memory"); \ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%0),%%xmm0 ;\n\t" \ - "movups 0x10(%0),%%xmm1 ;\n\t" \ - "movups 0x20(%0),%%xmm2 ;\n\t" \ - "movups 0x30(%0),%%xmm3 ;\n\t" \ - : \ - : "r" (xmm_save) \ - : "memory"); \ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0) - -#define ALIGN16 __attribute__((aligned(16))) - #define OFFS(x) "16*("#x")" #define PF_OFFS(x) "256+16*("#x")" #define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" @@ -587,10 +555,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); /* Make sure GCC forgets anything it knows about p4 or p5, such that it won't pass to the asm volatile below a @@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, like assuming they have some legal value. */ asm("" : "=r" (p4), "=r" (p5)); - XMMS_RESTORE; + kernel_fpu_end(); } static struct xor_block_template xor_block_pIII_sse = { diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h index b9b2323e90f..5fc06d0b7eb 100644 --- a/arch/x86/include/asm/xor_64.h +++ b/arch/x86/include/asm/xor_64.h @@ -34,41 +34,7 @@ * no advantages to be gotten from x86-64 here anyways. */ -typedef struct { - unsigned long a, b; -} __attribute__((aligned(16))) xmm_store_t; - -/* Doesn't use gcc to save the XMM registers, because there is no easy way to - tell it to do a clts before the register saving. */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - asm volatile( \ - "movq %%cr0,%0 ;\n\t" \ - "clts ;\n\t" \ - "movups %%xmm0,(%1) ;\n\t" \ - "movups %%xmm1,0x10(%1) ;\n\t" \ - "movups %%xmm2,0x20(%1) ;\n\t" \ - "movups %%xmm3,0x30(%1) ;\n\t" \ - : "=&r" (cr0) \ - : "r" (xmm_save) \ - : "memory"); \ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%1),%%xmm0 ;\n\t" \ - "movups 0x10(%1),%%xmm1 ;\n\t" \ - "movups 0x20(%1),%%xmm2 ;\n\t" \ - "movups 0x30(%1),%%xmm3 ;\n\t" \ - "movq %0,%%cr0 ;\n\t" \ - : \ - : "r" (cr0), "r" (xmm_save) \ - : "memory"); \ - preempt_enable(); \ -} while (0) +#include #define OFFS(x) "16*("#x")" #define PF_OFFS(x) "256+16*("#x")" @@ -91,10 +57,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned int lines = bytes >> 8; - unsigned long cr0; - xmm_store_t xmm_save[4]; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : [inc] "r" (256UL) : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static struct xor_block_template xor_block_sse = { diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 2510d35f480..7ea79c5fa1f 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -20,32 +20,6 @@ #include #include -#define ALIGN32 __aligned(32) - -#define YMM_SAVED_REGS 4 - -#define YMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \ - asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \ - asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \ - asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \ -} while (0); - -#define YMMS_RESTORE \ -do { \ - asm volatile("sfence" : : : "memory"); \ - asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \ - asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \ - asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \ - asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0); - #define BLOCK4(i) \ BLOCK(32 * i, 0) \ BLOCK(32 * (i + 1), 1) \ @@ -60,10 +34,9 @@ do { \ static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -82,16 +55,15 @@ do { \ p1 = (unsigned long *)((uintptr_t)p1 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -113,16 +85,15 @@ do { \ p2 = (unsigned long *)((uintptr_t)p2 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2, unsigned long *p3) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -147,16 +118,15 @@ do { \ p3 = (unsigned long *)((uintptr_t)p3 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -184,7 +154,7 @@ do { \ p4 = (unsigned long *)((uintptr_t)p4 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static struct xor_block_template xor_block_avx = { -- cgit v1.2.3-70-g09d2 From 304bceda6a18ae0b0240b8aac9a6bdf8ce2d2469 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 24 Aug 2012 14:13:02 -0700 Subject: x86, fpu: use non-lazy fpu restore for processors supporting xsave Fundamental model of the current Linux kernel is to lazily init and restore FPU instead of restoring the task state during context switch. This changes that fundamental lazy model to the non-lazy model for the processors supporting xsave feature. Reasons driving this model change are: i. Newer processors support optimized state save/restore using xsaveopt and xrstor by tracking the INIT state and MODIFIED state during context-switch. This is faster than modifying the cr0.TS bit which has serializing semantics. ii. Newer glibc versions use SSE for some of the optimized copy/clear routines. With certain workloads (like boot, kernel-compilation etc), application completes its work with in the first 5 task switches, thus taking upto 5 #DNA traps with the kernel not getting a chance to apply the above mentioned pre-load heuristic. iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit and thus will not work correctly in the presence of lazy restore. Non-lazy state restore is needed for enabling such features. Some data on a two socket SNB system: * Saved 20K DNA exceptions during boot on a two socket SNB system. * Saved 50K DNA exceptions during kernel-compilation workload. * Improved throughput of the AVX based checksumming function inside the kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts pair. Also now kernel_fpu_begin/end() relies on the patched alternative instructions. So move check_fpu() which uses the kernel_fpu_begin/end() after alternative_instructions(). Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.siddha@intel.com Merge 32-bit boot fix from, Link: http://lkml.kernel.org/r/1347300665-6209-4-git-send-email-suresh.b.siddha@intel.com Cc: Jim Kukunas Cc: NeilBrown Cc: Avi Kivity Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 96 ++++++++++++++++++++++++------------- arch/x86/include/asm/i387.h | 1 + arch/x86/include/asm/xsave.h | 1 + arch/x86/kernel/cpu/bugs.c | 7 ++- arch/x86/kernel/i387.c | 20 ++++++-- arch/x86/kernel/process.c | 12 +++-- arch/x86/kernel/process_32.c | 4 -- arch/x86/kernel/process_64.c | 4 -- arch/x86/kernel/traps.c | 5 +- arch/x86/kernel/xsave.c | 57 +++++++++++++++++----- 10 files changed, 146 insertions(+), 61 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 52202a6b12a..8ca0f9f45ac 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -291,15 +291,48 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) static inline void __thread_fpu_end(struct task_struct *tsk) { __thread_clear_has_fpu(tsk); - stts(); + if (!use_xsave()) + stts(); } static inline void __thread_fpu_begin(struct task_struct *tsk) { - clts(); + if (!use_xsave()) + clts(); __thread_set_has_fpu(tsk); } +static inline void __drop_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +static inline void drop_fpu(struct task_struct *tsk) +{ + /* + * Forget coprocessor state.. + */ + preempt_disable(); + tsk->fpu_counter = 0; + __drop_fpu(tsk); + clear_used_math(); + preempt_enable(); +} + +static inline void drop_init_fpu(struct task_struct *tsk) +{ + if (!use_xsave()) + drop_fpu(tsk); + else + xrstor_state(init_xstate_buf, -1); +} + /* * FPU state switching for scheduling. * @@ -333,7 +366,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta { fpu_switch_t fpu; - fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + /* + * If the task has used the math, pre-load the FPU on xsave processors + * or if the past 5 consecutive context-switches used math. + */ + fpu.preload = tsk_used_math(new) && (use_xsave() || + new->fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -345,14 +383,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); - } else + } else if (!use_xsave()) stts(); } else { old->fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { new->fpu_counter++; - if (fpu_lazy_restore(new, cpu)) + if (!use_xsave() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); @@ -372,7 +410,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) { if (fpu.preload) { if (unlikely(restore_fpu_checking(new))) - __thread_fpu_end(new); + drop_init_fpu(new); } } @@ -400,17 +438,6 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame) return __restore_xstate_sig(buf, buf_fx, size); } -static inline void __drop_fpu(struct task_struct *tsk) -{ - if (__thread_has_fpu(tsk)) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); - } -} - /* * Need to be preemption-safe. * @@ -431,24 +458,18 @@ static inline void user_fpu_begin(void) static inline void save_init_fpu(struct task_struct *tsk) { WARN_ON_ONCE(!__thread_has_fpu(tsk)); + + if (use_xsave()) { + xsave_state(&tsk->thread.fpu.state->xsave, -1); + return; + } + preempt_disable(); __save_init_fpu(tsk); __thread_fpu_end(tsk); preempt_enable(); } -static inline void drop_fpu(struct task_struct *tsk) -{ - /* - * Forget coprocessor state.. - */ - tsk->fpu_counter = 0; - preempt_disable(); - __drop_fpu(tsk); - preempt_enable(); - clear_used_math(); -} - /* * i387 state interaction */ @@ -503,12 +524,21 @@ static inline void fpu_free(struct fpu *fpu) } } -static inline void fpu_copy(struct fpu *dst, struct fpu *src) +static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) { - memcpy(dst->state, src->state, xstate_size); -} + if (use_xsave()) { + struct xsave_struct *xsave = &dst->thread.fpu.state->xsave; -extern void fpu_finit(struct fpu *fpu); + memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct)); + xsave_state(xsave, -1); + } else { + struct fpu *dfpu = &dst->thread.fpu; + struct fpu *sfpu = &src->thread.fpu; + + unlazy_fpu(src); + memcpy(dfpu->state, sfpu->state, xstate_size); + } +} static inline unsigned long alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 257d9cca214..6c3bd378281 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -19,6 +19,7 @@ struct pt_regs; struct user_i387_struct; extern int init_fpu(struct task_struct *child); +extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index c1d989a1519..2ddee1b8779 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -34,6 +34,7 @@ extern unsigned int xstate_size; extern u64 pcntxt_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; +extern struct xsave_struct *init_xstate_buf; extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c97bb7b5a9f..d0e910da16c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -165,10 +165,15 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); #endif check_config(); - check_fpu(); check_hlt(); check_popad(); init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); + + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. + */ + check_fpu(); } diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index ab6a2e8028a..528557470dd 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -22,7 +22,15 @@ /* * Were we in an interrupt that interrupted kernel mode? * - * We can do a kernel_fpu_begin/end() pair *ONLY* if that + * For now, on xsave platforms we will return interrupted + * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore + * for xsave platforms, ideally we can change the return value + * to something like __thread_has_fpu(current). But we need to + * be careful of doing __thread_clear_has_fpu() before saving + * the FPU etc for supporting nested uses etc. For now, take + * the simple route! + * + * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that * pair does nothing at all: the thread must not have fpu (so * that we don't try to save the FPU state), and TS must * be set (so that the clts/stts pair does nothing that is @@ -30,6 +38,9 @@ */ static inline bool interrupted_kernel_fpu_idle(void) { + if (use_xsave()) + return 0; + return !__thread_has_fpu(current) && (read_cr0() & X86_CR0_TS); } @@ -73,7 +84,7 @@ void kernel_fpu_begin(void) __save_init_fpu(me); __thread_clear_has_fpu(me); /* We do 'stts()' in kernel_fpu_end() */ - } else { + } else if (!use_xsave()) { this_cpu_write(fpu_owner_task, NULL); clts(); } @@ -82,7 +93,10 @@ EXPORT_SYMBOL(kernel_fpu_begin); void kernel_fpu_end(void) { - stts(); + if (use_xsave()) + math_state_restore(); + else + stts(); preempt_enable(); } EXPORT_SYMBOL(kernel_fpu_end); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 30069d1a6a4..c21e30f8923 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { int ret; - unlazy_fpu(src); - *dst = *src; if (fpu_allocated(&src->thread.fpu)) { memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); ret = fpu_alloc(&dst->thread.fpu); if (ret) return ret; - fpu_copy(&dst->thread.fpu, &src->thread.fpu); + fpu_copy(dst, src); } return 0; } @@ -153,7 +151,13 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - drop_fpu(tsk); + drop_init_fpu(tsk); + /* + * Free the FPU state for non xsave platforms. They get reallocated + * lazily at the first use. + */ + if (!use_xsave()) + free_thread_xstate(tsk); } static void hard_disable_TSC(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 516fa186121..b9ff83c7135 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0a980c9d7cb..8a6d20ce197 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } void diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b481341c936..ac7d5275f6e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -613,11 +613,12 @@ void math_state_restore(void) } __thread_fpu_begin(tsk); + /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ if (unlikely(restore_fpu_checking(tsk))) { - __thread_fpu_end(tsk); + drop_init_fpu(tsk); force_sig(SIGSEGV, tsk); return; } @@ -629,6 +630,8 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { + BUG_ON(use_xsave()); + #ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 4ac5f2e135b..e7752bd7cac 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -21,7 +21,7 @@ u64 pcntxt_mask; /* * Represents init state for the supported extended state. */ -static struct xsave_struct *init_xstate_buf; +struct xsave_struct *init_xstate_buf; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; @@ -268,7 +268,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) return -1; - drop_fpu(tsk); /* trigger finit */ + drop_init_fpu(tsk); /* trigger finit */ return 0; } @@ -340,7 +340,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) config_enabled(CONFIG_IA32_EMULATION)); if (!buf) { - drop_fpu(tsk); + drop_init_fpu(tsk); return 0; } @@ -380,15 +380,30 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) */ struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; struct user_i387_ia32_struct env; + int err = 0; + /* + * Drop the current fpu which clears used_math(). This ensures + * that any context-switch during the copy of the new state, + * avoids the intermediate state from getting restored/saved. + * Thus avoiding the new restored state from getting corrupted. + * We will be ready to restore/save the state only after + * set_used_math() is again set. + */ drop_fpu(tsk); if (__copy_from_user(xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) - return -1; + __copy_from_user(&env, buf, sizeof(env))) { + err = -1; + } else { + sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + set_used_math(); + } - sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); - set_used_math(); + if (use_xsave()) + math_state_restore(); + + return err; } else { /* * For 64-bit frames and 32-bit fsave frames, restore the user @@ -396,7 +411,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) */ user_fpu_begin(); if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { - drop_fpu(tsk); + drop_init_fpu(tsk); return -1; } } @@ -435,10 +450,28 @@ static void prepare_fx_sw_frame(void) */ static inline void xstate_enable(void) { + clts(); set_in_cr4(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); } +/* + * This is same as math_state_restore(). But use_xsave() is not yet + * patched to use math_state_restore(). + */ +static inline void init_restore_xstate(void) +{ + init_fpu(current); + __thread_fpu_begin(current); + xrstor_state(init_xstate_buf, -1); +} + +static inline void xstate_enable_ap(void) +{ + xstate_enable(); + init_restore_xstate(); +} + /* * Record the offsets and sizes of different state managed by the xsave * memory layout. @@ -479,7 +512,6 @@ static void __init setup_xstate_init(void) __alignof__(struct xsave_struct)); init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; - clts(); /* * Init all the features state with header_bv being 0x0 */ @@ -489,7 +521,6 @@ static void __init setup_xstate_init(void) * of any feature which is not represented by all zero's. */ xsave_state(init_xstate_buf, -1); - stts(); } /* @@ -533,6 +564,10 @@ static void __init xstate_enable_boot_cpu(void) pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", pcntxt_mask, xstate_size); + + current->thread.fpu.state = + alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); + init_restore_xstate(); } /* @@ -551,6 +586,6 @@ void __cpuinit xsave_init(void) return; this_func = next_func; - next_func = xstate_enable; + next_func = xstate_enable_ap; this_func(); } -- cgit v1.2.3-70-g09d2 From 5d2bd7009f306c82afddd1ca4d9763ad8473c216 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 6 Sep 2012 14:58:52 -0700 Subject: x86, fpu: decouple non-lazy/eager fpu restore from xsave Decouple non-lazy/eager fpu restore policy from the existence of the xsave feature. Introduce a synthetic CPUID flag to represent the eagerfpu policy. "eagerfpu=on" boot paramter will enable the policy. Requested-by: H. Peter Anvin Requested-by: Linus Torvalds Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1347300665-6209-2-git-send-email-suresh.b.siddha@intel.com Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 4 ++ arch/x86/include/asm/cpufeature.h | 2 + arch/x86/include/asm/fpu-internal.h | 54 ++++++++++++++++------- arch/x86/kernel/cpu/common.c | 2 - arch/x86/kernel/i387.c | 25 ++++------- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/traps.c | 2 +- arch/x86/kernel/xsave.c | 87 ++++++++++++++++++++++++------------- 8 files changed, 112 insertions(+), 66 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ad7e2e5088c..741d064fdc6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1833,6 +1833,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. + eagerfpu= [X86] + on enable eager fpu restore + off disable eager fpu restore + nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or wfi(ARM) instruction doesn't work correctly and not to use it. This is also useful when using JTAG debugger. diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 6b7ee5ff682..5dd2b473ccf 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -97,6 +97,7 @@ #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ +#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ @@ -305,6 +306,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) +#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 8ca0f9f45ac..0ca72f0d4b4 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -38,6 +38,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, extern unsigned int mxcsr_feature_mask; extern void fpu_init(void); +extern void eager_fpu_init(void); DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); @@ -84,6 +85,11 @@ static inline int is_x32_frame(void) #define X87_FSW_ES (1 << 7) /* Exception Summary */ +static __always_inline __pure bool use_eager_fpu(void) +{ + return static_cpu_has(X86_FEATURE_EAGER_FPU); +} + static __always_inline __pure bool use_xsaveopt(void) { return static_cpu_has(X86_FEATURE_XSAVEOPT); @@ -99,6 +105,14 @@ static __always_inline __pure bool use_fxsr(void) return static_cpu_has(X86_FEATURE_FXSR); } +static inline void fx_finit(struct i387_fxsave_struct *fx) +{ + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; + if (cpu_has_xmm) + fx->mxcsr = MXCSR_DEFAULT; +} + extern void __sanitize_i387_state(struct task_struct *); static inline void sanitize_i387_state(struct task_struct *tsk) @@ -291,13 +305,13 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) static inline void __thread_fpu_end(struct task_struct *tsk) { __thread_clear_has_fpu(tsk); - if (!use_xsave()) + if (!use_eager_fpu()) stts(); } static inline void __thread_fpu_begin(struct task_struct *tsk) { - if (!use_xsave()) + if (!use_eager_fpu()) clts(); __thread_set_has_fpu(tsk); } @@ -327,10 +341,14 @@ static inline void drop_fpu(struct task_struct *tsk) static inline void drop_init_fpu(struct task_struct *tsk) { - if (!use_xsave()) + if (!use_eager_fpu()) drop_fpu(tsk); - else - xrstor_state(init_xstate_buf, -1); + else { + if (use_xsave()) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); + } } /* @@ -370,7 +388,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta * If the task has used the math, pre-load the FPU on xsave processors * or if the past 5 consecutive context-switches used math. */ - fpu.preload = tsk_used_math(new) && (use_xsave() || + fpu.preload = tsk_used_math(new) && (use_eager_fpu() || new->fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) @@ -383,14 +401,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); - } else if (!use_xsave()) + } else if (!use_eager_fpu()) stts(); } else { old->fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { new->fpu_counter++; - if (!use_xsave() && fpu_lazy_restore(new, cpu)) + if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); @@ -452,6 +470,14 @@ static inline void user_fpu_begin(void) preempt_enable(); } +static inline void __save_fpu(struct task_struct *tsk) +{ + if (use_xsave()) + xsave_state(&tsk->thread.fpu.state->xsave, -1); + else + fpu_fxsave(&tsk->thread.fpu); +} + /* * These disable preemption on their own and are safe */ @@ -459,8 +485,8 @@ static inline void save_init_fpu(struct task_struct *tsk) { WARN_ON_ONCE(!__thread_has_fpu(tsk)); - if (use_xsave()) { - xsave_state(&tsk->thread.fpu.state->xsave, -1); + if (use_eager_fpu()) { + __save_fpu(tsk); return; } @@ -526,11 +552,9 @@ static inline void fpu_free(struct fpu *fpu) static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) { - if (use_xsave()) { - struct xsave_struct *xsave = &dst->thread.fpu.state->xsave; - - memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct)); - xsave_state(xsave, -1); + if (use_eager_fpu()) { + memset(&dst->thread.fpu.state->xsave, 0, xstate_size); + __save_fpu(dst); } else { struct fpu *dfpu = &dst->thread.fpu; struct fpu *sfpu = &src->thread.fpu; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fcc..b0fe078614d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); - xsave_init(); raw_local_save_flags(kernel_eflags); @@ -1352,6 +1351,5 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); - xsave_init(); } #endif diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 528557470dd..6782e398386 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -22,9 +22,8 @@ /* * Were we in an interrupt that interrupted kernel mode? * - * For now, on xsave platforms we will return interrupted - * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore - * for xsave platforms, ideally we can change the return value + * For now, with eagerfpu we will return interrupted kernel FPU + * state as not-idle. TBD: Ideally we can change the return value * to something like __thread_has_fpu(current). But we need to * be careful of doing __thread_clear_has_fpu() before saving * the FPU etc for supporting nested uses etc. For now, take @@ -38,7 +37,7 @@ */ static inline bool interrupted_kernel_fpu_idle(void) { - if (use_xsave()) + if (use_eager_fpu()) return 0; return !__thread_has_fpu(current) && @@ -84,7 +83,7 @@ void kernel_fpu_begin(void) __save_init_fpu(me); __thread_clear_has_fpu(me); /* We do 'stts()' in kernel_fpu_end() */ - } else if (!use_xsave()) { + } else if (!use_eager_fpu()) { this_cpu_write(fpu_owner_task, NULL); clts(); } @@ -93,7 +92,7 @@ EXPORT_SYMBOL(kernel_fpu_begin); void kernel_fpu_end(void) { - if (use_xsave()) + if (use_eager_fpu()) math_state_restore(); else stts(); @@ -122,7 +121,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; - clts(); if (cpu_has_fxsr) { memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); asm volatile("fxsave %0" : : "m" (fx_scratch)); @@ -131,7 +129,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) mask = 0x0000ffbf; } mxcsr_feature_mask &= mask; - stts(); } static void __cpuinit init_thread_xstate(void) @@ -185,9 +182,8 @@ void __cpuinit fpu_init(void) init_thread_xstate(); mxcsr_feature_mask_init(); - /* clean state in init */ - current_thread_info()->status = 0; - clear_used_math(); + xsave_init(); + eager_fpu_init(); } void fpu_finit(struct fpu *fpu) @@ -198,12 +194,7 @@ void fpu_finit(struct fpu *fpu) } if (cpu_has_fxsr) { - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - - memset(fx, 0, xstate_size); - fx->cwd = 0x37f; - if (cpu_has_xmm) - fx->mxcsr = MXCSR_DEFAULT; + fx_finit(&fpu->state->fxsave); } else { struct i387_fsave_struct *fp = &fpu->state->fsave; memset(fp, 0, xstate_size); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c21e30f8923..dc3567e083f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -156,7 +156,7 @@ void flush_thread(void) * Free the FPU state for non xsave platforms. They get reallocated * lazily at the first use. */ - if (!use_xsave()) + if (!use_eager_fpu()) free_thread_xstate(tsk); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ac7d5275f6e..4f4aba0551b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -630,7 +630,7 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { - BUG_ON(use_xsave()); + BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index e7752bd7cac..c0afd2c4376 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -400,7 +400,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) set_used_math(); } - if (use_xsave()) + if (use_eager_fpu()) math_state_restore(); return err; @@ -450,28 +450,10 @@ static void prepare_fx_sw_frame(void) */ static inline void xstate_enable(void) { - clts(); set_in_cr4(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); } -/* - * This is same as math_state_restore(). But use_xsave() is not yet - * patched to use math_state_restore(). - */ -static inline void init_restore_xstate(void) -{ - init_fpu(current); - __thread_fpu_begin(current); - xrstor_state(init_xstate_buf, -1); -} - -static inline void xstate_enable_ap(void) -{ - xstate_enable(); - init_restore_xstate(); -} - /* * Record the offsets and sizes of different state managed by the xsave * memory layout. @@ -500,17 +482,20 @@ static void __init setup_xstate_features(void) /* * setup the xstate image representing the init state */ -static void __init setup_xstate_init(void) +static void __init setup_init_fpu_buf(void) { - setup_xstate_features(); - /* * Setup init_xstate_buf to represent the init state of * all the features managed by the xsave */ init_xstate_buf = alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); - init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; + fx_finit(&init_xstate_buf->i387); + + if (!cpu_has_xsave) + return; + + setup_xstate_features(); /* * Init all the features state with header_bv being 0x0 @@ -523,6 +508,17 @@ static void __init setup_xstate_init(void) xsave_state(init_xstate_buf, -1); } +static int disable_eagerfpu; +static int __init eager_fpu_setup(char *s) +{ + if (!strcmp(s, "on")) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + else if (!strcmp(s, "off")) + disable_eagerfpu = 1; + return 1; +} +__setup("eagerfpu=", eager_fpu_setup); + /* * Enable and initialize the xsave feature. */ @@ -559,15 +555,10 @@ static void __init xstate_enable_boot_cpu(void) update_regset_xstate_info(xstate_size, pcntxt_mask); prepare_fx_sw_frame(); - - setup_xstate_init(); + setup_init_fpu_buf(); pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", pcntxt_mask, xstate_size); - - current->thread.fpu.state = - alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); - init_restore_xstate(); } /* @@ -586,6 +577,42 @@ void __cpuinit xsave_init(void) return; this_func = next_func; - next_func = xstate_enable_ap; + next_func = xstate_enable; this_func(); } + +static inline void __init eager_fpu_init_bp(void) +{ + current->thread.fpu.state = + alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); + if (!init_xstate_buf) + setup_init_fpu_buf(); +} + +void __cpuinit eager_fpu_init(void) +{ + static __refdata void (*boot_func)(void) = eager_fpu_init_bp; + + clear_used_math(); + current_thread_info()->status = 0; + if (!cpu_has_eager_fpu) { + stts(); + return; + } + + if (boot_func) { + boot_func(); + boot_func = NULL; + } + + /* + * This is same as math_state_restore(). But use_xsave() is + * not yet patched to use math_state_restore(). + */ + init_fpu(current); + __thread_fpu_begin(current); + if (cpu_has_xsave) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); +} -- cgit v1.2.3-70-g09d2 From 212b02125f3725127148475059a70031bd031bdb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 6 Sep 2012 15:05:18 -0700 Subject: x86, fpu: enable eagerfpu by default for xsaveopt xsaveopt/xrstor support optimized state save/restore by tracking the INIT state and MODIFIED state during context-switch. Enable eagerfpu by default for processors supporting xsaveopt. Can be disabled by passing "eagerfpu=off" boot parameter. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1347300665-6209-3-git-send-email-suresh.b.siddha@intel.com Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 2 +- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/kernel/xsave.c | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 741d064fdc6..e8f7faaa457 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1834,7 +1834,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. enabling legacy floating-point and sse state. eagerfpu= [X86] - on enable eager fpu restore + on enable eager fpu restore (default for xsaveopt) off disable eager fpu restore nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 5dd2b473ccf..0debdb51844 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -300,6 +300,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) +#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c0afd2c4376..e99f75439f6 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -557,6 +557,9 @@ static void __init xstate_enable_boot_cpu(void) prepare_fx_sw_frame(); setup_init_fpu_buf(); + if (cpu_has_xsaveopt && !disable_eagerfpu) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", pcntxt_mask, xstate_size); } -- cgit v1.2.3-70-g09d2 From e00229819f306b1f86134095347e9187dc346bd1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 10 Sep 2012 10:32:32 -0700 Subject: x86, fpu: make eagerfpu= boot param tri-state Add the "eagerfpu=auto" (that selects the default scheme in enabling eagerfpu) which can override compiled-in boot parameters like "eagerfpu=on/off" (that force enable/disable eagerfpu). Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1347300665-6209-5-git-send-email-suresh.b.siddha@intel.com Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 4 +++- arch/x86/kernel/xsave.c | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e8f7faaa457..46a6a8288de 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1834,8 +1834,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. enabling legacy floating-point and sse state. eagerfpu= [X86] - on enable eager fpu restore (default for xsaveopt) + on enable eager fpu restore off disable eager fpu restore + auto selects the default scheme, which automatically + enables eagerfpu restore for xsaveopt. nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or wfi(ARM) instruction doesn't work correctly and not to diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index e99f75439f6..4e89b3dd408 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -508,13 +508,15 @@ static void __init setup_init_fpu_buf(void) xsave_state(init_xstate_buf, -1); } -static int disable_eagerfpu; +static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; static int __init eager_fpu_setup(char *s) { if (!strcmp(s, "on")) - setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + eagerfpu = ENABLE; else if (!strcmp(s, "off")) - disable_eagerfpu = 1; + eagerfpu = DISABLE; + else if (!strcmp(s, "auto")) + eagerfpu = AUTO; return 1; } __setup("eagerfpu=", eager_fpu_setup); @@ -557,8 +559,9 @@ static void __init xstate_enable_boot_cpu(void) prepare_fx_sw_frame(); setup_init_fpu_buf(); - if (cpu_has_xsaveopt && !disable_eagerfpu) - setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", pcntxt_mask, xstate_size); @@ -598,6 +601,10 @@ void __cpuinit eager_fpu_init(void) clear_used_math(); current_thread_info()->status = 0; + + if (eagerfpu == ENABLE) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + if (!cpu_has_eager_fpu) { stts(); return; -- cgit v1.2.3-70-g09d2 From a8615af4bc3621cb01096541dafa6f68352ec2d9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 10 Sep 2012 10:40:08 -0700 Subject: x86, fpu: remove cpu_has_xmm check in the fx_finit() CPUs with FXSAVE but no XMM/MXCSR (Pentium II from Intel, Crusoe/TM-3xxx/5xxx from Transmeta, and presumably some of the K6 generation from AMD) ever looked at the mxcsr field during fxrstor/fxsave. So remove the cpu_has_xmm check in the fx_finit() Reported-by: Al Viro Acked-by: H. Peter Anvin Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1347300665-6209-6-git-send-email-suresh.b.siddha@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fpu-internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 0ca72f0d4b4..92f3c6ed817 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -109,8 +109,7 @@ static inline void fx_finit(struct i387_fxsave_struct *fx) { memset(fx, 0, xstate_size); fx->cwd = 0x37f; - if (cpu_has_xmm) - fx->mxcsr = MXCSR_DEFAULT; + fx->mxcsr = MXCSR_DEFAULT; } extern void __sanitize_i387_state(struct task_struct *); -- cgit v1.2.3-70-g09d2 From 3ddbebf878ac8d958bb34e87a742a6b3adc283a3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 17 Sep 2012 13:22:53 +0200 Subject: PCI: Discard __init annotations for pci_fixup_irqs() and related functions Remove the __init annotations in order to keep pci_fixup_irqs() around after init (e.g. for hotplug). This requires the same change for the implementation of pcibios_update_irq() on all architectures. While at it, all __devinit annotations are removed as well, since they will be useless now that HOTPLUG is always on. Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas Acked-by: Greg Kroah-Hartman --- arch/alpha/kernel/pci.c | 2 +- arch/arm/kernel/bios32.c | 2 +- arch/ia64/pci/pci.c | 2 +- arch/mips/pci/pci.c | 2 +- arch/sh/drivers/pci/pci.c | 2 +- arch/sparc/kernel/leon_pci.c | 2 +- arch/tile/kernel/pci.c | 2 +- arch/tile/kernel/pci_gx.c | 2 +- arch/unicore32/kernel/pci.c | 2 +- arch/x86/pci/visws.c | 2 +- arch/xtensa/kernel/pci.c | 2 +- drivers/pci/setup-irq.c | 4 ++-- 12 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 9816d5a4d17..920392f61ef 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -256,7 +256,7 @@ pcibios_fixup_bus(struct pci_bus *bus) } } -void __init +void pcibios_update_irq(struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index 2b2f25e7fef..0174fe6effe 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -272,7 +272,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) +void pcibios_update_irq(struct pci_dev *dev, int irq) { if (debug_pci) printk("PCI: Assigning IRQ %02d to %s\n", irq, pci_name(dev)); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 81acc7a57f3..27db6a8afc4 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -461,7 +461,7 @@ void pcibios_set_master (struct pci_dev *dev) /* No special bus mastering setup handling */ } -void __devinit +void pcibios_update_irq (struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 690356808f8..64f0419e585 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -313,7 +313,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } } -void __init +void pcibios_update_irq(struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 40db2d0aef3..1bd3e089b74 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -192,7 +192,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -void __init pcibios_update_irq(struct pci_dev *dev, int irq) +void pcibios_update_irq(struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 21dcda75a52..404621b775f 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -102,7 +102,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) +void pcibios_update_irq(struct pci_dev *dev, int irq) { #ifdef CONFIG_PCI_DEBUG printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq, diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index 33c10864d2f..6245bba8b1d 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -406,7 +406,7 @@ void pcibios_set_master(struct pci_dev *dev) /* * This is called from the generic Linux layer. */ -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) +void pcibios_update_irq(struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 0e213e35ffc..5faad0b1bd2 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -1036,7 +1036,7 @@ char __devinit *pcibios_setup(char *str) /* * This is called from the generic Linux layer. */ -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) +void pcibios_update_irq(struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c index 46cb6c9de6c..c07ecc5baff 100644 --- a/arch/unicore32/kernel/pci.c +++ b/arch/unicore32/kernel/pci.c @@ -154,7 +154,7 @@ void __init puv3_pci_adjust_zones(unsigned long *zone_size, zhole_size[0] = 0; } -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) +void pcibios_update_irq(struct pci_dev *dev, int irq) { if (debug_pci) printk(KERN_DEBUG "PCI: Assigning IRQ %02d to %s\n", diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 6f2f8eeed17..9d736e7ff64 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -62,7 +62,7 @@ out: return irq; } -void __init pcibios_update_irq(struct pci_dev *dev, int irq) +void pcibios_update_irq(struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 69759e9cb3e..6f9b40c47e9 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -212,7 +212,7 @@ void pcibios_set_master(struct pci_dev *dev) /* the next one is stolen from the alpha port... */ -void __init +void pcibios_update_irq(struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c index eb219a1d16f..270ae7b9712 100644 --- a/drivers/pci/setup-irq.c +++ b/drivers/pci/setup-irq.c @@ -18,7 +18,7 @@ #include -static void __init +static void pdev_fixup_irq(struct pci_dev *dev, u8 (*swizzle)(struct pci_dev *, u8 *), int (*map_irq)(const struct pci_dev *, u8, u8)) @@ -54,7 +54,7 @@ pdev_fixup_irq(struct pci_dev *dev, pcibios_update_irq(dev, irq); } -void __init +void pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *), int (*map_irq)(const struct pci_dev *, u8, u8)) { -- cgit v1.2.3-70-g09d2 From 8885b7b637fa9aca7e1b00581a0173c6956966d3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 17 Sep 2012 13:22:54 +0200 Subject: PCI: Provide a default pcibios_update_irq() Most architectures implement this in exactly the same way. Instead of having each architecture duplicate this function, provide a single implementation in the core and make it a weak symbol so that it can be overridden on architectures where it is required. Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas --- arch/alpha/kernel/pci.c | 6 ------ arch/arm/kernel/bios32.c | 9 --------- arch/ia64/pci/pci.c | 8 -------- arch/m68k/kernel/pcibios.c | 5 ----- arch/mips/pci/pci.c | 6 ------ arch/sh/drivers/pci/pci.c | 5 ----- arch/sparc/kernel/leon_pci.c | 9 --------- arch/sparc/kernel/pci.c | 4 ---- arch/tile/kernel/pci.c | 8 -------- arch/tile/kernel/pci_gx.c | 8 -------- arch/unicore32/kernel/pci.c | 8 -------- arch/x86/pci/visws.c | 5 ----- arch/xtensa/kernel/pci.c | 8 -------- drivers/pci/setup-irq.c | 5 +++++ 14 files changed, 5 insertions(+), 89 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 920392f61ef..ef757147cbf 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -256,12 +256,6 @@ pcibios_fixup_bus(struct pci_bus *bus) } } -void -pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index 0174fe6effe..9cf16b83bbb 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -270,15 +270,6 @@ static void __devinit pci_fixup_it8152(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it8152); - - -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ - if (debug_pci) - printk("PCI: Assigning IRQ %02d to %s\n", irq, pci_name(dev)); - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* * If the bus contains any of these devices, then we must not turn on * parity checking of any kind. Currently this is CyberPro 20x0 only. diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 27db6a8afc4..a7ebe944027 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -461,14 +461,6 @@ void pcibios_set_master (struct pci_dev *dev) /* No special bus mastering setup handling */ } -void -pcibios_update_irq (struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); - - /* ??? FIXME -- record old value for shutdown. */ -} - int pcibios_enable_device (struct pci_dev *dev, int mask) { diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c index b2988aa1840..73fa0b56a06 100644 --- a/arch/m68k/kernel/pcibios.c +++ b/arch/m68k/kernel/pcibios.c @@ -87,11 +87,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return 0; } -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - void __devinit pcibios_fixup_bus(struct pci_bus *bus) { struct pci_dev *dev; diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 64f0419e585..04e35bcde07 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -313,12 +313,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } } -void -pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - #ifdef CONFIG_HOTPLUG EXPORT_SYMBOL(PCIBIOS_MIN_IO); EXPORT_SYMBOL(PCIBIOS_MIN_MEM); diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 1bd3e089b74..a7e078f2e2e 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -192,11 +192,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - static void __init pcibios_bus_report_status_early(struct pci_channel *hose, int top_bus, int current_bus, diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 404621b775f..fc052116156 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ -#ifdef CONFIG_PCI_DEBUG - printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq, - pci_name(dev)); -#endif - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* in/out routines taken from pcic.c * * This probably belongs here rather than ioport.c because diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 065b88c4f86..acc8c838ff7 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus) { } -void pcibios_update_irq(struct pci_dev *pdev, int irq) -{ -} - resource_size_t pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index 6245bba8b1d..dbdab34f27c 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -403,14 +403,6 @@ void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling. */ } -/* - * This is called from the generic Linux layer. - */ -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* * Enable memory and/or address decoding, as appropriate, for the * device described by the 'dev' struct. diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 5faad0b1bd2..2ba6d052f85 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -1033,14 +1033,6 @@ char __devinit *pcibios_setup(char *str) return str; } -/* - * This is called from the generic Linux layer. - */ -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* * Enable memory address decoding, as appropriate, for the * device described by the 'dev' struct. The I/O decoding diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c index c07ecc5baff..b0056f68d32 100644 --- a/arch/unicore32/kernel/pci.c +++ b/arch/unicore32/kernel/pci.c @@ -154,14 +154,6 @@ void __init puv3_pci_adjust_zones(unsigned long *zone_size, zhole_size[0] = 0; } -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ - if (debug_pci) - printk(KERN_DEBUG "PCI: Assigning IRQ %02d to %s\n", - irq, pci_name(dev)); - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* * If the bus contains any of these devices, then we must not turn on * parity checking of any kind. diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 9d736e7ff64..3e6d2a6db86 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -62,11 +62,6 @@ out: return irq; } -void pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - int __init pci_visws_init(void) { pcibios_enable_irq = &pci_visws_enable_irq; diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 6f9b40c47e9..54354de38a7 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -210,14 +210,6 @@ void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } -/* the next one is stolen from the alpha port... */ - -void -pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c index 270ae7b9712..9bd6864ec5d 100644 --- a/drivers/pci/setup-irq.c +++ b/drivers/pci/setup-irq.c @@ -17,6 +17,11 @@ #include #include +void __weak pcibios_update_irq(struct pci_dev *dev, int irq) +{ + dev_dbg(&dev->dev, "assigning IRQ %02d\n", irq); + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} static void pdev_fixup_irq(struct pci_dev *dev, -- cgit v1.2.3-70-g09d2 From 1e6dd8adc78d4a153db253d051fd4ef6c49c9019 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Sep 2012 15:31:26 +0300 Subject: perf: Fix off by one test in perf_reg_value() The test should be >= ARRAY_SIZE() instead of > ARRAY_SIZE(). Signed-off-by: Dan Carpenter Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20120905123126.GC6128@elgon.mountain Signed-off-by: Ingo Molnar --- arch/x86/kernel/perf_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index c5a3e5cfe07..e309cc5c276 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -57,7 +57,7 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { u64 perf_reg_value(struct pt_regs *regs, int idx) { - if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset))) + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) return 0; return regs_get_register(regs, pt_regs_offset[idx]); -- cgit v1.2.3-70-g09d2 From 924e101a7ab6f884047f4344e5f1154a4bcd63a6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 14 Sep 2012 18:37:46 +0200 Subject: x86/debug: Dump family, model, stepping of the boot CPU When acting on a user bug report, we find ourselves constantly asking for /proc/cpuinfo in order to know the exact family, model, stepping of the CPU in question. Instead of having to ask this, add this to dmesg so that it is visible and no ambiguities can ensue from looking at the official name string of the CPU coming from CPUID and trying to map it to f/m/s. Output then looks like this: [ 0.146041] smpboot: CPU0: AMD FX(tm)-8100 Eight-Core Processor (fam: 15, model: 01, stepping: 02) Signed-off-by: Borislav Petkov Cc: Andreas Herrmann Link: http://lkml.kernel.org/r/1347640666-13638-1-git-send-email-bp@amd64.org [ tweaked it minimally to add commas. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fcc..1cc48ff91cb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1023,14 +1023,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT "%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + printk(KERN_CONT "%s", strim(c->x86_model_id)); else printk(KERN_CONT "%d86", c->x86); + printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); + printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); else - printk(KERN_CONT "\n"); + printk(KERN_CONT ")\n"); print_cpu_msr(c); } -- cgit v1.2.3-70-g09d2 From e26a44a2d618a491d5c6a2a8aaf66ee03a94739f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 18 Sep 2012 12:16:14 +0100 Subject: x86: Use REP BSF unconditionally Make "REP BSF" unconditional, as per the suggestion of hpa and Linus, this removes the insane BSF_PREFIX conditional and simplifies the logic. Suggested-by: "H. Peter Anvin" Suggested-by: Linus Torvalds Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/5058741E020000780009C014@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bitops.h | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index b2af6645ea7..6dfd0195bb5 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -347,19 +347,6 @@ static int test_bit(int nr, const volatile unsigned long *addr); ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) -#if (defined(CONFIG_X86_GENERIC) || defined(CONFIG_GENERIC_CPU)) \ - && !defined(CONFIG_CC_OPTIMIZE_FOR_SIZE) -/* - * Since BSF and TZCNT have sufficiently similar semantics for the purposes - * for which we use them here, BMI-capable hardware will decode the prefixed - * variant as 'tzcnt ...' and may execute that faster than 'bsf ...', while - * older hardware will ignore the REP prefix and decode it as 'bsf ...'. - */ -# define BSF_PREFIX "rep;" -#else -# define BSF_PREFIX -#endif - /** * __ffs - find first set bit in word * @word: The word to search @@ -368,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); */ static inline unsigned long __ffs(unsigned long word) { - asm(BSF_PREFIX "bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; @@ -382,14 +369,12 @@ static inline unsigned long __ffs(unsigned long word) */ static inline unsigned long ffz(unsigned long word) { - asm(BSF_PREFIX "bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; } -#undef BSF_PREFIX - /* * __fls: find last set bit in word * @word: The word to search -- cgit v1.2.3-70-g09d2 From 20a36e39d59757252edbbdcf9574ae2998733ce9 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Sep 2012 01:07:01 +0200 Subject: perf/x86: Fix Intel Ivy Bridge support This patch updates the existing Intel IvyBridge (model 58) support with proper PEBS event constraints. It cannot reuse the same as SandyBridge because some events (0xd3) are specific to IvyBridge. Also there is no UOPS_DISPATCHED.THREAD on IVB, so do not populate the PERF_COUNT_HW_STALLED_CYCLES_BACKEND mapping. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/20120910230701.GA5898@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 2 ++ arch/x86/kernel/cpu/perf_event_intel.c | 24 +++++++++++++++++++++++- arch/x86/kernel/cpu/perf_event_intel_ds.c | 14 ++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6605a81ba33..8b6defe7eef 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[]; extern struct event_constraint intel_snb_pebs_event_constraints[]; +extern struct event_constraint intel_ivb_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_enable(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 0d3d63afa76..6bca492b854 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2048,7 +2048,6 @@ __init int intel_pmu_init(void) case 42: /* SandyBridge */ case 45: /* SandyBridge, "Romely-EP" */ x86_add_quirk(intel_sandybridge_quirk); - case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, @@ -2073,6 +2072,29 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; + case 58: /* IvyBridge */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.extra_regs = intel_snb_extra_regs; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.er_flags |= ERF_NO_HT_SHARING; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + + pr_cont("IvyBridge events, "); + break; + default: switch (x86_pmu.version) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index e38d97bf425..826054a4f2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_ivb_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; -- cgit v1.2.3-70-g09d2 From 4b8073e467e6a66b6a5a8e799d28bc3b243c0d78 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Tue, 18 Sep 2012 18:36:14 +0200 Subject: arch/x86: Remove unecessary semicolons Found by http://coccinelle.lip6.fr/ Signed-off-by: Peter Senna Tschudin Cc: avi@redhat.com Cc: mtosatti@redhat.com Cc: a.p.zijlstra@chello.nl Cc: rusty@rustcorp.com.au Cc: masami.hiramatsu.pt@hitachi.com Cc: suresh.b.siddha@intel.com Cc: joerg.roedel@amd.com Cc: agordeev@redhat.com Cc: yinghai@kernel.org Cc: bhelgaas@google.com Cc: liuj97@gmail.com Link: http://lkml.kernel.org/r/1347986174-30287-7-git-send-email-peter.senna@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 4 ++-- arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/pci/mmconfig-shared.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ced4534baed..3318b1e53e0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -317,7 +317,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, /* turn DS segment override prefix into lock prefix */ if (*ptr == 0x3e) text_poke(ptr, ((unsigned char []){0xf0}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -338,7 +338,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, /* turn lock prefix into DS segment override prefix */ if (*ptr == 0xf0) text_poke(ptr, ((unsigned char []){0x3E}), 1); - }; + } mutex_unlock(&text_mutex); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 24deb308232..b17416e72fb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); i++; v1 >>= 1; - }; + } apic_printk(APIC_DEBUG, KERN_CONT "\n"); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1eb202ee76..b06737d122f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4543,7 +4543,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SET_TPR; return 0; } - }; + } break; case 2: /* clts */ handle_clts(vcpu); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 937bcece700..704b9ec043d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) while (i >= sizeof(struct acpi_mcfg_allocation)) { entries++; i -= sizeof(struct acpi_mcfg_allocation); - }; + } if (entries == 0) { pr_err(PREFIX "MMCONFIG has no entries\n"); return -ENODEV; -- cgit v1.2.3-70-g09d2 From 2d297480037e1d9100ca504737820c1bf65db6c0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Sep 2012 15:30:42 +0300 Subject: x86, microcode, AMD: Fix use after free in free_cache() list_for_each_entry_reverse() dereferences the iterator, but we already freed it. I don't see a reason that this has to be done in reverse order so change it to use list_for_each_entry_safe(). Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov --- arch/x86/kernel/microcode_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 5511216b443..7720ff5a9ee 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -150,9 +150,9 @@ static void update_cache(struct ucode_patch *new_patch) static void free_cache(void) { - struct ucode_patch *p; + struct ucode_patch *p, *tmp; - list_for_each_entry_reverse(p, &pcache, plist) { + list_for_each_entry_safe(p, tmp, &pcache, plist) { __list_del(p->plist.prev, p->plist.next); kfree(p->data); kfree(p); -- cgit v1.2.3-70-g09d2 From bd49940a35ec7d488ae63bd625639893b3385b97 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 19 Sep 2012 08:30:55 -0400 Subject: xen/boot: Disable BIOS SMP MP table search. As the initial domain we are able to search/map certain regions of memory to harvest configuration data. For all low-level we use ACPI tables - for interrupts we use exclusively ACPI _PRT (so DSDT) and MADT for INT_SRC_OVR. The SMP MP table is not used at all. As a matter of fact we do not even support machines that only have SMP MP but no ACPI tables. Lets follow how Moorestown does it and just disable searching for BIOS SMP tables. This also fixes an issue on HP Proliant BL680c G5 and DL380 G6: 9f->100 for 1:1 PTE Freeing 9f-100 pfn range: 97 pages freed 1-1 mapping on 9f->100 .. snip.. e820: BIOS-provided physical RAM map: Xen: [mem 0x0000000000000000-0x000000000009efff] usable Xen: [mem 0x000000000009f400-0x00000000000fffff] reserved Xen: [mem 0x0000000000100000-0x00000000cfd1dfff] usable .. snip.. Scan for SMP in [mem 0x00000000-0x000003ff] Scan for SMP in [mem 0x0009fc00-0x0009ffff] Scan for SMP in [mem 0x000f0000-0x000fffff] found SMP MP-table at [mem 0x000f4fa0-0x000f4faf] mapped at [ffff8800000f4fa0] (XEN) mm.c:908:d0 Error getting mfn 100 (pfn 5555555555555555) from L1 entry 0000000000100461 for l1e_owner=0, pg_owner=0 (XEN) mm.c:4995:d0 ptwr_emulate: could not get_page_from_l1e() BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] xen_set_pte_init+0x66/0x71 . snip.. Pid: 0, comm: swapper Not tainted 3.6.0-rc6upstream-00188-gb6fb969-dirty #2 HP ProLiant BL680c G5 .. snip.. Call Trace: [] __early_ioremap+0x18a/0x248 [] ? printk+0x48/0x4a [] early_ioremap+0x13/0x15 [] get_mpc_size+0x2f/0x67 [] smp_scan_config+0x10c/0x136 [] default_find_smp_config+0x36/0x5a [] setup_arch+0x5b3/0xb5b [] ? printk+0x48/0x4a [] start_kernel+0x90/0x390 [] x86_64_start_reservations+0x131/0x136 [] xen_start_kernel+0x65f/0x661 (XEN) Domain 0 crashed: 'noreboot' set - not rebooting. which is that ioremap would end up mapping 0xff using _PAGE_IOMAP (which is what early_ioremap sticks as a flag) - which meant we would get MFN 0xFF (pte ff461, which is OK), and then it would also map 0x100 (b/c ioremap tries to get page aligned request, and it was trying to map 0xf4fa0 + PAGE_SIZE - so it mapped the next page) as _PAGE_IOMAP. Since 0x100 is actually a RAM page, and the _PAGE_IOMAP bypasses the P2M lookup we would happily set the PTE to 1000461. Xen would deny the request since we do not have access to the Machine Frame Number (MFN) of 0x100. The P2M[0x100] is for example 0x80140. CC: stable@vger.kernel.org Fixes-Oracle-Bugzilla: https://bugzilla.oracle.com/bugzilla/show_bug.cgi?id=13665 Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9642d4a3860..1fbe75a95f1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1452,6 +1452,10 @@ asmlinkage void __init xen_start_kernel(void) pci_request_acs(); xen_acpi_sleep_register(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; } #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ -- cgit v1.2.3-70-g09d2 From 8ea667f259e3767fd3ee85a885c14e417835695e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 12 Sep 2012 13:44:53 +0300 Subject: KVM: MMU: Push clean gpte write protection out of gpte_access() gpte_access() computes the access permissions of a guest pte and also write-protects clean gptes. This is wrong when we are servicing a write fault (since we'll be setting the dirty bit momentarily) but correct when instantiating a speculative spte, or when servicing a read fault (since we'll want to trap a following write in order to set the dirty bit). It doesn't seem to hurt in practice, but in order to make the code readable, push the write protection out of gpte_access() and into a new protect_clean_gpte() which is called explicitly when needed. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 12 ++++++++++++ arch/x86/kvm/mmu.h | 3 ++- arch/x86/kvm/paging_tmpl.h | 24 ++++++++++++------------ 3 files changed, 26 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index aa0b469ee07..54c9cb4fdfa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; } +static inline void protect_clean_gpte(unsigned *access, unsigned gpte) +{ + unsigned mask; + + BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); + + mask = (unsigned)~ACC_WRITE_MASK; + /* Allow write access to dirty gptes */ + mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; + *access &= mask; +} + static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, int *nr_present) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e374db9af02..2832081e9b2 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -18,7 +18,8 @@ #define PT_PCD_MASK (1ULL << 4) #define PT_ACCESSED_SHIFT 5 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) -#define PT_DIRTY_MASK (1ULL << 6) +#define PT_DIRTY_SHIFT 6 +#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT) #define PT_PAGE_SIZE_MASK (1ULL << 7) #define PT_PAT_MASK (1ULL << 7) #define PT_GLOBAL_MASK (1ULL << 8) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index bf8c42bf50f..bf7b4ffafab 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -101,14 +101,11 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, return (ret != orig_pte); } -static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte, - bool last) +static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) { unsigned access; access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; - if (last && !is_dirty_gpte(gpte)) - access &= ~ACC_WRITE_MASK; #if PTTYPE == 64 if (vcpu->arch.mmu.nx) @@ -222,8 +219,7 @@ retry_walk: last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); if (last_gpte) { - pte_access = pt_access & - FNAME(gpte_access)(vcpu, pte, true); + pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); /* check if the kernel is fetching from user page */ if (unlikely(pte_access & PT_USER_MASK) && kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) @@ -274,7 +270,7 @@ retry_walk: break; } - pt_access &= FNAME(gpte_access)(vcpu, pte, false); + pt_access &= FNAME(gpte_access)(vcpu, pte); --walker->level; } @@ -283,7 +279,9 @@ retry_walk: goto error; } - if (write_fault && unlikely(!is_dirty_gpte(pte))) { + if (!write_fault) + protect_clean_gpte(&pte_access, pte); + else if (unlikely(!is_dirty_gpte(pte))) { int ret; trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); @@ -368,7 +366,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return; pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); - pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true); + pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + protect_clean_gpte(&pte_access, gpte); pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); if (mmu_invalid_pfn(pfn)) return; @@ -441,8 +440,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) continue; - pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, - true); + pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + protect_clean_gpte(&pte_access, gpte); gfn = gpte_to_gfn(gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, pte_access & ACC_WRITE_MASK); @@ -794,7 +793,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; - pte_access &= FNAME(gpte_access)(vcpu, gpte, true); + pte_access &= FNAME(gpte_access)(vcpu, gpte); + protect_clean_gpte(&pte_access, gpte); if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) continue; -- cgit v1.2.3-70-g09d2 From edc2ae84eb40a3c062210fe01af1cae1633cc810 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 12 Sep 2012 13:53:08 +0300 Subject: KVM: MMU: Optimize gpte_access() slightly If nx is disabled, then is gpte[63] is set we will hit a reserved bit set fault before checking permissions; so we can ignore the setting of efer.nxe. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index bf7b4ffafab..064bcb32d84 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -106,10 +106,8 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) unsigned access; access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; - #if PTTYPE == 64 - if (vcpu->arch.mmu.nx) - access &= ~(gpte >> PT64_NX_SHIFT); + access &= ~(gpte >> PT64_NX_SHIFT); #endif return access; } -- cgit v1.2.3-70-g09d2 From 3d34adec7081621ff51c195be045b87d75c0c49d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 12 Sep 2012 14:03:28 +0300 Subject: KVM: MMU: Move gpte_access() out of paging_tmpl.h We no longer rely on paging_tmpl.h defines; so we can move the function to mmu.c. Rely on zero extension to 64 bits to get the correct nx behaviour. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 10 ++++++++++ arch/x86/kvm/paging_tmpl.h | 21 +++++---------------- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 54c9cb4fdfa..f297a2ccf4f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3437,6 +3437,16 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, return false; } +static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) +{ + unsigned access; + + access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; + access &= ~(gpte >> PT64_NX_SHIFT); + + return access; +} + #define PTTYPE 64 #include "paging_tmpl.h" #undef PTTYPE diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 064bcb32d84..1cbf576852c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -101,17 +101,6 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, return (ret != orig_pte); } -static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) -{ - unsigned access; - - access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; -#if PTTYPE == 64 - access &= ~(gpte >> PT64_NX_SHIFT); -#endif - return access; -} - static bool FNAME(is_last_gpte)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t gpte) @@ -217,7 +206,7 @@ retry_walk: last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); if (last_gpte) { - pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); + pte_access = pt_access & gpte_access(vcpu, pte); /* check if the kernel is fetching from user page */ if (unlikely(pte_access & PT_USER_MASK) && kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) @@ -268,7 +257,7 @@ retry_walk: break; } - pt_access &= FNAME(gpte_access)(vcpu, pte); + pt_access &= gpte_access(vcpu, pte); --walker->level; } @@ -364,7 +353,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return; pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); - pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + pte_access = sp->role.access & gpte_access(vcpu, gpte); protect_clean_gpte(&pte_access, gpte); pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); if (mmu_invalid_pfn(pfn)) @@ -438,7 +427,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) continue; - pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + pte_access = sp->role.access & gpte_access(vcpu, gpte); protect_clean_gpte(&pte_access, gpte); gfn = gpte_to_gfn(gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, @@ -791,7 +780,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; - pte_access &= FNAME(gpte_access)(vcpu, gpte); + pte_access &= gpte_access(vcpu, gpte); protect_clean_gpte(&pte_access, gpte); if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) -- cgit v1.2.3-70-g09d2 From 8cbc70696f149e44753b0fe60162b4ff96c2dd2b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 16 Sep 2012 14:18:51 +0300 Subject: KVM: MMU: Update accessed and dirty bits after guest pagetable walk While unspecified, the behaviour of Intel processors is to first perform the page table walk, then, if the walk was successful, to atomically update the accessed and dirty bits of walked paging elements. While we are not required to follow this exactly, doing so will allow us to perform the access permissions check after the walk is complete, rather than after each walk step. (the tricky case is SMEP: a zero in any pte's U bit makes the referenced page a supervisor page, so we can't fault on a one bit during the walk itself). Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 76 ++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1cbf576852c..35a05dd2f69 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -63,10 +63,12 @@ */ struct guest_walker { int level; + unsigned max_level; gfn_t table_gfn[PT_MAX_FULL_LEVELS]; pt_element_t ptes[PT_MAX_FULL_LEVELS]; pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; + pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -119,6 +121,43 @@ static bool FNAME(is_last_gpte)(struct guest_walker *walker, return false; } +static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, + struct guest_walker *walker, + int write_fault) +{ + unsigned level, index; + pt_element_t pte, orig_pte; + pt_element_t __user *ptep_user; + gfn_t table_gfn; + int ret; + + for (level = walker->max_level; level >= walker->level; --level) { + pte = orig_pte = walker->ptes[level - 1]; + table_gfn = walker->table_gfn[level - 1]; + ptep_user = walker->ptep_user[level - 1]; + index = offset_in_page(ptep_user) / sizeof(pt_element_t); + if (!(pte & PT_ACCESSED_MASK)) { + trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); + pte |= PT_ACCESSED_MASK; + } + if (level == walker->level && write_fault && !is_dirty_gpte(pte)) { + trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); + pte |= PT_DIRTY_MASK; + } + if (pte == orig_pte) + continue; + + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); + if (ret) + return ret; + + mark_page_dirty(vcpu->kvm, table_gfn); + walker->ptes[level] = pte; + } + return 0; +} + /* * Fetch a guest pte for a guest virtual address */ @@ -126,6 +165,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gva_t addr, u32 access) { + int ret; pt_element_t pte; pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; @@ -153,6 +193,7 @@ retry_walk: --walker->level; } #endif + walker->max_level = walker->level; ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); @@ -183,6 +224,7 @@ retry_walk: ptep_user = (pt_element_t __user *)((void *)host_addr + offset); if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) goto error; + walker->ptep_user[walker->level - 1] = ptep_user; trace_kvm_mmu_paging_element(pte, walker->level); @@ -214,21 +256,6 @@ retry_walk: eperm = true; } - if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { - int ret; - trace_kvm_mmu_set_accessed_bit(table_gfn, index, - sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, - pte, pte|PT_ACCESSED_MASK); - if (unlikely(ret < 0)) - goto error; - else if (ret) - goto retry_walk; - - mark_page_dirty(vcpu->kvm, table_gfn); - pte |= PT_ACCESSED_MASK; - } - walker->ptes[walker->level - 1] = pte; if (last_gpte) { @@ -268,21 +295,12 @@ retry_walk: if (!write_fault) protect_clean_gpte(&pte_access, pte); - else if (unlikely(!is_dirty_gpte(pte))) { - int ret; - trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, - pte, pte|PT_DIRTY_MASK); - if (unlikely(ret < 0)) - goto error; - else if (ret) - goto retry_walk; - - mark_page_dirty(vcpu->kvm, table_gfn); - pte |= PT_DIRTY_MASK; - walker->ptes[walker->level - 1] = pte; - } + ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); + if (unlikely(ret < 0)) + goto error; + else if (ret) + goto retry_walk; walker->pt_access = pt_access; walker->pte_access = pte_access; -- cgit v1.2.3-70-g09d2 From 97d64b788114be1c4dc4bfe7a8ba2bf9643fe6af Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 12 Sep 2012 14:52:00 +0300 Subject: KVM: MMU: Optimize pte permission checks walk_addr_generic() permission checks are a maze of branchy code, which is performed four times per lookup. It depends on the type of access, efer.nxe, cr0.wp, cr4.smep, and in the near future, cr4.smap. Optimize this away by precalculating all variants and storing them in a bitmap. The bitmap is recalculated when rarely-changing variables change (cr0, cr4) and is indexed by the often-changing variables (page fault error code, pte access permissions). The permission check is moved to the end of the loop, otherwise an SMEP fault could be reported as a false positive, when PDE.U=1 but PTE.U=0. Noted by Xiao Guangrong. The result is short, branch-free code. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/mmu.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/mmu.h | 19 ++++++++----------- arch/x86/kvm/paging_tmpl.h | 22 ++++------------------ arch/x86/kvm/x86.c | 11 ++++------- 5 files changed, 61 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 64adb6117e1..3318bde206a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -287,6 +287,13 @@ struct kvm_mmu { union kvm_mmu_page_role base_role; bool direct_map; + /* + * Bitmap; bit set = permission fault + * Byte index: page fault error code [4:1] + * Bit index: pte permissions in ACC_* format + */ + u8 permissions[16]; + u64 *pae_root; u64 *lm_root; u64 rsvd_bits_mask[2][4]; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f297a2ccf4f..9c6188931f8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3516,6 +3516,38 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, } } +static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) +{ + unsigned bit, byte, pfec; + u8 map; + bool fault, x, w, u, wf, uf, ff, smep; + + smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { + pfec = byte << 1; + map = 0; + wf = pfec & PFERR_WRITE_MASK; + uf = pfec & PFERR_USER_MASK; + ff = pfec & PFERR_FETCH_MASK; + for (bit = 0; bit < 8; ++bit) { + x = bit & ACC_EXEC_MASK; + w = bit & ACC_WRITE_MASK; + u = bit & ACC_USER_MASK; + + /* Not really needed: !nx will cause pte.nx to fault */ + x |= !mmu->nx; + /* Allow supervisor writes if !cr0.wp */ + w |= !is_write_protection(vcpu) && !uf; + /* Disallow supervisor fetches of user code if cr4.smep */ + x &= !(smep && u && !uf); + + fault = (ff && !x) || (uf && !u) || (wf && !w); + map |= fault << bit; + } + mmu->permissions[byte] = map; + } +} + static int paging64_init_context_common(struct kvm_vcpu *vcpu, struct kvm_mmu *context, int level) @@ -3524,6 +3556,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, context->root_level = level; reset_rsvds_bits_mask(vcpu, context); + update_permission_bitmask(vcpu, context); ASSERT(is_pae(vcpu)); context->new_cr3 = paging_new_cr3; @@ -3552,6 +3585,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, context->root_level = PT32_ROOT_LEVEL; reset_rsvds_bits_mask(vcpu, context); + update_permission_bitmask(vcpu, context); context->new_cr3 = paging_new_cr3; context->page_fault = paging32_page_fault; @@ -3612,6 +3646,8 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->gva_to_gpa = paging32_gva_to_gpa; } + update_permission_bitmask(vcpu, context); + return 0; } @@ -3687,6 +3723,8 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->gva_to_gpa = paging32_gva_to_gpa_nested; } + update_permission_bitmask(vcpu, g_context); + return 0; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 2832081e9b2..584660775d0 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -89,17 +89,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu) return kvm_read_cr0_bits(vcpu, X86_CR0_WP); } -static inline bool check_write_user_access(struct kvm_vcpu *vcpu, - bool write_fault, bool user_fault, - unsigned long pte) +/* + * Will a fault with a given page-fault error code (pfec) cause a permission + * fault with the given access (in ACC_* format)? + */ +static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access, + unsigned pfec) { - if (unlikely(write_fault && !is_writable_pte(pte) - && (user_fault || is_write_protection(vcpu)))) - return false; - - if (unlikely(user_fault && !(pte & PT_USER_MASK))) - return false; - - return true; + return (mmu->permissions[pfec >> 1] >> pte_access) & 1; } + #endif diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 35a05dd2f69..8f6c59fadbb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -169,7 +169,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, pt_element_t pte; pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; - unsigned index, pt_access, uninitialized_var(pte_access); + unsigned index, pt_access, pte_access; gpa_t pte_gpa; bool eperm, last_gpte; int offset; @@ -237,24 +237,9 @@ retry_walk: goto error; } - if (!check_write_user_access(vcpu, write_fault, user_fault, - pte)) - eperm = true; - -#if PTTYPE == 64 - if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) - eperm = true; -#endif + pte_access = pt_access & gpte_access(vcpu, pte); last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); - if (last_gpte) { - pte_access = pt_access & gpte_access(vcpu, pte); - /* check if the kernel is fetching from user page */ - if (unlikely(pte_access & PT_USER_MASK) && - kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) - if (fetch_fault && !user_fault) - eperm = true; - } walker->ptes[walker->level - 1] = pte; @@ -284,10 +269,11 @@ retry_walk: break; } - pt_access &= gpte_access(vcpu, pte); + pt_access &= pte_access; --walker->level; } + eperm |= permission_fault(mmu, pte_access, access); if (unlikely(eperm)) { errcode |= PFERR_PRESENT_MASK; goto error; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19047eafa38..497226e49d4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3672,20 +3672,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t *gpa, struct x86_exception *exception, bool write) { - u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0) + | (write ? PFERR_WRITE_MASK : 0); - if (vcpu_match_mmio_gva(vcpu, gva) && - check_write_user_access(vcpu, write, access, - vcpu->arch.access)) { + if (vcpu_match_mmio_gva(vcpu, gva) + && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) { *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | (gva & (PAGE_SIZE - 1)); trace_vcpu_match_mmio(gva, *gpa, write, false); return 1; } - if (write) - access |= PFERR_WRITE_MASK; - *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); if (*gpa == UNMAPPED_GVA) -- cgit v1.2.3-70-g09d2 From 13d22b6aebb000aeaf137862c6c0e0c4d138d798 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 12 Sep 2012 15:12:09 +0300 Subject: KVM: MMU: Simplify walk_addr_generic() loop The page table walk is coded as an infinite loop, with a special case on the last pte. Code it as an ordinary loop with a termination condition on the last pte (large page or walk length exhausted), and put the last pte handling code after the loop where it belongs. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 60 +++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 8f6c59fadbb..1b4c14d235a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -171,12 +171,15 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gfn_t table_gfn; unsigned index, pt_access, pte_access; gpa_t pte_gpa; - bool eperm, last_gpte; + bool eperm; int offset; const int write_fault = access & PFERR_WRITE_MASK; const int user_fault = access & PFERR_USER_MASK; const int fetch_fault = access & PFERR_FETCH_MASK; u16 errcode = 0; + gpa_t real_gpa; + gfn_t gfn; + u32 ac; trace_kvm_mmu_pagetable_walk(addr, access); retry_walk: @@ -197,12 +200,16 @@ retry_walk: ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); - pt_access = ACC_ALL; + pt_access = pte_access = ACC_ALL; + ++walker->level; - for (;;) { + do { gfn_t real_gfn; unsigned long host_addr; + pt_access &= pte_access; + --walker->level; + index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); @@ -239,39 +246,8 @@ retry_walk: pte_access = pt_access & gpte_access(vcpu, pte); - last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); - walker->ptes[walker->level - 1] = pte; - - if (last_gpte) { - int lvl = walker->level; - gpa_t real_gpa; - gfn_t gfn; - u32 ac; - - gfn = gpte_to_gfn_lvl(pte, lvl); - gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; - - if (PTTYPE == 32 && - walker->level == PT_DIRECTORY_LEVEL && - is_cpuid_PSE36()) - gfn += pse36_gfn_delta(pte); - - ac = write_fault | fetch_fault | user_fault; - - real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), - ac); - if (real_gpa == UNMAPPED_GVA) - return 0; - - walker->gfn = real_gpa >> PAGE_SHIFT; - - break; - } - - pt_access &= pte_access; - --walker->level; - } + } while (!FNAME(is_last_gpte)(walker, vcpu, mmu, pte)); eperm |= permission_fault(mmu, pte_access, access); if (unlikely(eperm)) { @@ -279,6 +255,20 @@ retry_walk: goto error; } + gfn = gpte_to_gfn_lvl(pte, walker->level); + gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; + + if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) + gfn += pse36_gfn_delta(pte); + + ac = write_fault | fetch_fault | user_fault; + + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac); + if (real_gpa == UNMAPPED_GVA) + return 0; + + walker->gfn = real_gpa >> PAGE_SHIFT; + if (!write_fault) protect_clean_gpte(&pte_access, pte); -- cgit v1.2.3-70-g09d2 From 6fd01b711bee96ce3356f7b6f370ab708e37504b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 12 Sep 2012 20:46:56 +0300 Subject: KVM: MMU: Optimize is_last_gpte() Instead of branchy code depending on level, gpte.ps, and mmu configuration, prepare everything in a bitmap during mode changes and look it up during runtime. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/mmu.c | 31 +++++++++++++++++++++++++++++++ arch/x86/kvm/mmu.h | 3 ++- arch/x86/kvm/paging_tmpl.h | 20 +------------------- 4 files changed, 41 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3318bde206a..43aeb942283 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -298,6 +298,13 @@ struct kvm_mmu { u64 *lm_root; u64 rsvd_bits_mask[2][4]; + /* + * Bitmap: bit set = last pte in walk + * index[0:1]: level (zero-based) + * index[2]: pte.ps + */ + u8 last_pte_bitmap; + bool nx; u64 pdptrs[4]; /* pae */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9c6188931f8..d289fee1ffb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3447,6 +3447,15 @@ static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) return access; } +static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) +{ + unsigned index; + + index = level - 1; + index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2); + return mmu->last_pte_bitmap & (1 << index); +} + #define PTTYPE 64 #include "paging_tmpl.h" #undef PTTYPE @@ -3548,6 +3557,24 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu } } +static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) +{ + u8 map; + unsigned level, root_level = mmu->root_level; + const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */ + + if (root_level == PT32E_ROOT_LEVEL) + --root_level; + /* PT_PAGE_TABLE_LEVEL always terminates */ + map = 1 | (1 << ps_set_index); + for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) { + if (level <= PT_PDPE_LEVEL + && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu))) + map |= 1 << (ps_set_index | (level - 1)); + } + mmu->last_pte_bitmap = map; +} + static int paging64_init_context_common(struct kvm_vcpu *vcpu, struct kvm_mmu *context, int level) @@ -3557,6 +3584,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, reset_rsvds_bits_mask(vcpu, context); update_permission_bitmask(vcpu, context); + update_last_pte_bitmap(vcpu, context); ASSERT(is_pae(vcpu)); context->new_cr3 = paging_new_cr3; @@ -3586,6 +3614,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, reset_rsvds_bits_mask(vcpu, context); update_permission_bitmask(vcpu, context); + update_last_pte_bitmap(vcpu, context); context->new_cr3 = paging_new_cr3; context->page_fault = paging32_page_fault; @@ -3647,6 +3676,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) } update_permission_bitmask(vcpu, context); + update_last_pte_bitmap(vcpu, context); return 0; } @@ -3724,6 +3754,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) } update_permission_bitmask(vcpu, g_context); + update_last_pte_bitmap(vcpu, g_context); return 0; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 584660775d0..69871080e86 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -20,7 +20,8 @@ #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) #define PT_DIRTY_SHIFT 6 #define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT) -#define PT_PAGE_SIZE_MASK (1ULL << 7) +#define PT_PAGE_SIZE_SHIFT 7 +#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT) #define PT_PAT_MASK (1ULL << 7) #define PT_GLOBAL_MASK (1ULL << 8) #define PT64_NX_SHIFT 63 diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1b4c14d235a..134ea7b1c58 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -103,24 +103,6 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, return (ret != orig_pte); } -static bool FNAME(is_last_gpte)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - pt_element_t gpte) -{ - if (walker->level == PT_PAGE_TABLE_LEVEL) - return true; - - if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) && - (PTTYPE == 64 || is_pse(vcpu))) - return true; - - if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) && - (mmu->root_level == PT64_ROOT_LEVEL)) - return true; - - return false; -} - static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, @@ -247,7 +229,7 @@ retry_walk: pte_access = pt_access & gpte_access(vcpu, pte); walker->ptes[walker->level - 1] = pte; - } while (!FNAME(is_last_gpte)(walker, vcpu, mmu, pte)); + } while (!is_last_gpte(mmu, walker->level, pte)); eperm |= permission_fault(mmu, pte_access, access); if (unlikely(eperm)) { -- cgit v1.2.3-70-g09d2 From 71331a1da1e3a66d14bb3864f99e32d84ab5a76f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 16 Sep 2012 14:49:15 +0300 Subject: KVM: MMU: Eliminate eperm temporary 'eperm' is no longer used in the walker loop, so we can eliminate it. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 134ea7b1c58..95a64d1dccc 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -153,7 +153,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gfn_t table_gfn; unsigned index, pt_access, pte_access; gpa_t pte_gpa; - bool eperm; int offset; const int write_fault = access & PFERR_WRITE_MASK; const int user_fault = access & PFERR_USER_MASK; @@ -165,7 +164,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, trace_kvm_mmu_pagetable_walk(addr, access); retry_walk: - eperm = false; walker->level = mmu->root_level; pte = mmu->get_cr3(vcpu); @@ -231,8 +229,7 @@ retry_walk: walker->ptes[walker->level - 1] = pte; } while (!is_last_gpte(mmu, walker->level, pte)); - eperm |= permission_fault(mmu, pte_access, access); - if (unlikely(eperm)) { + if (unlikely(permission_fault(mmu, pte_access, access))) { errcode |= PFERR_PRESENT_MASK; goto error; } -- cgit v1.2.3-70-g09d2 From b514c30f7729b66af481fde3c1225e832e339d5b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 16 Sep 2012 15:03:02 +0300 Subject: KVM: MMU: Avoid access/dirty update loop if all is well Keep track of accessed/dirty bits; if they are all set, do not enter the accessed/dirty update loop. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 95a64d1dccc..810c1da2ee4 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, pt_element_t pte; pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; - unsigned index, pt_access, pte_access; + unsigned index, pt_access, pte_access, accessed_dirty, shift; gpa_t pte_gpa; int offset; const int write_fault = access & PFERR_WRITE_MASK; @@ -180,6 +180,7 @@ retry_walk: ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); + accessed_dirty = PT_ACCESSED_MASK; pt_access = pte_access = ACC_ALL; ++walker->level; @@ -224,6 +225,7 @@ retry_walk: goto error; } + accessed_dirty &= pte; pte_access = pt_access & gpte_access(vcpu, pte); walker->ptes[walker->level - 1] = pte; @@ -251,11 +253,23 @@ retry_walk: if (!write_fault) protect_clean_gpte(&pte_access, pte); - ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); - if (unlikely(ret < 0)) - goto error; - else if (ret) - goto retry_walk; + /* + * On a write fault, fold the dirty bit into accessed_dirty by shifting it one + * place right. + * + * On a read fault, do nothing. + */ + shift = write_fault >> ilog2(PFERR_WRITE_MASK); + shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; + accessed_dirty &= pte >> shift; + + if (unlikely(!accessed_dirty)) { + ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); + if (unlikely(ret < 0)) + goto error; + else if (ret) + goto retry_walk; + } walker->pt_access = pt_access; walker->pte_access = pte_access; -- cgit v1.2.3-70-g09d2 From c5421519f30bd5ed77857a78de6dc8414385e602 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 19 Sep 2012 19:33:48 +0300 Subject: KVM: MMU: Eliminate pointless temporary 'ac' 'ac' essentially reconstructs the 'access' variable we already have, except for the PFERR_PRESENT_MASK and PFERR_RSVD_MASK. As these are not used by callees, just use 'access' directly. Reviewed-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 810c1da2ee4..714e2c01a6f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -160,7 +160,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, u16 errcode = 0; gpa_t real_gpa; gfn_t gfn; - u32 ac; trace_kvm_mmu_pagetable_walk(addr, access); retry_walk: @@ -242,9 +241,7 @@ retry_walk: if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); - ac = write_fault | fetch_fault | user_fault; - - real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac); + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); if (real_gpa == UNMAPPED_GVA) return 0; -- cgit v1.2.3-70-g09d2 From 1e08ec4a130e2745d96df169e67c58df98a07311 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 13 Sep 2012 17:19:24 +0300 Subject: KVM: optimize apic interrupt delivery Most interrupt are delivered to only one vcpu. Use pre-build tables to find interrupt destination instead of looping through all vcpus. In case of logical mode loop only through vcpus in a logical cluster irq is sent to. Signed-off-by: Gleb Natapov Acked-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 12 +++ arch/x86/kvm/lapic.c | 188 +++++++++++++++++++++++++++++++++++++--- arch/x86/kvm/lapic.h | 3 + arch/x86/kvm/x86.c | 2 + virt/kvm/irq_comm.c | 7 +- 5 files changed, 199 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 43aeb942283..0b902c98f27 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -525,6 +525,16 @@ struct kvm_arch_memory_slot { struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; +struct kvm_apic_map { + struct rcu_head rcu; + u8 ldr_bits; + /* fields bellow are used to decode ldr values in different modes */ + u32 cid_shift, cid_mask, lid_mask; + struct kvm_lapic *phys_map[256]; + /* first index is cluster id second is cpu id in a cluster */ + struct kvm_lapic *logical_map[16][16]; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -542,6 +552,8 @@ struct kvm_arch { struct kvm_ioapic *vioapic; struct kvm_pit *vpit; int vapics_in_nmi_mode; + struct mutex apic_map_lock; + struct kvm_apic_map *apic_map; unsigned int tss_addr; struct page *apic_access_page; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6f9fd633c88..c6e6b721b6e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -140,11 +140,110 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) +static inline int apic_x2apic_mode(struct kvm_lapic *apic) +{ + return apic->vcpu->arch.apic_base & X2APIC_ENABLE; +} + static inline int kvm_apic_id(struct kvm_lapic *apic) { return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } +static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) +{ + u16 cid; + ldr >>= 32 - map->ldr_bits; + cid = (ldr >> map->cid_shift) & map->cid_mask; + + BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); + + return cid; +} + +static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) +{ + ldr >>= (32 - map->ldr_bits); + return ldr & map->lid_mask; +} + +static void recalculate_apic_map(struct kvm *kvm) +{ + struct kvm_apic_map *new, *old = NULL; + struct kvm_vcpu *vcpu; + int i; + + new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL); + + mutex_lock(&kvm->arch.apic_map_lock); + + if (!new) + goto out; + + new->ldr_bits = 8; + /* flat mode is default */ + new->cid_shift = 8; + new->cid_mask = 0; + new->lid_mask = 0xff; + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvm_lapic *apic = vcpu->arch.apic; + u16 cid, lid; + u32 ldr; + + if (!kvm_apic_present(vcpu)) + continue; + + /* + * All APICs have to be configured in the same mode by an OS. + * We take advatage of this while building logical id loockup + * table. After reset APICs are in xapic/flat mode, so if we + * find apic with different setting we assume this is the mode + * OS wants all apics to be in; build lookup table accordingly. + */ + if (apic_x2apic_mode(apic)) { + new->ldr_bits = 32; + new->cid_shift = 16; + new->cid_mask = new->lid_mask = 0xffff; + } else if (kvm_apic_sw_enabled(apic) && + !new->cid_mask /* flat mode */ && + kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { + new->cid_shift = 4; + new->cid_mask = 0xf; + new->lid_mask = 0xf; + } + + new->phys_map[kvm_apic_id(apic)] = apic; + + ldr = kvm_apic_get_reg(apic, APIC_LDR); + cid = apic_cluster_id(new, ldr); + lid = apic_logical_id(new, ldr); + + if (lid) + new->logical_map[cid][ffs(lid) - 1] = apic; + } +out: + old = rcu_dereference_protected(kvm->arch.apic_map, + lockdep_is_held(&kvm->arch.apic_map_lock)); + rcu_assign_pointer(kvm->arch.apic_map, new); + mutex_unlock(&kvm->arch.apic_map_lock); + + if (old) + kfree_rcu(old, rcu); +} + +static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) +{ + apic_set_reg(apic, APIC_ID, id << 24); + recalculate_apic_map(apic->vcpu->kvm); +} + +static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) +{ + apic_set_reg(apic, APIC_LDR, id); + recalculate_apic_map(apic->vcpu->kvm); +} + static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) { return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); @@ -194,11 +293,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) apic_set_reg(apic, APIC_LVR, v); } -static inline int apic_x2apic_mode(struct kvm_lapic *apic) -{ - return apic->vcpu->arch.apic_base & X2APIC_ENABLE; -} - static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = { LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ @@ -483,6 +577,72 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, return result; } +bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r) +{ + struct kvm_apic_map *map; + unsigned long bitmap = 1; + struct kvm_lapic **dst; + int i; + bool ret = false; + + *r = -1; + + if (irq->shorthand == APIC_DEST_SELF) { + *r = kvm_apic_set_irq(src->vcpu, irq); + return true; + } + + if (irq->shorthand) + return false; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + if (!map) + goto out; + + if (irq->dest_mode == 0) { /* physical mode */ + if (irq->delivery_mode == APIC_DM_LOWEST || + irq->dest_id == 0xff) + goto out; + dst = &map->phys_map[irq->dest_id & 0xff]; + } else { + u32 mda = irq->dest_id << (32 - map->ldr_bits); + + dst = map->logical_map[apic_cluster_id(map, mda)]; + + bitmap = apic_logical_id(map, mda); + + if (irq->delivery_mode == APIC_DM_LOWEST) { + int l = -1; + for_each_set_bit(i, &bitmap, 16) { + if (!dst[i]) + continue; + if (l < 0) + l = i; + else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0) + l = i; + } + + bitmap = (l >= 0) ? 1 << l : 0; + } + } + + for_each_set_bit(i, &bitmap, 16) { + if (!dst[i]) + continue; + if (*r < 0) + *r = 0; + *r += kvm_apic_set_irq(dst[i]->vcpu, irq); + } + + ret = true; +out: + rcu_read_unlock(); + return ret; +} + /* * Add a pending IRQ into lapic. * Return 1 if successfully added and 0 if discarded. @@ -886,7 +1046,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ if (!apic_x2apic_mode(apic)) - apic_set_reg(apic, APIC_ID, val); + kvm_apic_set_id(apic, val >> 24); else ret = 1; break; @@ -902,15 +1062,16 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LDR: if (!apic_x2apic_mode(apic)) - apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK); + kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); else ret = 1; break; case APIC_DFR: - if (!apic_x2apic_mode(apic)) + if (!apic_x2apic_mode(apic)) { apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); - else + recalculate_apic_map(apic->vcpu->kvm); + } else ret = 1; break; @@ -1141,6 +1302,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) static_key_slow_dec_deferred(&apic_hw_disabled); else static_key_slow_inc(&apic_hw_disabled.key); + recalculate_apic_map(vcpu->kvm); } if (!kvm_vcpu_is_bsp(apic->vcpu)) @@ -1150,7 +1312,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) if (apic_x2apic_mode(apic)) { u32 id = kvm_apic_id(apic); u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf)); - apic_set_reg(apic, APIC_LDR, ldr); + kvm_apic_set_ldr(apic, ldr); } apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; @@ -1175,7 +1337,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); - apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); + kvm_apic_set_id(apic, vcpu->vcpu_id); kvm_apic_set_version(apic->vcpu); for (i = 0; i < APIC_LVT_NUM; i++) @@ -1186,7 +1348,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_set_reg(apic, APIC_DFR, 0xffffffffU); apic_set_spiv(apic, 0xff); apic_set_reg(apic, APIC_TASKPRI, 0); - apic_set_reg(apic, APIC_LDR, 0); + kvm_apic_set_ldr(apic, 0); apic_set_reg(apic, APIC_ESR, 0); apic_set_reg(apic, APIC_ICR, 0); apic_set_reg(apic, APIC_ICR2, 0); @@ -1404,6 +1566,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, /* set SPIV separately to get count of SW disabled APICs right */ apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); + /* call kvm_apic_set_id() to put apic into apic_map */ + kvm_apic_set_id(apic, kvm_apic_id(apic)); kvm_apic_set_version(vcpu); apic_update_ppr(apic); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 615a8b03016..e5ebf9f3571 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -52,6 +52,9 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); +bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r); + u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 497226e49d4..fc2a0a132e4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6270,6 +6270,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); raw_spin_lock_init(&kvm->arch.tsc_write_lock); + mutex_init(&kvm->arch.apic_map_lock); return 0; } @@ -6322,6 +6323,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) put_page(kvm->arch.apic_access_page); if (kvm->arch.ept_identity_pagetable) put_page(kvm->arch.ept_identity_pagetable); + kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } void kvm_arch_free_memslot(struct kvm_memory_slot *free, diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 7118be0f2f2..3ca89c451d6 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_vcpu *vcpu, *lowest = NULL; if (irq->dest_mode == 0 && irq->dest_id == 0xff && - kvm_is_dm_lowest_prio(irq)) + kvm_is_dm_lowest_prio(irq)) { printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); + irq->delivery_mode = APIC_DM_FIXED; + } + + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r)) + return r; kvm_for_each_vcpu(i, vcpu, kvm) { if (!kvm_apic_present(vcpu)) -- cgit v1.2.3-70-g09d2 From 50a011f6409e888d5f41343024d24885281f048c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 20 Sep 2012 14:43:54 +0200 Subject: kprobes/x86: Move skip_singlestep up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I get this warning: arch/x86/kernel/kprobes.c:544:23: warning: ‘skip_singlestep’ declared ‘static’ but never defined on tip/auto-latest. Put the skip_singlestep function declaration up, in KPROBES_CAN_USE_FTRACE and drop the superfluous forward declaration. Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1348145034-16603-1-git-send-email-bp@amd64.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b7c2a85d192..57916c0d3cf 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,8 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +#ifdef KPROBES_CAN_USE_FTRACE static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb); + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} +#endif + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -1061,21 +1076,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } #ifdef KPROBES_CAN_USE_FTRACE -static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - /* - * Emulate singlestep (and also recover regs->ip) - * as if there is a 5byte nop - */ - regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - __this_cpu_write(current_kprobe, NULL); -} - /* Ftrace callback handler for kprobes */ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) -- cgit v1.2.3-70-g09d2 From e76623d69408d0bd66a296c6ee5eae1b17a6adfc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 2 Aug 2012 22:12:06 +0400 Subject: x86: get rid of TIF_IRET hackery TIF_NOTIFY_RESUME will work in precisely the same way; all that is achieved by TIF_IRET is appearing that there's some work to be done, so we end up on the iret exit path. Just use NOTIFY_RESUME. And for execve() do that in 32bit start_thread(), not sys_execve() itself. Signed-off-by: Al Viro --- arch/x86/include/asm/thread_info.h | 2 -- arch/x86/kernel/process.c | 8 -------- arch/x86/kernel/process_32.c | 5 +++++ arch/x86/kernel/signal.c | 4 ---- arch/x86/kernel/vm86_32.c | 6 +++--- 5 files changed, 8 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89f794f007e..c509d07bdbd 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,7 +79,6 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_IRET 5 /* force IRET */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -104,7 +103,6 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_IRET (1 << TIF_IRET) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ef6a8456f71..7162e9c1f59 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -351,14 +351,6 @@ long sys_execve(const char __user *name, if (IS_ERR(filename)) return error; error = do_execve(filename, argv, envp, regs); - -#ifdef CONFIG_X86_32 - if (error == 0) { - /* Make sure we don't return using sysenter.. */ - set_thread_flag(TIF_IRET); - } -#endif - putname(filename); return error; } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 516fa186121..75fcad146de 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -194,6 +194,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) * Free the old FP and other extended state */ free_thread_xstate(current); + /* + * force it to the iret return path by making it look as if there was + * some work pending. + */ + set_thread_flag(TIF_NOTIFY_RESUME); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376..c648fc52987 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -800,10 +800,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) } if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); - -#ifdef CONFIG_X86_32 - clear_thread_flag(TIF_IRET); -#endif /* CONFIG_X86_32 */ } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 54abcc0baf2..5c9687b1bde 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) if ((trapno == 3) || (trapno == 1)) { KVM86->regs32->ax = VM86_TRAP + (trapno << 8); /* setting this flag forces the code in entry_32.S to - call save_v86_state() and change the stack pointer - to KVM86->regs32 */ - set_thread_flag(TIF_IRET); + the path where we call save_v86_state() and change + the stack pointer to KVM86->regs32 */ + set_thread_flag(TIF_NOTIFY_RESUME); return 0; } do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); -- cgit v1.2.3-70-g09d2 From 8e2c85aa6c7a158d967db75931db7f13d20d31f4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Sep 2012 13:39:47 -0400 Subject: um: let signal_delivered() do SIGTRAP on singlestepping into handler ... rather than duplicating that in sigframe setup code (and doing that inconsistently, at that) Signed-off-by: Al Viro --- arch/um/kernel/signal.c | 6 +++++- arch/x86/um/signal.c | 6 ------ 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 7362d58efc2..cc9c2350e41 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, struct k_sigaction *ka, siginfo_t *info) { sigset_t *oldset = sigmask_to_save(); + int singlestep = 0; unsigned long sp; int err; + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + singlestep = 1; + /* Did we come from a system call? */ if (PT_REGS_SYSCALL_NR(regs) >= 0) { /* If so, check system call restarting.. */ @@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, if (err) force_sigsegv(signr, current); else - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, info, ka, regs, singlestep); } static int kern_do_signal(struct pt_regs *regs) diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index a508cea1350..ba7363ecf89 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) 0; PT_REGS_CX(regs) = (unsigned long) 0; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } @@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) &frame->info; PT_REGS_CX(regs) = (unsigned long) &frame->uc; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } -- cgit v1.2.3-70-g09d2 From a4d94ff8aa864c05b33c2de1f8c5d0176d7a4b63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Sep 2012 09:28:25 -0400 Subject: um: kill thread->forking we only use that to tell copy_thread() done by syscall from that done by kernel_thread(). However, it's easier to do simply by checking PF_KTHREAD in thread flags. Merge sys_clone() guts for 32bit and 64bit, while we are at it... Signed-off-by: Al Viro --- arch/um/include/asm/processor-generic.h | 9 --------- arch/um/kernel/process.c | 8 ++++---- arch/um/kernel/syscall.c | 24 ++++++++++++------------ arch/x86/um/shared/sysdep/syscalls.h | 2 ++ arch/x86/um/sys_call_table_32.c | 2 +- arch/x86/um/syscalls_32.c | 27 +++++++-------------------- arch/x86/um/syscalls_64.c | 23 +++-------------------- 7 files changed, 29 insertions(+), 66 deletions(-) (limited to 'arch/x86') diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 69f1c57a8d0..33a6a2423bd 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,14 +20,6 @@ struct mm_struct; struct thread_struct { struct task_struct *saved_task; - /* - * This flag is set to 1 before calling do_fork (and analyzed in - * copy_thread) to mark that we are begin called from userspace (fork / - * vfork / clone), and reset to 0 after. It is left to 0 when called - * from kernelspace (i.e. kernel_thread() or fork_idle(), - * as of 2.6.11). - */ - int forking; struct pt_regs regs; int singlestep_syscall; void *fault_addr; @@ -58,7 +50,6 @@ struct thread_struct { #define INIT_THREAD \ { \ - .forking = 0, \ .regs = EMPTY_REGS, \ .fault_addr = NULL, \ .prev_sched = NULL, \ diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 57fc7028714..c5f5afa5074 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct pt_regs *regs) { void (*handler)(void); + int kthread = current->flags & PF_KTHREAD; int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; - if (current->thread.forking) { + if (!kthread) { memcpy(&p->thread.regs.regs, ®s->regs, sizeof(p->thread.regs.regs)); PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); @@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, handler = fork_handler; arch_copy_thread(¤t->thread.arch, &p->thread.arch); - } - else { + } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); p->thread.request.u.thread = current->thread.request.u.thread; handler = new_thread_handler; @@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, new_thread(task_stack_page(p), &p->thread.switch_buf, handler); - if (current->thread.forking) { + if (!kthread) { clear_flushed_tls(p); /* diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index f958cb876ee..a4c6d8eee74 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -17,25 +17,25 @@ long sys_fork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), + return do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; } long sys_vfork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + + return do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); } long old_mmap(unsigned long addr, unsigned long len, diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h index bd9a89b67e4..ca255a805ed 100644 --- a/arch/x86/um/shared/sysdep/syscalls.h +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -1,3 +1,5 @@ +extern long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); #ifdef __i386__ #include "syscalls_32.h" #else diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 68d1dc91b37..b5408cecac6 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -28,7 +28,7 @@ #define ptregs_execve sys_execve #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old -#define ptregs_clone sys_clone +#define ptregs_clone i386_clone #define ptregs_vm86 sys_vm86 #define ptregs_sigaltstack sys_sigaltstack #define ptregs_vfork sys_vfork diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c index b853e8600b9..db444c7218f 100644 --- a/arch/x86/um/syscalls_32.c +++ b/arch/x86/um/syscalls_32.c @@ -3,37 +3,24 @@ * Licensed under the GPL */ -#include "linux/sched.h" -#include "linux/shm.h" -#include "linux/ipc.h" -#include "linux/syscalls.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include +#include /* * The prototype on i386 is: * - * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls * * and the "newtls" arg. on i386 is read by copy_thread directly from the * register saved on the stack. */ -long sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tid, void *newtls, int __user *child_tid) +long i386_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) { - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; + return sys_clone(clone_flags, newsp, parent_tid, child_tid); } + long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index f3d82bb6e15..adb08eb5c22 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -5,12 +5,9 @@ * Licensed under the GPL */ -#include "linux/linkage.h" -#include "linux/personality.h" -#include "linux/utsname.h" -#include "asm/prctl.h" /* XXX This should get the constants from libc */ -#include "asm/uaccess.h" -#include "os.h" +#include +#include /* XXX This should get the constants from libc */ +#include long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) { @@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr) return arch_prctl(current, code, (unsigned long __user *) addr); } -long sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid) -{ - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; -} - void arch_switch_to(struct task_struct *to) { if ((to->thread.arch.fs == 0) || (to->mm == NULL)) -- cgit v1.2.3-70-g09d2 From 24cc7fb69a5b5edfdff1d38c6a213d6a33648829 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 20 Sep 2012 10:28:45 -0400 Subject: x86/kbuild: archscripts depends on scripts_basic While building the SUSE kernel packages, which build the scripts, make clean, and then build everything, we have been running into spurious build failures. We tracked them down to a simple dependency issue: $ make mrproper CLEAN arch/x86/tools CLEAN scripts/basic $ cp patches/config/x86_64/desktop .config $ make archscripts HOSTCC arch/x86/tools/relocs /bin/sh: scripts/basic/fixdep: No such file or directory make[3]: *** [arch/x86/tools/relocs] Error 1 make[2]: *** [archscripts] Error 2 make[1]: *** [sub-make] Error 2 make: *** [all] Error 2 This was introduced by commit 6520fe55 (x86, realmode: 16-bit real-mode code support for relocs), which added the archscripts dependency to archprepare. This patch adds the scripts_basic dependency to the x86 archscripts. Signed-off-by: Jeff Mahoney Signed-off-by: Michal Marek --- arch/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b0c5276861e..c098ca4671d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -138,7 +138,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) -archscripts: +archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs ### -- cgit v1.2.3-70-g09d2 From 26bf264e871a4b9a8ac09c21a2b518e7f23830d5 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 17 Sep 2012 16:31:13 +0800 Subject: KVM: x86: Export svm/vmx exit code and vector code to userspace Exporting KVM exit information to userspace to be consumed by perf. Signed-off-by: Dong Hao [ Dong Hao : rebase it on acme's git tree ] Signed-off-by: Xiao Guangrong Acked-by: Marcelo Tosatti Cc: Avi Kivity Cc: David Ahern Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: kvm@vger.kernel.org Cc: Runzhen Wang Link: http://lkml.kernel.org/r/1347870675-31495-2-git-send-email-haodong@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/kvm.h | 16 ++++ arch/x86/include/asm/kvm_host.h | 16 ---- arch/x86/include/asm/svm.h | 205 +++++++++++++++++++++++++--------------- arch/x86/include/asm/vmx.h | 127 ++++++++++++++++--------- arch/x86/kvm/trace.h | 89 ----------------- 5 files changed, 230 insertions(+), 223 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 246617efd67..41e08cb6a09 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,22 @@ #include #include +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + /* Select x86 specific features in */ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09155d64cf7..1eaa6b05667 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -75,22 +75,6 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define DE_VECTOR 0 -#define DB_VECTOR 1 -#define BP_VECTOR 3 -#define OF_VECTOR 4 -#define BR_VECTOR 5 -#define UD_VECTOR 6 -#define NM_VECTOR 7 -#define DF_VECTOR 8 -#define TS_VECTOR 10 -#define NP_VECTOR 11 -#define SS_VECTOR 12 -#define GP_VECTOR 13 -#define PF_VECTOR 14 -#define MF_VECTOR 16 -#define MC_VECTOR 18 - #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f2b83bc7d78..cdf5674dd23 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -1,6 +1,135 @@ #ifndef __SVM_H #define __SVM_H +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_EXIT_REASONS \ + { SVM_EXIT_READ_CR0, "read_cr0" }, \ + { SVM_EXIT_READ_CR3, "read_cr3" }, \ + { SVM_EXIT_READ_CR4, "read_cr4" }, \ + { SVM_EXIT_READ_CR8, "read_cr8" }, \ + { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ + { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ + { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ + { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ + { SVM_EXIT_READ_DR0, "read_dr0" }, \ + { SVM_EXIT_READ_DR1, "read_dr1" }, \ + { SVM_EXIT_READ_DR2, "read_dr2" }, \ + { SVM_EXIT_READ_DR3, "read_dr3" }, \ + { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ + { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ + { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ + { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ + { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ + { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ + { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ + { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ + { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ + { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ + { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ + { SVM_EXIT_INTR, "interrupt" }, \ + { SVM_EXIT_NMI, "nmi" }, \ + { SVM_EXIT_SMI, "smi" }, \ + { SVM_EXIT_INIT, "init" }, \ + { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CPUID, "cpuid" }, \ + { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_HLT, "hlt" }, \ + { SVM_EXIT_INVLPG, "invlpg" }, \ + { SVM_EXIT_INVLPGA, "invlpga" }, \ + { SVM_EXIT_IOIO, "io" }, \ + { SVM_EXIT_MSR, "msr" }, \ + { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ + { SVM_EXIT_SHUTDOWN, "shutdown" }, \ + { SVM_EXIT_VMRUN, "vmrun" }, \ + { SVM_EXIT_VMMCALL, "hypercall" }, \ + { SVM_EXIT_VMLOAD, "vmload" }, \ + { SVM_EXIT_VMSAVE, "vmsave" }, \ + { SVM_EXIT_STGI, "stgi" }, \ + { SVM_EXIT_CLGI, "clgi" }, \ + { SVM_EXIT_SKINIT, "skinit" }, \ + { SVM_EXIT_WBINVD, "wbinvd" }, \ + { SVM_EXIT_MONITOR, "monitor" }, \ + { SVM_EXIT_MWAIT, "mwait" }, \ + { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_NPF, "npf" } + +#ifdef __KERNEL__ + enum { INTERCEPT_INTR, INTERCEPT_NMI, @@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFO_REG_MASK 0x0F -#define SVM_EXIT_READ_CR0 0x000 -#define SVM_EXIT_READ_CR3 0x003 -#define SVM_EXIT_READ_CR4 0x004 -#define SVM_EXIT_READ_CR8 0x008 -#define SVM_EXIT_WRITE_CR0 0x010 -#define SVM_EXIT_WRITE_CR3 0x013 -#define SVM_EXIT_WRITE_CR4 0x014 -#define SVM_EXIT_WRITE_CR8 0x018 -#define SVM_EXIT_READ_DR0 0x020 -#define SVM_EXIT_READ_DR1 0x021 -#define SVM_EXIT_READ_DR2 0x022 -#define SVM_EXIT_READ_DR3 0x023 -#define SVM_EXIT_READ_DR4 0x024 -#define SVM_EXIT_READ_DR5 0x025 -#define SVM_EXIT_READ_DR6 0x026 -#define SVM_EXIT_READ_DR7 0x027 -#define SVM_EXIT_WRITE_DR0 0x030 -#define SVM_EXIT_WRITE_DR1 0x031 -#define SVM_EXIT_WRITE_DR2 0x032 -#define SVM_EXIT_WRITE_DR3 0x033 -#define SVM_EXIT_WRITE_DR4 0x034 -#define SVM_EXIT_WRITE_DR5 0x035 -#define SVM_EXIT_WRITE_DR6 0x036 -#define SVM_EXIT_WRITE_DR7 0x037 -#define SVM_EXIT_EXCP_BASE 0x040 -#define SVM_EXIT_INTR 0x060 -#define SVM_EXIT_NMI 0x061 -#define SVM_EXIT_SMI 0x062 -#define SVM_EXIT_INIT 0x063 -#define SVM_EXIT_VINTR 0x064 -#define SVM_EXIT_CR0_SEL_WRITE 0x065 -#define SVM_EXIT_IDTR_READ 0x066 -#define SVM_EXIT_GDTR_READ 0x067 -#define SVM_EXIT_LDTR_READ 0x068 -#define SVM_EXIT_TR_READ 0x069 -#define SVM_EXIT_IDTR_WRITE 0x06a -#define SVM_EXIT_GDTR_WRITE 0x06b -#define SVM_EXIT_LDTR_WRITE 0x06c -#define SVM_EXIT_TR_WRITE 0x06d -#define SVM_EXIT_RDTSC 0x06e -#define SVM_EXIT_RDPMC 0x06f -#define SVM_EXIT_PUSHF 0x070 -#define SVM_EXIT_POPF 0x071 -#define SVM_EXIT_CPUID 0x072 -#define SVM_EXIT_RSM 0x073 -#define SVM_EXIT_IRET 0x074 -#define SVM_EXIT_SWINT 0x075 -#define SVM_EXIT_INVD 0x076 -#define SVM_EXIT_PAUSE 0x077 -#define SVM_EXIT_HLT 0x078 -#define SVM_EXIT_INVLPG 0x079 -#define SVM_EXIT_INVLPGA 0x07a -#define SVM_EXIT_IOIO 0x07b -#define SVM_EXIT_MSR 0x07c -#define SVM_EXIT_TASK_SWITCH 0x07d -#define SVM_EXIT_FERR_FREEZE 0x07e -#define SVM_EXIT_SHUTDOWN 0x07f -#define SVM_EXIT_VMRUN 0x080 -#define SVM_EXIT_VMMCALL 0x081 -#define SVM_EXIT_VMLOAD 0x082 -#define SVM_EXIT_VMSAVE 0x083 -#define SVM_EXIT_STGI 0x084 -#define SVM_EXIT_CLGI 0x085 -#define SVM_EXIT_SKINIT 0x086 -#define SVM_EXIT_RDTSCP 0x087 -#define SVM_EXIT_ICEBP 0x088 -#define SVM_EXIT_WBINVD 0x089 -#define SVM_EXIT_MONITOR 0x08a -#define SVM_EXIT_MWAIT 0x08b -#define SVM_EXIT_MWAIT_COND 0x08c -#define SVM_EXIT_XSETBV 0x08d -#define SVM_EXIT_NPF 0x400 - -#define SVM_EXIT_ERR -1 - #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" @@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb { #endif +#endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 74fcb963595..36ec21c36d6 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -25,6 +25,88 @@ * */ +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_INVPCID 58 + +#define VMX_EXIT_REASONS \ + { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ + { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ + { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ + { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ + { EXIT_REASON_CPUID, "CPUID" }, \ + { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVLPG, "INVLPG" }, \ + { EXIT_REASON_RDPMC, "RDPMC" }, \ + { EXIT_REASON_RDTSC, "RDTSC" }, \ + { EXIT_REASON_VMCALL, "VMCALL" }, \ + { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ + { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ + { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ + { EXIT_REASON_VMPTRST, "VMPTRST" }, \ + { EXIT_REASON_VMREAD, "VMREAD" }, \ + { EXIT_REASON_VMRESUME, "VMRESUME" }, \ + { EXIT_REASON_VMWRITE, "VMWRITE" }, \ + { EXIT_REASON_VMOFF, "VMOFF" }, \ + { EXIT_REASON_VMON, "VMON" }, \ + { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ + { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ + { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ + { EXIT_REASON_MSR_READ, "MSR_READ" }, \ + { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ + { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ + { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ + { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ + { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ + { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_WBINVD, "WBINVD" } + +#ifdef __KERNEL__ + #include /* @@ -241,49 +323,6 @@ enum vmcs_field { HOST_RIP = 0x00006c16, }; -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 - -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 - -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVD 13 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 -#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 -#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_INVALID_STATE 33 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_MONITOR_INSTRUCTION 39 -#define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MCE_DURING_VMENTRY 41 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_WBINVD 54 -#define EXIT_REASON_XSETBV 55 -#define EXIT_REASON_INVPCID 58 - /* * Interruption-information format */ @@ -488,3 +527,5 @@ enum vm_instruction_error_number { }; #endif + +#endif diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a71faf727ff..bca63f04dcc 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 -#define VMX_EXIT_REASONS \ - { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ - { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ - { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ - { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ - { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ - { EXIT_REASON_CPUID, "CPUID" }, \ - { EXIT_REASON_HLT, "HLT" }, \ - { EXIT_REASON_INVLPG, "INVLPG" }, \ - { EXIT_REASON_RDPMC, "RDPMC" }, \ - { EXIT_REASON_RDTSC, "RDTSC" }, \ - { EXIT_REASON_VMCALL, "VMCALL" }, \ - { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ - { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ - { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ - { EXIT_REASON_VMPTRST, "VMPTRST" }, \ - { EXIT_REASON_VMREAD, "VMREAD" }, \ - { EXIT_REASON_VMRESUME, "VMRESUME" }, \ - { EXIT_REASON_VMWRITE, "VMWRITE" }, \ - { EXIT_REASON_VMOFF, "VMOFF" }, \ - { EXIT_REASON_VMON, "VMON" }, \ - { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ - { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ - { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ - { EXIT_REASON_MSR_READ, "MSR_READ" }, \ - { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ - { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ - { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ - { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ - { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ - { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ - { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ - { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ - { EXIT_REASON_WBINVD, "WBINVD" } - -#define SVM_EXIT_REASONS \ - { SVM_EXIT_READ_CR0, "read_cr0" }, \ - { SVM_EXIT_READ_CR3, "read_cr3" }, \ - { SVM_EXIT_READ_CR4, "read_cr4" }, \ - { SVM_EXIT_READ_CR8, "read_cr8" }, \ - { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ - { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ - { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ - { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ - { SVM_EXIT_READ_DR0, "read_dr0" }, \ - { SVM_EXIT_READ_DR1, "read_dr1" }, \ - { SVM_EXIT_READ_DR2, "read_dr2" }, \ - { SVM_EXIT_READ_DR3, "read_dr3" }, \ - { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ - { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ - { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ - { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ - { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ - { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ - { SVM_EXIT_INTR, "interrupt" }, \ - { SVM_EXIT_NMI, "nmi" }, \ - { SVM_EXIT_SMI, "smi" }, \ - { SVM_EXIT_INIT, "init" }, \ - { SVM_EXIT_VINTR, "vintr" }, \ - { SVM_EXIT_CPUID, "cpuid" }, \ - { SVM_EXIT_INVD, "invd" }, \ - { SVM_EXIT_HLT, "hlt" }, \ - { SVM_EXIT_INVLPG, "invlpg" }, \ - { SVM_EXIT_INVLPGA, "invlpga" }, \ - { SVM_EXIT_IOIO, "io" }, \ - { SVM_EXIT_MSR, "msr" }, \ - { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ - { SVM_EXIT_SHUTDOWN, "shutdown" }, \ - { SVM_EXIT_VMRUN, "vmrun" }, \ - { SVM_EXIT_VMMCALL, "hypercall" }, \ - { SVM_EXIT_VMLOAD, "vmload" }, \ - { SVM_EXIT_VMSAVE, "vmsave" }, \ - { SVM_EXIT_STGI, "stgi" }, \ - { SVM_EXIT_CLGI, "clgi" }, \ - { SVM_EXIT_SKINIT, "skinit" }, \ - { SVM_EXIT_WBINVD, "wbinvd" }, \ - { SVM_EXIT_MONITOR, "monitor" }, \ - { SVM_EXIT_MWAIT, "mwait" }, \ - { SVM_EXIT_XSETBV, "xsetbv" }, \ - { SVM_EXIT_NPF, "npf" } - /* * Tracepoint for kvm guest exit: */ -- cgit v1.2.3-70-g09d2 From 8bd753be7a96443dd5111cb07ed5907a3787c978 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:06 -0700 Subject: x86-32, mm: The WP test should be done on a kernel page PAGE_READONLY includes user permission, but this is a page used exclusively by the kernel; use PAGE_KERNEL_RO instead. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-3-git-send-email-hpa@linux.intel.com --- arch/x86/mm/init_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 575d86f85ce..e537b351e8c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -712,7 +712,7 @@ static void __init test_wp_bit(void) "Checking if this processor honours the WP bit even in supervisor mode..."); /* Any page-aligned address will do, the test is non-destructive */ - __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); + __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_KERNEL_RO); boot_cpu_data.wp_works_ok = do_test_wp_bit(); clear_fixmap(FIX_WP_TEST); -- cgit v1.2.3-70-g09d2 From 85fdf05cc395f23384cb0adb22765cbaa9653b54 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:07 -0700 Subject: x86, smap: Add CR4 bit for SMAP Add X86_CR4_SMAP to . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-4-git-send-email-hpa@linux.intel.com --- arch/x86/include/asm/processor-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index aea1d1d848c..680cf09ed10 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -65,6 +65,7 @@ #define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ +#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */ /* * x86-64 Task Priority Register, CR8 -- cgit v1.2.3-70-g09d2 From 9cebed423c84a56b871327dd77e555d1d2186a6b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:08 -0700 Subject: x86, alternative: Use .pushsection/.popsection .section/.previous doesn't nest. Use .pushsection/.popsection in so that they can be properly nested. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-5-git-send-email-hpa@linux.intel.com --- arch/x86/include/asm/alternative-asm.h | 4 ++-- arch/x86/include/asm/alternative.h | 32 ++++++++++++++++---------------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 952bd0100c5..018d29fe634 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -5,10 +5,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX 672: lock - .section .smp_locks,"a" + .pushsection .smp_locks,"a" .balign 4 .long 672b - . - .previous + .popsection .endm #else .macro LOCK_PREFIX diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 70780689599..87bc00d4efe 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -29,10 +29,10 @@ #ifdef CONFIG_SMP #define LOCK_PREFIX_HERE \ - ".section .smp_locks,\"a\"\n" \ - ".balign 4\n" \ - ".long 671f - .\n" /* offset */ \ - ".previous\n" \ + ".pushsection .smp_locks,\"a\"\n" \ + ".balign 4\n" \ + ".long 671f - .\n" /* offset */ \ + ".popsection\n" \ "671:" #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " @@ -99,30 +99,30 @@ static inline int alternatives_text_reserved(void *start, void *end) /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ OLDINSTR(oldinstr) \ - ".section .altinstructions,\"a\"\n" \ + ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature, 1) \ - ".previous\n" \ - ".section .discard,\"aw\",@progbits\n" \ + ".popsection\n" \ + ".pushsection .discard,\"aw\",@progbits\n" \ DISCARD_ENTRY(1) \ - ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".previous" + ".popsection" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR(oldinstr) \ - ".section .altinstructions,\"a\"\n" \ + ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature1, 1) \ ALTINSTR_ENTRY(feature2, 2) \ - ".previous\n" \ - ".section .discard,\"aw\",@progbits\n" \ + ".popsection\n" \ + ".pushsection .discard,\"aw\",@progbits\n" \ DISCARD_ENTRY(1) \ DISCARD_ENTRY(2) \ - ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".previous" + ".popsection" /* * This must be included *after* the definition of ALTERNATIVE due to -- cgit v1.2.3-70-g09d2 From 76f30759f690db21ca567a20665ed2679ad3235b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:09 -0700 Subject: x86, alternative: Add header guards to Add header guards to protect against multiple inclusion. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-6-git-send-email-hpa@linux.intel.com --- arch/x86/include/asm/alternative-asm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 018d29fe634..372231c22a4 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,3 +1,6 @@ +#ifndef _ASM_X86_ALTERNATIVE_ASM_H +#define _ASM_X86_ALTERNATIVE_ASM_H + #ifdef __ASSEMBLY__ #include @@ -24,3 +27,5 @@ .endm #endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_ALTERNATIVE_ASM_H */ -- cgit v1.2.3-70-g09d2 From 51ae4a2d775e1ee456282d7c60e49693d0a8555d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:10 -0700 Subject: x86, smap: Add a header file with macros for STAC/CLAC The STAC/CLAC instructions are only available with SMAP, but on the other hand they aren't needed if SMAP is not available, or before we start to run userspace, so construct them as alternatives which start out as noops and are enabled by the alternatives mechanism. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-7-git-send-email-hpa@linux.intel.com --- arch/x86/Kconfig | 11 ++++++ arch/x86/include/asm/smap.h | 91 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 arch/x86/include/asm/smap.h (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..5ce86941333 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1487,6 +1487,17 @@ config ARCH_RANDOM If supported, this is a high bandwidth, cryptographically secure hardware random number generator. +config X86_SMAP + def_bool y + prompt "Supervisor Mode Access Prevention" if EXPERT + ---help--- + Supervisor Mode Access Prevention (SMAP) is a security + feature in newer Intel processors. There is a small + performance cost if this enabled and turned on; there is + also a small increase in the kernel size if this is enabled. + + If unsure, say Y. + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h new file mode 100644 index 00000000000..3989c2492eb --- /dev/null +++ b/arch/x86/include/asm/smap.h @@ -0,0 +1,91 @@ +/* + * Supervisor Mode Access Prevention support + * + * Copyright (C) 2012 Intel Corporation + * Author: H. Peter Anvin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#ifndef _ASM_X86_SMAP_H +#define _ASM_X86_SMAP_H + +#include +#include +#include + +/* "Raw" instruction opcodes */ +#define __ASM_CLAC .byte 0x0f,0x01,0xca +#define __ASM_STAC .byte 0x0f,0x01,0xcb + +#ifdef __ASSEMBLY__ + +#include + +#ifdef CONFIG_X86_SMAP + +#define ASM_CLAC \ + 661: ASM_NOP3 ; \ + .pushsection .altinstr_replacement, "ax" ; \ + 662: __ASM_CLAC ; \ + .popsection ; \ + .pushsection .altinstructions, "a" ; \ + altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ + .popsection + +#define ASM_STAC \ + 661: ASM_NOP3 ; \ + .pushsection .altinstr_replacement, "ax" ; \ + 662: __ASM_STAC ; \ + .popsection ; \ + .pushsection .altinstructions, "a" ; \ + altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ + .popsection + +#else /* CONFIG_X86_SMAP */ + +#define ASM_CLAC +#define ASM_STAC + +#endif /* CONFIG_X86_SMAP */ + +#else /* __ASSEMBLY__ */ + +#include + +#ifdef CONFIG_X86_SMAP + +static inline void clac(void) +{ + /* Note: a barrier is implicit in alternative() */ + alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); +} + +static inline void stac(void) +{ + /* Note: a barrier is implicit in alternative() */ + alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); +} + +/* These macros can be used in asm() statements */ +#define ASM_CLAC \ + ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) +#define ASM_STAC \ + ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) + +#else /* CONFIG_X86_SMAP */ + +static inline void clac(void) { } +static inline void stac(void) { } + +#define ASM_CLAC +#define ASM_STAC + +#endif /* CONFIG_X86_SMAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SMAP_H */ -- cgit v1.2.3-70-g09d2 From a052858fabb376b695f2c125633daa6728e0f284 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:11 -0700 Subject: x86, uaccess: Merge prototypes for clear_user/__clear_user The prototypes for clear_user() and __clear_user() are identical in the 32- and 64-bit headers. No functionality change. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-8-git-send-email-hpa@linux.intel.com --- arch/x86/include/asm/uaccess.h | 3 +++ arch/x86/include/asm/uaccess_32.h | 3 --- arch/x86/include/asm/uaccess_64.h | 3 --- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index e1f3a17034f..2c7df3d184f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -569,6 +569,9 @@ strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); +unsigned long __must_check clear_user(void __user *mem, unsigned long len); +unsigned long __must_check __clear_user(void __user *mem, unsigned long len); + /* * movsl can be slow when source and dest are not both 8-byte aligned */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 576e39bca6a..7f760a9f1f6 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,7 +213,4 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -unsigned long __must_check clear_user(void __user *mem, unsigned long len); -unsigned long __must_check __clear_user(void __user *mem, unsigned long len); - #endif /* _ASM_X86_UACCESS_32_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index d8def8b3dba..142810c457d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -217,9 +217,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check unsigned long clear_user(void __user *mem, unsigned long len); -__must_check unsigned long __clear_user(void __user *mem, unsigned long len); - static __must_check __always_inline int __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) { -- cgit v1.2.3-70-g09d2 From 63bcff2a307b9bcc712a8251eb27df8b2e117967 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:12 -0700 Subject: x86, smap: Add STAC and CLAC instructions to control user space access When Supervisor Mode Access Prevention (SMAP) is enabled, access to userspace from the kernel is controlled by the AC flag. To make the performance of manipulating that flag acceptable, there are two new instructions, STAC and CLAC, to set and clear it. This patch adds those instructions, via alternative(), when the SMAP feature is enabled. It also adds X86_EFLAGS_AC unconditionally to the SYSCALL entry mask; there is simply no reason to make that one conditional. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-9-git-send-email-hpa@linux.intel.com --- arch/x86/ia32/ia32entry.S | 6 ++++++ arch/x86/include/asm/fpu-internal.h | 10 ++++++---- arch/x86/include/asm/futex.h | 19 +++++++++++++------ arch/x86/include/asm/smap.h | 4 ++-- arch/x86/include/asm/uaccess.h | 31 +++++++++++++++++++------------ arch/x86/include/asm/xsave.h | 10 ++++++---- arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kernel/entry_64.S | 11 ++++++++++- arch/x86/lib/copy_user_64.S | 7 +++++++ arch/x86/lib/copy_user_nocache_64.S | 3 +++ arch/x86/lib/getuser.S | 10 ++++++++++ arch/x86/lib/putuser.S | 8 +++++++- arch/x86/lib/usercopy_32.c | 13 ++++++++++++- arch/x86/lib/usercopy_64.c | 3 +++ 14 files changed, 106 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 20e5f7ba0e6..9c289504e68 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -146,8 +147,10 @@ ENTRY(ia32_sysenter_target) SAVE_ARGS 0,1,0 /* no need to do an access_ok check here because rbp has been 32bit zero extended */ + ASM_STAC 1: movl (%rbp),%ebp _ASM_EXTABLE(1b,ia32_badarg) + ASM_CLAC orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -301,8 +304,10 @@ ENTRY(ia32_cstar_target) /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ + ASM_STAC 1: movl (%r8),%r9d _ASM_EXTABLE(1b,ia32_badarg) + ASM_CLAC orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -365,6 +370,7 @@ cstar_tracesys: END(ia32_cstar_target) ia32_badarg: + ASM_CLAC movq $-EFAULT,%rax jmp ia32_sysret CFI_ENDPROC diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 75f4c6d6a33..0fe13583a02 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -126,8 +126,9 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) /* See comment in fxsave() below. */ #ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxsaveq %[fx]\n\t" - "2:\n" + asm volatile(ASM_STAC "\n" + "1: fxsaveq %[fx]\n\t" + "2: " ASM_CLAC "\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" " jmp 2b\n" @@ -136,8 +137,9 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) : [err] "=r" (err), [fx] "=m" (*fx) : "0" (0)); #else - asm volatile("1: rex64/fxsave (%[fx])\n\t" - "2:\n" + asm volatile(ASM_STAC "\n" + "1: rex64/fxsave (%[fx])\n\t" + "2: " ASM_CLAC "\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" " jmp 2b\n" diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 71ecbcba1a4..f373046e63e 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -9,10 +9,13 @@ #include #include #include +#include #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ - asm volatile("1:\t" insn "\n" \ - "2:\t.section .fixup,\"ax\"\n" \ + asm volatile("\t" ASM_STAC "\n" \ + "1:\t" insn "\n" \ + "2:\t" ASM_CLAC "\n" \ + "\t.section .fixup,\"ax\"\n" \ "3:\tmov\t%3, %1\n" \ "\tjmp\t2b\n" \ "\t.previous\n" \ @@ -21,12 +24,14 @@ : "i" (-EFAULT), "0" (oparg), "1" (0)) #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ - asm volatile("1:\tmovl %2, %0\n" \ + asm volatile("\t" ASM_STAC "\n" \ + "1:\tmovl %2, %0\n" \ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t1b\n" \ - "3:\t.section .fixup,\"ax\"\n" \ + "3:\t" ASM_CLAC "\n" \ + "\t.section .fixup,\"ax\"\n" \ "4:\tmov\t%5, %1\n" \ "\tjmp\t3b\n" \ "\t.previous\n" \ @@ -122,8 +127,10 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" - "2:\t.section .fixup, \"ax\"\n" + asm volatile("\t" ASM_STAC "\n" + "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" + "2:\t" ASM_CLAC "\n" + "\t.section .fixup, \"ax\"\n" "3:\tmov %3, %0\n" "\tjmp 2b\n" "\t.previous\n" diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 3989c2492eb..8d3120f4e27 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -58,13 +58,13 @@ #ifdef CONFIG_X86_SMAP -static inline void clac(void) +static __always_inline void clac(void) { /* Note: a barrier is implicit in alternative() */ alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); } -static inline void stac(void) +static __always_inline void stac(void) { /* Note: a barrier is implicit in alternative() */ alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2c7df3d184f..b92ece13c23 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -9,6 +9,7 @@ #include #include #include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -192,9 +193,10 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_asm_u64(x, addr, err, errret) \ - asm volatile("1: movl %%eax,0(%2)\n" \ + asm volatile(ASM_STAC "\n" \ + "1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ - "3:\n" \ + "3: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "4: movl %3,%0\n" \ " jmp 3b\n" \ @@ -205,9 +207,10 @@ extern int __get_user_bad(void); : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ - asm volatile("1: movl %%eax,0(%1)\n" \ + asm volatile(ASM_STAC "\n" \ + "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ - "3:\n" \ + "3: " ASM_CLAC "\n" \ _ASM_EXTABLE_EX(1b, 2b) \ _ASM_EXTABLE_EX(2b, 3b) \ : : "A" (x), "r" (addr)) @@ -379,8 +382,9 @@ do { \ } while (0) #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ + asm volatile(ASM_STAC "\n" \ + "1: mov"itype" %2,%"rtype"1\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " xor"itype" %"rtype"1,%"rtype"1\n" \ @@ -412,8 +416,9 @@ do { \ } while (0) #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %1,%"rtype"0\n" \ - "2:\n" \ + asm volatile(ASM_STAC "\n" \ + "1: mov"itype" %1,%"rtype"0\n" \ + "2: " ASM_CLAC "\n" \ _ASM_EXTABLE_EX(1b, 2b) \ : ltype(x) : "m" (__m(addr))) @@ -443,8 +448,9 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ + asm volatile(ASM_STAC "\n" \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " jmp 2b\n" \ @@ -454,8 +460,9 @@ struct __large_struct { unsigned long buf[100]; }; : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) #define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %"rtype"0,%1\n" \ - "2:\n" \ + asm volatile(ASM_STAC "\n" \ + "1: mov"itype" %"rtype"0,%1\n" \ + "2: " ASM_CLAC "\n" \ _ASM_EXTABLE_EX(1b, 2b) \ : : ltype(x), "m" (__m(addr))) diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 8a1b6f9b594..2a923bd5434 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -74,8 +74,9 @@ static inline int xsave_user(struct xsave_struct __user *buf) if (unlikely(err)) return -EFAULT; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" - "2:\n" + __asm__ __volatile__(ASM_STAC "\n" + "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + "2: " ASM_CLAC "\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" " jmp 2b\n" @@ -97,8 +98,9 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) u32 lmask = mask; u32 hmask = mask >> 32; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" - "2:\n" + __asm__ __volatile__(ASM_STAC "\n" + "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + "2: " ASM_CLAC "\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" " jmp 2b\n" diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fcc..cd43e525fde 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1113,7 +1113,8 @@ void syscall_init(void) /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| + X86_EFLAGS_IOPL|X86_EFLAGS_AC); } unsigned long kernel_eflags; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8c834..ce87e3d10c6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,6 +56,7 @@ #include #include #include +#include #include /* Avoid __ASSEMBLER__'ifying just for this. */ @@ -465,7 +466,8 @@ END(ret_from_fork) * System call entry. Up to 6 arguments in registers are supported. * * SYSCALL does not save anything on the stack and does not change the - * stack pointer. + * stack pointer. However, it does mask the flags register for us, so + * CLD and CLAC are not needed. */ /* @@ -884,6 +886,7 @@ END(interrupt) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: + ASM_CLAC XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ @@ -1023,6 +1026,7 @@ END(common_interrupt) */ .macro apicinterrupt num sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME pushq_cfi $~(\num) .Lcommon_\sym: @@ -1077,6 +1081,7 @@ apicinterrupt IRQ_WORK_VECTOR \ */ .macro zeroentry sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1094,6 +1099,7 @@ END(\sym) .macro paranoidzeroentry sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1112,6 +1118,7 @@ END(\sym) #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) .macro paranoidzeroentry_ist sym do_sym ist ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1131,6 +1138,7 @@ END(\sym) .macro errorentry sym do_sym ENTRY(\sym) + ASM_CLAC XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $ORIG_RAX-R15, %rsp @@ -1149,6 +1157,7 @@ END(\sym) /* error code is on the stack already */ .macro paranoiderrorentry sym do_sym ENTRY(\sym) + ASM_CLAC XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $ORIG_RAX-R15, %rsp diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 5b2995f4557..a30ca15be21 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -17,6 +17,7 @@ #include #include #include +#include /* * By placing feature2 after feature1 in altinstructions section, we logically @@ -130,6 +131,7 @@ ENDPROC(bad_from_user) */ ENTRY(copy_user_generic_unrolled) CFI_STARTPROC + ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -177,6 +179,7 @@ ENTRY(copy_user_generic_unrolled) decl %ecx jnz 21b 23: xor %eax,%eax + ASM_CLAC ret .section .fixup,"ax" @@ -232,6 +235,7 @@ ENDPROC(copy_user_generic_unrolled) */ ENTRY(copy_user_generic_string) CFI_STARTPROC + ASM_STAC andl %edx,%edx jz 4f cmpl $8,%edx @@ -246,6 +250,7 @@ ENTRY(copy_user_generic_string) 3: rep movsb 4: xorl %eax,%eax + ASM_CLAC ret .section .fixup,"ax" @@ -273,12 +278,14 @@ ENDPROC(copy_user_generic_string) */ ENTRY(copy_user_enhanced_fast_string) CFI_STARTPROC + ASM_STAC andl %edx,%edx jz 2f movl %edx,%ecx 1: rep movsb 2: xorl %eax,%eax + ASM_CLAC ret .section .fixup,"ax" diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index cacddc7163e..6a4f43c2d9e 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -15,6 +15,7 @@ #include #include #include +#include .macro ALIGN_DESTINATION #ifdef FIX_ALIGNMENT @@ -48,6 +49,7 @@ */ ENTRY(__copy_user_nocache) CFI_STARTPROC + ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -95,6 +97,7 @@ ENTRY(__copy_user_nocache) decl %ecx jnz 21b 23: xorl %eax,%eax + ASM_CLAC sfence ret diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index b33b1fb1e6d..156b9c80467 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -33,6 +33,7 @@ #include #include #include +#include .text ENTRY(__get_user_1) @@ -40,8 +41,10 @@ ENTRY(__get_user_1) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 1: movzb (%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_1) @@ -53,8 +56,10 @@ ENTRY(__get_user_2) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_2) @@ -66,8 +71,10 @@ ENTRY(__get_user_4) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 3: mov -3(%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_4) @@ -80,8 +87,10 @@ ENTRY(__get_user_8) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 4: movq -7(%_ASM_AX),%_ASM_DX xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_8) @@ -91,6 +100,7 @@ bad_get_user: CFI_STARTPROC xor %edx,%edx mov $(-EFAULT),%_ASM_AX + ASM_CLAC ret CFI_ENDPROC END(bad_get_user) diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 7f951c8f76c..fc6ba17a7ee 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -15,6 +15,7 @@ #include #include #include +#include /* @@ -31,7 +32,8 @@ #define ENTER CFI_STARTPROC ; \ GET_THREAD_INFO(%_ASM_BX) -#define EXIT ret ; \ +#define EXIT ASM_CLAC ; \ + ret ; \ CFI_ENDPROC .text @@ -39,6 +41,7 @@ ENTRY(__put_user_1) ENTER cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user + ASM_STAC 1: movb %al,(%_ASM_CX) xor %eax,%eax EXIT @@ -50,6 +53,7 @@ ENTRY(__put_user_2) sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 2: movw %ax,(%_ASM_CX) xor %eax,%eax EXIT @@ -61,6 +65,7 @@ ENTRY(__put_user_4) sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 3: movl %eax,(%_ASM_CX) xor %eax,%eax EXIT @@ -72,6 +77,7 @@ ENTRY(__put_user_8) sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 4: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 5: movl %edx,4(%_ASM_CX) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1781b2f950e..98f6d6b68f5 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -42,10 +42,11 @@ do { \ int __d0; \ might_fault(); \ __asm__ __volatile__( \ + ASM_STAC "\n" \ "0: rep; stosl\n" \ " movl %2,%0\n" \ "1: rep; stosb\n" \ - "2:\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: lea 0(%2,%0,4),%0\n" \ " jmp 2b\n" \ @@ -626,10 +627,12 @@ survive: return n; } #endif + stac(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -637,10 +640,12 @@ EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) { + stac(); if (movsl_is_ok(to, from, n)) __copy_user_zeroing(to, from, n); else n = __copy_user_zeroing_intel(to, from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll); @@ -648,11 +653,13 @@ EXPORT_SYMBOL(__copy_from_user_ll); unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, unsigned long n) { + stac(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel((void __user *)to, (const void *)from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -660,6 +667,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero); unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, unsigned long n) { + stac(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_zeroing_intel_nocache(to, from, n); @@ -668,6 +676,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, #else __copy_user_zeroing(to, from, n); #endif + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache); @@ -675,6 +684,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { + stac(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_intel_nocache(to, from, n); @@ -683,6 +693,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index e5b130bc2d0..05928aae911 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -18,6 +18,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) might_fault(); /* no memory constraint because it doesn't change any memory gcc knows about */ + stac(); asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" @@ -40,6 +41,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) : [size8] "=&c"(size), [dst] "=&D" (__d0) : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); + clac(); return size; } EXPORT_SYMBOL(__clear_user); @@ -82,5 +84,6 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) if (__put_user_nocheck(c, to++, sizeof(char))) break; + clac(); return len; } -- cgit v1.2.3-70-g09d2 From 52b6179ac87d33c2eeaff5292786a10fe98cff64 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:13 -0700 Subject: x86, smap: Turn on Supervisor Mode Access Prevention If Supervisor Mode Access Prevention is available and not disabled by the user, turn it on. Also fix the expansion of SMEP (Supervisor Mode Execution Prevention.) Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-10-git-send-email-hpa@linux.intel.com --- Documentation/kernel-parameters.txt | 6 +++++- arch/x86/kernel/cpu/common.c | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ad7e2e5088c..49c5c41b07e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1812,8 +1812,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noexec=on: enable non-executable mappings (default) noexec=off: disable non-executable mappings + nosmap [X86] + Disable SMAP (Supervisor Mode Access Prevention) + even if it is supported by processor. + nosmep [X86] - Disable SMEP (Supervisor Mode Execution Protection) + Disable SMEP (Supervisor Mode Execution Prevention) even if it is supported by processor. noexec32 [X86-64] diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cd43e525fde..7d35d659411 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -278,6 +278,31 @@ static __cpuinit void setup_smep(struct cpuinfo_x86 *c) } } +static int disable_smap __cpuinitdata; +static __init int setup_disable_smap(char *arg) +{ + disable_smap = 1; + return 1; +} +__setup("nosmap", setup_disable_smap); + +static __cpuinit void setup_smap(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_SMAP)) { + if (unlikely(disable_smap)) { + setup_clear_cpu_cap(X86_FEATURE_SMAP); + clear_in_cr4(X86_CR4_SMAP); + } else { + set_in_cr4(X86_CR4_SMAP); + /* + * Don't use clac() here since alternatives + * haven't run yet... + */ + asm volatile(__stringify(__ASM_CLAC) ::: "memory"); + } + } +} + /* * Some CPU features depend on higher CPUID levels, which may not always * be available due to CPUID level capping or broken virtualization @@ -713,6 +738,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) filter_cpuid_features(c, false); setup_smep(c); + setup_smap(c); if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); -- cgit v1.2.3-70-g09d2 From 40d3cd6695014bf3c44e2ca66b610b18acaf923d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:14 -0700 Subject: x86, smap: A page fault due to SMAP is an oops If we get a page fault due to SMAP, trigger an oops rather than spinning forever. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-11-git-send-email-hpa@linux.intel.com --- arch/x86/mm/fault.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 76dcd9d8e0b..f2fb75d46b9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -995,6 +995,17 @@ static int fault_in_kernel_space(unsigned long address) return address >= TASK_SIZE_MAX; } +static inline bool smap_violation(int error_code, struct pt_regs *regs) +{ + if (error_code & PF_USER) + return false; + + if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC)) + return false; + + return true; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -1088,6 +1099,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); + if (static_cpu_has(X86_FEATURE_SMAP)) { + if (unlikely(smap_violation(error_code, regs))) { + bad_area_nosemaphore(regs, error_code, address); + return; + } + } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* -- cgit v1.2.3-70-g09d2 From 5e88353d8b5f483bc1c873ad24ac2b59a6b66c73 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 12:43:15 -0700 Subject: x86, smap: Reduce the SMAP overhead for signal handling Signal handling contains a bunch of accesses to individual user space items, which causes an excessive number of STAC and CLAC instructions. Instead, let get/put_user_try ... get/put_user_catch() contain the STAC and CLAC instructions. This means that get/put_user_try no longer nests, and furthermore that it is no longer legal to use user space access functions other than __get/put_user_ex() inside those blocks. However, these macros are x86-specific anyway and are only used in the signal-handling paths; a simple reordering of moving the larger subroutine calls out of the try...catch blocks resolves that problem. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-12-git-send-email-hpa@linux.intel.com --- arch/x86/ia32/ia32_signal.c | 12 +++++++----- arch/x86/include/asm/uaccess.h | 14 ++++++-------- arch/x86/kernel/signal.c | 24 ++++++++++++++---------- 3 files changed, 27 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 673ac9b63d6..05e62a312bd 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -250,11 +250,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, get_user_ex(tmp, &sc->fpstate); buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); get_user_ex(*pax, &sc->ax); } get_user_catch(err); + err |= restore_i387_xstate_ia32(buf); + return err; } @@ -502,7 +503,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sig, &frame->sig); put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); - err |= copy_siginfo_to_user32(&frame->info, info); /* Create the ucontext. */ if (cpu_has_xsave) @@ -514,9 +514,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -532,6 +529,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); } put_user_catch(err); + err |= copy_siginfo_to_user32(&frame->info, info); + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) return -EFAULT; diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b92ece13c23..a91acfbb1a9 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -416,9 +416,8 @@ do { \ } while (0) #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile(ASM_STAC "\n" \ - "1: mov"itype" %1,%"rtype"0\n" \ - "2: " ASM_CLAC "\n" \ + asm volatile("1: mov"itype" %1,%"rtype"0\n" \ + "2:\n" \ _ASM_EXTABLE_EX(1b, 2b) \ : ltype(x) : "m" (__m(addr))) @@ -460,9 +459,8 @@ struct __large_struct { unsigned long buf[100]; }; : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) #define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile(ASM_STAC "\n" \ - "1: mov"itype" %"rtype"0,%1\n" \ - "2: " ASM_CLAC "\n" \ + asm volatile("1: mov"itype" %"rtype"0,%1\n" \ + "2:\n" \ _ASM_EXTABLE_EX(1b, 2b) \ : : ltype(x), "m" (__m(addr))) @@ -470,13 +468,13 @@ struct __large_struct { unsigned long buf[100]; }; * uaccess_try and catch */ #define uaccess_try do { \ - int prev_err = current_thread_info()->uaccess_err; \ current_thread_info()->uaccess_err = 0; \ + stac(); \ barrier(); #define uaccess_catch(err) \ + clac(); \ (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ - current_thread_info()->uaccess_err = prev_err; \ } while (0) /** diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376..932612887e9 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, regs->orig_ax = -1; /* disable syscall checks */ get_user_ex(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); get_user_ex(*pax, &sc->ax); } get_user_catch(err); + err |= restore_i387_xstate(buf); + return err; } @@ -357,7 +358,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sig, &frame->sig); put_user_ex(&frame->info, &frame->pinfo); put_user_ex(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); /* Create the ucontext. */ if (cpu_has_xsave) @@ -369,9 +369,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. */ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -389,6 +386,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); } put_user_catch(err); + err |= copy_siginfo_to_user(&frame->info, info); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) return -EFAULT; @@ -436,8 +438,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -450,6 +450,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } } put_user_catch(err); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) return -EFAULT; @@ -855,9 +858,6 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, &frame->uc.uc_stack.ss_flags); put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); put_user_ex(0, &frame->uc.uc__pad0); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (ka->sa.sa_flags & SA_RESTORER) { restorer = ka->sa.sa_restorer; @@ -869,6 +869,10 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, put_user_ex(restorer, &frame->pretcode); } put_user_catch(err); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) return -EFAULT; -- cgit v1.2.3-70-g09d2 From 4cd8daf05c7071ac80008c8d4368860110fa6466 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 19 Sep 2012 10:49:00 -0700 Subject: x86/PCI: Clear host bridge aperture struct resource Use kzalloc() so the struct resource doesn't contain garbage in fields we don't initialize. [bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Cc: x86@kernel.org --- arch/x86/pci/acpi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 505acdd6d60..192397c9860 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -305,7 +305,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->flags = flags; res->start = start; res->end = end; - res->child = NULL; if (!pci_use_crs) { dev_printk(KERN_DEBUG, &info->bridge->dev, @@ -434,7 +433,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, size = sizeof(*info->res) * info->res_num; info->res_num = 0; - info->res = kmalloc(size, GFP_KERNEL); + info->res = kzalloc(size, GFP_KERNEL); if (!info->res) return; -- cgit v1.2.3-70-g09d2 From e59d1b0a24199db01978e6c1e89859eda93ce683 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 21 Sep 2012 13:58:10 -0700 Subject: x86-32, smap: Add STAC/CLAC instructions to 32-bit kernel entry The changes to entry_32.S got missed in checkin: 63bcff2a x86, smap: Add STAC and CLAC instructions to control user space access The resulting kernel was largely functional but SMAP protection could have been bypassed. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1348256595-29119-9-git-send-email-hpa@linux.intel.com --- arch/x86/kernel/entry_32.S | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f2883747..9ebbecab6e9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -57,6 +57,7 @@ #include #include #include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -407,7 +408,9 @@ sysenter_past_esp: */ cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault + ASM_STAC 1: movl (%ebp),%ebp + ASM_CLAC movl %ebp,PT_EBP(%esp) _ASM_EXTABLE(1b,syscall_fault) @@ -488,6 +491,7 @@ ENDPROC(ia32_sysenter_target) # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway + ASM_CLAC pushl_cfi %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) @@ -670,6 +674,7 @@ END(syscall_exit_work) RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: + ASM_CLAC GET_THREAD_INFO(%ebp) movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace @@ -825,6 +830,7 @@ END(interrupt) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: + ASM_CLAC addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ SAVE_ALL TRACE_IRQS_OFF @@ -841,6 +847,7 @@ ENDPROC(common_interrupt) #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ + ASM_CLAC; \ pushl_cfi $~(nr); \ SAVE_ALL; \ TRACE_IRQS_OFF \ @@ -857,6 +864,7 @@ ENDPROC(name) ENTRY(coprocessor_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_coprocessor_error jmp error_code @@ -865,6 +873,7 @@ END(coprocessor_error) ENTRY(simd_coprocessor_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ @@ -886,6 +895,7 @@ END(simd_coprocessor_error) ENTRY(device_not_available) RING0_INT_FRAME + ASM_CLAC pushl_cfi $-1 # mark this as an int pushl_cfi $do_device_not_available jmp error_code @@ -906,6 +916,7 @@ END(native_irq_enable_sysexit) ENTRY(overflow) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_overflow jmp error_code @@ -914,6 +925,7 @@ END(overflow) ENTRY(bounds) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_bounds jmp error_code @@ -922,6 +934,7 @@ END(bounds) ENTRY(invalid_op) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_invalid_op jmp error_code @@ -930,6 +943,7 @@ END(invalid_op) ENTRY(coprocessor_segment_overrun) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_coprocessor_segment_overrun jmp error_code @@ -938,6 +952,7 @@ END(coprocessor_segment_overrun) ENTRY(invalid_TSS) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_invalid_TSS jmp error_code CFI_ENDPROC @@ -945,6 +960,7 @@ END(invalid_TSS) ENTRY(segment_not_present) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_segment_not_present jmp error_code CFI_ENDPROC @@ -952,6 +968,7 @@ END(segment_not_present) ENTRY(stack_segment) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_stack_segment jmp error_code CFI_ENDPROC @@ -959,6 +976,7 @@ END(stack_segment) ENTRY(alignment_check) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_alignment_check jmp error_code CFI_ENDPROC @@ -966,6 +984,7 @@ END(alignment_check) ENTRY(divide_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 # no error code pushl_cfi $do_divide_error jmp error_code @@ -975,6 +994,7 @@ END(divide_error) #ifdef CONFIG_X86_MCE ENTRY(machine_check) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi machine_check_vector jmp error_code @@ -984,6 +1004,7 @@ END(machine_check) ENTRY(spurious_interrupt_bug) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_spurious_interrupt_bug jmp error_code @@ -1207,6 +1228,7 @@ return_to_handler: ENTRY(page_fault) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_page_fault ALIGN error_code: @@ -1279,6 +1301,7 @@ END(page_fault) ENTRY(debug) RING0_INT_FRAME + ASM_CLAC cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn @@ -1303,6 +1326,7 @@ END(debug) */ ENTRY(nmi) RING0_INT_FRAME + ASM_CLAC pushl_cfi %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax @@ -1373,6 +1397,7 @@ END(nmi) ENTRY(int3) RING0_INT_FRAME + ASM_CLAC pushl_cfi $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF @@ -1393,6 +1418,7 @@ END(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_async_page_fault jmp error_code CFI_ENDPROC -- cgit v1.2.3-70-g09d2 From b1a74bf8212367be2b1d6685c11a84e056eaaaf1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 20 Sep 2012 11:01:49 -0700 Subject: x86, kvm: fix kvm's usage of kernel_fpu_begin/end() Preemption is disabled between kernel_fpu_begin/end() and as such it is not a good idea to use these routines in kvm_load/put_guest_fpu() which can be very far apart. kvm_load/put_guest_fpu() routines are already called with preemption disabled and KVM already uses the preempt notifier to save the guest fpu state using kvm_put_guest_fpu(). So introduce __kernel_fpu_begin/end() routines which don't touch preemption and use them instead of kernel_fpu_begin/end() for KVM's use model of saving/restoring guest FPU state. Also with this change (and with eagerFPU model), fix the host cr0.TS vm-exit state in the case of VMX. For eagerFPU case, host cr0.TS is always clear. So no need to worry about it. For the traditional lazyFPU restore case, change the cr0.TS bit for the host state during vm-exit to be always clear and cr0.TS bit is set in the __vmx_load_host_state() when the FPU (guest FPU or the host task's FPU) state is not active. This ensures that the host/guest FPU state is properly saved, restored during context-switch and with interrupts (using irq_fpu_usable()) not stomping on the active FPU state. Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1348164109.26695.338.camel@sbsiddha-desk.sc.intel.com Cc: Avi Kivity Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/i387.h | 28 ++++++++++++++++++++++++++-- arch/x86/kernel/i387.c | 13 +++++-------- arch/x86/kvm/vmx.c | 10 +++++++--- arch/x86/kvm/x86.c | 4 ++-- 4 files changed, 40 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 6c3bd378281..ed8089d6909 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -24,8 +24,32 @@ extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); extern bool irq_fpu_usable(void); -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); + +/* + * Careful: __kernel_fpu_begin/end() must be called with preempt disabled + * and they don't touch the preempt state on their own. + * If you enable preemption after __kernel_fpu_begin(), preempt notifier + * should call the __kernel_fpu_end() to prevent the kernel/user FPU + * state from getting corrupted. KVM for example uses this model. + * + * All other cases use kernel_fpu_begin/end() which disable preemption + * during kernel FPU usage. + */ +extern void __kernel_fpu_begin(void); +extern void __kernel_fpu_end(void); + +static inline void kernel_fpu_begin(void) +{ + WARN_ON_ONCE(!irq_fpu_usable()); + preempt_disable(); + __kernel_fpu_begin(); +} + +static inline void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} /* * Some instructions like VIA's padlock instructions generate a spurious diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 6782e398386..675a0501244 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -73,32 +73,29 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable); -void kernel_fpu_begin(void) +void __kernel_fpu_begin(void) { struct task_struct *me = current; - WARN_ON_ONCE(!irq_fpu_usable()); - preempt_disable(); if (__thread_has_fpu(me)) { __save_init_fpu(me); __thread_clear_has_fpu(me); - /* We do 'stts()' in kernel_fpu_end() */ + /* We do 'stts()' in __kernel_fpu_end() */ } else if (!use_eager_fpu()) { this_cpu_write(fpu_owner_task, NULL); clts(); } } -EXPORT_SYMBOL(kernel_fpu_begin); +EXPORT_SYMBOL(__kernel_fpu_begin); -void kernel_fpu_end(void) +void __kernel_fpu_end(void) { if (use_eager_fpu()) math_state_restore(); else stts(); - preempt_enable(); } -EXPORT_SYMBOL(kernel_fpu_end); +EXPORT_SYMBOL(__kernel_fpu_end); void unlazy_fpu(struct task_struct *tsk) { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c00f03de1b7..70dfcec3c46 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif - if (user_has_fpu()) - clts(); + /* + * If the FPU is not active (through the host task or + * the guest vcpu), then restore the cr0.TS bit. + */ + if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) + stts(); load_gdt(&__get_cpu_var(host_gdt)); } @@ -3730,7 +3734,7 @@ static void vmx_set_constant_host_state(void) unsigned long tmpl; struct desc_ptr dt; - vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ + vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf637f572bd..02b2cd52069 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5972,7 +5972,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; - kernel_fpu_begin(); + __kernel_fpu_begin(); fpu_restore_checking(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } @@ -5986,7 +5986,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 0; fpu_save_init(&vcpu->arch.guest_fpu); - kernel_fpu_end(); + __kernel_fpu_end(); ++vcpu->stat.fpu_reload; kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); trace_kvm_fpu(0); -- cgit v1.2.3-70-g09d2 From 7a84428af7ca6a847f058c9ff244a18a2664fd1b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Sep 2012 11:58:03 -0600 Subject: KVM: Add resampling irqfds for level triggered interrupts To emulate level triggered interrupts, add a resample option to KVM_IRQFD. When specified, a new resamplefd is provided that notifies the user when the irqchip has been resampled by the VM. This may, for instance, indicate an EOI. Also in this mode, posting of an interrupt through an irqfd only asserts the interrupt. On resampling, the interrupt is automatically de-asserted prior to user notification. This enables level triggered interrupts to be posted and re-enabled from vfio with no userspace intervention. All resampling irqfds can make use of a single irq source ID, so we reserve a new one for this interface. Signed-off-by: Alex Williamson Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 13 ++++ arch/x86/kvm/x86.c | 4 + include/linux/kvm.h | 12 ++- include/linux/kvm_host.h | 5 +- virt/kvm/eventfd.c | 150 +++++++++++++++++++++++++++++++++++++- virt/kvm/irq_comm.c | 6 ++ 6 files changed, 184 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 36befa775fd..f6ec3a92e62 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1950,6 +1950,19 @@ the guest using the specified gsi pin. The irqfd is removed using the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd and kvm_irqfd.gsi. +With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify +mechanism allowing emulation of level-triggered, irqfd-based +interrupts. When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an +additional eventfd in the kvm_irqfd.resamplefd field. When operating +in resample mode, posting of an interrupt through kvm_irq.fd asserts +the specified gsi in the irqchip. When the irqchip is resampled, such +as from an EOI, the gsi is de-asserted and the user is notifed via +kvm_irqfd.resamplefd. It is the user's responsibility to re-queue +the interrupt if the device making use of it still requires service. +Note that closing the resamplefd is not sufficient to disable the +irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment +and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. + 4.76 KVM_PPC_ALLOCATE_HTAB Capability: KVM_CAP_PPC_ALLOC_HTAB diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc2a0a132e4..7d44204c604 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2176,6 +2176,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PCI_2_3: case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_READONLY_MEM: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -6268,6 +6269,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */ + set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + &kvm->arch.irq_sources_bitmap); raw_spin_lock_init(&kvm->arch.tsc_write_lock); mutex_init(&kvm->arch.apic_map_lock); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index d808694673f..0a6d6ba44c8 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -625,6 +625,7 @@ struct kvm_ppc_smmu_info { #ifdef __KVM_HAVE_READONLY_MEM #define KVM_CAP_READONLY_MEM 81 #endif +#define KVM_CAP_IRQFD_RESAMPLE 82 #ifdef KVM_CAP_IRQ_ROUTING @@ -690,12 +691,21 @@ struct kvm_xen_hvm_config { #endif #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) +/* + * Available with KVM_CAP_IRQFD_RESAMPLE + * + * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies + * the irqfd to operate in resampling mode for level triggered interrupt + * emlation. See Documentation/virtual/kvm/api.txt. + */ +#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) struct kvm_irqfd { __u32 fd; __u32 gsi; __u32 flags; - __u8 pad[20]; + __u32 resamplefd; + __u8 pad[16]; }; struct kvm_clock_data { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 80bfc880921..2850656e2e9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -119,7 +119,8 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_PMU 16 #define KVM_REQ_PMI 17 -#define KVM_USERSPACE_IRQ_SOURCE_ID 0 +#define KVM_USERSPACE_IRQ_SOURCE_ID 0 +#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 struct kvm; struct kvm_vcpu; @@ -343,6 +344,8 @@ struct kvm { struct { spinlock_t lock; struct list_head items; + struct list_head resampler_list; + struct mutex resampler_lock; } irqfds; struct list_head ioeventfds; #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7d7e2aaffec..356965c9d10 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -43,6 +43,31 @@ * -------------------------------------------------------------------- */ +/* + * Resampling irqfds are a special variety of irqfds used to emulate + * level triggered interrupts. The interrupt is asserted on eventfd + * trigger. On acknowledgement through the irq ack notifier, the + * interrupt is de-asserted and userspace is notified through the + * resamplefd. All resamplers on the same gsi are de-asserted + * together, so we don't need to track the state of each individual + * user. We can also therefore share the same irq source ID. + */ +struct _irqfd_resampler { + struct kvm *kvm; + /* + * List of resampling struct _irqfd objects sharing this gsi. + * RCU list modified under kvm->irqfds.resampler_lock + */ + struct list_head list; + struct kvm_irq_ack_notifier notifier; + /* + * Entry in list of kvm->irqfd.resampler_list. Use for sharing + * resamplers among irqfds on the same gsi. + * Accessed and modified under kvm->irqfds.resampler_lock + */ + struct list_head link; +}; + struct _irqfd { /* Used for MSI fast-path */ struct kvm *kvm; @@ -52,6 +77,12 @@ struct _irqfd { /* Used for level IRQ fast-path */ int gsi; struct work_struct inject; + /* The resampler used by this irqfd (resampler-only) */ + struct _irqfd_resampler *resampler; + /* Eventfd notified on resample (resampler-only) */ + struct eventfd_ctx *resamplefd; + /* Entry in list of irqfds for a resampler (resampler-only) */ + struct list_head resampler_link; /* Used for setup/shutdown */ struct eventfd_ctx *eventfd; struct list_head list; @@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work) struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); struct kvm *kvm = irqfd->kvm; - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + if (!irqfd->resampler) { + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + } else + kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + irqfd->gsi, 1); +} + +/* + * Since resampler irqfds share an IRQ source ID, we de-assert once + * then notify all of the resampler irqfds using this GSI. We can't + * do multiple de-asserts or we risk racing with incoming re-asserts. + */ +static void +irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) +{ + struct _irqfd_resampler *resampler; + struct _irqfd *irqfd; + + resampler = container_of(kian, struct _irqfd_resampler, notifier); + + kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + resampler->notifier.gsi, 0); + + rcu_read_lock(); + + list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) + eventfd_signal(irqfd->resamplefd, 1); + + rcu_read_unlock(); +} + +static void +irqfd_resampler_shutdown(struct _irqfd *irqfd) +{ + struct _irqfd_resampler *resampler = irqfd->resampler; + struct kvm *kvm = resampler->kvm; + + mutex_lock(&kvm->irqfds.resampler_lock); + + list_del_rcu(&irqfd->resampler_link); + synchronize_rcu(); + + if (list_empty(&resampler->list)) { + list_del(&resampler->link); + kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); + kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + resampler->notifier.gsi, 0); + kfree(resampler); + } + + mutex_unlock(&kvm->irqfds.resampler_lock); } /* @@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work) */ flush_work_sync(&irqfd->inject); + if (irqfd->resampler) { + irqfd_resampler_shutdown(irqfd); + eventfd_ctx_put(irqfd->resamplefd); + } + /* * It is now safe to release the object's resources */ @@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) struct kvm_irq_routing_table *irq_rt; struct _irqfd *irqfd, *tmp; struct file *file = NULL; - struct eventfd_ctx *eventfd = NULL; + struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; unsigned int events; @@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) irqfd->eventfd = eventfd; + if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { + struct _irqfd_resampler *resampler; + + resamplefd = eventfd_ctx_fdget(args->resamplefd); + if (IS_ERR(resamplefd)) { + ret = PTR_ERR(resamplefd); + goto fail; + } + + irqfd->resamplefd = resamplefd; + INIT_LIST_HEAD(&irqfd->resampler_link); + + mutex_lock(&kvm->irqfds.resampler_lock); + + list_for_each_entry(resampler, + &kvm->irqfds.resampler_list, list) { + if (resampler->notifier.gsi == irqfd->gsi) { + irqfd->resampler = resampler; + break; + } + } + + if (!irqfd->resampler) { + resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); + if (!resampler) { + ret = -ENOMEM; + mutex_unlock(&kvm->irqfds.resampler_lock); + goto fail; + } + + resampler->kvm = kvm; + INIT_LIST_HEAD(&resampler->list); + resampler->notifier.gsi = irqfd->gsi; + resampler->notifier.irq_acked = irqfd_resampler_ack; + INIT_LIST_HEAD(&resampler->link); + + list_add(&resampler->link, &kvm->irqfds.resampler_list); + kvm_register_irq_ack_notifier(kvm, + &resampler->notifier); + irqfd->resampler = resampler; + } + + list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); + synchronize_rcu(); + + mutex_unlock(&kvm->irqfds.resampler_lock); + } + /* * Install our own custom wake-up handling so we are notified via * a callback whenever someone signals the underlying eventfd @@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) return 0; fail: + if (irqfd->resampler) + irqfd_resampler_shutdown(irqfd); + + if (resamplefd && !IS_ERR(resamplefd)) + eventfd_ctx_put(resamplefd); + if (eventfd && !IS_ERR(eventfd)) eventfd_ctx_put(eventfd); @@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm) { spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); + INIT_LIST_HEAD(&kvm->irqfds.resampler_list); + mutex_init(&kvm->irqfds.resampler_lock); INIT_LIST_HEAD(&kvm->ioeventfds); } @@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { - if (args->flags & ~KVM_IRQFD_FLAG_DEASSIGN) + if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE)) return -EINVAL; if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 3ca89c451d6..2eb58af7ee9 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -228,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm) } ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); +#ifdef CONFIG_X86 + ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); +#endif set_bit(irq_source_id, bitmap); unlock: mutex_unlock(&kvm->irq_lock); @@ -238,6 +241,9 @@ unlock: void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) { ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); +#ifdef CONFIG_X86 + ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); +#endif mutex_lock(&kvm->irq_lock); if (irq_source_id < 0 || -- cgit v1.2.3-70-g09d2 From c863901075a42d50678616d8ee4b96ef13080498 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 21 Sep 2012 05:42:55 +0200 Subject: KVM: x86: Fix guest debug across vcpu INIT reset If we reset a vcpu on INIT, we so far overwrote dr7 as provided by KVM_SET_GUEST_DEBUG, and we also cleared switch_db_regs unconditionally. Fix this by saving the dr7 used for guest debugging and calculating the effective register value as well as switch_db_regs on any potential change. This will change to focus of the set_guest_debug vendor op to update_dp_bp_intercept. Found while trying to stop on start_secondary. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 23 ++++------------------- arch/x86/kvm/vmx.c | 14 +------------- arch/x86/kvm/x86.c | 26 +++++++++++++++++--------- 4 files changed, 24 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b902c98f27..c9a91368fc5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -471,6 +471,7 @@ struct kvm_vcpu_arch { unsigned long dr6; unsigned long dr7; unsigned long eff_db[KVM_NR_DB_REGS]; + unsigned long guest_debug_dr7; u64 mcg_cap; u64 mcg_status; @@ -647,8 +648,7 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - void (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg); + void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 818fceb3091..d017df3899e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1146,7 +1146,6 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; - save->dr7 = 0x400; kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -1643,7 +1642,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_db_intercept(struct kvm_vcpu *vcpu) +static void update_db_bp_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1663,20 +1662,6 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) vcpu->guest_debug = 0; } -static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; - else - svm->vmcb->save.dr7 = vcpu->arch.dr7; - - mark_dirty(svm->vmcb, VMCB_DR); - - update_db_intercept(vcpu); -} - static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) { if (sd->next_asid > sd->max_asid) { @@ -1748,7 +1733,7 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(&svm->vcpu); + update_db_bp_intercept(&svm->vcpu); } if (svm->vcpu.guest_debug & @@ -3659,7 +3644,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) */ svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(vcpu); + update_db_bp_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4253,7 +4238,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .set_guest_debug = svm_guest_debug, + .update_db_bp_intercept = update_db_bp_intercept, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 30bcb953afe..5d46c905e06 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2288,16 +2288,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) -{ - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); - else - vmcs_writel(GUEST_DR7, vcpu->arch.dr7); - - update_exception_bitmap(vcpu); -} - static __init int cpu_has_kvm_support(void) { return cpu_has_vmx(); @@ -3960,8 +3950,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, 0); kvm_register_write(vcpu, VCPU_REGS_RSP, 0); - vmcs_writel(GUEST_DR7, 0x400); - vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -7237,7 +7225,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, - .set_guest_debug = set_guest_debug, + .update_db_bp_intercept = update_exception_bitmap, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d44204c604..b16d4a5bfa4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -692,6 +692,18 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static void kvm_update_dr7(struct kvm_vcpu *vcpu) +{ + unsigned long dr7; + + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + dr7 = vcpu->arch.guest_debug_dr7; + else + dr7 = vcpu->arch.dr7; + kvm_x86_ops->set_dr7(vcpu, dr7); + vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK); +} + static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { switch (dr) { @@ -717,10 +729,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) if (val & 0xffffffff00000000ULL) return -1; /* #GP */ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { - kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); - vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); - } + kvm_update_dr7(vcpu); break; } @@ -5851,13 +5860,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { for (i = 0; i < KVM_NR_DB_REGS; ++i) vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; - vcpu->arch.switch_db_regs = - (dbg->arch.debugreg[7] & DR7_BP_EN_MASK); + vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7]; } else { for (i = 0; i < KVM_NR_DB_REGS; i++) vcpu->arch.eff_db[i] = vcpu->arch.db[i]; - vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); } + kvm_update_dr7(vcpu); if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + @@ -5869,7 +5877,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, */ kvm_set_rflags(vcpu, rflags); - kvm_x86_ops->set_guest_debug(vcpu, dbg); + kvm_x86_ops->update_db_bp_intercept(vcpu); r = 0; @@ -6045,10 +6053,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.nmi_pending = 0; vcpu->arch.nmi_injected = false; - vcpu->arch.switch_db_regs = 0; memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = DR6_FIXED_1; vcpu->arch.dr7 = DR7_FIXED_1; + kvm_update_dr7(vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); vcpu->arch.apf.msr_val = 0; -- cgit v1.2.3-70-g09d2 From a2db672aa305a045404615e5222ba681bab6cf58 Mon Sep 17 00:00:00 2001 From: Silas Boyd-Wickizer Date: Fri, 3 Aug 2012 12:33:27 -0700 Subject: Use get_online_cpus to avoid races involving CPU hotplug If arch/x86/kernel/msr.c is a module, a CPU might offline or online between the for_each_online_cpu(i) loop and the call to register_hotcpu_notifier in msr_init or the call to unregister_hotcpu_notifier in msr_exit. The potential races can lead to leaks/duplicates, attempts to destroy non-existant devices, or random pointer dereferences. For example, in msr_init if: for_each_online_cpu(i) { err = msr_device_create(i); if (err != 0) goto out_class; } <----- CPU offlines register_hotcpu_notifier(&msr_class_cpu_notifier); and the CPU never onlines before msr_exit, then the module will never call msr_device_destroy for the associated CPU. This fix surrounds for_each_online_cpu and register_hotcpu_notifier or unregister_hotcpu_notifier with get_online_cpus+put_online_cpus. Tested on a VM. Signed-off-by: Silas Boyd-Wickizer Signed-off-by: Paul E. McKenney --- arch/x86/kernel/msr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index eb113693f04..a7c5661f849 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -257,12 +257,14 @@ static int __init msr_init(void) goto out_chrdev; } msr_class->devnode = msr_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = msr_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -271,6 +273,7 @@ out_class: i = 0; for_each_online_cpu(i) msr_device_destroy(i); + put_online_cpus(); class_destroy(msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); @@ -281,11 +284,13 @@ out: static void __exit msr_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); } module_init(msr_init); -- cgit v1.2.3-70-g09d2 From 429227bbe55647aa42f8f63cac61e4544e248629 Mon Sep 17 00:00:00 2001 From: Silas Boyd-Wickizer Date: Fri, 3 Aug 2012 12:34:50 -0700 Subject: Use get_online_cpus to avoid races involving CPU hotplug If arch/x86/kernel/cpuid.c is a module, a CPU might offline or online between the for_each_online_cpu() loop and the call to register_hotcpu_notifier in cpuid_init or the call to unregister_hotcpu_notifier in cpuid_exit. The potential races can lead to leaks/duplicates, attempts to destroy non-existant devices, or random pointer dereferences. For example, in cpuid_exit if: for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); <----- CPU onlines unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); the hotcpu notifier will attempt to create a device for the cpuid_class, which the module already destroyed. This fix surrounds for_each_online_cpu and register_hotcpu_notifier or unregister_hotcpu_notifier with get_online_cpus+put_online_cpus. Tested on a VM. Signed-off-by: Silas Boyd-Wickizer Signed-off-by: Paul E. McKenney --- arch/x86/kernel/cpuid.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd2323..60c78917190 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -199,12 +199,14 @@ static int __init cpuid_init(void) goto out_chrdev; } cpuid_class->devnode = cpuid_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = cpuid_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -214,6 +216,7 @@ out_class: for_each_online_cpu(i) { cpuid_device_destroy(i); } + put_online_cpus(); class_destroy(cpuid_class); out_chrdev: __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); @@ -225,11 +228,13 @@ static void __exit cpuid_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); } module_init(cpuid_init); -- cgit v1.2.3-70-g09d2 From 8d54db795dfb1049d45dc34f0dddbc5347ec5642 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 17 Aug 2012 10:22:37 -0400 Subject: xen/boot: Disable NUMA for PV guests. The hypervisor is in charge of allocating the proper "NUMA" memory and dealing with the CPU scheduler to keep them bound to the proper NUMA node. The PV guests (and PVHVM) have no inkling of where they run and do not need to know that right now. In the future we will need to inject NUMA configuration data (if a guest spans two or more NUMA nodes) so that the kernel can make the right choices. But those patches are not yet present. In the meantime, disable the NUMA capability in the PV guest, which also fixes a bootup issue. Andre says: "we see Dom0 crashes due to the kernel detecting the NUMA topology not by ACPI, but directly from the northbridge (CONFIG_AMD_NUMA). This will detect the actual NUMA config of the physical machine, but will crash about the mismatch with Dom0's virtual memory. Variation of the theme: Dom0 sees what it's not supposed to see. This happens with the said config option enabled and on a machine where this scanning is still enabled (K8 and Fam10h, not Bulldozer class) We have this dump then: NUMA: Warning: node ids are out of bound, from=-1 to=-1 distance=10 Scanning NUMA topology in Northbridge 24 Number of physical nodes 4 Node 0 MemBase 0000000000000000 Limit 0000000040000000 Node 1 MemBase 0000000040000000 Limit 0000000138000000 Node 2 MemBase 0000000138000000 Limit 00000001f8000000 Node 3 MemBase 00000001f8000000 Limit 0000000238000000 Initmem setup node 0 0000000000000000-0000000040000000 NODE_DATA [000000003ffd9000 - 000000003fffffff] Initmem setup node 1 0000000040000000-0000000138000000 NODE_DATA [0000000137fd9000 - 0000000137ffffff] Initmem setup node 2 0000000138000000-00000001f8000000 NODE_DATA [00000001f095e000 - 00000001f0984fff] Initmem setup node 3 00000001f8000000-0000000238000000 Cannot find 159744 bytes in node 3 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __alloc_bootmem_node+0x43/0x96 Pid: 0, comm: swapper Not tainted 3.3.6 #1 AMD Dinar/Dinar RIP: e030:[] [] __alloc_bootmem_node+0x43/0x96 .. snip.. [] sparse_early_usemaps_alloc_node+0x64/0x178 [] sparse_init+0xe4/0x25a [] paging_init+0x13/0x22 [] setup_arch+0x9c6/0xa9b [] ? printk+0x3c/0x3e [] start_kernel+0xe5/0x468 [] x86_64_start_reservations+0xba/0xc1 [] ? xen_setup_runstate_info+0x2c/0x36 [] xen_start_kernel+0x565/0x56c " so we just disable NUMA scanning by setting numa_off=1. CC: stable@vger.kernel.org Reported-and-Tested-by: Andre Przywara Acked-by: Andre Przywara Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d11ca11d14f..e2d62d697b5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -544,4 +545,7 @@ void __init xen_arch_setup(void) disable_cpufreq(); WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); +#ifdef CONFIG_NUMA + numa_off = 1; +#endif } -- cgit v1.2.3-70-g09d2 From b3c869d35b9b014f63ac0beacd31c57372084d01 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 4 Sep 2012 12:42:27 -0400 Subject: jiffies: Remove compile time assumptions about CLOCK_TICK_RATE CLOCK_TICK_RATE is used to accurately caclulate exactly how a tick will be at a given HZ. This is useful, because while we'd expect NSEC_PER_SEC/HZ, the underlying hardware will have some granularity limit, so we won't be able to have exactly HZ ticks per second. This slight error can cause timekeeping quality problems when using the jiffies or other jiffies driven clocksources. Thus we currently use compile time CLOCK_TICK_RATE value to generate SHIFTED_HZ and NSEC_PER_JIFFIES, which we then use to adjust the jiffies clocksource to correct this error. Unfortunately though, since CLOCK_TICK_RATE is a compile time value, and the jiffies clocksource is registered very early during boot, there are a number of cases where there are different possible hardware timers that have different tick rates. This causes problems in cases like ARM where there are numerous different types of hardware, each having their own compile-time CLOCK_TICK_RATE, making it hard to accurately support different hardware with a single kernel. For the most part, this doesn't matter all that much, as not too many systems actually utilize the jiffies or jiffies driven clocksource. Usually there are other highres clocksources who's granularity error is negligable. Even so, we have some complicated calcualtions that we do everywhere to handle these edge cases. This patch removes the compile time SHIFTED_HZ value, and introduces a register_refined_jiffies() function. This results in the default jiffies clock as being assumed a perfect HZ freq, and allows archtectures that care about jiffies accuracy to call register_refined_jiffies() with the tick rate, specified dynamically at boot. This allows us, where necessary, to not have a compile time CLOCK_TICK_RATE constant, simplifies the jiffies code, and still provides a way to have an accurate jiffies clock. NOTE: Since this patch does not add register_refinied_jiffies() calls for every arch, it may cause time quality regressions in some cases. Its likely these will not be noticable, but if they are an issue, adding the following to the end of setup_arch() should resolve the regression: register_refinied_jiffies(CLOCK_TICK_RATE) Cc: Catalin Marinas Cc: Arnd Bergmann Cc: Richard Cochran Cc: Prarit Bhargava Cc: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/kernel/setup.c | 3 +++ include/linux/jiffies.h | 15 ++------------- kernel/time/jiffies.c | 32 +++++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80e1b9..4062f15bfbd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -68,6 +68,7 @@ #include #include #include +#include #include