summaryrefslogtreecommitdiff
path: root/virt
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2015-11-04 16:24:17 +0100
committerPaolo Bonzini <pbonzini@redhat.com>2015-11-04 16:24:17 +0100
commit197a4f4b063e4e7a603ff1de56b3cf0400fabc30 (patch)
tree36a3d057cec3aff49cf2fe9df3e63218595dd68b /virt
parentd6cf98e06ea4c4071596bc28f2a0f21412d5c6dc (diff)
parent26caea7693cb99833fe4ecc544c842289d6b3f69 (diff)
Merge tag 'kvm-arm-for-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/ARM Changes for v4.4-rc1 Includes a number of fixes for the arch-timer, introducing proper level-triggered semantics for the arch-timers, a series of patches to synchronously halt a guest (prerequisite for IRQ forwarding), some tracepoint improvements, a tweak for the EL2 panic handlers, some more VGIC cleanups getting rid of redundant state, and finally a stylistic change that gets rid of some ctags warnings. Conflicts: arch/x86/include/asm/kvm_host.h
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/arch_timer.c174
-rw-r--r--virt/kvm/arm/trace.h63
-rw-r--r--virt/kvm/arm/vgic-v2.c6
-rw-r--r--virt/kvm/arm/vgic-v3.c6
-rw-r--r--virt/kvm/arm/vgic.c327
-rw-r--r--virt/kvm/kvm_main.c3
6 files changed, 331 insertions, 248 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 48c6e1ac6827..21a0ab2d8919 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -28,6 +28,8 @@
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
+#include "trace.h"
+
static struct timecounter *timecounter;
static struct workqueue_struct *wqueue;
static unsigned int host_vtimer_irq;
@@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer)
}
}
-static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
-{
- int ret;
- struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
- kvm_vgic_set_phys_irq_active(timer->map, true);
- ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
- timer->map,
- timer->irq->level);
- WARN_ON(ret);
-}
-
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -111,14 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
return HRTIMER_NORESTART;
}
+static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+ (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
+}
+
bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
cycle_t cval, now;
- if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
- !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
- kvm_vgic_get_phys_irq_active(timer->map))
+ if (!kvm_timer_irq_can_fire(vcpu))
return false;
cval = timer->cntv_cval;
@@ -127,62 +123,143 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
return cval <= now;
}
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
+{
+ int ret;
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ BUG_ON(!vgic_initialized(vcpu->kvm));
+
+ timer->irq.level = new_level;
+ trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
+ timer->irq.level);
+ ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
+ timer->map,
+ timer->irq.level);
+ WARN_ON(ret);
+}
+
+/*
+ * Check if there was a change in the timer state (should we raise or lower
+ * the line level to the GIC).
+ */
+static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ /*
+ * If userspace modified the timer registers via SET_ONE_REG before
+ * the vgic was initialized, we mustn't set the timer->irq.level value
+ * because the guest would never see the interrupt. Instead wait
+ * until we call this function from kvm_timer_flush_hwstate.
+ */
+ if (!vgic_initialized(vcpu->kvm))
+ return;
+
+ if (kvm_timer_should_fire(vcpu) != timer->irq.level)
+ kvm_timer_update_irq(vcpu, !timer->irq.level);
+}
+
+/*
+ * Schedule the background timer before calling kvm_vcpu_block, so that this
+ * thread is removed from its waitqueue and made runnable when there's a timer
+ * interrupt to handle.
+ */
+void kvm_timer_schedule(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ u64 ns;
+ cycle_t cval, now;
+
+ BUG_ON(timer_is_armed(timer));
+
+ /*
+ * No need to schedule a background timer if the guest timer has
+ * already expired, because kvm_vcpu_block will return before putting
+ * the thread to sleep.
+ */
+ if (kvm_timer_should_fire(vcpu))
+ return;
+
+ /*
+ * If the timer is not capable of raising interrupts (disabled or
+ * masked), then there's no more work for us to do.
+ */
+ if (!kvm_timer_irq_can_fire(vcpu))
+ return;
+
+ /* The timer has not yet expired, schedule a background timer */
+ cval = timer->cntv_cval;
+ now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+ ns = cyclecounter_cyc2ns(timecounter->cc,
+ cval - now,
+ timecounter->mask,
+ &timecounter->frac);
+ timer_arm(timer, ns);
+}
+
+void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ timer_disarm(timer);
+}
+
/**
* kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
* @vcpu: The vcpu pointer
*
- * Disarm any pending soft timers, since the world-switch code will write the
- * virtual timer state back to the physical CPU.
+ * Check if the virtual timer has expired while we were running in the host,
+ * and inject an interrupt if that was the case.
*/
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ bool phys_active;
+ int ret;
- /*
- * We're about to run this vcpu again, so there is no need to
- * keep the background timer running, as we're about to
- * populate the CPU timer again.
- */
- timer_disarm(timer);
+ kvm_timer_update_state(vcpu);
/*
- * If the timer expired while we were not scheduled, now is the time
- * to inject it.
+ * If we enter the guest with the virtual input level to the VGIC
+ * asserted, then we have already told the VGIC what we need to, and
+ * we don't need to exit from the guest until the guest deactivates
+ * the already injected interrupt, so therefore we should set the
+ * hardware active state to prevent unnecessary exits from the guest.
+ *
+ * Conversely, if the virtual input level is deasserted, then always
+ * clear the hardware active state to ensure that hardware interrupts
+ * from the timer triggers a guest exit.
*/
- if (kvm_timer_should_fire(vcpu))
- kvm_timer_inject_irq(vcpu);
+ if (timer->irq.level)
+ phys_active = true;
+ else
+ phys_active = false;
+
+ ret = irq_set_irqchip_state(timer->map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ phys_active);
+ WARN_ON(ret);
}
/**
* kvm_timer_sync_hwstate - sync timer state from cpu
* @vcpu: The vcpu pointer
*
- * Check if the virtual timer was armed and either schedule a corresponding
- * soft timer or inject directly if already expired.
+ * Check if the virtual timer has expired while we were running in the guest,
+ * and inject an interrupt if that was the case.
*/
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- cycle_t cval, now;
- u64 ns;
BUG_ON(timer_is_armed(timer));
- if (kvm_timer_should_fire(vcpu)) {
- /*
- * Timer has already expired while we were not
- * looking. Inject the interrupt and carry on.
- */
- kvm_timer_inject_irq(vcpu);
- return;
- }
-
- cval = timer->cntv_cval;
- now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
- ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
- &timecounter->frac);
- timer_arm(timer, ns);
+ /*
+ * The guest could have modified the timer registers or the timer
+ * could have expired, update the timer state.
+ */
+ kvm_timer_update_state(vcpu);
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
@@ -197,7 +274,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* kvm_vcpu_set_target(). To handle this, we determine
* vcpu timer irq number when the vcpu is reset.
*/
- timer->irq = irq;
+ timer->irq.irq = irq->irq;
/*
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -206,6 +283,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* the ARMv7 architecture.
*/
timer->cntv_ctl = 0;
+ kvm_timer_update_state(vcpu);
/*
* Tell the VGIC that the virtual interrupt is tied to a
@@ -250,6 +328,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
default:
return -1;
}
+
+ kvm_timer_update_state(vcpu);
return 0;
}
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
new file mode 100644
index 000000000000..37d8b98867d5
--- /dev/null
+++ b/virt/kvm/arm/trace.h
@@ -0,0 +1,63 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Tracepoints for vgic
+ */
+TRACE_EVENT(vgic_update_irq_pending,
+ TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
+ TP_ARGS(vcpu_id, irq, level),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_id )
+ __field( __u32, irq )
+ __field( bool, level )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->irq = irq;
+ __entry->level = level;
+ ),
+
+ TP_printk("VCPU: %ld, IRQ %d, level: %d",
+ __entry->vcpu_id, __entry->irq, __entry->level)
+);
+
+/*
+ * Tracepoints for arch_timer
+ */
+TRACE_EVENT(kvm_timer_update_irq,
+ TP_PROTO(unsigned long vcpu_id, __u32 irq, int level),
+ TP_ARGS(vcpu_id, irq, level),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_id )
+ __field( __u32, irq )
+ __field( int, level )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->irq = irq;
+ __entry->level = level;
+ ),
+
+ TP_printk("VCPU: %ld, IRQ %d, level %d",
+ __entry->vcpu_id, __entry->irq, __entry->level)
+);
+
+#endif /* _TRACE_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 8d7b04db8471..ff02f08df74d 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
-}
-static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
- struct vgic_lr lr_desc)
-{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
else
@@ -158,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
* anyway.
*/
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
/* Get the show on the road... */
vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
@@ -166,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
static const struct vgic_ops vgic_v2_ops = {
.get_lr = vgic_v2_get_lr,
.set_lr = vgic_v2_set_lr,
- .sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
.get_elrsr = vgic_v2_get_elrsr,
.get_eisr = vgic_v2_get_eisr,
.clear_eisr = vgic_v2_clear_eisr,
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 7dd5d62f10a1..487d6357b7e7 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
}
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
-}
-static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
- struct vgic_lr lr_desc)
-{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
else
@@ -193,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
* anyway.
*/
vgic_v3->vgic_vmcr = 0;
+ vgic_v3->vgic_elrsr = ~0;
/*
* If we are emulating a GICv3, we do it in an non-GICv2-compatible
@@ -211,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
static const struct vgic_ops vgic_v3_ops = {
.get_lr = vgic_v3_get_lr,
.set_lr = vgic_v3_set_lr,
- .sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
.get_elrsr = vgic_v3_get_elrsr,
.get_eisr = vgic_v3_get_eisr,
.clear_eisr = vgic_v3_clear_eisr,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6bd1c9bf7ae7..fe451d4885ae 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -34,6 +34,9 @@
#include <asm/kvm.h>
#include <kvm/iodev.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
/*
* How the whole thing works (courtesy of Christoffer Dall):
*
@@ -102,11 +105,13 @@
#include "vgic.h"
static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
int virt_irq);
+static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
static const struct vgic_ops *vgic_ops;
static const struct vgic_params *vgic;
@@ -357,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+ if (!vgic_dist_irq_get_level(vcpu, irq)) {
+ vgic_dist_irq_clear_pending(vcpu, irq);
+ if (!compute_pending_for_cpu(vcpu))
+ clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+ }
}
static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -654,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
vgic_reg_access(mmio, &val, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
if (mmio->is_write) {
- if (offset < 8) {
- *reg = ~0U; /* Force PPIs/SGIs to 1 */
+ /* Ignore writes to read-only SGI and PPI bits */
+ if (offset < 8)
return false;
- }
val = vgic_cfg_compress(val);
if (offset & 4) {
@@ -683,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ u64 elrsr = vgic_get_elrsr(vcpu);
+ unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
int i;
- for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+ for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
struct vgic_lr lr = vgic_get_lr(vcpu, i);
/*
@@ -706,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* interrupt then move the active state to the
* distributor tracking bit.
*/
- if (lr.state & LR_STATE_ACTIVE) {
+ if (lr.state & LR_STATE_ACTIVE)
vgic_irq_set_active(vcpu, lr.irq);
- lr.state &= ~LR_STATE_ACTIVE;
- }
/*
* Reestablish the pending state on the distributor and the
- * CPU interface. It may have already been pending, but that
- * is fine, then we are only setting a few bits that were
- * already set.
- */
- if (lr.state & LR_STATE_PENDING) {
- vgic_dist_irq_set_pending(vcpu, lr.irq);
- lr.state &= ~LR_STATE_PENDING;
- }
-
- vgic_set_lr(vcpu, i, lr);
-
- /*
- * Mark the LR as free for other use.
+ * CPU interface and mark the LR as free for other use.
*/
- BUG_ON(lr.state & LR_STATE_MASK);
- vgic_retire_lr(i, lr.irq, vcpu);
- vgic_irq_clear_queued(vcpu, lr.irq);
+ vgic_retire_lr(i, vcpu);
/* Finally update the VGIC state. */
vgic_update_state(vcpu->kvm);
@@ -982,6 +977,12 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
pend_shared = vcpu->arch.vgic_cpu.pending_shared;
+ if (!dist->enabled) {
+ bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS);
+ bitmap_zero(pend_shared, nr_shared);
+ return 0;
+ }
+
pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
@@ -1009,11 +1010,6 @@ void vgic_update_state(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int c;
- if (!dist->enabled) {
- set_bit(0, dist->irq_pending_on_cpu);
- return;
- }
-
kvm_for_each_vcpu(c, vcpu, kvm) {
if (compute_pending_for_cpu(vcpu))
set_bit(c, dist->irq_pending_on_cpu);
@@ -1036,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
vgic_ops->set_lr(vcpu, lr, vlr);
}
-static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
- struct vgic_lr vlr)
-{
- vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
-}
-
static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
{
return vgic_ops->get_elrsr(vcpu);
@@ -1087,16 +1077,23 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
vgic_ops->enable(vcpu);
}
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+ vgic_irq_clear_queued(vcpu, vlr.irq);
+
+ /*
+ * We must transfer the pending state back to the distributor before
+ * retiring the LR, otherwise we may loose edge-triggered interrupts.
+ */
+ if (vlr.state & LR_STATE_PENDING) {
+ vgic_dist_irq_set_pending(vcpu, vlr.irq);
+ vlr.hwirq = 0;
+ }
+
vlr.state = 0;
vgic_set_lr(vcpu, lr_nr, vlr);
- clear_bit(lr_nr, vgic_cpu->lr_used);
- vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
- vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}
/*
@@ -1110,17 +1107,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
*/
static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ u64 elrsr = vgic_get_elrsr(vcpu);
+ unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
int lr;
- for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
+ for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
- if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
- vgic_retire_lr(lr, vlr.irq, vcpu);
- if (vgic_irq_is_queued(vcpu, vlr.irq))
- vgic_irq_clear_queued(vcpu, vlr.irq);
- }
+ if (!vgic_irq_is_enabled(vcpu, vlr.irq))
+ vgic_retire_lr(lr, vcpu);
}
}
@@ -1132,7 +1127,8 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
vgic_irq_clear_active(vcpu, irq);
vgic_update_state(vcpu->kvm);
- } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
+ } else {
+ WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq));
vlr.state |= LR_STATE_PENDING;
kvm_debug("Set pending: 0x%x\n", vlr.state);
}
@@ -1159,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
}
vgic_set_lr(vcpu, lr_nr, vlr);
- vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}
/*
@@ -1169,8 +1164,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
*/
bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ u64 elrsr = vgic_get_elrsr(vcpu);
+ unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
struct vgic_lr vlr;
int lr;
@@ -1181,28 +1177,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
kvm_debug("Queue IRQ%d\n", irq);
- lr = vgic_cpu->vgic_irq_lr_map[irq];
-
/* Do we have an active interrupt for the same CPUID? */
- if (lr != LR_EMPTY) {
+ for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
vlr = vgic_get_lr(vcpu, lr);
- if (vlr.source == sgi_source_id) {
+ if (vlr.irq == irq && vlr.source == sgi_source_id) {
kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
- BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
return true;
}
}
/* Try to use another LR for this interrupt */
- lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
- vgic->nr_lr);
+ lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
if (lr >= vgic->nr_lr)
return false;
kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
- vgic_cpu->vgic_irq_lr_map[irq] = lr;
- set_bit(lr, vgic_cpu->lr_used);
vlr.irq = irq;
vlr.source = sgi_source_id;
@@ -1240,7 +1230,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
unsigned long *pa_percpu, *pa_shared;
- int i, vcpu_id, lr, ret;
+ int i, vcpu_id;
int overflow = 0;
int nr_shared = vgic_nr_shared_irqs(dist);
@@ -1295,39 +1285,62 @@ epilog:
*/
clear_bit(vcpu_id, dist->irq_pending_on_cpu);
}
+}
- for (lr = 0; lr < vgic->nr_lr; lr++) {
- struct vgic_lr vlr;
+static int process_queued_irq(struct kvm_vcpu *vcpu,
+ int lr, struct vgic_lr vlr)
+{
+ int pending = 0;
- if (!test_bit(lr, vgic_cpu->lr_used))
- continue;
+ /*
+ * If the IRQ was EOIed (called from vgic_process_maintenance) or it
+ * went from active to non-active (called from vgic_sync_hwirq) it was
+ * also ACKed and we we therefore assume we can clear the soft pending
+ * state (should it had been set) for this interrupt.
+ *
+ * Note: if the IRQ soft pending state was set after the IRQ was
+ * acked, it actually shouldn't be cleared, but we have no way of
+ * knowing that unless we start trapping ACKs when the soft-pending
+ * state is set.
+ */
+ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
- vlr = vgic_get_lr(vcpu, lr);
+ /*
+ * Tell the gic to start sampling this interrupt again.
+ */
+ vgic_irq_clear_queued(vcpu, vlr.irq);
- /*
- * If we have a mapping, and the virtual interrupt is
- * presented to the guest (as pending or active), then we must
- * set the state to active in the physical world. See
- * Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt.
- */
- if (vlr.state & LR_HW) {
- struct irq_phys_map *map;
- map = vgic_irq_map_search(vcpu, vlr.irq);
-
- ret = irq_set_irqchip_state(map->irq,
- IRQCHIP_STATE_ACTIVE,
- true);
- WARN_ON(ret);
+ /* Any additional pending interrupt? */
+ if (vgic_irq_is_edge(vcpu, vlr.irq)) {
+ BUG_ON(!(vlr.state & LR_HW));
+ pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
+ } else {
+ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+ vgic_cpu_irq_set(vcpu, vlr.irq);
+ pending = 1;
+ } else {
+ vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+ vgic_cpu_irq_clear(vcpu, vlr.irq);
}
}
+
+ /*
+ * Despite being EOIed, the LR may not have
+ * been marked as empty.
+ */
+ vlr.state = 0;
+ vlr.hwirq = 0;
+ vgic_set_lr(vcpu, lr, vlr);
+
+ return pending;
}
static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
{
u32 status = vgic_get_interrupt_status(vcpu);
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- bool level_pending = false;
struct kvm *kvm = vcpu->kvm;
+ int level_pending = 0;
kvm_debug("STATUS = %08x\n", status);
@@ -1342,54 +1355,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
- WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
- spin_lock(&dist->lock);
- vgic_irq_clear_queued(vcpu, vlr.irq);
+ WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
WARN_ON(vlr.state & LR_STATE_MASK);
- vlr.state = 0;
- vgic_set_lr(vcpu, lr, vlr);
- /*
- * If the IRQ was EOIed it was also ACKed and we we
- * therefore assume we can clear the soft pending
- * state (should it had been set) for this interrupt.
- *
- * Note: if the IRQ soft pending state was set after
- * the IRQ was acked, it actually shouldn't be
- * cleared, but we have no way of knowing that unless
- * we start trapping ACKs when the soft-pending state
- * is set.
- */
- vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
/*
* kvm_notify_acked_irq calls kvm_set_irq()
- * to reset the IRQ level. Need to release the
- * lock for kvm_set_irq to grab it.
+ * to reset the IRQ level, which grabs the dist->lock
+ * so we call this before taking the dist->lock.
*/
- spin_unlock(&dist->lock);
-
kvm_notify_acked_irq(kvm, 0,
vlr.irq - VGIC_NR_PRIVATE_IRQS);
- spin_lock(&dist->lock);
-
- /* Any additional pending interrupt? */
- if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
- vgic_cpu_irq_set(vcpu, vlr.irq);
- level_pending = true;
- } else {
- vgic_dist_irq_clear_pending(vcpu, vlr.irq);
- vgic_cpu_irq_clear(vcpu, vlr.irq);
- }
+ spin_lock(&dist->lock);
+ level_pending |= process_queued_irq(vcpu, lr, vlr);
spin_unlock(&dist->lock);
-
- /*
- * Despite being EOIed, the LR may not have
- * been marked as empty.
- */
- vgic_sync_lr_elrsr(vcpu, lr, vlr);
}
}
@@ -1410,40 +1391,40 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
/*
* Save the physical active state, and reset it to inactive.
*
- * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
+ * Return true if there's a pending forwarded interrupt to queue.
*/
-static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
+static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct irq_phys_map *map;
+ bool phys_active;
+ bool level_pending;
int ret;
if (!(vlr.state & LR_HW))
- return 0;
+ return false;
map = vgic_irq_map_search(vcpu, vlr.irq);
- BUG_ON(!map || !map->active);
+ BUG_ON(!map);
ret = irq_get_irqchip_state(map->irq,
IRQCHIP_STATE_ACTIVE,
- &map->active);
+ &phys_active);
WARN_ON(ret);
- if (map->active) {
- ret = irq_set_irqchip_state(map->irq,
- IRQCHIP_STATE_ACTIVE,
- false);
- WARN_ON(ret);
+ if (phys_active)
return 0;
- }
- return 1;
+ spin_lock(&dist->lock);
+ level_pending = process_queued_irq(vcpu, lr, vlr);
+ spin_unlock(&dist->lock);
+ return level_pending;
}
/* Sync back the VGIC state after a guest run */
static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
u64 elrsr;
unsigned long *elrsr_ptr;
@@ -1451,40 +1432,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
bool level_pending;
level_pending = vgic_process_maintenance(vcpu);
- elrsr = vgic_get_elrsr(vcpu);
- elrsr_ptr = u64_to_bitmask(&elrsr);
/* Deal with HW interrupts, and clear mappings for empty LRs */
for (lr = 0; lr < vgic->nr_lr; lr++) {
- struct vgic_lr vlr;
-
- if (!test_bit(lr, vgic_cpu->lr_used))
- continue;
-
- vlr = vgic_get_lr(vcpu, lr);
- if (vgic_sync_hwirq(vcpu, vlr)) {
- /*
- * So this is a HW interrupt that the guest
- * EOI-ed. Clean the LR state and allow the
- * interrupt to be sampled again.
- */
- vlr.state = 0;
- vlr.hwirq = 0;
- vgic_set_lr(vcpu, lr, vlr);
- vgic_irq_clear_queued(vcpu, vlr.irq);
- set_bit(lr, elrsr_ptr);
- }
-
- if (!test_bit(lr, elrsr_ptr))
- continue;
-
- clear_bit(lr, vgic_cpu->lr_used);
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+ level_pending |= vgic_sync_hwirq(vcpu, lr, vlr);
BUG_ON(vlr.irq >= dist->nr_irqs);
- vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
}
/* Check if we still have something up our sleeve... */
+ elrsr = vgic_get_elrsr(vcpu);
+ elrsr_ptr = u64_to_bitmask(&elrsr);
pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
if (level_pending || pending < vgic->nr_lr)
set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
@@ -1574,6 +1533,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
int enabled;
bool ret = true, can_inject = true;
+ trace_vgic_update_irq_pending(cpuid, irq_num, level);
+
if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
return -EINVAL;
@@ -1607,8 +1568,12 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
} else {
if (level_triggered) {
vgic_dist_irq_clear_level(vcpu, irq_num);
- if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+ if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) {
vgic_dist_irq_clear_pending(vcpu, irq_num);
+ vgic_cpu_irq_clear(vcpu, irq_num);
+ if (!compute_pending_for_cpu(vcpu))
+ clear_bit(cpuid, dist->irq_pending_on_cpu);
+ }
}
ret = false;
@@ -1849,30 +1814,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
}
/**
- * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
- *
- * Return the logical active state of a mapped interrupt. This doesn't
- * necessarily reflects the current HW state.
- */
-bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
-{
- BUG_ON(!map);
- return map->active;
-}
-
-/**
- * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
- *
- * Set the logical active state of a mapped interrupt. This doesn't
- * immediately affects the HW state.
- */
-void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
-{
- BUG_ON(!map);
- map->active = active;
-}
-
-/**
* kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
* @vcpu: The VCPU pointer
* @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
@@ -1927,12 +1868,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
kfree(vgic_cpu->pending_shared);
kfree(vgic_cpu->active_shared);
kfree(vgic_cpu->pend_act_shared);
- kfree(vgic_cpu->vgic_irq_lr_map);
vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
vgic_cpu->pending_shared = NULL;
vgic_cpu->active_shared = NULL;
vgic_cpu->pend_act_shared = NULL;
- vgic_cpu->vgic_irq_lr_map = NULL;
}
static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
@@ -1943,18 +1882,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
- vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
if (!vgic_cpu->pending_shared
|| !vgic_cpu->active_shared
- || !vgic_cpu->pend_act_shared
- || !vgic_cpu->vgic_irq_lr_map) {
+ || !vgic_cpu->pend_act_shared) {
kvm_vgic_vcpu_destroy(vcpu);
return -ENOMEM;
}
- memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
-
/*
* Store the number of LRs per vcpu, so we don't have to go
* all the way to the distributor structure to find out. Only
@@ -2096,14 +2031,24 @@ int vgic_init(struct kvm *kvm)
break;
}
- for (i = 0; i < dist->nr_irqs; i++) {
- if (i < VGIC_NR_PPIS)
+ /*
+ * Enable and configure all SGIs to be edge-triggere and
+ * configure all PPIs as level-triggered.
+ */
+ for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+ if (i < VGIC_NR_SGIS) {
+ /* SGIs */
vgic_bitmap_set_irq_val(&dist->irq_enabled,
vcpu->vcpu_id, i, 1);
- if (i < VGIC_NR_PRIVATE_IRQS)
vgic_bitmap_set_irq_val(&dist->irq_cfg,
vcpu->vcpu_id, i,
VGIC_CFG_EDGE);
+ } else if (i < VGIC_NR_PRIVATE_IRQS) {
+ /* PPIs */
+ vgic_bitmap_set_irq_val(&dist->irq_cfg,
+ vcpu->vcpu_id, i,
+ VGIC_CFG_LEVEL);
+ }
}
vgic_enable(vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a75502c93c3e..484079efea5b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2021,6 +2021,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
} while (single_task_running() && ktime_before(cur, stop));
}
+ kvm_arch_vcpu_blocking(vcpu);
+
for (;;) {
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
@@ -2034,6 +2036,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
finish_wait(&vcpu->wq, &wait);
cur = ktime_get();
+ kvm_arch_vcpu_unblocking(vcpu);
out:
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);