summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dominique Martinet <asmadeus@codewreck.org> 2015-11-02 18:24:46 +0100
committer: Dominique Martinet <asmadeus@codewreck.org> 2015-11-02 18:24:46 +0100
commit: d1e1fee90e89ae592fc376d49ea013c98db7f2d2 (patch)
tree: 115abd8f130eb389878965e16789f503ba1b9961
parent: 3b5d6dfa988c074694bfef0459992318b1e2e005 (diff)
add IB SRIOV support
-rwxr-xr-x kvm-wrapper.sh 114
1 files changed, 109 insertions, 5 deletions
diff --git a/kvm-wrapper.sh b/kvm-wrapper.sh
index 0d210e3..629cbb9 100755
--- a/kvm-wrapper.sh
+++ b/kvm-wrapper.sh
@@ -359,23 +359,117 @@ function pci_bind_driver ()
echo "Unbinding pci device ($PCIDOMAIN [$PCIVENDOR]) and binding to $BIND_DRIVER_NAME"
# Add id, unbind, and bind
- echo "$PCIVENDOR" > "$PCI_BIND_DRIVER/new_id"
- echo "$PCIDOMAIN" > "/sys/bus/pci/devices/$PCIDOMAIN/driver/unbind"
- echo "$PCIDOMAIN" > "$PCI_BIND_DRIVER/bind"
+ echo "$PCIVENDOR" > "$PCI_BIND_DRIVER/new_id" || fail_exit "couldn't add new_id"
+ [[ -w "/sys/bus/pci/devices/$PCIDOMAIN/driver/unbind" ]] && \
+ { echo "$PCIDOMAIN" > "/sys/bus/pci/devices/$PCIDOMAIN/driver/unbind" || fail_exit "couldn't unbind"; }
+ echo "$PCIDOMAIN" > "$PCI_BIND_DRIVER/bind" || fail_exit "couldn't bind"
+}
+
+# Unbind a PCI device from a driver through sysfs.
+#   $1 - driver name under /sys/bus/pci/drivers (e.g. pci-stub, vfio-pci)
+#   $2 - PCI address, either "bus:slot.func" or "domain:bus:slot.func"
+# Returns the status of the sysfs write, or 0 when the device is not
+# currently bound to that driver.
+function pci_unbind_driver()
+{
+	local BIND_DRIVER_NAME=$1
+	local PCIDOMAIN=$2
+
+	# A single ':' means the PCI domain part was omitted: prepend 0000
+	local COLONS=${PCIDOMAIN//[^:]/}
+	[[ "${#COLONS}" == "1" ]] && PCIDOMAIN="0000:$PCIDOMAIN"
+
+	local PCI_BIND_DRIVER="/sys/bus/pci/drivers/$BIND_DRIVER_NAME"
+
+	# Nothing to do when the device is not bound to this driver
+	[[ -e "$PCI_BIND_DRIVER/$PCIDOMAIN" ]] || return 0
+
+	echo "$PCIDOMAIN" > "$PCI_BIND_DRIVER/unbind"
}
# PCI assign helper (pci-stub)
+# Hand the PCI device given as $1 over to the pci-stub driver via pci_bind_driver.
-function pci_stubify ()
+function pci_stubify()
{
pci_bind_driver "pci-stub" "$1"
}
+# Release the PCI device given as $1 from the pci-stub driver.
+function pci_unstubify()
+{
+	local PCI_ADDR=$1
+	pci_unbind_driver "pci-stub" "$PCI_ADDR"
+}
+
# PCI vfio helper (vfio-pci)
+# Hand the PCI device given as $1 over to the vfio-pci driver via pci_bind_driver.
-function pci_vfiofy ()
+function pci_vfiofy()
{
pci_bind_driver "vfio-pci" "$1"
}
+# Release the PCI device given as $1 from the vfio-pci driver.
+function pci_unvfiofy()
+{
+	local PCI_ADDR=$1
+	pci_unbind_driver "vfio-pci" "$PCI_ADDR"
+}
+
+# Reserve an InfiniBand SR-IOV virtual function (VF) for the VM and queue it
+# for vfio assignment.
+#   $1 - whitespace-separated list of partition keys (may be empty); each one
+#        is looked up as 0x8<pkey> in the port 1 pkey table of the device
+#   $2 - IB device name under /sys/class/infiniband (may be empty: the device
+#        whose virtfn0 path sorts last is used)
+# Appends the PCI address of the selected VF to KVM_VFIO_DOMAIN.
+# Calls fail_exit when no device, no free VF or a requested pkey is missing.
+function ib_sriov()
+{
+	local PKEYS="$1"
+	local IB_DEV="$2"
+	local PCIDOMAIN=inval
+	local VIRTFN PKEY PKEY_IDX VFIDX
+
+	# no device requested: take the last one (in sort order) exposing a virtfn0
+	[[ -z "$IB_DEV" ]] && IB_DEV=$(ls -d /sys/class/infiniband/*/device/virtfn0 | sort -r | awk -F/ '{print $5; exit}')
+	[[ -z "$IB_DEV" ]] && fail_exit "No VFIO compatible IB adapter"
+	local DEVDIR="/sys/class/infiniband/${IB_DEV}/device"
+
+	# glob match on purpose: '=~ "mlx5_"*' would be a regex matching "mlx5" anywhere
+	if [[ "$IB_DEV" == mlx5_* ]]; then
+		# arbitrary number: create all vfs available
+		[[ $(cat "$DEVDIR/mlx5_num_vfs") == "0" ]] && \
+			cat "$DEVDIR/sriov_totalvfs" > "$DEVDIR/mlx5_num_vfs"
+	fi
+
+	# pick the first virtfn not already bound to vfio-pci
+	# - this is totally racy, but sod it.
+	for VIRTFN in "$DEVDIR"/virtfn*; do
+		# unmatched glob (no VF exists): leave PCIDOMAIN as inval
+		[[ -e "$VIRTFN" ]] || continue
+		PCIDOMAIN=$(basename "$(readlink "$VIRTFN")")
+		[[ -e "/sys/bus/pci/drivers/vfio-pci/$PCIDOMAIN" ]] || break
+		PCIDOMAIN=inval
+	done
+	[[ $PCIDOMAIN = inval ]] && fail_exit "no vfio virtfn available for $IB_DEV"
+
+	# hack - register everything to pci-stub first so we can tell apart which are used or not
+	local PCIVENDOR
+	PCIVENDOR="$(sed 's/^0x//' "/sys/bus/pci/devices/$PCIDOMAIN/vendor")"
+	PCIVENDOR+=" $(sed 's/^0x//' "/sys/bus/pci/devices/$PCIDOMAIN/device")"
+	echo "$PCIVENDOR" > "/sys/bus/pci/drivers/pci-stub/new_id" || fail_exit "couldn't add new_id (pci-stub)"
+
+	# set pkey if able
+	local PKEY_TABLE="/sys/class/infiniband/$IB_DEV/ports/1/pkeys"
+	local VF_PKEY_DIR="/sys/class/infiniband/$IB_DEV/iov/$PCIDOMAIN/ports/1/pkey_idx"
+	if [[ -e "$VF_PKEY_DIR" ]]; then
+		VFIDX=0
+		# $PKEYS is deliberately unquoted: one VF pkey slot per listed pkey
+		for PKEY in $PKEYS; do
+			# the file name inside the pkey table is the index to hand to the VF
+			PKEY_IDX=$(grep -lw "0x8$PKEY" "$PKEY_TABLE"/* | head -n 1)
+			[[ -z "$PKEY_IDX" ]] && fail_exit "pkey $PKEY not found for $IB_DEV"
+			echo "${PKEY_IDX##*/}" > "$VF_PKEY_DIR/$VFIDX" || fail_exit "couldn't set pkey"
+			VFIDX=$((VFIDX+1))
+		done
+		# the slot after the requested pkeys always gets the 0x7fff entry
+		# NOTE(review): presumably the limited-membership default partition - confirm
+		PKEY_IDX=$(grep -lw 0x7fff "$PKEY_TABLE"/* | head -n 1)
+		echo "${PKEY_IDX##*/}" > "$VF_PKEY_DIR/$VFIDX"
+	fi
+
+	# set guid and policy for mlx5
+	if [[ "$IB_DEV" == mlx5_* ]]; then
+		# node_guid rewritten with a ':' between every pair of hex digits
+		local GUID
+		GUID=$(sed -e 's/\([a-f0-9]\{2\}\)\([a-f0-9]\{2\}\)/\1:\2/g' < "/sys/class/infiniband/$IB_DEV/node_guid")
+		local VIRTFN_IDX=${VIRTFN#*virtfn}
+		echo Follow > "$DEVDIR/sriov/$VIRTFN_IDX/policy"
+		echo "$GUID" > "$DEVDIR/sriov/$VIRTFN_IDX/node"
+		# port guid: node guid with its last byte increased by (VF index + 1)
+		# NOTE(review): no zero padding and no carry if the sum exceeds 0xff
+		printf "%s:%x\n" "${GUID%:*}" $((0x${GUID##*:}+VIRTFN_IDX+1)) > "$DEVDIR/sriov/$VIRTFN_IDX/port"
+	fi
+
+	KVM_VFIO_DOMAIN+=("$PCIDOMAIN")
+}
+
+# Reset the pkey table of every InfiniBand virtual function listed in
+# KVM_VFIO_DOMAIN: each port 1 pkey_idx entry that is not already "none" is
+# written back to "none". Entries of KVM_VFIO_DOMAIN that have no iov
+# directory under /sys/class/infiniband are skipped.
+function ib_unsriov()
+{
+	local PCIDOMAIN IOVDIR PKEY_FILE
+
+	# walk the whole array, not just its first element
+	for PCIDOMAIN in "${KVM_VFIO_DOMAIN[@]}"; do
+		# globs are not expanded inside [[ ]], so iterate over the matches
+		for IOVDIR in /sys/class/infiniband/*/iov/"$PCIDOMAIN"; do
+			[[ -e "$IOVDIR" ]] || continue
+			for PKEY_FILE in "$IOVDIR"/ports/1/pkey_idx/*; do
+				[[ -e "$PKEY_FILE" ]] || continue
+				if grep -qv none "$PKEY_FILE"; then
+					echo none > "$PKEY_FILE"
+				fi
+			done
+		done
+	done
+}
+
+
# Change perms. Meant to run forked.
function serial_perms_forked()
{
@@ -569,6 +663,9 @@ function kvm_start_vm ()
KVM_PCIASSIGN+="-device pci-assign,id=${KVM_PCIASSIGN_ID[$i]:-pciassign${i}},host=${KVM_PCIASSIGN_DOMAIN[$i]} "
done
+ # IB sriov
+ [[ -n "$KVM_SRIOV_PKEYS$KVM_SRIOV_DEV" ]] && ib_sriov "$KVM_SRIOV_PKEYS" "$KVM_SRIOV_DEV"
+
# vfio assignement
local KVM_VFIO=""
for i in ${!KVM_VFIO_DOMAIN[@]}; do
@@ -610,6 +707,13 @@ function kvm_start_vm ()
# If drive is a lv in the main vg, deactivate the lv
unprepare_disks
+ ib_unsriov
+ for DOMAIN in ${KVM_PCIASSIGN_DOMAIN[@]}; do
+ pci_unstubify $DOMAIN
+ done
+ for DOMAIN in ${KVM_VFIO_DOMAIN[@]}; do
+ pci_unvfiofy $DOMAIN
+ done
[[ "$KVM_RETURN_VALUE" != "0" ]] && fail_exit "Something went wrong with kvm execution, read above"