diff options
author | Dominique Martinet <asmadeus@codewreck.org> | 2015-11-02 18:24:46 +0100 |
---|---|---|
committer | Dominique Martinet <asmadeus@codewreck.org> | 2015-11-02 18:24:46 +0100 |
commit | d1e1fee90e89ae592fc376d49ea013c98db7f2d2 (patch) | |
tree | 115abd8f130eb389878965e16789f503ba1b9961 | |
parent | 3b5d6dfa988c074694bfef0459992318b1e2e005 (diff) |
add IB SRIOV support
-rwxr-xr-x | kvm-wrapper.sh | 114 |
1 files changed, 109 insertions, 5 deletions
diff --git a/kvm-wrapper.sh b/kvm-wrapper.sh
index 0d210e3..629cbb9 100755
--- a/kvm-wrapper.sh
+++ b/kvm-wrapper.sh
@@ -359,23 +359,117 @@ function pci_bind_driver ()
 	echo "Unbinding pci device ($PCIDOMAIN [$PCIVENDOR]) and binding to $BIND_DRIVER_NAME"
 
 	# Add id, unbind, and bind
-	echo "$PCIVENDOR" > "$PCI_BIND_DRIVER/new_id"
-	echo "$PCIDOMAIN" > "/sys/bus/pci/devices/$PCIDOMAIN/driver/unbind"
-	echo "$PCIDOMAIN" > "$PCI_BIND_DRIVER/bind"
+	echo "$PCIVENDOR" > "$PCI_BIND_DRIVER/new_id" || fail_exit "couldn't add new_id"
+	[[ -w "/sys/bus/pci/devices/$PCIDOMAIN/driver/unbind" ]] && \
+		{ echo "$PCIDOMAIN" > "/sys/bus/pci/devices/$PCIDOMAIN/driver/unbind" || fail_exit "couldn't unbind"; }
+	echo "$PCIDOMAIN" > "$PCI_BIND_DRIVER/bind" || fail_exit "couldn't bind"
+}
+
+function pci_unbind_driver()
+{
+	local BIND_DRIVER_NAME=$1
+	local PCIDOMAIN=$2
+	[[ "$(echo "$PCIDOMAIN" | tr -dc ":" | wc -c)" == "1" ]] && PCIDOMAIN="0000:$PCIDOMAIN"
+
+	local PCI_BIND_DRIVER="/sys/bus/pci/drivers/$BIND_DRIVER_NAME"
+
+	echo "$PCIDOMAIN" > $PCI_BIND_DRIVER/unbind
 }
 
 # PCI assign helper (pci-stub)
-function pci_stubify ()
+function pci_stubify()
 {
 	pci_bind_driver "pci-stub" "$1"
 }
 
+function pci_unstubify()
+{
+	pci_unbind_driver "pci-stub" "$1"
+}
+
 # PCI vfio helper (vfio-pci)
-function pci_vfiofy ()
+function pci_vfiofy()
 {
 	pci_bind_driver "vfio-pci" "$1"
 }
 
+function pci_unvfiofy()
+{
+	pci_unbind_driver "vfio-pci" "$1"
+}
+
+# helper for vfio
+function ib_sriov()
+{
+	local PKEYS="$1"
+	local IB_DEV="$2"
+
+	# pick one compatible card at random
+
+	[[ -z "$IB_DEV" ]] && IB_DEV=$(ls -d /sys/class/infiniband/*/device/virtfn0 | sort -r | awk -F/ '{print $5; exit}')
+	[[ -z "$IB_DEV" ]] && fail_exit "No VFIO compatible IB adapter"
+	local DEVDIR="/sys/class/infiniband/${IB_DEV}/device"
+	local PCIDOMAIN=inval
+	local VIRTFN
+
+	if [[ "$IB_DEV" =~ "mlx5_"* ]]; then
+		# arbitrary number: create all vfs available
+		[[ $(cat /sys/class/infiniband/$IB_DEV/device/mlx5_num_vfs) == "0" ]] && \
+			cat /sys/class/infiniband/$IB_DEV/device/sriov_totalvfs > /sys/class/infiniband/$IB_DEV/device/mlx5_num_vfs
+	fi
+
+
+	# pick a virtfn - this is totally racy, but sod it.
+	for VIRTFN in $DEVDIR/virtfn*; do
+		PCIDOMAIN=$(basename $(readlink $VIRTFN))
+		[[ -e /sys/bus/pci/drivers/vfio-pci/$PCIDOMAIN ]] || break
+		PCIDOMAIN=inval
+	done
+	[[ $PCIDOMAIN = inval ]] && fail_exit "no vfio virtfn available for $IB_DEV"
+
+	# hack - register everything to pci-stub first so we can tell appart which are used or not
+	local PCIVENDOR="$(cat "/sys/bus/pci/devices/$PCIDOMAIN/vendor" |sed 's/^0x//')"
+	PCIVENDOR+=" $(cat "/sys/bus/pci/devices/$PCIDOMAIN/device" |sed 's/^0x//')"
+	echo "$PCIVENDOR" > "/sys/bus/pci/drivers/pci-stub/new_id" || fail_exit "couldn't add new_id (pci-stub)"
+
+	# set pkey if able
+	if [[ -e /sys/class/infiniband/$IB_DEV/iov/$PCIDOMAIN/ports/1/pkey_idx ]]; then
+		VFIDX=0
+		[[ -z "$PKEYS" ]] || for PKEY in "$PKEYS"; do
+			local PKEY_IDX=$(basename "$(grep -lZw 0x8$PKEY /sys/class/infiniband/$IB_DEV/ports/1/pkeys/*)")
+			[[ -z "$PKEY_IDX" ]] && fail_exit "pkey $PKEY not found for $IB_DEV"
+			echo $PKEY_IDX > /sys/class/infiniband/$IB_DEV/iov/$PCIDOMAIN/ports/1/pkey_idx/$VFIDX || fail_exit "couldn't set pkey"
+			VFIDX=$((VFIDX+1))
+		done
+		PKEY_IDX=$(basename "$(grep -lZw 0x7fff /sys/class/infiniband/$IB_DEV/ports/1/pkeys/*)")
+		echo $PKEY_IDX > /sys/class/infiniband/$IB_DEV/iov/$PCIDOMAIN/ports/1/pkey_idx/$VFIDX
+	fi
+
+	# set guid and policy for mlx5
+	if [[ "$IB_DEV" =~ "mlx5_"* ]]; then
+		local GUID=$(sed -e 's/\([a-f0-9]\{2\}\)\([a-f0-9]\{2\}\)/\1:\2/g' < /sys/class/infiniband/$IB_DEV/node_guid)
+		local VIRTFN_IDX=${VIRTFN#*virtfn}
+		echo Follow > /sys/class/infiniband/$IB_DEV/device/sriov/$VIRTFN_IDX/policy
+		echo $GUID > /sys/class/infiniband/$IB_DEV/device/sriov/$VIRTFN_IDX/node
+		printf "%s:%x\n" "${GUID%:*}" $((0x${GUID##*:}+$VIRTFN_IDX+1)) > /sys/class/infiniband/$IB_DEV/device/sriov/$VIRTFN_IDX/port
+	fi
+
+	KVM_VFIO_DOMAIN+=("$PCIDOMAIN")
+}
+
+function ib_unsriov()
+{
+	for PCIDOMAIN in ${KVM_VFIO_DOMAIN}; do
+		[[ -e /sys/class/infiniband/*/iov/$PCIDOMAIN ]] || continue
+		grep -lZv none /sys/class/infiniband/*/iov/$PCIDOMAIN/ports/1/pkey_idx/* | \
+			tr '\0' '\n' | \
+			while read file; do
+				echo none > $file
+			done
+	done
+}
+
+
 # Change perms. Meant to run forked.
 function serial_perms_forked()
 {
@@ -569,6 +663,9 @@ function kvm_start_vm ()
 		KVM_PCIASSIGN+="-device pci-assign,id=${KVM_PCIASSIGN_ID[$i]:-pciassign${i}},host=${KVM_PCIASSIGN_DOMAIN[$i]} "
 	done
 
+	# IB sriov
+	[[ -n "$KVM_SRIOV_PKEYS$KVM_SRIOV_DEV" ]] && ib_sriov "$KVM_SRIOV_PKEYS" "$KVM_SRIOV_DEV"
+
 	# vfio assignement
 	local KVM_VFIO=""
 	for i in ${!KVM_VFIO_DOMAIN[@]}; do
@@ -610,6 +707,13 @@ function kvm_start_vm ()
 
 	# If drive is a lv in the main vg, deactivate the lv
 	unprepare_disks
+	ib_unsriov
+	for DOMAIN in ${KVM_PCIASSIGN_DOMAIN[@]}; do
+		pci_unstubify $DOMAIN
+	done
+	for DOMAIN in ${KVM_VFIO_DOMAIN[@]}; do
+		pci_unvfiofy $DOMAIN
+	done
 
 	[[ "$KVM_RETURN_VALUE" != "0" ]] && fail_exit "Something went wrong with kvm execution, read above"
 