From b5a6b8ec6c5a0a16f8edeabc4464b6e0031f49df Mon Sep 17 00:00:00 2001
From: Hareesh Puthalath
Date: Tue, 28 May 2024 15:48:00 +0200
Subject: [PATCH] OVS-DOCA configuration instructions

Signed-off-by: Hareesh Puthalath
---
 docs/features/hardware-offload/ovs-doca.md | 220 +++++++++++++++++++++
 1 file changed, 220 insertions(+)
 create mode 100644 docs/features/hardware-offload/ovs-doca.md

diff --git a/docs/features/hardware-offload/ovs-doca.md b/docs/features/hardware-offload/ovs-doca.md
new file mode 100644
index 00000000000..c17190fcd82
--- /dev/null
+++ b/docs/features/hardware-offload/ovs-doca.md
@@ -0,0 +1,220 @@
+# OVS-DOCA
+
+## Introduction
+
+OVS supports hardware offload features that allow the OVS data plane to be offloaded to the NIC while keeping the OVS control plane unmodified. It uses SR-IOV technology with VF representor host net-devices.
+
+OVS-DOCA extends the traditional OVS-DPDK and OVS-Kernel data-path offload interfaces (DPIF) by adding an additional DPIF implementation. It preserves the same interfaces as OVS-DPDK and OVS-Kernel while utilizing the DOCA Flow library, and it uses dedicated hardware offload mechanisms and application techniques to maximize performance and add further features.
+
+## Motivation: why use OVS-DOCA instead of OVS-DPDK
+
+Like OVS-DPDK, OVS-DOCA is a userland virtual switch application that builds on DPDK and its PMDs (Poll Mode Drivers). The main difference is that OVS-DOCA programs offloads through the DOCA Flow API instead of the rte_flow API used by OVS-DPDK, which allows it to use hardware steering for offloads rather than software steering.
+
+## Supported Controllers
+
+- NVIDIA ConnectX-6 Dx NIC
+- NVIDIA ConnectX-7 NIC
+- NVIDIA BlueField-2/3 DPU
+
+## Installing OVS-DOCA
+
+OVS-DOCA is part of the DOCA-Host package. DOCA-Host is available in several installation profiles, each of which provides a subset of the full DOCA installation. For ovn-kubernetes we need the DOCA-packaged version of OVS, which is available in the `doca-networking` profile. This profile includes:
+
+- MLNX_OFED drivers and tools
+- DOCA Core
+- MLNX-DPDK
+- OVS-DOCA
+- DOCA Flow
+- DOCA IPsec
+
+Read more details, including supported OS and kernel versions, at [DOCA-Profiles](https://docs.nvidia.com/doca/sdk/nvidia+doca+profiles/index.html).
+
+Installation packages for various distributions are found [here](https://docs.nvidia.com/doca/sdk/nvidia+doca+installation+guide+for+linux/index.html#src-2654401500_id-.NVIDIADOCAInstallationGuideforLinuxv2.7.0-BlueFieldNetworkingPlatformImageInstallation).
+
+Installation instructions for the host are found [here](https://docs.nvidia.com/doca/sdk/nvidia+doca+installation+guide+for+linux/index.html#src-2654401500_id-.NVIDIADOCAInstallationGuideforLinuxv2.7.0-InstallingSoftwareonHost).
+
+## Worker Node Configuration
+
+### Configure Huge Pages
+
+```
+mkdir -p /hugepages
+mount -t hugetlbfs hugetlbfs /hugepages
+echo 4096 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
+```
+
+Note: Persistence of huge pages across node reboots and similar events is beyond the scope of this document.
+
+### Switchdev mode
+
+- Unbind all VFs, turn on switchdev mode on the PF, and bind all VFs back, as shown below.
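+
+If the PF has no VFs yet, create them first. A minimal sketch, assuming the PF is `enp4s0f0` and two VFs are wanted (they will appear as PCI functions such as 0000:04:00.2 and 0000:04:00.3; adjust the interface name and count to your setup):
+
+```
+# Create 2 VFs on the PF via the standard sysfs interface
+echo 2 > /sys/class/net/enp4s0f0/device/sriov_numvfs
+```
+
+With the VFs present, unbind them, switch the PF to switchdev mode, and bind them back: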
+
+```
+echo 0000:04:00.2 > /sys/bus/pci/drivers/mlx5_core/unbind
+echo 0000:04:00.3 > /sys/bus/pci/drivers/mlx5_core/unbind
+
+echo switchdev > /sys/class/net/enp4s0f0/compat/devlink/mode
+
+echo 0000:04:00.2 > /sys/bus/pci/drivers/mlx5_core/bind
+echo 0000:04:00.3 > /sys/bus/pci/drivers/mlx5_core/bind
+```
+
+### Enable DOCA and hardware offloads
+
+```
+ovs-vsctl --no-wait set Open_vSwitch . other_config:doca-init=true
+ovs-vsctl set Open_vSwitch . other_config:hw-offload=true
+```
+
+### Restart OVS
+
+```
+systemctl restart openvswitch
+```
+
+### Usage
+
+- Create an OVS-DOCA bridge of type `netdev`:
+
+```
+ovs-vsctl --no-wait add-br br-ex -- set bridge br-ex datapath_type=netdev
+```
+
+- Add interfaces to OVS using type `dpdk`:
+
+```
+# PF
+ovs-vsctl add-port br-ex enp4s0f0 -- set Interface enp4s0f0 type=dpdk
+# Representor
+ovs-vsctl add-port br-ex enp4s0f0_0 -- set Interface enp4s0f0_0 type=dpdk
+```
+
+### OVN Integration Bridge
+
+The following configuration ensures that the OVN integration bridge (br-int) will be of the `netdev` datapath type:
+
+```
+ovs-vsctl set open . external-ids:ovn-bridge-datapath-type=netdev
+```
+
+## Worker Node SR-IOV network device plugin configuration
+
+This ConfigMap provides the configuration data for the SR-IOV network device plugin:
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: sriovdp-config
+  namespace: kube-system
+data:
+  config.json: |
+    {
+      "resourceList": [
+        {
+          "resourceName": "p0_vfs",
+          "resourcePrefix": "nvidia.com",
+          "selectors": {
+            "vendors": ["15b3"],
+            "devices": ["1014", "1016", "1018", "101a", "101c", "101e"],
+            "pfNames": ["p0#1-3"],
+            "isRdma": true
+          }
+        }
+      ]
+    }
+```
+
+Note: Adjust the values in the `pfNames` field to your setup. Replace `p0` with your PF name (e.g., `enp4s0f0`) and use the selector to include the range of VFs to be used for Kubernetes pods. VF 0 is usually reserved for the management port. You can override this on a per-node basis with a config file in `/etc/pcidp/config.json`.
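+
+For example, a hypothetical per-node override — the resource name, PF name (`enp4s0f0`), and VF range here are illustrative and must match your setup:
+
+```
+# Hypothetical node-local config consumed by the SR-IOV device plugin (see note above)
+cat > /etc/pcidp/config.json <<'EOF'
+{
+  "resourceList": [
+    {
+      "resourceName": "p0_vfs",
+      "resourcePrefix": "nvidia.com",
+      "selectors": {
+        "vendors": ["15b3"],
+        "pfNames": ["enp4s0f0#1-3"],
+        "isRdma": true
+      }
+    }
+  ]
+}
+EOF
+```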
+
+Deploy the SR-IOV network device plugin as a DaemonSet; see https://github.com/k8snetworkplumbingwg/sriov-network-device-plugin
+
+## Multus CNI configuration
+
+Deploy the Multus CNI as a DaemonSet, based on https://github.com/k8snetworkplumbingwg/multus-cni/blob/master/deployments/multus-daemonset.yml
+
+## NetworkAttachmentDefinition
+
+Create a NetworkAttachmentDefinition custom resource with the OVN CNI config:
+
+```yaml
+apiVersion: "k8s.cni.cncf.io/v1"
+kind: NetworkAttachmentDefinition
+metadata:
+  name: ovn-primary
+  namespace: default
+  annotations:
+    k8s.v1.cni.cncf.io/resourceName: nvidia.com/p0_vfs
+spec:
+  config: '{
+      "cniVersion" : "0.4.0",
+      "name" : "ovn-primary",
+      "type" : "ovn-k8s-cni-overlay",
+      "logFile": "/var/log/ovn-kubernetes/ovn-k8s-cni-overlay.log",
+      "logLevel": "5",
+      "logfile-maxsize": 100,
+      "logfile-maxbackups": 5,
+      "logfile-maxage": 5
+    }'
+```
+
+## Deploy a pod with OVS hardware offload
+
+Create a pod spec that requests the VF resource and uses the NetworkAttachmentDefinition as its default network:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: netshoot-deploy
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: netshoot
+  template:
+    metadata:
+      annotations:
+        v1.multus-cni.io/default-network: default/ovn-primary # <== the NetworkAttachmentDefinition created above
+      name: netshoot
+      labels:
+        app: netshoot
+    spec:
+      containers:
+      - name: netshoot
+        command:
+        - /bin/sh
+        - -c
+        - 'trap : TERM INT; sleep infinity & wait'
+        image: "nicolaka/netshoot:v0.12"
+        securityContext:
+          capabilities:
+            add: ["NET_ADMIN"]
+        resources:
+          requests:
+            nvidia.com/p0_vfs: '1' # <== the SR-IOV resource defined earlier
+          limits:
+            nvidia.com/p0_vfs: '1'
+```
+
+## How to enable this feature on an OVN-Kubernetes cluster?
+
+No special configuration or knobs need to be set on the ovn-kubernetes side; there are no user-facing API changes, nor changes to OVN constructs or OVS flows as such.
+
+Add the following OVS configuration to ensure that the OVN integration bridge (br-int) will be of the `netdev` datapath type:
+
+```
+ovs-vsctl set open . external-ids:ovn-bridge-datapath-type=netdev
+```
+
+ovn-kubernetes will detect the datapath type and set interface configurations as needed.
+
+The external bridge may also be set to type `netdev` (see the Usage section above).
+
+## References
+
+* [DOCA-Profiles](https://docs.nvidia.com/doca/sdk/nvidia+doca+profiles/index.html)
+* [Installation packages for various distributions](https://docs.nvidia.com/doca/sdk/nvidia+doca+installation+guide+for+linux/index.html#src-2654401500_id-.NVIDIADOCAInstallationGuideforLinuxv2.7.0-BlueFieldNetworkingPlatformImageInstallation)
+* [Installation instructions on the Host](https://docs.nvidia.com/doca/sdk/nvidia+doca+installation+guide+for+linux/index.html#src-2654401500_id-.NVIDIADOCAInstallationGuideforLinuxv2.7.0-InstallingSoftwareonHost)
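+
+## Verifying offloads
+
+To sanity-check that flows are actually being offloaded — a minimal sketch, assuming traffic is already flowing between pods on the node — dump the offloaded datapath flows on the worker node:
+
+```
+# List datapath flows that have been offloaded to hardware
+ovs-appctl dpctl/dump-flows type=offloaded
+```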