IB网卡流量带宽监控

Updated on with 0 views and 0 comments

全部指标参考值来自如下命令:

root@ucloud-wlcb-gpu-061:/etc/zabbix/externalscripts# /usr/bin/mlnx_perf -i ibs31 -t 10 -c 1 
Initializing mlnx_perf...
Sampling started.
rx_vport_rdma_unicast_packets: 6,478,784.59
   rx_vport_rdma_unicast_bytes: 24,807,016,550Bps   = 198,456.13Mbps   
tx_vport_rdma_unicast_packets: 6,459,893.9
   tx_vport_rdma_unicast_bytes: 24,692,785,131.59Bps = 197,542.28Mbps   
         rx_corrected_bits_phy: 106
             rx_err_lane_0_phy: 22.50
             rx_err_lane_1_phy: 24.89
             rx_err_lane_2_phy: 46.39
             rx_err_lane_3_phy: 12.19
--------

root@ucloud-wlcb-gpu-061:/etc/zabbix/externalscripts# /usr/bin/mlnx_perf --help
Usage: mlnx_perf -i <interface> [options]

Options:
  --version             show program's version number and exit
  -h, --help            show this help message and exit
  -i INTF, --interface=INTF
                        Interface name
  -t INTERVAL, --interval=INTERVAL
                        Interval between measurements in seconds
  -c COUNT, --count=COUNT
                        Exit counter - exit after counting number of intervals
                        ( default is -1: do not exit)

脚本内容:

#!/usr/bin/python3
#2023年11月17日 -- Author : cuijianzhe
import subprocess
import json
import sys
def IB_network_discovery():
    """Print the Zabbix low-level-discovery JSON for InfiniBand interfaces.

    Interface names are taken from the output of ``ifconfig -a``: every
    whole word that starts with ``ib`` is treated as an interface name.
    Each distinct name is reported once, in the order it first appears.

    The document is printed to stdout in the form
    ``{"data": [{"{#IBNAME}": "<name>"}, ...]}``; nothing is returned.
    """
    # Raw string: '\w' inside a normal literal is an invalid escape sequence.
    # -w keeps grep from matching "ib" in the middle of an unrelated word.
    CMD = r"ifconfig -a | grep -ow 'ib\w*'"
    # Capture stdout only. subprocess.getoutput() also captures stderr, so a
    # warning or a "command not found" message would be split into words and
    # reported as interface names.
    completed = subprocess.run(CMD, shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.DEVNULL,
                               universal_newlines=True)
    ib_list = []
    seen = set()
    for key in completed.stdout.split():
        # The same name can be matched more than once; report it only once so
        # the discovery document has no duplicate rows.
        if key in seen:
            continue
        seen.add(key)
        ib_list.append({'{#IBNAME}': key})
    print(json.dumps({'data': ib_list}, sort_keys=True, indent=4, separators=(',', ':')))


def net_traffic_List(ib_name, output_dir='/etc/zabbix/externalscripts'):
    """Sample RDMA throughput of one IB interface and store it in a file.

    Runs ``/usr/bin/mlnx_perf -i <ib_name> -t 10 -c 1`` and, for every
    output line containing ``vport_rdma_unicast_bytes``, keeps the first
    token after the ``=`` sign with thousands separators removed
    (e.g. ``198456.13Mbps``). The values are written one per line, in the
    order mlnx_perf printed them, to ``<output_dir>/<ib_name>``. When no
    value could be obtained the file receives ``0\\n0`` instead.

    Args:
        ib_name: interface name; only letters, digits, ``_``, ``.`` and
            ``-`` are accepted, and it may not start with ``-``.
        output_dir: directory that receives the result file. Defaults to
            the path the Zabbix UserParameters read from.

    Returns:
        None. The result is delivered through the written file only.

    Raises:
        ValueError: if ``ib_name`` is empty or contains characters that
            could escape ``output_dir`` or be parsed as an option.
    """
    # The name ends up in a command line and in a file path, so refuse
    # anything that is not a plain interface-like token.
    name_ok = (
        bool(ib_name)
        and ib_name not in ('.', '..')
        and not ib_name.startswith('-')
        and all(ch.isalnum() or ch in '_.-' for ch in ib_name)
    )
    if not name_ok:
        raise ValueError('invalid IB interface name: {!r}'.format(ib_name))

    try:
        # Argument list instead of a shell string: no quoting problems.
        # stderr is discarded so error text can never reach the data file.
        completed = subprocess.run(
            ['/usr/bin/mlnx_perf', '-i', ib_name, '-t', '10', '-c', '1'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True)
        report = completed.stdout
    except OSError:
        # mlnx_perf missing or not executable: fall back to the zero values.
        report = ''

    rates = []
    for line in report.splitlines():
        if 'vport_rdma_unicast_bytes' not in line:
            continue
        fields = line.split('=')
        if len(fields) < 2:
            continue
        tokens = fields[1].split()
        if tokens:
            rates.append(tokens[0].replace(',', ''))

    output = '\n'.join(rates) if rates else '0\n0'
    with open('{}/{}'.format(output_dir, ib_name), 'w') as f:
        f.write(output)

if __name__ == '__main__':
    # Command-line dispatch: argv[1] selects the action, argv[2] (only for
    # 'net_traffic_total') names the IB interface to sample.
    cli_args = sys.argv[1:]
    action = cli_args[0] if cli_args else None

    if action is None:
        print('Error: At least one argument is required')
    elif action == 'IB_network_discovery':
        IB_network_discovery()
    elif action != 'net_traffic_total':
        print(f"Invalid option '{action}'")
    elif len(cli_args) < 2:
        print("Error: 'net_traffic_total' requires an IB name as the second argument")
    else:
        # The collector's return value is printed as-is.
        print(net_traffic_List(cli_args[1]))

zabbix 配置文件:

# Low-level discovery: prints the JSON list of IB interfaces found by the script.
UserParameter=ib_network.discovery,/usr/bin/python3 /etc/zabbix/externalscripts/ib_network_discovery.py IB_network_discovery
# Collector: $1 is the interface name. The script samples mlnx_perf and writes the
# vport_rdma_unicast_bytes rates to /etc/zabbix/externalscripts/$1.
# NOTE(review): the script calls mlnx_perf with "-t 10 -c 1"; confirm the agent
# Timeout setting is long enough for that sampling interval.
UserParameter=ib_network.get[*],/usr/bin/python3 /etc/zabbix/externalscripts/ib_network_discovery.py  net_traffic_total $1
# Receive rate: first line of the file written by ib_network.get.
UserParameter=ib_network.rx[*],cat /etc/zabbix/externalscripts/$1 | head -n 1
# Transmit rate: last line of the file written by ib_network.get.
UserParameter=ib_network.tx[*],cat /etc/zabbix/externalscripts/$1 | tail -n 1

添加自动发现模板:

image20231118115159dswdt25.png

添加监控项原型

image20231118115231r5blqte.png

“获取数值”监控项:该监控项本身不返回有效数值,只用于定期触发采集脚本、刷新数据文件,供后面的 rx 和 tx 两个监控项读取

image20231118115250805u1by.png

rx 和 tx 值

image202311181153386h3x1go.png

最后看图形

image20231118115411v4odd8t.png

shell 脚本自动发现 IB 网卡名称

#!/bin/bash
# Zabbix low-level discovery for InfiniBand interfaces.
# Usage: run without arguments; prints a {"data":[...]} JSON document with one
# {#IB_NAME} entry per interface whose name starts with "ib".
# NOTE(review): the Python variant of this discovery emits {#IBNAME}; confirm
# which macro name the template's prototypes actually use.

#######################################
# Print the discovery JSON for the given interface names.
# Arguments: zero or more interface names
# Outputs:   the JSON document on stdout
#######################################
print_ib_discovery() {
  local -a names=("$@")
  local count=${#names[@]}
  local i

  printf '{\n'
  printf '\t"data":['
  for ((i = 0; i < count; i++)); do
    # The name is passed as an argument, never as part of the format string.
    printf '\n\t\t{"{#IB_NAME}":"%s"}' "${names[i]}"
    if ((i < count - 1)); then
      printf ','
    fi
  done
  printf '\n\t]\n'
  printf '}\n'
}

# Collect each interface name once, in first-seen order. The same name can be
# matched several times in the ifconfig output, and duplicate rows are not
# wanted in the discovery document.
declare -A ib_seen=()
ib_names=()
while IFS= read -r ib_name; do
  [[ -n "$ib_name" ]] || continue
  [[ -z "${ib_seen[$ib_name]+x}" ]] || continue
  ib_seen[$ib_name]=1
  ib_names+=("$ib_name")
done < <(ifconfig -a 2>/dev/null | grep -ow 'ib\w*')

print_ib_discovery "${ib_names[@]}"

标题:IB网卡流量带宽监控
作者:cuijianzhe
地址:https://cuijianzhe.github.io/articles/2023/11/18/1700290073339.html