Skip to content

Commit 41e62c6

Browse files
[pcieutil] Add 'pcie-aer' sub-command to display AER stats (sonic-net#1169)
- What I did
Add a new "pcie-aer" sub-command to pcieutil to display the AER stats. "pcieutil pcie-aer" has four sub-commands: 'all', 'correctable', 'fatal' and 'non-fatal'. 'all' displays the AER stats for all severities; 'correctable', 'fatal' and 'non-fatal' display the AER stats of the respective severity. Every pcie-aer sub-command accepts the '-d/--device' and '-v/--verbose' options; by default only non-zero AER stats are displayed.
```
root@sonic:/home/admin# pcieutil pcie-aer
Usage: pcieutil pcie-aer [OPTIONS] COMMAND [ARGS]...

  Display PCIe AER status

Options:
  --help  Show this message and exit.

Commands:
  all          Show all PCIe AER attributes
  correctable  Show correctable PCIe AER attributes
  fatal        Show fatal PCIe AER attributes
  non-fatal    Show non-fatal PCIe AER attributes
root@sonic:/home/admin#
root@sonic:/home/admin# pcieutil pcie-aer all --help
Usage: pcieutil pcie-aer all [OPTIONS]

  Show all PCIe AER attributes
  (Default: Display only non-zero attributes)

Options:
  -d, --device <BUS>:<DEV>.<FN>  Display stats only for the specified device
  -v, --verbose                  Display all stats
  --help                         Show this message and exit.
root@sonic:/home/admin#
```
Depends on: sonic-net/sonic-platform-daemons#100
- How I did it
Add new functions in pcieutil to implement the sub-commands, which retrieve the AER stats from STATE_DB and output them in tabular format.
1 parent 47f412b commit 41e62c6

File tree

3 files changed

+470
-0
lines changed

3 files changed

+470
-0
lines changed

pcieutil/main.py

+167
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,15 @@
77

88
try:
99
import os
10+
import re
1011
import sys
12+
from collections import OrderedDict
1113

1214
import click
1315
from sonic_py_common import device_info, logger
16+
from swsssdk import SonicV2Connector
17+
from tabulate import tabulate
18+
import utilities_common.cli as clicommon
1419
except ImportError as e:
1520
raise ImportError("%s - required module not found" % str(e))
1621

@@ -105,6 +110,168 @@ def show():
105110
click.echo("bus:dev.fn %s:%s.%s - dev_id=0x%s, %s" % (Bus, Dev, Fn, Id, Name))
106111

107112

113+
# PCIe AER stats helpers
114+
115+
# Counter names shared by the two uncorrectable severities ("fatal" and
# "non_fatal"); only the trailing TOTAL_* counter differs between them.
_AER_UNCORRECTABLE_FIELDS = (
    'Undefined', 'DLP', 'SDES', 'TLP', 'FCP', 'CmpltTO', 'CmpltAbrt',
    'UnxCmplt', 'RxOF', 'MalfTLP', 'ECRC', 'UnsupReq', 'ACSViol',
    'UncorrIntErr', 'BlockedTLP', 'AtomicOpBlocked', 'TLPBlockedErr',
)

# AER counter names for each severity, in the order they are listed.
# Every list ends with that severity's TOTAL_* counter.
aer_fields = {
    "correctable": [
        'RxErr', 'BadTLP', 'BadDLLP', 'Rollover', 'Timeout', 'NonFatalErr',
        'CorrIntErr', 'HeaderOF', 'TOTAL_ERR_COR',
    ],
    "fatal": list(_AER_UNCORRECTABLE_FIELDS) + ['TOTAL_ERR_FATAL'],
    "non_fatal": list(_AER_UNCORRECTABLE_FIELDS) + ['TOTAL_ERR_NONFATAL'],
}
122+
123+
124+
class PcieDevice(click.ParamType):
    '''Click parameter type for a PCIe device address given as
    <Bus>:<Dev>.<Fn>, where all three components are hexadecimal.

    A valid value is normalised to the zero-padded form "bb:dd.f"
    (lower-case hex bus and device, decimal function).'''
    name = "<Bus>:<Dev>.<Fn>"

    def convert(self, value, param, ctx):
        '''Validate value and return it in normalised "bb:dd.f" form.

        Calls self.fail() when value is not in <Bus>:<Dev>.<Fn> format or
        when a component is out of range.'''
        # re.match() only anchors at the start; \Z makes the pattern cover the
        # whole input so that trailing characters (e.g. "00:01.0f") are
        # rejected instead of being silently dropped.
        match = re.match(r'([0-9A-Fa-f]{1,2}):([0-9A-Fa-f]{1,2})\.([0-9A-Fa-f])\Z', value)

        if not match:
            self.fail('{} is not in <Bus>:<Dev>.<Fn> format'.format(value), param, ctx)

        Bus, Dev, Fn = [int(val, 16) for val in match.groups()]

        # Two hex digits cannot exceed 255; the check is kept as a guard in
        # case the pattern above is ever widened.
        if Bus > 255:
            self.fail('Invalid Bus number', param, ctx)

        if Dev > 31:
            self.fail('Invalid Dev number', param, ctx)

        if Fn > 7:
            self.fail('Invalid Fn number', param, ctx)

        return "%02x:%02x.%d" % (Bus, Dev, Fn)
144+
145+
146+
# Options attached to every PCIe AER command by PcieAerCommand.
# '--device' is delivered to the command callback as 'device_key'.
_pcie_aer_click_options = [
    click.Option(
        param_decls=['-d', '--device', 'device_key'],
        type=PcieDevice(),
        help="Display stats only for the specified device",
    ),
    click.Option(
        param_decls=['-v', '--verbose'],
        is_flag=True,
        help="Display all stats",
    ),
]
154+
155+
156+
class PcieAerCommand(click.Command):
    '''This subclass of click.Command provides common options, help
    and short help text for PCIe AER commands'''

    def __init__(self, *args, **kwargs):
        super(PcieAerCommand, self).__init__(*args, **kwargs)
        # Give each command its own list: aliasing the module-level list would
        # let an append to one command's params show up on every command.
        self.params = list(_pcie_aer_click_options)

    def _summary(self):
        '''Return the one-line description derived from the command name
        (underscores in the name are shown as dashes)'''
        return "Show {} PCIe AER attributes".format(self.name.replace("_", "-"))

    def format_help_text(self, ctx, formatter):
        '''Write the summary line and the note about the default filtering
        in place of the callback's docstring'''
        formatter.write_paragraph()
        with formatter.indentation():
            formatter.write_text(self._summary())
            formatter.write_text("(Default: Display only non-zero attributes)")

    def get_short_help_str(self, limit=45):
        '''Return the summary line shown in the parent group's command list.

        limit is accepted for compatibility with click.Command (same default)
        and is not used: the summary is returned untruncated.'''
        return self._summary()
172+
173+
174+
def pcie_aer_display(ctx, severity):
    '''Print the AER counters of one severity as a grid table.

    Counters are read from the "PCIE_DEVICE|*" entries of STATE_DB. Each
    counter is a table row and each displayed device is a column.

    ctx: click context of the invoked command. ctx.params['device_key']
         restricts the output to one device, ctx.params['verbose'] disables
         the non-zero filtering, and ctx.obj records whether a table has
         already been printed so that later tables are preceded by a blank
         line.
    severity: a key of aer_fields ("correctable", "fatal" or "non_fatal")

    Exits through ctx.exit() when a device was requested and has no entry
    in the DB.'''
    device_key = ctx.params['device_key']
    no_zero = not ctx.params['verbose']
    header = ["AER - " + severity.upper().replace("_", "")]
    fields = aer_fields[severity]
    dev_found = False

    statedb = SonicV2Connector()
    statedb.connect(statedb.STATE_DB)

    # One row per counter; the first cell of a row is the counter name
    table = OrderedDict((field, [field]) for field in fields)

    if device_key:
        pcie_dev_list = ["PCIE_DEVICE|%s" % device_key]
    else:
        # keys() may return nothing when the table has no entries
        pcie_dev_list = sorted(statedb.keys(statedb.STATE_DB, "PCIE_DEVICE|*") or [])

    for pcie_dev_key in pcie_dev_list:
        aer_attribute = statedb.get_all(statedb.STATE_DB, pcie_dev_key)
        if not aer_attribute:
            continue

        if device_key:
            dev_found = True

        # In non-verbose mode skip a device whose counters of this severity
        # are all zero
        if no_zero and all(val == '0' for key, val in aer_attribute.items() if key.startswith(severity)):
            continue

        pcie_dev = pcie_dev_key.split("|")[1]
        # An entry without 'id' is shown as 'NA', like a missing counter,
        # instead of aborting with a KeyError
        dev_id = aer_attribute.get('id', 'NA')

        # Tabulate Header
        header.append("%s\n%s" % (pcie_dev, dev_id))

        # Tabulate Row
        for field in fields:
            table[field].append(aer_attribute.get(severity + "|" + field, 'NA'))

    if device_key and not dev_found:
        # NOTE(review): a message rather than an integer is passed as the exit
        # code; presumably this relies on the message being printed on exit -
        # confirm before changing.
        ctx.exit("Device not found in DB")

    # Strip fields with no non-zero value
    if no_zero:
        for field in fields:
            if all(val == '0' for val in table[field][1:]):
                del table[field]

    # Nothing is printed in non-verbose mode when no device column was added
    if not (no_zero and (len(header) == 1)):
        if ctx.obj:
            click.echo("")

        click.echo(tabulate(list(table.values()), header, tablefmt="grid"))
        ctx.obj = True
234+
235+
236+
# Show PCIe AER status
237+
@cli.group(cls=clicommon.AliasedGroup)
@click.pass_context
def pcie_aer(ctx):
    '''Display PCIe AER status'''
    # ctx.obj flags whether a table has already been printed. It starts out
    # False and is set to True by pcie_aer_display after printing a table, so
    # that a blank line is inserted between severities in the 'all' context.
    ctx.obj = False
243+
244+
245+
@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def correctable(ctx, device_key, verbose):
    '''Show correctable PCIe AER attributes'''
    # device_key and verbose are accepted because click passes every option
    # to the callback; pcie_aer_display reads them from ctx.params.
    pcie_aer_display(ctx, severity="correctable")
250+
251+
252+
@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def fatal(ctx, device_key, verbose):
    '''Show fatal PCIe AER attributes'''
    # device_key and verbose are accepted because click passes every option
    # to the callback; pcie_aer_display reads them from ctx.params.
    pcie_aer_display(ctx, severity="fatal")
257+
258+
259+
@pcie_aer.command(cls=PcieAerCommand)
@click.pass_context
def non_fatal(ctx, device_key, verbose):
    '''Show non-fatal PCIe AER attributes'''
    # device_key and verbose are accepted because click passes every option
    # to the callback; pcie_aer_display reads them from ctx.params.
    pcie_aer_display(ctx, severity="non_fatal")
264+
265+
266+
@pcie_aer.command(name='all', cls=PcieAerCommand)
@click.pass_context
def all_errors(ctx, device_key, verbose):
    '''Show all PCIe AER attributes'''
    # One table per severity, printed in this fixed order. device_key and
    # verbose are read from ctx.params by pcie_aer_display.
    for severity in ("correctable", "fatal", "non_fatal"):
        pcie_aer_display(ctx, severity)
273+
274+
108275
# Show PCIE Vender ID and Device ID
109276
@cli.command()
110277
def check():

tests/mock_tables/state_db.json

+98
Original file line numberDiff line numberDiff line change
@@ -455,5 +455,103 @@
455455
"CHASSIS_MIDPLANE_TABLE|LINE-CARD1": {
456456
"ip_address": "192.168.1.2",
457457
"access": "False"
458+
},
459+
"PCIE_DEVICE|00:01.0": {
460+
"correctable|BadDLLP": "0",
461+
"correctable|BadTLP": "1",
463+
"correctable|CorrIntErr": "0",
464+
"correctable|HeaderOF": "0",
465+
"correctable|NonFatalErr": "0",
466+
"correctable|Rollover": "0",
467+
"correctable|RxErr": "0",
468+
"correctable|TOTAL_ERR_COR": "1",
470+
"correctable|Timeout": "0",
471+
"fatal|ACSViol": "0",
472+
"fatal|AtomicOpBlocked": "0",
473+
"fatal|BlockedTLP": "0",
474+
"fatal|CmpltAbrt": "0",
475+
"fatal|CmpltTO": "0",
476+
"fatal|DLP": "0",
477+
"fatal|ECRC": "0",
478+
"fatal|FCP": "0",
479+
"fatal|MalfTLP": "0",
480+
"fatal|RxOF": "0",
481+
"fatal|SDES": "0",
482+
"fatal|TLP": "0",
483+
"fatal|TLPBlockedErr": "0",
484+
"fatal|TOTAL_ERR_FATAL": "0",
485+
"fatal|UncorrIntErr": "0",
486+
"fatal|Undefined": "0",
487+
"fatal|UnsupReq": "0",
488+
"fatal|UnxCmplt": "0",
489+
"id": "0x0001",
490+
"non_fatal|ACSViol": "0",
491+
"non_fatal|AtomicOpBlocked": "0",
492+
"non_fatal|BlockedTLP": "0",
493+
"non_fatal|CmpltAbrt": "0",
494+
"non_fatal|CmpltTO": "0",
495+
"non_fatal|DLP": "0",
496+
"non_fatal|ECRC": "0",
497+
"non_fatal|FCP": "0",
498+
"non_fatal|MalfTLP": "1",
499+
"non_fatal|RxOF": "0",
500+
"non_fatal|SDES": "0",
501+
"non_fatal|TLP": "0",
502+
"non_fatal|TLPBlockedErr": "0",
503+
"non_fatal|TOTAL_ERR_NONFATAL": "1",
504+
"non_fatal|UncorrIntErr": "0",
505+
"non_fatal|Undefined": "0",
506+
"non_fatal|UnsupReq": "0",
507+
"non_fatal|UnxCmplt": "0"
508+
},
509+
"PCIE_DEVICE|01:00.0": {
510+
"correctable|BadDLLP": "0",
511+
"correctable|BadTLP": "0",
512+
"correctable|CorrIntErr": "0",
513+
"correctable|HeaderOF": "0",
514+
"correctable|NonFatalErr": "0",
515+
"correctable|Rollover": "0",
516+
"correctable|RxErr": "1",
517+
"correctable|TOTAL_ERR_COR": "1",
518+
"correctable|Timeout": "0",
519+
"fatal|ACSViol": "0",
520+
"fatal|AtomicOpBlocked": "0",
521+
"fatal|BlockedTLP": "0",
522+
"fatal|CmpltAbrt": "0",
523+
"fatal|CmpltTO": "0",
524+
"fatal|DLP": "0",
525+
"fatal|ECRC": "0",
526+
"fatal|FCP": "0",
527+
"fatal|MalfTLP": "0",
528+
"fatal|RxOF": "0",
529+
"fatal|SDES": "0",
530+
"fatal|TLP": "0",
531+
"fatal|TLPBlockedErr": "0",
532+
"fatal|TOTAL_ERR_FATAL": "0",
533+
"fatal|UncorrIntErr": "0",
534+
"fatal|Undefined": "0",
535+
"fatal|UnsupReq": "0",
536+
"fatal|UnxCmplt": "0",
537+
"id": "0x0002",
538+
"non_fatal|ACSViol": "0",
539+
"non_fatal|AtomicOpBlocked": "0",
540+
"non_fatal|BlockedTLP": "0",
541+
"non_fatal|CmpltAbrt": "0",
542+
"non_fatal|CmpltTO": "0",
543+
"non_fatal|DLP": "0",
544+
"non_fatal|ECRC": "0",
545+
"non_fatal|FCP": "0",
546+
"non_fatal|MalfTLP": "0",
547+
"non_fatal|RxOF": "0",
548+
"non_fatal|SDES": "0",
549+
"non_fatal|TLP": "0",
550+
"non_fatal|TLPBlockedErr": "0",
551+
"non_fatal|TOTAL_ERR_NONFATAL": "0",
552+
"non_fatal|UncorrIntErr": "0",
553+
"non_fatal|Undefined": "0",
554+
"non_fatal|UnsupReq": "0",
555+
"non_fatal|UnxCmplt": "0"
458556
}
459557
}

0 commit comments

Comments
 (0)