Skip to content

Commit 0d69e13

Browse files
author
Fox Snowpatch
committed
1 parent 77dff0c commit 0d69e13

File tree

2 files changed

+67
-1
lines changed

2 files changed

+67
-1
lines changed

drivers/pci/pci.h

+1
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,7 @@ struct aer_err_info {
412412

413413
unsigned int status; /* COR/UNCOR Error Status */
414414
unsigned int mask; /* COR/UNCOR Error Mask */
415+
unsigned int anfe_status; /* UNCOR Error Status for ANFE */
415416
struct pcie_tlp_log tlp; /* TLP Header */
416417
};
417418

drivers/pci/pcie/aer.c

+66-1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,12 @@ struct aer_stats {
107107
PCI_ERR_ROOT_MULTI_COR_RCV | \
108108
PCI_ERR_ROOT_MULTI_UNCOR_RCV)
109109

110+
/*
 * Uncorrectable Error Status bits that are considered candidates for
 * having been reported as an Advisory Non-Fatal Error (ANFE): Poisoned
 * TLP, Completion Timeout, Completer Abort, Unexpected Completion and
 * Unsupported Request.
 */
#define AER_ERR_ANFE_UNC_MASK (PCI_ERR_UNC_POISON_TLP | \
111+
PCI_ERR_UNC_COMP_TIME | \
112+
PCI_ERR_UNC_COMP_ABORT | \
113+
PCI_ERR_UNC_UNX_COMP | \
114+
PCI_ERR_UNC_UNSUP)
115+
110116
static int pcie_aer_disable;
111117
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
112118

@@ -675,6 +681,7 @@ static void __aer_print_error(struct pci_dev *dev,
675681
{
676682
const char **strings;
677683
unsigned long status = info->status & ~info->mask;
684+
unsigned long anfe_status = info->anfe_status;
678685
const char *level, *errmsg;
679686
int i;
680687

@@ -695,6 +702,20 @@ static void __aer_print_error(struct pci_dev *dev,
695702
info->first_error == i ? " (First)" : "");
696703
}
697704
pci_dev_aer_stats_incr(dev, info);
705+
706+
if (!anfe_status)
707+
return;
708+
709+
strings = aer_uncorrectable_error_string;
710+
pci_printk(level, dev, "Uncorrectable errors that may cause Advisory Non-Fatal:\n");
711+
712+
for_each_set_bit(i, &anfe_status, 32) {
713+
errmsg = strings[i];
714+
if (!errmsg)
715+
errmsg = "Unknown Error Bit";
716+
717+
pci_printk(level, dev, " [%2d] %s\n", i, errmsg);
718+
}
698719
}
699720

700721
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
@@ -1094,9 +1115,14 @@ static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
10941115
* Correctable error does not need software intervention.
10951116
* No need to go through error recovery process.
10961117
*/
1097-
if (aer)
1118+
if (aer) {
10981119
pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
10991120
info->status);
1121+
if (info->anfe_status)
1122+
pci_write_config_dword(dev,
1123+
aer + PCI_ERR_UNCOR_STATUS,
1124+
info->anfe_status);
1125+
}
11001126
if (pcie_aer_is_native(dev)) {
11011127
struct pci_driver *pdrv = dev->driver;
11021128

@@ -1196,6 +1222,41 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
11961222
EXPORT_SYMBOL_GPL(aer_recover_queue);
11971223
#endif
11981224

1225+
static void anfe_get_uc_status(struct pci_dev *dev, struct aer_err_info *info)
1226+
{
1227+
u32 uncor_mask, uncor_status;
1228+
u16 device_status;
1229+
int aer = dev->aer_cap;
1230+
1231+
if (pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &device_status))
1232+
return;
1233+
/*
1234+
* Take the most conservative route here. If there are
1235+
* Non-Fatal/Fatal errors detected, do not assume any
1236+
* bit in uncor_status is set by ANFE.
1237+
*/
1238+
if (device_status & (PCI_EXP_DEVSTA_NFED | PCI_EXP_DEVSTA_FED))
1239+
return;
1240+
1241+
pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &uncor_status);
1242+
pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &uncor_mask);
1243+
/*
1244+
* According to PCIe Base Specification Revision 6.1,
1245+
* Section 6.2.3.2.4, if an UNCOR error is raised as
1246+
* Advisory Non-Fatal error, it will match the following
1247+
* conditions:
1248+
* a. The severity of the error is Non-Fatal.
1249+
* b. The error is one of the following:
1250+
* 1. Poisoned TLP (Section 6.2.3.2.4.3)
1251+
* 2. Completion Timeout (Section 6.2.3.2.4.4)
1252+
* 3. Completer Abort (Section 6.2.3.2.4.1)
1253+
* 4. Unexpected Completion (Section 6.2.3.2.4.5)
1254+
* 5. Unsupported Request (Section 6.2.3.2.4.1)
1255+
*/
1256+
info->anfe_status = uncor_status & ~uncor_mask & ~info->severity &
1257+
AER_ERR_ANFE_UNC_MASK;
1258+
}
1259+
11991260
/**
12001261
* aer_get_device_error_info - read error status from dev and store it to info
12011262
* @dev: pointer to the device expected to have an error record
@@ -1213,6 +1274,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
12131274

12141275
/* Must reset in this function */
12151276
info->status = 0;
1277+
info->anfe_status = 0;
12161278
info->tlp_header_valid = 0;
12171279

12181280
/* The device might not support AER */
@@ -1226,6 +1288,9 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
12261288
&info->mask);
12271289
if (!(info->status & ~info->mask))
12281290
return 0;
1291+
1292+
if (info->status & PCI_ERR_COR_ADV_NFAT)
1293+
anfe_get_uc_status(dev, info);
12291294
} else if (type == PCI_EXP_TYPE_ROOT_PORT ||
12301295
type == PCI_EXP_TYPE_RC_EC ||
12311296
type == PCI_EXP_TYPE_DOWNSTREAM ||

0 commit comments

Comments
 (0)