@@ -107,6 +107,12 @@ struct aer_stats {
107
107
PCI_ERR_ROOT_MULTI_COR_RCV | \
108
108
PCI_ERR_ROOT_MULTI_UNCOR_RCV)
109
109
110
/*
 * Uncorrectable Error Status bits that may be reported as an Advisory
 * Non-Fatal Error: Poisoned TLP, Completion Timeout, Completer Abort,
 * Unexpected Completion and Unsupported Request.
 */
#define AER_ERR_ANFE_UNC_MASK					\
	(PCI_ERR_UNC_POISON_TLP | PCI_ERR_UNC_COMP_TIME |	\
	 PCI_ERR_UNC_COMP_ABORT | PCI_ERR_UNC_UNX_COMP |	\
	 PCI_ERR_UNC_UNSUP)
115
+
110
116
static int pcie_aer_disable ;
111
117
static pci_ers_result_t aer_root_reset (struct pci_dev * dev );
112
118
@@ -675,6 +681,7 @@ static void __aer_print_error(struct pci_dev *dev,
675
681
{
676
682
const char * * strings ;
677
683
unsigned long status = info -> status & ~info -> mask ;
684
+ unsigned long anfe_status = info -> anfe_status ;
678
685
const char * level , * errmsg ;
679
686
int i ;
680
687
@@ -695,6 +702,20 @@ static void __aer_print_error(struct pci_dev *dev,
695
702
info -> first_error == i ? " (First)" : "" );
696
703
}
697
704
pci_dev_aer_stats_incr (dev , info );
705
+
706
+ if (!anfe_status )
707
+ return ;
708
+
709
+ strings = aer_uncorrectable_error_string ;
710
+ pci_printk (level , dev , "Uncorrectable errors that may cause Advisory Non-Fatal:\n" );
711
+
712
+ for_each_set_bit (i , & anfe_status , 32 ) {
713
+ errmsg = strings [i ];
714
+ if (!errmsg )
715
+ errmsg = "Unknown Error Bit" ;
716
+
717
+ pci_printk (level , dev , " [%2d] %s\n" , i , errmsg );
718
+ }
698
719
}
699
720
700
721
void aer_print_error (struct pci_dev * dev , struct aer_err_info * info )
@@ -1094,9 +1115,14 @@ static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
1094
1115
* Correctable error does not need software intervention.
1095
1116
* No need to go through error recovery process.
1096
1117
*/
1097
- if (aer )
1118
+ if (aer ) {
1098
1119
pci_write_config_dword (dev , aer + PCI_ERR_COR_STATUS ,
1099
1120
info -> status );
1121
+ if (info -> anfe_status )
1122
+ pci_write_config_dword (dev ,
1123
+ aer + PCI_ERR_UNCOR_STATUS ,
1124
+ info -> anfe_status );
1125
+ }
1100
1126
if (pcie_aer_is_native (dev )) {
1101
1127
struct pci_driver * pdrv = dev -> driver ;
1102
1128
@@ -1196,6 +1222,41 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
1196
1222
EXPORT_SYMBOL_GPL (aer_recover_queue );
1197
1223
#endif
1198
1224
1225
+ static void anfe_get_uc_status (struct pci_dev * dev , struct aer_err_info * info )
1226
+ {
1227
+ u32 uncor_mask , uncor_status ;
1228
+ u16 device_status ;
1229
+ int aer = dev -> aer_cap ;
1230
+
1231
+ if (pcie_capability_read_word (dev , PCI_EXP_DEVSTA , & device_status ))
1232
+ return ;
1233
+ /*
1234
+ * Take the most conservative route here. If there are
1235
+ * Non-Fatal/Fatal errors detected, do not assume any
1236
+ * bit in uncor_status is set by ANFE.
1237
+ */
1238
+ if (device_status & (PCI_EXP_DEVSTA_NFED | PCI_EXP_DEVSTA_FED ))
1239
+ return ;
1240
+
1241
+ pci_read_config_dword (dev , aer + PCI_ERR_UNCOR_STATUS , & uncor_status );
1242
+ pci_read_config_dword (dev , aer + PCI_ERR_UNCOR_MASK , & uncor_mask );
1243
+ /*
1244
+ * According to PCIe Base Specification Revision 6.1,
1245
+ * Section 6.2.3.2.4, if an UNCOR error is raised as
1246
+ * Advisory Non-Fatal error, it will match the following
1247
+ * conditions:
1248
+ * a. The severity of the error is Non-Fatal.
1249
+ * b. The error is one of the following:
1250
+ * 1. Poisoned TLP (Section 6.2.3.2.4.3)
1251
+ * 2. Completion Timeout (Section 6.2.3.2.4.4)
1252
+ * 3. Completer Abort (Section 6.2.3.2.4.1)
1253
+ * 4. Unexpected Completion (Section 6.2.3.2.4.5)
1254
+ * 5. Unsupported Request (Section 6.2.3.2.4.1)
1255
+ */
1256
+ info -> anfe_status = uncor_status & ~uncor_mask & ~info -> severity &
1257
+ AER_ERR_ANFE_UNC_MASK ;
1258
+ }
1259
+
1199
1260
/**
1200
1261
* aer_get_device_error_info - read error status from dev and store it to info
1201
1262
* @dev: pointer to the device expected to have a error record
@@ -1213,6 +1274,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
1213
1274
1214
1275
/* Must reset in this function */
1215
1276
info -> status = 0 ;
1277
+ info -> anfe_status = 0 ;
1216
1278
info -> tlp_header_valid = 0 ;
1217
1279
1218
1280
/* The device might not support AER */
@@ -1226,6 +1288,9 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
1226
1288
& info -> mask );
1227
1289
if (!(info -> status & ~info -> mask ))
1228
1290
return 0 ;
1291
+
1292
+ if (info -> status & PCI_ERR_COR_ADV_NFAT )
1293
+ anfe_get_uc_status (dev , info );
1229
1294
} else if (type == PCI_EXP_TYPE_ROOT_PORT ||
1230
1295
type == PCI_EXP_TYPE_RC_EC ||
1231
1296
type == PCI_EXP_TYPE_DOWNSTREAM ||
0 commit comments