diff --git a/docs/api.rst b/docs/api.rst index 5bb0a31..51318c0 100755 --- a/docs/api.rst +++ b/docs/api.rst @@ -26,8 +26,8 @@ These are the standard spatial regression models supported by the `spreg` packag spreg.GM_Lag spreg.ML_Lag - spreg.GMM_Error spreg.ML_Error + spreg.GMM_Error spreg.GM_Error spreg.GM_Error_Het spreg.GM_Error_Hom @@ -141,6 +141,7 @@ Diagnostic tests are useful for identifying model fit, sufficiency, and specific spreg.panel_rLMerror spreg.panel_Hausman spreg.sputils.spmultiplier + spreg.diagnostics_probit.sp_tests Spatial Specification Search @@ -184,4 +185,4 @@ Tools for simulating synthetic data according to data-generating processes impli spreg.dgp.dgp_probit spreg.dgp.make_bin spreg.dgp.make_heterror - spreg.dgp.make_vmult \ No newline at end of file + spreg.dgp.make_vmult diff --git a/spreg/__init__.py b/spreg/__init__.py index 278b2d6..eb65a82 100755 --- a/spreg/__init__.py +++ b/spreg/__init__.py @@ -8,6 +8,7 @@ from .diagnostics_sp import * from .diagnostics_sur import * from .diagnostics_tsls import * +from .diagnostics_probit import * from .error_sp import * from .error_sp_het import * from .error_sp_het_regimes import * diff --git a/spreg/diagnostics_probit.py b/spreg/diagnostics_probit.py new file mode 100755 index 0000000..65d958c --- /dev/null +++ b/spreg/diagnostics_probit.py @@ -0,0 +1,343 @@ +""" +Diagnostics in probit regression. 
+ +""" +__author__ = ( + "Luc Anselin lanselin@gmail.com, Pedro Amaral pedrovma@gmail.com " +) + +from math import sqrt, pi + +from libpysal.common import MISSINGVALUE +import numpy as np +import numpy.linalg as la +import scipy.sparse as SP +from scipy import stats +from scipy.stats import norm + +__all__ = [ + "pred_table", + "probit_fit", + "probit_lrtest", + "mcfad_rho", + "probit_ape", + "sp_tests", + "moran_KP", +] + + +def pred_table(reg): + """ + Calculates a table comparing predicted to actual outcomes for a + discrete choice model + + Parameters + ---------- + reg : regression object + output instance from a probit regression model + + Returns + ---------- + predtab_vals : dictionary + includes margins and cells of actual and predicted + values for discrete choice model + actpos : observed positives (=1) + actneg : observed negatives (=0) + predpos : predicted positives + predneg : predicted negatives + truepos : predicted 1 when actual = 1 + falsepos : predicted 1 when actual = 0 + trueneg : predicted 0 when actual = 0 + falseneg : predicted 0 when actual = 1 + + """ + predtab_vals = {} + pos = reg.y.sum() + predtab_vals["actpos"] = int(pos) + neg = reg.n - pos + predtab_vals["actneg"] = int(neg) + act1 = (reg.y == 1) * 1 + act0 = (reg.y == 0) * 1 + ppos = reg.predybin.sum() + predtab_vals["predpos"] = ppos + pneg = reg.n - ppos + predtab_vals["predneg"] = pneg + pred1 = (reg.predybin == 1) * 1 + pred0 = (reg.predybin == 0) * 1 + truep = (pred1 * act1) * 1 + predtab_vals["truepos"] = truep.sum() + truen = (pred0 * act0) * 1 + predtab_vals["trueneg"] = truen.sum() + fpos = (pred1 * act0) * 1 + predtab_vals["falsepos"] = fpos.sum() + fneg = (pred0 * act1) * 1 + predtab_vals["falseneg"] = fneg.sum() + + return predtab_vals + + +def probit_fit(reg): + """ + Various measures of fit for discrete choice models, derived from the + prediction table (pred_table) + + Parameters + ---------- + reg : regression object + output instance from a probit regression model 
+ must contain predtable attribute + + Returns + ---------- + prob_fit : a dictionary containing various measures of fit + TPR : true positive rate (sensitivity, recall, hit rate) + TNR : true negative rate (specificity, selectivity) + PREDPC : accuracy, percent correctly predicted + BA : balanced accuracy + + """ + + prob_fit = {} + prob_fit["TPR"] = 100.0 * reg.predtable["truepos"] / reg.predtable["actpos"] + prob_fit["TNR"] = 100.0 * reg.predtable["trueneg"] / reg.predtable["actneg"] + prob_fit["BA"] = (prob_fit["TPR"] + prob_fit["TNR"])/2.0 + prob_fit["PREDPC"] = 100.0 * (reg.predtable["truepos"] + reg.predtable["trueneg"]) / reg.n + + return prob_fit + +def probit_lrtest(regprob): + """ + Likelihood ratio test statistic for probit model + + Parameters + ---------- + regprob : probit regression object + + Returns + ------- + + likratio : dictionary + contains the statistic for the null model (L0), the LR test(likr), + the degrees of freedom (df) and the p-value (pvalue) + L0 : float + log likelihood of null model + likr : float + likelihood ratio statistic + df : integer + degrees of freedom + p-value : float + p-value + """ + + likratio = {} + P = np.mean(regprob.y) + L0 = regprob.n * (P * np.log(P) + (1 - P) * np.log(1 - P)) + likratio["L0"] = L0 + LR = -2.0 * (L0 - regprob.logl) + likratio["likr"] = LR + likratio["df"] = regprob.k + pval = stats.chisqprob(LR, regprob.k) + likratio["p-value"] = pval + + return likratio + +def mcfad_rho(regprob): + """ + McFadden's rho measure of fit + + Parameters + --------- + regprob : probit regression object + + Returns + ------- + rho : McFadden's rho (1 - L/L0) + + """ + + rho = 1.0 - (regprob.logl / regprob.L0) + return rho + +def probit_ape(regprob): + """ + Average partial effects + + Parameters + ---------- + regprob : probit regression object + + Returns + ------- + tuple with: + scale : the scale of the marginal effects, determined by regprob.scalem + Default: 'phimean' (Mean of individual marginal effects) + 
Alternative: 'xmean' (Marginal effects at variables mean) + slopes : marginal effects or average partial effects (not for constant) + slopes_vm : estimates of variance of marginal effects (not for constant) + slopes_std_err : estimates of standard errors of marginal effects + slopes_z_stat : tuple with z-statistics and p-values for marginal effects + + """ + + + if regprob.scalem == "xmean": + xmb = regprob.xmean.T @ regprob.betas + scale = stats.norm.pdf(xmb) + + elif regprob.scalem == "phimean": + scale = np.mean(regprob.phiy,axis=0) + + # average partial effects (no constant) + slopes = (regprob.betas[1:,0] * scale).reshape(-1,1) + + # variance of partial effects + xmb = regprob.xmean.T @ regprob.betas + bxt = regprob.betas @ regprob.xmean.T + dfdb = np.eye(regprob.k) - xmb * bxt + slopes_vm = (scale ** 2) * ((dfdb @ regprob.vm) @ dfdb.T) + + # standard errors + slopes_std_err = np.sqrt(slopes_vm[1:,1:].diagonal()).reshape(-1,1) + + # z-stats and p-values + sl_zStat = slopes / slopes_std_err + slopes_z_stat = [(sl_zStat[i,0],stats.norm.sf(abs(sl_zStat[i,0])) * 2) for i in range(len(slopes))] + + + return (scale, slopes,slopes_vm[1:,1:],slopes_std_err,slopes_z_stat) + + +def sp_tests(regprob=None, obj_list=None): + """ + Calculates tests for spatial dependence in Probit models + + Parameters + ---------- + regprob : regression object from spreg + output instance from a probit model + obj_list : list + list of regression elements from both libpysal and statsmodels' ProbitResults + The list should be such as: + [libpysal.weights, ProbitResults.fittedvalues, ProbitResults.resid_response, ProbitResults.resid_generalized] + + Returns + ------- + tuple with LM_Err, moran, ps as 2x1 arrays with statistic and p-value + LM_Err: Pinkse + moran : Kelejian-Prucha generalized Moran + ps : Pinkse-Slade + + Examples + -------- + The results of this function will be automatically added to the output of the probit model if using spreg. 
+ If using the Probit estimator from statsmodels, the user can call the function with the obj_list argument. + The argument obj_list should be a list with the following elements, in this order: + [libpysal.weights, ProbitResults.fittedvalues, ProbitResults.resid_response, ProbitResults.resid_generalized] + The function will then return and print the results of the spatial diagnostics. + + >>> import libpysal + >>> import statsmodels.api as sm + >>> import geopandas as gpd + >>> from spreg.diagnostics_probit import sp_tests + + >>> columb = libpysal.examples.load_example('Columbus') + >>> dfs = gpd.read_file(columb.get_path("columbus.shp")) + >>> w = libpysal.weights.Queen.from_dataframe(dfs) + >>> w.transform='r' + + >>> y = (dfs["CRIME"] > 40).astype(float) + >>> X = dfs[["INC","HOVAL"]] + >>> X = sm.add_constant(X) + + >>> probit_mod = sm.Probit(y, X) + >>> probit_res = probit_mod.fit(disp=False) + >>> LM_err, moran, ps = sp_tests(obj_list=[w, probit_res.fittedvalues, probit_res.resid_response, probit_res.resid_generalized]) + PROBIT MODEL DIAGNOSTICS FOR SPATIAL DEPENDENCE + TEST DF VALUE PROB + Kelejian-Prucha (error) 1 1.721 0.0852 + Pinkse (error) 1 3.132 0.0768 + Pinkse-Slade (error) 1 2.558 0.1097 + + """ + if regprob: + w, Phi, phi, u_naive, u_gen, n = regprob.w, regprob.predy, regprob.phiy, regprob.u_naive, regprob.u_gen, regprob.n + elif obj_list: + w, fittedvalues, u_naive, u_gen = obj_list + Phi = norm.cdf(fittedvalues) + phi = norm.pdf(fittedvalues) + n = w.n + + try: + w = w.sparse + except: + w = w + + # Pinkse_error: + Phi_prod = Phi * (1 - Phi) + sig2 = np.sum((phi * phi) / Phi_prod) / n + LM_err_num = np.dot(u_gen.T, (w * u_gen)) ** 2 + trWW = np.sum((w * w).diagonal()) + trWWWWp = trWW + np.sum((w * w.T).diagonal()) + LM_err = float(1.0 * LM_err_num / (sig2 ** 2 * trWWWWp)) + LM_err = np.array([LM_err, stats.chisqprob(LM_err, 1)]) + # KP_error: + moran = moran_KP(w, u_naive, Phi_prod) + # Pinkse-Slade_error: + u_std = u_naive / np.sqrt(Phi_prod) 
+ ps_num = np.dot(u_std.T, (w * u_std)) ** 2 + trWpW = np.sum((w.T * w).diagonal()) + ps = float(ps_num / (trWW + trWpW)) + # chi-square instead of bootstrap. + ps = np.array([ps, stats.chisqprob(ps, 1)]) + + if obj_list: + from .output import _probit_out + reg_simile = type('reg_simile', (object,), {})() + reg_simile.Pinkse_error = LM_err + reg_simile.KP_error = moran + reg_simile.PS_error = ps + print("PROBIT MODEL "+_probit_out(reg_simile, spat_diag=True, sptests_only=True)[1:]) + + return LM_err, moran, ps + +def moran_KP(w, u, sig2i): + """ + Calculates Kelejian-Prucha Moran-flavoured tests + + Parameters + ---------- + + w : W + PySAL weights instance aligned with y + u : array + nx1 array of naive residuals + sig2i : array + nx1 array of individual variance + + Returns + ------- + moran : array, Kelejian-Prucha Moran's I with p-value + """ + try: + w = w.sparse + except: + pass + moran_num = np.dot(u.T, (w * u)) + E = SP.lil_matrix(w.get_shape()) + E.setdiag(sig2i.flat) + E = E.asformat("csr") + WE = w * E + moran_den = np.sqrt(np.sum((WE * WE + (w.T * E) * WE).diagonal())) + moran = float(1.0 * moran_num / moran_den) + moran = np.array([moran, stats.norm.sf(abs(moran)) * 2.0]) + return moran + + +def _test(): + import doctest + + doctest.testmod() + + +if __name__ == "__main__": + _test() diff --git a/spreg/diagnostics_sp.py b/spreg/diagnostics_sp.py index 01660d4..66bd2d4 100644 --- a/spreg/diagnostics_sp.py +++ b/spreg/diagnostics_sp.py @@ -3,7 +3,7 @@ """ __author__ = "Luc Anselin lanselin@gmail.com, Daniel Arribas-Bel darribas@asu.edu, Pedro Amaral pedrovma@gmail.com" -from .utils import spdot +from .sputils import spdot # from scipy.stats.stats import chisqprob from scipy import stats @@ -698,7 +698,10 @@ def lm_wx(reg, w): # preliminaries # set up X1 (constant) and X (no constant) as x1 and xx x1 = reg.x - xx = x1[:,1:] + try: + xx = reg.x[:, reg.output["var_type"] == 'x'] + except KeyError: + xx = reg.x[:, reg._var_type == 'x'] # WX wx = w.sparse * 
xx # end of preliminaries @@ -739,7 +742,10 @@ def lm_spdurbin(reg,w): # preliminaries # set up X1 (constant) and X (no constant) as x1 and xx x1 = reg.x - xx = x1[:,1:] + try: + xx = reg.x[:, reg.output["var_type"] == 'x'] + except KeyError: + xx = reg.x[:, reg._var_type == 'x'] k = x1.shape[1] # WX wx = w.sparse * xx @@ -761,15 +767,15 @@ def lm_spdurbin(reg,w): jj1b = np.hstack((np.zeros((1,k)),np.array([reg.n/(2.0*reg.sig2n)]).reshape(1,1))) jj11 = np.vstack((jj1a,jj1b)) # J_12: matrix with k-1 rows X1'WX1b and X1'WX, and 1 row of zeros - jj12a = np.hstack((x1.T @ wxb, x1.T @ wx)) - jj12 = np.vstack((jj12a,np.zeros((1,k)))) + jj12a = np.hstack((x1.T @ wxb, spdot(x1.T, wx, array_out=True))) + jj12 = np.vstack((jj12a,np.zeros((1,jj12a.shape[1])))) # J_22 matrix with diagonal elements b'X1'W'WX1b + T.sig2n and X'W'WX # and off-diagonal element b'X1'W'WX jj22a = wxb.T @ wxb + pp * reg.sig2n jj22a = jj22a.reshape(1,1) - wxbtwx = (wxb.T @ wx).reshape(1,k-1) + wxbtwx = (wxb.T @ wx).reshape(1,xx.shape[1]) jj22b = np.hstack((jj22a,wxbtwx)) - wxtwx = wx.T @ wx + wxtwx = spdot(wx.T,wx, array_out=True) jj22c = np.hstack((wxbtwx.T,wxtwx)) jj22 = np.vstack((jj22b,jj22c)) # J^22 (the inverse) from J^22 = (J_22 - J_21.J_11^-1.J_12)^-1 diff --git a/spreg/error_sp.py b/spreg/error_sp.py index 1d8e109..587f22a 100644 --- a/spreg/error_sp.py +++ b/spreg/error_sp.py @@ -9,16 +9,7 @@ import numpy as np from numpy import linalg as la from . import ols as OLS -from .utils import ( - set_endog, - sp_att, - optim_moments, - get_spFilter, - get_lags, - spdot, - RegressionPropsY, - set_warn, -) +from .utils import set_endog, sp_att, optim_moments, get_spFilter, get_lags, spdot, RegressionPropsY, set_warn from . import twosls as TSLS from . 
import user_output as USER import pandas as pd @@ -32,6 +23,7 @@ class BaseGM_Error(RegressionPropsY): + """ GMM method for a spatial error model (note: no consistency checks diagnostics or constant added); based on Kelejian and Prucha @@ -99,6 +91,7 @@ class BaseGM_Error(RegressionPropsY): """ def __init__(self, y, x, w, hard_bound=False): + # 1a. OLS --> \tilde{betas} ols = OLS.BaseOLS(y=y, x=x) self.n, self.k = ols.x.shape @@ -127,6 +120,7 @@ def __init__(self, y, x, w, hard_bound=False): class GM_Error(BaseGM_Error): + """ GMM method for a spatial error model, with results and diagnostics; based on Kelejian and Prucha (1998, 1999) :cite:`Kelejian1998` :cite:`Kelejian1999`. @@ -300,63 +294,43 @@ class GM_Error(BaseGM_Error): """ def __init__( - self, - y, - x, - w, - slx_lags=0, - slx_vars="All", - vm=False, - name_y=None, - name_x=None, - name_w=None, - name_ds=None, - latex=False, - hard_bound=False, - ): + self, y, x, w, slx_lags=0, slx_vars="All",vm=False, name_y=None, name_x=None, name_w=None, name_ds=None, latex=False, + hard_bound=False): + n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x( - name_x, x_constant - ) # intialize in case of None, contains constant + name_x = USER.set_name_x(name_x, x_constant) # intialize in case of None, contains constant set_warn(self, warn) - + self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES" - if slx_lags > 0: - # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - # x_constant = np.hstack((x_constant, lag_x)) - # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - - x_constant, name_x = USER.flex_wx( - w, - x=x_constant, - name_x=name_x, - constant=True, - slx_lags=slx_lags, - slx_vars=slx_vars, - ) + if slx_lags >0: + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + 
#x_constant = np.hstack((x_constant, lag_x)) +# name_x += USER.set_name_spatial_lags(name_x, slx_lags) + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - self.title += " WITH SLX (SLX-Error)" + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) - BaseGM_Error.__init__( - self, y=y, x=x_constant, w=w.sparse, hard_bound=hard_bound - ) + self.title += " WITH SLX (SLX-Error)" + + BaseGM_Error.__init__(self, y=y, x=x_constant, w=w.sparse, hard_bound=hard_bound) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) - # self.name_x = USER.set_name_x(name_x, x_constant) +# self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # already includes constant self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_x, columns=["var_names"]) - self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] - self.output["regime"], self.output["equation"] = (0, 0) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['o'] + ['x'] * (len(self.name_x) - 2) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Endog_Error(RegressionPropsY): + """ GMM method for a spatial error model with endogenous variables (note: no consistency checks, diagnostics or constant added); based on Kelejian and @@ -439,6 +413,7 @@ class BaseGM_Endog_Error(RegressionPropsY): """ def __init__(self, y, x, yend, q, w, hard_bound=False): + # 1a. 
TSLS --> \tilde{betas} tsls = TSLS.BaseTSLS(y=y, x=x, yend=yend, q=q) self.n, self.k = tsls.z.shape @@ -467,6 +442,7 @@ def __init__(self, y, x, yend, q, w, hard_bound=False): class GM_Endog_Error(BaseGM_Endog_Error): + """ GMM method for a spatial error model with endogenous variables, with results and diagnostics; based on Kelejian and Prucha (1998, @@ -492,7 +468,7 @@ class GM_Endog_Error(BaseGM_Endog_Error): Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. slx_vars : either "All" (default) or list of booleans to select x variables - to be lagged + to be lagged vm : boolean If True, include variance-covariance matrix in summary results @@ -692,38 +668,29 @@ def __init__( latex=False, hard_bound=False, ): + n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x( - name_x, x_constant - ) # initialize for None, includes constant + name_x = USER.set_name_x(name_x, x_constant) # initialize for None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES" - if slx_lags > 0: - # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - # x_constant = np.hstack((x_constant, lag_x)) - # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - - x_constant, name_x = USER.flex_wx( - w, - x=x_constant, - name_x=name_x, - constant=True, - slx_lags=slx_lags, - slx_vars=slx_vars, - ) + if slx_lags >0: + #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + #x_constant = np.hstack((x_constant, lag_x)) +# name_x += USER.set_name_spatial_lags(name_x, slx_lags) + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant 
- self.title += " WITH SLX (SLX-Error)" - BaseGM_Endog_Error.__init__( - self, y=y, x=x_constant, w=w.sparse, yend=yend, q=q, hard_bound=hard_bound - ) + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + + self.title += " WITH SLX (SLX-Error)" + BaseGM_Endog_Error.__init__(self, y=y, x=x_constant, w=w.sparse, yend=yend, q=q, hard_bound=hard_bound) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) - # self.name_x = USER.set_name_x(name_x, x_constant) +# self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # already includes constant self.name_yend = USER.set_name_yend(name_yend, yend) self.name_z = self.name_x + self.name_yend @@ -731,15 +698,15 @@ def __init__( self.name_q = USER.set_name_q(name_q, q) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_z, columns=["var_names"]) - self.output["var_type"] = ( - ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] - ) - self.output["regime"], self.output["equation"] = (0, 0) + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = ['o']+['x'] * (len(self.name_x)-1) + ['yend'] * len(self.name_yend) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class GM_Combo(BaseGM_Endog_Error): + """ GMM method for a spatial lag and error model with endogenous variables, with results and diagnostics; based on Kelejian and Prucha (1998, @@ -1004,6 +971,7 @@ def __init__( latex=False, hard_bound=False, ): + n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -1012,24 +980,22 @@ def __init__( name_x = USER.set_name_x(name_x, x_constant) if slx_lags > 0: - yend2, q2, wx = set_endog( - y, x_constant[:, 1:], 
w, yend, q, w_lags, lag_q, slx_lags, slx_vars - ) + yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) x_constant = np.hstack((x_constant, wx)) else: yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + + set_warn(self, warn) # OLD - # if slx_lags == 0: - # yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - # else: - # yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) - # x_constant = np.hstack((x_constant, wx)) - - BaseGM_Endog_Error.__init__( - self, y=y, x=x_constant, w=w.sparse, yend=yend2, q=q2, hard_bound=hard_bound - ) + #if slx_lags == 0: + #yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + #else: + #yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + #x_constant = np.hstack((x_constant, wx)) + + BaseGM_Endog_Error.__init__(self, y=y, x=x_constant, w=w.sparse, yend=yend2, q=q2, hard_bound=hard_bound) self.rho = self.betas[-2] self.predy_e, self.e_pred, warn = sp_att( @@ -1038,37 +1004,36 @@ def __init__( set_warn(self, warn) self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL" # OLD - # if slx_lags > 0: - # name_x += USER.set_name_spatial_lags(name_x, slx_lags) - # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - # self.title += " WITH SLX (GNSM)" + #if slx_lags > 0: +# name_x += USER.set_name_spatial_lags(name_x, slx_lags) + #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + #self.title += " WITH SLX (GNSM)" # kx and wkx are used to replace complex calculation for output if slx_lags > 0: # adjust for flexwx - if isinstance(slx_vars, list): # slx_vars has True,False - if len(slx_vars) != x.shape[1]: + if (isinstance(slx_vars,list)): # slx_vars has True,False + if len(slx_vars) != x.shape[1] : raise Exception("slx_vars incompatible with x column dimensions") else: # use slx_vars to extract proper columns workname = 
name_x[1:] kx = len(workname) - vv = list(compress(workname, slx_vars)) + vv = list(compress(workname,slx_vars)) name_x += USER.set_name_spatial_lags(vv, slx_lags) wkx = slx_vars.count(True) else: kx = len(name_x) - 1 wkx = kx - name_x += USER.set_name_spatial_lags( - name_x[1:], slx_lags - ) # exclude constant + name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant self.title += " WITH SLX (GNSM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) - # self.name_x = USER.set_name_x(name_x, x_constant) +# self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # constant already in list self.name_yend = USER.set_name_yend(name_yend, yend) self.name_yend.append(USER.set_name_yend_sp(self.name_y)) + self.name_z = self.name_x + self.name_yend self.name_z.append("lambda") self.name_q = USER.set_name_q(name_q, q) @@ -1076,66 +1041,45 @@ def __init__( if slx_lags > 0: # need to remove all but last SLX variables from name_x self.name_x0 = [] self.name_x0.append(self.name_x[0]) # constant - if isinstance(slx_vars, list): # boolean list passed + if (isinstance(slx_vars,list)): # boolean list passed # x variables that were not lagged - self.name_x0.extend( - list(compress(self.name_x[1:], [not i for i in slx_vars])) - ) + self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) # last wkx variables self.name_x0.extend(self.name_x[-wkx:]) + else: - okx = int( - (self.k - self.yend.shape[1] - 1) / (slx_lags + 1) - ) # number of original exogenous vars + okx = int((self.k - self.yend.shape[1] - 1) / (slx_lags + 1)) # number of original exogenous vars self.name_x0.extend(self.name_x[-okx:]) - self.name_q.extend( - USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q) - ) - - # var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - var_types = ( - ["x"] * (kx + 1) - + ["wx"] * wkx * slx_lags - + ["yend"] * (len(self.name_yend) - 1) - + 
["rho", "lambda"] - ) + self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) + + #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ['o']+['x']*kx + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] else: - self.name_q.extend( - USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q) - ) - var_types = ( - ["x"] * len(self.name_x) - + ["yend"] * (len(self.name_yend) - 1) - + ["rho", "lambda"] - ) - - # self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + var_types = ['o']+['x']*(len(self.name_x)-1) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + + + #self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + - # self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] - self.output["var_type"] = var_types + #self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] + self.output['var_type'] = var_types - self.output["regime"], self.output["equation"] = (0, 0) + self.output['regime'], self.output['equation'] = (0, 0) self.other_top = _spat_pseudo_r2(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) -class GMM_Error( - GM_Error, - GM_Endog_Error, - GM_Combo, - GM_Error_Het, - GM_Endog_Error_Het, - GM_Combo_Het, - GM_Error_Hom, - GM_Endog_Error_Hom, - GM_Combo_Hom, -): +class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error_Het, + GM_Combo_Het, GM_Error_Hom, GM_Endog_Error_Hom, GM_Combo_Hom): + """ Wrapper function to 
call any of the GMM methods for a spatial error model available in spreg @@ -1161,12 +1105,16 @@ class GMM_Error( homoskedasticity, and 'kp98', which does not provide inference on the spatial parameter for the error term. add_wy : boolean - If True, then a spatial lag of the dependent variable is included. + If True, then a spatial lag of the dependent variable is included. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. + If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. slx_vars : either "All" (default) or list of booleans to select x variables - to be lagged + to be lagged + regimes : list or pandas.Series + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + For other regimes-specific arguments, see GMM_Error_Regimes vm : boolean If True, include variance-covariance matrix in summary results @@ -1179,7 +1127,7 @@ class GMM_Error( name_yend : list of strings Names of endogenous variables for use in output name_q : list of strings - Names of instruments for use in output + Names of instruments for use in output name_ds : string Name of dataset for use in output latex : boolean @@ -1189,7 +1137,7 @@ class GMM_Error( autoregressive parameter is outside the maximum/minimum bounds. spat_diag : boolean, ignored, included for compatibility with other models **kwargs : keywords - Additional arguments to pass on to the estimators. + Additional arguments to pass on to the estimators. See the specific functions for details on what can be used. 
Attributes @@ -1250,8 +1198,8 @@ class GMM_Error( name_q : list of strings (optional) Names of external instruments name_h : list of strings (optional) - Names of all instruments used in ouput - + Names of all instruments used in ouput + Examples -------- @@ -1370,287 +1318,63 @@ class GMM_Error( """ def __init__( - self, - y, - x, - w, - yend=None, - q=None, - estimator="het", - add_wy=False, - slx_lags=0, - slx_vars="All", - vm=False, - name_y=None, - name_x=None, - name_w=None, - name_yend=None, - name_q=None, - name_ds=None, - latex=False, - hard_bound=False, - spat_diag=False, - **kwargs, - ): - if estimator == "het": - if yend is None and not add_wy: - GM_Error_Het.__init__( - self, - y=y, - x=x, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) - elif yend is not None and not add_wy: - GM_Endog_Error_Het.__init__( - self, - y=y, - x=x, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) - elif add_wy: - GM_Combo_Het.__init__( - self, - y=y, - x=x, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) - else: - set_warn( - self, - "Combination of arguments passed to GMM_Error not allowed. 
Using default arguments instead.", - ) - GM_Error_Het.__init__( - self, - y=y, - x=x, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - ) - elif estimator == "hom": - if yend is None and not add_wy: - GM_Error_Hom.__init__( - self, - y=y, - x=x, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) - elif yend is not None and not add_wy: - GM_Endog_Error_Hom.__init__( - self, - y=y, - x=x, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) - elif add_wy: - GM_Combo_Hom.__init__( - self, - y=y, - x=x, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) + self, y, x, w, yend=None, q=None, regimes=None, estimator='het', add_wy=False, slx_lags=0, slx_vars="All",vm=False, name_y=None, name_x=None, name_w=None, name_yend=None, + name_q=None, name_ds=None, latex=False, hard_bound=False,spat_diag=False, **kwargs): + + if regimes is not None: + from .error_sp_regimes import GMM_Error_Regimes + self.__class__ = GMM_Error_Regimes + self.__init__( + y=y, x=x, regimes=regimes, w=w, yend=yend, q=q, estimator=estimator, add_wy=add_wy, + slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, name_w=name_w, name_yend=name_yend, + name_q=name_q, name_ds=name_ds, latex=latex, **kwargs) + else: + if estimator == 'het': + if yend is None and not add_wy: + GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, 
name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + elif add_wy: + GM_Combo_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') + GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) + elif estimator == 'hom': + if yend is None and not add_wy: + GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars,vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + elif add_wy: + GM_Combo_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. 
Using default arguments instead.') + GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars,vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) + elif estimator == 'kp98': + if yend is None and not add_wy: + GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + elif yend is not None and not add_wy: + GM_Endog_Error.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + elif add_wy: + GM_Combo.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + else: + set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') + GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) else: - set_warn( - self, - "Combination of arguments passed to GMM_Error not allowed. 
Using default arguments instead.", - ) - GM_Error_Hom.__init__( - self, - y=y, - x=x, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - ) - elif estimator == "kp98": - if yend is None and not add_wy: - GM_Error.__init__( - self, - y=y, - x=x, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) - elif yend is not None and not add_wy: - GM_Endog_Error.__init__( - self, - y=y, - x=x, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) - elif add_wy: - GM_Combo.__init__( - self, - y=y, - x=x, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - **kwargs, - ) - else: - set_warn( - self, - "Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.", - ) - GM_Error.__init__( - self, - y=y, - x=x, - w=w, - slx_lags=slx_lags, - slx_vars=slx_vars, - vm=vm, - name_y=name_y, - name_x=name_x, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - ) - else: - set_warn( - self, - "Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.", - ) - GM_Error_Het.__init__( - self, - y=y, - x=x, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - name_w=name_w, - name_ds=name_ds, - latex=latex, - hard_bound=hard_bound, - ) + set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. 
Using default arguments instead.') + GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) def _momentsGM_Error(w, u): @@ -1693,14 +1417,15 @@ def _test(): if __name__ == "__main__": + _test() import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') y = np.array(db.by_col("HOVAL")) - y = np.reshape(y, (49, 1)) + y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X = np.array(X).T @@ -1712,23 +1437,12 @@ def _test(): q = np.array(q).T w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = "r" - # reg = GM_Error(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) - # reg = GM_Endog_Error(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + w.transform = 'r' + #reg = GM_Error(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) + #reg = GM_Endog_Error(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus',vm=True) - reg = GM_Combo( - y, - X, - yd, - q, - w=w, - name_x=["inc"], - name_y="hoval", - name_yend=["crime"], - name_q=["discbd"], - name_ds="columbus", - vm=True, - ) + reg = GM_Combo(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], name_q=['discbd'], + name_ds='columbus', vm=True) print(reg.output) - print(reg.summary) + print(reg.summary) \ No newline at end of file diff --git a/spreg/error_sp_het.py b/spreg/error_sp_het.py index 5d38595..34ceffd 100755 --- a/spreg/error_sp_het.py +++ b/spreg/error_sp_het.py @@ -418,7 +418,7 @@ def __init__( self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * (len(self.name_x)-1) + 
['lambda'] + self.output['var_type'] = ['o'] + ['x'] * (len(self.name_x)-2) + ['lambda'] self.output['regime'], self.output['equation'] = (0, 0) self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -968,7 +968,7 @@ def __init__( self.name_w = USER.set_name_w(name_w, w) self.output = pd.DataFrame(self.name_z, columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] + self.output['var_type'] = ['o'] + ['x'] * (len(self.name_x)-1) + ['yend'] * len(self.name_yend) + ['lambda'] self.output['regime'], self.output['equation'] = (0, 0) self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -1525,10 +1525,10 @@ def __init__( self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - var_types = ['x'] * (kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + var_types = ['o'] + ['x'] * kx + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] else: self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) - var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + var_types = ['o'] + ['x'] * (len(self.name_x)-1) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] diff --git a/spreg/error_sp_het_regimes.py b/spreg/error_sp_het_regimes.py index c269014..cd37409 100644 --- a/spreg/error_sp_het_regimes.py +++ b/spreg/error_sp_het_regimes.py @@ -1,7 +1,6 @@ """ Spatial Error with Heteroskedasticity and Regimes family of models """ - __author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. 
Amaral pedro.amaral@asu.edu" import numpy as np @@ -22,15 +21,7 @@ get_vc_het_tsls, get_Omega_GS2SLS, ) -from .utils import ( - RegressionPropsY, - spdot, - set_endog, - sphstack, - set_warn, - sp_att, - get_lags, -) +from .utils import RegressionPropsY, spdot, set_endog, sphstack, set_warn, sp_att, get_lags from scipy import sparse as SP from libpysal.weights.spatial_lag import lag_spatial from platform import system @@ -39,6 +30,7 @@ class GM_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): + """ GMM method for a spatial error model with heteroskedasticity and regimes; based on Arraiz et al :cite:`Arraiz2010`, following Anselin :cite:`Anselin2011`. @@ -333,6 +325,7 @@ def __init__( latex=False, hard_bound=False, ): + n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -351,7 +344,7 @@ def __init__( set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - if slx_lags > 0: + if slx_lags >0: lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) @@ -387,14 +380,14 @@ def __init__( "All coefficients must vary across regimes if regime_err_sep = True." ) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) - name_x = USER.set_name_x(name_x, x_constant) - self.x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( + x_constant1 = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) + name_x = USER.set_name_x(name_x, x_constant, constant=True) + self.x, self.name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, - constant_regi=None, - cols2regi=cols2regi, + constant_regi=constant_regi, + cols2regi=cols2regi[1:], names=name_x, rlist=True, ) @@ -418,7 +411,7 @@ def __init__( self.iteration, eps = 0, 1 while self.iteration < max_iter and eps > epsilon: # 2a. 
reg -->\hat{betas} - xs = UTILS.get_spFilter(w, lambda_old, x_constant) + xs = UTILS.get_spFilter(w, lambda_old, x_constant1) ys = UTILS.get_spFilter(w, lambda_old, y) xs = REGI.Regimes_Frame.__init__( self, xs, regimes, constant_regi=None, cols2regi=cols2regi @@ -446,36 +439,24 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED MODEL (HET) - REGIMES" else: - self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (Error-HET) - REGIMES" - + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (Error-HET) - REGIMES" + self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - self.output = pd.DataFrame(self.name_x, columns=["var_names"]) - self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] - self.output["regime"] = x_rlist + ["_Global"] - self.output["equation"] = 0 + self.output = pd.DataFrame(self.name_x, + columns=['var_names']) + self.output['var_type'] = xtype + ['lambda'] + self.output['regime'] = x_rlist + ['_Global'] + self.output['equation'] = 0 self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _error_het_regimes_multi( - self, - y, - x, - regimes, - w, - slx_lags, - cores, - max_iter, - epsilon, - step1c, - cols2regi, - vm, - name_x, - latex, - hard_bound, + self, y, x, regimes, w, slx_lags, cores, max_iter, epsilon, step1c, cols2regi, vm, name_x, latex, hard_bound ): + regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -557,9 +538,7 @@ def _error_het_regimes_multi( results = {} self.name_y, self.name_x = [], [] counter = 0 - self.output = pd.DataFrame( - columns=["var_names", "var_type", "regime", "equation"] - ) + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -576,34 +555,26 @@ def _error_het_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = 
results[r].vm - self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ - r - ].betas - self.u[regi_ids[r],] = results[r].u - self.predy[regi_ids[r],] = results[r].predy - self.e_filtered[regi_ids[r],] = results[r].e_filtered + self.betas[ + (counter * self.kr) : ((counter + 1) * self.kr), + ] = results[r].betas + self.u[ + regi_ids[r], + ] = results[r].u + self.predy[ + regi_ids[r], + ] = results[r].predy + self.e_filtered[ + regi_ids[r], + ] = results[r].e_filtered self.name_y += results[r].name_y - results[r].name_x = [ - str(r) + "_lambda" if value == "lambda" else value - for value in results[r].name_x - ] + results[r].name_x = [str(r) + '_lambda' if value == 'lambda' else value for value in results[r].name_x] self.name_x += results[r].name_x results[r].other_top = _summary_iteration(results[r]) - self.output = pd.concat( - [ - self.output, - pd.DataFrame( - { - "var_names": results[r].name_x, - "var_type": ["x"] * (len(results[r].name_x) - 1) - + ["lambda"], - "regime": r, - "equation": r, - } - ), - ], - ignore_index=True, - ) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['o'] + ['x'] * (len(results[r].name_x)-2) + + ['lambda'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -611,6 +582,7 @@ def _error_het_regimes_multi( class GM_Endog_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): + """ GMM method for a spatial error model with heteroskedasticity, regimes and endogenous variables, with results and diagnostics; based on Arraiz et al @@ -966,6 +938,7 @@ def __init__( latex=False, hard_bound=False, ): + n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -1030,21 +1003,21 @@ def __init__( "All coefficients must vary across regimes if regime_err_sep = True." 
) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) - name_x = USER.set_name_x(name_x, x_constant) - q, name_q = REGI.Regimes_Frame.__init__( + x_constant1 = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) + name_x = USER.set_name_x(name_x, x_constant, constant=True) + q, name_q, _ = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q ) - x, name_x, x_rlist = REGI.Regimes_Frame.__init__( + x, name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, - constant_regi=None, - cols2regi=cols2regi, + constant_regi=constant_regi, + cols2regi=cols2regi[1:], names=name_x, rlist=True, ) - yend2, name_yend, yend_rlist = REGI.Regimes_Frame.__init__( + yend2, name_yend, xtypeyd, yend_rlist = REGI.Regimes_Frame.__init__( self, yend, regimes, @@ -1088,7 +1061,7 @@ def __init__( self.iteration, eps = 0, 1 while self.iteration < max_iter and eps > epsilon: # 2a. reg -->\hat{betas} - xs = UTILS.get_spFilter(w, lambda1, x_constant) + xs = UTILS.get_spFilter(w, lambda1, x_constant1) xs = REGI.Regimes_Frame.__init__( self, xs, regimes, constant_regi=None, cols2regi=cols2regi )[0] @@ -1141,20 +1114,17 @@ def __init__( self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - self.output = pd.DataFrame(self.name_z, columns=["var_names"]) - self.output["var_type"] = ( - ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] - ) - self.output["regime"] = x_rlist + yend_rlist + ["_Global"] - self.output["equation"] = 0 + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = xtype + xtypeyd + ['lambda'] + self.output['regime'] = x_rlist + yend_rlist + ['_Global'] + self.output['equation'] = 0 if summ: self.other_top = _summary_iteration(self) if slx_lags == 0: - self.title = "GM SPATIALLY WEIGHTED 2SLS (HET) - REGIMES" + self.title = ("GM SPATIALLY WEIGHTED 2SLS (HET) - REGIMES") else: - self.title = ( - "GM SPATIALLY WEIGHTED 2SLS + 
SLX (Error-HET) - REGIMES" - ) + self.title = ("GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HET) - REGIMES") output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _endog_error_het_regimes_multi( @@ -1180,6 +1150,7 @@ def _endog_error_het_regimes_multi( latex, hard_bound, ): + regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -1284,9 +1255,7 @@ def _endog_error_het_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 - self.output = pd.DataFrame( - columns=["var_names", "var_type", "regime", "equation"] - ) + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -1303,12 +1272,18 @@ def _endog_error_het_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ - r - ].betas - self.u[regi_ids[r],] = results[r].u - self.predy[regi_ids[r],] = results[r].predy - self.e_filtered[regi_ids[r],] = results[r].e_filtered + self.betas[ + (counter * self.kr) : ((counter + 1) * self.kr), + ] = results[r].betas + self.u[ + regi_ids[r], + ] = results[r].u + self.predy[ + regi_ids[r], + ] = results[r].predy + self.e_filtered[ + regi_ids[r], + ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x self.name_yend += results[r].name_yend @@ -1316,36 +1291,21 @@ def _endog_error_het_regimes_multi( self.name_z += results[r].name_z self.name_h += results[r].name_h if add_lag != False: - self.predy_e[regi_ids[r],] = results[r].predy_e - self.e_pred[regi_ids[r],] = results[r].e_pred + self.predy_e[ + regi_ids[r], + ] = results[r].predy_e + self.e_pred[ + regi_ids[r], + ] = results[r].e_pred results[r].other_top = _spat_pseudo_r2(results[r]) - v_type = ( - ["x"] * len(results[r].name_x) - + ["yend"] * (len(results[r].name_yend) - 1) - + ["rho", "lambda"] - ) + 
v_type = ['o'] + ['x'] * (len(results[r].name_x)-1) + ['yend'] * (len(results[r].name_yend)-1) + ['rho','lambda'] else: results[r].other_top = "" - v_type = ( - ["x"] * len(results[r].name_x) - + ["yend"] * len(results[r].name_yend) - + ["lambda"] - ) + v_type = ['o'] + ['x'] * (len(results[r].name_x)-1) + ['yend'] * len(results[r].name_yend) + ['lambda'] results[r].other_top += _summary_iteration(results[r]) - self.output = pd.concat( - [ - self.output, - pd.DataFrame( - { - "var_names": results[r].name_z, - "var_type": v_type, - "regime": r, - "equation": r, - } - ), - ], - ignore_index=True, - ) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, + 'var_type': v_type, + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -1353,6 +1313,7 @@ def _endog_error_het_regimes_multi( class GM_Combo_Het_Regimes(GM_Endog_Error_Het_Regimes): + """ GMM method for a spatial lag and error model with heteroskedasticity, regimes and endogenous variables, with results and diagnostics; @@ -1399,7 +1360,7 @@ class GM_Combo_Het_Regimes(GM_Endog_Error_Het_Regimes): the spatial parameter is fixed across regimes. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the GNSM type. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. 
For example, w_lags=1, then @@ -1751,10 +1712,10 @@ def __init__( hard_bound=False, ): if regime_lag_sep and not regime_err_sep: - set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") regime_err_sep = True if regime_err_sep and not regime_lag_sep: - set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") regime_err_sep = False n = USER.check_arrays(y, x) self.step1c = step1c @@ -1771,35 +1732,20 @@ def __init__( regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): - set_warn( - self, - "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.", - ) + set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") cols2regi = "all" if slx_lags > 0: - yend2, q2, wx = set_endog( - y, x_constant, w, yend, q, w_lags, lag_q, slx_lags - ) + yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) x_constant = np.hstack((x_constant, wx)) name_slx = USER.set_name_spatial_lags(name_x, slx_lags) - name_q.extend( - USER.set_name_q_sp( - name_slx[-len(name_x) :], w_lags, name_q, lag_q, force_all=True - ) - ) - name_x += name_slx - cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False - ) + name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) + name_x += name_slx + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) else: - name_q.extend( - USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True) - ) + name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) yend2, q2 = yend, q - cols2regi = REGI.check_cols2regi( - 
constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False - ) + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes @@ -1861,10 +1807,8 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HET) - REGIMES" else: - self.title = ( - "GM SPATIALLY WEIGHTED 2SLS-COMBO + SLX (GNSM-HET) - REGIMES" - ) - self.output.iat[-2, self.output.columns.get_loc("var_type")] = "rho" + self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO + SLX (GNSM-HET) - REGIMES" + self.output.iat[-2, self.output.columns.get_loc('var_type')] = 'rho' self.other_top = _spat_pseudo_r2(self) self.other_top += _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -1891,13 +1835,7 @@ def _work_error( y_r = y[regi_ids[r]] x_r = x[regi_ids[r]] model = BaseGM_Error_Het( - y_r, - x_r, - w_r.sparse, - max_iter=max_iter, - epsilon=epsilon, - step1c=step1c, - hard_bound=hard_bound, + y_r, x_r, w_r.sparse, max_iter=max_iter, epsilon=epsilon, step1c=step1c, hard_bound=hard_bound, ) set_warn(model, warn) model.w = w_r @@ -1967,22 +1905,22 @@ def _work_endog_error( w_r, model.y, model.predy, model.yend[:, -1].reshape(model.n, 1), model.rho ) set_warn(model, warn) - + if slx_lags == 0: if add_lag != False: - model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HET)- REGIME %s" % r + model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HET)- REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS (HET) - REGIME %s" % r else: if add_lag != False: - model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HET) - REGIME %s" % r + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HET) - REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HET) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = 
["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + [str(r) + "lambda"] + model.name_z = model.name_x + model.name_yend + [str(r)+"lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -2005,9 +1943,9 @@ def _test(): import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') y = np.array(db.by_col("HOVAL")) - y = np.reshape(y, (49, 1)) + y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X = np.array(X).T @@ -2018,31 +1956,16 @@ def _test(): q.append(db.by_col("DISCBD")) q = np.array(q).T - r_var = "NSA" + r_var = 'NSA' regimes = db.by_col(r_var) w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = "r" - # reg = GM_Error_Het_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, + w.transform = 'r' + #reg = GM_Error_Het_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, # regime_err_sep=True) - # reg = GM_Endog_Error_Het_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + #reg = GM_Endog_Error_Het_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus',vm=True, regime_err_sep=True) - reg = GM_Combo_Het_Regimes( - y, - X, - regimes, - yd, - q, - w=w, - step1c=True, - name_x=["inc"], - name_y="hoval", - name_yend=["crime"], - name_q=["discbd"], - name_ds="columbus", - vm=True, - regime_err_sep=False, - regime_lag_sep=False, - ) + reg = GM_Combo_Het_Regimes(y, X, regimes, yd, q, w=w, step1c=True, name_x=['inc'], name_y='hoval', name_yend=['crime'], + name_q=['discbd'], name_ds='columbus', vm=True, regime_err_sep=False, regime_lag_sep=False) print(reg.output) - print(reg.summary) + 
print(reg.summary) \ No newline at end of file diff --git a/spreg/error_sp_hom.py b/spreg/error_sp_hom.py index b63908f..37aab0d 100644 --- a/spreg/error_sp_hom.py +++ b/spreg/error_sp_hom.py @@ -1469,7 +1469,7 @@ def __init__( # var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] var_types = ( - ["x"] * (kx + 1) + ["o"]+["x"] * kx + ["wx"] * wkx * slx_lags + ["yend"] * (len(self.name_yend) - 1) + ["rho", "lambda"] @@ -1479,7 +1479,7 @@ def __init__( USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q) ) var_types = ( - ["x"] * len(self.name_x) + ["o"]+["x"] * (len(self.name_x)-1) + ["yend"] * (len(self.name_yend) - 1) + ["rho", "lambda"] ) diff --git a/spreg/error_sp_hom_regimes.py b/spreg/error_sp_hom_regimes.py index 2fcba35..279f865 100644 --- a/spreg/error_sp_hom_regimes.py +++ b/spreg/error_sp_hom_regimes.py @@ -1,5 +1,5 @@ """ -Hom family of models with regimes. +Hom family of models with regimes. """ __author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu, Daniel Arribas-Bel darribas@asu.edu" @@ -30,8 +30,8 @@ import pandas as pd from .output import output, _summary_iteration, _spat_pseudo_r2 - class GM_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): + """ GMM method for a spatial error model with homoskedasticity, with regimes, results and diagnostics; based on Drukker et al. 
(2013) :cite:`Drukker2013`, following @@ -336,6 +336,7 @@ def __init__( latex=False, hard_bound=False, ): + n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -354,7 +355,7 @@ def __init__( set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - if slx_lags > 0: + if slx_lags >0: lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) @@ -390,8 +391,8 @@ def __init__( "All coefficients must vary across regimes if regime_err_sep = True." ) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) - name_x = USER.set_name_x(name_x, x_constant) + x_constant1 = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) + name_x = USER.set_name_x(name_x, x_constant, constant=True) if A1 == "hom": wA1 = get_A1_hom(w.sparse) elif A1 == "hom_sc": @@ -402,12 +403,12 @@ def __init__( wA2 = get_A2_hom(w.sparse) # 1a. OLS --> \tilde{\delta} - self.x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( + self.x, self.name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, - constant_regi=None, - cols2regi=cols2regi, + constant_regi=constant_regi, + cols2regi=cols2regi[1:], names=name_x, rlist=True, ) @@ -422,7 +423,7 @@ def __init__( self.iteration, eps = 0, 1 while self.iteration < max_iter and eps > epsilon: # 2a. 
SWLS --> \hat{\delta} - xs = get_spFilter(w, lambda1, x_constant) + xs = get_spFilter(w, lambda1, x_constant1) ys = get_spFilter(w, lambda1, y) xs = REGI.Regimes_Frame.__init__( self, xs, regimes, constant_regi=None, cols2regi=cols2regi @@ -450,36 +451,23 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED MODEL (HOM) - REGIMES" else: - self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (Error-HOM) - REGIMES" + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (Error-HOM) - REGIMES" self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} self.A1 = A1 - self.output = pd.DataFrame(self.name_x, columns=["var_names"]) - self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] - self.output["regime"] = x_rlist + ["_Global"] - self.output["equation"] = 0 + self.output = pd.DataFrame(self.name_x, + columns=['var_names']) + self.output['var_type'] = xtype + ['lambda'] + self.output['regime'] = x_rlist + ['_Global'] + self.output['equation'] = 0 self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _error_hom_regimes_multi( - self, - y, - x, - regimes, - w, - slx_lags, - cores, - max_iter, - epsilon, - A1, - cols2regi, - vm, - name_x, - latex, - hard_bound, - ): + self, y, x, regimes, w, slx_lags, cores, max_iter, epsilon, A1, cols2regi, vm, name_x, latex, hard_bound): + regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -561,9 +549,7 @@ def _error_hom_regimes_multi( results = {} counter = 0 - self.output = pd.DataFrame( - columns=["var_names", "var_type", "regime", "equation"] - ) + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -580,31 +566,26 @@ def _error_hom_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[(counter * self.kr) : 
((counter + 1) * self.kr),] = results[ - r - ].betas - self.u[regi_ids[r],] = results[r].u - self.predy[regi_ids[r],] = results[r].predy - self.e_filtered[regi_ids[r],] = results[r].e_filtered + self.betas[ + (counter * self.kr) : ((counter + 1) * self.kr), + ] = results[r].betas + self.u[ + regi_ids[r], + ] = results[r].u + self.predy[ + regi_ids[r], + ] = results[r].predy + self.e_filtered[ + regi_ids[r], + ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x results[r].A1 = A1 results[r].other_top = _summary_iteration(results[r]) - self.output = pd.concat( - [ - self.output, - pd.DataFrame( - { - "var_names": results[r].name_x, - "var_type": ["x"] * (len(results[r].name_x) - 1) - + ["lambda"], - "regime": r, - "equation": r, - } - ), - ], - ignore_index=True, - ) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['o'] + ['x'] * (len(results[r].name_x) - 2) + + ['lambda'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -612,6 +593,7 @@ def _error_hom_regimes_multi( class GM_Endog_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): + """ GMM method for a spatial error model with homoskedasticity, regimes and endogenous variables. @@ -972,6 +954,7 @@ def __init__( latex=False, hard_bound=False, ): + n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -1035,21 +1018,21 @@ def __init__( "All coefficients must vary across regimes if regime_err_sep = True." 
) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) - name_x = USER.set_name_x(name_x, x_constant) - q, name_q = REGI.Regimes_Frame.__init__( + x_constant1 = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) + name_x = USER.set_name_x(name_x, x_constant, constant=True) + q, name_q, _ = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q ) - x, name_x, x_rlist = REGI.Regimes_Frame.__init__( + x, name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, - constant_regi=None, - cols2regi=cols2regi, + constant_regi=constant_regi, + cols2regi=cols2regi[1:], names=name_x, rlist=True, ) - yend2, name_yend, yend_rlist = REGI.Regimes_Frame.__init__( + yend2, name_yend, xtypeyd, yend_rlist = REGI.Regimes_Frame.__init__( self, yend, regimes, @@ -1082,7 +1065,7 @@ def __init__( self.iteration, eps = 0, 1 while self.iteration < max_iter and eps > epsilon: # 2a. GS2SLS --> \hat{\delta} - xs = get_spFilter(w, lambda1, x_constant) + xs = get_spFilter(w, lambda1, x_constant1) xs = REGI.Regimes_Frame.__init__( self, xs, regimes, constant_regi=None, cols2regi=cols2regi )[0] @@ -1126,20 +1109,17 @@ def __init__( self.chow = REGI.Chow(self) self._cache = {} self.A1 = A1 - self.output = pd.DataFrame(self.name_z, columns=["var_names"]) - self.output["var_type"] = ( - ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] - ) - self.output["regime"] = x_rlist + yend_rlist + ["_Global"] - self.output["equation"] = 0 + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = xtype + xtypeyd + ['lambda'] + self.output['regime'] = x_rlist + yend_rlist + ['_Global'] + self.output['equation'] = 0 if summ: self.other_top = _summary_iteration(self) if slx_lags == 0: - self.title = "GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIMES" + self.title = ("GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIMES") else: - self.title = ( - "GM SPATIALLY WEIGHTED 2SLS WITH SLX 
(Error-HOM) - REGIMES" - ) + self.title = ("GM SPATIALLY WEIGHTED 2SLS WITH SLX (Error-HOM) - REGIMES") output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _endog_error_hom_regimes_multi( @@ -1164,6 +1144,7 @@ def _endog_error_hom_regimes_multi( latex, hard_bound, ): + regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -1266,9 +1247,7 @@ def _endog_error_hom_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 - self.output = pd.DataFrame( - columns=["var_names", "var_type", "regime", "equation"] - ) + self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -1285,12 +1264,18 @@ def _endog_error_hom_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ - r - ].betas - self.u[regi_ids[r],] = results[r].u - self.predy[regi_ids[r],] = results[r].predy - self.e_filtered[regi_ids[r],] = results[r].e_filtered + self.betas[ + (counter * self.kr) : ((counter + 1) * self.kr), + ] = results[r].betas + self.u[ + regi_ids[r], + ] = results[r].u + self.predy[ + regi_ids[r], + ] = results[r].predy + self.e_filtered[ + regi_ids[r], + ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x self.name_yend += results[r].name_yend @@ -1298,37 +1283,22 @@ def _endog_error_hom_regimes_multi( self.name_z += results[r].name_z self.name_h += results[r].name_h if add_lag != False: - self.predy_e[regi_ids[r],] = results[r].predy_e - self.e_pred[regi_ids[r],] = results[r].e_pred + self.predy_e[ + regi_ids[r], + ] = results[r].predy_e + self.e_pred[ + regi_ids[r], + ] = results[r].e_pred results[r].other_top = _spat_pseudo_r2(results[r]) - v_type = ( - ["x"] * len(results[r].name_x) - + ["yend"] * (len(results[r].name_yend) - 1) - + ["rho", "lambda"] - ) + 
v_type = ['o'] + ['x'] * (len(results[r].name_x)-1) + ['yend'] * (len(results[r].name_yend)-1) + ['rho','lambda'] else: results[r].other_top = "" - v_type = ( - ["x"] * len(results[r].name_x) - + ["yend"] * len(results[r].name_yend) - + ["lambda"] - ) + v_type = ['o'] + ['x'] * (len(results[r].name_x)-1) + ['yend'] * len(results[r].name_yend) + ['lambda'] results[r].A1 = A1 results[r].other_top += _summary_iteration(results[r]) - self.output = pd.concat( - [ - self.output, - pd.DataFrame( - { - "var_names": results[r].name_z, - "var_type": v_type, - "regime": r, - "equation": r, - } - ), - ], - ignore_index=True, - ) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, + 'var_type': v_type, + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -1336,6 +1306,7 @@ def _endog_error_hom_regimes_multi( class GM_Combo_Hom_Regimes(GM_Endog_Error_Hom_Regimes): + """ GMM method for a spatial lag and error model with homoskedasticity, regimes and endogenous variables, with results and diagnostics; @@ -1383,7 +1354,7 @@ class GM_Combo_Hom_Regimes(GM_Endog_Error_Hom_Regimes): the spatial parameter is fixed across regimes. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the GNSM type. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. 
For example, w_lags=1, then @@ -1738,11 +1709,12 @@ def __init__( latex=False, hard_bound=False, ): + if regime_lag_sep and not regime_err_sep: - set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") regime_err_sep = True if regime_err_sep and not regime_lag_sep: - set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") regime_err_sep = False n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) @@ -1758,36 +1730,21 @@ def __init__( regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): - set_warn( - self, - "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.", - ) + set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") cols2regi = "all" if slx_lags > 0: - yend2, q2, wx = set_endog( - y, x_constant, w, yend, q, w_lags, lag_q, slx_lags - ) + yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) x_constant = np.hstack((x_constant, wx)) name_slx = USER.set_name_spatial_lags(name_x, slx_lags) - name_q.extend( - USER.set_name_q_sp( - name_slx[-len(name_x) :], w_lags, name_q, lag_q, force_all=True - ) - ) - name_x += name_slx - cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False - ) + name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) + name_x += name_slx + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) else: - name_q.extend( - USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True) - ) + name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) yend2, q2 = yend, q - cols2regi = 
REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False - ) - + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) + self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, n, x_constant.shape[1]) @@ -1806,6 +1763,7 @@ def __init__( if slx_lags == 0: yend2, q2 = set_endog(y, x_constant, w, yend2, q2, w_lags, lag_q) + name_yend.append(USER.set_name_yend_sp(self.name_y)) GM_Endog_Error_Hom_Regimes.__init__( @@ -1847,10 +1805,8 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HOM) - REGIMES" else: - self.title = ( - "GM SPATIALLY WEIGHTED 2SLS-COMBO WITH SLX (GNSM-HOM) - REGIMES" - ) - self.output.iat[-2, self.output.columns.get_loc("var_type")] = "rho" + self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO WITH SLX (GNSM-HOM) - REGIMES" + self.output.iat[-2, self.output.columns.get_loc('var_type')] = 'rho' self.other_top = _spat_pseudo_r2(self) self.other_top += _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -1877,20 +1833,14 @@ def _work_error( y_r = y[regi_ids[r]] x_r = x[regi_ids[r]] model = BaseGM_Error_Hom( - y_r, - x_r, - w_r.sparse, - max_iter=max_iter, - epsilon=epsilon, - A1=A1, - hard_bound=hard_bound, + y_r, x_r, w_r.sparse, max_iter=max_iter, epsilon=epsilon, A1=A1, hard_bound=hard_bound, ) set_warn(model, warn) model.w = w_r if slx_lags == 0: model.title = "GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIME %s" % r else: - model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HOM) - REGIME %s" % r + model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HOM) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] @@ -1935,15 +1885,7 @@ def _work_endog_error( ) x_constant = USER.check_constant(x_r) model = BaseGM_Endog_Error_Hom( - y_r, - x_r, - yend_r, - q_r, - 
w_r.sparse, - max_iter=max_iter, - epsilon=epsilon, - A1=A1, - hard_bound=hard_bound, + y_r, x_r, yend_r, q_r, w_r.sparse, max_iter=max_iter, epsilon=epsilon, A1=A1, hard_bound=hard_bound, ) set_warn(model, warn) if add_lag != False: @@ -1954,19 +1896,19 @@ def _work_endog_error( set_warn(model, warn) if slx_lags == 0: if add_lag != False: - model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HOM)- REGIME %s" % r + model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HOM)- REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIME %s" % r else: if add_lag != False: - model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HOM) - REGIME %s" % r + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HOM) - REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HOM) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = ["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + [str(r) + "_lambda"] + model.name_z = model.name_x + model.name_yend + [str(r)+"_lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -1989,9 +1931,9 @@ def _test(): import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') y = np.array(db.by_col("HOVAL")) - y = np.reshape(y, (49, 1)) + y = np.reshape(y, (49,1)) X = [] X.append(db.by_col("INC")) X = np.array(X).T @@ -2002,30 +1944,16 @@ def _test(): q.append(db.by_col("DISCBD")) q = np.array(q).T - r_var = "NSA" + r_var = 'NSA' regimes = db.by_col(r_var) w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = "r" - # reg = GM_Error_Hom_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, + 
w.transform = 'r' + #reg = GM_Error_Hom_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, # regime_err_sep=True) - # reg = GM_Endog_Error_Hom_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + #reg = GM_Endog_Error_Hom_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus',vm=True, regime_err_sep=True) - reg = GM_Combo_Hom_Regimes( - y, - X, - regimes, - yd, - q, - w=w, - name_x=["inc"], - name_y="hoval", - name_yend=["crime"], - name_q=["discbd"], - name_ds="columbus", - vm=True, - regime_err_sep=False, - regime_lag_sep=False, - ) + reg = GM_Combo_Hom_Regimes(y, X, regimes, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + name_q=['discbd'], name_ds='columbus', vm=True, regime_err_sep=False, regime_lag_sep=False) print(reg.output) - print(reg.summary) + print(reg.summary) \ No newline at end of file diff --git a/spreg/error_sp_regimes.py b/spreg/error_sp_regimes.py index e7d4556..258b32d 100644 --- a/spreg/error_sp_regimes.py +++ b/spreg/error_sp_regimes.py @@ -18,19 +18,12 @@ from .sputils import sphstack import pandas as pd from .output import output, _spat_pseudo_r2 -from .error_sp_het_regimes import ( - GM_Error_Het_Regimes, - GM_Endog_Error_Het_Regimes, - GM_Combo_Het_Regimes, -) -from .error_sp_hom_regimes import ( - GM_Error_Hom_Regimes, - GM_Endog_Error_Hom_Regimes, - GM_Combo_Hom_Regimes, -) +from .error_sp_het_regimes import GM_Error_Het_Regimes, GM_Endog_Error_Het_Regimes, GM_Combo_Het_Regimes +from .error_sp_hom_regimes import GM_Error_Hom_Regimes, GM_Endog_Error_Hom_Regimes, GM_Combo_Hom_Regimes class GM_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): + """ GMM method for a spatial error model with regimes, with results and diagnostics; based on Kelejian and Prucha (1998, 1999) :cite:`Kelejian1998` :cite:`Kelejian1999`. 
@@ -300,14 +293,16 @@ def __init__( name_regimes=None, latex=False, ): + n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) - x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant( + x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - if slx_lags > 0: + if slx_lags >0: lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) @@ -332,30 +327,22 @@ def __init__( if regime_err_sep == True: if set(cols2regi) == set([True]): self._error_regimes_multi( - y, - x_constant, - regimes, - w, - slx_lags, - cores, - cols2regi, - vm, - name_x, - latex, + y, x_constant, regimes, w, slx_lags, cores, cols2regi, vm, name_x, latex ) else: raise Exception( "All coefficients must vary across regimes if regime_err_sep = True." 
) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) - name_x = USER.set_name_x(name_x, x_constant) - self.x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( + x_constant1 = sphstack( + np.ones((x_constant.shape[0], 1)), x_constant) + name_x = USER.set_name_x(name_x, x_constant, constant=True) + self.x, self.name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, - constant_regi=None, - cols2regi=cols2regi, + constant_regi=constant_regi, + cols2regi=cols2regi[1:], names=name_x, rlist=True, ) @@ -363,7 +350,7 @@ def __init__( self.k = ols.x.shape[1] moments = _momentsGM_Error(w, ols.u) lambda1 = optim_moments(moments) - xs = get_spFilter(w, lambda1, x_constant) + xs = get_spFilter(w, lambda1, x_constant1) ys = get_spFilter(w, lambda1, y) xs = REGI.Regimes_Frame.__init__( self, xs, regimes, constant_regi=None, cols2regi=cols2regi @@ -380,20 +367,19 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED MODEL - REGIMES" else: - self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (SLX-Error) - REGIMES" + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (SLX-Error) - REGIMES" self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - self.output = pd.DataFrame(self.name_x, columns=["var_names"]) - self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] - self.output["regime"] = x_rlist + ["_Global"] - self.output["equation"] = 0 + self.output = pd.DataFrame(self.name_x, + columns=['var_names']) + self.output['var_type'] = xtype + ['lambda'] + self.output['regime'] = x_rlist + ['_Global'] + self.output['equation'] = 0 output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) - def _error_regimes_multi( - self, y, x, regimes, w, slx_lags, cores, cols2regi, vm, name_x, latex - ): + def _error_regimes_multi(self, y, x, regimes, w, slx_lags, cores, cols2regi, vm, name_x, latex): regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in 
self.regimes_set ) @@ -467,8 +453,7 @@ def _error_regimes_multi( results = {} self.name_y, self.name_x = [], [] self.output = pd.DataFrame( - columns=["var_names", "var_type", "regime", "equation"] - ) + columns=['var_names', 'var_type', 'regime', 'equation']) counter = 0 for r in self.regimes_set: """ @@ -483,32 +468,27 @@ def _error_regimes_multi( results[r] = results_p[r].get() self.vm[ - (counter * self.kr) : ((counter + 1) * self.kr), - (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), ] = results[r].vm - self.betas[(counter * (self.kr + 1)) : ((counter + 1) * (self.kr + 1)),] = ( - results[r].betas - ) - self.u[regi_ids[r],] = results[r].u - self.predy[regi_ids[r],] = results[r].predy - self.e_filtered[regi_ids[r],] = results[r].e_filtered + self.betas[ + (counter * (self.kr + 1)): ((counter + 1) * (self.kr + 1)), + ] = results[r].betas + self.u[ + regi_ids[r], + ] = results[r].u + self.predy[ + regi_ids[r], + ] = results[r].predy + self.e_filtered[ + regi_ids[r], + ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x - self.output = pd.concat( - [ - self.output, - pd.DataFrame( - { - "var_names": results[r].name_x, - "var_type": ["x"] * (len(results[r].name_x) - 1) - + ["lambda"], - "regime": r, - "equation": r, - } - ), - ], - ignore_index=True, - ) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['x'] * (len(results[r].name_x) - 1) + + ['lambda'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -516,6 +496,7 @@ def _error_regimes_multi( class GM_Endog_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): + """ GMM method for a spatial error model with regimes and endogenous variables, with results and diagnostics; based on Kelejian and Prucha (1998, @@ -835,11 +816,13 @@ def __init__( add_lag=False, 
latex=False, ): + n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) - x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant( + x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) @@ -895,21 +878,22 @@ def __init__( "All coefficients must vary across regimes if regime_err_sep = True." ) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) - name_x = USER.set_name_x(name_x, x_constant) - q, name_q = REGI.Regimes_Frame.__init__( + x_constant1 = sphstack( + np.ones((x_constant.shape[0], 1)), x_constant) + name_x = USER.set_name_x(name_x, x_constant, constant=True) + q, name_q, _ = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q ) - x, name_x, x_rlist = REGI.Regimes_Frame.__init__( + x, name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, - constant_regi=None, - cols2regi=cols2regi, + constant_regi=constant_regi, + cols2regi=cols2regi[1:], names=name_x, rlist=True, ) - yend2, name_yend, yend_rlist = REGI.Regimes_Frame.__init__( + yend2, name_yend, xtypeyd, yend_rlist = REGI.Regimes_Frame.__init__( self, yend, regimes, @@ -926,7 +910,7 @@ def __init__( self.yend, self.z = tsls.yend, tsls.z moments = _momentsGM_Error(w, tsls.u) lambda1 = optim_moments(moments) - xs = get_spFilter(w, lambda1, x_constant) + xs = get_spFilter(w, lambda1, x_constant1) xs = REGI.Regimes_Frame.__init__( self, xs, regimes, constant_regi=None, cols2regi=cols2regi )[0] @@ -958,20 +942,18 @@ def __init__( self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - self.output = pd.DataFrame(self.name_z, columns=["var_names"]) - self.output["var_type"] = ( - ["x"] * len(self.name_x) + ["yend"] * 
len(self.name_yend) + ["lambda"] - ) - self.output["regime"] = x_rlist + yend_rlist + ["_Global"] - self.output["equation"] = 0 + self.output = pd.DataFrame(self.name_z, + columns=['var_names']) + self.output['var_type'] = xtype + xtypeyd + ['lambda'] + self.output['regime'] = x_rlist + yend_rlist + ['_Global'] + self.output['equation'] = 0 if summ: if slx_lags == 0: - self.title = "GM SPATIALLY WEIGHTED 2SLS - REGIMES" + self.title = ("GM SPATIALLY WEIGHTED 2SLS - REGIMES") else: - self.title = ( - "GM SPATIALLY WEIGHTED 2SLS WITH SLX (SLX-Error) - REGIMES" - ) - output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + self.title = ("GM SPATIALLY WEIGHTED 2SLS WITH SLX (SLX-Error) - REGIMES") + output(reg=self, vm=vm, robust=False, + other_end=False, latex=latex) def _endog_error_regimes_multi( self, @@ -991,6 +973,7 @@ def _endog_error_regimes_multi( add_lag, latex, ): + regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -1086,8 +1069,7 @@ def _endog_error_regimes_multi( ) = ([], [], [], [], [], []) counter = 0 self.output = pd.DataFrame( - columns=["var_names", "var_type", "regime", "equation"] - ) + columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -1101,15 +1083,21 @@ def _endog_error_regimes_multi( results[r] = results_p[r].get() self.vm[ - (counter * self.kr) : ((counter + 1) * self.kr), - (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), + (counter * self.kr): ((counter + 1) * self.kr), ] = results[r].vm - self.betas[(counter * (self.kr + 1)) : ((counter + 1) * (self.kr + 1)),] = ( - results[r].betas - ) - self.u[regi_ids[r],] = results[r].u - self.predy[regi_ids[r],] = results[r].predy - self.e_filtered[regi_ids[r],] = results[r].e_filtered + self.betas[ + (counter * (self.kr + 1)): ((counter + 1) * (self.kr + 1)), + ] = results[r].betas + self.u[ + regi_ids[r], + ] = results[r].u + self.predy[ + 
regi_ids[r], + ] = results[r].predy + self.e_filtered[ + regi_ids[r], + ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x self.name_yend += results[r].name_yend @@ -1117,35 +1105,22 @@ def _endog_error_regimes_multi( self.name_z += results[r].name_z self.name_h += results[r].name_h if add_lag != False: - self.predy_e[regi_ids[r],] = results[r].predy_e - self.e_pred[regi_ids[r],] = results[r].e_pred + self.predy_e[ + regi_ids[r], + ] = results[r].predy_e + self.e_pred[ + regi_ids[r], + ] = results[r].e_pred results[r].other_top = _spat_pseudo_r2(results[r]) - v_type = ( - ["x"] * len(results[r].name_x) - + ["yend"] * (len(results[r].name_yend) - 1) - + ["rho", "lambda"] - ) + v_type = ['o'] + ['x'] * (len(results[r].name_x)-1) + ['yend'] * \ + (len(results[r].name_yend) - 1) + ['rho', 'lambda'] else: results[r].other_top = "" - v_type = ( - ["x"] * len(results[r].name_x) - + ["yend"] * len(results[r].name_yend) - + ["lambda"] - ) - self.output = pd.concat( - [ - self.output, - pd.DataFrame( - { - "var_names": results[r].name_z, - "var_type": v_type, - "regime": r, - "equation": r, - } - ), - ], - ignore_index=True, - ) + v_type = ['o'] + ['x'] * (len(results[r].name_x)-1) + ['yend'] * \ + len(results[r].name_yend) + ['lambda'] + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, + 'var_type': v_type, + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -1153,6 +1128,7 @@ def _endog_error_regimes_multi( class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): + """ GMM method for a spatial lag and error model with regimes and endogenous variables, with results and diagnostics; based on Kelejian and Prucha (1998, @@ -1199,7 +1175,7 @@ class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): the spatial parameter is fixed accross regimes. 
slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the GNSM type. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. For example, w_lags=1, then @@ -1517,16 +1493,17 @@ def __init__( latex=False, ): if regime_lag_sep and not regime_err_sep: - set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") regime_err_sep = True if regime_err_sep and not regime_lag_sep: - set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") regime_err_sep = False n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) - x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant( + x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) self.name_y = USER.set_name_y(name_y) @@ -1535,35 +1512,20 @@ def __init__( regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): - set_warn( - self, - "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.", - ) + set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") cols2regi = "all" - + if slx_lags > 0: - yend2, q2, wx = set_endog( - y, x_constant, w, yend, q, w_lags, lag_q, slx_lags - ) + yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) x_constant = np.hstack((x_constant, wx)) name_slx = 
USER.set_name_spatial_lags(name_x, slx_lags) - name_q.extend( - USER.set_name_q_sp( - name_slx[-len(name_x) :], w_lags, name_q, lag_q, force_all=True - ) - ) - name_x += name_slx - cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False - ) + name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) + name_x += name_slx + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) else: - name_q.extend( - USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True) - ) + name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) yend2, q2 = yend, q - cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False - ) + cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes @@ -1583,10 +1545,9 @@ def __init__( cols2regi += [False] if slx_lags == 0: yend2, q2 = set_endog(y, x_constant, w, yend2, q2, w_lags, lag_q) - + name_yend.append(USER.set_name_yend_sp(self.name_y)) - print(cols2regi, x_constant.shape[1], yend2.shape[1], name_x, name_yend, name_q) GM_Endog_Error_Regimes.__init__( self, y=y, @@ -1621,25 +1582,18 @@ def __init__( if slx_lags == 0: self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL - REGIMES" else: - self.title = ( - "SPATIALLY WEIGHTED 2SLS - GM-COMBO WITH SLX (GNSM) - REGIMES" - ) - self.output.iat[-2, self.output.columns.get_loc("var_type")] = "rho" + self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO WITH SLX (GNSM) - REGIMES" + self.output.iat[-2, + self.output.columns.get_loc('var_type')] = 'rho' self.other_top = _spat_pseudo_r2(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) -class GMM_Error_Regimes( - GM_Error_Regimes, - GM_Combo_Regimes, - GM_Endog_Error_Regimes, - GM_Error_Het_Regimes, - 
GM_Combo_Het_Regimes, - GM_Endog_Error_Het_Regimes, - GM_Error_Hom_Regimes, - GM_Combo_Hom_Regimes, - GM_Endog_Error_Hom_Regimes, -): +class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, GM_Endog_Error_Regimes, + GM_Error_Het_Regimes, GM_Combo_Het_Regimes, GM_Endog_Error_Het_Regimes, + GM_Error_Hom_Regimes, GM_Combo_Hom_Regimes, GM_Endog_Error_Hom_Regimes + ): + """ Wrapper function to call any of the GM methods for a spatial error regimes model available in spreg @@ -1684,12 +1638,12 @@ class GMM_Error_Regimes( regime_err_sep: boolean If True, a separate regression is run for each regime. regime_lag_sep: boolean - Always False, kept for consistency, ignored. + Always False, kept for consistency, ignored. add_wy : boolean - If True, then a spatial lag of the dependent variable is included. + If True, then a spatial lag of the dependent variable is included. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. + If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. vm : boolean If True, include variance-covariance matrix in summary results @@ -1704,7 +1658,7 @@ class GMM_Error_Regimes( name_yend : list of strings Names of endogenous variables for use in output name_q : list of strings - Names of instruments for use in output + Names of instruments for use in output name_ds : string Name of dataset for use in output latex : boolean @@ -1713,7 +1667,7 @@ class GMM_Error_Regimes( If true, raises an exception if the estimated spatial autoregressive parameter is outside the maximum/minimum bounds. **kwargs : keywords - Additional arguments to pass on to the estimators. + Additional arguments to pass on to the estimators. See the specific functions for details on what can be used. 
Attributes @@ -1809,12 +1763,12 @@ class GMM_Error_Regimes( name_q : list of strings (optional) Names of external instruments name_h : list of strings (optional) - Names of all instruments used in ouput + Names of all instruments used in ouput multi : dictionary Only available when multiple regressions are estimated, i.e. when regime_err_sep=True and no variable is fixed across regimes. - Contains all attributes of each individual regression + Contains all attributes of each individual regression Examples -------- @@ -1944,339 +1898,74 @@ class GMM_Error_Regimes( 9 1_W_UE90 -0.708492 0.167057 -4.24102 0.000022 10 _Global_W_HR90 1.033956 0.269252 3.840111 0.000123 11 lambda -0.384968 0.192256 -2.002366 0.045245 - + """ def __init__( - self, - y, - x, - regimes, - w, - yend=None, - q=None, - estimator="het", - constant_regi="many", - cols2regi="all", - regime_err_sep=False, - regime_lag_sep=False, - add_wy=False, - slx_lags=0, - vm=False, - name_y=None, - name_x=None, - name_w=None, - name_regimes=None, - name_yend=None, - name_q=None, - name_ds=None, - latex=False, - **kwargs, - ): - if estimator == "het": + self, y, x, regimes, w, yend=None, q=None, estimator='het', constant_regi="many", cols2regi="all", regime_err_sep=False, + regime_lag_sep=False, add_wy=False, slx_lags=0, vm=False, name_y=None, name_x=None, name_w=None, name_regimes=None, name_yend=None, + name_q=None, name_ds=None, latex=False, **kwargs): + + if estimator == 'het': if yend is None and not add_wy: - GM_Error_Het_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, 
regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) elif yend is not None and not add_wy: - GM_Endog_Error_Het_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Endog_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) elif add_wy: - GM_Combo_Het_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - regime_lag_sep=regime_lag_sep, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Combo_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) else: - set_warn( - self, - "Combination of arguments passed to GMM_Error_Regimes not allowed. 
Using default arguments instead.", - ) - GM_Error_Het_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - ) - elif estimator == "hom": + set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.') + GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) + elif estimator == 'hom': if yend is None and not add_wy: - GM_Error_Hom_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) elif yend is not None and not add_wy: - GM_Endog_Error_Hom_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Endog_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, 
slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) elif add_wy: - GM_Combo_Hom_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - regime_lag_sep=regime_lag_sep, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Combo_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) else: - set_warn( - self, - "Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.", - ) - GM_Error_Hom_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - ) - elif estimator == "kp98": + set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. 
Using default arguments instead.') + GM_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) + elif estimator == 'kp98': if yend is None and not add_wy: - GM_Error_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) elif yend is not None and not add_wy: - GM_Endog_Error_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Endog_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) elif add_wy: - GM_Combo_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - yend=yend, - q=q, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - 
regime_err_sep=regime_err_sep, - regime_lag_sep=regime_lag_sep, - name_yend=name_yend, - name_q=name_q, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - **kwargs, - ) + GM_Combo_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, + name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) else: - set_warn( - self, - "Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.", - ) - GM_Error_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - ) + set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.') + GM_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) else: - set_warn( - self, - "Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.", - ) - GM_Error_Het_Regimes.__init__( - self, - y=y, - x=x, - regimes=regimes, - w=w, - slx_lags=slx_lags, - vm=vm, - name_y=name_y, - name_x=name_x, - constant_regi=constant_regi, - cols2regi=cols2regi, - regime_err_sep=regime_err_sep, - name_w=name_w, - name_regimes=name_regimes, - name_ds=name_ds, - latex=latex, - ) - + set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. 
Using default arguments instead.') + GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, + constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, + name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) def _work_error(y, x, regi_ids, r, w, name_ds, name_y, name_x, name_w, name_regimes): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) @@ -2329,25 +2018,26 @@ def _work_endog_error( if add_lag != False: model.rho = model.betas[-2] model.predy_e, model.e_pred, warn = sp_att( - w_r, model.y, model.predy, model.yend[:, -1].reshape(model.n, 1), model.rho + w_r, model.y, model.predy, model.yend[:, - + 1].reshape(model.n, 1), model.rho ) set_warn(model, warn) model.w = w_r if slx_lags == 0: if add_lag != False: - model.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL - REGIME %s" % r + model.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL - REGIME %s" % r else: model.title = "SPATIALLY WEIGHTED 2SLS (GM) - REGIME %s" % r else: if add_lag != False: - model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM) - REGIME %s" % r + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM) - REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (SLX-Error) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = ["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + [str(r) + "_lambda"] + model.name_z = model.name_x + model.name_yend + [str(r)+"_lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -2369,7 +2059,7 @@ def _test(): import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'), 'r') y = np.array(db.by_col("HOVAL")) 
y = np.reshape(y, (49, 1)) X = [] @@ -2382,29 +2072,17 @@ def _test(): q.append(db.by_col("DISCBD")) q = np.array(q).T - r_var = "NSA" + r_var = 'NSA' regimes = db.by_col(r_var) - w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = "r" + w = libpysal.weights.Rook.from_shapefile( + libpysal.examples.get_path("columbus.shp")) + w.transform = 'r' # reg = GM_Error_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', # regime_err_sep=True) # reg = GM_Endog_Error_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus', regime_err_sep=True) - reg = GM_Combo_Regimes( - y, - X, - regimes, - yd, - q, - w=w, - name_x=["inc"], - name_y="hoval", - name_yend=["crime"], - name_q=["discbd"], - name_ds="columbus", - regime_err_sep=True, - regime_lag_sep=True, - ) + reg = GM_Combo_Regimes(y, X, regimes, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + name_q=['discbd'], name_ds='columbus', regime_err_sep=True, regime_lag_sep=True) print(reg.output) print(reg.summary) diff --git a/spreg/ml_error.py b/spreg/ml_error.py index f905d6f..f35ec78 100644 --- a/spreg/ml_error.py +++ b/spreg/ml_error.py @@ -31,6 +31,7 @@ class BaseML_Error(RegressionPropsY, RegressionPropsVM, REGI.Regimes_Frame): + """ ML estimation of the spatial error model (note no consistency checks, diagnostics or constants added): :cite:`Anselin1988` @@ -267,7 +268,7 @@ def __init__(self, y, x, w, method="full", epsilon=0.0000001, regimes_att=None): tr3 = waiTwai.diagonal().sum() v1 = np.vstack((tr2 + tr3, tr1 / self.sig2)) - v2 = np.vstack((tr1 / self.sig2, self.n / (2.0 * self.sig2**2))) + v2 = np.vstack((tr1 / self.sig2, self.n / (2.0 * self.sig2 ** 2))) v = np.hstack((v1, v2)) @@ -296,6 +297,7 @@ def get_x_lag(self, w, regimes_att): class ML_Error(BaseML_Error): + """ ML estimation of the spatial error model with all results and diagnostics; 
:cite:`Anselin1988` @@ -313,7 +315,11 @@ class ML_Error(BaseML_Error): Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. slx_vars : either "All" (default) or list of booleans to select x variables - to be lagged + to be lagged + regimes : list or pandas.Series + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + For other regimes-specific arguments, see ML_Error_Regimes method : string if 'full', brute force calculation (full matrix expressions) if 'ord', Ord eigenvalue method @@ -473,6 +479,7 @@ def __init__( w, slx_lags=0, slx_vars="All", + regimes=None, method="full", epsilon=0.0000001, vm=False, @@ -481,48 +488,61 @@ def __init__( name_w=None, name_ds=None, latex=False, + **kwargs ): - n = USER.check_arrays(y, x) - y, name_y = USER.check_y(y, n, name_y) - w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) - x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x( - name_x, x_constant - ) # initialize in case None includes constant - set_warn(self, warn) - self.title = "ML SPATIAL ERROR" - if slx_lags > 0: + if regimes is not None: + from .ml_error_regimes import ML_Error_Regimes + self.__class__ = ML_Error_Regimes + self.__init__( + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + method=method, + epsilon=epsilon, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + **kwargs, + ) + else: + n = USER.check_arrays(y, x) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + x_constant, name_x, warn = USER.check_constant(x, name_x) + name_x = USER.set_name_x(name_x, x_constant) # initialize in case None includes constant + set_warn(self, warn) + self.title = "ML SPATIAL ERROR" + if slx_lags >0: # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) # x_constant = np.hstack((x_constant, 
lag_x)) - # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant from name_x - x_constant, name_x = USER.flex_wx( - w, - x=x_constant, - name_x=name_x, - constant=True, - slx_lags=slx_lags, - slx_vars=slx_vars, + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + self.title += " WITH SLX (SLX-Error)" + self.title += " (METHOD = " + method + ")" + + method = method.upper() + BaseML_Error.__init__( + self, y=y, x=x_constant, w=w, method=method, epsilon=epsilon ) - self.title += " WITH SLX (SLX-Error)" - self.title += " (METHOD = " + method + ")" - - method = method.upper() - BaseML_Error.__init__( - self, y=y, x=x_constant, w=w, method=method, epsilon=epsilon - ) - self.name_ds = USER.set_name_ds(name_ds) - self.name_y = USER.set_name_y(name_y) - self.name_x = name_x - self.name_x.append("lambda") - self.name_w = USER.set_name_w(name_w, w) - self.aic = DIAG.akaike(reg=self) - self.schwarz = DIAG.schwarz(reg=self) - self.output = pd.DataFrame(self.name_x, columns=["var_names"]) - self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] - self.output["regime"], self.output["equation"] = (0, 0) - self.other_top = _nonspat_top(self, ml=True) - output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + self.name_ds = USER.set_name_ds(name_ds) + self.name_y = USER.set_name_y(name_y) + self.name_x = name_x + self.name_x.append("lambda") + self.name_w = USER.set_name_w(name_w, w) + self.aic = DIAG.akaike(reg=self) + self.schwarz = DIAG.schwarz(reg=self) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['o'] + ['x'] * (len(self.name_x)-2) + ['lambda'] + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _nonspat_top(self, ml=True) + output(reg=self, vm=vm, robust=False, 
other_end=False, latex=latex) def err_c_loglik(lam, n, y, ylag, x, xlag, W): @@ -600,7 +620,6 @@ def _test(): doctest.testmod() np.set_printoptions(suppress=start_suppress) - if __name__ == "__main__": _test() diff --git a/spreg/ml_error_regimes.py b/spreg/ml_error_regimes.py index 61c561f..935f092 100644 --- a/spreg/ml_error_regimes.py +++ b/spreg/ml_error_regimes.py @@ -21,6 +21,7 @@ class ML_Error_Regimes(BaseML_Error, REGI.Regimes_Frame): + """ ML estimation of the spatial error model with regimes (note no consistency checks, diagnostics or constants added); :cite:`Anselin1988` @@ -299,6 +300,7 @@ def __init__( name_regimes=None, latex=False, ): + n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -315,7 +317,7 @@ def __init__( set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - if slx_lags > 0: + if slx_lags >0: lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) @@ -323,7 +325,7 @@ def __init__( self.name_x_r = USER.set_name_x(name_x, x_constant) cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant) - self.cols2regi = cols2regi + self.cols2regi = cols2regi self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, self.n, x.shape[1]) @@ -350,20 +352,18 @@ def __init__( "All coefficients must vary across regimes if regime_err_sep = True." 
) else: - x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) - name_x = USER.set_name_x(name_x, x_constant) regimes_att = {} - regimes_att["x"] = x_constant + regimes_att["x"] = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) regimes_att["regimes"] = regimes regimes_att["cols2regi"] = cols2regi - x, name_x, x_rlist = REGI.Regimes_Frame.__init__( + x, name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, - constant_regi=None, - cols2regi=cols2regi, + constant_regi=constant_regi, + cols2regi=cols2regi[1:], names=name_x, - rlist=True, + rlist=True ) BaseML_Error.__init__( self, @@ -376,7 +376,7 @@ def __init__( ) self.title = "ML SPATIAL ERROR" - if slx_lags > 0: + if slx_lags >0: self.title += " WITH SLX (SLX-Error)" self.title += " - REGIMES (METHOD = " + method + ")" @@ -386,28 +386,17 @@ def __init__( self.chow = REGI.Chow(self) self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) - self.output = pd.DataFrame(self.name_x, columns=["var_names"]) - self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] - self.output["regime"] = x_rlist + ["_Global"] - self.output["equation"] = 0 + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = xtype + ['lambda'] + self.output['regime'] = x_rlist + ['_Global'] + self.output['equation'] = 0 self.other_top = _nonspat_top(self, ml=True) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _error_regimes_multi( - self, - y, - x, - regimes, - w, - slx_lags, - cores, - method, - epsilon, - cols2regi, - vm, - name_x, - latex, + self, y, x, regimes, w, slx_lags, cores, method, epsilon, cols2regi, vm, name_x, latex ): + regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -485,9 +474,7 @@ def _error_regimes_multi( results = {} counter = 0 - self.output = pd.DataFrame( - columns=["var_names", "var_type", "regime", "equation"] - ) + self.output = 
pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) for r in self.regimes_set: """ if is_win: @@ -504,30 +491,24 @@ def _error_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ - r - ].betas - self.u[regi_ids[r],] = results[r].u - self.predy[regi_ids[r],] = results[r].predy - self.e_filtered[regi_ids[r],] = results[r].e_filtered + self.betas[ + (counter * self.kr) : ((counter + 1) * self.kr), + ] = results[r].betas + self.u[ + regi_ids[r], + ] = results[r].u + self.predy[ + regi_ids[r], + ] = results[r].predy + self.e_filtered[ + regi_ids[r], + ] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x results[r].other_top = _nonspat_top(results[r], ml=True) - self.output = pd.concat( - [ - self.output, - pd.DataFrame( - { - "var_names": results[r].name_x, - "var_type": ["x"] * (len(results[r].name_x) - 1) - + ["lambda"], - "regime": r, - "equation": r, - } - ), - ], - ignore_index=True, - ) + self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, + 'var_type': ['o'] + ['x'] * (len(results[r].name_x) - 2) + ['lambda'], + 'regime': r, 'equation': r})], ignore_index=True) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -535,19 +516,7 @@ def _error_regimes_multi( def _work_error( - y, - x, - regi_ids, - r, - w, - slx_lags, - method, - epsilon, - name_ds, - name_y, - name_x, - name_w, - name_regimes, + y, x, regi_ids, r, w, slx_lags, method, epsilon, name_ds, name_y, name_x, name_w, name_regimes ): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) y_r = y[regi_ids[r]] @@ -556,7 +525,7 @@ def _work_error( set_warn(model, warn) model.w = w_r model.title = "ML SPATIAL ERROR" - if slx_lags > 0: + if slx_lags >0: model.title += " WITH SLX (SLX-Error)" model.title += " - REGIME " + str(r) + " (METHOD = " + method + 
")" model.name_ds = name_ds diff --git a/spreg/ml_lag.py b/spreg/ml_lag.py index 8fecd2f..41cbba2 100755 --- a/spreg/ml_lag.py +++ b/spreg/ml_lag.py @@ -336,6 +336,10 @@ class ML_Lag(BaseML_Lag): If slx_lags>0, the specification becomes of the Spatial Durbin type. slx_vars : either "All" (default) or list of booleans to select x variables to be lagged + regimes : list or pandas.Series + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + For other regimes-specific arguments, see ML_Lag_Regimes method : string if 'full', brute force calculation (full matrix expressions) if 'ord', Ord eigenvalue method @@ -600,6 +604,7 @@ def __init__( w, slx_lags=0, slx_vars="All", + regimes=None, method="full", epsilon=0.0000001, spat_impacts="simple", @@ -610,68 +615,90 @@ def __init__( name_w=None, name_ds=None, latex=False, + **kwargs ): - n = USER.check_arrays(y, x) - y, name_y = USER.check_y(y, n, name_y) - w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) - x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # needs to be initialized for none, now with constant - set_warn(self, warn) - method = method.upper() - # using flex_wx - kx = len(name_x) - if slx_lags > 0: - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) - if isinstance(slx_vars,list): - kw = slx_vars.count(True) - if kw < kx - 1: - spat_diag = False # no common factor test - else: - kw = kx-1 - + if regimes is not None: + from .ml_lag_regimes import ML_Lag_Regimes + self.__class__ = ML_Lag_Regimes + self.__init__( + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + method=method, + epsilon=epsilon, + spat_impacts=spat_impacts, + vm=vm, + spat_diag=spat_diag, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + **kwargs + ) + else: + n = USER.check_arrays(y, x) + y, name_y = 
USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + x_constant, name_x, warn = USER.check_constant(x, name_x) + name_x = USER.set_name_x(name_x, x_constant) # needs to be initialized for none, now with constant + set_warn(self, warn) + method = method.upper() + # using flex_wx + kx = len(name_x) + if slx_lags > 0: + x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + if isinstance(slx_vars,list): + kw = slx_vars.count(True) + if kw < kx - 1: + spat_diag = False # no common factor test + else: + kw = kx-1 + - BaseML_Lag.__init__( - self, y=y, x=x_constant, w=w, slx_lags=slx_lags, method=method, epsilon=epsilon - ) - # increase by 1 to have correct aic and sc, include rho in count - self.k += 1 + BaseML_Lag.__init__( + self, y=y, x=x_constant, w=w, slx_lags=slx_lags, method=method, epsilon=epsilon + ) + # increase by 1 to have correct aic and sc, include rho in count + self.k += 1 - if slx_lags>0: - # kx = len(name_x) - # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + if slx_lags>0: + # kx = len(name_x) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL" + " (METHOD = " + method + ")" -# var_types = ['x'] * kx + ['wx'] * (kx-1) * slx_lags + ['rho'] - var_types = ['x'] * kx + ['wx'] * (kw) * slx_lags + ['rho'] - else: - self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG" + " (METHOD = " + method + ")" - var_types = ['x'] * len(name_x) + ['rho'] - self.slx_lags = slx_lags - self.slx_vars = slx_vars - self.name_ds = USER.set_name_ds(name_ds) - self.name_y = USER.set_name_y(name_y) - self.name_x = name_x # already has constant - name_ylag = USER.set_name_yend_sp(self.name_y) - self.name_x.append(name_ylag) # rho changed to last position - self.name_w = USER.set_name_w(name_w, w) - self.aic = DIAG.akaike(reg=self) - self.schwarz 
= DIAG.schwarz(reg=self) - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = var_types - self.output['regime'], self.output['equation'] = (0, 0) - self.other_top = _spat_pseudo_r2(self) - self.other_top += _nonspat_top(self, ml=True) - diag_out = None - if spat_diag and slx_lags==1: - diag_out = _spat_diag_out(self, w, 'yend', ml=True) - if spat_impacts: - self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags,slx_vars) - try: - diag_out += impacts_str - except TypeError: - diag_out = impacts_str - output(reg=self, vm=vm, robust=False, other_end=diag_out, latex=latex) + self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL" + " (METHOD = " + method + ")" + var_types = ['o'] + ['x'] * (kx-1) + ['wx'] * (kw) * slx_lags + ['rho'] + else: + self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG" + " (METHOD = " + method + ")" + var_types = ['o'] + ['x'] * (len(name_x)-1) + ['rho'] + self.slx_lags = slx_lags + self.slx_vars = slx_vars + self.name_ds = USER.set_name_ds(name_ds) + self.name_y = USER.set_name_y(name_y) + self.name_x = name_x # already has constant + name_ylag = USER.set_name_yend_sp(self.name_y) + self.name_x.append(name_ylag) # rho changed to last position + self.name_w = USER.set_name_w(name_w, w) + self.aic = DIAG.akaike(reg=self) + self.schwarz = DIAG.schwarz(reg=self) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = var_types + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _spat_pseudo_r2(self) + self.other_top += _nonspat_top(self, ml=True) + diag_out = None + if spat_diag and slx_lags==1: + diag_out = _spat_diag_out(self, w, 'yend', ml=True) + if spat_impacts: + self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags,slx_vars) + try: + diag_out += impacts_str + except TypeError: + diag_out = impacts_str + output(reg=self, vm=vm, robust=False, other_end=diag_out, 
latex=latex) def lag_c_loglik(rho, n, e0, e1, W): # concentrated log-lik for lag model, no constants, brute force diff --git a/spreg/ml_lag_regimes.py b/spreg/ml_lag_regimes.py index c77e8dd..b52f2f2 100644 --- a/spreg/ml_lag_regimes.py +++ b/spreg/ml_lag_regimes.py @@ -414,7 +414,7 @@ def __init__( else: # if regime_lag_sep == True: # w = REGI.w_regimes_union(w, w_i, self.regimes_set) - x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( + x, self.name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, @@ -434,7 +434,8 @@ def __init__( self.regime_lag_sep = regime_lag_sep self.output = pd.DataFrame(self.name_x, columns=['var_names']) self.output['regime'] = x_rlist + ['_Global'] - self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['rho'] + self.output['var_type'] = xtype + ['rho'] + #self.output['var_type'] = ['o'] + ['x'] * (len(self.name_x) - 2) + ['rho'] self.output['equation'] = 0 self.slx_lags = slx_lags diag_out = None @@ -444,7 +445,7 @@ def __init__( kwx = kwx - fixed_wx if kwx > 0: for m in self.regimes_set: - r_output = self.output[(self.output['regime'] == str(m)) & (self.output['var_type'] == 'x')] + r_output = self.output[(self.output['regime'] == str(m)) & (self.output['var_type'].isin(['x', 'o']))] wx_index = r_output.index[-kwx:] self.output.loc[wx_index, 'var_type'] = 'wx' if fixed_wx > 0: @@ -602,9 +603,9 @@ def ML_Lag_Regimes_Multi( results[r].other_mid = "" if slx_lags > 0: kx = (len(results[r].name_x) - 1) // (slx_lags + 1) - var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['rho'] + var_types = ['o']+['x']*kx + ['wx'] * kx * slx_lags + ['rho'] else: - var_types = ['x'] * (len(results[r].name_x) - 1) + ['rho'] + var_types = ['o']+['x'] * (len(results[r].name_x) - 2) + ['rho'] results[r].output = pd.DataFrame({'var_names': results[r].name_x, 'var_type': var_types, 'regime': r, 'equation': r}) diff --git a/spreg/nslx.py b/spreg/nslx.py index b31c210..b2aa5d7 100755 --- a/spreg/nslx.py +++ 
b/spreg/nslx.py @@ -385,7 +385,7 @@ def __init__( self.name_coords = name_coords self.title = "NONLINEAR SLX" self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * len(name_x) + ['wx'] * len(xw_name) + self.output['var_type'] = ['o'] + ['x'] * (len(name_x)-1) + ['wx'] * len(xw_name) self.output['regime'], self.output['equation'] = (0, 0) self.other_top = _nslx_out(self, "top") self.other_mid = _nslx_out(self, "mid") diff --git a/spreg/ols.py b/spreg/ols.py index 5023766..5b1d341 100644 --- a/spreg/ols.py +++ b/spreg/ols.py @@ -6,9 +6,8 @@ from . import user_output as USER from .output import output, _spat_diag_out, _nonspat_mid, _nonspat_top, _summary_vif from . import robust as ROBUST -from .utils import spdot, RegressionPropsY, RegressionPropsVM, set_warn, get_lags +from .utils import spdot, RegressionPropsY, RegressionPropsVM, set_warn import pandas as pd -from libpysal import weights # needed for check on kernel weights in slx __all__ = ["OLS"] @@ -150,6 +149,10 @@ class OLS(BaseOLS): If slx_lags>0, the specification becomes of the SLX type. slx_vars : either "All" (default) or list of booleans to select x variables to be lagged + regimes : list or pandas.Series + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + For other regimes-specific arguments, see OLS_Regimes sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. 
nonspat_diag : boolean @@ -440,6 +443,7 @@ def __init__( gwk=None, slx_lags = 0, slx_vars = "All", + regimes = None, sig2n_k=True, nonspat_diag=True, spat_diag=False, @@ -453,60 +457,86 @@ def __init__( name_gwk=None, name_ds=None, latex=False, + **kwargs, ): - - n = USER.check_arrays(y, x) - y, name_y = USER.check_y(y, n, name_y) - USER.check_robust(robust, gwk) - spat_diag, warn = USER.check_spat_diag(spat_diag=spat_diag, w=w, robust=robust, slx_lags=slx_lags) - set_warn(self, warn) - - if robust in ["hac", "white"] and white_test: - set_warn( - self, - "White test not available when standard errors are estimated by HAC or White correction.", - ) - white_test = False - - x_constant, name_x, warn = USER.check_constant(x, name_x) - set_warn(self, warn) - self.name_x = USER.set_name_x(name_x, x_constant) - - if spat_diag or moran: - w = USER.check_weights(w, y, slx_lags=slx_lags, w_required=True, allow_wk=True) + if regimes is not None: + from .ols_regimes import OLS_Regimes + self.__class__ = OLS_Regimes + self.__init__( + y=y, + x=x, + regimes=regimes, + w=w, + robust=robust, + gwk=gwk, + slx_lags=slx_lags, + sig2n_k=sig2n_k, + nonspat_diag=nonspat_diag, + spat_diag=spat_diag, + moran=moran, + white_test=white_test, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_gwk=name_gwk, + name_ds=name_ds, + latex=latex, + **kwargs, + ) else: - w = USER.check_weights(w, y, slx_lags=slx_lags, allow_wk=True) - if slx_lags >0: -# lag_x = get_lags(w, x_constant[:, 1:], slx_lags) -# x_constant = np.hstack((x_constant, lag_x)) -# self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) - x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) - - BaseOLS.__init__( - self, y=y, x=x_constant, robust=robust, gwk=gwk, sig2n_k=sig2n_k - ) - self.name_ds = USER.set_name_ds(name_ds) - self.name_y = USER.set_name_y(name_y) - self.slx_lags = slx_lags - self.title = "ORDINARY LEAST SQUARES" 
- if slx_lags > 0: - self.title += " WITH SPATIALLY LAGGED X (SLX)" - self.robust = USER.set_robust(robust) - self.name_w = USER.set_name_w(name_w, w) - self.name_gwk = USER.set_name_w(name_gwk, gwk) - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) - self.output['regime'], self.output['equation'] = (0, 0) - self.other_top, self.other_mid, other_end = ("", "", "") # strings where function-specific diag. are stored - if nonspat_diag: - self.other_mid += _nonspat_mid(self, white_test=white_test) - self.other_top += _nonspat_top(self) - if vif: - self.other_mid += _summary_vif(self) - if spat_diag: - other_end += _spat_diag_out(self, w, 'ols', moran=moran) - output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) + n = USER.check_arrays(y, x) + y, name_y = USER.check_y(y, n, name_y) + USER.check_robust(robust, gwk) + spat_diag, warn = USER.check_spat_diag(spat_diag=spat_diag, w=w, robust=robust, slx_lags=slx_lags) + set_warn(self, warn) + + if robust in ["hac", "white"] and white_test: + set_warn( + self, + "White test not available when standard errors are estimated by HAC or White correction.", + ) + white_test = False + + x_constant, name_x, warn = USER.check_constant(x, name_x) + set_warn(self, warn) + self.name_x = USER.set_name_x(name_x, x_constant) + + if spat_diag or moran: + w = USER.check_weights(w, y, slx_lags=slx_lags, w_required=True, allow_wk=True) + else: + w = USER.check_weights(w, y, slx_lags=slx_lags, allow_wk=True) + if slx_lags >0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + + BaseOLS.__init__( + self, y=y, x=x_constant, robust=robust, gwk=gwk, sig2n_k=sig2n_k + ) + self.name_ds = USER.set_name_ds(name_ds) + 
self.name_y = USER.set_name_y(name_y) + self.slx_lags = slx_lags + self.title = "ORDINARY LEAST SQUARES" + if slx_lags > 0: + self.title += " WITH SPATIALLY LAGGED X (SLX)" + self.robust = USER.set_robust(robust) + self.name_w = USER.set_name_w(name_w, w) + self.name_gwk = USER.set_name_w(name_gwk, gwk) + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['o']+['x'] * (len(self.name_x)-1) + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top, self.other_mid, other_end = ("", "", "") # strings where function-specific diag. are stored + if nonspat_diag: + self.other_mid += _nonspat_mid(self, white_test=white_test) + self.other_top += _nonspat_top(self) + if vif: + self.other_mid += _summary_vif(self) + if spat_diag: + other_end += _spat_diag_out(self, w, 'ols', moran=moran) + output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) diff --git a/spreg/ols_regimes.py b/spreg/ols_regimes.py index 5a6027a..694a0c9 100755 --- a/spreg/ols_regimes.py +++ b/spreg/ols_regimes.py @@ -64,9 +64,7 @@ class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): constant_regi: string, optional Switcher controlling the constant term setup. 
It may take the following values: - * 'one': a vector of ones is appended to x and held constant across regimes - * 'many': a vector of ones is appended to x and considered different per regime (default) cols2regi : list, 'all' Argument indicating whether each @@ -492,16 +490,15 @@ def __init__( latex ) else: - x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( + x, self.name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi, cols2regi, name_x, rlist=True ) self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + self.output['var_type'] = xtype self.output['regime'] = x_rlist self.output['equation'] = 0 - BaseOLS.__init__(self, y=y, x=x, robust=robust, gwk=gwk, sig2n_k=sig2n_k) if regime_err_sep == True and robust == None: y2, x2 = REGI._get_weighted_var( @@ -653,7 +650,7 @@ def _ols_regimes_multi( self.name_y += results[r].name_y self.name_x += results[r].name_x self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, - 'var_type': ['x']*len(results[r].name_x), + 'var_type': ['o']+['x']*(len(results[r].name_x)-1), 'regime': r, 'equation': r})], ignore_index=True) results[r].other_top, results[r].other_mid = ("", "") if nonspat_diag: @@ -668,7 +665,7 @@ def _ols_regimes_multi( other_end = "" if spat_diag: self._get_spat_diag_props(x_constant, sig2n_k) - #other_end += _spat_diag_out(self, w, 'ols', moran=moran) Need to consider W before implementing + other_end += _spat_diag_out(self, w, 'ols', moran=moran) #Need to consider W output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) def _get_spat_diag_props(self, x, sig2n_k): diff --git a/spreg/output.py b/spreg/output.py index 4683da5..22b75f2 100755 --- a/spreg/output.py +++ b/spreg/output.py @@ -28,6 +28,7 @@ def output(reg, vm, other_end=False, robust=False, latex=False): strSummary, reg = out_part_middle(strSummary, reg, robust, m=eq, latex=latex) strSummary, reg = 
out_part_end(strSummary, reg, vm, other_end, m=eq) reg.summary = strSummary + reg._var_type = reg.output['var_type'] reg.output.sort_values(by=['equation', 'regime'], inplace=True) reg.output.drop(['var_type', 'regime', 'equation'], axis=1, inplace=True) @@ -62,21 +63,22 @@ def out_part_top(strSummary, reg, m): _reg.n, ) - strSummary += "%-20s:%12.4f %-22s:%12d\n" % ( - "Mean dependent var", - _reg.mean_y, - "Number of Variables", - _reg.k, - ) - strSummary += "%-20s:%12.4f %-22s:%12d\n" % ( - "S.D. dependent var", - _reg.std_y, - "Degrees of Freedom", - _reg.n - _reg.k, - ) + if not 'Probit' in _reg.__class__.__name__: + strSummary += "%-20s:%12.4f %-22s:%12d\n" % ( + "Mean dependent var", + _reg.mean_y, + "Number of Variables", + _reg.k, + ) + strSummary += "%-20s:%12.4f %-22s:%12d\n" % ( + "S.D. dependent var", + _reg.std_y, + "Degrees of Freedom", + _reg.n - _reg.k, + ) _reg.std_err = diagnostics.se_betas(_reg) - if 'OLS' in reg.__class__.__name__: + if 'OLS' in _reg.__class__.__name__: _reg.t_stat = diagnostics.t_stat(_reg) _reg.r2 = diagnostics.r2(_reg) _reg.ar2 = diagnostics.ar2(_reg) @@ -87,10 +89,9 @@ def out_part_top(strSummary, reg, m): _reg.ar2, ) _reg.__summary["summary_zt"] = "t" - else: _reg.z_stat = diagnostics.t_stat(_reg, z_stat=True) - if not 'NSLX' in reg.__class__.__name__: + if 'NSLX' not in _reg.__class__.__name__ and 'Probit' not in _reg.__class__.__name__: _reg.pr2 = diagnostics_tsls.pr2_aspatial(_reg) strSummary += "%-20s:%12.4f\n" % ("Pseudo R-squared", _reg.pr2) _reg.__summary["summary_zt"] = "z" @@ -276,7 +277,7 @@ def _spat_diag_out(reg, w, type, moran=False, ml=False): wx_indices = reg.output[(reg.output['var_type'] == 'wx') & (reg.output['regime'] != '_Global')].index x_indices = [] for m in reg.output['regime'].unique(): - x_indices.extend(reg.output[(reg.output['regime'] == m) & (reg.output['var_type'] == 'x')].index[1:]) + x_indices.extend(reg.output[(reg.output['regime'] == m) & (reg.output['var_type'] == 'x')].index) vm_indices 
= x_indices + wx_indices.tolist() + reg.output[reg.output['var_type'] == 'rho'].index.tolist() cft, cft_p = diagnostics_sp.comfac_test(reg.rho, reg.betas[x_indices], @@ -598,7 +599,7 @@ def _summary_impacts(reg, w, spat_impacts, slx_lags=0, slx_vars="All",regimes=Fa spat_impacts = [x.lower() for x in spat_impacts] #variables = reg.output.query("var_type in ['x', 'yend'] and index != 0") # excludes constant - variables = reg.output.query("var_type == 'x' and index != 0") # excludes constant and endogenous variables + variables = reg.output[reg.output["var_type"] == 'x'] if regimes: variables = variables[~variables['var_names'].str.endswith('_CONSTANT')] @@ -724,4 +725,113 @@ def _nslx_out(reg, section): param_i = ', '.join(map(str, param[i])) text_wrapper = TW.TextWrapper(width=76, subsequent_indent=" ") strSummary += text[i] + text_wrapper.fill(param_i) + "\n" - return strSummary \ No newline at end of file + return strSummary + +def _probit_out(reg, spat_diag=False, sptests_only=False): + """ + Summary of the Probit model. 
+ + Parameters + ---------- + reg: spreg regression object + + Returns + ------- + strSummary: string with Probit model summary specifics + + """ + + if not sptests_only: + strSummary_top = "%-20s:%12.2f %-22s:%12d\n%-20s:%12.2f %-22s:%12d\n%-20s:%12.2f %-22s:%12.4f\n%-20s:%12.2f %-22s:%12.4f\n%-20s:%12.2f %-22s:%12.4f\n" % ( + "True positive rate", + reg.fit["TPR"], + "Number of Variables", + reg.k, + "True negative rate", + reg.fit["TNR"], + "Degrees of Freedom", + reg.n - reg.k, + "Balanced accuracy", + reg.fit["BA"], + "Log-Likelihood", + reg.logl, + "% correctly pred.", + reg.fit["PREDPC"], + "LR test", + reg.LR[0], + "McFadden's rho", + reg.mcfadrho, + "LR test (p-value)", + reg.LR[1], + ) + + strSummary_mid = "\nMARGINAL EFFECTS\n" + if reg.scalem == "phimean": + strSummary_mid += "Method: Mean of individual marginal effects\n" + elif reg.scalem == "xmean": + strSummary_mid += "Method: Marginal effects at variables mean\n" + strSummary_mid += "------------------------------------------------------------------------------------\n" + strSummary_mid += " Variable Slope Std.Error z-Statistic Probability\n" + strSummary_mid += "------------------------------------------------------------------------------------\n" + for i in range(len(reg.slopes)): + strSummary_mid += "%20s %12.5f %12.5f %12.5f %12.5f\n" % ( + reg.name_x[i + 1], + reg.slopes[i][0], + reg.slopes_std_err[i], + reg.slopes_z_stat[i][0], + reg.slopes_z_stat[i][1], + ) + strSummary_mid += "------------------------------------------------------------------------------------\n" + + strSummary_end = "\nCONFUSION MATRIX\n" + data = { + "Positive (1)": [ + reg.predtable["actpos"], # Observed positives + reg.predtable["predpos"], # Predicted positives + reg.predtable["truepos"], # Correctly predicted positives + reg.predtable["falseneg"] # Wrongly predicted positives + ], + "Negative (0)": [ + reg.predtable["actneg"], # Observed negatives + reg.predtable["predneg"], # Predicted negatives + 
reg.predtable["trueneg"], # Correctly predicted negatives + reg.predtable["falsepos"] # Wrongly predicted negatives + ] + } + + rows = ["Observed", "Predicted", "Correctly Predicted", "Wrongly Predicted"] + strSummary_end += f"{'':<26}{'Positive (1)':<18}{'Negative (0)':<15}\n" + for row, pos, neg in zip(rows, data["Positive (1)"], data["Negative (0)"]): + strSummary_end += f"{row:<30}{pos:<15}{neg:<15}\n" + strSummary_end += "\n------------------------------------------------------------------------------------\n" + + if spat_diag or sptests_only: + try: + strSummary_end += "\nDIAGNOSTICS FOR SPATIAL DEPENDENCE\n" + except: + strSummary_end = "\nDIAGNOSTICS FOR SPATIAL DEPENDENCE\n" + strSummary_end += "TEST DF VALUE PROB\n" + strSummary_end += "%-23s %2d %12.3f %9.4f\n" % ( + "Kelejian-Prucha (error)", + 1, + reg.KP_error[0], + reg.KP_error[1], + ) + strSummary_end += "%-23s %2d %12.3f %9.4f\n" % ( + "Pinkse (error)", + 1, + reg.Pinkse_error[0], + reg.Pinkse_error[1], + ) + strSummary_end += "%-23s %2d %12.3f %9.4f\n\n" % ( + "Pinkse-Slade (error)", + 1, + reg.PS_error[0], + reg.PS_error[1], + ) + if sptests_only: + return strSummary_end + else: + strSummary_end = "" + + return strSummary_top, strSummary_mid, strSummary_end \ No newline at end of file diff --git a/spreg/probit.py b/spreg/probit.py old mode 100644 new mode 100755 index 14acb95..28e96c2 --- a/spreg/probit.py +++ b/spreg/probit.py @@ -1,25 +1,26 @@ """Probit regression class and diagnostics.""" -__author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu" +__author__ = "Luc Anselin lanselin@gmail.com, Pedro V. Amaral pedrovma@gmail.com" import numpy as np import numpy.linalg as la import scipy.optimize as op +import pandas as pd from scipy.stats import norm, chi2 from libpysal import weights - chisqprob = chi2.sf import scipy.sparse as SP from . import user_output as USER -from . 
import summary_output as SUMMARY -from .utils import spdot, spbroadcast, set_warn, get_lags +from . import diagnostics as DIAGNOSTICS +from . import diagnostics_probit as PROBDIAG +from .output import output, _probit_out +from .utils import spdot, spbroadcast, set_warn __all__ = ["Probit"] - -class BaseProbit(object): +class BaseProbit(): """ - Probit class to do all the computations + Probit class to do the core estimation Parameters ---------- @@ -28,73 +29,58 @@ class BaseProbit(object): nxk array of independent variables (assumed to be aligned with y) y : array nx1 array of dependent binary variable - w : W - PySAL weights instance or spatial weights sparse matrix - aligned with y optim : string Optimization method. Default: 'newton' (Newton-Raphson). Alternatives: 'ncg' (Newton-CG), 'bfgs' (BFGS algorithm) - scalem : string - Method to calculate the scale of the marginal effects. - Default: 'phimean' (Mean of individual marginal effects) - Alternative: 'xmean' (Marginal effects at variables mean) + bstart : list with starting values for betas maxiter : int Maximum number of iterations until optimizer stops Attributes ---------- - x : array Two dimensional array with n rows and one column for each independent (exogenous) variable, including the constant + xmean : array + kx1 vector with means of explanatory variables + (for use in slopes) y : array nx1 array of dependent variable + optim : string + optimization method + maxiter : int + maximum number of iterations + q : array + nx1 array of transformed dependent variable 2*y - 1 betas : array kx1 array with estimated coefficients + bstart : list with starting values predy : array - nx1 array of predicted y values + nx1 array of predicted y values (probabilities) n : int Number of observations k : int Number of variables vm : array Variance-covariance matrix (kxk) - z_stat : list of tuples - z statistic; each tuple contains the pair (statistic, - p-value), where each is a float - xmean : array - Mean of the 
independent variables (kx1) - predpc : float - Percent of y correctly predicted logl : float Log-Likelihhod of the estimation - scalem : string - Method to calculate the scale of the marginal effects. - scale : float - Scale of the marginal effects. - slopes : array - Marginal effects of the independent variables (k-1x1) - Note: Disregards the presence of dummies. - slopes_vm : array - Variance-covariance matrix of the slopes (k-1xk-1) - LR : tuple - Likelihood Ratio test of all coefficients = 0 - (test statistics, p-value) - Pinkse_error: float - Lagrange Multiplier test against spatial error correlation. - Implemented as presented in :cite:`Pinkse2004`. - KP_error : float - Moran's I type test against spatial error correlation. - Implemented as presented in :cite:`Kelejian2001`. - PS_error : float - Lagrange Multiplier test against spatial error correlation. - Implemented as presented in :cite:`Pinkse1998`. + xb : predicted value of linear index + nx1 array + predybin : predicted value as binary + =1 for predy > 0.5 + phiy : normal density at xb (phi function) + nx1 array + u_naive : naive residuals y - predy + nx1 array + u_gen : generalized residuals + nx1 array warning : boolean if True Maximum number of iterations exceeded or gradient and/or function calls not changing. 
- Examples + Examples - needs updating -------- >>> import numpy as np >>> import libpysal @@ -104,9 +90,7 @@ class BaseProbit(object): >>> y = np.array([dbf.by_col('CRIME')]).T >>> x = np.array([dbf.by_col('INC'), dbf.by_col('HOVAL')]).T >>> x = np.hstack((np.ones(y.shape),x)) - >>> w = libpysal.io.open(libpysal.examples.get_path("columbus.gal"), 'r').read() - >>> w.transform='r' - >>> model = spreg.probit.BaseProbit((y>40).astype(float), x, w=w) + >>> model = spreg.probit.BaseProbit((y>40).astype(float), x) >>> print(np.around(model.betas, decimals=6)) [[ 3.353811] [-0.199653] @@ -117,470 +101,51 @@ class BaseProbit(object): [-0.043627 0.004114 -0.000193] [-0.008052 -0.000193 0.00031 ]] - >>> tests = np.array([['Pinkse_error','KP_error','PS_error']]) - >>> stats = np.array([[model.Pinkse_error[0],model.KP_error[0],model.PS_error[0]]]) - >>> pvalue = np.array([[model.Pinkse_error[1],model.KP_error[1],model.PS_error[1]]]) - >>> print(np.hstack((tests.T,np.around(np.hstack((stats.T,pvalue.T)),6)))) - [['Pinkse_error' '3.131719' '0.076783'] - ['KP_error' '1.721312' '0.085194'] - ['PS_error' '2.558166' '0.109726']] """ - def __init__(self, y, x, w=None, optim="newton", scalem="phimean", maxiter=100): + def __init__(self, y, x, optim="newton",bstart=False,maxiter=100): self.y = y + self.q = 2* self.y - 1 self.x = x + self.xmean = np.mean(self.x,axis=0).reshape(-1,1) self.n, self.k = x.shape self.optim = optim - self.scalem = scalem - self.w = w + self.bstart = bstart self.maxiter = maxiter + par_est, self.warning = self.par_est() - self.betas = np.reshape(par_est[0], (self.k, 1)) + self.betas = par_est[0].reshape(-1,1) + self.logl = -float(par_est[1]) + H = self.hessian(self.betas) + self.vm = - la.inv(H) + + # predicted values + self.xb = self.x @ self.betas + self.predy = norm.cdf(self.xb) + self.predybin = (self.predy > 0.5) * 1 + self.phiy = norm.pdf(self.xb) + + # residuals + self.u_naive = self.y - self.predy + Phi_prod = self.predy * (1.0 - self.predy) + 
self.u_gen = self.phiy * (self.u_naive / Phi_prod) - @property - def vm(self): - try: - return self._cache["vm"] - except AttributeError: - self._cache = {} - H = self.hessian(self.betas) - self._cache["vm"] = -la.inv(H) - except KeyError: - H = self.hessian(self.betas) - self._cache["vm"] = -la.inv(H) - return self._cache["vm"] - - @vm.setter - def vm(self, val): - try: - self._cache["vm"] = val - except AttributeError: - self._cache = {} - self._cache["vm"] = val - - @property # could this get packaged into a separate function or something? It feels weird to duplicate this. - def z_stat(self): - try: - return self._cache["z_stat"] - except AttributeError: - self._cache = {} - variance = self.vm.diagonal() - zStat = self.betas.reshape( - len(self.betas), - ) / np.sqrt(variance) - rs = {} - for i in range(len(self.betas)): - rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2) - self._cache["z_stat"] = rs.values() - except KeyError: - variance = self.vm.diagonal() - zStat = self.betas.reshape( - len(self.betas), - ) / np.sqrt(variance) - rs = {} - for i in range(len(self.betas)): - rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2) - self._cache["z_stat"] = rs.values() - return self._cache["z_stat"] - - @z_stat.setter - def z_stat(self, val): - try: - self._cache["z_stat"] = val - except AttributeError: - self._cache = {} - self._cache["z_stat"] = val - - @property - def slopes_std_err(self): - try: - return self._cache["slopes_std_err"] - except AttributeError: - self._cache = {} - self._cache["slopes_std_err"] = np.sqrt(self.slopes_vm.diagonal()) - except KeyError: - self._cache["slopes_std_err"] = np.sqrt(self.slopes_vm.diagonal()) - return self._cache["slopes_std_err"] - - @slopes_std_err.setter - def slopes_std_err(self, val): - try: - self._cache["slopes_std_err"] = val - except AttributeError: - self._cache = {} - self._cache["slopes_std_err"] = val - - @property - def slopes_z_stat(self): - try: - return self._cache["slopes_z_stat"] - except AttributeError: - 
self._cache = {} - return self.slopes_z_stat - except KeyError: - zStat = ( - self.slopes.reshape( - len(self.slopes), - ) - / self.slopes_std_err - ) - rs = {} - for i in range(len(self.slopes)): - rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2) - self._cache["slopes_z_stat"] = list(rs.values()) - return self._cache["slopes_z_stat"] - - @slopes_z_stat.setter - def slopes_z_stat(self, val): - try: - self._cache["slopes_z_stat"] = val - except AttributeError: - self._cache = {} - self._cache["slopes_z_stat"] = val - - @property - def xmean(self): - try: - return self._cache["xmean"] - except AttributeError: - self._cache = {} - try: # why is this try-accept? can x be a list?? - self._cache["xmean"] = np.reshape(sum(self.x) / self.n, (self.k, 1)) - except: - self._cache["xmean"] = np.reshape( - sum(self.x).toarray() / self.n, (self.k, 1) - ) - except KeyError: - try: - self._cache["xmean"] = np.reshape(sum(self.x) / self.n, (self.k, 1)) - except: - self._cache["xmean"] = np.reshape( - sum(self.x).toarray() / self.n, (self.k, 1) - ) - return self._cache["xmean"] - - @xmean.setter - def xmean(self, val): - try: - self._cache["xmean"] = val - except AttributeError: - self._cache = {} - self._cache["xmean"] = val - - @property - def xb(self): - try: - return self._cache["xb"] - except AttributeError: - self._cache = {} - self._cache["xb"] = spdot(self.x, self.betas) - except KeyError: - self._cache["xb"] = spdot(self.x, self.betas) - return self._cache["xb"] - - @xb.setter - def xb(self, val): - try: - self._cache["xb"] = val - except AttributeError: - self._cache = {} - self._cache["xb"] = val - - @property - def predy(self): - try: - return self._cache["predy"] - except AttributeError: - self._cache = {} - self._cache["predy"] = norm.cdf(self.xb) - except KeyError: - self._cache["predy"] = norm.cdf(self.xb) - return self._cache["predy"] - - @predy.setter - def predy(self, val): - try: - self._cache["predy"] = val - except AttributeError: - self._cache = {} - 
self._cache["predy"] = val - - @property - def predpc(self): - try: - return self._cache["predpc"] - except AttributeError: - self._cache = {} - predpc = abs(self.y - self.predy) - for i in range(len(predpc)): - if predpc[i] > 0.5: - predpc[i] = 0 - else: - predpc[i] = 1 - self._cache["predpc"] = float(100.0 * np.sum(predpc) / self.n) - except KeyError: - predpc = abs(self.y - self.predy) - for i in range(len(predpc)): - if predpc[i] > 0.5: - predpc[i] = 0 - else: - predpc[i] = 1 - self._cache["predpc"] = float(100.0 * np.sum(predpc) / self.n) - return self._cache["predpc"] - - @predpc.setter - def predpc(self, val): - try: - self._cache["predpc"] = val - except AttributeError: - self._cache = {} - self._cache["predpc"] = val - - @property - def phiy(self): - try: - return self._cache["phiy"] - except AttributeError: - self._cache = {} - self._cache["phiy"] = norm.pdf(self.xb) - except KeyError: - self._cache["phiy"] = norm.pdf(self.xb) - return self._cache["phiy"] - - @phiy.setter - def phiy(self, val): - try: - self._cache["phiy"] = val - except AttributeError: - self._cache = {} - self._cache["phiy"] = val - - @property - def scale(self): - try: - return self._cache["scale"] - except AttributeError: - self._cache = {} - if self.scalem == "phimean": - self._cache["scale"] = float(1.0 * np.sum(self.phiy) / self.n) - elif self.scalem == "xmean": - self._cache["scale"] = float(norm.pdf(np.dot(self.xmean.T, self.betas))) - except KeyError: - if self.scalem == "phimean": - self._cache["scale"] = float(1.0 * np.sum(self.phiy) / self.n) - if self.scalem == "xmean": - self._cache["scale"] = float(norm.pdf(np.dot(self.xmean.T, self.betas))) - return self._cache["scale"] - - @scale.setter - def scale(self, val): - try: - self._cache["scale"] = val - except AttributeError: - self._cache = {} - self._cache["scale"] = val - - @property - def slopes(self): - try: - return self._cache["slopes"] - except AttributeError: - self._cache = {} - self._cache["slopes"] = self.betas[1:] 
* self.scale - except KeyError: - self._cache["slopes"] = self.betas[1:] * self.scale - return self._cache["slopes"] - - @slopes.setter - def slopes(self, val): - try: - self._cache["slopes"] = val - except AttributeError: - self._cache = {} - self._cache["slopes"] = val - - @property - def slopes_vm(self): - try: - return self._cache["slopes_vm"] - except AttributeError: - self._cache = {} - x = self.xmean - b = self.betas - dfdb = np.eye(self.k) - spdot(b.T, x) * spdot(b, x.T) - slopes_vm = (self.scale**2) * np.dot(np.dot(dfdb, self.vm), dfdb.T) - self._cache["slopes_vm"] = slopes_vm[1:, 1:] - except KeyError: - x = self.xmean - b = self.betas - dfdb = np.eye(self.k) - spdot(b.T, x) * spdot(b, x.T) - slopes_vm = (self.scale**2) * np.dot(np.dot(dfdb, self.vm), dfdb.T) - self._cache["slopes_vm"] = slopes_vm[1:, 1:] - return self._cache["slopes_vm"] - - @slopes_vm.setter - def slopes_vm(self, val): - try: - self._cache["slopes_vm"] = val - except AttributeError: - self._cache = {} - self._cache["slopes_vm"] = val - - @property - def LR(self): - try: - return self._cache["LR"] - except AttributeError: - self._cache = {} - P = 1.0 * np.sum(self.y) / self.n - LR = float( - -2 * (self.n * (P * np.log(P) + (1 - P) * np.log(1 - P)) - self.logl) - ) - self._cache["LR"] = (LR, chisqprob(LR, self.k)) - except KeyError: - P = 1.0 * np.sum(self.y) / self.n - LR = float( - -2 * (self.n * (P * np.log(P) + (1 - P) * np.log(1 - P)) - self.logl) - ) - self._cache["LR"] = (LR, chisqprob(LR, self.k)) - return self._cache["LR"] - - @LR.setter - def LR(self, val): - try: - self._cache["LR"] = val - except AttributeError: - self._cache = {} - self._cache["LR"] = val - - @property - def u_naive(self): - try: - return self._cache["u_naive"] - except AttributeError: - self._cache = {} - self._cache["u_naive"] = self.y - self.predy - except KeyError: - u_naive = self.y - self.predy - self._cache["u_naive"] = u_naive - return self._cache["u_naive"] - - @u_naive.setter - def u_naive(self, 
val): - try: - self._cache["u_naive"] = val - except AttributeError: - self._cache = {} - self._cache["u_naive"] = val - - @property - def u_gen(self): - try: - return self._cache["u_gen"] - except AttributeError: - self._cache = {} - Phi_prod = self.predy * (1 - self.predy) - u_gen = self.phiy * (self.u_naive / Phi_prod) - self._cache["u_gen"] = u_gen - except KeyError: - Phi_prod = self.predy * (1 - self.predy) - u_gen = self.phiy * (self.u_naive / Phi_prod) - self._cache["u_gen"] = u_gen - return self._cache["u_gen"] - - @u_gen.setter - def u_gen(self, val): - try: - self._cache["u_gen"] = val - except AttributeError: - self._cache = {} - self._cache["u_gen"] = val - - @property - def Pinkse_error(self): - try: - return self._cache["Pinkse_error"] - except AttributeError: - self._cache = {} - ( - self._cache["Pinkse_error"], - self._cache["KP_error"], - self._cache["PS_error"], - ) = sp_tests(self) - except KeyError: - ( - self._cache["Pinkse_error"], - self._cache["KP_error"], - self._cache["PS_error"], - ) = sp_tests(self) - return self._cache["Pinkse_error"] - - @Pinkse_error.setter - def Pinkse_error(self, val): - try: - self._cache["Pinkse_error"] = val - except AttributeError: - self._cache = {} - self._cache["Pinkse_error"] = val - - @property - def KP_error(self): - try: - return self._cache["KP_error"] - except AttributeError: - self._cache = {} - ( - self._cache["Pinkse_error"], - self._cache["KP_error"], - self._cache["PS_error"], - ) = sp_tests(self) - except KeyError: - ( - self._cache["Pinkse_error"], - self._cache["KP_error"], - self._cache["PS_error"], - ) = sp_tests(self) - return self._cache["KP_error"] - - @KP_error.setter - def KP_error(self, val): - try: - self._cache["KP_error"] = val - except AttributeError: - self._cache = {} - self._cache["KP_error"] = val - - @property - def PS_error(self): - try: - return self._cache["PS_error"] - except AttributeError: - self._cache = {} - ( - self._cache["Pinkse_error"], - self._cache["KP_error"], - 
self._cache["PS_error"], - ) = sp_tests(self) - except KeyError: - ( - self._cache["Pinkse_error"], - self._cache["KP_error"], - self._cache["PS_error"], - ) = sp_tests(self) - return self._cache["PS_error"] - - @PS_error.setter - def PS_error(self, val): - try: - self._cache["PS_error"] = val - except AttributeError: - self._cache = {} - self._cache["PS_error"] = val def par_est(self): - start = np.dot(la.inv(spdot(self.x.T, self.x)), spdot(self.x.T, self.y)) + + if self.bstart: + if len(self.bstart) != self.k: + raise Exception("Incompatible number of parameters in starting values") + else: + start = np.array(self.bstart).reshape(-1,1) + else: + xtx = self.x.T @ self.x + xty = self.x.T @ self.y + start = la.solve(xtx,xty) + flogl = lambda par: -self.ll(par) + if self.optim == "newton": fgrad = lambda par: self.gradient(par) fhess = lambda par: self.hessian(par) @@ -629,6 +194,7 @@ def hessian(self, par): class Probit(BaseProbit): + """ Classic non-spatial Probit and spatial diagnostics. The class includes a printout that formats all the results and tests in a nice format. @@ -650,17 +216,23 @@ class Probit(BaseProbit): nx1 array of dependent binary variable w : W PySAL weights instance aligned with y - slx_lags : integer - Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX type. + slx_lags : integer + Number of spatial lags of X to include in the model specification. + If slx_lags>0, the specification becomes of the SLX type. + slx_vars : variables to be lagged when slx_lags > 0 + default = "All", otherwise a list with Booleans indicating which + variables must be lagged (True) or not (False) optim : string Optimization method. Default: 'newton' (Newton-Raphson). Alternatives: 'ncg' (Newton-CG), 'bfgs' (BFGS algorithm) + bstart : list + list with starting values for betas, default = False scalem : string Method to calculate the scale of the marginal effects. 
Default: 'phimean' (Mean of individual marginal effects) Alternative: 'xmean' (Marginal effects at variables mean) + predflag : flag to print prediction table maxiter : int Maximum number of iterations until optimizer stops name_y : string @@ -672,56 +244,90 @@ class Probit(BaseProbit): name_ds : string Name of dataset for use in output - Attributes + Attributes ---------- x : array Two dimensional array with n rows and one column for each independent (exogenous) variable, including the constant + xmean : array + kx1 vector with means of explanatory variables + (for use in slopes) y : array nx1 array of dependent variable + w : spatial weights object + optim : string + optimization method + predflag : flag to print prediction table (predtable) + maxiter : int + maximum number of iterations + q : array + nx1 array of transformed dependent variable 2*y - 1 betas : array kx1 array with estimated coefficients + bstart : list with starting values for betas or False predy : array - nx1 array of predicted y values + nx1 array of predicted y values (probabilities) n : int Number of observations k : int Number of variables vm : array Variance-covariance matrix (kxk) + logl : float + Log-Likelihhod of the estimation + xb : predicted value of linear index + nx1 array + predybin : predicted value as binary + =1 for predy > 0.5 + phiy : normal density at xb (phi function) + nx1 array + u_naive : naive residuals y - predy + nx1 array + u_gen : generalized residuals + nx1 array + warning : boolean + if True Maximum number of iterations exceeded or gradient + and/or function calls not changing. 
+ std_err : standard errors of estimates z_stat : list of tuples z statistic; each tuple contains the pair (statistic, p-value), where each is a float - xmean : array - Mean of the independent variables (kx1) + predtable : dictionary + includes margins and cells of actual and predicted + values for discrete choice model + fit : a dictionary containing various measures of fit + TPR : true positive rate (sensitivity, recall, hit rate) + TNR : true negative rate (specificity, selectivity) + PREDPC : accuracy, percent correctly predicted + BA : balanced accuracy predpc : float - Percent of y correctly predicted - logl : float - Log-Likelihhod of the estimation - scalem : string - Method to calculate the scale of the marginal effects. + Percent of y correctly predicted (legacy) + LRtest : dictionary + contains the statistic for the null model (L0), the LR test(likr), + the degrees of freedom (df) and the p-value (pvalue) + L0 : likelihood of null model + LR : tuple (legacy) + Likelihood Ratio test of all coefficients = 0 + (test statistics, p-value) + mcfadrho : McFadden rho statistics of fit scale : float Scale of the marginal effects. slopes : array Marginal effects of the independent variables (k-1x1) slopes_vm : array Variance-covariance matrix of the slopes (k-1xk-1) - LR : tuple - Likelihood Ratio test of all coefficients = 0 - (test statistics, p-value) - Pinkse_error: float + slopes_std_err : estimates of standard errors of marginal effects + slopes_z_stat : tuple with z-statistics and p-values for marginal effects + Pinkse_error: array with statistic and p-value Lagrange Multiplier test against spatial error correlation. Implemented as presented in :cite:`Pinkse2004` - KP_error : float + KP_error : array with statistic and p-value Moran's I type test against spatial error correlation. Implemented as presented in :cite:`Kelejian2001` - PS_error : float + PS_error : array with statistic and p-value Lagrange Multiplier test against spatial error correlation. 
Implemented as presented in :cite:`Pinkse1998` - warning : boolean - if True Maximum number of iterations exceeded or gradient - and/or function calls not changing. name_y : string Name of dependent variable for use in output name_x : list of strings @@ -834,8 +440,11 @@ def __init__( x, w=None, slx_lags=0, + slx_vars = "All", optim="newton", + bstart=False, scalem="phimean", + predflag = False, maxiter=100, vm=False, name_y=None, @@ -843,38 +452,74 @@ def __init__( name_w=None, name_ds=None, spat_diag=False, + latex=False, ): + n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) self.name_x = USER.set_name_x(name_x, x_constant) - if w != None or slx_lags > 0: - w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) - spat_diag = True - ws = w.sparse - if slx_lags > 0: - lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - x_constant = np.hstack((x_constant, lag_x)) - self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) - else: - ws = None + w_required=False + if spat_diag: + w_required=True + w = USER.check_weights(w, y, w_required=w_required, slx_lags=slx_lags) + + if slx_lags > 0: + x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) BaseProbit.__init__( - self, y=y, x=x_constant, w=ws, optim=optim, scalem=scalem, maxiter=maxiter + self, y=y, x=x_constant, optim=optim, bstart=bstart, maxiter=maxiter ) self.title = "CLASSIC PROBIT ESTIMATOR" if slx_lags > 0: - self.title += " WITH SPATIALLY LAGGED X (SLX)" + self.title += " WITH SPATIALLY LAGGED X (SLX)" self.slx_lags = slx_lags self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) self.name_w = USER.set_name_w(name_w, w) - SUMMARY.Probit(reg=self, w=w, vm=vm, spat_diag=spat_diag) + + self.scalem = scalem + self.predflag = predflag # flag to print prediction table + self.w = w # note, not sparse + + 
# standard errors and z, p-values + self.std_err = DIAGNOSTICS.se_betas(self) + self.z_stat = DIAGNOSTICS.t_stat(self,z_stat=True) + + # truepos, trueneg, predpc - measures of fit + self.predtable = PROBDIAG.pred_table(self) + self.fit = PROBDIAG.probit_fit(self) + self.predpc = self.fit["PREDPC"] + self.LRtest = PROBDIAG.probit_lrtest(self) + self.L0 = self.LRtest["L0"] + self.LR = (self.LRtest["likr"],self.LRtest["p-value"]) + self.mcfadrho = PROBDIAG.mcfad_rho(self) + + # impact measures + scale,slopes,slopes_vm,slopes_std_err,slopes_z_stat = PROBDIAG.probit_ape(self) + self.scale = scale + self.slopes = slopes + self.slopes_vm = slopes_vm + self.slopes_std_err = slopes_std_err + self.slopes_z_stat = slopes_z_stat + + # tests for spatial autocorrelation + if spat_diag: + self.Pinkse_error,self.KP_error,self.PS_error = PROBDIAG.sp_tests(regprob=self) + + #output + self.output = pd.DataFrame(self.name_x, columns=['var_names']) + self.output['var_type'] = ['o'] + ['x'] * (len(self.name_x)-1) + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top, self.other_mid, other_end = _probit_out(reg=self, spat_diag=spat_diag) + output(reg=self, vm=vm, robust=None, other_end=other_end, latex=latex) def newton(flogl, start, fgrad, fhess, maxiter): + """ Calculates the Newton-Raphson method @@ -909,84 +554,13 @@ def newton(flogl, start, fgrad, fhess, maxiter): return (par_hat0, logl, warn) -def sp_tests(reg): - """ - Calculates tests for spatial dependence in Probit models - - Parameters - ---------- - reg : regression object - output instance from a probit model - """ - if reg.w != None: - try: - w = reg.w.sparse - except: - w = reg.w - Phi = reg.predy - phi = reg.phiy - # Pinkse_error: - Phi_prod = Phi * (1 - Phi) - u_naive = reg.u_naive - u_gen = reg.u_gen - sig2 = np.sum((phi * phi) / Phi_prod) / reg.n - LM_err_num = np.dot(u_gen.T, (w * u_gen)) ** 2 - trWW = np.sum((w * w).diagonal()) - trWWWWp = trWW + np.sum((w * w.T).diagonal()) - LM_err = float(1.0 
* LM_err_num / (sig2**2 * trWWWWp)) - LM_err = np.array([LM_err, chisqprob(LM_err, 1)]) - # KP_error: - moran = moran_KP(reg.w, u_naive, Phi_prod) - # Pinkse-Slade_error: - u_std = u_naive / np.sqrt(Phi_prod) - ps_num = np.dot(u_std.T, (w * u_std)) ** 2 - trWpW = np.sum((w.T * w).diagonal()) - ps = float(ps_num / (trWW + trWpW)) - # chi-square instead of bootstrap. - ps = np.array([ps, chisqprob(ps, 1)]) - else: - raise Exception("W matrix must be provided to calculate spatial tests.") - return LM_err, moran, ps - - -def moran_KP(w, u, sig2i): - """ - Calculates Moran-flavoured tests - - Parameters - ---------- - - w : W - PySAL weights instance aligned with y - u : array - nx1 array of naive residuals - sig2i : array - nx1 array of individual variance - """ - try: - w = w.sparse - except: - pass - moran_num = np.dot(u.T, (w * u)) - E = SP.lil_matrix(w.get_shape()) - E.setdiag(sig2i.flat) - E = E.asformat("csr") - WE = w * E - moran_den = np.sqrt(np.sum((WE * WE + (w.T * E) * WE).diagonal())) - moran = float(1.0 * moran_num / moran_den) - moran = np.array([moran, norm.sf(abs(moran)) * 2.0]) - return moran - - def _test(): import doctest - start_suppress = np.get_printoptions()["suppress"] np.set_printoptions(suppress=True) doctest.testmod() np.set_printoptions(suppress=start_suppress) - if __name__ == "__main__": _test() import numpy as np @@ -1008,3 +582,4 @@ def _test(): name_w="columbus.dbf", ) print(probit1.summary) + diff --git a/spreg/regimes.py b/spreg/regimes.py index 824586a..9060baf 100755 --- a/spreg/regimes.py +++ b/spreg/regimes.py @@ -200,6 +200,8 @@ class Regimes_Frame: Names of independent variables for use in output conveniently arranged by regimes. The structure of the name is "regimeName_-_varName" + x_types : list + List of strings indicating the type of each variable in x kr : int Number of variables/columns to be "regimized" or subject to change by regime. 
These will result in one parameter @@ -214,18 +216,19 @@ class Regimes_Frame: """ - def __init__( - self, x, regimes, constant_regi, cols2regi, names=None, yend=False, rlist=False - ): + def __init__(self, x, regimes, constant_regi, cols2regi, names=None, yend=False, rlist=False): + xtype = ['x'] * x.shape[1] if cols2regi == "all": cols2regi = [True] * x.shape[1] else: if yend: cols2regi = cols2regi[-x.shape[1] :] + xtype[-x.shape[1]:] = ["yend"] * x.shape[1] else: cols2regi = cols2regi[0 : x.shape[1]] if constant_regi: x = np.hstack((np.ones((x.shape[0], 1)), x)) + xtype.insert(0, 'o') if constant_regi == "one": cols2regi.insert(0, False) elif constant_regi == "many": @@ -258,16 +261,16 @@ def __init__( if rlist: if names: - names, r_list = set_name_x_regimes( - names, regimes, constant_regi, cols2regi, self.regimes_set, rlist + names, xtype, r_list = set_name_x_regimes( + names, constant_regi, cols2regi, self.regimes_set, xtype, rlist ) - return (x, names, r_list) + return (x, names, xtype, r_list) else: if names: - names = set_name_x_regimes( - names, regimes, constant_regi, cols2regi, self.regimes_set + names, xtype = set_name_x_regimes( + names, constant_regi, cols2regi, self.regimes_set, xtype ) - return (x, names) + return (x, names, xtype) def wald_test(betas, r, q, vm): @@ -435,45 +438,27 @@ def regimeX_setup(x, regimes, cols2regi, regimes_set, constant=False): return xsp -def set_name_x_regimes( - name_x, regimes, constant_regi, cols2regi, regimes_set, rlist=False -): +def set_name_x_regimes(name_x, constant_regi, cols2regi, regimes_set, xtype, rlist=False): """ - Generate the set of variable names in a regimes setup, according to the - order of the betas - - NOTE: constant term, if desired in the model, should be included in the x - already + Generate the set of variable names and types in a regimes setup, according to the + order of the betas. 
Parameters ---------- name_x : list/None - If passed, list of strings with the names of the - variables aligned with the original dense array x - IMPORTANT: constant term (if desired in the model) should be - included - regimes : list - list of n values with the mapping of each observation to a - regime. Assumed to be aligned with 'x'. + List of variable names. constant_regi : [False, 'one', 'many'] - Switcher controlling the constant term setup. It may take - the following values: - - * False: no constant term is appended in any way - - * 'one': a vector of ones is appended to x and held constant across regimes - - * 'many': a vector of ones is appended to x and considered different per regime + Controls the constant term setup. cols2regi : list - List of k booleans indicating whether each column should be - considered as different per regime (True) or held constant - across regimes (False) + Boolean list indicating if each column should vary by regime. regimes_set : list - List of ordered regimes tags - + List of ordered regimes tags. + xtype : list + List of strings indicating the type of each variable ('o', 'x', or 'yend'). + Should have the same length as cols2regi. 
Returns ------- - name_x_regi + name_x_regi, xtype_regi """ k = len(cols2regi) if constant_regi: @@ -482,22 +467,34 @@ def set_name_x_regimes( name_x = ["var_" + str(i + 1) for i in range(k)] if constant_regi: name_x.insert(0, "CONSTANT") + nxa = np.array(name_x) c2ra = np.array(cols2regi) + xtype_arr = np.array(xtype) + vars_regi = nxa[np.where(c2ra == True)] vars_glob = nxa[np.where(c2ra == False)] + xtype_regi = xtype_arr[np.where(c2ra == True)] + xtype_glob = xtype_arr[np.where(c2ra == False)] + name_x_regi = [] + xtype_full = [] r_list = [] + for r in regimes_set: rl = ["%s_%s" % (str(r), i) for i in vars_regi] name_x_regi.extend(rl) + xtype_full.extend(xtype_regi) # Replicate xtype for regime-specific variables r_list.extend([str(r)] * len(rl)) + name_x_regi.extend(["_Global_%s" % i for i in vars_glob]) + xtype_full.extend(xtype_glob) # Keep xtype for global variables r_list.extend(["_Global"] * len(vars_glob)) + if rlist: - return (name_x_regi, r_list) + return name_x_regi, xtype_full, r_list else: - return name_x_regi + return name_x_regi, xtype_full def w_regime(w, regi_ids, regi_i, transform=True, min_n=None): diff --git a/spreg/skater_reg.py b/spreg/skater_reg.py index 2b203fb..9f4dea9 100755 --- a/spreg/skater_reg.py +++ b/spreg/skater_reg.py @@ -8,7 +8,6 @@ from warnings import warn from libpysal.weights import w_subset from .utils import set_endog -from .twosls_regimes import TSLS_Regimes import time import numpy as np import copy @@ -350,92 +349,81 @@ def score_spreg( If a quorum is passed and the labels do not meet quorum, the score is inf. data : (N,P) array of data on which to compute the score of the regions expressed in labels - data_reg : list containing: - 1- a callable spreg regression method (ex. 
OLS or GM_Lag) - 2- np.ndarray of (N,1) shape with N observations on the depedent variable for the regression - 3- np.ndarray of (N,k) shape with N observations and k columns containing the explanatory variables (constant must not be included) - 4- pysal W object to be used in the regression (optional) + data_reg : dict containing: + - 'reg': callable spreg regression method (e.g., OLS or GM_Lag) + - 'y': (N,1) np.ndarray with dependent variable + - 'x': (N,k) np.ndarray with explanatory variables (excluding constant) + - 'w': (optional) pysal W object for spatial weights + - 'yend', 'q': (optional) np.ndarrays for instrumental variables all_labels : (N,) flat vector of labels expressing the classification of each observation into a region considering the cut under evaluation. quorum : int expressing the minimum size of regions. Can be -inf if there is no lower bound. Any region below quorum makes the score inf. current_labels: (N,) flat vector of labels expressing the classification of each observation into a region not considering the cut under evaluation. 
- current_tree: integer indicating the tree's label currently being considered for division """ - labels, subtree_quorums = self._prep_score( - all_labels, current_tree, current_labels - ) + labels, subtree_quorums = self._prep_score(all_labels, current_tree, current_labels) if (subtree_quorums < quorum).any(): return np.inf, None + set_labels = set(labels) if data_reg is not None: - kargs = { - k: v - for k, v in data_reg.items() - if k not in ["reg", "y", "x", "w", "x_nd"] - } + kargs = {k: v for k, v in data_reg.items() if k not in ["reg", "y", "x", "w", "x_nd", "yend", "q"]} trees_scores = {} - if data_reg["reg"].__name__ == "GM_Lag" or data_reg["reg"].__name__ == "BaseGM_Lag": + if data_reg["reg"].__name__ in {"GM_Lag", "BaseGM_Lag"}: try: x = np.hstack((np.ones((data_reg["x"].shape[0], 1)), data_reg["x"])) - reg = TSLS_Regimes( - y=data_reg["y"], - x=x, - yend=data_reg["yend"], - q=data_reg["q"], - regimes=all_labels,) - except: + except np.linalg.LinAlgError: x = _const_x(data_reg["x"]) - reg = TSLS_Regimes( - y=data_reg["y"], - x=x, - yend=data_reg["yend"], - q=data_reg["q"], - regimes=all_labels,) + from .twosls_regimes import TSLS_Regimes + reg = TSLS_Regimes( + y=data_reg["y"], + x=x, + yend=data_reg.get("yend"), + q=data_reg.get("q"), + regimes=all_labels, + ) score = np.dot(reg.u.T, reg.u)[0][0] else: + label_indices = {l: np.where(all_labels == l)[0] for l in set_labels} for l in set_labels: - x = data_reg["x"][all_labels == l] - if np.linalg.matrix_rank(x) < x.shape[1]: - small_diag_indices = np.abs(np.diag(np.linalg.qr(x)[1])) < 1e-10 - x = x[:, ~small_diag_indices] - - if "w" not in data_reg: - try: - x = np.hstack((np.ones((x.shape[0], 1)), x)) - reg = data_reg["reg"]( - y=data_reg["y"][all_labels == l], x=x, **kargs - ) - except np.linalg.LinAlgError: - x = _const_x(x) - reg = data_reg["reg"]( - y=data_reg["y"][all_labels == l], x=x, **kargs - ) - else: - l_arrays = np.array(all_labels) - - regi_ids = list(np.where(l_arrays == l)[0]) + regi_ids = 
label_indices[l] + + if "w" in data_reg: w_ids = list(map(data_reg["w"].id_order.__getitem__, regi_ids)) - w_regi_i = w_subset(data_reg["w"], w_ids, silence_warnings=True) - try: - x = np.hstack((np.ones((x.shape[0], 1)), x)) - reg = data_reg["reg"]( - y=data_reg["y"][all_labels == l], x=x, w=w_regi_i, **kargs - ) - except np.linalg.LinAlgError: - x = _const_x(x) - reg = data_reg["reg"]( - y=data_reg["y"][all_labels == l], x=x, w=w_regi_i, **kargs - ) + kargs["w"] = w_subset(data_reg["w"], w_ids, silence_warnings=True) + + x = data_reg["x"][regi_ids] + y = data_reg["y"][regi_ids] + yend = data_reg["yend"][regi_ids] if "yend" in data_reg else None + q = data_reg["q"][regi_ids] if "q" in data_reg else None + + temp_vars = {"x": x, "yend": yend, "q": q} + for key in ["x", "yend", "q"]: + mat = temp_vars[key] + if mat is not None and mat.size > 0 and np.linalg.matrix_rank(mat) < mat.shape[1]: + _, r = np.linalg.qr(mat) + small_diag_indices = np.abs(np.diag(r)) < 1e-10 + temp_vars[key] = mat[:, ~small_diag_indices] + + x = temp_vars["x"] + try: + x = np.hstack((np.ones((x.shape[0], 1)), x)) + except np.linalg.LinAlgError: + x = _const_x(x) + + if temp_vars["yend"] is not None: + kargs["yend"] = temp_vars["yend"] + kargs["q"] = temp_vars["q"] + + reg = data_reg["reg"](y=y, x=x, **kargs) trees_scores[l] = np.dot(reg.u.T, reg.u)[0][0] + score = sum(trees_scores.values()) else: - part_scores, score, trees_scores = self._data_reg_none( - data, all_labels, l, set_labels - ) + part_scores, score, trees_scores = self._data_reg_none(data, all_labels, set_labels) return score, trees_scores @@ -506,7 +494,7 @@ def score_stats( score = sum(trees_scores.values()) else: part_scores, score, trees_scores = self._data_reg_none( - data, all_labels, l, set_labels + data, all_labels, set_labels ) return score, trees_scores @@ -523,7 +511,7 @@ def _prep_score(self, all_labels, current_tree, current_labels): _, subtree_quorums = np.unique(labels, return_counts=True) return labels, 
subtree_quorums - def _data_reg_none(self, data, all_labels, l, set_labels): + def _data_reg_none(self, data, all_labels, set_labels): assert data.shape[0] == len( all_labels ), "Length of label array ({}) does not match " "length of data ({})! ".format( diff --git a/spreg/sp_panels.py b/spreg/sp_panels.py index 16646bb..8ef3f52 100644 --- a/spreg/sp_panels.py +++ b/spreg/sp_panels.py @@ -342,7 +342,7 @@ def __init__( self.name_regimes = USER.set_name_ds(name_regimes) regimes_l = self._set_regimes(w, bigy.shape[0]) self.name_x_r = self.name_x - x_constant, self.name_x = REGI.Regimes_Frame.__init__( + x_constant, self.name_x, xtype = REGI.Regimes_Frame.__init__( self, x_constant, regimes_l, diff --git a/spreg/sur.py b/spreg/sur.py old mode 100644 new mode 100755 index 884be7c..fbe4352 --- a/spreg/sur.py +++ b/spreg/sur.py @@ -464,7 +464,7 @@ def __init__( self.constant_regi, self.cols2regi, bigX[r], add_cons=False ) USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1]) - bigX[r], self.name_bigX[r] = REGI.Regimes_Frame.__init__( + bigX[r], self.name_bigX[r], xtype = REGI.Regimes_Frame.__init__( self, bigX[r], regimes, @@ -887,7 +887,7 @@ def __init__( add_cons=False, ) USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1]) - bigX[r], self.name_bigX[r] = REGI.Regimes_Frame.__init__( + bigX[r], self.name_bigX[r], xtype = REGI.Regimes_Frame.__init__( self, bigX[r], regimes, @@ -895,7 +895,7 @@ def __init__( cols2regi=cols2regi_dic[r], names=name_bigX[r], ) - bigq[r], self.name_bigq[r] = REGI.Regimes_Frame.__init__( + bigq[r], self.name_bigq[r], xtype = REGI.Regimes_Frame.__init__( self, bigq[r], regimes, @@ -903,7 +903,7 @@ def __init__( cols2regi="all", names=name_bigq[r], ) - bigyend[r], self.name_bigyend[r] = REGI.Regimes_Frame.__init__( + bigyend[r], self.name_bigyend[r], xtype = REGI.Regimes_Frame.__init__( self, bigyend[r], regimes, diff --git a/spreg/sur_error.py b/spreg/sur_error.py index 02bbf5a..40ef789 100644 --- 
a/spreg/sur_error.py +++ b/spreg/sur_error.py @@ -364,7 +364,7 @@ def __init__( self.constant_regi, self.cols2regi, bigX[r], add_cons=False ) USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1]) - bigX[r], self.name_bigX[r] = REGI.Regimes_Frame.__init__( + bigX[r], self.name_bigX[r], xtype = REGI.Regimes_Frame.__init__( self, bigX[r], regimes, @@ -810,7 +810,7 @@ def __init__( self.constant_regi, self.cols2regi, bigX[r], add_cons=False ) USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1]) - bigX[r], self.name_bigX[r] = REGI.Regimes_Frame.__init__( + bigX[r], self.name_bigX[r], xtype = REGI.Regimes_Frame.__init__( self, bigX[r], regimes, diff --git a/spreg/sur_lag.py b/spreg/sur_lag.py index ea63768..db5f769 100644 --- a/spreg/sur_lag.py +++ b/spreg/sur_lag.py @@ -323,7 +323,7 @@ def __init__( self.constant_regi, self.cols2regi, bigX[r], add_cons=False ) USER.check_regimes(self.regimes_set, bigy[0].shape[0], bigX[r].shape[1]) - bigX[r], self.name_bigX[r] = REGI.Regimes_Frame.__init__( + bigX[r], self.name_bigX[r], xtype = REGI.Regimes_Frame.__init__( self, bigX[r], regimes, @@ -332,7 +332,7 @@ def __init__( names=name_bigX[r], ) if bigq is not None: - bigq[r], self.name_bigq[r] = REGI.Regimes_Frame.__init__( + bigq[r], self.name_bigq[r], xtype = REGI.Regimes_Frame.__init__( self, bigq[r], regimes, @@ -341,7 +341,7 @@ def __init__( names=name_bigq[r], ) if bigyend is not None: - bigyend[r], self.name_bigyend[r] = REGI.Regimes_Frame.__init__( + bigyend[r], self.name_bigyend[r], xtype = REGI.Regimes_Frame.__init__( self, bigyend[r], regimes, diff --git a/spreg/tests/test_probit.py b/spreg/tests/test_probit.py index 57d5423..191e0d0 100644 --- a/spreg/tests/test_probit.py +++ b/spreg/tests/test_probit.py @@ -4,129 +4,91 @@ from spreg import probit as PB from libpysal.common import RTOL - class TestBaseProbit(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + 
db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("CRIME")) - y = np.reshape(y, (49, 1)) - self.y = (y > 40).astype(float) + y = np.reshape(y, (49,1)) + self.y = (y>40).astype(float) X = [] X.append(db.by_col("INC")) X.append(db.by_col("HOVAL")) self.X = np.array(X).T - self.X = np.hstack((np.ones(self.y.shape), self.X)) - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.X = np.hstack((np.ones(self.y.shape),self.X)) + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def test_model(self): - reg = PB.BaseProbit(self.y, self.X, w=self.w) - betas = np.array([[3.35381078], [-0.1996531], [-0.02951371]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - predy = np.array([0.00174739]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + reg = PB.BaseProbit(self.y, self.X) + betas = np.array([[ 3.35381078], [-0.1996531 ], [-0.02951371]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + predy = np.array([ 0.00174739]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([0.0]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 80.467003]) - np.testing.assert_allclose(reg.x[0], x, RTOL) - vm = np.array( - [ - [8.52813879e-01, -4.36272459e-02, -8.05171472e-03], - [-4.36272459e-02, 4.11381444e-03, -1.92834842e-04], - [-8.05171472e-03, -1.92834842e-04, 3.09660240e-04], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - xmean = np.array([[1.0], [14.37493876], [38.43622447]]) - np.testing.assert_allclose(reg.xmean, xmean, RTOL) - predpc = 85.714285714285708 - np.testing.assert_allclose(reg.predpc, predpc, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 0.]) + 
np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531 , 80.467003]) + np.testing.assert_allclose(reg.x[0],x,RTOL) + vm = np.array([[ 8.52813879e-01, -4.36272459e-02, -8.05171472e-03], [ -4.36272459e-02, 4.11381444e-03, -1.92834842e-04], [ -8.05171472e-03, -1.92834842e-04, 3.09660240e-04]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + xmean = np.array([[ 1. ], [ 14.37493876], [ 38.43622447 ]]) + np.testing.assert_allclose(reg.xmean,xmean,RTOL) logl = -20.06009093055782 - np.testing.assert_allclose(reg.logl, logl, RTOL) - scale = 0.23309310130643665 - np.testing.assert_allclose(reg.scale, scale, RTOL) - slopes = np.array([[-0.04653776], [-0.00687944]]) - np.testing.assert_allclose(reg.slopes, slopes, RTOL) - slopes_vm = np.array( - [[1.77101993e-04, -1.65021168e-05], [-1.65021168e-05, 1.60575016e-05]] - ) - np.testing.assert_allclose(reg.slopes_vm, slopes_vm, RTOL) - LR = 25.317683245671716 - np.testing.assert_allclose(reg.LR[0], LR, RTOL) - Pinkse_error = 2.9632385352516728 - np.testing.assert_allclose(reg.Pinkse_error[0], Pinkse_error, RTOL) - KP_error = 1.6509224700582124 - np.testing.assert_allclose(reg.KP_error[0], KP_error, RTOL) - PS_error = 2.3732463777623511 - np.testing.assert_allclose(reg.PS_error[0], PS_error, RTOL) + np.testing.assert_allclose(reg.logl,logl,RTOL) class TestProbit(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db=libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),"r") y = np.array(db.by_col("CRIME")) - y = np.reshape(y, (49, 1)) - self.y = (y > 40).astype(float) + y = np.reshape(y, (49,1)) + self.y = (y>40).astype(float) X = [] X.append(db.by_col("INC")) X.append(db.by_col("HOVAL")) self.X = np.array(X).T - self.w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - self.w.transform = "r" + self.w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + self.w.transform = 'r' def 
test_model(self): - reg = PB.Probit(self.y, self.X, w=self.w) - betas = np.array([[3.35381078], [-0.1996531], [-0.02951371]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - predy = np.array([0.00174739]) - np.testing.assert_allclose(reg.predy[0], predy, RTOL) + reg = PB.Probit(self.y, self.X, w=self.w, spat_diag=True) + betas = np.array([[ 3.35381078], [-0.1996531 ], [-0.02951371]]) + np.testing.assert_allclose(reg.betas,betas,RTOL) + predy = np.array([ 0.00174739]) + np.testing.assert_allclose(reg.predy[0],predy,RTOL) n = 49 - np.testing.assert_allclose(reg.n, n, RTOL) + np.testing.assert_allclose(reg.n,n,RTOL) k = 3 - np.testing.assert_allclose(reg.k, k, RTOL) - y = np.array([0.0]) - np.testing.assert_allclose(reg.y[0], y, RTOL) - x = np.array([1.0, 19.531, 80.467003]) - np.testing.assert_allclose(reg.x[0], x, RTOL) - vm = np.array( - [ - [8.52813879e-01, -4.36272459e-02, -8.05171472e-03], - [-4.36272459e-02, 4.11381444e-03, -1.92834842e-04], - [-8.05171472e-03, -1.92834842e-04, 3.09660240e-04], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - xmean = np.array([[1.0], [14.37493876], [38.43622447]]) - np.testing.assert_allclose(reg.xmean, xmean, RTOL) + np.testing.assert_allclose(reg.k,k,RTOL) + y = np.array([ 0.]) + np.testing.assert_allclose(reg.y[0],y,RTOL) + x = np.array([ 1. , 19.531 , 80.467003]) + np.testing.assert_allclose(reg.x[0],x,RTOL) + vm = np.array([[ 8.52813879e-01, -4.36272459e-02, -8.05171472e-03], [ -4.36272459e-02, 4.11381444e-03, -1.92834842e-04], [ -8.05171472e-03, -1.92834842e-04, 3.09660240e-04]]) + np.testing.assert_allclose(reg.vm,vm,RTOL) + xmean = np.array([[ 1. 
], [ 14.37493876], [ 38.43622447 ]]) + np.testing.assert_allclose(reg.xmean,xmean,RTOL) predpc = 85.714285714285708 - np.testing.assert_allclose(reg.predpc, predpc, RTOL) + np.testing.assert_allclose(reg.predpc,predpc,RTOL) logl = -20.06009093055782 - np.testing.assert_allclose(reg.logl, logl, RTOL) + np.testing.assert_allclose(reg.logl,logl,RTOL) scale = 0.23309310130643665 - np.testing.assert_allclose(reg.scale, scale, RTOL) + np.testing.assert_allclose(reg.scale,scale,RTOL) slopes = np.array([[-0.04653776], [-0.00687944]]) - np.testing.assert_allclose(reg.slopes, slopes, RTOL) - slopes_vm = np.array( - [[1.77101993e-04, -1.65021168e-05], [-1.65021168e-05, 1.60575016e-05]] - ) - np.testing.assert_allclose(reg.slopes_vm, slopes_vm, RTOL) + np.testing.assert_allclose(reg.slopes,slopes,RTOL) + slopes_vm = np.array([[ 1.77101993e-04, -1.65021168e-05], [ -1.65021168e-05, 1.60575016e-05]]) + np.testing.assert_allclose(reg.slopes_vm,slopes_vm,RTOL) LR = 25.317683245671716 - np.testing.assert_allclose(reg.LR[0], LR, RTOL) + np.testing.assert_allclose(reg.LR[0],LR,RTOL) Pinkse_error = 2.9632385352516728 - np.testing.assert_allclose(reg.Pinkse_error[0], Pinkse_error, RTOL) + np.testing.assert_allclose(reg.Pinkse_error[0],Pinkse_error,RTOL) KP_error = 1.6509224700582124 - np.testing.assert_allclose(reg.KP_error[0], KP_error, RTOL) + np.testing.assert_allclose(reg.KP_error[0],KP_error,RTOL) PS_error = 2.3732463777623511 - np.testing.assert_allclose(reg.PS_error[0], PS_error, RTOL) - - -if __name__ == "__main__": + np.testing.assert_allclose(reg.PS_error[0],PS_error,RTOL) + +if __name__ == '__main__': unittest.main() diff --git a/spreg/tests/test_twosls.py b/spreg/tests/test_twosls.py index 73be664..beb231e 100644 --- a/spreg/tests/test_twosls.py +++ b/spreg/tests/test_twosls.py @@ -4,16 +4,15 @@ from spreg.twosls import BaseTSLS, TSLS from libpysal.common import RTOL - class TestBaseTSLS(unittest.TestCase): def setUp(self): - db = 
libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),'r') self.y = np.array(db.by_col("CRIME")) - self.y = np.reshape(self.y, (49, 1)) + self.y = np.reshape(self.y, (49,1)) self.X = [] self.X.append(db.by_col("INC")) self.X = np.array(self.X).T - self.X = np.hstack((np.ones(self.y.shape), self.X)) + self.X = np.hstack((np.ones(self.y.shape),self.X)) self.yd = [] self.yd.append(db.by_col("HOVAL")) self.yd = np.array(self.yd).T @@ -23,144 +22,92 @@ def setUp(self): def test_basic(self): reg = BaseTSLS(self.y, self.X, self.yd, self.q) - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - h_0 = np.array([1.0, 19.531, 5.03]) + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + h_0 = np.array([ 1. , 19.531, 5.03 ]) np.testing.assert_allclose(reg.h[0], h_0) - hth = np.array( - [ - [49.0, 704.371999, 139.75], - [704.371999, 11686.67338121, 2246.12800625], - [139.75, 2246.12800625, 498.5851], - ] - ) - np.testing.assert_allclose(reg.hth, hth, RTOL) - hthi = np.array( - [ - [0.1597275, -0.00762011, -0.01044191], - [-0.00762011, 0.00100135, -0.0023752], - [-0.01044191, -0.0023752, 0.01563276], - ] - ) - np.testing.assert_allclose(reg.hthi, hthi, RTOL) + hth = np.array([[ 49. 
, 704.371999 , 139.75 ], + [ 704.371999 , 11686.67338121, 2246.12800625], + [ 139.75 , 2246.12800625, 498.5851 ]]) + np.testing.assert_allclose(reg.hth, hth,RTOL) + hthi = np.array([[ 0.1597275 , -0.00762011, -0.01044191], + [-0.00762011, 0.00100135, -0.0023752 ], + [-0.01044191, -0.0023752 , 0.01563276]]) + np.testing.assert_allclose(reg.hthi, hthi,RTOL) self.assertEqual(reg.k, 3) self.assertEqual(reg.kstar, 1) - np.testing.assert_allclose(reg.mean_y, 35.128823897959187, RTOL) + np.testing.assert_allclose(reg.mean_y, 35.128823897959187,RTOL) self.assertEqual(reg.n, 49) - pfora1a2 = np.array( - [ - [9.58156106, -0.22744226, -0.13820537], - [0.02580142, 0.08226331, -0.03143731], - [-3.13896453, -0.33487872, 0.20690965], - ] - ) - np.testing.assert_allclose(reg.pfora1a2, pfora1a2, RTOL) - predy_5 = np.array( - [[-28.68949467], [28.99484984], [55.07344824], [38.26609504], [57.57145851]] - ) - np.testing.assert_allclose(reg.predy[0:5], predy_5, RTOL) - q_5 = np.array([[5.03], [4.27], [3.89], [3.7], [2.83]]) + pfora1a2 = np.array([[ 9.58156106, -0.22744226, -0.13820537], + [ 0.02580142, 0.08226331, -0.03143731], + [-3.13896453, -0.33487872, 0.20690965]]) + np.testing.assert_allclose(reg.pfora1a2, pfora1a2,RTOL) + predy_5 = np.array([[-28.68949467], [ 28.99484984], [ 55.07344824], [ 38.26609504], [ 57.57145851]]) + np.testing.assert_allclose(reg.predy[0:5], predy_5,RTOL) + q_5 = np.array([[ 5.03], [ 4.27], [ 3.89], [ 3.7 ], [ 2.83]]) np.testing.assert_array_equal(reg.q[0:5], q_5) - np.testing.assert_allclose(reg.sig2n_k, 587.56797852699822, RTOL) - np.testing.assert_allclose(reg.sig2n, 551.5944288212637, RTOL) - np.testing.assert_allclose(reg.sig2, 551.5944288212637, RTOL) - np.testing.assert_allclose(reg.std_y, 16.732092091229699, RTOL) - u_5 = np.array( - [ - [44.41547467], - [-10.19309584], - [-24.44666724], - [-5.87833504], - [-6.83994851], - ] - ) - np.testing.assert_allclose(reg.u[0:5], u_5, RTOL) - np.testing.assert_allclose(reg.utu, 27028.127012241919, RTOL) - 
varb = np.array( - [ - [0.41526237, 0.01879906, -0.01730372], - [0.01879906, 0.00362823, -0.00184604], - [-0.01730372, -0.00184604, 0.0011406], - ] - ) - np.testing.assert_allclose(reg.varb, varb, RTOL) - vm = np.array( - [ - [229.05640809, 10.36945783, -9.54463414], - [10.36945783, 2.0013142, -1.01826408], - [-9.54463414, -1.01826408, 0.62914915], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - x_0 = np.array([1.0, 19.531]) - np.testing.assert_allclose(reg.x[0], x_0, RTOL) - y_5 = np.array([[15.72598], [18.801754], [30.626781], [32.38776], [50.73151]]) - np.testing.assert_allclose(reg.y[0:5], y_5, RTOL) - yend_5 = np.array([[80.467003], [44.567001], [26.35], [33.200001], [23.225]]) - np.testing.assert_allclose(reg.yend[0:5], yend_5, RTOL) - z_0 = np.array([1.0, 19.531, 80.467003]) - np.testing.assert_allclose(reg.z[0], z_0, RTOL) - zthhthi = np.array( - [ - [1.00000000e00, -1.66533454e-16, 4.44089210e-16], - [0.00000000e00, 1.00000000e00, 0.00000000e00], - [1.26978671e01, 1.05598709e00, 3.70212359e00], - ] - ) - # np.testing.assert_allclose(reg.zthhthi, zthhthi, RTOL) + np.testing.assert_allclose(reg.sig2n_k, 587.56797852699822,RTOL) + np.testing.assert_allclose(reg.sig2n, 551.5944288212637,RTOL) + np.testing.assert_allclose(reg.sig2, 551.5944288212637,RTOL) + np.testing.assert_allclose(reg.std_y, 16.732092091229699,RTOL) + u_5 = np.array([[ 44.41547467], [-10.19309584], [-24.44666724], [ -5.87833504], [ -6.83994851]]) + np.testing.assert_allclose(reg.u[0:5], u_5,RTOL) + np.testing.assert_allclose(reg.utu, 27028.127012241919,RTOL) + varb = np.array([[ 0.41526237, 0.01879906, -0.01730372], + [ 0.01879906, 0.00362823, -0.00184604], + [-0.01730372, -0.00184604, 0.0011406 ]]) + np.testing.assert_allclose(reg.varb, varb,RTOL) + vm = np.array([[ 229.05640809, 10.36945783, -9.54463414], + [ 10.36945783, 2.0013142 , -1.01826408], + [ -9.54463414, -1.01826408, 0.62914915]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + x_0 = np.array([ 1. 
, 19.531]) + np.testing.assert_allclose(reg.x[0], x_0,RTOL) + y_5 = np.array([[ 15.72598 ], [ 18.801754], [ 30.626781], [ 32.38776 ], [ 50.73151 ]]) + np.testing.assert_allclose(reg.y[0:5], y_5,RTOL) + yend_5 = np.array([[ 80.467003], [ 44.567001], [ 26.35 ], [ 33.200001], [ 23.225 ]]) + np.testing.assert_allclose(reg.yend[0:5], yend_5,RTOL) + z_0 = np.array([ 1. , 19.531 , 80.467003]) + np.testing.assert_allclose(reg.z[0], z_0,RTOL) + zthhthi = np.array([[ 1.00000000e+00, -1.66533454e-16, 4.44089210e-16], + [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00], + [ 1.26978671e+01, 1.05598709e+00, 3.70212359e+00]]) + #np.testing.assert_allclose(reg.zthhthi, zthhthi, RTOL) np.testing.assert_array_almost_equal(reg.zthhthi, zthhthi, 7) - + def test_n_k(self): reg = BaseTSLS(self.y, self.X, self.yd, self.q, sig2n_k=True) - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [243.99486949, 11.04572682, -10.16711028], - [11.04572682, 2.13183469, -1.08467261], - [-10.16711028, -1.08467261, 0.67018062], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 243.99486949, 11.04572682, -10.16711028], + [ 11.04572682, 2.13183469, -1.08467261], + [ -10.16711028, -1.08467261, 0.67018062]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) def test_white(self): - reg = BaseTSLS(self.y, self.X, self.yd, self.q, robust="white") - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [208.27139316, 15.6687805, -11.53686154], - [15.6687805, 2.26882747, -1.30312033], - [-11.53686154, -1.30312033, 0.81940656], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + reg = BaseTSLS(self.y, self.X, self.yd, self.q, robust='white') + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ 
-1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 208.27139316, 15.6687805 , -11.53686154], + [ 15.6687805 , 2.26882747, -1.30312033], + [ -11.53686154, -1.30312033, 0.81940656]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) def test_hac(self): - gwk = libpysal.weights.Kernel.from_shapefile( - libpysal.examples.get_path("columbus.shp"), - k=15, - function="triangular", - fixed=False, - ) - reg = BaseTSLS(self.y, self.X, self.yd, self.q, robust="hac", gwk=gwk) - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [231.07254978, 15.42050291, -11.3941033], - [15.01376346, 1.92422887, -1.11865505], - [-11.34381641, -1.1279227, 0.72053806], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - + gwk = libpysal.weights.Kernel.from_shapefile(libpysal.examples.get_path('columbus.shp'),k=15,function='triangular', fixed=False) + reg = BaseTSLS(self.y, self.X, self.yd, self.q, robust='hac', gwk=gwk) + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 231.07254978, 15.42050291, -11.3941033 ], + [ 15.01376346, 1.92422887, -1.11865505], + [ -11.34381641, -1.1279227 , 0.72053806]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) class TestTSLS(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),'r') self.y = np.array(db.by_col("CRIME")) - self.y = np.reshape(self.y, (49, 1)) + self.y = np.reshape(self.y, (49,1)) self.X = [] self.X.append(db.by_col("INC")) self.X = np.array(self.X).T @@ -173,214 +120,129 @@ def setUp(self): def test_basic(self): reg = TSLS(self.y, self.X, self.yd, self.q) - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - h_0 = np.array([1.0, 19.531, 5.03]) + betas = 
np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + h_0 = np.array([ 1. , 19.531, 5.03 ]) np.testing.assert_allclose(reg.h[0], h_0) - hth = np.array( - [ - [49.0, 704.371999, 139.75], - [704.371999, 11686.67338121, 2246.12800625], - [139.75, 2246.12800625, 498.5851], - ] - ) - np.testing.assert_allclose(reg.hth, hth, RTOL) - hthi = np.array( - [ - [0.1597275, -0.00762011, -0.01044191], - [-0.00762011, 0.00100135, -0.0023752], - [-0.01044191, -0.0023752, 0.01563276], - ] - ) - np.testing.assert_allclose(reg.hthi, hthi, RTOL) + hth = np.array([[ 49. , 704.371999 , 139.75 ], + [ 704.371999 , 11686.67338121, 2246.12800625], + [ 139.75 , 2246.12800625, 498.5851 ]]) + np.testing.assert_allclose(reg.hth, hth,RTOL) + hthi = np.array([[ 0.1597275 , -0.00762011, -0.01044191], + [-0.00762011, 0.00100135, -0.0023752 ], + [-0.01044191, -0.0023752 , 0.01563276]]) + np.testing.assert_allclose(reg.hthi, hthi,RTOL) self.assertEqual(reg.k, 3) self.assertEqual(reg.kstar, 1) - np.testing.assert_allclose(reg.mean_y, 35.128823897959187, RTOL) + np.testing.assert_allclose(reg.mean_y, 35.128823897959187,RTOL) self.assertEqual(reg.n, 49) - pfora1a2 = np.array( - [ - [9.58156106, -0.22744226, -0.13820537], - [0.02580142, 0.08226331, -0.03143731], - [-3.13896453, -0.33487872, 0.20690965], - ] - ) - np.testing.assert_allclose(reg.pfora1a2, pfora1a2, RTOL) - predy_5 = np.array( - [[-28.68949467], [28.99484984], [55.07344824], [38.26609504], [57.57145851]] - ) - np.testing.assert_allclose(reg.predy[0:5], predy_5, RTOL) - q_5 = np.array([[5.03], [4.27], [3.89], [3.7], [2.83]]) + pfora1a2 = np.array([[ 9.58156106, -0.22744226, -0.13820537], + [ 0.02580142, 0.08226331, -0.03143731], + [-3.13896453, -0.33487872, 0.20690965]]) + np.testing.assert_allclose(reg.pfora1a2, pfora1a2,RTOL) + predy_5 = np.array([[-28.68949467], [ 28.99484984], [ 55.07344824], [ 38.26609504], [ 57.57145851]]) + np.testing.assert_allclose(reg.predy[0:5], 
predy_5,RTOL) + q_5 = np.array([[ 5.03], [ 4.27], [ 3.89], [ 3.7 ], [ 2.83]]) np.testing.assert_array_equal(reg.q[0:5], q_5) - np.testing.assert_allclose(reg.sig2n_k, 587.56797852699822, RTOL) - np.testing.assert_allclose(reg.sig2n, 551.5944288212637, RTOL) - np.testing.assert_allclose(reg.sig2, 551.5944288212637, RTOL) - np.testing.assert_allclose(reg.std_y, 16.732092091229699, RTOL) - u_5 = np.array( - [ - [44.41547467], - [-10.19309584], - [-24.44666724], - [-5.87833504], - [-6.83994851], - ] - ) - np.testing.assert_allclose(reg.u[0:5], u_5, RTOL) - np.testing.assert_allclose(reg.utu, 27028.127012241919, RTOL) - varb = np.array( - [ - [0.41526237, 0.01879906, -0.01730372], - [0.01879906, 0.00362823, -0.00184604], - [-0.01730372, -0.00184604, 0.0011406], - ] - ) - np.testing.assert_allclose(reg.varb, varb, RTOL) - vm = np.array( - [ - [229.05640809, 10.36945783, -9.54463414], - [10.36945783, 2.0013142, -1.01826408], - [-9.54463414, -1.01826408, 0.62914915], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - x_0 = np.array([1.0, 19.531]) - np.testing.assert_allclose(reg.x[0], x_0, RTOL) - y_5 = np.array([[15.72598], [18.801754], [30.626781], [32.38776], [50.73151]]) - np.testing.assert_allclose(reg.y[0:5], y_5, RTOL) - yend_5 = np.array([[80.467003], [44.567001], [26.35], [33.200001], [23.225]]) - np.testing.assert_allclose(reg.yend[0:5], yend_5, RTOL) - z_0 = np.array([1.0, 19.531, 80.467003]) - np.testing.assert_allclose(reg.z[0], z_0, RTOL) - zthhthi = np.array( - [ - [1.00000000e00, -1.66533454e-16, 4.44089210e-16], - [0.00000000e00, 1.00000000e00, 0.00000000e00], - [1.26978671e01, 1.05598709e00, 3.70212359e00], - ] - ) - # np.testing.assert_allclose(reg.zthhthi, zthhthi,RTOL) + np.testing.assert_allclose(reg.sig2n_k, 587.56797852699822,RTOL) + np.testing.assert_allclose(reg.sig2n, 551.5944288212637,RTOL) + np.testing.assert_allclose(reg.sig2, 551.5944288212637,RTOL) + np.testing.assert_allclose(reg.std_y, 16.732092091229699,RTOL) + u_5 = np.array([[ 
44.41547467], [-10.19309584], [-24.44666724], [ -5.87833504], [ -6.83994851]]) + np.testing.assert_allclose(reg.u[0:5], u_5,RTOL) + np.testing.assert_allclose(reg.utu, 27028.127012241919,RTOL) + varb = np.array([[ 0.41526237, 0.01879906, -0.01730372], + [ 0.01879906, 0.00362823, -0.00184604], + [-0.01730372, -0.00184604, 0.0011406 ]]) + np.testing.assert_allclose(reg.varb, varb,RTOL) + vm = np.array([[ 229.05640809, 10.36945783, -9.54463414], + [ 10.36945783, 2.0013142 , -1.01826408], + [ -9.54463414, -1.01826408, 0.62914915]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + x_0 = np.array([ 1. , 19.531]) + np.testing.assert_allclose(reg.x[0], x_0,RTOL) + y_5 = np.array([[ 15.72598 ], [ 18.801754], [ 30.626781], [ 32.38776 ], [ 50.73151 ]]) + np.testing.assert_allclose(reg.y[0:5], y_5,RTOL) + yend_5 = np.array([[ 80.467003], [ 44.567001], [ 26.35 ], [ 33.200001], [ 23.225 ]]) + np.testing.assert_allclose(reg.yend[0:5], yend_5,RTOL) + z_0 = np.array([ 1. , 19.531 , 80.467003]) + np.testing.assert_allclose(reg.z[0], z_0,RTOL) + zthhthi = np.array([[ 1.00000000e+00, -1.66533454e-16, 4.44089210e-16], + [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00], + [ 1.26978671e+01, 1.05598709e+00, 3.70212359e+00]]) + #np.testing.assert_allclose(reg.zthhthi, zthhthi,RTOL) np.testing.assert_array_almost_equal(reg.zthhthi, zthhthi, 7) - np.testing.assert_allclose(reg.pr2, 0.27936137128173893, RTOL) - z_stat = np.array( - [ - [5.84526447e00, 5.05764078e-09], - [3.67601567e-01, 7.13170346e-01], - [-1.99468913e00, 4.60767956e-02], - ] - ) - np.testing.assert_allclose(reg.z_stat, z_stat, RTOL) - title = "TWO STAGE LEAST SQUARES" + np.testing.assert_allclose(reg.pr2, 0.27936137128173893,RTOL) + z_stat = np.array([[ 5.84526447e+00, 5.05764078e-09], + [ 3.67601567e-01, 7.13170346e-01], + [ -1.99468913e+00, 4.60767956e-02]]) + np.testing.assert_allclose(reg.z_stat, z_stat,RTOL) + title = 'TWO STAGE LEAST SQUARES' self.assertEqual(reg.title, title) - + def test_n_k(self): reg = TSLS(self.y, 
self.X, self.yd, self.q, sig2n_k=True) - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [243.99486949, 11.04572682, -10.16711028], - [11.04572682, 2.13183469, -1.08467261], - [-10.16711028, -1.08467261, 0.67018062], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 243.99486949, 11.04572682, -10.16711028], + [ 11.04572682, 2.13183469, -1.08467261], + [ -10.16711028, -1.08467261, 0.67018062]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) def test_white(self): - reg = TSLS(self.y, self.X, self.yd, self.q, robust="white") - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [208.27139316, 15.6687805, -11.53686154], - [15.6687805, 2.26882747, -1.30312033], - [-11.53686154, -1.30312033, 0.81940656], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - self.assertEqual(reg.robust, "white") + reg = TSLS(self.y, self.X, self.yd, self.q, robust='white') + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 208.27139316, 15.6687805 , -11.53686154], + [ 15.6687805 , 2.26882747, -1.30312033], + [ -11.53686154, -1.30312033, 0.81940656]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + self.assertEqual(reg.robust, 'white') def test_hac(self): - gwk = libpysal.weights.Kernel.from_shapefile( - libpysal.examples.get_path("columbus.shp"), - k=5, - function="triangular", - fixed=False, - ) - reg = TSLS(self.y, self.X, self.yd, self.q, robust="hac", gwk=gwk) - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [225.0795089, 17.11660041, -12.22448566], - [17.67097154, 2.47483461, -1.4183641], - 
[-12.45093722, -1.40495464, 0.8700441], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - self.assertEqual(reg.robust, "hac") + gwk = libpysal.weights.Kernel.from_shapefile(libpysal.examples.get_path('columbus.shp'),k=5,function='triangular', fixed=False) + reg = TSLS(self.y, self.X, self.yd, self.q, robust='hac', gwk=gwk) + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 225.0795089 , 17.11660041, -12.22448566], + [ 17.67097154, 2.47483461, -1.4183641 ], + [ -12.45093722, -1.40495464, 0.8700441 ]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + self.assertEqual(reg.robust, 'hac') def test_spatial(self): - w = libpysal.weights.Queen.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) + w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path('columbus.shp')) reg = TSLS(self.y, self.X, self.yd, self.q, spat_diag=True, w=w) - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [229.05640809, 10.36945783, -9.54463414], - [10.36945783, 2.0013142, -1.01826408], - [-9.54463414, -1.01826408, 0.62914915], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - ak_test = np.array([1.16816972, 0.27977763]) - np.testing.assert_allclose(reg.ak_test, ak_test, RTOL) + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 229.05640809, 10.36945783, -9.54463414], + [ 10.36945783, 2.0013142 , -1.01826408], + [ -9.54463414, -1.01826408, 0.62914915]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + ak_test = np.array([ 1.16816972, 0.27977763]) + np.testing.assert_allclose(reg.ak_test, ak_test,RTOL) def test_names(self): - w = libpysal.weights.Queen.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - gwk = libpysal.weights.Kernel.from_shapefile( - 
libpysal.examples.get_path("columbus.shp"), - k=5, - function="triangular", - fixed=False, - ) - name_x = ["inc"] - name_y = "crime" - name_yend = ["hoval"] - name_q = ["discbd"] - name_w = "queen" - name_gwk = "k=5" - name_ds = "columbus" - reg = TSLS( - self.y, - self.X, - self.yd, - self.q, - spat_diag=True, - w=w, - robust="hac", - gwk=gwk, - name_x=name_x, - name_y=name_y, - name_q=name_q, - name_w=name_w, - name_yend=name_yend, - name_gwk=name_gwk, - name_ds=name_ds, - ) - betas = np.array([[88.46579584], [0.5200379], [-1.58216593]]) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [225.0795089, 17.11660041, -12.22448566], - [17.67097154, 2.47483461, -1.4183641], - [-12.45093722, -1.40495464, 0.8700441], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - self.assertListEqual(reg.name_x, ["CONSTANT"] + name_x) + w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path('columbus.shp')) + gwk = libpysal.weights.Kernel.from_shapefile(libpysal.examples.get_path('columbus.shp'),k=5,function='triangular', fixed=False) + name_x = ['inc'] + name_y = 'crime' + name_yend = ['hoval'] + name_q = ['discbd'] + name_w = 'queen' + name_gwk = 'k=5' + name_ds = 'columbus' + reg = TSLS(self.y, self.X, self.yd, self.q, + spat_diag=True, w=w, robust='hac', gwk=gwk, + name_x=name_x, name_y=name_y, name_q=name_q, name_w=name_w, + name_yend=name_yend, name_gwk=name_gwk, name_ds=name_ds) + betas = np.array([[ 88.46579584], [ 0.5200379 ], [ -1.58216593]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 225.0795089 , 17.11660041, -12.22448566], + [ 17.67097154, 2.47483461, -1.4183641 ], + [ -12.45093722, -1.40495464, 0.8700441 ]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + self.assertListEqual(reg.name_x, ['CONSTANT']+name_x) self.assertListEqual(reg.name_yend, name_yend) self.assertListEqual(reg.name_q, name_q) self.assertEqual(reg.name_y, name_y) @@ -388,6 +250,5 @@ def test_names(self): 
self.assertEqual(reg.name_gwk, name_gwk) self.assertEqual(reg.name_ds, name_ds) - -if __name__ == "__main__": +if __name__ == '__main__': unittest.main() diff --git a/spreg/tests/test_twosls_regimes.py b/spreg/tests/test_twosls_regimes.py index 735ada4..6c0b914 100644 --- a/spreg/tests/test_twosls_regimes.py +++ b/spreg/tests/test_twosls_regimes.py @@ -1,16 +1,17 @@ import unittest import numpy as np import libpysal +import spreg from spreg.twosls_regimes import TSLS_Regimes from spreg.twosls import TSLS from libpysal.common import RTOL - +import geopandas as gpd class TestTSLS(unittest.TestCase): def setUp(self): - db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"),'r') self.y = np.array(db.by_col("CRIME")) - self.y = np.reshape(self.y, (49, 1)) + self.y = np.reshape(self.y, (49,1)) self.x = [] self.x.append(db.by_col("INC")) self.x = np.array(self.x).T @@ -20,450 +21,308 @@ def setUp(self): self.q = [] self.q.append(db.by_col("DISCBD")) self.q = np.array(self.q).T - self.r_var = "NSA" + self.r_var = 'NSA' self.regimes = db.by_col(self.r_var) def test_basic(self): - reg = TSLS_Regimes( - self.y, self.x, self.yd, self.q, self.regimes, regime_err_sep=False - ) - betas = np.array( - [ - [80.23408166], - [5.48218125], - [82.98396737], - [0.49775429], - [-3.72663211], - [-1.27451485], - ] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - h_0 = np.array([[0.0, 0.0, 1.0, 19.531, 0.0, 5.03]]) - np.testing.assert_allclose(reg.h[0] * np.eye(6), h_0, RTOL) - hth = np.array( - [ - [25.0, 416.378999, 0.0, 0.0, 76.03, 0.0], - [416.378999, 7831.05477839, 0.0, 0.0, 1418.65422625, 0.0], - [0.0, 0.0, 24.0, 287.993, 0.0, 63.72], - [0.0, 0.0, 287.993, 3855.61860282, 0.0, 827.47378], - [76.03, 1418.65422625, 0.0, 0.0, 291.9749, 0.0], - [0.0, 0.0, 63.72, 827.47378, 0.0, 206.6102], - ] - ) - np.testing.assert_allclose(reg.hth, hth, RTOL) - hthi = np.array( - [ - [0.3507855, -0.0175615, 0.0, 
0.0, -0.00601601, 0.0], - [-0.0175615, 0.00194521, -0.0, -0.0, -0.00487844, -0.0], - [0.0, 0.0, 0.42327489, -0.02563036, 0.0, -0.02789128], - [-0.0, -0.0, -0.02563036, 0.00339841, -0.0, -0.00570605], - [-0.00601601, -0.00487844, 0.0, 0.0, 0.02869498, 0.0], - [0.0, 0.0, -0.02789128, -0.00570605, 0.0, 0.03629464], - ] - ) - np.testing.assert_allclose(reg.hthi, hthi, RTOL) - self.assertEqual(reg.k, 6) + reg = TSLS_Regimes(self.y, self.x, self.yd, self.q, self.regimes, regime_err_sep=False) + betas = np.array([[ 80.23408166],[ 5.48218125],[ 82.98396737],[ 0.49775429],[ -3.72663211],[ -1.27451485]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + h_0 = np.array([[ 0. , 0. , 1. , 19.531, 0. , 5.03 ]]) + np.testing.assert_allclose(reg.h[0]*np.eye(6), h_0, RTOL) + hth = np.array([[ 25. , 416.378999 , 0. , 0. ,\ + 76.03 , 0. ],\ + [ 416.378999 , 7831.05477839, 0. , 0. ,\ + 1418.65422625, 0. ],\ + [ 0. , 0. , 24. , 287.993 ,\ + 0. , 63.72 ],\ + [ 0. , 0. , 287.993 , 3855.61860282,\ + 0. , 827.47378 ],\ + [ 76.03 , 1418.65422625, 0. , 0. ,\ + 291.9749 , 0. ],\ + [ 0. , 0. , 63.72 , 827.47378 ,\ + 0. , 206.6102 ]]) + np.testing.assert_allclose(reg.hth, hth,RTOL) + hthi = np.array([[ 0.3507855 , -0.0175615 , 0. , 0. , -0.00601601,\ + 0. ],\ + [-0.0175615 , 0.00194521, -0. , -0. , -0.00487844,\ + -0. ],\ + [ 0. , 0. , 0.42327489, -0.02563036, 0. ,\ + -0.02789128],\ + [-0. , -0. , -0.02563036, 0.00339841, -0. ,\ + -0.00570605],\ + [-0.00601601, -0.00487844, 0. , 0. , 0.02869498,\ + 0. ],\ + [ 0. , 0. , -0.02789128, -0.00570605, 0. 
,\ + 0.03629464]]) + np.testing.assert_allclose(reg.hthi, hthi,RTOL) + self.assertEqual(reg.k,6) self.assertEqual(reg.kstar, 2) - np.testing.assert_allclose(reg.mean_y, 35.128823897959187, RTOL) + np.testing.assert_allclose(reg.mean_y, 35.128823897959187,RTOL) np.testing.assert_equal(reg.kf, 0) np.testing.assert_equal(reg.kr, 3) np.testing.assert_equal(reg.n, 49) np.testing.assert_equal(reg.nr, 2) - pfora1a2 = np.array( - [ - [17.80208995, -0.46997739, 0.0, 0.0, -0.21344994, 0.0], - [-0.36293902, 0.41200496, 0.0, 0.0, -0.17308863, 0.0], - [0.0, 0.0, 23.8584271, -0.96035493, 0.0, -0.26149141], - [0.0, 0.0, -0.61800983, 0.2269828, 0.0, -0.05349643], - [-3.22151864, -2.10181214, 0.0, 0.0, 1.01810757, 0.0], - [0.0, 0.0, -5.42403871, -0.6641704, 0.0, 0.34027606], - ] - ) - np.testing.assert_allclose(reg.pfora1a2, pfora1a2, RTOL) - predy_5 = np.array( - [[-9.85078372], [36.75098196], [57.34266859], [42.89851907], [58.9840913]] - ) - np.testing.assert_allclose(reg.predy[0:5], predy_5, RTOL) - q_5 = np.array([5.03, 4.27, 3.89, 3.7, 2.83]) - np.testing.assert_array_equal((reg.q[0:5].T * np.eye(5))[1, :], q_5) - np.testing.assert_allclose(reg.sig2n_k, 990.00750983736714, RTOL) - np.testing.assert_allclose(reg.sig2n, 868.78210046952631, RTOL) - np.testing.assert_allclose(reg.sig2, 990.00750983736714, RTOL) - np.testing.assert_allclose(reg.std_y, 16.732092091229699, RTOL) - u_5 = np.array( - [ - [25.57676372], - [-17.94922796], - [-26.71588759], - [-10.51075907], - [-8.2525813], - ] - ) - np.testing.assert_allclose(reg.u[0:5], u_5, RTOL) - np.testing.assert_allclose(reg.utu, 42570.322923006788, RTOL) - varb = np.array( - [ - [0.50015831, 0.07969376, 0.0, 0.0, -0.04760541, 0.0], - [0.07969376, 0.06523527, 0.0, 0.0, -0.03105915, 0.0], - [0.0, 0.0, 0.73944792, 0.01132445, 0.0, -0.02117969], - [0.0, 0.0, 0.01132445, 0.00756336, 0.0, -0.00259344], - [-0.04760541, -0.03105915, -0.0, -0.0, 0.0150449, -0.0], - [-0.0, -0.0, -0.02117969, -0.00259344, -0.0, 0.0013287], - ] - ) - 
np.testing.assert_allclose(reg.varb, varb, RTOL) - vm = np.array( - [ - [495.16048523, 78.89742341, 0.0, 0.0, -47.12971066, 0.0], - [78.89742341, 64.58341083, 0.0, 0.0, -30.74878934, 0.0], - [0.0, 0.0, 732.05899155, 11.21128921, 0.0, -20.96804956], - [0.0, 0.0, 11.21128921, 7.48778398, 0.0, -2.56752553], - [-47.12971066, -30.74878934, 0.0, 0.0, 14.89456384, 0.0], - [0.0, 0.0, -20.96804956, -2.56752553, 0.0, 1.3154267], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - x_0 = np.array([[0.0, 0.0, 1.0, 19.531]]) - np.testing.assert_allclose(reg.x[0] * np.eye(4), x_0, RTOL) - y_5 = np.array([[15.72598], [18.801754], [30.626781], [32.38776], [50.73151]]) - np.testing.assert_allclose(reg.y[0:5], y_5, RTOL) - yend_3 = np.array([[0.0, 80.467003], [0.0, 44.567001], [0.0, 26.35]]) - np.testing.assert_allclose(reg.yend[0:3] * np.eye(2), yend_3, RTOL) - z_0 = np.array([[0.0, 0.0, 1.0, 19.531, 0.0, 80.467003]]) - np.testing.assert_allclose(reg.z[0] * np.eye(6), z_0, RTOL) - zthhthi = np.array( - [ - [ - 1.00000000e00, - 0.00000000e00, - 0.00000000e00, - 0.00000000e00, - -4.44089210e-16, - 0.00000000e00, - ], - [ - -1.24344979e-14, - 1.00000000e00, - 0.00000000e00, - 0.00000000e00, - 0.00000000e00, - 0.00000000e00, - ], - [ - 0.00000000e00, - 0.00000000e00, - 1.00000000e00, - 0.00000000e00, - 0.00000000e00, - -1.11022302e-16, - ], - [ - 0.00000000e00, - 0.00000000e00, - -3.55271368e-15, - 1.00000000e00, - 0.00000000e00, - 0.00000000e00, - ], - [ - 2.87468088e00, - 1.82963841e00, - 0.00000000e00, - 0.00000000e00, - 1.38104644e00, - 0.00000000e00, - ], - [ - 0.00000000e00, - 0.00000000e00, - 1.19237474e01, - 1.13018165e00, - 0.00000000e00, - 5.22645427e00, - ], - ] - ) - # np.testing.assert_allclose(reg.zthhthi, zthhthi, RTOL) #why does this fail?? + pfora1a2 = np.array([[ 17.80208995, -0.46997739, 0. , 0. ,\ + -0.21344994, 0. ],\ + [ -0.36293902, 0.41200496, 0. , 0. ,\ + -0.17308863, 0. ],\ + [ 0. , 0. , 23.8584271 , -0.96035493,\ + 0. , -0.26149141],\ + [ 0. , 0. 
, -0.61800983, 0.2269828 ,\ + 0. , -0.05349643],\ + [ -3.22151864, -2.10181214, 0. , 0. ,\ + 1.01810757, 0. ],\ + [ 0. , 0. , -5.42403871, -0.6641704 ,\ + 0. , 0.34027606]]) + np.testing.assert_allclose(reg.pfora1a2, pfora1a2,RTOL) + predy_5 = np.array([[ -9.85078372],[ 36.75098196],[ 57.34266859],[ 42.89851907],[ 58.9840913 ]]) + np.testing.assert_allclose(reg.predy[0:5], predy_5,RTOL) + q_5 = np.array([ 5.03, 4.27, 3.89, 3.7 , 2.83]) + np.testing.assert_array_equal((reg.q[0:5].T*np.eye(5))[1,:], q_5) + np.testing.assert_allclose(reg.sig2n_k, 990.00750983736714,RTOL) + np.testing.assert_allclose(reg.sig2n, 868.78210046952631,RTOL) + np.testing.assert_allclose(reg.sig2, 990.00750983736714,RTOL) + np.testing.assert_allclose(reg.std_y, 16.732092091229699,RTOL) + u_5 = np.array([[ 25.57676372],[-17.94922796],[-26.71588759],[-10.51075907],[ -8.2525813 ]]) + np.testing.assert_allclose(reg.u[0:5], u_5,RTOL) + np.testing.assert_allclose(reg.utu, 42570.322923006788,RTOL) + varb = np.array([[ 0.50015831, 0.07969376, 0. , 0. , -0.04760541,\ + 0. ],\ + [ 0.07969376, 0.06523527, 0. , 0. , -0.03105915,\ + 0. ],\ + [ 0. , 0. , 0.73944792, 0.01132445, 0. ,\ + -0.02117969],\ + [ 0. , 0. , 0.01132445, 0.00756336, 0. ,\ + -0.00259344],\ + [-0.04760541, -0.03105915, -0. , -0. , 0.0150449 ,\ + -0. ],\ + [-0. , -0. , -0.02117969, -0.00259344, -0. ,\ + 0.0013287 ]]) + np.testing.assert_allclose(reg.varb, varb,RTOL) + vm = np.array([[ 495.16048523, 78.89742341, 0. , 0. ,\ + -47.12971066, 0. ],\ + [ 78.89742341, 64.58341083, 0. , 0. ,\ + -30.74878934, 0. ],\ + [ 0. , 0. , 732.05899155, 11.21128921,\ + 0. , -20.96804956],\ + [ 0. , 0. , 11.21128921, 7.48778398,\ + 0. , -2.56752553],\ + [ -47.12971066, -30.74878934, 0. , 0. ,\ + 14.89456384, 0. ],\ + [ 0. , 0. , -20.96804956, -2.56752553,\ + 0. , 1.3154267 ]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + x_0 = np.array([[ 0. , 0. , 1. 
, 19.531]]) + np.testing.assert_allclose(reg.x[0]*np.eye(4), x_0,RTOL) + y_5 = np.array([[ 15.72598 ], [ 18.801754], [ 30.626781], [ 32.38776 ], [ 50.73151 ]]) + np.testing.assert_allclose(reg.y[0:5], y_5,RTOL) + yend_3 = np.array([[ 0. , 80.467003],[ 0. , 44.567001],[ 0. , 26.35 ]]) + np.testing.assert_allclose(reg.yend[0:3]*np.eye(2), yend_3,RTOL) + z_0 = np.array([[ 0. , 0. , 1. , 19.531 , 0. , 80.467003]]) + np.testing.assert_allclose(reg.z[0]*np.eye(6), z_0,RTOL) + zthhthi = np.array([[ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00,\ + 0.00000000e+00, -4.44089210e-16, 0.00000000e+00],\ + [ -1.24344979e-14, 1.00000000e+00, 0.00000000e+00,\ + 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],\ + [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00,\ + 0.00000000e+00, 0.00000000e+00, -1.11022302e-16],\ + [ 0.00000000e+00, 0.00000000e+00, -3.55271368e-15,\ + 1.00000000e+00, 0.00000000e+00, 0.00000000e+00],\ + [ 2.87468088e+00, 1.82963841e+00, 0.00000000e+00,\ + 0.00000000e+00, 1.38104644e+00, 0.00000000e+00],\ + [ 0.00000000e+00, 0.00000000e+00, 1.19237474e+01,\ + 1.13018165e+00, 0.00000000e+00, 5.22645427e+00]]) + #np.testing.assert_allclose(reg.zthhthi, zthhthi, RTOL) #why does this fail?? 
np.testing.assert_array_almost_equal(reg.zthhthi, zthhthi, 7) - np.testing.assert_allclose(reg.pr2, 0.17729324026706564, RTOL) - z_stat = np.array( - [ - [3.60566933e00, 3.11349387e-04], - [6.82170447e-01, 4.95131179e-01], - [3.06705211e00, 2.16181168e-03], - [1.81902371e-01, 8.55659343e-01], - [-9.65611937e-01, 3.34238400e-01], - [-1.11124949e00, 2.66460976e-01], - ] - ) - np.testing.assert_allclose(np.array(reg.z_stat), z_stat, RTOL) - chow_regi = np.array( - [ - [0.00616179, 0.93743265], - [0.3447218, 0.55711631], - [0.37093662, 0.54249417], - ] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 1.1353790779820598, RTOL) - title = "TWO STAGE LEAST SQUARES - REGIMES" + np.testing.assert_allclose(reg.pr2, 0.17729324026706564,RTOL) + z_stat = np.array([[ 3.60566933e+00, 3.11349387e-04],\ + [ 6.82170447e-01, 4.95131179e-01],\ + [ 3.06705211e+00, 2.16181168e-03],\ + [ 1.81902371e-01, 8.55659343e-01],\ + [ -9.65611937e-01, 3.34238400e-01],\ + [ -1.11124949e+00, 2.66460976e-01]]) + np.testing.assert_allclose(np.array(reg.z_stat), z_stat,RTOL) + chow_regi = np.array([[ 0.00616179, 0.93743265], + [ 0.3447218 , 0.55711631], + [ 0.37093662, 0.54249417]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,RTOL) + np.testing.assert_allclose(reg.chow.joint[0], 1.1353790779820598,RTOL) + title = 'TWO STAGE LEAST SQUARES - REGIMES' self.assertEqual(reg.title, title) - + def test_n_k(self): - reg = TSLS_Regimes( - self.y, - self.x, - self.yd, - self.q, - self.regimes, - sig2n_k=True, - regime_err_sep=False, - ) - betas = np.array( - [ - [80.23408166], - [5.48218125], - [82.98396737], - [0.49775429], - [-3.72663211], - [-1.27451485], - ] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [495.16048523, 78.89742341, 0.0, 0.0, -47.12971066, 0.0], - [78.89742341, 64.58341083, 0.0, 0.0, -30.74878934, 0.0], - [0.0, 0.0, 732.05899155, 11.21128921, 0.0, -20.96804956], - [0.0, 0.0, 11.21128921, 
7.48778398, 0.0, -2.56752553], - [-47.12971066, -30.74878934, 0.0, 0.0, 14.89456384, 0.0], - [0.0, 0.0, -20.96804956, -2.56752553, 0.0, 1.3154267], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) + reg = TSLS_Regimes(self.y, self.x, self.yd, self.q, self.regimes, sig2n_k=True, regime_err_sep=False) + betas = np.array([[ 80.23408166],[ 5.48218125],[ 82.98396737],[ 0.49775429],[ -3.72663211],[ -1.27451485]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 495.16048523, 78.89742341, 0. , 0. ,\ + -47.12971066, 0. ],\ + [ 78.89742341, 64.58341083, 0. , 0. ,\ + -30.74878934, 0. ],\ + [ 0. , 0. , 732.05899155, 11.21128921,\ + 0. , -20.96804956],\ + [ 0. , 0. , 11.21128921, 7.48778398,\ + 0. , -2.56752553],\ + [ -47.12971066, -30.74878934, 0. , 0. ,\ + 14.89456384, 0. ],\ + [ 0. , 0. , -20.96804956, -2.56752553,\ + 0. , 1.3154267 ]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) def test_spatial(self): - w = libpysal.weights.Queen.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - reg = TSLS_Regimes( - self.y, - self.x, - self.yd, - self.q, - self.regimes, - spat_diag=True, - w=w, - regime_err_sep=False, - ) - betas = np.array( - [ - [80.23408166], - [5.48218125], - [82.98396737], - [0.49775429], - [-3.72663211], - [-1.27451485], - ] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [495.16048523, 78.89742341, 0.0, 0.0, -47.12971066, 0.0], - [78.89742341, 64.58341083, 0.0, 0.0, -30.74878934, 0.0], - [0.0, 0.0, 732.05899155, 11.21128921, 0.0, -20.96804956], - [0.0, 0.0, 11.21128921, 7.48778398, 0.0, -2.56752553], - [-47.12971066, -30.74878934, 0.0, 0.0, 14.89456384, 0.0], - [0.0, 0.0, -20.96804956, -2.56752553, 0.0, 1.3154267], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - ak_test = np.array([0.69774552, 0.40354227]) - np.testing.assert_allclose(reg.ak_test, ak_test, RTOL) + w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path('columbus.shp')) + reg = TSLS_Regimes(self.y, 
self.x, self.yd, self.q, self.regimes, spat_diag=True, w=w, regime_err_sep=False) + betas = np.array([[ 80.23408166],[ 5.48218125],[ 82.98396737],[ 0.49775429],[ -3.72663211],[ -1.27451485]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 495.16048523, 78.89742341, 0. , 0. ,\ + -47.12971066, 0. ],\ + [ 78.89742341, 64.58341083, 0. , 0. ,\ + -30.74878934, 0. ],\ + [ 0. , 0. , 732.05899155, 11.21128921,\ + 0. , -20.96804956],\ + [ 0. , 0. , 11.21128921, 7.48778398,\ + 0. , -2.56752553],\ + [ -47.12971066, -30.74878934, 0. , 0. ,\ + 14.89456384, 0. ],\ + [ 0. , 0. , -20.96804956, -2.56752553,\ + 0. , 1.3154267 ]]) + np.testing.assert_allclose(reg.vm, vm,RTOL) + ak_test = np.array([ 0.69774552, 0.40354227]) + np.testing.assert_allclose(reg.ak_test, ak_test,RTOL) def test_names(self): - w = libpysal.weights.Queen.from_shapefile( - libpysal.examples.get_path("columbus.shp") - ) - gwk = libpysal.weights.Kernel.from_shapefile( - libpysal.examples.get_path("columbus.shp"), - k=5, - function="triangular", - fixed=False, - ) - name_x = ["inc"] - name_y = "crime" - name_yend = ["hoval"] - name_q = ["discbd"] - name_w = "queen" - name_gwk = "k=5" - name_ds = "columbus" - name_regimes = "nsa" - reg = TSLS_Regimes( - self.y, - self.x, - self.yd, - self.q, - self.regimes, - regime_err_sep=False, - spat_diag=True, - w=w, - robust="hac", - gwk=gwk, - name_regimes=name_regimes, - name_x=name_x, - name_y=name_y, - name_q=name_q, - name_w=name_w, - name_yend=name_yend, - name_gwk=name_gwk, - name_ds=name_ds, - ) - betas = np.array( - [ - [80.23408166], - [5.48218125], - [82.98396737], - [0.49775429], - [-3.72663211], - [-1.27451485], - ] - ) - np.testing.assert_allclose(reg.betas, betas, RTOL) - vm = np.array( - [ - [ - 522.75813101, - 120.64940697, - -15.60303241, - -0.976389, - -67.15556574, - 0.64553579, - ], - [ - 122.83491674, - 122.62303068, - -5.52270916, - 0.05023488, - -57.89404902, - 0.15750428, - ], - [ - 0.1983661, - -0.03539147, - 335.24731378, - 
17.40764168, - -0.26447114, - -14.3375455, - ], - [ - -0.13612426, - -0.43622084, - 18.46644989, - 2.70320508, - 0.20398876, - -1.31821991, - ], - [ - -68.0704928, - -58.03685405, - 2.66225388, - 0.00323082, - 27.68512974, - -0.08124602, - ], - [ - -0.08001296, - 0.13575504, - -14.6998294, - -1.28225201, - -0.05193056, - 0.79845124, - ], - ] - ) - np.testing.assert_allclose(reg.vm, vm, RTOL) - self.assertEqual(reg.name_x, ["0_CONSTANT", "0_inc", "1_CONSTANT", "1_inc"]) - self.assertEqual(reg.name_yend, ["0_hoval", "1_hoval"]) - self.assertEqual(reg.name_q, ["0_discbd", "1_discbd"]) + w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path('columbus.shp')) + gwk = libpysal.weights.Kernel.from_shapefile(libpysal.examples.get_path('columbus.shp'),k=5,function='triangular', fixed=False) + name_x = ['inc'] + name_y = 'crime' + name_yend = ['hoval'] + name_q = ['discbd'] + name_w = 'queen' + name_gwk = 'k=5' + name_ds = 'columbus' + name_regimes= 'nsa' + reg = TSLS_Regimes(self.y, self.x, self.yd, self.q, self.regimes, regime_err_sep=False, + spat_diag=True, w=w, robust='hac', gwk=gwk, name_regimes=name_regimes, + name_x=name_x, name_y=name_y, name_q=name_q, name_w=name_w, + name_yend=name_yend, name_gwk=name_gwk, name_ds=name_ds) + betas = np.array([[ 80.23408166],[ 5.48218125],[ 82.98396737],[ 0.49775429],[ -3.72663211],[ -1.27451485]]) + np.testing.assert_allclose(reg.betas, betas,RTOL) + vm = np.array([[ 522.75813101, 120.64940697, -15.60303241, -0.976389 ,\ + -67.15556574, 0.64553579],\ + [ 122.83491674, 122.62303068, -5.52270916, 0.05023488,\ + -57.89404902, 0.15750428],\ + [ 0.1983661 , -0.03539147, 335.24731378, 17.40764168,\ + -0.26447114, -14.3375455 ],\ + [ -0.13612426, -0.43622084, 18.46644989, 2.70320508,\ + 0.20398876, -1.31821991],\ + [ -68.0704928 , -58.03685405, 2.66225388, 0.00323082,\ + 27.68512974, -0.08124602],\ + [ -0.08001296, 0.13575504, -14.6998294 , -1.28225201,\ + -0.05193056, 0.79845124]]) + np.testing.assert_allclose(reg.vm, 
vm,RTOL) + self.assertEqual(reg.name_x, ['0_CONSTANT', '0_inc', '1_CONSTANT', '1_inc']) + self.assertEqual(reg.name_yend, ['0_hoval', '1_hoval']) + self.assertEqual(reg.name_q, ['0_discbd', '1_discbd']) self.assertEqual(reg.name_y, name_y) self.assertEqual(reg.name_w, name_w) self.assertEqual(reg.name_gwk, name_gwk) self.assertEqual(reg.name_ds, name_ds) self.assertEqual(reg.name_regimes, name_regimes) - + def test_regi_err(self): - # Artficial: + #Artficial: n = 256 - x1 = np.random.uniform(-10, 10, (n, 1)) - x2 = np.random.uniform(1, 5, (n, 1)) - q = x2 + np.random.normal(0, 1, (n, 1)) - x = np.hstack((x1, x2)) - y = np.dot( - np.hstack((np.ones((n, 1)), x)), np.array([[1], [0.5], [2]]) - ) + np.random.normal(0, 1, (n, 1)) + x1 = np.random.uniform(-10,10,(n,1)) + x2 = np.random.uniform(1,5,(n,1)) + q = x2 + np.random.normal(0,1,(n,1)) + x = np.hstack((x1,x2)) + y = np.dot(np.hstack((np.ones((n,1)),x)),np.array([[1],[0.5],[2]])) + np.random.normal(0,1,(n,1)) latt = int(np.sqrt(n)) - regi = [0] * (n // 2) + [1] * (n // 2) ##must be floor! - model = TSLS_Regimes( - y, x1, regimes=regi, q=q, yend=x2, regime_err_sep=True, sig2n_k=False - ) - model1 = TSLS( - y[0 : (n // 2)].reshape((n // 2), 1), - x1[0 : (n // 2)], - yend=x2[0 : (n // 2)], - q=q[0 : (n // 2)], - sig2n_k=False, - ) - model2 = TSLS( - y[(n // 2) : n].reshape((n // 2), 1), - x1[(n // 2) : n], - yend=x2[(n // 2) : n], - q=q[(n // 2) : n], - sig2n_k=False, - ) + regi = [0]*(n//2) + [1]*(n//2) ##must be floor! 
+ model = TSLS_Regimes(y, x1, regimes=regi, q=q, yend=x2, regime_err_sep=True, sig2n_k=False) + model1 = TSLS(y[0:(n//2)].reshape((n//2),1), x1[0:(n//2)],yend=x2[0:(n//2)], q=q[0:(n//2)], sig2n_k=False) + model2 = TSLS(y[(n//2):n].reshape((n//2),1), x1[(n//2):n], yend=x2[(n//2):n], q=q[(n//2):n], sig2n_k=False) tbetas = np.vstack((model1.betas, model2.betas)) - np.testing.assert_allclose(model.betas, tbetas) - vm = np.hstack((model1.vm.diagonal(), model2.vm.diagonal())) - np.testing.assert_allclose(model.vm.diagonal(), vm, RTOL) - # Columbus: - reg = TSLS_Regimes( - self.y, - self.x, - regimes=self.regimes, - yend=self.yd, - q=self.q, - regime_err_sep=False, - ) - tbetas = np.array( - [ - [80.23408166], - [5.48218125], - [82.98396737], - [0.49775429], - [-3.72663211], - [-1.27451485], - ] - ) - np.testing.assert_allclose(tbetas, reg.betas, RTOL) - vm = np.array([495.16048523, 78.89742341, 0.0, 0.0, -47.12971066, 0.0]) - np.testing.assert_allclose(reg.vm[0], vm, RTOL) - u_3 = np.array([[25.57676372], [-17.94922796], [-26.71588759]]) - np.testing.assert_allclose(reg.u[0:3], u_3, RTOL) - predy_3 = np.array([[-9.85078372], [36.75098196], [57.34266859]]) - np.testing.assert_allclose(reg.predy[0:3], predy_3, RTOL) - chow_regi = np.array( - [ - [0.00616179, 0.93743265], - [0.3447218, 0.55711631], - [0.37093662, 0.54249417], - ] - ) - np.testing.assert_allclose(reg.chow.regi, chow_regi, RTOL) - np.testing.assert_allclose(reg.chow.joint[0], 1.1353790779821029, RTOL) + np.testing.assert_allclose(model.betas,tbetas) + vm = np.hstack((model1.vm.diagonal(),model2.vm.diagonal())) + np.testing.assert_allclose(model.vm.diagonal(), vm,RTOL) + #Columbus: + reg = TSLS_Regimes(self.y, self.x, regimes=self.regimes, yend=self.yd, q=self.q, regime_err_sep=False) + tbetas = np.array([[ 80.23408166], + [ 5.48218125], + [ 82.98396737], + [ 0.49775429], + [ -3.72663211], + [ -1.27451485],]) + np.testing.assert_allclose(tbetas, reg.betas,RTOL) + vm = np.array([ 495.16048523, 78.89742341, 0. 
, 0. , + -47.12971066, 0. ]) + np.testing.assert_allclose(reg.vm[0], vm,RTOL) + u_3 = np.array([[ 25.57676372], + [-17.94922796], + [-26.71588759]]) + np.testing.assert_allclose(reg.u[0:3], u_3,RTOL) + predy_3 = np.array([[ -9.85078372], + [ 36.75098196], + [ 57.34266859]]) + np.testing.assert_allclose(reg.predy[0:3], predy_3,RTOL) + chow_regi = np.array([[ 0.00616179, 0.93743265], + [ 0.3447218 , 0.55711631], + [ 0.37093662, 0.54249417]]) + np.testing.assert_allclose(reg.chow.regi, chow_regi,RTOL) + np.testing.assert_allclose(reg.chow.joint[0], 1.1353790779821029,RTOL) + + +class TestTSLS_endog_regimes(unittest.TestCase): + def test_TSLS_reg(self): + db = gpd.read_file(libpysal.examples.get_path('baltim.shp')) + w = libpysal.weights.Queen.from_dataframe(db, use_index=True) + w.transform = 'r' + tsls = spreg.TSLS_Endog_Regimes(y=db['PRICE'], x=db['AGE'], + yend=db['SQFT'], q=db['NROOM'], + w=w) + np.testing.assert_allclose(tsls.betas,np.array([[28.55593008], + [-0.27351083], + [22.36160147], + [-0.64768173], + [22.89318454], + [-0.48438473], + [ 1.11252432], + [ 2.74757415], + [ 2.26013585]])) + vm = np.array([25.25449253, -0.09514615, 0. , 0. , 0. , + 0. , -1.33479051, 0. , 0. 
]) + np.testing.assert_allclose(tsls.vm[0], vm,RTOL) + np.testing.assert_allclose(tsls.mean_y, \ + 44.30718009478672,RTOL) + np.testing.assert_equal(tsls.kf, 0) + np.testing.assert_equal(tsls.kr, 3) + np.testing.assert_equal(tsls.n, 211) + np.testing.assert_equal(tsls.nr, 3) + np.testing.assert_equal(tsls.name_x, ['0_CONSTANT', '0_AGE', '1_CONSTANT', '1_AGE', '2_CONSTANT', '2_AGE']) + np.testing.assert_equal(tsls.name_yend, ['0_SQFT', '1_SQFT', '2_SQFT']) + np.testing.assert_allclose(tsls.predy[3], np.array([ + 90.88982973]),RTOL) + np.testing.assert_allclose(tsls.sig2n, \ + 355.3494911130984,RTOL) + np.testing.assert_allclose(tsls.SSR, \ + [77331.25758888898, 74978.74262486391, 73326.08720216743],RTOL) + np.testing.assert_allclose(tsls.clusters, \ + np.array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, + 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 2, 1, 0, 1, 0, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 2, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),RTOL) -if __name__ == "__main__": - start_suppress = np.get_printoptions()["suppress"] - np.set_printoptions(suppress=True) +if __name__ == '__main__': + start_suppress = np.get_printoptions()['suppress'] + np.set_printoptions(suppress=True) unittest.main() - np.set_printoptions(suppress=start_suppress) + np.set_printoptions(suppress=start_suppress) + diff --git a/spreg/twosls.py b/spreg/twosls.py index 1bed3be..0c59bb6 100644 --- a/spreg/twosls.py +++ b/spreg/twosls.py @@ -262,6 +262,10 @@ class TSLS(BaseTSLS): If slx_lags>0, the specification becomes of the SLX type. 
slx_vars : either "All" (default) or list of booleans to select x variables to be lagged + regimes : list or pandas.Series + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + For other regimes-specific arguments, see TSLS_Regimes sig2n_k : boolean If True, then use n-k to estimate sigma^2. If False, use n. spat_diag : boolean @@ -463,6 +467,7 @@ def __init__( sig2n_k=False, spat_diag=False, nonspat_diag=True, + regimes=None, vm=False, name_y=None, name_x=None, @@ -472,61 +477,86 @@ def __init__( name_gwk=None, name_ds=None, latex=False, + **kwargs, ): - - n = USER.check_arrays(y, x, yend, q) - y, name_y = USER.check_y(y, n, name_y) - yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) - USER.check_robust(robust, gwk) - spat_diag, warn = USER.check_spat_diag(spat_diag=spat_diag, w=w, robust=robust, slx_lags=slx_lags) - set_warn(self, warn) - x_constant, name_x, warn = USER.check_constant(x, name_x) - self.name_x = USER.set_name_x(name_x, x_constant) - w = USER.check_weights(w, y, slx_lags=slx_lags, w_required=spat_diag) - if slx_lags>0: -# lag_x = get_lags(w, x_constant[:, 1:], slx_lags) -# x_constant = np.hstack((x_constant, lag_x)) -# self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) - x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) - - set_warn(self, warn) - BaseTSLS.__init__( - self, - y=y, - x=x_constant, - yend=yend, - q=q, - robust=robust, - gwk=gwk, - sig2n_k=sig2n_k, - ) - self.title = "TWO STAGE LEAST SQUARES" - if slx_lags > 0: - self.title += " WITH SPATIALLY LAGGED X (2SLS-SLX)" - self.name_ds = USER.set_name_ds(name_ds) - self.name_y = USER.set_name_y(name_y) - self.name_yend = USER.set_name_yend(name_yend, yend) - self.name_z = self.name_x + self.name_yend - self.name_q = USER.set_name_q(name_q, q) - self.name_h = USER.set_name_h(self.name_x, self.name_q) - self.robust = 
USER.set_robust(robust) - self.name_w = USER.set_name_w(name_w, w) - self.name_gwk = USER.set_name_w(name_gwk, gwk) - self.output = pd.DataFrame(self.name_x + self.name_yend, - columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) - self.output['regime'], self.output['equation'] = (0, 0) - diag_out = "" - if nonspat_diag: - self.dwh = DIAG.dwh(self) - sum_dwh = _summary_dwh(self) - diag_out += sum_dwh - if spat_diag: - diag_out += _spat_diag_out(self, w, 'yend') - - - output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) + if regimes is not None: + from .twosls_regimes import TSLS_Regimes + self.__class__ = TSLS_Regimes + self.__init__( + y=y, + x=x, + yend=yend, + q=q, + regimes=regimes, + w=w, + robust=robust, + gwk=gwk, + slx_lags=slx_lags, + sig2n_k=sig2n_k, + spat_diag=spat_diag, + vm=vm, + name_y=name_y, + name_x=name_x, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_gwk=name_gwk, + name_ds=name_ds, + latex=latex, + **kwargs, + ) + else: + n = USER.check_arrays(y, x, yend, q) + y, name_y = USER.check_y(y, n, name_y) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) + USER.check_robust(robust, gwk) + spat_diag, warn = USER.check_spat_diag(spat_diag=spat_diag, w=w, robust=robust, slx_lags=slx_lags) + set_warn(self, warn) + x_constant, name_x, warn = USER.check_constant(x, name_x) + self.name_x = USER.set_name_x(name_x, x_constant) + w = USER.check_weights(w, y, slx_lags=slx_lags, w_required=spat_diag) + if slx_lags>0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, + slx_lags=slx_lags,slx_vars=slx_vars) + + set_warn(self, warn) + BaseTSLS.__init__( + self, + y=y, + x=x_constant, + yend=yend, + q=q, + robust=robust, + gwk=gwk, + 
sig2n_k=sig2n_k, + ) + self.title = "TWO STAGE LEAST SQUARES" + if slx_lags > 0: + self.title += " WITH SPATIALLY LAGGED X (2SLS-SLX)" + self.name_ds = USER.set_name_ds(name_ds) + self.name_y = USER.set_name_y(name_y) + self.name_yend = USER.set_name_yend(name_yend, yend) + self.name_z = self.name_x + self.name_yend + self.name_q = USER.set_name_q(name_q, q) + self.name_h = USER.set_name_h(self.name_x, self.name_q) + self.robust = USER.set_robust(robust) + self.name_w = USER.set_name_w(name_w, w) + self.name_gwk = USER.set_name_w(name_gwk, gwk) + self.output = pd.DataFrame(self.name_x + self.name_yend, + columns=['var_names']) + self.output['var_type'] = ['o'] + ['x'] * (len(self.name_x)-1) + ['yend'] * len(self.name_yend) + self.output['regime'], self.output['equation'] = (0, 0) + diag_out = "" + if nonspat_diag: + self.dwh = DIAG.dwh(self) + sum_dwh = _summary_dwh(self) + diag_out += sum_dwh + if spat_diag: + diag_out += _spat_diag_out(self, w, 'yend') + output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def _test(): import doctest diff --git a/spreg/twosls_regimes.py b/spreg/twosls_regimes.py index fc97889..c4eb44d 100644 --- a/spreg/twosls_regimes.py +++ b/spreg/twosls_regimes.py @@ -3,10 +3,11 @@ import pandas as pd from . import regimes as REGI from . import user_output as USER -from .utils import set_warn, RegressionProps_basic, spdot, sphstack, get_lags +from .utils import set_warn, RegressionProps_basic, spdot, sphstack, get_lags, optim_k from .twosls import BaseTSLS from .robust import hac_multi from .output import output, _spat_diag_out +from .skater_reg import Skater_reg """ Two-stage Least Squares estimation with regimes. @@ -193,61 +194,47 @@ class TSLS_Regimes(BaseTSLS, REGI.Regimes_Frame): >>> import libpysal >>> from libpysal.examples import load_example >>> from libpysal.weights import Rook + >>> import geopandas as gpd - Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open(). 
- This is the DBF associated with the NAT shapefile. Note that - libpysal.io.open() also reads data in CSV format; since the actual class - requires data to be passed in as numpy arrays, the user can read their - data in using any method. + Open data on NCOVR US County Homicides (3085 areas) using geopandas. >>> nat = load_example('Natregimes') - >>> db = libpysal.io.open(nat.get_path('natregimes.dbf'), 'r') + >>> db = gpd.read_file(nat.get_path('natregimes.shp')) - Extract the HR90 column (homicide rates in 1990) from the DBF file and make it the - dependent variable for the regression. Note that PySAL requires this to be - an numpy array of shape (n, 1) as opposed to the also common shape of (n, ) - that other packages accept. + Select the HR90 column (homicide rates in 1990) as the + dependent variable for the regression. - >>> y_var = 'HR90' - >>> y = np.array([db.by_col(y_var)]).reshape(3085,1) + >>> y = db['HR90'] - Extract UE90 (unemployment rate) and PS90 (population structure) vectors from - the DBF to be used as independent variables in the regression. Other variables - can be inserted by adding their names to x_var, such as x_var = ['Var1','Var2','...] - Note that PySAL requires this to be an nxj numpy array, where j is the - number of independent variables (not including a constant). By default - this model adds a vector of ones to the independent variables passed in. + Select UE90 (unemployment rate) and PS90 (population structure) variables + as independent variables in the regression. - >>> x_var = ['PS90','UE90'] - >>> x = np.array([db.by_col(name) for name in x_var]).T + >>> x = db[['PS90','UE90']] In this case we consider RD90 (resource deprivation) as an endogenous regressor. We tell the model that this is so by passing it in a different parameter from the exogenous variables (x). 
- >>> yd_var = ['RD90'] - >>> yd = np.array([db.by_col(name) for name in yd_var]).T + >>> yd = db['RD90'] Because we have endogenous variables, to obtain a correct estimate of the model, we need to instrument for RD90. We use FP89 (families below poverty) for this and hence put it in the instruments parameter, 'q'. - >>> q_var = ['FP89'] - >>> q = np.array([db.by_col(name) for name in q_var]).T + >>> q = db[['FP89']] The different regimes in this data are given according to the North and South dummy (SOUTH). - >>> r_var = 'SOUTH' - >>> regimes = db.by_col(r_var) + >>> r_var = db['SOUTH'] Since we want to perform tests for spatial dependence, we need to specify the spatial weights matrix that includes the spatial configuration of the observations into the error component of the model. To do that, we can open an already existing gal file or create a new one. In this case, we will - create one from ``NAT.shp``. + create one from our geopandas dataframe. - >>> w = Rook.from_shapefile(nat.get_path("natregimes.shp")) + >>> w = Rook.from_dataframe(db, use_index=True) Unless there is a good reason not to do it, the weights have to be row-standardized so every row of the matrix sums to one. 
Among other @@ -263,77 +250,62 @@ class TSLS_Regimes(BaseTSLS, REGI.Regimes_Frame): parameters: >>> from spreg import TSLS_Regimes - >>> tslsr = TSLS_Regimes(y, x, yd, q, regimes, w=w, constant_regi='many', spat_diag=False, name_y=y_var, name_x=x_var, name_yend=yd_var, name_q=q_var, name_regimes=r_var, name_ds='NAT', name_w='NAT.shp') + >>> tslsr = TSLS_Regimes(y, x, yd, q, r_var, w=w, spat_diag=True, name_regimes='SOUTH', name_ds='NAT', name_w='NAT.shp') >>> tslsr.betas array([[ 3.66973562], [ 1.06950466], [ 0.14680946], - [ 2.45864196], [ 9.55873243], [ 1.94666348], [-0.30810214], + [ 2.45864196], [ 3.68718119]]) >>> np.sqrt(tslsr.vm.diagonal()) - array([0.38389901, 0.09963973, 0.04672091, 0.22725012, 0.49181223, - 0.19630774, 0.07784587, 0.25529011]) + array([0.46522151, 0.12074672, 0.05661795, 0.41893265, 0.16721773, + 0.06631022, 0.27538921, 0.21745974]) >>> print(tslsr.summary) REGRESSION RESULTS ------------------ - SUMMARY OF OUTPUT: TWO STAGE LEAST SQUARES ESTIMATION - REGIME 0 - ---------------------------------------------------------------- + SUMMARY OF OUTPUT: TWO STAGE LEAST SQUARES - REGIMES + ---------------------------------------------------- Data set : NAT Weights matrix : NAT.shp - Dependent Variable : 0_HR90 Number of Observations: 1673 - Mean dependent var : 3.3416 Number of Variables : 4 - S.D. dependent var : 4.6795 Degrees of Freedom : 1669 - Pseudo R-squared : 0.2092 + Dependent Variable : HR90 Number of Observations: 3085 + Mean dependent var : 6.1829 Number of Variables : 8 + S.D. 
dependent var : 6.6414 Degrees of Freedom : 3077 + Pseudo R-squared : 0.4246 ------------------------------------------------------------------------------------ Variable Coefficient Std.Error z-Statistic Probability ------------------------------------------------------------------------------------ - 0_CONSTANT 3.6697356 0.3838990 9.5591172 0.0000000 - 0_PS90 1.0695047 0.0996397 10.7337170 0.0000000 - 0_UE90 0.1468095 0.0467209 3.1422643 0.0016765 - 0_RD90 2.4586420 0.2272501 10.8191009 0.0000000 + 0_CONSTANT 3.66974 0.46522 7.88815 0.00000 + 0_PS90 1.06950 0.12075 8.85742 0.00000 + 0_UE90 0.14681 0.05662 2.59298 0.00951 + 0_RD90 2.45864 0.27539 8.92788 0.00000 + 1_CONSTANT 9.55873 0.41893 22.81687 0.00000 + 1_PS90 1.94666 0.16722 11.64149 0.00000 + 1_UE90 -0.30810 0.06631 -4.64637 0.00000 + 1_RD90 3.68718 0.21746 16.95570 0.00000 ------------------------------------------------------------------------------------ - Instrumented: 0_RD90 - Instruments: 0_FP89 + Instrumented: 0_RD90, 1_RD90 + Instruments: 0_FP89, 1_FP89 Regimes variable: SOUTH - SUMMARY OF OUTPUT: TWO STAGE LEAST SQUARES ESTIMATION - REGIME 1 - ---------------------------------------------------------------- - Data set : NAT - Weights matrix : NAT.shp - Dependent Variable : 1_HR90 Number of Observations: 1412 - Mean dependent var : 9.5493 Number of Variables : 4 - S.D. 
dependent var : 7.0389 Degrees of Freedom : 1408 - Pseudo R-squared : 0.2987 - - ------------------------------------------------------------------------------------ - Variable Coefficient Std.Error z-Statistic Probability - ------------------------------------------------------------------------------------ - 1_CONSTANT 9.5587324 0.4918122 19.4357356 0.0000000 - 1_PS90 1.9466635 0.1963077 9.9163867 0.0000000 - 1_UE90 -0.3081021 0.0778459 -3.9578483 0.0000756 - 1_RD90 3.6871812 0.2552901 14.4431026 0.0000000 - ------------------------------------------------------------------------------------ - Instrumented: 1_RD90 - Instruments: 1_FP89 - Regimes variable: SOUTH - ------------------------------------------------------------------------------------ - GLOBAL DIAGNOSTICS - REGIMES DIAGNOSTICS - CHOW TEST VARIABLE DF VALUE PROB - CONSTANT 1 89.093 0.0000 - PS90 1 15.876 0.0001 - UE90 1 25.106 0.0000 - RD90 1 12.920 0.0003 - Global test 4 201.237 0.0000 + CONSTANT 1 88.485 0.0000 + PS90 1 18.086 0.0000 + UE90 1 27.220 0.0000 + RD90 1 12.258 0.0005 + Global test 4 195.994 0.0000 + + DIAGNOSTICS FOR SPATIAL DEPENDENCE + TEST DF VALUE PROB + Anselin-Kelejian Test 1 104.255 0.0000 ================================ END OF REPORT ===================================== """ @@ -437,10 +409,10 @@ def __init__( latex ) else: - q, self.name_q = REGI.Regimes_Frame.__init__( + q, self.name_q, xtype = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q ) - x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( + x, self.name_x, xtype, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, @@ -449,7 +421,7 @@ def __init__( names=name_x, rlist=True ) - yend, self.name_yend, yend_rlist = REGI.Regimes_Frame.__init__( + yend, self.name_yend, xtypeyd, yend_rlist = REGI.Regimes_Frame.__init__( self, yend, regimes, @@ -461,7 +433,7 @@ def __init__( ) self.output = pd.DataFrame(self.name_x+self.name_yend, columns=['var_names']) - 
self.output['var_type'] = ['x']*len(self.name_x)+['yend']*len(self.name_yend) + self.output['var_type'] = xtype+xtypeyd self.output['regime'] = x_rlist+yend_rlist self.output['equation'] = 0 @@ -629,7 +601,7 @@ def _tsls_regimes_multi( self.name_z += results[r].name_z self.name_h += results[r].name_h self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x+results[r].name_yend, - 'var_type': ['x']*len(results[r].name_x)+['yend']*len(results[r].name_yend), + 'var_type': ['o']+['x']*(len(results[r].name_x)-1)+['yend']*len(results[r].name_yend), 'regime': r, 'equation': r})], ignore_index=True) counter += 1 @@ -749,6 +721,381 @@ def _optimal_weight(reg, sig2n_k, warn=True): ) return +class TSLS_Endog_Regimes(TSLS_Regimes): + + """ + Two stage least squares (S2SLS) with endogenous regimes. + Based on the function skater_reg as shown in :cite:`Anselin2021`. + + Parameters + ---------- + y : numpy.ndarray or pandas.Series + nx1 array for dependent variable + x : numpy.ndarray or pandas object + Two dimensional array with n rows and one column for each + independent (exogenous) variable, excluding the constant + yend : numpy.ndarray or pandas object + Two dimensional array with n rows and one column for each + endogenous variable + q : numpy.ndarray or pandas object + Two dimensional array with n rows and one column for each + external exogenous variable to use as instruments (note: + this should not contain any variables from x) + w : pysal W object + Spatial weights object (required if running spatial + diagnostics) + n_clusters : int + Number of clusters to be used in the endogenous regimes. + If None (default), the number of clusters will be chosen + according to the function utils.optim_k using a method + adapted from Mojena (1977)'s Rule Two + quorum : int + Minimum number of observations in a cluster to be considered + Must be at least larger than the number of variables in x + Default value is 30 or 10*k, whichever is larger. 
+ trace : boolean + Sets whether to store intermediate results of the clustering + Hard-coded to True if n_clusters is None + name_y : string + Name of dependent variable for use in output + name_x : list of strings + Names of independent variables for use in output + name_yend : list of strings + Names of endogenous variables for use in output + name_q : list of strings + Names of instruments for use in output + name_w : string + Name of weights matrix for use in output + name_gwk : string + Name of kernel weights matrix for use in output + name_ds : string + Name of dataset for use in output + name_regimes : string + Name of regimes variable for use in output + latex : boolean + Specifies if summary is to be printed in latex format + **kwargs : additional keyword arguments depending on the specific model + + Attributes + ---------- + output : dataframe + regression results pandas dataframe + summary : string + Summary of regression results and diagnostics (note: use in + conjunction with the print command) + betas : array + kx1 array of estimated coefficients + u : array + nx1 array of residuals + e_pred : array + nx1 array of residuals (using reduced form) + predy : array + nx1 array of predicted y values + predy_e : array + nx1 array of predicted y values (using reduced form) + n : integer + Number of observations + k : integer + Number of variables for which coefficients are estimated + (including the constant) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + kstar : integer + Number of endogenous variables. 
+ Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + y : array + nx1 array for dependent variable + x : array + Two dimensional array with n rows and one column for each + independent (exogenous) variable, including the constant + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + yend : array + Two dimensional array with n rows and one column for each + endogenous variable + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + q : array + Two dimensional array with n rows and one column for each + external exogenous variable used as instruments + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + z : array + nxk array of variables (combination of x and yend) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + h : array + nxl array of instruments (combination of x and q) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + robust : string + Adjustment for robust standard errors + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + mean_y : float + Mean of dependent variable + std_y : float + Standard deviation of dependent variable + vm : array + Variance covariance matrix (kxk) + pr2 : float + Pseudo R squared (squared correlation between y and ypred) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + pr2_e : float + Pseudo R squared (squared correlation between y and ypred_e + (using reduced form)) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + utu : float + Sum of squared residuals + sig2 : float + Sigma squared used in computations + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + 
std_err : array + 1xk array of standard errors of the betas + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + z_stat : list of tuples + z statistic; each tuple contains the pair (statistic, + p-value), where each is a float + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + ak_test : tuple + Anselin-Kelejian test; tuple contains the pair (statistic, + p-value) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + cfh_test : tuple + Common Factor Hypothesis test; tuple contains the pair (statistic, + p-value). Only when it applies (see specific documentation). + name_y : string + Name of dependent variable for use in output + name_x : list of strings + Names of independent variables for use in output + name_yend : list of strings + Names of endogenous variables for use in output + name_z : list of strings + Names of exogenous and endogenous variables for use in + output + name_q : list of strings + Names of external instruments + name_h : list of strings + Names of all instruments used in output + name_w : string + Name of weights matrix for use in output + name_gwk : string + Name of kernel weights matrix for use in output + name_ds : string + Name of dataset for use in output + name_regimes : string + Name of regimes variable for use in output + title : string + Name of the regression method used + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + sig2n : float + Sigma squared (computed with n in the denominator) + sig2n_k : float + Sigma squared (computed with n-k in the denominator) + hth : float + :math:`H'H`. + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + hthi : float + :math:`(H'H)^{-1}`.
+ Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + varb : array + :math:`(Z'H (H'H)^{-1} H'Z)^{-1}`. + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + zthhthi : array + :math:`Z'H(H'H)^{-1}`. + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + pfora1a2 : array + n(zthhthi)'varb + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + sp_multipliers: dict + Dictionary of spatial multipliers (if spat_impacts is not None) + Only available in dictionary 'multi' when multiple regressions + (see 'multi' below for details) + regimes : list + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + constant_regi: string + Ignored if regimes=False. Constant option for regimes. + Switcher controlling the constant term setup. It may take + the following values: + + * 'one': a vector of ones is appended to x and held constant across regimes. + + * 'many': a vector of ones is appended to x and considered different per regime. + cols2regi : list, 'all' + Ignored if regimes=False. Argument indicating whether each + column of x should be considered as different per regime + or held constant across regimes (False). + If a list, k booleans indicating for each variable the + option (True if one per regime, False to be held constant). + If 'all', all the variables vary by regime. + regime_lag_sep: boolean + If True, the spatial parameter for spatial lag is also + computed according to different regimes. If False (default), + the spatial parameter is fixed across regimes. + regime_err_sep: boolean + If True, a separate regression is run for each regime. + kr : int + Number of variables/columns to be "regimized" or subject + to change by regime. These will result in one parameter + estimate by regime for each variable (i.e. 
nr parameters per + variable) + kf : int + Number of variables/columns to be considered fixed or + global across regimes and hence only obtain one parameter + estimate + nr : int + Number of different regimes in the 'regimes' list + multi : dictionary + Only available when multiple regressions are estimated, + i.e. when regime_err_sep=True and no variable is fixed + across regimes. + Contains all attributes of each individual regression + SSR : list + List with the total sum of squared residuals of the model, + considering all regimes, for each number of regimes evaluated, + starting with the solution with 2 regimes. + clusters : array + Cluster (regime) label assigned to each observation + _trace : list + List of dictionaries with the clustering results for each + number of clusters tested. Only available if n_clusters is + None or trace=True. + Examples + -------- + >>> import libpysal + >>> import numpy as np + >>> np.set_printoptions(legacy='1.25') #to avoid printing issues with numpy floats + >>> import geopandas as gpd + >>> from spreg import TSLS_Endog_Regimes + + Open data on Baltimore house sales prices and characteristics + from libpysal examples using geopandas. + + >>> db = gpd.read_file(libpysal.examples.get_path('baltim.shp')) + + We will create a weights matrix based on contiguity. + + >>> w = libpysal.weights.Queen.from_dataframe(db, use_index=True) + >>> w.transform = "r" + + For this example, just to show how the function works, we will use + the 'PRICE' column as the dependent variable, 'AGE' as independent variable, + 'SQFT' as endogenous variable, and 'NROOM' as an instrument. + At this point, we will let the model choose the number of clusters. + + >>> reg = TSLS_Endog_Regimes(y=db['PRICE'], x=db['AGE'], yend=db['SQFT'], q=db['NROOM'], w=w) + + The function `print(reg.summary)` can be used to visualize the results of the regression. 
+ + Alternatively, we can check individual attributes: + >>> reg.betas + array([[28.55593008], + [-0.27351083], + [22.36160147], + [-0.64768173], + [22.89318454], + [-0.48438473], + [ 1.11252432], + [ 2.74757415], + [ 2.26013585]]) + >>> reg.SSR + [77331.25758888898, 74978.74262486391, 73326.08720216743] + >>> reg.clusters + array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, + 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 2, 1, 0, 1, 0, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 2, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int32) + + We will now set the number of clusters to 2 and run the regression again. + + >>> reg = TSLS_Endog_Regimes(y=db['PRICE'], x=db['AGE'], yend=db['SQFT'], q=db['NROOM'], w=w, n_clusters=2) + + The function `print(reg.summary)` can be used to visualize the results of the regression. 
+ + Alternatively, we can check individual attributes as before: + >>> reg.betas + array([[29.89584104], + [-0.36936638], + [22.36160147], + [-0.64768173], + [ 1.41273696], + [ 2.74757415]]) + >>> reg.SSR + [77331.25758888898] + >>> reg.clusters + array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, + 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int32) + """ + + + def __init__( + self, y, x, yend, q, w=None, n_clusters=None, quorum=-1, trace=True, name_y=None, name_x=None, name_yend=None, name_q=None, **kwargs): + + n = USER.check_arrays(y, x) + y, name_y = USER.check_y(y, n, name_y) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) + w = USER.check_weights(w, y, w_required=True) + x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) + set_warn(self, warn) + # Standardize the variables + x_combined = np.hstack((x_constant, yend)) + x_std = (x_combined - np.mean(x_combined, axis=0)) / np.std(x_combined, axis=0) + + if quorum < 0: + quorum = np.max([(x_constant.shape[1]+1)*10, 30]) + + if not n_clusters: + n_clusters_opt = x_constant.shape[0]*0.70//quorum + if n_clusters_opt < 2: + raise ValueError( + "The combination of the values of `N` and `quorum` is not compatible with regimes estimation.") + sk_reg_results = Skater_reg().fit(n_clusters_opt, w, x_std, {'reg':BaseTSLS,'y':y,'x':x_constant,'yend':yend,'q':q}, quorum=quorum, trace=True) + n_clusters = 
optim_k([sk_reg_results._trace[i][1][2] for i in range(1, len(sk_reg_results._trace))]) + self.clusters = sk_reg_results._trace[n_clusters-1][0] + else: + try: + sk_reg_results = Skater_reg().fit(n_clusters, w, x_std, {'reg':BaseTSLS,'y':y,'x':x_constant,'yend':yend,'q':q}, quorum=quorum, trace=trace) + self.clusters = sk_reg_results.current_labels_ + except Exception as e: + if str(e) == "one or more input arrays have more columns than rows": + raise ValueError("One or more input ended up with more variables than observations. Please check your setting for `quorum`.") + else: + print("An error occurred:", e) + + self._trace = sk_reg_results._trace + self.SSR = [self._trace[i][1][2] for i in range(1, len(self._trace))] + + TSLS_Regimes.__init__(self, y, x_constant, yend, q, regimes=self.clusters, w=w, name_y=name_y, name_x=name_x, name_yend=name_yend, + name_q=name_q, name_regimes='Skater_reg', **kwargs) + def _test(): import doctest diff --git a/spreg/twosls_sp.py b/spreg/twosls_sp.py index e5abe01..8efc6e6 100755 --- a/spreg/twosls_sp.py +++ b/spreg/twosls_sp.py @@ -235,6 +235,10 @@ class GM_Lag(BaseGM_Lag): If slx_lags>0, the specification becomes of the Spatial Durbin type. slx_vars : either "All" (default) or list of booleans to select x variables to be lagged + regimes : list or pandas.Series + List of n values with the mapping of each + observation to a regime. Assumed to be aligned with 'x'. + For other regimes-specific arguments, see GM_Lag_Regimes robust : string If 'white', then a White consistent estimator of the variance-covariance matrix is given. 
If 'hac', then a @@ -514,6 +518,7 @@ def __init__( lag_q=True, slx_lags=0, slx_vars="All", + regimes = None, robust=None, gwk=None, sig2n_k=False, @@ -529,114 +534,145 @@ def __init__( name_ds=None, latex=False, hard_bound=False, + **kwargs, ): - - n = USER.check_arrays(x, yend, q) - y, name_y = USER.check_y(y, n, name_y) - w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) - USER.check_robust(robust, gwk) - yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) - spat_diag, warn = USER.check_spat_diag(spat_diag=spat_diag, w=w, robust=robust, slx_lags=slx_lags) - set_warn(self, warn) - x_constant, name_x, warn = USER.check_constant(x, name_x) - set_warn(self, warn) - name_x = USER.set_name_x(name_x, x_constant) # need to check for None and set defaults - - # kx and wkx are used to replace complex calculation for output - if slx_lags > 0: # adjust for flexwx - if (isinstance(slx_vars,list)): # slx_vars has True,False - if len(slx_vars) != x.shape[1] : - raise Exception("slx_vars incompatible with x column dimensions") - else: # use slx_vars to extract proper columns - workname = name_x[1:] - kx = len(workname) - vv = list(compress(workname,slx_vars)) - name_x += USER.set_name_spatial_lags(vv, slx_lags) - wkx = slx_vars.count(True) - else: - kx = len(name_x) - 1 - wkx = kx - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - - - BaseGM_Lag.__init__( - self, - y=y, - x=x_constant, - w=w, - yend=yend, - q=q, - w_lags=w_lags, - slx_lags=slx_lags, - slx_vars=slx_vars, - robust=robust, - gwk=gwk, - lag_q=lag_q, - sig2n_k=sig2n_k, - ) - - self.rho = self.betas[-1] - self.predy_e, self.e_pred, warn = sp_att( - w, self.y, self.predy, self.yend[:, -1].reshape(self.n, 1), self.rho, hard_bound=hard_bound - ) - set_warn(self, warn) - self.title = "SPATIAL TWO STAGE LEAST SQUARES" - if slx_lags > 0: - self.title += " WITH SLX (SPATIAL DURBIN MODEL)" - self.name_ds = USER.set_name_ds(name_ds) - self.name_y = 
USER.set_name_y(name_y) - # self.name_x = USER.set_name_x(name_x, x_constant) # name_x contains SLX terms for slx_lags > 0 - self.name_x = name_x # already contains constant in new setup - self.name_yend = USER.set_name_yend(name_yend, yend) - self.name_yend.append(USER.set_name_yend_sp(self.name_y)) - self.name_z = self.name_x + self.name_yend - self.name_q = USER.set_name_q(name_q, q) - - if slx_lags > 0: # need to remove all but last SLX variables from name_x - self.name_x0 = [] - self.name_x0.append(self.name_x[0]) # constant - if (isinstance(slx_vars,list)): # boolean list passed - # x variables that were not lagged - self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) - # last wkx variables - self.name_x0.extend(self.name_x[-wkx:]) - - + if regimes is not None: + from .twosls_sp_regimes import GM_Lag_Regimes + self.__class__ = GM_Lag_Regimes + self.__init__( + y=y, + x=x, + regimes=regimes, + yend=yend, + q=q, + w=w, + w_lags=w_lags, + slx_lags=slx_lags, + lag_q=lag_q, + robust=robust, + gwk=gwk, + sig2n_k=sig2n_k, + spat_diag=spat_diag, + spat_impacts=spat_impacts, + vm=vm, + name_y=name_y, + name_x=name_x, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_gwk=name_gwk, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) + else: + n = USER.check_arrays(x, yend, q) + y, name_y = USER.check_y(y, n, name_y) + w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) + USER.check_robust(robust, gwk) + yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) + spat_diag, warn = USER.check_spat_diag(spat_diag=spat_diag, w=w, robust=robust, slx_lags=slx_lags) + set_warn(self, warn) + x_constant, name_x, warn = USER.check_constant(x, name_x) + set_warn(self, warn) + name_x = USER.set_name_x(name_x, x_constant) # need to check for None and set defaults + + # kx and wkx are used to replace complex calculation for output + if slx_lags > 0: # adjust for flexwx + if 
(isinstance(slx_vars,list)): # slx_vars has True,False + if len(slx_vars) != x.shape[1] : + raise Exception("slx_vars incompatible with x column dimensions") + else: # use slx_vars to extract proper columns + workname = name_x[1:] + kx = len(workname) + vv = list(compress(workname,slx_vars)) + name_x += USER.set_name_spatial_lags(vv, slx_lags) + wkx = slx_vars.count(True) + else: + kx = len(name_x) - 1 + wkx = kx + name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + + + BaseGM_Lag.__init__( + self, + y=y, + x=x_constant, + w=w, + yend=yend, + q=q, + w_lags=w_lags, + slx_lags=slx_lags, + slx_vars=slx_vars, + robust=robust, + gwk=gwk, + lag_q=lag_q, + sig2n_k=sig2n_k, + ) + + self.rho = self.betas[-1] + self.predy_e, self.e_pred, warn = sp_att( + w, self.y, self.predy, self.yend[:, -1].reshape(self.n, 1), self.rho, hard_bound=hard_bound + ) + set_warn(self, warn) + self.title = "SPATIAL TWO STAGE LEAST SQUARES" + if slx_lags > 0: + self.title += " WITH SLX (SPATIAL DURBIN MODEL)" + self.name_ds = USER.set_name_ds(name_ds) + self.name_y = USER.set_name_y(name_y) + # self.name_x = USER.set_name_x(name_x, x_constant) # name_x contains SLX terms for slx_lags > 0 + self.name_x = name_x # already contains constant in new setup + self.name_yend = USER.set_name_yend(name_yend, yend) + self.name_yend.append(USER.set_name_yend_sp(self.name_y)) + self.name_z = self.name_x + self.name_yend + self.name_q = USER.set_name_q(name_q, q) + + if slx_lags > 0: # need to remove all but last SLX variables from name_x + self.name_x0 = [] + self.name_x0.append(self.name_x[0]) # constant + if (isinstance(slx_vars,list)): # boolean list passed + # x variables that were not lagged + self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + # last wkx variables + self.name_x0.extend(self.name_x[-wkx:]) + + + else: + okx = int((self.k - self.kstar - 1) / (slx_lags + 1)) # number of original exogenous vars + + 
self.name_x0.extend(self.name_x[-okx:]) + + self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) + + #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ['o'] + ['x']*kx + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] else: - okx = int((self.k - self.kstar - 1) / (slx_lags + 1)) # number of original exogenous vars - - self.name_x0.extend(self.name_x[-okx:]) - - self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) - - #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - var_types = ['x'] * (kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - else: - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) - var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - - self.name_h = USER.set_name_h(self.name_x, self.name_q) - self.robust = USER.set_robust(robust) - self.name_w = USER.set_name_w(name_w, w) - self.name_gwk = USER.set_name_w(name_gwk, gwk) - self.slx_lags = slx_lags - self.slx_vars = slx_vars - - self.output = pd.DataFrame(self.name_x + self.name_yend, columns=['var_names']) - self.output['var_type'] = var_types - self.output['regime'], self.output['equation'] = (0, 0) - self.other_top = _spat_pseudo_r2(self) - diag_out = None - - if spat_diag: - diag_out = _spat_diag_out(self, w, 'yend') - if spat_impacts: - self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags,slx_vars) - try: - diag_out += impacts_str - except TypeError: - diag_out = impacts_str - output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) + self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + var_types = ['o'] + ['x'] * (len(self.name_x)-1) + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + + self.name_h = 
USER.set_name_h(self.name_x, self.name_q) + self.robust = USER.set_robust(robust) + self.name_w = USER.set_name_w(name_w, w) + self.name_gwk = USER.set_name_w(name_gwk, gwk) + self.slx_lags = slx_lags + self.slx_vars = slx_vars + + self.output = pd.DataFrame(self.name_x + self.name_yend, columns=['var_names']) + self.output['var_type'] = var_types + self.output['regime'], self.output['equation'] = (0, 0) + self.other_top = _spat_pseudo_r2(self) + diag_out = None + + if spat_diag: + diag_out = _spat_diag_out(self, w, 'yend') + if spat_impacts: + self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags,slx_vars) + try: + diag_out += impacts_str + except TypeError: + diag_out = impacts_str + output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def _test(): diff --git a/spreg/twosls_sp_regimes.py b/spreg/twosls_sp_regimes.py index 9488b56..60eba90 100644 --- a/spreg/twosls_sp_regimes.py +++ b/spreg/twosls_sp_regimes.py @@ -636,7 +636,7 @@ def __init__( self.title = "SPATIAL " + self.title if slx_lags > 0: for m in self.regimes_set: - r_output = self.output[(self.output['regime'] == str(m)) & (self.output['var_type'] == 'x')] + r_output = self.output[(self.output['regime'] == str(m)) & (self.output['var_type'].isin(['x', 'o']))] wx_index = r_output.index[-((len(r_output)-1)//(slx_lags+1)) * slx_lags:] self.output.loc[wx_index, 'var_type'] = 'wx' self.title = " SPATIAL 2SLS WITH SLX (SPATIAL DURBIN MODEL) - REGIMES" @@ -840,9 +840,9 @@ def GM_Lag_Regimes_Multi( results[r].other_mid = "" if slx_lags > 0: kx = (results[r].k - results[r].kstar - 1) // (slx_lags + 1) - var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(results[r].name_yend) - 1) + ['rho'] + var_types = ['o'] + ['x']*kx + ['wx'] * kx * slx_lags + ['yend'] * (len(results[r].name_yend) - 1) + ['rho'] else: - var_types = ['x'] * len(results[r].name_x) + ['yend'] * (len(results[r].name_yend)-1) + ['rho'] + var_types = ['o'] + ['x'] * 
(len(results[r].name_x)-1) + ['yend'] * (len(results[r].name_yend)-1) + ['rho'] results[r].output = pd.DataFrame({'var_names': results[r].name_x + results[r].name_yend, 'var_type': var_types, 'regime': r, 'equation': r}) @@ -1219,7 +1219,7 @@ class GM_Lag_Endog_Regimes(GM_Lag_Regimes): >>> import numpy as np >>> np.set_printoptions(legacy='1.25') #to avoid printing issues with numpy floats >>> import geopandas as gpd - >>> from spreg import OLS_Endog_Regimes + >>> from spreg import GM_Lag_Endog_Regimes Open data on Baltimore house sales price and characteristics in Baltimore from libpysal examples using geopandas. @@ -1231,7 +1231,7 @@ class GM_Lag_Endog_Regimes(GM_Lag_Regimes): >>> w = libpysal.weights.Queen.from_dataframe(db, use_index=True) >>> w.transform = "r" - For this example, we will use the 'PRICE' column as the dependent variable and + For this example, we will use the 'PRICE' column as the dependent variable, and the 'NROOM', 'AGE', and 'SQFT' columns as independent variables. At this point, we will let the model choose the number of clusters. diff --git a/spreg/user_output.py b/spreg/user_output.py index 6f092f1..8a42e54 100755 --- a/spreg/user_output.py +++ b/spreg/user_output.py @@ -9,15 +9,16 @@ ) import numpy as np import pandas as pd -import geopandas as gpd # new for check_coords +import geopandas as gpd import copy as COPY from . import sputils as spu from libpysal import weights from libpysal import graph import scipy from scipy.sparse.csr import csr_matrix -from .utils import get_lags # new for flex_wx -from itertools import compress # new for lfex_wx +from scipy.sparse import issparse +from .utils import get_lags +from itertools import compress def set_name_ds(name_ds): @@ -340,6 +341,26 @@ def set_name_multi( multireg[r].name_h = multireg[r].name_x + multireg[r].name_q return multireg +def check_numeric_bool(array): + """Check if the array passed by a user to a regression class is + correctly structured. 
If the user's data is correctly formed this function + returns nothing, if not then an exception is raised. Note, this does not + check for model setup, simply the types of the objects.""" + + if array is None: + return + + if issparse(array): + array = array.data + + if np.issubdtype(array.dtype, np.bool_): + raise ValueError("The array contains boolean elements, which are not supported. Please convert them to integers.") + + if not np.issubdtype(array.dtype, np.number): + raise ValueError("The array contains non-numeric elements, which are not supported.") + + if not np.isfinite(array).all(): + raise ValueError("The array contains NaN or infinite values, which are not supported.") def check_arrays(*arrays): """Check if the objects passed by a user to a regression class are @@ -472,6 +493,9 @@ def check_y(y, n, name_y=None): raise Exception( "y must be a single column array matching the length of other arrays" ) + + check_numeric_bool(y) + return y, name_y def check_endog(arrays, names): @@ -498,6 +522,7 @@ def check_endog(arrays, names): pass elif len(arrays[i].shape) == 1: arrays[i].shape = (arrays[i].shape[0], 1) + check_numeric_bool(arrays[i]) return (*arrays, *names) def check_weights(w, y, w_required=False, time=False, slx_lags=0, allow_wk=False): @@ -768,12 +793,6 @@ def check_reg_list(regimes, name_regimes, n): ) return regimes, name_regimes - - - - - - def check_regimes(reg_set, N=None, K=None): """Check if there are at least two regimes @@ -836,39 +855,45 @@ def check_constant(x, name_x=None, just_rem=False): try: name_x = x.columns.to_list() except AttributeError: - name_x = x.name + name_x = [x.name] x = x.to_numpy() + + check_numeric_bool(x) x_constant = COPY.copy(x) keep_x = COPY.copy(name_x) warn = None + + if x_constant.ndim == 1: + x_constant = x_constant.reshape(-1, 1) + if isinstance(x_constant, np.ndarray): diffs = np.ptp(x_constant, axis=0) - if sum(diffs == 0) > 0: + if np.any(diffs == 0): x_constant = np.delete(x_constant, np.nonzero(diffs 
== 0), 1) else: diffs = (x_constant.max(axis=0).toarray() - x_constant.min(axis=0).toarray())[0] - if sum(diffs == 0) > 0: + if np.any(diffs == 0): x_constant = x_constant[:, np.nonzero(diffs > 0)[0]] - if sum(diffs == 0) > 0: + if np.any(diffs == 0): if keep_x: rem_x = [keep_x[i] for i in np.nonzero(diffs == 0)[0]] warn = "Variable(s) " + str(rem_x) + " removed for being constant." keep_x[:] = [keep_x[i] for i in np.nonzero(diffs > 0)[0]] else: - if sum(diffs == 0) == 1: + if np.sum(diffs == 0) == 1: warn = "One variable has been removed for being constant." else: warn = ( - str(sum(diffs == 0)) + str(np.sum(diffs == 0)) + " variables have been removed for being constant." ) + if not just_rem: return spu.sphstack(np.ones((x_constant.shape[0], 1)), x_constant), keep_x, warn else: return x_constant, keep_x, warn - def flex_wx(w,x,name_x,constant=True,slx_lags=1,slx_vars="All"): """ Adds spatially lagged variables to an existing x matrix with or without a constant term