Skip to content

Commit 3a69aff

Browse files
author
F Saad
committed
Phase 2 of revival of USING MODEL, bqlfn/bqlvtab/compiler.
Herculean effort required to update tests/test_bql.py filed as ticket: #564.
1 parent 64e22f9 commit 3a69aff

File tree

5 files changed

+299
-238
lines changed

5 files changed

+299
-238
lines changed

src/bqlfn.py

+34-28
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,18 @@
3333
def bayesdb_install_bql(db, cookie):
3434
def function(name, nargs, fn):
3535
db.createscalarfunction(name, (lambda *args: fn(cookie, *args)), nargs)
36-
function("bql_column_correlation", 4, bql_column_correlation)
37-
function("bql_column_correlation_pvalue", 4, bql_column_correlation_pvalue)
38-
function("bql_column_dependence_probability", 4,
36+
function("bql_column_correlation", 5, bql_column_correlation)
37+
function("bql_column_correlation_pvalue", 5, bql_column_correlation_pvalue)
38+
function("bql_column_dependence_probability", 5,
3939
bql_column_dependence_probability)
4040
function("bql_column_mutual_information", -1, bql_column_mutual_information)
4141
function("bql_column_value_probability", -1, bql_column_value_probability)
42-
function("bql_row_similarity", 5, bql_row_similarity)
42+
function("bql_row_similarity", 6, bql_row_similarity)
4343
function("bql_row_predictive_relevance", -1, bql_row_predictive_relevance)
44-
function("bql_row_column_predictive_probability", 5,
44+
function("bql_row_column_predictive_probability", 6,
4545
bql_row_column_predictive_probability)
46-
function("bql_predict", 6, bql_predict)
47-
function("bql_predict_confidence", 5, bql_predict_confidence)
46+
function("bql_predict", 7, bql_predict)
47+
function("bql_predict_confidence", 6, bql_predict_confidence)
4848
function("bql_json_get", 2, bql_json_get)
4949
function("bql_pdf_joint", -1, bql_pdf_joint)
5050

@@ -68,7 +68,8 @@ def bql_variable_stattypes_and_data(bdb, population_id, colno0, colno1):
6868
return (st0, st1, data0, data1)
6969

7070
# Two-column function: CORRELATION [OF <col0> WITH <col1>]
71-
def bql_column_correlation(bdb, population_id, _generator_id, colno0, colno1):
71+
def bql_column_correlation(bdb, population_id, _generator_id, _modelnos,
72+
colno0, colno1):
7273
if colno0 < 0:
7374
raise BQLError(bdb,
7475
'No correlation for latent variable: %r' %
@@ -86,7 +87,7 @@ def bql_column_correlation(bdb, population_id, _generator_id, colno0, colno1):
8687

8788
# Two-column function: CORRELATION PVALUE [OF <col0> WITH <col1>]
8889
def bql_column_correlation_pvalue(
89-
bdb, population_id, _generator_id, colno0, colno1):
90+
bdb, population_id, _generator_id, _modelnos, colno0, colno1):
9091
if colno0 < 0:
9192
raise BQLError(bdb,
9293
'No correlation p-value for latent variable: %r' %
@@ -290,7 +291,7 @@ def define_correlation_p(stattype0, stattype1, method):
290291

291292
# Two-column function: DEPENDENCE PROBABILITY [OF <col0> WITH <col1>]
292293
def bql_column_dependence_probability(
293-
bdb, population_id, generator_id, colno0, colno1):
294+
bdb, population_id, generator_id, modelnos, colno0, colno1):
294295
def generator_depprob(generator_id):
295296
metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
296297
return metamodel.column_dependence_probability(
@@ -301,22 +302,22 @@ def generator_depprob(generator_id):
301302

302303
# Two-column function: MUTUAL INFORMATION [OF <col0> WITH <col1>]
303304
def bql_column_mutual_information(
304-
bdb, population_id, generator_id, colnos0, colnos1,
305+
bdb, population_id, generator_id, modelnos, colnos0, colnos1,
305306
numsamples, *constraint_args):
306307
colnos0 = json.loads(colnos0)
307308
colnos1 = json.loads(colnos1)
308309
mutinfs = _bql_column_mutual_information(
309-
bdb, population_id, generator_id, colnos0, colnos1, numsamples,
310-
*constraint_args)
310+
bdb, population_id, generator_id, modelnos, colnos0, colnos1,
311+
numsamples, *constraint_args)
311312
# XXX This integral of the CMI returned by each model of all generators in
312313
# the population is wrong! At least, it does not directly correspond to
313314
# any meaningful probabilistic quantity, other than literally the mean CMI
314315
# averaged over all population models.
315316
return stats.arithmetic_mean([stats.arithmetic_mean(m) for m in mutinfs])
316317

317318
def _bql_column_mutual_information(
318-
bdb, population_id, generator_id, colnos0, colnos1, numsamples,
319-
*constraint_args):
319+
bdb, population_id, generator_id, modelnos, colnos0, colnos1,
320+
numsamples, *constraint_args):
320321
if len(constraint_args) % 2 == 1:
321322
raise ValueError('Odd constraint arguments: %s.' % (constraint_args))
322323
constraints = zip(constraint_args[::2], constraint_args[1::2]) \
@@ -331,8 +332,8 @@ def generator_mutinf(generator_id):
331332
return mutinfs
332333

333334
# One-column function: PROBABILITY DENSITY OF <col>=<value> GIVEN <constraints>
334-
def bql_column_value_probability(bdb, population_id, generator_id, colno,
335-
value, *constraint_args):
335+
def bql_column_value_probability(bdb, population_id, generator_id, modelnos,
336+
colno, value, *constraint_args):
336337
constraints = []
337338
i = 0
338339
while i < len(constraint_args):
@@ -344,13 +345,14 @@ def bql_column_value_probability(bdb, population_id, generator_id, colno,
344345
constraints.append((constraint_colno, constraint_value))
345346
i += 2
346347
targets = [(colno, value)]
347-
logp = _bql_logpdf(bdb, population_id, generator_id, targets, constraints)
348+
logp = _bql_logpdf(bdb, population_id, generator_id, modelnos, targets,
349+
constraints)
348350
return ieee_exp(logp)
349351

350352
# XXX This is silly. We should return log densities, not densities.
351353
# This is Github issue #360:
352354
# https://github.com/probcomp/bayeslite/issues/360
353-
def bql_pdf_joint(bdb, population_id, generator_id, *args):
355+
def bql_pdf_joint(bdb, population_id, generator_id, modelnos, *args):
354356
i = 0
355357
targets = []
356358
while i < len(args):
@@ -372,10 +374,12 @@ def bql_pdf_joint(bdb, population_id, generator_id, *args):
372374
c_value = args[i + 1]
373375
constraints.append((c_colno, c_value))
374376
i += 2
375-
logp = _bql_logpdf(bdb, population_id, generator_id, targets, constraints)
377+
logp = _bql_logpdf(bdb, population_id, generator_id, modelnos, targets,
378+
constraints)
376379
return ieee_exp(logp)
377380

378-
def _bql_logpdf(bdb, population_id, generator_id, targets, constraints):
381+
def _bql_logpdf(bdb, population_id, generator_id, modelnos, targets,
382+
constraints):
379383
# P(T | C) = \sum_M P(T, M | C)
380384
# = \sum_M P(T | C, M) P(M | C)
381385
# = \sum_M P(T | C, M) P(M) P(C | M) / P(C)
@@ -410,7 +414,7 @@ def loglikelihood(generator_id, metamodel):
410414

411415
# Row function: SIMILARITY TO <target_row> IN THE CONTEXT OF <column>
412416
def bql_row_similarity(
413-
bdb, population_id, generator_id, rowid, target_rowid, colno):
417+
bdb, population_id, generator_id, modelnos, rowid, target_rowid, colno):
414418
if target_rowid is None:
415419
raise BQLError(bdb, 'No such target row for SIMILARITY')
416420
def generator_similarity(generator_id):
@@ -425,8 +429,8 @@ def generator_similarity(generator_id):
425429
# Row function: PREDICTIVE RELEVANCE TO (<target_row>)
426430
# [AND HYPOTHETICAL ROWS WITH VALUES ((...))] IN THE CONTEXT OF <column>
427431
def bql_row_predictive_relevance(
428-
bdb, population_id, generator_id, rowid_target, rowid_query, colno,
429-
*constraint_args):
432+
bdb, population_id, generator_id, modelnos, rowid_target, rowid_query,
433+
colno, *constraint_args):
430434
if rowid_target is None:
431435
raise BQLError(bdb, 'No such target row for SIMILARITY')
432436
rowid_query = json.loads(rowid_query)
@@ -453,7 +457,8 @@ def generator_similarity(generator_id):
453457

454458
# Row function: PREDICTIVE PROBABILITY OF <targets> [GIVEN <constraints>]
455459
def bql_row_column_predictive_probability(
456-
bdb, population_id, generator_id, rowid, targets, constraints):
460+
bdb, population_id, generator_id, modelnos, rowid, targets,
461+
constraints):
457462
targets = json.loads(targets)
458463
constraints = json.loads(constraints)
459464
# Build the constraints and query from rowid, using a fresh rowid.
@@ -482,7 +487,8 @@ def generator_predprob(generator_id):
482487
### Predict and simulate
483488

484489
def bql_predict(
485-
bdb, population_id, generator_id, rowid, colno, threshold, numsamples):
490+
bdb, population_id, generator_id, modelnos, rowid, colno, threshold,
491+
numsamples):
486492
# XXX Randomly sample 1 generator from the population, until we figure out
487493
# how to aggregate imputations across different hypotheses.
488494
if generator_id is None:
@@ -494,7 +500,7 @@ def bql_predict(
494500
bdb, generator_id, None, rowid, colno, threshold, numsamples=numsamples)
495501

496502
def bql_predict_confidence(
497-
bdb, population_id, generator_id, rowid, colno, numsamples):
503+
bdb, population_id, generator_id, modelnos, rowid, colno, numsamples):
498504
# XXX Do real imputation here!
499505
# XXX Randomly sample 1 generator from the population, until we figure out
500506
# how to aggregate imputations across different hypotheses.
@@ -514,7 +520,7 @@ def bql_json_get(bdb, blob, key):
514520

515521
def bayesdb_simulate(
516522
bdb, population_id, constraints, colnos, generator_id=None,
517-
numpredictions=1, accuracy=None):
523+
modelnos=None, numpredictions=1, accuracy=None):
518524
"""Simulate rows from a generative model, subject to constraints.
519525
520526
Returns a list of `numpredictions` tuples, with a value for each

src/bqlvtab.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,11 @@ def Filter(self, indexnum, indexname, constraintargs):
199199
# Compute the mutual information.
200200
#
201201
# XXX Expose this API better from bqlfn.
202+
#
203+
# XXX fsaad@20170624: Setting modelnos = None arbitrarily, figure out
204+
# how to set the modelnos argument.
202205
mis = bqlfn._bql_column_mutual_information(
203-
self._bdb, self._population_id, self._generator_id,
206+
self._bdb, self._population_id, self._generator_id, None,
204207
target_vars, reference_vars, self._nsamples,
205208
*_flatten2(sorted(conditions.iteritems())))
206209
self._mi = _flatten2(mis)

0 commit comments

Comments
 (0)