33
33
def bayesdb_install_bql(db, cookie):
    """Register the BQL scalar functions on `db`.

    Every function is registered through `db.createscalarfunction`
    with a wrapper that calls the implementation with `cookie` as its
    first argument, followed by whatever arguments the wrapper itself
    was invoked with.

    The count given for each entry is the number of arguments after
    `cookie`.  The entries registered with -1 are the ones whose
    implementations accept a variable number of trailing arguments.
    """
    def register(name, nargs, fn):
        # A fresh closure per call, so each wrapper keeps its own `fn`.
        db.createscalarfunction(
            name, (lambda *args: fn(cookie, *args)), nargs)

    # (SQL-visible name, declared argument count, implementation),
    # registered in this order.
    bql_functions = (
        ("bql_column_correlation", 5, bql_column_correlation),
        ("bql_column_correlation_pvalue", 5, bql_column_correlation_pvalue),
        ("bql_column_dependence_probability", 5,
            bql_column_dependence_probability),
        ("bql_column_mutual_information", -1, bql_column_mutual_information),
        ("bql_column_value_probability", -1, bql_column_value_probability),
        ("bql_row_similarity", 6, bql_row_similarity),
        ("bql_row_predictive_relevance", -1, bql_row_predictive_relevance),
        ("bql_row_column_predictive_probability", 6,
            bql_row_column_predictive_probability),
        ("bql_predict", 7, bql_predict),
        ("bql_predict_confidence", 6, bql_predict_confidence),
        ("bql_json_get", 2, bql_json_get),
        ("bql_pdf_joint", -1, bql_pdf_joint),
    )
    for name, nargs, fn in bql_functions:
        register(name, nargs, fn)
50
50
@@ -68,7 +68,8 @@ def bql_variable_stattypes_and_data(bdb, population_id, colno0, colno1):
68
68
return (st0 , st1 , data0 , data1 )
69
69
70
70
# Two-column function: CORRELATION [OF <col0> WITH <col1>]
71
- def bql_column_correlation (bdb , population_id , _generator_id , colno0 , colno1 ):
71
+ def bql_column_correlation (bdb , population_id , _generator_id , _modelnos ,
72
+ colno0 , colno1 ):
72
73
if colno0 < 0 :
73
74
raise BQLError (bdb ,
74
75
'No correlation for latent variable: %r' %
@@ -86,7 +87,7 @@ def bql_column_correlation(bdb, population_id, _generator_id, colno0, colno1):
86
87
87
88
# Two-column function: CORRELATION PVALUE [OF <col0> WITH <col1>]
88
89
def bql_column_correlation_pvalue (
89
- bdb , population_id , _generator_id , colno0 , colno1 ):
90
+ bdb , population_id , _generator_id , _modelnos , colno0 , colno1 ):
90
91
if colno0 < 0 :
91
92
raise BQLError (bdb ,
92
93
'No correlation p-value for latent variable: %r' %
@@ -290,7 +291,7 @@ def define_correlation_p(stattype0, stattype1, method):
290
291
291
292
# Two-column function: DEPENDENCE PROBABILITY [OF <col0> WITH <col1>]
292
293
def bql_column_dependence_probability (
293
- bdb , population_id , generator_id , colno0 , colno1 ):
294
+ bdb , population_id , generator_id , modelnos , colno0 , colno1 ):
294
295
def generator_depprob (generator_id ):
295
296
metamodel = core .bayesdb_generator_metamodel (bdb , generator_id )
296
297
return metamodel .column_dependence_probability (
@@ -301,22 +302,22 @@ def generator_depprob(generator_id):
301
302
302
303
# Two-column function: MUTUAL INFORMATION [OF <col0> WITH <col1>]
303
304
def bql_column_mutual_information(
        bdb, population_id, generator_id, modelnos, colnos0, colnos1,
        numsamples, *constraint_args):
    """Return the averaged mutual information of two sets of columns.

    `colnos0` and `colnos1` arrive as JSON text and are decoded before
    being handed, with all other arguments unchanged, to
    `_bql_column_mutual_information`.  Each item of that helper's
    result is averaged on its own, and the mean of those per-item
    means is returned.
    """
    decoded_colnos0 = json.loads(colnos0)
    decoded_colnos1 = json.loads(colnos1)
    per_generator = _bql_column_mutual_information(
        bdb, population_id, generator_id, modelnos, decoded_colnos0,
        decoded_colnos1, numsamples, *constraint_args)
    # XXX This integral of the CMI returned by each model of all generators
    # in the population is wrong! At least, it does not directly correspond
    # to any meaningful probabilistic quantity, other than literally the
    # mean CMI averaged over all population models.
    inner_means = [
        stats.arithmetic_mean(samples) for samples in per_generator
    ]
    return stats.arithmetic_mean(inner_means)
316
317
317
318
def _bql_column_mutual_information (
318
- bdb , population_id , generator_id , colnos0 , colnos1 , numsamples ,
319
- * constraint_args ):
319
+ bdb , population_id , generator_id , modelnos , colnos0 , colnos1 ,
320
+ numsamples , * constraint_args ):
320
321
if len (constraint_args ) % 2 == 1 :
321
322
raise ValueError ('Odd constraint arguments: %s.' % (constraint_args ))
322
323
constraints = zip (constraint_args [::2 ], constraint_args [1 ::2 ]) \
@@ -331,8 +332,8 @@ def generator_mutinf(generator_id):
331
332
return mutinfs
332
333
333
334
# One-column function: PROBABILITY DENSITY OF <col>=<value> GIVEN <constraints>
334
- def bql_column_value_probability (bdb , population_id , generator_id , colno ,
335
- value , * constraint_args ):
335
+ def bql_column_value_probability (bdb , population_id , generator_id , modelnos ,
336
+ colno , value , * constraint_args ):
336
337
constraints = []
337
338
i = 0
338
339
while i < len (constraint_args ):
@@ -344,13 +345,14 @@ def bql_column_value_probability(bdb, population_id, generator_id, colno,
344
345
constraints .append ((constraint_colno , constraint_value ))
345
346
i += 2
346
347
targets = [(colno , value )]
347
- logp = _bql_logpdf (bdb , population_id , generator_id , targets , constraints )
348
+ logp = _bql_logpdf (bdb , population_id , generator_id , modelnos , targets ,
349
+ constraints )
348
350
return ieee_exp (logp )
349
351
350
352
# XXX This is silly. We should return log densities, not densities.
351
353
# This is Github issue #360:
352
354
# https://github.com/probcomp/bayeslite/issues/360
353
- def bql_pdf_joint (bdb , population_id , generator_id , * args ):
355
+ def bql_pdf_joint (bdb , population_id , generator_id , modelnos , * args ):
354
356
i = 0
355
357
targets = []
356
358
while i < len (args ):
@@ -372,10 +374,12 @@ def bql_pdf_joint(bdb, population_id, generator_id, *args):
372
374
c_value = args [i + 1 ]
373
375
constraints .append ((c_colno , c_value ))
374
376
i += 2
375
- logp = _bql_logpdf (bdb , population_id , generator_id , targets , constraints )
377
+ logp = _bql_logpdf (bdb , population_id , generator_id , modelnos , targets ,
378
+ constraints )
376
379
return ieee_exp (logp )
377
380
378
- def _bql_logpdf (bdb , population_id , generator_id , targets , constraints ):
381
+ def _bql_logpdf (bdb , population_id , generator_id , modelnos , targets ,
382
+ constraints ):
379
383
# P(T | C) = \sum_M P(T, M | C)
380
384
# = \sum_M P(T | C, M) P(M | C)
381
385
# = \sum_M P(T | C, M) P(M) P(C | M) / P(C)
@@ -410,7 +414,7 @@ def loglikelihood(generator_id, metamodel):
410
414
411
415
# Row function: SIMILARITY TO <target_row> IN THE CONTEXT OF <column>
412
416
def bql_row_similarity (
413
- bdb , population_id , generator_id , rowid , target_rowid , colno ):
417
+ bdb , population_id , generator_id , modelnos , rowid , target_rowid , colno ):
414
418
if target_rowid is None :
415
419
raise BQLError (bdb , 'No such target row for SIMILARITY' )
416
420
def generator_similarity (generator_id ):
@@ -425,8 +429,8 @@ def generator_similarity(generator_id):
425
429
# Row function: PREDICTIVE RELEVANCE TO (<target_row>)
426
430
# [AND HYPOTHETICAL ROWS WITH VALUES ((...))] IN THE CONTEXT OF <column>
427
431
def bql_row_predictive_relevance (
428
- bdb , population_id , generator_id , rowid_target , rowid_query , colno ,
429
- * constraint_args ):
432
+ bdb , population_id , generator_id , modelnos , rowid_target , rowid_query ,
433
+ colno , * constraint_args ):
430
434
if rowid_target is None :
431
435
raise BQLError (bdb , 'No such target row for SIMILARITY' )
432
436
rowid_query = json .loads (rowid_query )
@@ -453,7 +457,8 @@ def generator_similarity(generator_id):
453
457
454
458
# Row function: PREDICTIVE PROBABILITY OF <targets> [GIVEN <constraints>]
455
459
def bql_row_column_predictive_probability (
456
- bdb , population_id , generator_id , rowid , targets , constraints ):
460
+ bdb , population_id , generator_id , modelnos , rowid , targets ,
461
+ constraints ):
457
462
targets = json .loads (targets )
458
463
constraints = json .loads (constraints )
459
464
# Build the constraints and query from rowid, using a fresh rowid.
@@ -482,7 +487,8 @@ def generator_predprob(generator_id):
482
487
### Predict and simulate
483
488
484
489
def bql_predict (
485
- bdb , population_id , generator_id , rowid , colno , threshold , numsamples ):
490
+ bdb , population_id , generator_id , modelnos , rowid , colno , threshold ,
491
+ numsamples ):
486
492
# XXX Randomly sample 1 generator from the population, until we figure out
487
493
# how to aggregate imputations across different hypotheses.
488
494
if generator_id is None :
@@ -494,7 +500,7 @@ def bql_predict(
494
500
bdb , generator_id , None , rowid , colno , threshold , numsamples = numsamples )
495
501
496
502
def bql_predict_confidence (
497
- bdb , population_id , generator_id , rowid , colno , numsamples ):
503
+ bdb , population_id , generator_id , modelnos , rowid , colno , numsamples ):
498
504
# XXX Do real imputation here!
499
505
# XXX Randomly sample 1 generator from the population, until we figure out
500
506
# how to aggregate imputations across different hypotheses.
@@ -514,7 +520,7 @@ def bql_json_get(bdb, blob, key):
514
520
515
521
def bayesdb_simulate (
516
522
bdb , population_id , constraints , colnos , generator_id = None ,
517
- numpredictions = 1 , accuracy = None ):
523
+ modelnos = None , numpredictions = 1 , accuracy = None ):
518
524
"""Simulate rows from a generative model, subject to constraints.
519
525
520
526
Returns a list of `numpredictions` tuples, with a value for each
0 commit comments