3
3
import os
4
4
import sys
5
5
import time
6
- import numpy as np
6
+ import pandas as pd
7
7
8
8
import peewee as pw
9
9
from playhouse .postgres_ext import ArrayField , BinaryJSONField
@@ -73,6 +73,7 @@ class File(BaseModel):
73
73
name = pw .CharField (null = True )
74
74
created = pw .DateTimeField (default = datetime .datetime .now )
75
75
76
+
76
77
@signals .post_delete (sender = File )
77
78
def remove_file_after_delete (sender , instance ):
78
79
try :
@@ -135,6 +136,7 @@ class Meta:
135
136
(('dataset' , 'file' ), True ),
136
137
)
137
138
139
+
138
140
@signals .pre_delete (sender = Dataset )
139
141
def remove_related_files (sender , instance ):
140
142
for f in instance .files :
@@ -148,7 +150,7 @@ class Featureset(BaseModel):
148
150
name = pw .CharField ()
149
151
created = pw .DateTimeField (default = datetime .datetime .now )
150
152
features_list = ArrayField (pw .CharField )
151
- custom_features_script = pw .CharField (null = True ) # move to fset file?
153
+ custom_features_script = pw .CharField (null = True ) # move to fset file?
152
154
file = pw .ForeignKeyField (File , on_delete = 'CASCADE' )
153
155
task_id = pw .CharField (null = True )
154
156
finished = pw .DateTimeField (null = True )
@@ -194,16 +196,15 @@ def is_owned_by(self, username):
194
196
def format_pred_data(fset, data):
    """Merge a feature table and prediction output into one dict per row.

    Parameters
    ----------
    fset : pandas.DataFrame
        Feature values, one row per sample. Its columns must carry a
        level named ``'channel'``, which is dropped. NOTE: `fset` is
        modified in place (columns flattened, index cast to ``str``),
        exactly as before.
    data : dict
        Prediction output. Keys read here:

        - ``'labels'`` (optional): one label per row of `fset`.
        - ``'pred_probs'`` (optional): one row of class probabilities
          per row of `fset`; requires ``'all_classes'`` for the names.
        - ``'preds'``: one predicted value per row; only read when
          ``'pred_probs'`` is missing or empty.

    Returns
    -------
    dict
        ``{name: {'features': {...}, 'label': ..., 'prediction': ...}}``
        keyed by the stringified index of `fset`. ``'label'`` is ``None``
        when no labels were supplied. ``'prediction'`` is a
        ``{class_name: probability}`` dict when probabilities are
        available, otherwise the plain predicted value.

    Raises
    ------
    ValueError
        If labels are supplied but their count differs from the number
        of rows in `fset`.
    KeyError
        If ``'pred_probs'`` is non-empty but ``'all_classes'`` is
        missing, or if neither ``'pred_probs'`` nor ``'preds'`` is given.
    """
    fset.columns = fset.columns.droplevel('channel')
    fset.index = fset.index.astype(str)  # can't use ints as JSON keys
    names = list(fset.index)

    # Use None (-> JSON null) rather than NaN for missing labels; NaN is
    # not valid JSON. The 'label' key itself is always present.
    labels = data.get('labels')
    if labels is None or len(labels) == 0:
        label_of = dict.fromkeys(names)
    else:
        if len(labels) != len(names):
            raise ValueError(
                'Expected {} labels, got {}'.format(len(names), len(labels)))
        label_of = dict(zip(names, labels))

    pred_probs = data.get('pred_probs')
    if pred_probs is not None and len(pred_probs) > 0:
        # Name the probability columns after the classes so each
        # prediction is {class_name: probability}, not {0: p0, 1: p1, ...}.
        pred_of = pd.DataFrame(pred_probs, index=fset.index,
                               columns=data['all_classes']
                               ).to_dict(orient='index')
    else:
        pred_of = pd.Series(data['preds'], index=fset.index).to_dict()

    result = {name: {'features': feats,
                     'label': label_of[name],
                     'prediction': pred_of[name]}
              for name, feats in fset.to_dict(orient='index').items()}
    return result
208
209
209
210
def display_info (self ):
@@ -238,6 +239,7 @@ def create_tables(retry=5):
238
239
print ('Could not connect to database...sleeping 5' )
239
240
time .sleep (5 )
240
241
242
+
241
243
def drop_tables():
    """Drop the tables of everything in `models` via `db.drop_tables`.

    The call is made with ``safe=True`` and ``cascade=True``.
    """
    drop_options = {'safe': True, 'cascade': True}
    db.drop_tables(models, **drop_options)
243
245
0 commit comments