From 7822b60b3e7b12738881d2b8c383c217a7e9cde6 Mon Sep 17 00:00:00 2001 From: John Dines Date: Wed, 17 Jul 2013 17:36:37 +0200 Subject: [PATCH 1/6] Bugfixes to command line tool. --- Media.py | 6 +++--- mainAPI.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Media.py b/Media.py index 1c98b69..df01d91 100644 --- a/Media.py +++ b/Media.py @@ -1,9 +1,9 @@ -import sys, urllib2, urllib +import sys, urllib2, urllib, os from encode import multipart_encode, MultipartParam def read_file(filename): - fp = open(filename, "r") + fp = open(os.path.abspath(filename), "r") file_content = fp.read() fp.close() @@ -88,4 +88,4 @@ def unpublish(self): self.datagen = {} request = urllib2.Request(self.dest + self.path + self.uid + self.path_unpublish, data="", headers=self.headers) request.get_method = lambda: 'PUT' - BaseObject._execute(self, request) \ No newline at end of file + BaseObject._execute(self, request) diff --git a/mainAPI.py b/mainAPI.py index fbed319..2913453 100644 --- a/mainAPI.py +++ b/mainAPI.py @@ -69,10 +69,10 @@ def main(argv=None): argv = sys.argv verbose = False - uid = "" - process_id = "" - audioFilename = "" - metadataFilename = "" + uid = None + process_id = None + audioFilename = None + metadataFilename = None accept = 'text/xml' # NEED TO SPECIFIY USERNAME AND PASSWORD HERE From dfd7ae86bb48e1a8036845af9655a9508305d4ea Mon Sep 17 00:00:00 2001 From: scrappythekangaroo Date: Mon, 19 Aug 2013 11:47:43 +0200 Subject: [PATCH 2/6] Allows service to be specified when uploading content. General code clean up for upload media. --- BaseObject.py | 4 +++- Media.py | 47 ++++++++++++++++++++--------------------------- mainAPI.py | 21 +++++++++++++++++---- 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/BaseObject.py b/BaseObject.py index 1df3dd3..330b49c 100644 --- a/BaseObject.py +++ b/BaseObject.py @@ -5,7 +5,7 @@ class BaseObject: def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename=None, - metadataFilename=None, transcriptFilename=None): + metadataFilename=None, transcriptFilename=None, service=None, item_id = None): self.accept = accept self.username = username self.password = password @@ -19,6 +19,8 @@ def __init__(self, accept, username="", password="", uid="", process_id="", audi self.datagen = {} self.headers = {} self.dest = 'https://www.koemei.com/REST/' + self.service = service + self.item_id = item_id self.response = {} @classmethod diff --git a/Media.py b/Media.py index 7b5e36a..3348f10 100644 --- a/Media.py +++ b/Media.py @@ -15,10 +15,10 @@ def read_file(filename): class Media(BaseObject): def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename=None, - metadataFilename=None, transcriptFilename=None): + metadataFilename=None, transcriptFilename=None, service=None, item_id=None): BaseObject.__init__(self, accept, username=username, password=password, uid=uid, process_id=process_id, audioFilename=audioFilename, metadataFilename=metadataFilename, - transcriptFilename=transcriptFilename) + transcriptFilename=transcriptFilename, service=service, item_id=item_id) self.path = 'media/' self.path_trans = '/transcribe' self.path_publish = '/publish' @@ -39,38 +39,31 @@ def get_list(self): @BaseObject._reset_headers def create(self): print >> sys.stderr, 'making post request to: %s%s' % (self.dest, self.path) - self.datagen = {} + data = {} + + if self.service: + data.update({'service': self.service, + 'item_id': self.item_id}) + + if 'http' in self.audioFilename: + data.update({'media': self.audioFilename}) + else: + data.update({'media': open(self.audioFilename, "rb")}) - if self.audioFilename is not None: - if 'http' in self.audioFilename: - self.path = self.path + "?media=" + urllib.quote(self.audioFilename, safe='') - self.datagen = "" # should not be empty dict but empty string! - if self.transcriptFilename is not None: - self.datagen, headers_ = multipart_encode({'transcript': read_file(self.transcriptFilename),}) - self.headers.update(headers_) - else: - if self.metadataFilename is not None: - self.datagen, headers_ = multipart_encode({'metadata': read_file(self.metadataFilename), - 'media': open(self.audioFilename, "rb")}) - #TODO : allow metadatafilename + transcript for alignment - elif self.transcriptFilename is not None: - print >> sys.stderr, "hi" - self.datagen, headers_ = multipart_encode({'transcript': read_file(self.transcriptFilename), - 'media': open(self.audioFilename, "rb")}) - else: - self.datagen, headers_ = multipart_encode({'media': open(self.audioFilename, "rb")}) - self.headers.update(headers_) - - #print >> sys.stderr, "request headers: ", self.headers - - request = urllib2.Request(self.dest + self.path, data=self.datagen, headers=self.headers) + if self.transcriptFilename: + self.datagen.update({'transcript': read_file(self.transcriptFilename)}) + + headers = self.headers + data, headers_ = multipart_encode(data) + headers.update(headers_) + + request = urllib2.Request(self.dest + self.path, data=data, headers=headers) BaseObject._execute(self, request) @BaseObject._reset_headers def transcribe(self, success_callback_url='', error_callback_url=''): print >> sys.stderr, 'making post request to: %s%s' % (self.dest, self.path + self.uid + self.path_trans) - self.datagen = {} data = urllib.urlencode( {'success_callback_url': success_callback_url, 'error_callback_url': error_callback_url, }) diff --git a/mainAPI.py b/mainAPI.py index 2913453..59d3eb6 100644 --- a/mainAPI.py +++ b/mainAPI.py @@ -41,6 +41,9 @@ def usage(): -p, --process_id Process ID for transcription process -u, --upload Path or link to an audiofile to be uploaded -m, --metadata Metadata for the audiofile + -t, --transcript Transcript for the audio file + -s, --service Service that the media belongs to + -e, --item_id Item ID for the service -v, --verbose Print out details about the process, handy for debugging -h, --help Print this message ;-) @@ -73,18 +76,22 @@ def main(argv=None): process_id = None audioFilename = None metadataFilename = None + transcriptFilename = None accept = 'text/xml' + service = None + item_id = None # NEED TO SPECIFIY USERNAME AND PASSWORD HERE username = '' password = '' - opts, args = getopt.getopt(argv[1:], "vhi:p:u:m:", ["verbose" ,"help", "uid=", "process_id=", "upload=", "metadata="]) + opts, args = getopt.getopt(argv[1:], "vhi:p:u:m:s:e:t:", ["verbose" ,"help", "uid=", "process_id=", "upload=", "metadata=","service=","item_id=","transcript="]) for o, a in opts: if o in ("-h","--help"): usage() elif o in ("-v","--verbose"): + # TODO: the goggles do nothing! verbose = True elif o in ("-i", "--uid"): uid = str(a) @@ -94,24 +101,30 @@ def main(argv=None): audioFilename = str(a) elif o in ("-m", "--metadeta"): metadataFilename = str(a) + elif o in ("-s", "--service"): + service = str(a) + elif o in ("-e","--item_id"): + item_id = str(a) + elif o in ("-t","--transcript"): + transcriptFilename = str(a) else: print 'Wrong option '+o+'\n' usage() - if (len(args) < 2): + if len(args) < 2: print 'You need to provide an object type and action!' usage() object_type = args[0] action = args[1] - if (len(args) == 3): + if len(args) == 3: accept = args[2] register_openers() # Create an instance of the given as input argument with the provided arguments - inst = globals()[object_type](accept, username, password, uid, process_id, audioFilename, metadataFilename) + inst = globals()[object_type](accept, username, password, uid, process_id, audioFilename, metadataFilename, transcriptFilename, service, item_id) # Call the indicated in the input arguments func = getattr(inst, action) From 7d91cc467db8b0bc15808335a56e209d75288315 Mon Sep 17 00:00:00 2001 From: scrappythekangaroo Date: Mon, 19 Aug 2013 15:39:51 +0200 Subject: [PATCH 3/6] Added count and filter options to get_list API requests. Added service to publish API requests. --- BaseObject.py | 7 +++-- KObject.py | 78 +++++++++++++++++++++++++++++++-------------------- Media.py | 35 +++++++++++++++++------ Process.py | 39 ++++++++++++++++++++------ Transcript.py | 40 ++++++++++++++++++++------ mainAPI.py | 26 +++++++++++++---- 6 files changed, 161 insertions(+), 64 deletions(-) diff --git a/BaseObject.py b/BaseObject.py index 330b49c..1d51428 100644 --- a/BaseObject.py +++ b/BaseObject.py @@ -4,8 +4,9 @@ class BaseObject: - def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename=None, - metadataFilename=None, transcriptFilename=None, service=None, item_id = None): + def __init__(self, accept, username="", password="", uid="", process_id="", + audioFilename=None, metadataFilename=None, transcriptFilename=None, + service=None, item_id=None, count=None, status=None): self.accept = accept self.username = username self.password = password @@ -21,6 +22,8 @@ def __init__(self, accept, username="", password="", uid="", process_id="", audi self.dest = 'https://www.koemei.com/REST/' self.service = service self.item_id = item_id + self.count = count + self.status = status self.response = {} @classmethod diff --git a/KObject.py b/KObject.py index 6688b14..efdda07 100644 --- a/KObject.py +++ b/KObject.py @@ -1,36 +1,52 @@ -import sys, urllib2 +import sys, urllib2, urllib from BaseObject import BaseObject + class KObject(BaseObject): - def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename="", metadataFilename=""): - BaseObject.__init__(self, accept, username=username, password=password, uid=uid, process_id=process_id, audioFilename=audioFilename, metadataFilename=metadataFilename) - self.path = 'kobjects/' - - @BaseObject._reset_headers - def get(self): - print >> sys.stderr, 'making get request to: %s%s' % (self.dest,self.path+self.uid) - request = urllib2.Request(self.dest+self.path+self.uid, headers=self.headers) - BaseObject._execute(self, request) - - @BaseObject._reset_headers - def delete(self): - print >> sys.stderr, 'making delete request to: %s%s' % (self.dest,self.path+self.uid) - request = urllib2.Request(self.dest+self.path+self.uid, headers=self.headers) - request.get_method = lambda: 'DELETE' - BaseObject._execute(self, request) - - @BaseObject._reset_headers - def get_list(self): - print >> sys.stderr, 'making get request to: %s%s' % (self.dest,self.path) - request = urllib2.Request(self.dest+self.path, headers=self.headers) - BaseObject._execute(self, request) - - # create a new K-Object - @BaseObject._reset_headers - def create(self): - print >> sys.stderr, 'making post request to: %s%s' % (self.dest,self.path) - self.datagen = {} - request = urllib2.Request(self.dest+self.path, data="", headers=self.headers) - BaseObject._execute(self, request) + def __init__(self, accept, username="", password="", uid="", process_id="", + audioFilename="", metadataFilename="", transcriptFilename="", + service=None, item_id=None, count=None): + BaseObject.__init__(self, accept, username=username, password=password, uid=uid, process_id=process_id, + audioFilename=audioFilename, metadataFilename=metadataFilename, transcriptFilename=transcriptFilename, + service=service, item_id=item_id, count=count) + self.path = 'kobjects/' + + @BaseObject._reset_headers + def get(self): + print >> sys.stderr, 'making get request to: %s%s' % (self.dest,self.path+self.uid) + request = urllib2.Request(self.dest+self.path+self.uid, headers=self.headers) + BaseObject._execute(self, request) + + @BaseObject._reset_headers + def delete(self): + print >> sys.stderr, 'making delete request to: %s%s' % (self.dest,self.path+self.uid) + request = urllib2.Request(self.dest+self.path+self.uid, headers=self.headers) + request.get_method = lambda: 'DELETE' + BaseObject._execute(self, request) + + @BaseObject._reset_headers + def get_list(self): + print >> sys.stderr, 'making get request to: %s%s' % (self.dest, self.path) + + data = {} + + if self.count: + data.update({'count': self.count}) + + if self.status: + data.update({'status_filter': '-'.join(map(lambda x: str(x), self.status))}) + + data = urllib.urlencode(data) + url = "%s/%s?%s" % (self.dest, self.path, data) + + request = urllib2.Request(url, headers=self.headers) + BaseObject._execute(self, request) + + # create a new K-Object + @BaseObject._reset_headers + def create(self): + print >> sys.stderr, 'making post request to: %s%s' % (self.dest,self.path) + request = urllib2.Request(self.dest+self.path, data="", headers=self.headers) + BaseObject._execute(self, request) diff --git a/Media.py b/Media.py index 3348f10..1d7a755 100644 --- a/Media.py +++ b/Media.py @@ -15,10 +15,11 @@ def read_file(filename): class Media(BaseObject): def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename=None, - metadataFilename=None, transcriptFilename=None, service=None, item_id=None): + metadataFilename=None, transcriptFilename=None, + service=None, item_id=None, count=None, status=None): BaseObject.__init__(self, accept, username=username, password=password, uid=uid, process_id=process_id, - audioFilename=audioFilename, metadataFilename=metadataFilename, - transcriptFilename=transcriptFilename, service=service, item_id=item_id) + audioFilename=audioFilename, metadataFilename=metadataFilename, transcriptFilename=transcriptFilename, + service=service, item_id=item_id, count=count, status=status) self.path = 'media/' self.path_trans = '/transcribe' self.path_publish = '/publish' @@ -33,14 +34,26 @@ def get(self): @BaseObject._reset_headers def get_list(self): print >> sys.stderr, 'making get request to: %s%s' % (self.dest, self.path) - request = urllib2.Request(self.dest + self.path, headers=self.headers) + + data = {} + + if self.count: + data.update({'count': self.count}) + + if self.status: + data.update({'status_filter': '-'.join(map(lambda x: str(x), self.status))}) + + data = urllib.urlencode(data) + url = "%s/%s?%s" % (self.dest, self.path, data) + + request = urllib2.Request(url, headers=self.headers) BaseObject._execute(self, request) @BaseObject._reset_headers def create(self): print >> sys.stderr, 'making post request to: %s%s' % (self.dest, self.path) - data = {} + data = {} if self.service: data.update({'service': self.service, 'item_id': self.item_id}) @@ -74,15 +87,21 @@ def transcribe(self, success_callback_url='', error_callback_url=''): @BaseObject._reset_headers def publish(self): print >> sys.stderr, 'making put request to: %s%s' % (self.dest, self.path + self.uid + self.path_publish) - self.datagen = {} - request = urllib2.Request(self.dest + self.path + self.uid + self.path_publish, data="", headers=self.headers) + + data = {} + if self.service: + data.update({'service_name': self.service,}) + + data = urllib.urlencode(data) + url = "%s/%s/%s/%s?%s" % (self.dest, self.path, self.uid, self.path_publish, data) + + request = urllib2.Request(url, data="", headers=self.headers) request.get_method = lambda: 'PUT' BaseObject._execute(self, request) @BaseObject._reset_headers def unpublish(self): print >> sys.stderr, 'making put request to: %s%s' % (self.dest, self.path + self.uid + self.path_unpublish) - self.datagen = {} request = urllib2.Request(self.dest + self.path + self.uid + self.path_unpublish, data="", headers=self.headers) request.get_method = lambda: 'PUT' BaseObject._execute(self, request) diff --git a/Process.py b/Process.py index c3bb9fb..147c8c0 100644 --- a/Process.py +++ b/Process.py @@ -1,16 +1,37 @@ -import sys, urllib2 +import sys, urllib2, urllib from BaseObject import BaseObject + class Process(BaseObject): - def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename="", metadataFilename=""): - BaseObject.__init__(self, accept, username=username, password=password, uid=uid, process_id=process_id, audioFilename=audioFilename, metadataFilename=metadataFilename) - self.path = 'media/' + def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename="", metadataFilename="", transcriptFilename="", + service=None, item_id=None, count=None): + BaseObject.__init__(self, accept, username=username, password=password, uid=uid, process_id=process_id, + audioFilename=audioFilename, metadataFilename=metadataFilename, transcriptFilename=transcriptFilename, + service=service, item_id=item_id, count=count) + self.path = 'media/' + + @BaseObject._reset_headers + def get(self): + print >> sys.stderr, 'making get request to: %s%s' % (self.dest,self.path+self.uid+self.path_trans+self.process_id) + request = urllib2.Request(self.dest+self.path+self.uid+self.path_trans+self.process_id, headers=self.headers) + BaseObject._execute(self, request) + + @BaseObject._reset_headers + def get_list(self): + print >> sys.stderr, 'making get request to: %s%s' % (self.dest, self.path) + + data = {} + + if self.count: + data.update({'count': self.count}) + + if self.status: + data.update({'status_filter': '-'.join(map(lambda x: str(x), self.status))}) - @BaseObject._reset_headers - def get(self): - print >> sys.stderr, 'making get request to: %s%s' % (self.dest,self.path+self.uid+self.path_trans+self.process_id) - request = urllib2.Request(self.dest+self.path+self.uid+self.path_trans+self.process_id, headers=self.headers) - BaseObject._execute(self, request) + data = urllib.urlencode(data) + url = "%s/%s?%s" % (self.dest, self.path, data) + request = urllib2.Request(url, headers=self.headers) + BaseObject._execute(self, request) diff --git a/Transcript.py b/Transcript.py index 72ff823..8f1bb77 100644 --- a/Transcript.py +++ b/Transcript.py @@ -1,16 +1,38 @@ -import sys, urllib2 +import sys, urllib2, urllib from BaseObject import BaseObject + class Transcript(BaseObject): - def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename="", metadataFilename=""): - BaseObject.__init__(self, accept, username=username, password=password, uid=uid, process_id=process_id, audioFilename=audioFilename, metadataFilename=metadataFilename) - self.path = 'transcripts/' + def __init__(self, accept, username="", password="", uid="", process_id="", + audioFilename="", metadataFilename="", transcriptFilename="", + service=None, item_id=None, count=None): + BaseObject.__init__(self, accept, username=username, password=password, uid=uid, process_id=process_id, + audioFilename=audioFilename, metadataFilename=metadataFilename, transcriptFilename=transcriptFilename, + service=service, item_id=item_id, count=count) + self.path = 'transcripts/' + + @BaseObject._reset_headers + def get(self): + print >> sys.stderr, 'making get request to: %s%s' % (self.dest,self.path+self.uid) + request = urllib2.Request(self.dest+self.path+self.uid, headers=self.headers) + BaseObject._execute(self, request) + + @BaseObject._reset_headers + def get_list(self): + print >> sys.stderr, 'making get request to: %s%s' % (self.dest, self.path) + + data = {} + + if self.count: + data.update({'count': self.count}) + + if self.status: + data.update({'status_filter': '-'.join(map(lambda x: str(x), self.status))}) - @BaseObject._reset_headers - def get(self): - print >> sys.stderr, 'making get request to: %s%s' % (self.dest,self.path+self.uid) - request = urllib2.Request(self.dest+self.path+self.uid, headers=self.headers) - BaseObject._execute(self, request) + data = urllib.urlencode(data) + url = "%s/%s?%s" % (self.dest, self.path, data) + request = urllib2.Request(url, headers=self.headers) + BaseObject._execute(self, request) diff --git a/mainAPI.py b/mainAPI.py index 59d3eb6..9612a1f 100644 --- a/mainAPI.py +++ b/mainAPI.py @@ -13,11 +13,14 @@ # for streaming from streaminghttp import register_openers +STATUS_LIST = ['ASR', 'ALIGN', 'EDIT', 'UPLOAD', 'PUBLISH', 'TRANSCODE'] +STATUS_CODE = list(enumerate(STATUS_LIST, 1)) # ======================================================= # USAGE # ======================================================= + def usage(): print """ Synopsis: @@ -44,6 +47,8 @@ def usage(): -t, --transcript Transcript for the audio file -s, --service Service that the media belongs to -e, --item_id Item ID for the service + -c, --count Count setting for get_all request (otherwise pagination is limited to 12) + -f, --filter Filter by status -v, --verbose Print out details about the process, handy for debugging -h, --help Print this message ;-) @@ -80,12 +85,14 @@ def main(argv=None): accept = 'text/xml' service = None item_id = None + count = None + status = None # NEED TO SPECIFIY USERNAME AND PASSWORD HERE username = '' password = '' - opts, args = getopt.getopt(argv[1:], "vhi:p:u:m:s:e:t:", ["verbose" ,"help", "uid=", "process_id=", "upload=", "metadata=","service=","item_id=","transcript="]) + opts, args = getopt.getopt(argv[1:], "vhi:p:u:m:s:e:t:f:", ["verbose" ,"help", "uid=", "process_id=", "upload=", "metadata=","service=","item_id=","transcript=","filter="]) for o, a in opts: if o in ("-h","--help"): @@ -101,12 +108,19 @@ def main(argv=None): audioFilename = str(a) elif o in ("-m", "--metadeta"): metadataFilename = str(a) + elif o in ("-t","--transcript"): + transcriptFilename = str(a) elif o in ("-s", "--service"): service = str(a) elif o in ("-e","--item_id"): item_id = str(a) - elif o in ("-t","--transcript"): - transcriptFilename = str(a) + elif o in ("-c","--count"): + count = int(a) + elif o in ("-f","--filter"): + if a in STATUS_LIST: + status = map(lambda x: x[0], filter(lambda x: x[1] == str(a), STATUS_CODE)) + else: + raise Exception("Unrecognised STATUS from %s" % STATUS_LIST) else: print 'Wrong option '+o+'\n' usage() @@ -124,7 +138,9 @@ def main(argv=None): register_openers() # Create an instance of the given as input argument with the provided arguments - inst = globals()[object_type](accept, username, password, uid, process_id, audioFilename, metadataFilename, transcriptFilename, service, item_id) + inst = globals()[object_type](accept, username, password, uid, process_id, + audioFilename, metadataFilename, transcriptFilename, + service, item_id, count, status) # Call the indicated in the input arguments func = getattr(inst, action) @@ -138,5 +154,5 @@ def main(argv=None): print inst.response.read() if __name__=="__main__": - main() + main() From 27452bb9d898dae01f450b65044abf5afbe2972d Mon Sep 17 00:00:00 2001 From: scrappythekangaroo Date: Mon, 14 Oct 2013 14:46:31 +0200 Subject: [PATCH 4/6] Make status code filtering a bit more user friendly --- mainAPI.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mainAPI.py b/mainAPI.py index 9612a1f..b6b6f6e 100644 --- a/mainAPI.py +++ b/mainAPI.py @@ -14,7 +14,7 @@ from streaminghttp import register_openers STATUS_LIST = ['ASR', 'ALIGN', 'EDIT', 'UPLOAD', 'PUBLISH', 'TRANSCODE'] -STATUS_CODE = list(enumerate(STATUS_LIST, 1)) +STATUS_CODE = dict((v,k) for k, v in dict(enumerate(STATUS_LIST, start=1)).iteritems()) # ======================================================= # USAGE @@ -117,8 +117,9 @@ def main(argv=None): elif o in ("-c","--count"): count = int(a) elif o in ("-f","--filter"): - if a in STATUS_LIST: - status = map(lambda x: x[0], filter(lambda x: x[1] == str(a), STATUS_CODE)) + a = a.upper() + if a in STATUS_CODE: + status = STATUS_CODE[a] else: raise Exception("Unrecognised STATUS from %s" % STATUS_LIST) else: From b27665e7c9447c70afcf5768f0a04e02482c5080 Mon Sep 17 00:00:00 2001 From: scrappythekangaroo Date: Tue, 19 Nov 2013 17:27:13 +0100 Subject: [PATCH 5/6] Display progress bar during file upload --- FileProgress.py | 34 ++++++++++++++++++++++++++++++++++ Media.py | 24 ++++++++++++------------ 2 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 FileProgress.py diff --git a/FileProgress.py b/FileProgress.py new file mode 100644 index 0000000..0e8d772 --- /dev/null +++ b/FileProgress.py @@ -0,0 +1,34 @@ +import os, urllib2 +from progressbar import * + +class FileProgress(file): + widgets = ['Progress: ', Percentage(), ' ', Bar(marker=RotatingMarker(),left='[',right=']'), + ' ', ETA(), ' ', FileTransferSpeed()] + + def __init__(self, path, mode): + file.__init__(self, path, mode) + self.seek(0, os.SEEK_END) + self._total = self.tell() + self.seek(0) + + self._seen = 0.0 + self._pbar = ProgressBar(widgets=self.widgets, maxval=100) + self._pbar.start() + + def __len__(self): + return self._total + + def read(self, size): + data = file.read(self, size) + self.update(len(data)) + return data + + def close(self): + file.close(self) + self._pbar.finish() + + def update(self, size): + self._seen += size + pct = (self._seen / self._total) * 100.0 + self._pbar.update(pct) + diff --git a/Media.py b/Media.py index b926807..909349e 100644 --- a/Media.py +++ b/Media.py @@ -1,7 +1,6 @@ import sys, urllib2, urllib from encode import multipart_encode, MultipartParam - def read_file(filename): fp = open(filename, "r") file_content = fp.read() @@ -11,7 +10,7 @@ def read_file(filename): from BaseObject import BaseObject - +from FileProgress import FileProgress class Media(BaseObject): def __init__(self, accept, username="", password="", uid="", process_id="", audioFilename=None, @@ -42,6 +41,7 @@ def create(self): self.datagen = {} if self.audioFilename is not None: + fp = None if 'http' in self.audioFilename: self.path = self.path + "?media=" + urllib.quote(self.audioFilename, safe='') self.datagen = "" # should not be empty dict but empty string! @@ -49,23 +49,23 @@ def create(self): self.datagen, headers_ = multipart_encode({'transcript': read_file(self.transcriptFilename),}) self.headers.update(headers_) else: + fp = FileProgress(self.audioFilename,"rb") if self.metadataFilename is not None: self.datagen, headers_ = multipart_encode({'metadata': read_file(self.metadataFilename), - 'media': open(self.audioFilename, "rb")}) + 'media': fp}) #TODO : allow metadatafilename + transcript for alignment elif self.transcriptFilename is not None: - print >> sys.stderr, "hi" self.datagen, headers_ = multipart_encode({'transcript': read_file(self.transcriptFilename), - 'media': open(self.audioFilename, "rb")}) + 'media': fp}) else: - self.datagen, headers_ = multipart_encode({'media': open(self.audioFilename, "rb")}) + self.datagen, headers_ = multipart_encode({'media': fp}) self.headers.update(headers_) - #print >> sys.stderr, "request headers: ", self.headers - - request = urllib2.Request(self.dest + self.path, data=self.datagen, headers=self.headers) - - BaseObject._execute(self, request) + request = urllib2.Request(self.dest + self.path, data=self.datagen, headers=self.headers) + BaseObject._execute(self, request) + + if fp: + fp.close() @BaseObject._reset_headers def transcribe(self, success_callback_url='', error_callback_url=''): @@ -92,4 +92,4 @@ def unpublish(self): self.datagen = {} request = urllib2.Request(self.dest + self.path + self.uid + self.path_unpublish, data="", headers=self.headers) request.get_method = lambda: 'PUT' - BaseObject._execute(self, request) \ No newline at end of file + BaseObject._execute(self, request) From fb1dce63534484bab865756a91ee73d3e303b534 Mon Sep 17 00:00:00 2001 From: scrappythekangaroo Date: Fri, 24 Jan 2014 12:22:07 +0100 Subject: [PATCH 6/6] added batch scripts --- publish_media.py | 115 ++++++++++++++++++++++++++++++++++++++++++++ transcribe_media.py | 106 ++++++++++++++++++++++++++++++++++++++++ upload_media.py | 109 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 330 insertions(+) create mode 100644 publish_media.py create mode 100644 transcribe_media.py create mode 100644 upload_media.py diff --git a/publish_media.py b/publish_media.py new file mode 100644 index 0000000..f4bcee3 --- /dev/null +++ b/publish_media.py @@ -0,0 +1,115 @@ + +from mainAPI import * + +import json + +# ======================================================= +# USAGE +# ======================================================= + +def usage(): + print """ +Synopsis: + Uses Python API client to do some batch processing, in this + case bulk media transcribe. + +Usage: + python transcribe_media.py [options] + +Options + -s, --service Publish to this service (if you can!) + -c, --count Limit number of media to transcribe in this request + -v, --verbose Print out details about the process, handy for debugging + -h, --help Print this message ;-) + +"""; + sys.exit() + +# ======================================================= +# MAIN +# ======================================================= + + +def main(argv=None): + + if argv is None: + argv = sys.argv + + verbose = False + count = None + service = None + + opts, args = getopt.getopt(argv[1:], "vhc:s:", ["verbose", "help", "count=", "service="]) + + for o, a in opts: + if o in ("-h","--help"): + usage() + elif o in ("-v","--verbose"): + # TODO: the goggles do nothing! + verbose = True + elif o in ("-c", "--count"): + count = int(a) + elif o in ("-s", "--service"): + service = str(a) + else: + print 'Wrong option '+o+'\n' + usage() + + # first get list of uploads + register_openers() + + uid = None + process_id = None + accept='application/json' + audioFilename = None + metadataFilename = None + transcriptFilename = None + item_id = None + username='changeme' + password='changeme' + + object_type = 'Media' + action = 'get_list' + #status = [1] + status = [ STATUS_CODE['ASR'] ] + + inst = globals()[object_type](accept, username, password, uid, process_id, + audioFilename, metadataFilename, transcriptFilename, + service, item_id, count, status) + try: + func = getattr(inst, action) + func() + except urllib2.HTTPError, e: + print >> sys.stderr, "error" + print >> sys.stderr, e + print >> sys.stderr, e.read() + raise e + + media_list = json.loads(inst.response.read()) + + for m in media_list['media'][:count]: + #if True: + if m['progress'] == 100: + #if True: + print m['clientfilename'], m['status'] + + accept = 'text/xml' + audioFilename=None + metadataFilename=None + uid = m['uuid'] + #uid = '98936de9-967a-4b04-a286-807762244fcf' + + action = 'publish' + inst = globals()[object_type](accept, username, password, uid, process_id, audioFilename, metadataFilename, transcriptFilename, service, item_id) + try: + func = getattr(inst, action) + func() + except urllib2.HTTPError, e: + print >> sys.stderr,"error" + print >> sys.stderr,e + print >> sys.stderr,e.read() + + +if __name__=="__main__": + main() + diff --git a/transcribe_media.py b/transcribe_media.py new file mode 100644 index 0000000..add4a17 --- /dev/null +++ b/transcribe_media.py @@ -0,0 +1,106 @@ + +from mainAPI import * + +import json + +# ======================================================= +# USAGE +# ======================================================= + +def usage(): + print """ +Synopsis: + Uses Python API client to do some batch processing, in this + case bulk media transcribe. + +Usage: + python transcribe_media.py [options] + +Options + -c, --count Limit number of media to transcribe in this request + -v, --verbose Print out details about the process, handy for debugging + -h, --help Print this message ;-) + +"""; + sys.exit() + +# ======================================================= +# MAIN +# ======================================================= + +def main(argv=None): + + if argv is None: + argv = sys.argv + + verbose = False + count = None + + opts, args = getopt.getopt(argv[1:], "vhc:", ["verbose", "help", "count="]) + + for o, a in opts: + if o in ("-h","--help"): + usage() + elif o in ("-v","--verbose"): + # TODO: the goggles do nothing! + verbose = True + elif o in ("-c", "--count"): + count = int(a) + else: + print 'Wrong option '+o+'\n' + usage() + + # first get list of uploads + register_openers() + + uid = None + process_id = None + accept='application/json' + audioFilename = None + metadataFilename = None + transcriptFilename = None + service = None + item_id = None + username='changeme' + password='changeme' + + object_type = 'Media' + action = 'get_list' + status = [ STATUS_CODE['TRANSCODE'] ] + + inst = globals()[object_type](accept, username, password, uid, process_id, + audioFilename, metadataFilename, transcriptFilename, + service, item_id, count, status) + try: + func = getattr(inst, action) + func() + except urllib2.HTTPError, e: + print >> sys.stderr, "error" + print >> sys.stderr, e + print >> sys.stderr, e.read() + raise e + + media_list = json.loads(inst.response.read()) + + for m in media_list['media'][:count]: + if m['progress'] == 100: + print m['clientfilename'], m['status'] + + accept = 'text/xml' + audioFilename=None + metadataFilename=None + uid = m['uuid'] + + action = 'transcribe' + inst = globals()[object_type](accept, username, password, uid, process_id, audioFilename, metadataFilename, transcriptFilename, service, item_id) + try: + func = getattr(inst, action) + func() + except urllib2.HTTPError, e: + print >> sys.stderr,"error" + print >> sys.stderr,e + print >> sys.stderr,e.read() + +if __name__=="__main__": + main() + diff --git a/upload_media.py b/upload_media.py new file mode 100644 index 0000000..22e4b6a --- /dev/null +++ b/upload_media.py @@ -0,0 +1,109 @@ +from mainAPI import * + +import json, csv + +# ======================================================= +# USAGE +# ======================================================= + +def usage(): + print """ +Synopsis: + Uses Python API client to do some batch processing, in this + case bulk media transcribe. + +Usage: + python transcribe_media.py [options] + +Arguments: + List of file paths / URLs to be uploaded + (list may be CSV of URL,service,item_id) + +Options: + -b, --begin Start downloading from line number + -c, --count Limit number of media to transcribe in this request + -s, --service Service from which the upload is being made + -v, --verbose Print out details about the process, handy for debugging + -h, --help Print this message ;-) + +"""; + sys.exit() + +# ======================================================= +# MAIN +# ======================================================= + +def main(argv=None): + + if argv is None: + argv = sys.argv + + verbose = False + count = None + begin = 0 + service = None + + opts, args = getopt.getopt(argv[1:], "vhc:s:b:", ["verbose", "help", "count=", "begin=", "service="]) + + for o, a in opts: + if o in ("-h","--help"): + usage() + elif o in ("-v","--verbose"): + # TODO: the goggles do nothing! + verbose = True + elif o in ("-c", "--count"): + count = int(a) + elif o in ("-b", "--begin"): + begin = int(a) + elif o in ("-s", "--service"): + service = str(a) + else: + print 'Wrong option '+o+'\n' + usage() + + if len(args) != 1: + print 'You need to provide a file list!' + usage() + + filelistFile = args[0] + filelist = [] + with open(filelistFile) as csvfile: + csvreader = csv.reader(csvfile) + for row in csvreader: + filelist.append(tuple(row)) + + # first get list of uploads + register_openers() + + uid = None + process_id = None + accept = 'text/xml' + metadataFilename = None + transcriptFilename = None + username='changeme' + password='changeme' + + object_type = 'Media' + action = 'create' + + for f in filelist[begin:begin+count] if count else filelist[begin:]: + print "Uploading: %s" % f[0] + + audioFilename = f[0] + try: + item_id = f[1] + except IndexError: + item_id = None + service = None + + inst = globals()[object_type](accept, username, password, uid, process_id, audioFilename, metadataFilename, transcriptFilename, service, item_id) + try: + func = getattr(inst, action) + func() + except urllib2.HTTPError, e: + print >> sys.stderr,"error" + print >> sys.stderr,e + print >> sys.stderr,e.read() + +if __name__=="__main__": + main()