diff --git a/misp_modules/modules/expansion/__init__.py b/misp_modules/modules/expansion/__init__.py index 836d54a8..32b9713e 100644 --- a/misp_modules/modules/expansion/__init__.py +++ b/misp_modules/modules/expansion/__init__.py @@ -18,7 +18,7 @@ 'virustotal_public', 'apiosintds', 'urlscan', 'securitytrails', 'apivoid', 'assemblyline_submit', 'assemblyline_query', 'ransomcoindb', 'malwarebazaar', 'lastline_query', 'lastline_submit', 'sophoslabs_intelix', 'cytomic_orion', 'censys_enrich', - 'trustar_enrich', 'recordedfuture', 'html_to_markdown'] + 'trustar_enrich', 'recordedfuture', 'html_to_markdown', 'source_confidence'] minimum_required_fields = ('type', 'uuid', 'value') diff --git a/misp_modules/modules/expansion/source_confidence.py b/misp_modules/modules/expansion/source_confidence.py new file mode 100644 index 00000000..8e172499 --- /dev/null +++ b/misp_modules/modules/expansion/source_confidence.py @@ -0,0 +1,236 @@ + +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +import os +import json +import time +import logging +# from pymisp import MISPObject +from pymisp import PyMISP +# from pymisp import MISPEvent, MISPTag, MISPAttribute +from . 
moduleinfo = {
    'version': '0.0.1',
    'author': 'HAWK.IO (Tim Shelton)',
    'description': 'Module to calculate overall score using source confidence along side time-related degradation.',
    'module-type': ['expansion', 'hover']
}

moduleconfig = ['degrade_hours', 'degrade_delta', 'confidence_json', 'misp_url', 'misp_authkey']

# Kept as a module-level name for backward compatibility. handler() returns a
# fresh dict per error instead of mutating this shared one between requests.
misperrors = {'error': 'Error'}

ATTRIBUTES = ['ip', 'ip-src', 'ip-dst', 'ip-src|port', 'ip-dst|port', 'url', 'uri', 'domain', 'domain|ip',
              'hostname', 'hostname|ip', 'email-dst', 'email-src', 'sha1', 'md5', 'sha256', 'filename|sha1',
              'filename|md5', 'filename|sha256', 'regkey|value', 'regkey']

# NOTE(review): 'format' advertises misp_standard, yet parse_result() emits
# the legacy {'types': ..., 'values': ...} shape -- confirm which is intended.
mispattributes = {
    'input': ATTRIBUTES,
    'output': ATTRIBUTES + ['email-src', 'text'],
    'format': 'misp_standard'
}

LOGGER = logging.getLogger('source_confidence')
LOGGER.setLevel(logging.INFO)

# Defaults used when the matching module configuration value is empty.
DEFAULT_WEIGHTS_FILE = '/var/tmp/misp-source-confidence.json'
DEFAULT_DEGRADE_HOURS = 30 * 24  # 30 days
DEFAULT_DEGRADE_DELTA = 0.5


def init(misp_url, misp_key, misp_verifycert, proxies):
    """Return a PyMISP client for *misp_url* authenticated with *misp_key*."""
    return PyMISP(misp_url, misp_key, ssl=misp_verifycert, debug=False, proxies=proxies)


def _to_epoch(value):
    """Convert a numeric or ISO-8601 timestamp value to integer epoch seconds.

    Raises:
        ValueError: if *value* is neither numeric nor ISO-8601 text.
    """
    if isinstance(value, (int, float)):
        return int(value)
    text = str(value).strip()
    try:
        return int(float(text))
    except ValueError:
        pass
    # presumably MISP serialises first_seen/last_seen as ISO-8601 text rather
    # than epoch seconds -- TODO confirm against the deployed MISP version.
    import datetime
    parsed = datetime.datetime.fromisoformat(text.replace('Z', '+00:00'))
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return int(parsed.timestamp())


def get_timestamp_from_attribute(attribute):
    """Return the most relevant timestamp of *attribute* as epoch seconds.

    The first non-empty value of 'last_seen', 'first_seen' and 'timestamp'
    (in that order) is used. Missing keys are treated like empty values.

    Raises:
        ValueError: if no usable timestamp is present or it cannot be parsed.
    """
    for key in ('last_seen', 'first_seen', 'timestamp'):
        raw = attribute.get(key)
        if raw:
            return _to_epoch(raw)
    raise ValueError('Attribute carries no usable timestamp.')


def riskscore_color(risk_score: int) -> str:
    """Returns appropriate hex-colors according to risk score."""
    risk_score = int(risk_score)
    if risk_score < 25:
        return '#CCCCCC'
    elif risk_score < 65:
        return '#FFCE00'
    else:
        return '#CF0A2C'


def parse_result(attribute, score):
    """Build the module result list for a final confidence *score* (percent).

    *attribute* is accepted for interface compatibility and is not used.
    Returns a one-element list holding a dict with 'types' (a list of
    attribute types) and 'values' (the formatted score text).
    """
    event = {
        # A plain list: the original trailing comma wrapped it in a tuple.
        'types': list(mispattributes['output']),
        'values': ["Confidence score: %.2f%%" % score],
    }
    LOGGER.debug('source_confidence result: %r', event)
    return [event]


def degraded_score(source_score, age_seconds, degrading_hours, degrading_line):
    """Return *source_score* reduced according to the age of the sighting.

    The age is expressed as a fraction of the degradation window
    (*degrading_hours*), raised to ``1 / degrading_line`` and subtracted
    from 1; the factor never drops below 0.
    """
    # Clamp future-dated sightings: a negative base with a fractional
    # exponent would otherwise produce a complex number.
    age = max(0.0, float(age_seconds)) / (degrading_hours * 3600)
    decay = age ** (1 / degrading_line)
    return source_score * max(0, 1.0 - decay)


def find_attribute(event, value):
    """Return the first attribute of *event* whose value equals *value*.

    Top-level attributes are searched first, then the attributes of every
    object. Returns None when nothing matches.
    """
    for candidate in event.get('Attribute') or []:
        if candidate['value'] == value:
            return candidate
    for misp_object in event.get('Object') or []:
        for candidate in misp_object.get('Attribute') or []:
            if candidate['value'] == value:
                return candidate
    return None


def _positive_float(config, key, default):
    """Read *key* from *config* as a float > 0, or *default* when empty."""
    raw = config.get(key)
    if not raw:
        return float(default)
    number = float(raw)
    if number <= 0:
        raise ValueError('Configuration value %s must be greater than zero.' % key)
    return number


def handler(q=False):
    """Handle enrichment.

    Looks up every MISP event containing the queried value, weights each
    sighting by the confidence score ('scs') of the creating organisation
    and by its age, and returns the overall score as a percentage.

    Returns False when called without a query, a dict with an 'error' key
    on failure, or ``{'results': [...]}`` on success.
    """
    if q is False:
        return False
    request = json.loads(q)

    if not request.get('attribute') or not check_input_attribute(request['attribute'], requirements=('type', 'value')):
        return {'error': f'{standard_error_message}, {checking_error}.'}
    input_attribute = request['attribute']
    if input_attribute['type'] not in mispattributes['input']:
        return {'error': 'Unsupported attribute type.'}

    config = request.get('config') or {}

    misp_url = config.get('misp_url')
    if not misp_url:
        return {'error': 'Missing base MISP URL.'}

    misp_key = config.get('misp_authkey')
    if not misp_key:
        return {'error': 'Missing MISP admin authkey.'}

    weights_file = config.get('confidence_json') or DEFAULT_WEIGHTS_FILE
    if not os.path.isfile(weights_file):
        return {'error': 'Missing confidence json file, has the background job completed yet?'}
    try:
        with open(weights_file, 'r') as f:
            weights = json.load(f)
    except (OSError, ValueError):
        return {'error': 'Failed to load confidence json file, file is not json.'}

    try:
        degrading_hours = _positive_float(config, 'degrade_hours', DEFAULT_DEGRADE_HOURS)
        degrading_line = _positive_float(config, 'degrade_delta', DEFAULT_DEGRADE_DELTA)
    except ValueError as err:
        return {'error': 'Invalid degradation setting: %s' % err}

    # NOTE: certificate verification is disabled and no proxy is supported yet.
    misp = init(misp_url, misp_key, False, {})

    value = input_attribute['value']
    results = misp.search(value=value)
    if not isinstance(results, list):
        # Anything other than a list of events cannot be scored.
        return {'error': 'Unexpected response from MISP while searching for: %s' % value}

    total_score = 0.0
    confidence = 0.0
    current_time = time.time()

    for event in results:
        event_body = event['Event']
        # JSON object keys are always strings, so compare as a string.
        org = str(event_body['orgc_id'])
        if org not in weights:
            return {'error': "Missing org id in confidence table: %s." % org}
        source_score = weights[org]['scs']

        attribute = find_attribute(event_body, value)
        if attribute is None:
            LOGGER.warning('No attribute found to match in event %s.', event_body.get('id'))
            continue

        try:
            seen_at = get_timestamp_from_attribute(attribute)
        except ValueError as err:
            LOGGER.warning('Skipping event %s: %s', event_body.get('id'), err)
            continue

        total_score += degraded_score(source_score, current_time - seen_at, degrading_hours, degrading_line)
        confidence += source_score

    if confidence <= 0:
        return {'error': "Unable to find value in MISP for: %s" % value}

    final_score = (total_score / confidence) * 100.0  # make it a pct
    return {'results': parse_result(input_attribute, final_score)}


def introspection():
    """Returns a dict of the supported attributes."""
    return mispattributes


def version():
    """Returns a dict with the version and the associated meta-data
    including potential configurations required of the module."""
    moduleinfo['config'] = moduleconfig
    return moduleinfo
# Attribute types that count as IoCs when their to_ids flag is set.
IOC_TYPES = frozenset([
    'ip-src', 'ip-dst', 'ip-src|port', 'ip-dst|port', 'url', 'domain', 'domain|ip', 'hostname|ip',
    'email-dst', 'email-src', 'sha1', 'md5', 'sha256', 'filename|sha1', 'filename|md5',
    'filename|sha256', 'regkey|value', 'regkey',
])

# Event-level attribute types that each add one point of extensiveness.
CONTEXT_TYPES = ('link', 'detection-ratio', 'comment')

# Number of attempts for every MISP request.
FETCH_RETRIES = 5

# Output keys and their (SCE, SCR, SCC, SCW) weights.
SCORE_WEIGHTS = {
    'scs': (1, 1, 1, 1),
    'scs0': (1, 1, 0, 1),
    'scs3': (0.8, 0.6, 0, 1),
}

# Short tags written to the log file instead of the standard level names.
LEVEL_TAGS = {
    logging.CRITICAL: "[E]",
    logging.ERROR: "[E]",
    logging.WARNING: "[W]",
    logging.INFO: "[I]",
    logging.DEBUG: "[D]",
}


def splash():
    """Print the tool banner."""
    print('MISP Build Confidence Weights')


def init(misp_url, misp_key, misp_verifycert, proxies):
    """Return a PyMISP client for *misp_url* authenticated with *misp_key*."""
    return PyMISP(misp_url, misp_key, ssl=misp_verifycert, debug=False, proxies=proxies)


def get_logger(name=None):
    """Return the 'misp-confidence' logger, or its sub-logger *name*.

    On first use the root 'misp-confidence' logger gets a file handler
    writing to ``misp-confidence.log`` and is set to DEBUG.
    """
    root_logger_name = 'misp-confidence'
    full_name = root_logger_name + '.' + name if name else root_logger_name

    root_logger = logging.getLogger(root_logger_name)
    if not root_logger.handlers:
        hdlr = logging.handlers.WatchedFileHandler("misp-confidence.log")
        hdlr.setFormatter(AltFormatter())
        root_logger.addHandler(hdlr)
        root_logger.setLevel(logging.DEBUG)  # todo: make it configurable

    return logging.getLogger(full_name)


class AltFormatter(logging.Formatter):
    """Formatter producing ``<timestamp>: <level tag> <message>`` lines."""

    def __init__(self, msgfmt=None, datefmt=None):
        # Both arguments are accepted for compatibility and ignored.
        logging.Formatter.__init__(self, None, "%H:%M:%S")

    def format(self, record):
        """Return the formatted line without modifying *record*."""
        created = datetime.datetime.fromtimestamp(record.created)
        # Look the tag up locally: overwriting record.levelname would leak
        # the short tag into every other handler that sees this record.
        tag = LEVEL_TAGS.get(record.levelno, record.levelname)
        return '%(timestamp)s: %(levelname)s %(message)s' % {
            'timestamp': created.strftime("%Y-%m-%d %H:%M:%S"),
            'levelname': tag,
            'message': record.getMessage(),
        }


def _to_epoch(value):
    """Convert a numeric or ISO-8601 timestamp value to integer epoch seconds."""
    if isinstance(value, (int, float)):
        return int(value)
    text = str(value).strip()
    try:
        return int(float(text))
    except ValueError:
        pass
    # presumably MISP serialises first_seen/last_seen as ISO-8601 text rather
    # than epoch seconds -- TODO confirm against the deployed MISP version.
    parsed = datetime.datetime.fromisoformat(text.replace('Z', '+00:00'))
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return int(parsed.timestamp())


def get_timestamp_from_attribute(attribute):
    """Return the most relevant timestamp of *attribute* as epoch seconds.

    The first non-empty value of 'last_seen', 'first_seen' and 'timestamp'
    (in that order) is used; missing keys count as empty. Returns None when
    the attribute carries no timestamp at all.
    """
    for key in ('last_seen', 'first_seen', 'timestamp'):
        raw = attribute.get(key)
        if raw:
            return _to_epoch(raw)
    return None


def split_ioc(attribute):
    """Return the (type, value) pair used to match *attribute* elsewhere.

    Simple attributes are returned unchanged. For composite 'a|b' types the
    first component is used, except for 'filename|<hash>' where the hash is
    the identifying part, so both its type and its value are returned.
    """
    ioc_type = attribute['type']
    ioc_value = attribute['value']
    if '|' not in ioc_type:
        return ioc_type, ioc_value
    first_type, _, second_type = ioc_type.partition('|')
    if first_type == 'filename':
        # File names may contain '|', hashes never do: split from the right.
        _, _, hash_value = ioc_value.rpartition('|')
        return second_type, hash_value
    first_value, _, _ = ioc_value.partition('|')
    return first_type, first_value


def event_context_score(event_attributes):
    """Count how many of CONTEXT_TYPES occur among *event_attributes* (0-3)."""
    present = {artifact['type'] for artifact in event_attributes}
    return sum(1 for wanted in CONTEXT_TYPES if wanted in present)


def ioc_extensiveness(attribute, context_score):
    """Return the extensiveness share (0..1) contributed by one IoC.

    One point is given for a non-empty 'last_seen' or 'first_seen', plus the
    event-level *context_score*; the total is divided by 4.
    """
    seen = 1 if (attribute.get('last_seen') or attribute.get('first_seen')) else 0
    return (seen + context_score) / 4


def ioc_responsiveness(current_timestamp, earliest_related, period):
    """Return ((earliest - current) + period) / period for one IoC.

    When no related sighting exists the IoC is compared with itself, giving
    exactly 1.0. The same neutral value is used when the IoC has no timestamp.
    """
    if current_timestamp is None:
        return 1.0
    earliest = earliest_related if earliest_related else current_timestamp
    return ((int(earliest) - int(current_timestamp)) + period) / period


def iter_event_attributes(event):
    """Yield the top-level attributes of *event*, then those of its objects."""
    for attribute in event.get('Attribute') or []:
        yield attribute
    for misp_object in event.get('Object') or []:
        for attribute in misp_object.get('Attribute') or []:
            yield attribute


def find_in_event(event, ioc_type, ioc_value):
    """Return the first attribute of *event* matching type and value, or None."""
    for candidate in iter_event_attributes(event):
        if candidate['type'] == ioc_type and candidate['value'] == ioc_value:
            return candidate
    return None


def earliest_related_timestamp(ioc_type, ioc_value, related_events, skip_org_ids):
    """Search *related_events* for the IoC.

    Events created by an organisation listed in *skip_org_ids* are ignored.
    Returns ``(found, earliest)`` where *found* tells whether any related
    event contains the IoC and *earliest* is the smallest timestamp among
    the matches (None when no match has a timestamp).
    """
    found = False
    earliest = None
    for related_event in related_events:
        body = related_event['Event']
        if str(body['Orgc']['id']) in skip_org_ids:
            continue
        remote_attribute = find_in_event(body, ioc_type, ioc_value)
        if remote_attribute is None:
            continue
        found = True
        other_timestamp = get_timestamp_from_attribute(remote_attribute)
        if other_timestamp:
            earliest = other_timestamp if not earliest else min(other_timestamp, earliest)
    return found, earliest


def cache_path(event_id):
    """Return the cache file path used for *event_id*."""
    return "cache/%s.json" % event_id


def write_event_cache(event_id, payload):
    """Store *payload* as JSON in the cache file of *event_id*."""
    with open(cache_path(event_id), 'w') as f:
        json.dump(payload, f)


def fetch_related_event(misp, event_id, logger):
    """Return the related event *event_id* from cache or from MISP.

    A cache file that cannot be read or does not hold an event is deleted
    and the event is fetched again. Returns None when it cannot be obtained.
    """
    import time  # not imported at module level in this script

    path = cache_path(event_id)
    if os.path.isfile(path):
        try:
            with open(path, 'r') as f:
                cached = json.load(f)
        except (OSError, ValueError):
            cached = None
        if isinstance(cached, dict) and 'Event' in cached:
            return cached
        try:
            os.unlink(path)
        except OSError:
            pass

    for retries_left in range(FETCH_RETRIES - 1, -1, -1):
        try:
            fetched = misp.get_event(event_id)
        except Exception:
            # Request boundary: any failure is logged and retried.
            logger.warning("Unable to fetch event details for related event #%s, retries left: %d" % (event_id, retries_left))
            time.sleep(1)
            continue
        if isinstance(fetched, dict) and 'Event' in fetched:
            write_event_cache(event_id, fetched)
            return fetched
        # An answer without an 'Event' key cannot be used; do not cache it.
        logger.warning("Related event #%s returned no event data." % event_id)
        return None
    return None


def process_org(misp, org, current_org, total_orgs, period):
    """Compute the raw confidence statistics of one organisation.

    Args:
        misp: PyMISP client used for searching and fetching events.
        org: organisation record; only org['Organisation']['id'] is read.
        current_org, total_orgs: progress counters, used for logging only.
        period: staleness window in seconds used for the responsiveness score.

    Returns:
        ``{org_id: {'sce_s', 'scr_s', 'ioc_unique', 'ioc_total', 'scw_s'}}``;
        every value is zero when the organisation's events cannot be loaded.
    """
    # Worker processes may not inherit the logger configured by the parent.
    logger = log if log is not None else get_logger()
    base_url = globals().get('misp_url') or ''
    org_id = org['Organisation']['id']

    results = None
    for _ in range(FETCH_RETRIES):
        try:
            results = misp.search(return_format='json', org=org_id)
            break
        except Exception as err:
            logger.warning("Search for org %s failed: %s" % (org_id, err))

    if not results or not isinstance(results, list):
        logger.error("Org %s unable to be populated." % org_id)
        return {org_id: {'sce_s': 0.0, 'scr_s': 0.0, 'ioc_unique': 0, 'ioc_total': 0, 'scw_s': 0.0}}

    sces_ioc_stat = 0.0
    scrs_ioc_stat = 0.0
    ioc_counter = 0
    unique_ioc_counter = 0
    logger.info("Total %r events for org id %s (%r/%r)" % (len(results), str(org_id), current_org, total_orgs))

    for result in results:
        event = result['Event']
        event_id = event['id']

        # NOTE: events without top-level attributes are skipped entirely,
        # including any attributes they hold inside objects.
        if len(event['Attribute']) == 0:
            logger.warning("Event id %s has no attributes, is this correct?! %s" % (event_id, base_url + "/events/view/" + str(event_id)))
            write_event_cache(event_id, result)
            continue

        related_events = []
        for related in event.get('RelatedEvent') or []:
            fetched = fetch_related_event(misp, related['Event']['id'], logger)
            if fetched is not None:
                related_events.append(fetched)

        # Identical for every IoC of the event, so computed once.
        context_score = event_context_score(event['Attribute'])
        event_iocs = 0

        for attribute in iter_event_attributes(event):
            if not (attribute['to_ids'] and attribute['type'] in IOC_TYPES):
                continue
            ioc_type, ioc_value = split_ioc(attribute)

            sces_ioc_stat += ioc_extensiveness(attribute, context_score)
            ioc_counter += 1
            event_iocs += 1

            found, earliest = earliest_related_timestamp(ioc_type, ioc_value, related_events, SKIP_ORG_ID)
            if not found:
                unique_ioc_counter += 1
            scrs_ioc_stat += ioc_responsiveness(get_timestamp_from_attribute(attribute), earliest, period)

        write_event_cache(event_id, result)

        if event_iocs == 0:
            logger.info("Org: %s Empty Event Id: %s" % (str(org_id), event_id))

    logger.info("Total iocs for org %s (%r/%r): %r" % (str(org_id), current_org, total_orgs, ioc_counter))
    if ioc_counter > 0:
        SCEs = 1.0 / ioc_counter
        logger.debug("Org: %s SCEs upper: %f" % (str(org_id), SCEs))
        logger.debug("Org: %s SCEs stat: %f" % (str(org_id), sces_ioc_stat))
    else:
        logger.warning("FAILED ON ORG, NO IOCS: %s" % str(org_id))
        SCEs = 0

    org_stats = {
        org_id: {
            'sce_s': SCEs * sces_ioc_stat,
            'scr_s': SCEs * scrs_ioc_stat,
            'ioc_unique': unique_ioc_counter,
            'ioc_total': ioc_counter,
            'scw_s': 0
        }
    }
    logger.debug(json.dumps(org_stats[org_id]))
    return org_stats


def weighted_score(stats, weight_sce, weight_scr, weight_scc, weight_scw):
    """Return the weighted average of the four partial scores in *stats*.

    The whole weighted sum is divided by the sum of the weights (the
    original expression divided only its last term). Returns 0.0 when all
    weights are zero.
    """
    total_weight = weight_sce + weight_scr + weight_scc + weight_scw
    if total_weight == 0:
        return 0.0
    weighted_sum = (weight_sce * stats['sce_s'] + weight_scr * stats['scr_s']
                    + weight_scc * stats['scc_s'] + weight_scw * stats['scw_s'])
    return weighted_sum / total_weight


def finalize_org_stats(org_stats):
    """Add 'scc_s' and the SCORE_WEIGHTS scores to every entry of *org_stats*.

    'scc_s' is the organisation's IoC total divided by the number of unique
    IoCs over all organisations (0 when there are none). The dict is
    updated in place and returned.
    """
    unique_total = sum(stats['ioc_unique'] for stats in org_stats.values())
    for stats in org_stats.values():
        stats['scc_s'] = stats['ioc_total'] / unique_total if unique_total > 0 else 0
        for key, weights in SCORE_WEIGHTS.items():
            stats[key] = weighted_score(stats, *weights)
    return org_stats


if __name__ == '__main__':

    log = get_logger()
    splash()
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--clear", help="Clear local cache before processing.", action='store_true')
    parser.add_argument("-t", "--time", help="Number of hours before data is stale.")
    parser.add_argument("-o", "--output", help="Output file to store weights json.")
    parser.add_argument("-w", "--workers", help="Number of workers in pool, default is 4.")
    parser.add_argument("-i", "--disablessl", help="Disable ssl checks.", action='store_true')
    parser.add_argument("-m", "--mispurl", help="Base MISP URL.")
    parser.add_argument("-k", "--mispauthkey", help="MISP Authkey")

    args = parser.parse_args()

    if not args.mispurl:
        print("No --mispurl provided, failling")
        sys.exit(1)

    if not args.mispauthkey:
        print("No --mispauthkey provided, failling")
        sys.exit(1)

    misp_url = args.mispurl
    misp_key = args.mispauthkey

    if args.workers:
        THREADS = int(args.workers)

    hours = int(args.time) if args.time else 14 * 24
    output_file = args.output if args.output else '/var/tmp/misp-source-confidence.json'

    # make/clear our cache
    if not os.path.isdir("cache"):
        os.mkdir("cache")
    if args.clear:
        for cached_name in os.listdir("cache"):
            if cached_name.endswith(".json"):
                os.remove(os.path.join("cache", cached_name))

    # no proxy support built in, sorry, add me later
    misp = init(misp_url, misp_key, not args.disablessl, {})

    orgs = misp.organisations(scope='all')  # or maybe just 'external'

    # xxx: implement allow list detection

    period = hours * 3600
    total_orgs = len(orgs)

    pool = Pool(THREADS)
    pending = []
    try:
        for current_org, org in enumerate(orgs, start=1):
            if str(org['Organisation']['id']) in SKIP_ORG_ID:
                log.info("Skipping organization: %s" % str(org['Organisation']['id']))
                continue
            pending.append(pool.apply_async(process_org, args=(misp, org, current_org, total_orgs, period)))
    finally:
        pool.close()
        pool.join()

    org_stats_extensiveness = {}
    for job in pending:
        org_stats_extensiveness.update(job.get())

    finalize_org_stats(org_stats_extensiveness)

    print("GOLDEN TABLE")
    print(json.dumps(org_stats_extensiveness))
    log.info("GOLDEN TABLE")
    log.info(json.dumps(org_stats_extensiveness))

    with open(output_file, 'w') as f:
        f.write(json.dumps(org_stats_extensiveness))
"ioc_unique": 48, "ioc_total": 48, "scw_s": 0, "scc_s": 0.00010606164391128828, "scs": 1.2501060616439112, "scs0": 1.25, "scs3": 0.8}, "17": {"sce_s": 0.0, "scr_s": 1.0, "ioc_unique": 5, "ioc_total": 5, "scw_s": 0, "scc_s": 1.1048087907425862e-05, "scs": 1.0000110480879074, "scs0": 1.0, "scs3": 0.6}, "2": {"sce_s": 0.27933736167559853, "scr_s": 1.0260220436333003, "ioc_unique": 76036, "ioc_total": 83409, "scw_s": 0, "scc_s": 0.18430199285409674, "scs": 1.4896613981629956, "scs0": 1.3053594053088988, "scs3": 0.839083115520459}, "23": {"sce_s": 0.0, "scr_s": 1.0, "ioc_unique": 43, "ioc_total": 43, "scw_s": 0, "scc_s": 9.501355600386241e-05, "scs": 1.0000950135560038, "scs0": 1.0, "scs3": 0.6}, "22": {"sce_s": 0.25, "scr_s": 0.9914130397215792, "ioc_unique": 27, "ioc_total": 32, "scw_s": 0, "scc_s": 7.070776260752552e-05, "scs": 1.2414837474841867, "scs0": 1.2414130397215792, "scs3": 0.7948478238329475}, "19": {"sce_s": 0.25, "scr_s": 1.3987211271058384, "ioc_unique": 61, "ioc_total": 64, "scw_s": 0, "scc_s": 0.00014141552521505104, "scs": 1.6488625426310535, "scs0": 1.6487211271058384, "scs3": 1.0392326762635031}, "18": {"sce_s": 0.25, "scr_s": 1.0, "ioc_unique": 21, "ioc_total": 21, "scw_s": 0, "scc_s": 4.640196921118862e-05, "scs": 1.2500464019692112, "scs0": 1.25, "scs3": 0.8}, "4": {"sce_s": 0.2690683164499281, "scr_s": 1.2227891352625337, "ioc_unique": 50983, "ioc_total": 57058, "scw_s": 0, "scc_s": 0.12607635996438096, "scs": 1.617933811676843, "scs0": 1.4918574517124619, "scs3": 0.9489281343174627}, "24": {"sce_s": 0.1782181666202285, "scr_s": 0.9524808595196754, "ioc_unique": 4995, "ioc_total": 7178, "scw_s": 0, "scc_s": 0.015860634999900567, "scs": 1.1465596611398046, "scs0": 1.130699026139904, "scs3": 0.714063049007988}, "16": {"sce_s": 0.020833333333333332, "scr_s": 1.0012836417073903, "ioc_unique": 239, "ioc_total": 240, "scw_s": 0, "scc_s": 0.0005303082195564413, "scs": 1.02264728326028, "scs0": 1.0221169750407235, "scs3": 0.6174368516911009}, "14": 
{"sce_s": 0.0, "scr_s": 1.0, "ioc_unique": 29, "ioc_total": 29, "scw_s": 0, "scc_s": 6.407890986307e-05, "scs": 1.0000640789098632, "scs0": 1.0, "scs3": 0.6}, "13": {"sce_s": 0.25, "scr_s": 1.00572771279453, "ioc_unique": 73, "ioc_total": 113, "scw_s": 0, "scc_s": 0.0002496867867078245, "scs": 1.255977399581238, "scs0": 1.25572771279453, "scs3": 0.803436627676718}, "1": {"sce_s": 0.0, "scr_s": 0.9357453224935697, "ioc_unique": 312852, "ioc_total": 317391, "scw_s": 0, "scc_s": 0.7013127338051603, "scs": 1.63705805629873, "scs0": 0.9357453224935697, "scs3": 0.5614471934961418}, "20": {"sce_s": 0.25, "scr_s": 1.001592185561758, "ioc_unique": 205, "ioc_total": 209, "scw_s": 0, "scc_s": 0.000461810074530401, "scs": 1.2520539956362884, "scs0": 1.251592185561758, "scs3": 0.8009553113370549}, "15": {"sce_s": 0.5, "scr_s": 0.8215332279365718, "ioc_unique": 20, "ioc_total": 46, "scw_s": 0, "scc_s": 0.00010164240874831793, "scs": 1.3216348703453202, "scs0": 1.321533227936572, "scs3": 0.8929199367619431}, "11": {"sce_s": 0.26479289940828404, "scr_s": 1.0005409007688657, "ioc_unique": 159, "ioc_total": 169, "scw_s": 0, "scc_s": 0.00037342537127099413, "scs": 1.2657072255484207, "scs0": 1.2653338001771497, "scs3": 0.8121588599879467}, "8": {"sce_s": 0.09859154929577466, "scr_s": 1.0, "ioc_unique": 71, "ioc_total": 71, "scw_s": 0, "scc_s": 0.00015688284828544724, "scs": 1.0987484321440601, "scs0": 1.0985915492957747, "scs3": 0.6788732394366197}, "9": {"sce_s": 0.25, "scr_s": 1.0, "ioc_unique": 50, "ioc_total": 50, "scw_s": 0, "scc_s": 0.00011048087907425862, "scs": 1.2501104808790742, "scs0": 1.25, "scs3": 0.8}, "5": {"sce_s": 0.265, "scr_s": 1.0012463972479424, "ioc_unique": 15, "ioc_total": 200, "scw_s": 0, "scc_s": 0.00044192351629703447, "scs": 1.2666883207642394, "scs0": 1.2662463972479423, "scs3": 0.8127478383487654}, "6": {"sce_s": 0.054411999361736074, "scr_s": 0.9823285804279034, "ioc_unique": 6116, "ioc_total": 6267, "scw_s": 0, "scc_s": 0.013847673383167575, "scs": 
1.050588253172807, "scs0": 1.0367405797896394, "scs3": 0.6329267477461309}, "10": {"sce_s": 0.24999999999999997, "scr_s": 0.9866254218866661, "ioc_unique": 231, "ioc_total": 237, "scw_s": 0, "scc_s": 0.0005236793668119858, "scs": 1.237149101253478, "scs0": 1.2366254218866661, "scs3": 0.7919752531319996}, "7": {"sce_s": 0.07971014492753624, "scr_s": 1.0, "ioc_unique": 69, "ioc_total": 69, "scw_s": 0, "scc_s": 0.0001524636131224769, "scs": 1.0798626085406586, "scs0": 1.0797101449275361, "scs3": 0.663768115942029}, "12": {"sce_s": 0.20918367346938774, "scr_s": 0.9999999999999999, "ioc_unique": 49, "ioc_total": 49, "scw_s": 0, "scc_s": 0.00010827126149277345, "scs": 1.2092919447308805, "scs0": 1.2091836734693877, "scs3": 0.7673469387755101}, "3": {"sce_s": 0.21685082872928177, "scr_s": 0.9519900916973605, "ioc_unique": 170, "ioc_total": 181, "scw_s": 0, "scc_s": 0.0003999407822488162, "scs": 1.1692408612088911, "scs0": 1.1688409204266423, "scs3": 0.7446747180018417}}