|
| 1 | +#!/usr/local/bin/python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +# Copyright © 2019 The vt-py authors. All Rights Reserved. |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +# you may not use this file except in compliance with the License. |
| 6 | +# You may obtain a copy of the License at |
| 7 | +# |
| 8 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +# |
| 10 | +# Unless required by applicable law or agreed to in writing, software |
| 11 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +# See the License for the specific language governing permissions and |
| 14 | +# limitations under the License. |
| 15 | + |
| 16 | +"""VT Intelligence searches to network IoCs. |
| 17 | +
|
| 18 | +This is a script to showcase how programmatic VT Intelligence searches can be |
| 19 | +combined with file sandbox behaviour lookups in order to generate network |
| 20 | +indicators of compromise that can be fed into network perimeter defenses. |
| 21 | +
|
| 22 | +Read more: |
| 23 | +https://www.virustotal.com/gui/intelligence-overview |
| 24 | +https://developers.virustotal.com/v3.0/reference#intelligence-search |
| 25 | +https://support.virustotal.com/hc/en-us/articles/360001387057-VirusTotal-Intelligence-Introduction |
| 26 | +""" |
| 27 | + |
| 28 | +import argparse |
| 29 | +import asyncio |
| 30 | +from collections import defaultdict |
| 31 | + |
| 32 | +import vt |
| 33 | + |
| 34 | + |
class VTISearchToNetworkInfrastructureHandler:
    """Turn VT Intelligence search matches into network indicators.

    The handler is a three-stage pipeline connected by two asyncio queues:

    1. ``get_matching_files`` runs the search and puts the SHA-256 of every
       match on ``files_queue``.
    2. ``get_network`` takes checksums from ``files_queue``, looks up the
       domains, IPs and URLs related to each file, and puts them on ``queue``.
    3. ``build_network`` takes those items from ``queue`` and counts how
       often each address was seen.

    Attributes:
      apikey: API key handed to every ``vt.Client`` this handler creates.
      queue: Items produced by ``get_network`` for ``build_network``.
      files_queue: Checksums produced by ``get_matching_files``.
      networking_counters: ``{'domains'|'ips'|'urls': {address: count}}``.
      networking_infrastructure: ``{checksum: {'domains'|'ips'|'urls':
        [relationship entries]}}``.
    """

    def __init__(self, apikey):
        self.apikey = apikey
        self.queue = asyncio.Queue()
        self.files_queue = asyncio.Queue()

        self.networking_counters = {
            'domains': defaultdict(int),
            'ips': defaultdict(int),
            'urls': defaultdict(int)}
        self.networking_infrastructure = defaultdict(
            lambda: defaultdict(dict))

    @staticmethod
    def _address_of(entry):
        """Return the printable address of one relationship entry.

        The URL stored under ``context_attributes`` is preferred; entries
        without one fall back to their ``id``. Returns None if neither
        is present.
        """
        context = entry.get('context_attributes') or {}
        return context.get('url') or entry.get('id')

    def _count_addresses(self, item):
        """Add every address of one ``queue`` item to the counters.

        Args:
          item: Dict with the keys ``'type'`` (``'domains'``, ``'ips'`` or
            ``'urls'``) and ``'contacted_addresses'`` (list of entries).
        """
        item_type = item['type']
        for contacted_address in item['contacted_addresses']:
            if item_type in ('domains', 'ips'):
                address = contacted_address['id']
            else:
                # Same lookup print_results uses, so a URL entry without
                # context attributes is counted under its id instead of
                # raising KeyError.
                address = self._address_of(contacted_address)
            if address is None:
                continue
            self.networking_counters[item_type][address] += 1

    async def get_file_async(self, checksum, relationships=None):
        """Look up a file object.

        Args:
          checksum: Hash identifying the file.
          relationships: Optional comma-separated relationship names to
            request together with the file. Ignored unless it is a
            non-empty string.

        Returns:
          Whatever ``client.get_object_async`` returns for the file path.
        """
        url = '/files/{}'
        if isinstance(relationships, str) and relationships:
            url += '?relationships={}'.format(relationships)

        async with vt.Client(self.apikey) as client:
            file_obj = await client.get_object_async(url.format(checksum))

        return file_obj

    async def get_matching_files(self, query, max_files):
        """Query intelligence for files matching the given criteria.

        The SHA-256 of every match is put on ``files_queue``.

        Args:
          query: VT Intelligence search query.
          max_files: Passed as ``limit`` to the client iterator.

        Raises:
          ValueError: If ``query`` is not a string.
        """
        if not isinstance(query, str):
            raise ValueError('Search filter must be a string.')

        async with vt.Client(self.apikey) as client:
            # NOTE(review): lower-casing also rewrites quoted literals
            # inside the query; confirm that is intended.
            query = query.lower()
            url = '/intelligence/search'

            print('Performing VT Intelligence search...')

            files = client.iterator(
                url, params={'query': query}, limit=max_files)
            async for matching_file in files:
                await self.files_queue.put(matching_file.sha256)

            print('Search concluded, waiting on network infrastructure '
                  'retrieval...')

    async def get_network(self):
        """Retrieve the network infrastructure related to matching files.

        Runs until cancelled. A file whose lookup fails is reported and
        skipped; ``task_done()`` is called for it regardless, so a failure
        cannot leave ``files_queue.join()`` waiting forever.
        """
        while True:
            # Waiting for work stays outside the try block: cancelling an
            # idle worker must not call task_done() for an item it never got.
            checksum = await self.files_queue.get()
            try:
                file_obj = await self.get_file_async(
                    checksum, 'contacted_domains,contacted_ips,contacted_urls')
                relationships = file_obj.relationships
                # Extract everything first so a missing key fails before
                # anything has been queued or stored for this file.
                contacted = (
                    ('domains', relationships['contacted_domains']['data']),
                    ('ips', relationships['contacted_ips']['data']),
                    ('urls', relationships['contacted_urls']['data']),
                )
                for item_type, addresses in contacted:
                    await self.queue.put(
                        {'contacted_addresses': addresses,
                         'type': item_type,
                         'file': checksum})
                    self.networking_infrastructure[checksum][item_type] = (
                        addresses)
            except asyncio.CancelledError:
                # On interpreters where CancelledError derives from
                # Exception the generic handler below would swallow it.
                raise
            except Exception as exc:  # worker boundary: report and go on
                print('Skipping file {}: {!r}'.format(checksum, exc))
            finally:
                self.files_queue.task_done()

    async def build_network(self):
        """Build the stats of the network infrastructure.

        Runs until cancelled. An item that cannot be processed is reported
        and skipped; ``task_done()`` is called for every item taken from
        ``queue`` so ``queue.join()`` always completes.
        """
        while True:
            item = await self.queue.get()
            try:
                self._count_addresses(item)
            except Exception as exc:  # worker boundary: report and go on
                print('Skipping malformed queue item: {!r}'.format(exc))
            finally:
                self.queue.task_done()

    def print_results(self):
        """Pretty print network IoCs for the given VTI search query.

        Prints one ``TYPE: address`` line per stored relationship entry.
        """
        print('\n\n=== Results: ===')
        for contacted in self.networking_infrastructure.values():
            for addresses in contacted.values():
                for entry in addresses:
                    print('{}: {}'.format(
                        entry['type'].upper(), self._address_of(entry)))
| 130 | + |
| 131 | + |
async def main():
    """Perform a VTI search and extract IoCs for each of the matches.

    Command-line flags:
      --apikey: VirusTotal API key (required).
      --query: VT Intelligence search query (required).
      --limit: Maximum number of files to process (default 10).

    Raises:
      Exception: Whatever the search or one of the worker tasks raised; the
        remaining tasks are cancelled before the error propagates.
    """
    parser = argparse.ArgumentParser(
        description='Generate network IoCs for files matching a VTI query.')
    parser.add_argument(
        '--apikey', required=True, help='your VirusTotal API key')
    parser.add_argument(
        '--query', required=True, help='VT Intelligence search query')
    # type=int lets argparse reject a non-numeric limit with a usage error
    # instead of a ValueError traceback later on.
    parser.add_argument(
        '--limit', default=10, type=int, help='Limit of files to process.')

    args = parser.parse_args()

    handler = VTISearchToNetworkInfrastructureHandler(args.apikey)

    async def search_and_drain():
        # Run the search, then wait until both queues are fully processed.
        await handler.get_matching_files(args.query, args.limit)
        await handler.files_queue.join()
        await handler.queue.join()

    workers = [
        asyncio.ensure_future(handler.get_network()),
        asyncio.ensure_future(handler.build_network()),
    ]
    pipeline = asyncio.ensure_future(search_and_drain())
    tasks = [pipeline] + workers

    try:
        # The workers loop forever, so a worker showing up as finished means
        # it crashed. Racing it against the pipeline turns that into an
        # error instead of a join() that never returns.
        done, _ = await asyncio.wait(
            tasks, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            task.result()  # re-raises the task's exception, if any
    finally:
        # Always stop the background tasks and wait for them to unwind so
        # nothing is left pending when the event loop is closed.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)

    handler.print_results()
| 161 | + |
| 162 | + |
if __name__ == '__main__':
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running. The try/finally closes the loop even when main()
    # raises.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.close()
0 commit comments