Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test-to-harness: initial set up #511

Merged
merged 13 commits into from
Aug 2, 2024
26 changes: 26 additions & 0 deletions benchmark-sets/from-test-small/krb5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"is_test_benchmark": true
"language": "c"
"project": "krb5"
"target_name": "fuzz_gss"
"target_path": "/src/krb5/src/tests/fuzzing/fuzz_gss.c"
"test_files":
- "test_file_path": "//src/krb5/src/tests/gssapi/t_namingexts.c"
- "test_file_path": "//src/krb5/src/tests/icinterleave.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_gssexts.c"
- "test_file_path": "//src/krb5/src/tests/s4u2self.c"
- "test_file_path": "//src/krb5/src/tests/localauth.c"
- "test_file_path": "//src/krb5/src/tests/misc/test_cxx_k5int.cpp"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_inq_cred.c"
- "test_file_path": "//src/krb5/src/tests/t_inetd.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_inq_ctx.c"
- "test_file_path": "//src/krb5/src/tests/rdreq.c"
- "test_file_path": "//src/krb5/src/tests/misc/test_cxx_kadm5.cpp"
- "test_file_path": "//src/krb5/src/tests/asn.1/krb5_encode_test.c"
- "test_file_path": "//src/krb5/src/tests/misc/test_getpw.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_saslname.c"
- "test_file_path": "//src/krb5/src/tests/asn.1/t_trval.c"
- "test_file_path": "//src/krb5/src/tests/unlockiter.c"
- "test_file_path": "//src/krb5/src/tests/hooks.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_spnego.c"
- "test_file_path": "//src/krb5/src/tests/misc/test_nfold.c"
- "test_file_path": "//src/krb5/src/tests/gssapi/t_ccselect.c"
21 changes: 21 additions & 0 deletions benchmark-sets/from-test-small/liblouis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"is_test_benchmark": true
"language": "c"
"project": "liblouis"
"target_name": "fuzz_translate_generic"
"target_path": "/src/liblouis/tests/fuzzing/fuzz_translate_generic.c"
"test_files":
- "test_file_path": "//src/liblouis/tests/hyphenate_xxx.c"
- "test_file_path": "//src/liblouis/tests/checkTable.c"
- "test_file_path": "//src/liblouis/tests/typeform.c"
- "test_file_path": "//src/liblouis/tests/check_metadata.c"
- "test_file_path": "//src/liblouis/tests/charToFallbackDots.c"
- "test_file_path": "//src/liblouis/tests/findTable.c"
- "test_file_path": "//src/liblouis/tests/typeform_for_emphclass.c"
- "test_file_path": "//src/liblouis/tests/resolve_table.c"
- "test_file_path": "//src/liblouis/tests/suggestChunks.c"
- "test_file_path": "//src/liblouis/tests/hash_collision.c"
- "test_file_path": "//src/liblouis/tests/attributeNames.c"
- "test_file_path": "//src/liblouis/tests/logging.c"
- "test_file_path": "//src/liblouis/tests/getTable.c"
- "test_file_path": "//src/liblouis/tests/check_ueb_test_data.c"
- "test_file_path": "//src/liblouis/tests/emphclass.c"
19 changes: 19 additions & 0 deletions benchmark-sets/from-test-small/libraw.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"is_test_benchmark": true
"language": "c++"
"project": "libraw"
"target_name": "libraw_fuzzer"
"target_path": "/src/libraw_fuzzer.cc"
"test_files":
- "test_file_path": "//src/libraw/samples/dcraw_half.c"
- "test_file_path": "//src/libraw/samples/half_mt_win32.c"
- "test_file_path": "//src/libraw/samples/simple_dcraw.cpp"
- "test_file_path": "//src/libraw/samples/raw-identify.cpp"
- "test_file_path": "//src/libraw/samples/unprocessed_raw.cpp"
- "test_file_path": "//src/libraw/samples/mem_image_sample.cpp"
- "test_file_path": "//src/libraw/samples/postprocessing_benchmark.cpp"
- "test_file_path": "//src/libraw/samples/openbayer_sample.cpp"
- "test_file_path": "//src/libraw/samples/half_mt.c"
- "test_file_path": "//src/libraw/samples/multirender_test.cpp"
- "test_file_path": "//src/libraw/samples/rawtextdump.cpp"
- "test_file_path": "//src/libraw/samples/dcraw_emu.cpp"
- "test_file_path": "//src/libraw/samples/4channels.cpp"
26 changes: 26 additions & 0 deletions benchmark-sets/from-test-small/libsodium.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"is_test_benchmark": true
"language": "c++"
"project": "libsodium"
"target_name": "secretbox_easy_fuzzer"
"target_path": "/src/secretbox_easy_fuzzer.cc"
"test_files":
- "test_file_path": "//src/libsodium/test/default/box2.c"
- "test_file_path": "//src/libsodium/test/default/sodium_core.c"
- "test_file_path": "//src/libsodium/test/default/stream2.c"
- "test_file_path": "//src/libsodium/test/default/scalarmult_ristretto255.c"
- "test_file_path": "//src/libsodium/test/default/onetimeauth2.c"
- "test_file_path": "//src/libsodium/test/default/auth6.c"
- "test_file_path": "//src/libsodium/test/default/hash3.c"
- "test_file_path": "//src/libsodium/test/default/secretbox_easy2.c"
- "test_file_path": "//src/libsodium/test/default/chacha20.c"
- "test_file_path": "//src/libsodium/test/default/secretbox.c"
- "test_file_path": "//src/libsodium/test/default/box_seal.c"
- "test_file_path": "//src/libsodium/test/default/keygen.c"
- "test_file_path": "//src/libsodium/test/default/core3.c"
- "test_file_path": "//src/libsodium/test/default/pwhash_scrypt_ll.c"
- "test_file_path": "//src/libsodium/test/default/verify1.c"
- "test_file_path": "//src/libsodium/test/default/auth2.c"
- "test_file_path": "//src/libsodium/test/default/core1.c"
- "test_file_path": "//src/libsodium/test/default/aead_xchacha20poly1305.c"
- "test_file_path": "//src/libsodium/test/default/secretbox2.c"
- "test_file_path": "//src/libsodium/test/default/box_easy.c"
53 changes: 47 additions & 6 deletions data_prep/introspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
INTROSPECTOR_ORACLE_EASY_PARAMS = ''
INTROSPECTOR_ORACLE_ALL_JVM_PUBLIC_CANDIDATES = ''
INTROSPECTOR_ORACLE_OPTIMAL = ''
INTROSPECTOR_ORACLE_ALL_TESTS = ''
INTROSPECTOR_FUNCTION_SOURCE = ''
INTROSPECTOR_PROJECT_SOURCE = ''
INTROSPECTOR_XREF = ''
Expand All @@ -81,6 +82,7 @@ def get_oracle_dict() -> Dict[str, Any]:
'easy-params-far-reach': query_introspector_for_easy_param_targets,
'jvm-public-candidates': query_introspector_jvm_all_public_candidates,
'optimal-targets': query_introspector_for_optimal_targets,
'test-migration': query_introspector_for_tests,
}
return oracle_dict

Expand All @@ -96,7 +98,8 @@ def set_introspector_endpoints(endpoint):
INTROSPECTOR_ORACLE_ALL_JVM_PUBLIC_CANDIDATES, \
INTROSPECTOR_ALL_JVM_SOURCE_PATH, INTROSPECTOR_ORACLE_OPTIMAL, \
INTROSPECTOR_HEADERS_FOR_FUNC, \
INTROSPECTOR_FUNCTION_WITH_MATCHING_RETURN_TYPE
INTROSPECTOR_FUNCTION_WITH_MATCHING_RETURN_TYPE, \
INTROSPECTOR_ORACLE_ALL_TESTS

INTROSPECTOR_ENDPOINT = endpoint

Expand Down Expand Up @@ -127,6 +130,7 @@ def set_introspector_endpoints(endpoint):
f'{INTROSPECTOR_ENDPOINT}/all-project-source-files')
INTROSPECTOR_FUNCTION_WITH_MATCHING_RETURN_TYPE = (
f'{INTROSPECTOR_ENDPOINT}/function-with-matching-return-type')
INTROSPECTOR_ORACLE_ALL_TESTS = f'{INTROSPECTOR_ENDPOINT}/project-tests'


def _construct_url(api: str, params: dict) -> str:
Expand Down Expand Up @@ -201,6 +205,14 @@ def _get_data(resp: Optional[requests.Response], key: str,
return default_value


def query_introspector_for_tests(project: str) -> list[str]:
  """Fetches the test files Fuzz Introspector reports for a project.

  Queries the INTROSPECTOR_ORACLE_ALL_TESTS endpoint with the project name
  and extracts the 'test-file-list' entry from the response, passing an empty
  list as the default value for the extraction.

  Args:
    project: Name of the project whose test files are requested.

  Returns:
    The value extracted for 'test-file-list' from the response.
  """
  query_params = {'project': project}
  response = _query_introspector(INTROSPECTOR_ORACLE_ALL_TESTS, query_params)
  test_files = _get_data(response, 'test-file-list', [])
  return test_files


def query_introspector_oracle(project: str, oracle_api: str) -> list[dict]:
"""Queries a fuzz target oracle API from Fuzz Introspector."""
resp = _query_introspector(
Expand Down Expand Up @@ -650,6 +662,38 @@ def populate_benchmarks_using_introspector(project: str, language: str,
limit: int,
target_oracles: List[str]):
"""Populates benchmark YAML files from the data from FuzzIntrospector."""

# If there is any oracle with test-migration then only do this oracle
# selection, because the benchmarks will have different .yaml structure.
# TODO(David): clean up benchmark code to make it more flexible for varying
# forms of target selectors.
for target_oracle in target_oracles:
if 'test-migration' in target_oracle:
harnesses, interesting = project_src.search_source(project, [], language)
harness = pick_one(harnesses)
if not harness:
logger.error('No fuzz target found in project %s.', project)
return []
logger.info('Using harness path %s', harness)
potential_benchmarks = []
test_files = query_introspector_for_tests(project)
for test_file in test_files:
potential_benchmarks.append(
benchmarklib.Benchmark(benchmark_id='cli',
project=project,
language=language,
function_signature='test-file',
function_name='test-file',
return_type='test',
params=[],
exceptions=[],
is_jvm_static=False,
target_path=harness,
preferred_target_name='',
is_test_benchmark=True,
test_file_path=test_file))
return potential_benchmarks[:limit]

functions = _select_functions_from_oracles(project, limit, target_oracles)

if not functions:
Expand All @@ -667,11 +711,8 @@ def populate_benchmarks_using_introspector(project: str, language: str,
for function in functions
]

result = project_src.search_source(project, filenames, language)
if not result:
return []

harnesses, interesting = result
harnesses, interesting = project_src.search_source(project, filenames,
language)
harness = pick_one(harnesses)
if not harness:
logger.error('No fuzz target found in project %s.', project)
Expand Down
121 changes: 78 additions & 43 deletions experiment/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,26 @@ def to_yaml(cls, benchmarks: list[Benchmark], outdir: str = './'):
# Register the custom representer
yaml.add_representer(str, quoted_string_presenter)
result = {
'project':
benchmarks[0].project,
'language':
benchmarks[0].language,
'target_path':
benchmarks[0].target_path,
'target_name':
benchmarks[0].target_name,
'functions': [{
'signature': b.function_signature,
'name': b.function_name,
'return_type': b.return_type,
'params': b.params,
'exceptions': b.exceptions,
'is_jvm_static': b.is_jvm_static,
} for b in benchmarks],
'project': benchmarks[0].project,
'language': benchmarks[0].language,
'target_path': benchmarks[0].target_path,
'target_name': benchmarks[0].target_name,
'is_test_benchmark': benchmarks[0].is_test_benchmark,
}
if benchmarks[0].is_test_benchmark:
result['test_files'] = [{
'test_file_path': b.test_file_path
} for b in benchmarks]
else:
result['functions'] = [{
'signature': b.function_signature,
'name': b.function_name,
'return_type': b.return_type,
'params': b.params,
'exceptions': b.exceptions,
'is_jvm_static': b.is_jvm_static,
} for b in benchmarks]

with open(os.path.join(outdir, f'{benchmarks[0].project}.yaml'),
'w') as file:
yaml.dump(result, file, default_flow_style=False, width=sys.maxsize)
Expand All @@ -83,32 +86,60 @@ def from_yaml(cls, benchmark_path: str) -> List:
cppify_headers = data.get('cppify_headers', False)
commit = data.get('commit')
functions = data.get('functions', [])
for function in functions:
# Long raw_function_names (particularly for c++ projects) may exceed
# filesystem limits on file path/name length when creating WorkDir.
max_len = os.pathconf('/', 'PC_NAME_MAX') - len('output-')
# Docker tag name cannot exceed 127 characters, and will be suffixed by
# '<sample-id>-experiment'.
docker_name_len = 127 - len('-03-experiment')
max_len = min(max_len, docker_name_len)
truncated_id = f'{project_name}-{function.get("name")}'[:max_len]
benchmarks.append(
cls(truncated_id.lower(),
data['project'],
data['language'],
function.get('signature'),
function.get('name'),
function.get('return_type'),
function.get('params'),
function.get('exceptions', []),
function.get('is_jvm_static', False),
data['target_path'],
data.get('target_name'),
use_project_examples=use_project_examples,
cppify_headers=cppify_headers,
commit=commit,
use_context=use_context,
function_dict=function))

is_test_benchmark = data.get('is_test_benchmark', False)
test_files = data.get('test_files', [])
if is_test_benchmark:
for test_file in test_files:
max_len = os.pathconf('/', 'PC_NAME_MAX') - len('output-')
test_file_path = test_file.get('test_file_path')
truncated_id = f'{project_name}-{test_file_path.replace("/","_").replace(".","_")}'[:
max_len]

benchmarks.append(
cls(
truncated_id.lower(),
data['project'],
data['language'],
'',
'',
'',
[],
[],
False,
data['target_path'],
data.get('target_name', ''),
is_test_benchmark=True,
test_file_path=test_file_path,
))
else:
# function type benchmark
for function in functions:
# Long raw_function_names (particularly for c++ projects) may exceed
# filesystem limits on file path/name length when creating WorkDir.
max_len = os.pathconf('/', 'PC_NAME_MAX') - len('output-')
# Docker tag name cannot exceed 127 characters, and will be suffixed by
# '<sample-id>-experiment'.
docker_name_len = 127 - len('-03-experiment')
max_len = min(max_len, docker_name_len)
truncated_id = f'{project_name}-{function.get("name")}'[:max_len]
benchmarks.append(
cls(truncated_id.lower(),
data['project'],
data['language'],
function.get('signature'),
function.get('name'),
function.get('return_type'),
function.get('params'),
function.get('exceptions', []),
function.get('is_jvm_static', False),
data['target_path'],
data.get('target_name'),
use_project_examples=use_project_examples,
cppify_headers=cppify_headers,
commit=commit,
use_context=use_context,
function_dict=function))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK for now, but would you please merge the duplicated code in the if/else blocks later to reduce repetition?
Thanks

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, will do


return benchmarks

Expand All @@ -128,7 +159,9 @@ def __init__(self,
cppify_headers=False,
use_context=False,
commit=None,
function_dict: Optional[dict] = None):
function_dict: Optional[dict] = None,
is_test_benchmark: bool = False,
test_file_path: str = ''):
self.id = benchmark_id
self.project = project
self.language = language
Expand All @@ -145,6 +178,8 @@ def __init__(self,
self.use_context = use_context
self.cppify_headers = cppify_headers
self.commit = commit
self.test_file_path = test_file_path
self.is_test_benchmark = is_test_benchmark

if self.language == 'jvm':
# For java projects, in order to differentiate between overloaded methods
Expand Down
1 change: 1 addition & 0 deletions experiment/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def run_log_path(self, generated_target_name: str):

def create_ossfuzz_project(self, name: str, target_file: str) -> str:
"""Creates an OSS-Fuzz project with the generated target."""
logger.info(f'target file: {target_file}')
generated_project_path = os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR,
'projects', name)
if os.path.exists(generated_project_path):
Expand Down
Loading