From ccecea9c68782e2acc6a12effe9413497b98180c Mon Sep 17 00:00:00 2001 From: David Korczynski Date: Sat, 8 Jun 2024 10:48:59 -0700 Subject: [PATCH 1/3] prompt_builder: add list of header files in code fixing prompt Signed-off-by: David Korczynski --- data_prep/introspector.py | 11 ++++++++++- llm_toolkit/prompt_builder.py | 19 ++++++++++++++++--- prompts/template_xml/fixer_problem.txt | 3 ++- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/data_prep/introspector.py b/data_prep/introspector.py index 61d7e9f288..98d5277e35 100755 --- a/data_prep/introspector.py +++ b/data_prep/introspector.py @@ -49,6 +49,7 @@ INTROSPECTOR_TYPE = '' INTROSPECTOR_FUNC_SIG = '' INTROSPECTOR_ADDR_TYPE = '' +INTROSPECTOR_ALL_HEADER_FILES = '' def get_oracle_dict() -> Dict[str, Any]: @@ -66,7 +67,7 @@ def set_introspector_endpoints(endpoint): global INTROSPECTOR_ENDPOINT, INTROSPECTOR_CFG, INTROSPECTOR_FUNC_SIG, \ INTROSPECTOR_FUNCTION_SOURCE, INTROSPECTOR_PROJECT_SOURCE, \ INTROSPECTOR_XREF, INTROSPECTOR_TYPE, INTROSPECTOR_ORACLE_FAR_REACH, \ - INTROSPECTOR_ORACLE_KEYWORD, INTROSPECTOR_ADDR_TYPE + INTROSPECTOR_ORACLE_KEYWORD, INTROSPECTOR_ADDR_TYPE, INTROSPECTOR_ALL_HEADER_FILES INTROSPECTOR_ENDPOINT = endpoint logging.info('Fuzz Introspector endpoint set to %s', INTROSPECTOR_ENDPOINT) @@ -83,6 +84,8 @@ def set_introspector_endpoints(endpoint): INTROSPECTOR_FUNC_SIG = f'{INTROSPECTOR_ENDPOINT}/function-signature' INTROSPECTOR_ADDR_TYPE = ( f'{INTROSPECTOR_ENDPOINT}/addr-to-recursive-dwarf-info') + INTROSPECTOR_ALL_HEADER_FILES = ( + f'{INTROSPECTOR_ENDPOINT}/all-header-files') def _construct_url(api: str, params: dict) -> str: @@ -182,6 +185,12 @@ def query_introspector_cfg(project: str) -> dict: return _get_data(resp, 'project', {}) +def query_introspector_header_files(project: str) -> List[str]: + resp = _query_introspector(INTROSPECTOR_ALL_HEADER_FILES, {'project': project}) + all_header_files = _get_data(resp, 'all-header-files', []) + return all_header_files + + def query_introspector_function_source(project: str, func_sig: str) -> str: """Queries FuzzIntrospector API for source code of |func_sig|.""" resp = _query_introspector(INTROSPECTOR_FUNCTION_SOURCE, { diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 8cb0aa1b59..2ed189de67 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -24,7 +24,7 @@ import requests import yaml -from data_prep import project_targets +from data_prep import introspector, project_targets from experiment.benchmark import Benchmark, FileType from experiment.fuzz_target_error import SemanticCheckResult from llm_toolkit import models, prompts @@ -46,6 +46,7 @@ 'jansi_colors-problem.txt') FDP_JVM_EXAMPLE_2_SOLUTION = os.path.join(EXAMPLE_PATH, 'jansi_colors-solution.java') +HEADER_FIXER_PROMPT=os.path.join(DEFAULT_TEMPLATE_DIR, 'header_fixer.txt') EXAMPLES = { 'c++': [ @@ -271,7 +272,7 @@ def build_fixer_prompt(self, benchmark: Benchmark, raw_code: str, """Prepares the code-fixing prompt.""" priming, priming_weight = self._format_fixer_priming() problem = self._format_fixer_problem(raw_code, error_desc, errors, - priming_weight) + priming_weight, benchmark) self._prepare_prompt(priming, problem) return self._prompt @@ -287,7 +288,8 @@ def _format_fixer_priming(self) -> Tuple[str, int]: return priming, priming_weight def _format_fixer_problem(self, raw_code: str, error_desc: Optional[str], - errors: list[str], priming_weight: int) -> str: + errors: list[str], priming_weight: int, + benchmark: Benchmark) -> str: """Formats a problem for code fixer based on the template.""" with open(self.fixer_problem_template_file) as f: problem = f.read().strip() @@ -297,6 +299,17 @@ def _format_fixer_problem(self, raw_code: str, error_desc: Optional[str], else: # Build error does not pass error desc. error_summary = BUILD_ERROR_SUMMARY + headers_to_avoid = introspector.query_introspector_header_files( + benchmark.project) + if len(headers_to_avoid) > 0: + with open(HEADER_FIXER_PROMPT, 'r') as f: + header_avoid_string = f.read() + for header_file in headers_to_avoid: + header_avoid_string += '- %s\n' % (os.path.basename(header_file)) + else: + header_avoid_string = '' + problem = problem.replace('{ADDITIONAL_MESSAGE}', header_avoid_string) + problem = problem.replace('{ERROR_SUMMARY}', error_summary) problem_prompt = self._prompt.create_prompt_piece(problem, 'user') diff --git a/prompts/template_xml/fixer_problem.txt b/prompts/template_xml/fixer_problem.txt index 83663bf146..135dce96d5 100644 --- a/prompts/template_xml/fixer_problem.txt +++ b/prompts/template_xml/fixer_problem.txt @@ -13,4 +13,5 @@ Fix code: 2. Choose a solution that can maximize fuzzing result, which is utilizing the function under test and feeding it not null input. 3. Apply the solutions to the original code. It's important to show the complete code, not only the fixed line. - \ No newline at end of file +{ADDITIONAL_MESSAGE} + From 8f97ddbed96346640ca78c315690fbdff4d72a83 Mon Sep 17 00:00:00 2001 From: David Korczynski Date: Sat, 8 Jun 2024 10:49:52 -0700 Subject: [PATCH 2/3] style Signed-off-by: David Korczynski --- data_prep/introspector.py | 6 +++--- llm_toolkit/prompt_builder.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data_prep/introspector.py b/data_prep/introspector.py index 98d5277e35..a255102b96 100755 --- a/data_prep/introspector.py +++ b/data_prep/introspector.py @@ -84,8 +84,7 @@ def set_introspector_endpoints(endpoint): INTROSPECTOR_FUNC_SIG = f'{INTROSPECTOR_ENDPOINT}/function-signature' INTROSPECTOR_ADDR_TYPE = ( f'{INTROSPECTOR_ENDPOINT}/addr-to-recursive-dwarf-info') - INTROSPECTOR_ALL_HEADER_FILES = ( - f'{INTROSPECTOR_ENDPOINT}/all-header-files') + INTROSPECTOR_ALL_HEADER_FILES = (f'{INTROSPECTOR_ENDPOINT}/all-header-files') def _construct_url(api: str, params: dict) -> str: @@ -186,7 +185,8 @@ def query_introspector_cfg(project: str) -> dict: def query_introspector_header_files(project: str) -> List[str]: - resp = _query_introspector(INTROSPECTOR_ALL_HEADER_FILES, {'project': project}) + resp = _query_introspector(INTROSPECTOR_ALL_HEADER_FILES, + {'project': project}) all_header_files = _get_data(resp, 'all-header-files', []) return all_header_files diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 2ed189de67..6a183cc9ca 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -46,7 +46,7 @@ 'jansi_colors-problem.txt') FDP_JVM_EXAMPLE_2_SOLUTION = os.path.join(EXAMPLE_PATH, 'jansi_colors-solution.java') -HEADER_FIXER_PROMPT=os.path.join(DEFAULT_TEMPLATE_DIR, 'header_fixer.txt') +HEADER_FIXER_PROMPT = os.path.join(DEFAULT_TEMPLATE_DIR, 'header_fixer.txt') EXAMPLES = { 'c++': [ From 0467753f67184a47ee7d8e120fe9435409686961 Mon Sep 17 00:00:00 2001 From: David Korczynski Date: Sat, 8 Jun 2024 10:51:34 -0700 Subject: [PATCH 3/3] nit Signed-off-by: David Korczynski --- data_prep/introspector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data_prep/introspector.py b/data_prep/introspector.py index a255102b96..052a12377d 100755 --- a/data_prep/introspector.py +++ b/data_prep/introspector.py @@ -67,7 +67,8 @@ def set_introspector_endpoints(endpoint): global INTROSPECTOR_ENDPOINT, INTROSPECTOR_CFG, INTROSPECTOR_FUNC_SIG, \ INTROSPECTOR_FUNCTION_SOURCE, INTROSPECTOR_PROJECT_SOURCE, \ INTROSPECTOR_XREF, INTROSPECTOR_TYPE, INTROSPECTOR_ORACLE_FAR_REACH, \ - INTROSPECTOR_ORACLE_KEYWORD, INTROSPECTOR_ADDR_TYPE, INTROSPECTOR_ALL_HEADER_FILES + INTROSPECTOR_ORACLE_KEYWORD, INTROSPECTOR_ADDR_TYPE, \ + INTROSPECTOR_ALL_HEADER_FILES INTROSPECTOR_ENDPOINT = endpoint logging.info('Fuzz Introspector endpoint set to %s', INTROSPECTOR_ENDPOINT)