Skip to content

Commit ab4e11c

Browse files
author
Z Stern
committed
Merge branch 'master' into async
2 parents 694a6c9 + 2994035 commit ab4e11c

File tree

233 files changed

+10793
-7051
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

233 files changed

+10793
-7051
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
/apps/HelloMatlab/iir_blur.mex
66
bazel-*
77
bin/*
8+
python_bindings/bin/*
89
build-64/*
910
build-ios/*
1011
build-osx/*

.gitmodules

Whitespace-only changes.

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ define_test_group(test_error)
307307
define_test_group(test_generator)
308308
define_test_group(test_opengl)
309309
define_test_group(test_performance)
310-
define_test_group(test_tutorials)
310+
define_test_group(test_tutorial)
311311
define_test_group(test_warning)
312312

313313
add_subdirectory(src)

Makefile

+57-20
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ SOURCE_FILES = \
363363
ObjectInstanceRegistry.cpp \
364364
OutputImageParam.cpp \
365365
ParallelRVar.cpp \
366+
ParamMap.cpp \
366367
Parameter.cpp \
367368
PartitionLoops.cpp \
368369
Pipeline.cpp \
@@ -507,8 +508,9 @@ HEADER_FILES = \
507508
Outputs.h \
508509
OutputImageParam.h \
509510
ParallelRVar.h \
510-
Parameter.h \
511511
Param.h \
512+
ParamMap.h \
513+
Parameter.h \
512514
PartitionLoops.h \
513515
Pipeline.h \
514516
Prefetch.h \
@@ -645,6 +647,7 @@ RUNTIME_LL_COMPONENTS = \
645647
win32_math \
646648
x86 \
647649
x86_avx \
650+
x86_avx2 \
648651
x86_sse41
649652

650653
RUNTIME_EXPORTED_INCLUDES = $(INCLUDE_DIR)/HalideRuntime.h \
@@ -824,17 +827,17 @@ ERROR_TESTS = $(shell ls $(ROOT_DIR)/test/error/*.cpp)
824827
WARNING_TESTS = $(shell ls $(ROOT_DIR)/test/warning/*.cpp)
825828
OPENGL_TESTS := $(shell ls $(ROOT_DIR)/test/opengl/*.cpp)
826829
GENERATOR_EXTERNAL_TESTS := $(shell ls $(ROOT_DIR)/test/generator/*test.cpp)
827-
GENERATOR_EXTERNAL_TEST_GENERATORS := $(shell ls $(ROOT_DIR)/test/generator/*_generator.cpp)
830+
GENERATOR_EXTERNAL_TEST_GENERATOR := $(shell ls $(ROOT_DIR)/test/generator/*_generator.cpp)
828831
TUTORIALS = $(filter-out %_generate.cpp, $(shell ls $(ROOT_DIR)/tutorial/*.cpp))
829832
AUTO_SCHEDULE_TESTS = $(shell ls $(ROOT_DIR)/test/auto_schedule/*.cpp)
830833

831834
-include $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=$(BUILD_DIR)/test_opengl_%.d)
832835

833836
test_correctness: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=correctness_%) $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.c=correctness_%)
834837
test_performance: $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=performance_%)
835-
test_errors: $(ERROR_TESTS:$(ROOT_DIR)/test/error/%.cpp=error_%)
836-
test_warnings: $(WARNING_TESTS:$(ROOT_DIR)/test/warning/%.cpp=warning_%)
837-
test_tutorials: $(TUTORIALS:$(ROOT_DIR)/tutorial/%.cpp=tutorial_%)
838+
test_error: $(ERROR_TESTS:$(ROOT_DIR)/test/error/%.cpp=error_%)
839+
test_warning: $(WARNING_TESTS:$(ROOT_DIR)/test/warning/%.cpp=warning_%)
840+
test_tutorial: $(TUTORIALS:$(ROOT_DIR)/tutorial/%.cpp=tutorial_%)
838841
test_valgrind: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=valgrind_%)
839842
test_avx512: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=avx512_%)
840843
test_opengl: $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=opengl_%)
@@ -900,12 +903,12 @@ GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_matlab,$(GENERATOR_AOTCP
900903
# https://github.com/halide/Halide/issues/2093
901904
GENERATOR_AOTCPP_TESTS := $(filter-out generator_aotcpp_async_coroutine,$(GENERATOR_AOTCPP_TESTS))
902905

903-
test_aotcpp_generators: $(GENERATOR_AOTCPP_TESTS)
906+
test_aotcpp_generator: $(GENERATOR_AOTCPP_TESTS)
904907

905908
# This is just a test to ensure that RunGen builds and links for a critical mass of Generators;
906909
# not all will work directly (e.g. due to missing define_externs at link time), so we blacklist
907910
# those known to be broken for plausible reasons.
908-
GENERATOR_BUILD_RUNGEN_TESTS = $(GENERATOR_EXTERNAL_TEST_GENERATORS:$(ROOT_DIR)/test/generator/%_generator.cpp=$(FILTERS_DIR)/%.rungen)
911+
GENERATOR_BUILD_RUNGEN_TESTS = $(GENERATOR_EXTERNAL_TEST_GENERATOR:$(ROOT_DIR)/test/generator/%_generator.cpp=$(FILTERS_DIR)/%.rungen)
909912
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/cxx_mangling_define_extern.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
910913
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/define_extern_opencl.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
911914
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/matlab.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
@@ -916,16 +919,30 @@ GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/old_buffer_t.rungen,
916919
GENERATOR_BUILD_RUNGEN_TESTS := $(filter-out $(FILTERS_DIR)/tiled_blur.rungen,$(GENERATOR_BUILD_RUNGEN_TESTS))
917920
test_rungen: $(GENERATOR_BUILD_RUNGEN_TESTS)
918921

919-
test_generators: $(GENERATOR_AOT_TESTS) $(GENERATOR_AOTCPP_TESTS) $(GENERATOR_JIT_TESTS) $(GENERATOR_BUILD_RUNGEN_TESTS)
922+
test_generator: $(GENERATOR_AOT_TESTS) $(GENERATOR_AOTCPP_TESTS) $(GENERATOR_JIT_TESTS) $(GENERATOR_BUILD_RUNGEN_TESTS)
923+
924+
# TODO: these are temporary targets added to allow existing buildbot to run without breaking;
925+
# they will be removed after buildbot is updated.
926+
.PHONY: test_errors
927+
test_errors: test_error
928+
929+
.PHONY: test_generators
930+
test_generators: test_generator
931+
932+
.PHONY: test_tutorials
933+
test_tutorials: test_tutorial
920934

921-
ALL_TESTS = test_internal test_correctness test_errors test_tutorials test_warnings test_generators
935+
.PHONY: test_warnings
936+
test_warnings: test_warning
937+
938+
ALL_TESTS = test_internal test_correctness test_error test_tutorial test_warning test_generator
922939

923940
# These targets perform timings of each test. For most tests this includes Halide JIT compile times, and run times.
924941
# For generator tests they time the compile time only. The times are recorded in CSV files.
925942
time_compilation_correctness: init_time_compilation_correctness $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=time_compilation_test_%)
926943
time_compilation_performance: init_time_compilation_performance $(PERFORMANCE_TESTS:$(ROOT_DIR)/test/performance/%.cpp=time_compilation_performance_%)
927944
time_compilation_opengl: init_time_compilation_opengl $(OPENGL_TESTS:$(ROOT_DIR)/test/opengl/%.cpp=time_compilation_opengl_%)
928-
time_compilation_generators: init_time_compilation_generator $(GENERATOR_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=time_compilation_generator_%)
945+
time_compilation_generator: init_time_compilation_generator $(GENERATOR_TESTS:$(ROOT_DIR)/test/generator/%_aottest.cpp=time_compilation_generator_%)
929946

930947
init_time_compilation_%:
931948
echo "TEST,User (s),System (s),Real" > $(@:init_time_compilation_%=compile_times_%.csv)
@@ -944,7 +961,7 @@ build_tests: $(CORRECTNESS_TESTS:$(ROOT_DIR)/test/correctness/%.cpp=$(BIN_DIR)/c
944961
$(GENERATOR_EXTERNAL_TESTS:$(ROOT_DIR)/test/generator/%_jittest.cpp=$(BIN_DIR)/generator_jit_%) \
945962
$(AUTO_SCHEDULE_TESTS:$(ROOT_DIR)/test/auto_schedule/%.cpp=$(BIN_DIR)/auto_schedule_%)
946963

947-
clean_generators:
964+
clean_generator:
948965
rm -rf $(BIN_DIR)/*.generator
949966
rm -rf $(BIN_DIR)/*/runtime.a
950967
rm -rf $(FILTERS_DIR)
@@ -953,7 +970,7 @@ clean_generators:
953970
rm -f $(BUILD_DIR)/GenGen.o
954971
rm -f $(BUILD_DIR)/RunGen.o
955972

956-
time_compilation_tests: time_compilation_correctness time_compilation_performance time_compilation_generators
973+
time_compilation_tests: time_compilation_correctness time_compilation_performance time_compilation_generator
957974

958975
LIBHALIDE_DEPS ?= $(BIN_DIR)/libHalide.$(SHARED_EXT) $(INCLUDE_DIR)/Halide.h
959976

@@ -1015,7 +1032,7 @@ $(BIN_DIR)/auto_schedule_%: $(ROOT_DIR)/test/auto_schedule/%.cpp $(BIN_DIR)/libH
10151032
.INTERMEDIATE: $(BIN_DIR)/%.generator
10161033

10171034
# By default, %.generator is produced by building %_generator.cpp
1018-
# Note that the rule includes all _generator.cpp files, so that generators with define_extern
1035+
# Note that the rule includes all _generator.cpp files, so that generator with define_extern
10191036
# usage can just add deps later.
10201037
$(BUILD_DIR)/%_generator.o: $(ROOT_DIR)/test/generator/%_generator.cpp $(INCLUDE_DIR)/Halide.h
10211038
@mkdir -p $(@D)
@@ -1029,17 +1046,17 @@ $(BIN_DIR)/%.generator: $(BUILD_DIR)/GenGen.o $(BIN_DIR)/libHalide.$(SHARED_EXT)
10291046
# These next two rules can fail the compilation and produce zero length bitcode blobs.
10301047
# If the zero length blob is actually used, the test will fail anyway, but usually only the bitness
10311048
# of the target is used.
1032-
$(BUILD_DIR)/external_code_extern_bitcode_32.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp
1049+
$(BUILD_DIR)/external_code_extern_bitcode_32.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
10331050
@mkdir -p $(@D)
10341051
$(CLANG) $(CXX_WARNING_FLAGS) -O3 -c -m32 -target $(RUNTIME_TRIPLE_32) -emit-llvm $< -o $(BUILD_DIR)/external_code_extern_32.bc || echo -n > $(BUILD_DIR)/external_code_extern_32.bc
10351052
./$(BIN_DIR)/binary2cpp external_code_extern_bitcode_32 < $(BUILD_DIR)/external_code_extern_32.bc > $@
10361053

1037-
$(BUILD_DIR)/external_code_extern_bitcode_64.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp
1054+
$(BUILD_DIR)/external_code_extern_bitcode_64.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
10381055
@mkdir -p $(@D)
10391056
$(CLANG) $(CXX_WARNING_FLAGS) -O3 -c -m64 -target $(RUNTIME_TRIPLE_64) -emit-llvm $< -o $(BUILD_DIR)/external_code_extern_64.bc || echo -n > $(BUILD_DIR)/external_code_extern_64.bc
10401057
./$(BIN_DIR)/binary2cpp external_code_extern_bitcode_64 < $(BUILD_DIR)/external_code_extern_64.bc > $@
10411058

1042-
$(BUILD_DIR)/external_code_extern_cpp_source.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp
1059+
$(BUILD_DIR)/external_code_extern_cpp_source.cpp : $(ROOT_DIR)/test/generator/external_code_extern.cpp $(BIN_DIR)/binary2cpp
10431060
@mkdir -p $(@D)
10441061
./$(BIN_DIR)/binary2cpp external_code_extern_cpp_source < $(ROOT_DIR)/test/generator/external_code_extern.cpp > $@
10451062

@@ -1107,6 +1124,10 @@ $(FILTERS_DIR)/memory_profiler_mandelbrot.a: $(BIN_DIR)/memory_profiler_mandelbr
11071124
@mkdir -p $(@D)
11081125
$(CURDIR)/$< -g memory_profiler_mandelbrot -f memory_profiler_mandelbrot $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime-profile
11091126

1127+
$(FILTERS_DIR)/alias_with_offset_42.a: $(BIN_DIR)/alias.generator
1128+
@mkdir -p $(@D)
1129+
$(CURDIR)/$< -g alias_with_offset_42 -f alias_with_offset_42 $(GEN_AOT_OUTPUTS) -o $(CURDIR)/$(FILTERS_DIR) target=$(TARGET)-no_runtime
1130+
11101131
METADATA_TESTER_GENERATOR_ARGS=\
11111132
input.type=uint8 input.dim=3 \
11121133
type_only_input_buffer.dim=3 \
@@ -1236,6 +1257,15 @@ $(BIN_DIR)/$(TARGET)/generator_aot_msan: $(ROOT_DIR)/test/generator/msan_aottest
12361257
@mkdir -p $(@D)
12371258
$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter-out %.h,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
12381259

1260+
# alias has additional deps to link in
1261+
$(BIN_DIR)/$(TARGET)/generator_aot_alias: $(ROOT_DIR)/test/generator/alias_aottest.cpp $(FILTERS_DIR)/alias.a $(FILTERS_DIR)/alias_with_offset_42.a $(RUNTIME_EXPORTED_INCLUDES) $(BIN_DIR)/$(TARGET)/runtime.a
1262+
@mkdir -p $(@D)
1263+
$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
1264+
1265+
$(BIN_DIR)/$(TARGET)/generator_aotcpp_alias: $(ROOT_DIR)/test/generator/alias_aottest.cpp $(FILTERS_DIR)/alias.cpp $(FILTERS_DIR)/alias_with_offset_42.cpp $(RUNTIME_EXPORTED_INCLUDES) $(BIN_DIR)/$(TARGET)/runtime.a
1266+
@mkdir -p $(@D)
1267+
$(CXX) $(GEN_AOT_CXX_FLAGS) $(filter %.cpp %.o %.a,$^) $(GEN_AOT_INCLUDES) $(GEN_AOT_LD_FLAGS) -o $@
1268+
12391269
# nested_externs has additional deps to link in
12401270
$(BIN_DIR)/$(TARGET)/generator_aot_nested_externs: $(ROOT_DIR)/test/generator/nested_externs_aottest.cpp $(FILTERS_DIR)/nested_externs_root.a $(FILTERS_DIR)/nested_externs_inner.a $(FILTERS_DIR)/nested_externs_combine.a $(FILTERS_DIR)/nested_externs_leaf.a $(RUNTIME_EXPORTED_INCLUDES) $(BIN_DIR)/$(TARGET)/runtime.a
12411271
@mkdir -p $(@D)
@@ -1502,9 +1532,12 @@ test_bazel: $(DISTRIB_DIR)/halide.tgz
15021532
bazel build --verbose_failures :all
15031533

15041534
.PHONY: test_python
1505-
test_python: $(LIB_DIR)/libHalide.a $(INCLUDE_DIR)/Halide.h
1506-
mkdir -p python_bindings
1507-
make -C python_bindings -f $(ROOT_DIR)/python_bindings/Makefile test
1535+
test_python: distrib
1536+
make -C $(ROOT_DIR)/python_bindings \
1537+
-f $(ROOT_DIR)/python_bindings/Makefile \
1538+
test \
1539+
HALIDE_DISTRIB_PATH=$(CURDIR)/$(DISTRIB_DIR) \
1540+
BIN=$(CURDIR)/$(BIN_DIR)/python_bindings
15081541

15091542
# It's just for compiling the runtime, so earlier clangs *might* work,
15101543
# but best to peg it to the minimum llvm version.
@@ -1528,6 +1561,10 @@ ifneq (,$(findstring clang version 6.0,$(CLANG_VERSION)))
15281561
CLANG_OK=yes
15291562
endif
15301563

1564+
ifneq (,$(findstring clang version 7.0,$(CLANG_VERSION)))
1565+
CLANG_OK=yes
1566+
endif
1567+
15311568
ifneq (,$(findstring Apple LLVM version 5.0,$(CLANG_VERSION)))
15321569
CLANG_OK=yes
15331570
endif
@@ -1548,7 +1585,7 @@ $(BUILD_DIR)/clang_ok:
15481585
@exit 1
15491586
endif
15501587

1551-
ifneq (,$(findstring $(LLVM_VERSION_TIMES_10), 40 50 60))
1588+
ifneq (,$(findstring $(LLVM_VERSION_TIMES_10), 40 50 60 70))
15521589
LLVM_OK=yes
15531590
endif
15541591

README.md

+7-7
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ scheduling directive. To enable the `hexagon` scheduling directive, include the
329329
combination of targets is to use the HVX target features with an x86 linux
330330
host (to use the simulator) or with an ARM android target (to use Hexagon DSP hardware).
331331
For examples of using the `hexagon` scheduling directive on both the simulator and a
332-
Hexagon DSP, see the HelloHexagon example app.
332+
Hexagon DSP, see the blur example app.
333333

334334
To build and run an example app using the Hexagon target,
335335
1. Obtain and build LLVM and Clang v4.0 or later from llvm.org
@@ -368,14 +368,14 @@ Go to https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
368368
In addition to running Hexagon code on device, Halide also supports running Hexagon
369369
code on the simulator from the Hexagon tools.
370370

371-
To build and run the HelloHexagon example in Halide/apps/HelloHexagon on the simulator:
371+
To build and run the blur example in Halide/apps/blur on the simulator:
372372

373-
cd apps/HelloHexagon
373+
cd apps/blur
374374
export HL_HEXAGON_SIM_REMOTE=../../src/runtime/hexagon_remote/bin/v60/hexagon_sim_remote
375375
export HL_HEXAGON_TOOLS=$SDK_LOC/Hexagon_Tools/8.0/Tools/
376-
LD_LIBRARY_PATH=../../src/runtime/hexagon_remote/bin/host/:$HL_HEXAGON_TOOLS/lib/iss/:. make run-host
376+
LD_LIBRARY_PATH=../../src/runtime/hexagon_remote/bin/host/:$HL_HEXAGON_TOOLS/lib/iss/:. HL_TARGET=host-hvx_128 make test
377377

378-
#### To build and run the HelloHexagon example in Halide/apps/HelloHexagon on Android:
378+
#### To build and run the blur example in Halide/apps/blur on Android:
379379

380380
To build the example for Android, first ensure that you have a standalone toolchain
381381
created from the NDK using the make-standalone-toolchain.sh script:
@@ -384,7 +384,7 @@ created from the NDK using the make-standalone-toolchain.sh script:
384384
export ANDROID_ARM64_TOOLCHAIN=<path to put new arm64 toolchain>
385385
$ANDROID_NDK_HOME/build/tools/make-standalone-toolchain.sh --arch=arm64 --platform=android-21 --install-dir=$ANDROID_ARM64_TOOLCHAIN
386386

387-
Now build and run the HelloHexagon example:
387+
Now build and run the blur example using the script to run it on device:
388388

389389
export HL_HEXAGON_TOOLS=$SDK_LOC/HEXAGON_Tools/8.0/Tools/
390-
make run-arm-64-android
390+
HL_TARGET=arm-64-android-hvx_128 ./adb_run_on_device.sh

apps/HelloHexagon/Makefile

-66
This file was deleted.

0 commit comments

Comments
 (0)