-
Notifications
You must be signed in to change notification settings - Fork 234
Description
Describe the issue
When running 'make run_server' to build and start version 0.0.2, the command fails after a few minutes with a "DeviceFree: out of memory" panic. I am using a new server with no other processes running.
Reproduce the issue
NVIDIA driver version: 390.48
CUDA version: release 9.1, V9.1.85
golang version: 1.13
gcc version: 5.4.0
cmake version: 3.15.4
Follow the instructions to compile AresDB version 0.0.2 using 'make run_server'.
Error message
[ 15%] Built target mem
[100%] Built target algorithm
[100%] Built target lib
[100%] Built target aresd
Using config file: config/ares.yaml
{"level":"info","msg":"Bootstrapping service","config":{"Port":9374,"DebugPort":43202,"RootPath":"ares-root","TotalMemorySize":161061273600,"SchedulerOff":false,"Version":"","Env":"","Query":{"DeviceMemoryUtilization":0.95,"DeviceChoosingTimeout":10,"TimezoneTable":{"TableName":"api_cities"},"EnableHashReduction":false},"DiskStore":{"WriteSync":true},"HTTP":{"MaxConnections":300,"ReadTimeOutInSeconds":20,"WriteTimeOutInSeconds":300},"RedoLogConfig":{"DiskConfig":{"Disabled":false},"KafkaConfig":{"Enabled":false,"Brokers":null,"TopicSuffix":""},"DiskOnlyForUnsharded":false},"Cluster":{"Enable":false,"Distributed":false,"Namespace":"","InstanceID":"","Controller":{"Address":"localhost:6708","Headers":null,"TimeoutSec":0},"Etcd":{"Zone":"local","Env":"dev","Service":"ares-datanode","CacheDir":"","ETCDClusters":[{"Zone":"local","Endpoints":["127.0.0.1:2379"],"KeepAlive":null,"TLS":null}],"SDConfig":{"InitTimeout":null},"WatchWithRevision":0},"HeartbeatConfig":{"Timeout":10,"Interval":1}}}}
panic: ERROR when calling CUDA functions: DeviceFree: out of memory
goroutine 1 [running]:
github.com/uber/aresdb/utils.StackError(0x0, 0x0, 0xc00004e040, 0x3d, 0x0, 0x0, 0x0, 0x0)
/nvme1n1/go1/src/github.com/uber/aresdb/utils/error.go:61 +0x3f9
github.com/uber/aresdb/cgoutils.DoCGoCall(0xc0005b2e18, 0xc0004a44d0)
/nvme1n1/go1/src/github.com/uber/aresdb/cgoutils/utils.go:31 +0xa7
github.com/uber/aresdb/cgoutils.doCGoCall(0xc0005b2e48, 0x1)
/nvme1n1/go1/src/github.com/uber/aresdb/cgoutils/memory.go:188 +0x49
github.com/uber/aresdb/cgoutils.DeviceFree(0x0, 0x0)
/nvme1n1/go1/src/github.com/uber/aresdb/cgoutils/memory.go:111 +0x5c
github.com/uber/aresdb/cmd/aresd/cmd.start(0x249e, 0xa8c2, 0xc0005660c0, 0x9, 0x2580000000, 0x0, 0x0, 0x0, 0x0, 0x0, ...)
/nvme1n1/go1/src/github.com/uber/aresdb/cmd/aresd/cmd/cmd.go:103 +0x1c2
github.com/uber/aresdb/cmd/aresd/cmd.Execute.func1(0xc00038e000, 0x1e39648, 0x0, 0x0)
/nvme1n1/go1/src/github.com/uber/aresdb/cmd/aresd/cmd/cmd.go:85 +0x13d
github.com/spf13/cobra.(*Command).execute(0xc00038e000, 0xc00003c1d0, 0x0, 0x0, 0xc00038e000, 0xc00003c1d0)
/nvme1n1/go1/pkg/mod/github.com/spf13/cobra@v0.0.5/command.go:830 +0x2aa
github.com/spf13/cobra.(*Command).ExecuteC(0xc00038e000, 0xc0004a2050, 0x5, 0x134fe40)
/nvme1n1/go1/pkg/mod/github.com/spf13/cobra@v0.0.5/command.go:914 +0x2fb
github.com/spf13/cobra.(*Command).Execute(...)
/nvme1n1/go1/pkg/mod/github.com/spf13/cobra@v0.0.5/command.go:864
github.com/uber/aresdb/cmd/aresd/cmd.Execute(0x0, 0x0, 0x0)
/nvme1n1/go1/src/github.com/uber/aresdb/cmd/aresd/cmd/cmd.go:95 +0x229
main.main()
/nvme1n1/go1/src/github.com/uber/aresdb/cmd/aresd/main.go:20 +0x32
goroutine 1 [running]:
github.com/uber/aresdb/cgoutils.DoCGoCall(0xc0005b2e18, 0xc0004a44d0)
/nvme1n1/go1/src/github.com/uber/aresdb/cgoutils/utils.go:31 +0xc1
github.com/uber/aresdb/cgoutils.doCGoCall(0xc0005b2e48, 0x1)
/nvme1n1/go1/src/github.com/uber/aresdb/cgoutils/memory.go:188 +0x49
github.com/uber/aresdb/cgoutils.DeviceFree(0x0, 0x0)
/nvme1n1/go1/src/github.com/uber/aresdb/cgoutils/memory.go:111 +0x5c
github.com/uber/aresdb/cmd/aresd/cmd.start(0x249e, 0xa8c2, 0xc0005660c0, 0x9, 0x2580000000, 0x0, 0x0, 0x0, 0x0, 0x0, ...)
/nvme1n1/go1/src/github.com/uber/aresdb/cmd/aresd/cmd/cmd.go:103 +0x1c2
github.com/uber/aresdb/cmd/aresd/cmd.Execute.func1(0xc00038e000, 0x1e39648, 0x0, 0x0)
/nvme1n1/go1/src/github.com/uber/aresdb/cmd/aresd/cmd/cmd.go:85 +0x13d
github.com/spf13/cobra.(*Command).execute(0xc00038e000, 0xc00003c1d0, 0x0, 0x0, 0xc00038e000, 0xc00003c1d0)
/nvme1n1/go1/pkg/mod/github.com/spf13/cobra@v0.0.5/command.go:830 +0x2aa
github.com/spf13/cobra.(*Command).ExecuteC(0xc00038e000, 0xc0004a2050, 0x5, 0x134fe40)
/nvme1n1/go1/pkg/mod/github.com/spf13/cobra@v0.0.5/command.go:914 +0x2fb
github.com/spf13/cobra.(*Command).Execute(...)
/nvme1n1/go1/pkg/mod/github.com/spf13/cobra@v0.0.5/command.go:864
github.com/uber/aresdb/cmd/aresd/cmd.Execute(0x0, 0x0, 0x0)
/nvme1n1/go1/src/github.com/uber/aresdb/cmd/aresd/cmd/cmd.go:95 +0x229
main.main()
/nvme1n1/go1/src/github.com/uber/aresdb/cmd/aresd/main.go:20 +0x32
CMakeFiles/run_server.dir/build.make:57: recipe for target 'CMakeFiles/run_server' failed
make[3]: *** [CMakeFiles/run_server] Error 2
CMakeFiles/Makefile2:467: recipe for target 'CMakeFiles/run_server.dir/all' failed
make[2]: *** [CMakeFiles/run_server.dir/all] Error 2
CMakeFiles/Makefile2:474: recipe for target 'CMakeFiles/run_server.dir/rule' failed
make[1]: *** [CMakeFiles/run_server.dir/rule] Error 2
Makefile:298: recipe for target 'run_server' failed
make: *** [run_server] Error 2