File tree Expand file tree Collapse file tree 4 files changed +23
-1
lines changed Expand file tree Collapse file tree 4 files changed +23
-1
lines changed Original file line number Diff line number Diff line change @@ -316,6 +316,9 @@ The following environment variables are available for tailoring the MedCAT Servi
316316- ` SERVER_PORT ` - the port number used (default: ` 5000 ` ),
317317- ` SERVER_WORKERS ` - the number of workers serving the Flask app working in parallel (default: ` 1 ` ; only used in production server).
318318- ` SERVER_WORKER_TIMEOUT ` - the max timeout (in sec) for receiving response from worker (default: ` 300 ` ; only used with production server).
319+ - ` SERVER_GUNICORN_MAX_REQUESTS ` - maximum number of requests a worker will process before restarting (default: ` 1000 ` ),
320+ - ` SERVER_GUNICORN_MAX_REQUESTS_JITTER ` - adds randomness to ` SERVER_GUNICORN_MAX_REQUESTS ` to avoid all workers restarting simultaneously (default: ` 50 ` ),
321+ - ` SERVER_GUNICORN_EXTRA_ARGS ` - any additional Gunicorn CLI arguments to pass (default: none; example: ` SERVER_GUNICORN_EXTRA_ARGS=--backlog 20 ` ).
319322
320323The following environment variables are available for tailoring the MedCAT Service wrapper:
321324
Original file line number Diff line number Diff line change @@ -36,11 +36,13 @@ SERVER_PORT=5000
3636SERVER_WORKERS = 1
3737SERVER_WORKER_TIMEOUT = 300
3838SERVER_THREADS = 1
39+ SERVER_GUNICORN_MAX_REQUESTS = 1000
40+ SERVER_GUNICORN_MAX_REQUESTS_JITTER = 50
3941
4042# set the number of torch threads, this should be used ONLY if you are using CPUs and the default image
4143# set to -1 or 0 if you are using GPU
4244APP_TORCH_THREADS = 8
4345
4446# GPU SETTING
4547# CAUTION, use only if you are using the GPU docker image.
46- APP_CUDA_DEVICE_COUNT = 1
48+ APP_CUDA_DEVICE_COUNT = - 1
Original file line number Diff line number Diff line change @@ -36,6 +36,8 @@ SERVER_PORT=5000
3636SERVER_WORKERS = 1
3737SERVER_WORKER_TIMEOUT = 300
3838SERVER_THREADS = 1
39+ SERVER_GUNICORN_MAX_REQUESTS = 1000
40+ SERVER_GUNICORN_MAX_REQUESTS_JITTER = 50
3941
4042# set the number of torch threads, this should be used ONLY if you are using CPUs and the default image
4143# set to -1 or 0 if you are using GPU
Original file line number Diff line number Diff line change @@ -33,13 +33,25 @@ if [ -z ${SERVER_WORKER_TIMEOUT+x} ]; then
3333 echo " SERVER_WORKER_TIMEOUT is unset -- setting to default (sec): $SERVER_WORKER_TIMEOUT " ;
3434fi
3535
36+ if [ -z ${SERVER_GUNICORN_MAX_REQUESTS+x} ]; then
37+ SERVER_GUNICORN_MAX_REQUESTS=1000;
38+ echo " SERVER_GUNICORN_MAX_REQUESTS is unset -- setting to default: $SERVER_GUNICORN_MAX_REQUESTS " ;
39+ fi
40+
41+ if [ -z ${SERVER_GUNICORN_MAX_REQUESTS_JITTER+x} ]; then
42+ SERVER_GUNICORN_MAX_REQUESTS_JITTER=50;
43+ echo " SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default: $SERVER_GUNICORN_MAX_REQUESTS_JITTER " ;
44+ fi
45+
3646# Note - SERVER_ACCESS_LOG_FORMAT is unused when worker-class is set to UvicornWorker
3747SERVER_ACCESS_LOG_FORMAT=" %(t)s [ACCESS] %(h)s \" %(r)s\" %(s)s \" %(f)s\" \" %(a)s\" "
3848
3949# start the server
4050#
4151# Using Gunicorn, even though FastAPI recommends Uvicorn, to keep support for the post_fork config
4252echo " Starting up the service using gunicorn server ..."
53+ set -x
54+
4355exec gunicorn \
4456 --bind " $SERVER_HOST :$SERVER_PORT " \
4557 --workers=" $SERVER_WORKERS " \
@@ -50,5 +62,8 @@ exec gunicorn \
5062 --error-logfile=- \
5163 --log-level info \
5264 --config /cat/config.py \
65+ --max-requests=" $SERVER_GUNICORN_MAX_REQUESTS " \
66+ --max-requests-jitter=" $SERVER_GUNICORN_MAX_REQUESTS_JITTER " \
67+ ${SERVER_GUNICORN_EXTRA_ARGS:- } \
5368 --worker-class uvicorn.workers.UvicornWorker \
5469 medcat_service.main:app
You can’t perform that action at this time.
0 commit comments