Update README and deploy yaml

thomassong · thomassong · commit 088928cbcaae · 2019-11-28T14:41:57.000+08:00
Signed-off-by: thomassong &lt;thomassong@tencent.com&gt;
diff --git a/Makefile b/Makefile
@@ -6,11 +6,6 @@ all:
 clean:
 	rm -rf ./go
 
-.PHONY: vendor
-vendor:
-	rm -rf vendor
-	hack/glide.sh
-
 .PHONY: test
 test:
 	hack/build.sh "test"
diff --git a/README.md b/README.md
@@ -1,18 +1,41 @@
 # GPU Manager
 
+[![Build Status](https://travis-ci.org/tkestack/gpu-manager.svg?branch=master)](https://travis-ci.org/tkestack/gpu-manager)
+
 GPU Manager is used for managing the NVIDIA GPU devices in a Kubernetes cluster. It implements the `DevicePlugin` interface
 of Kubernetes, so it is compatible with Kubernetes release 1.9 and later.
 
 Compared with the combined solution of `nvidia-docker`
 and `nvidia-k8s-plugin`, GPU manager uses the native `runc` without modification, whereas the nvidia solution modifies it.
 Besides, we also support metrics reporting without deploying new components.
 
-To schedule a GPU payload correctly, GPU manager should work with `gpu-quota-admission` which is a kubernetes scheduler plugin.
+To schedule a GPU payload correctly, GPU manager should work with [gpu-admission](https://github.com/tkestack/gpu-admission) which is a
+ kubernetes scheduler plugin.
 
 GPU manager also supports the payload with fraction resource of GPU device such as 0.1 card or 100MiB gpu device memory.
 If you want this kind of feature, please refer to the [vcuda-controller](https://github.com/tkestack/vcuda-controller) project.
 
-# How to deploy GPU Manager
+## Build
+
+**1.** Build binary
+
+- Prerequisite
+   - CUDA toolkit
+    
+```
+make
+```
+
+**2.** Build image
+
+- Prerequisite
+    - Docker
+
+```
+make img
+```
+
+## Deploy
 
 GPU Manager runs as a daemonset, and because of RBAC restrictions and hybrid clusters,
 you need to do the following steps to make this daemonset run correctly.
@@ -30,6 +53,63 @@ kubectl create clusterrolebinding gpu-manager-role --clusterrole=cluster-admin -
 kubectl label node <node> nvidia-device-enable=enable
 ```
 
-- change gpu-manager.yaml and submit
+## Pod template example
+
+There is nothing special about submitting a Pod, except that the description of the GPU resource is no longer 1.
+The GPU resources are described as 100 `tencent.com/vcuda-core` for 1 GPU and N `tencent.com/vcuda-memory`
+for GPU memory (1 `tencent.com/vcuda-memory` means 256Mi of GPU memory). Because of the limitation of
+extended resource validation in Kubernetes, to support a GPU utilization limit you should add
+`tencent.com/vcuda-core-limit: XX` in the annotations field of a Pod.
+ 
+ **Notice: the value of `tencent.com/vcuda-core` must be either a multiple of 100 or a value
+smaller than 100. For example, 100, 200 or 20 is a valid value, but 150 or 250 is invalid.**
+
+- Submit a Pod with 0.3 GPU utilization and 7680MiB GPU memory with 0.5 GPU utilization limit
 
-change --incluster-mode from `false` to `true`, change image field to `<your repository>/public/gpu-manager:latest`, add serviceAccount filed to `gpu-manager-role`
+```
+apiVersion: v1
+kind: Pod
+metadata:
+  name: vcuda
+  annotations:
+    tencent.com/vcuda-core-limit: "50"
+spec:
+  restartPolicy: Never
+  hostNetwork: true
+  containers:
+  - image: <test-image>
+    name: nvidia
+    command: ['/usr/local/nvidia/bin/nvidia-smi']
+    resources:
+      requests:
+        tencent.com/vcuda-core: 30
+        tencent.com/vcuda-memory: 30
+      limits:
+        tencent.com/vcuda-core: 30
+        tencent.com/vcuda-memory: 30
+```
+
+- Submit a Pod with 2 GPU cards
+
+```
+apiVersion: v1
+kind: Pod
+metadata:
+  name: vcuda
+spec:
+  restartPolicy: Never
+  hostNetwork: true
+  containers:
+  - image: <test-image>
+    name: nvidia
+    command: ['/usr/local/nvidia/bin/nvidia-smi']
+    resources:
+      requests:
+        tencent.com/vcuda-core: 200
+        tencent.com/vcuda-memory: 60
+      limits:
+        tencent.com/vcuda-core: 200
+        tencent.com/vcuda-memory: 60
+```
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.2.0
+1.0.3
diff --git a/gpu-manager.yaml b/gpu-manager.yaml
@@ -15,6 +15,7 @@ spec:
       labels:
         name: gpu-manager-ds
     spec:
+      serviceAccount: gpu-manager
       tolerations:
         # This toleration is deprecated. Kept here for backward compatibility
         # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
@@ -33,7 +34,7 @@ spec:
         nvidia-device-enable: enable
       hostPID: true
       containers:
-        - image: gpu-manager:latest
+        - image: tkestack/gpu-manager:1.0.3
           imagePullPolicy: Always
           name: gpu-manager
           securityContext:
@@ -62,7 +63,7 @@ spec:
             - name: LOG_LEVEL
               value: "4"
             - name: EXTRA_FLAGS
-              value: "--incluster-mode=false"
+              value: "--incluster-mode=true"
             - name: NODE_NAME
               valueFrom:
                 fieldRef:
diff --git a/hack/build.sh b/hack/build.sh
@@ -42,7 +42,7 @@ function plugin::build_binary() {
 function plugin::generate_img() {
   readonly local commit=$(git log --no-merges --oneline | wc -l | sed -e 's,^[ \t]*,,')
   readonly local version=$(<"${ROOT}/VERSION")
-  readonly local base_img=${BASE_IMG:-"centos:7"}
+  readonly local base_img=${BASE_IMG:-"tkestack/vcuda:1.0"}
 
   mkdir -p "${ROOT}/go/build"
   tar czf "${ROOT}/go/build/gpu-manager-source.tar.gz" --transform 's,^,/gpu-manager-'${version}'/,' $(plugin::source_targets)
@@ -55,7 +55,7 @@ function plugin::generate_img() {
         --build-arg version=${version} \
         --build-arg commit=${commit} \
         --build-arg base_img=${base_img} \
-        -t $IMAGE_FILE .
+        -t "${IMAGE_FILE}:${version}" .
   )
 }
 
diff --git a/hack/common.sh b/hack/common.sh
@@ -3,7 +3,7 @@
 readonly PACKAGE="tkestack.io/gpu-manager"
 readonly BUILD_IMAGE_REPO=plugin-build
 readonly LOCAL_OUTPUT_IMAGE_STAGING="${ROOT}/go/images"
-readonly IMAGE_FILE=${IMAGE_FILE:-"gpu-manager:latest"}
+readonly IMAGE_FILE=${IMAGE_FILE:-"tkestack/gpu-manager"}
 readonly PROTO_IMAGE="proto-generater"
 
 function plugin::cleanup() {
@@ -75,4 +75,4 @@ function plugin::fmt_targets() {
     )
   )
   echo "${targets[@]}"
-}
+}

Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`	`readonly PACKAGE="tkestack.io/gpu-manager"`
`4`	`4`	`readonly BUILD_IMAGE_REPO=plugin-build`
`5`	`5`	`readonly LOCAL_OUTPUT_IMAGE_STAGING="${ROOT}/go/images"`
`6`		`-readonly IMAGE_FILE=${IMAGE_FILE:-"gpu-manager:latest"}`
	`6`	`+readonly IMAGE_FILE=${IMAGE_FILE:-"tkestack/gpu-manager"}`
`7`	`7`	`readonly PROTO_IMAGE="proto-generater"`
`8`	`8`
`9`	`9`	`function plugin::cleanup() {`
`@@ -75,4 +75,4 @@ function plugin::fmt_targets() {`
`75`	`75`	`)`
`76`	`76`	`)`
`77`	`77`	`echo "${targets[@]}"`
`78`		`-}`
	`78`	`+}`