提交 6eecda17 编写于 作者: L liqingping

docs: update docs and server ports

上级 c41397a8
variables:
REGISTRY: registry.sensetime.com/cloudnative4ai
VERSION: v1.0.0
VERSION: v1.0.1
PROJECT: di-orchestrator
# dind config
DOCKER_HOST: tcp://localhost:2376
......
# di-operator version
VERSION ?= v1.0.0
VERSION ?= v1.0.1
MASTER_VERSION := $(VERSION)
COMMIT_SHORT_SHA=$(shell git log -n 1 | head -n 1 | sed -e 's/^commit //' | head -c 8)
......
......@@ -29,14 +29,34 @@ di-server-7b86ff8df4-jfgmp 1/1 Running 0 59s
```bash
# submit DIJob
$ kubectl create -f config/samples/dijob-cartpole.yaml
$ kubectl create -f config/samples/dijob-gobigger.yaml
# get pod and you will see coordinator is created by di-operator
# a few seconds later, you will see collectors and learners created by di-server
$ kubectl get pod
NAME READY STATUS RESTARTS AGE
gobigger-test-0-0 1/1 Running 0 4m17s
gobigger-test-0-1 1/1 Running 0 4m17s
# get logs of coordinator
$ kubectl logs cartpole-dqn-coordinator
$ kubectl logs -n xlab gobigger-test-0-0
Bind subprocesses on these addresses: ['tcp://10.148.3.4:22270',
'tcp://10.148.3.4:22271']
[Warning] no enough data: 128/0
...
[Warning] no enough data: 128/120
Current Training: Train Iter(0) Loss(102.256)
Current Training: Train Iter(0) Loss(103.133)
Current Training: Train Iter(20) Loss(28.795)
Current Training: Train Iter(20) Loss(32.837)
...
Current Training: Train Iter(360) Loss(12.850)
Current Training: Train Iter(340) Loss(11.812)
Current Training: Train Iter(380) Loss(12.892)
Current Training: Train Iter(360) Loss(13.621)
Current Training: Train Iter(400) Loss(15.183)
Current Training: Train Iter(380) Loss(14.187)
Current Evaluation: Train Iter(404) Eval Reward(-1788.326)
```
## User Guide
......
......@@ -46,7 +46,7 @@ func NewCreateOptions(genFlags cmdcommon.GenericFlags) *CreateOptions {
GenericFlags: genFlags,
ServerBindAddress: ":8081",
ProbeAddress: ":8080",
MetricAddress: ":8089",
MetricAddress: ":8443",
}
}
......
......@@ -4260,9 +4260,9 @@ metadata:
namespace: di-system
spec:
ports:
- port: 8080
- port: 8081
protocol: TCP
targetPort: 8080
targetPort: 8081
selector:
control-plane: di-server
---
......@@ -4295,7 +4295,7 @@ spec:
envFrom:
- configMapRef:
name: di-config
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.0
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.1
imagePullPolicy: Always
livenessProbe:
httpGet:
......@@ -4348,7 +4348,7 @@ spec:
envFrom:
- configMapRef:
name: di-config
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.0
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.1
imagePullPolicy: Always
livenessProbe:
httpGet:
......
......@@ -30,7 +30,7 @@ spec:
- "--probe-addr=:8080"
- "--metric-addr=:8443"
- "--leader-elect"
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.0
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.1
imagePullPolicy: Always
name: manager
envFrom:
......
......@@ -21,7 +21,7 @@ spec:
args:
- --zap-devel=true
- --server-bind-address=:8081
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.0
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.1
imagePullPolicy: Always
name: server
envFrom:
......
......@@ -22,7 +22,7 @@ spec:
- "--probe-addr=:8080"
- "--metric-addr=:8443"
- --port=9443
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.0
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.1
imagePullPolicy: Always
name: webhook
securityContext:
......
......@@ -8,4 +8,4 @@ kind: Kustomization
images:
- name: registry.sensetime.com/cloudnative4ai/di-orchestrator
newName: registry.sensetime.com/cloudnative4ai/di-orchestrator
newTag: v1.0.0
newTag: v1.0.1
......@@ -8,5 +8,5 @@ spec:
control-plane: di-server
ports:
- protocol: TCP
port: 8080
targetPort: 8080
port: 8081
targetPort: 8081
......@@ -326,7 +326,7 @@ spec:
command:
- /di-orchestrator
- operator
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.0
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.1
imagePullPolicy: Always
livenessProbe:
httpGet:
......@@ -376,7 +376,7 @@ spec:
command:
- /di-orchestrator
- server
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.0
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.1
imagePullPolicy: Always
livenessProbe:
httpGet:
......@@ -421,7 +421,7 @@ spec:
command:
- /di-orchestrator
- webhook
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.0
image: registry.sensetime.com/cloudnative4ai/di-orchestrator:v1.0.1
imagePullPolicy: Always
livenessProbe:
httpGet:
......
此差异已折叠。
......@@ -151,13 +151,13 @@ The following http interfaces are provided:
| method | path | description |
|---|---|---|
| GET | /v1alpha2/replicas | list all collectors and learners |
| GET | /v1alpha2/replicas?namespace=xxx | list all collectors and learners in namespace |
| GET | /v1alpha2/replicas?namespace=xxx&coordinator=xxx | list all replicas belongs to coordinator |
| GET | /v1alpha2/replicas?namespace=xxx&aggregator=xxx | get learners belongs to aggregator |
| DELETE | /v1alpha2/replicas | delete some replicas. put data in request body |
| POST | /v1alpha2/replicas | create replicas. put data in request body |
| POST | /v1alpha2/replicas/failed | post failed replicas and request for recreation. put data in request body |
| GET | /v2alpha1/replicas | list all collectors and learners |
| GET | /v2alpha1/replicas?namespace=xxx | list all collectors and learners in namespace |
| GET | /v2alpha1/replicas?namespace=xxx&coordinator=xxx | list all replicas belonging to the coordinator |
| GET | /v2alpha1/replicas?namespace=xxx&aggregator=xxx | get learners belonging to the aggregator |
| DELETE | /v2alpha1/replicas | delete some replicas. put data in request body |
| POST | /v2alpha1/replicas | create replicas. put data in request body |
| POST | /v2alpha1/replicas/failed | post failed replicas and request for recreation. put data in request body |
## Advantages of DI Orchestrator
......
此差异已折叠。
......@@ -149,11 +149,10 @@ type Phase string
const (
// JobPending means the job has been submitted to the cluster,
// but not all the pods and services have been created,
// or not pods are running
// but not all the pods and services have been created
JobPending Phase = "Pending"
// JobStarted means the job has been scheduled and waits for running.
// JobStarting means the job has been created and waits for running.
JobStarting Phase = "Starting"
// JobRestarting means the job has been rescheduled and waits for restarting.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册