Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
oneflow
提交
ea2ad7f0
O
oneflow
项目概览
Oneflow-Inc
/
oneflow
上一次同步 2 年多
通知
13
Star
2733
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
oneflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
ea2ad7f0
编写于
10月 10, 2017
作者:
W
willzhang4a58
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix bug for ctrl server, not response for pushplan
上级
c259b77a
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
20 addition
and
14 deletion
+20
-14
examples/mnist/train/job.prototxt
examples/mnist/train/job.prototxt
+1
-0
examples/mnist/train/resource.prototxt
examples/mnist/train/resource.prototxt
+2
-2
oneflow/core/control/ctrl_server.cpp
oneflow/core/control/ctrl_server.cpp
+10
-6
oneflow/core/persistence/file_system.cpp
oneflow/core/persistence/file_system.cpp
+2
-2
oneflow/core/persistence/hadoop/hadoop_file_system.cpp
oneflow/core/persistence/hadoop/hadoop_file_system.cpp
+2
-2
scripts/run_scheduler.sh
scripts/run_scheduler.sh
+3
-2
未找到文件。
examples/mnist/train/job.prototxt
浏览文件 @
ea2ad7f0
...
...
@@ -7,6 +7,7 @@ default_data_type: kFloat
use_async_cpu_stream: false
global_fs_conf {
hdfs_conf {
namenode: "hdfs://192.168.1.11:9000"
}
}
train_conf {
...
...
examples/mnist/train/resource.prototxt
浏览文件 @
ea2ad7f0
machine {
addr: "192.168.1.11"
port:
9000
port:
6666
name: "192.168.1.11"
}
machine {
addr: "192.168.1.13"
port:
9000
port:
6666
name: "192.168.1.13"
}
...
...
oneflow/core/control/ctrl_server.cpp
浏览文件 @
ea2ad7f0
...
...
@@ -131,16 +131,18 @@ void CtrlServer::WaitUntilDoneHandler(
void
CtrlServer
::
PushPlanHandler
(
CtrlCall
<
PushPlanRequest
,
PushPlanResponse
>*
call
)
{
plan_
.
reset
(
new
Plan
(
call
->
request
().
plan
()));
for
(
auto
call
:
pending_plan_calls_
)
{
*
(
call
->
mut_response
()
->
mutable_plan
())
=
*
plan_
;
call
->
SendResponse
();
for
(
auto
pending_
call
:
pending_plan_calls_
)
{
*
(
pending_
call
->
mut_response
()
->
mutable_plan
())
=
*
plan_
;
pending_
call
->
SendResponse
();
}
call
->
SendResponse
();
ENQUEUE_REQUEST
(
PushPlan
);
}
void
CtrlServer
::
ClearPlanHandler
(
CtrlCall
<
ClearPlanRequest
,
ClearPlanResponse
>*
call
)
{
plan_
.
reset
();
call
->
SendResponse
();
ENQUEUE_REQUEST
(
ClearPlan
);
}
...
...
@@ -158,16 +160,18 @@ void CtrlServer::PullPlanHandler(
void
CtrlServer
::
PushPortHandler
(
CtrlCall
<
PushPortRequest
,
PushPortResponse
>*
call
)
{
port_
=
call
->
request
().
port
();
for
(
auto
call
:
pending_port_calls_
)
{
call
->
mut_response
()
->
set_port
(
port_
);
call
->
SendResponse
();
for
(
auto
pending_
call
:
pending_port_calls_
)
{
pending_
call
->
mut_response
()
->
set_port
(
port_
);
pending_
call
->
SendResponse
();
}
call
->
SendResponse
();
ENQUEUE_REQUEST
(
PushPort
);
}
void
CtrlServer
::
ClearPortHandler
(
CtrlCall
<
ClearPortRequest
,
ClearPortResponse
>*
call
)
{
port_
=
-
1
;
call
->
SendResponse
();
ENQUEUE_REQUEST
(
ClearPort
);
}
...
...
oneflow/core/persistence/file_system.cpp
浏览文件 @
ea2ad7f0
...
...
@@ -2,6 +2,7 @@
#include <errno.h>
#include "oneflow/core/common/str_util.h"
#include "oneflow/core/job/job_desc.h"
#include "oneflow/core/persistence/hadoop/hadoop_file_system.h"
#include "oneflow/core/persistence/posix/posix_file_system.h"
#include "oneflow/core/persistence/windows/windows_file_system.h"
...
...
@@ -91,8 +92,7 @@ struct GlobalFSConstructor {
CHECK_EQ
(
JobDesc
::
Singleton
()
->
resource
().
machine
().
size
(),
1
);
gfs
=
LocalFS
();
}
else
if
(
gfs_conf
.
has_hdfs_conf
())
{
// static fs::FileSystem* fs = new
// fs::HadoopFileSystem(gfs_conf.hdfs_conf()); return fs;
gfs
=
new
HadoopFileSystem
(
gfs_conf
.
hdfs_conf
());
}
else
{
UNEXPECTED_RUN
();
}
...
...
oneflow/core/persistence/hadoop/hadoop_file_system.cpp
浏览文件 @
ea2ad7f0
...
...
@@ -128,9 +128,9 @@ void LibHDFS::LoadAndBind() {
#else
const
char
*
kLibHdfsDso
=
"libhdfs.so"
;
#endif
char
*
hdfs_home
=
getenv
(
"HADOOP_H
DFS_H
OME"
);
char
*
hdfs_home
=
getenv
(
"HADOOP_HOME"
);
if
(
hdfs_home
==
nullptr
)
{
PLOG
(
WARNING
)
<<
"Environment variable HADOOP_H
DFS_H
OME not set"
;
PLOG
(
WARNING
)
<<
"Environment variable HADOOP_HOME not set"
;
status_
=
false
;
return
;
}
...
...
scripts/run_scheduler.sh
浏览文件 @
ea2ad7f0
...
...
@@ -8,13 +8,14 @@ SCHEDULER_CMD='GLOG_logtostderr=0 GLOG_log_dir=./log GLOG_v=0 GLOG_logbuflevel=-
set
+e
for
host
in
"
${
hosts
[@]
}
"
do
ssh
$USER
@
$host
"/usr/sbin/fuser -k 9000/tcp"
ssh
$USER
@
$host
"/usr/sbin/fuser -k 6666/tcp"
ssh
$USER
@
$host
"mkdir ~/oneflow_temp"
done
set
-e
for
host
in
"
${
hosts
[@]
}
"
do
ssh
$USER
@
$host
'rm -rf ~/oneflow_temp
&& mkdir ~/oneflow_temp
'
ssh
$USER
@
$host
'rm -rf ~/oneflow_temp
/*
'
scp ./compiler ./runtime ./scheduler ./
*
.prototxt
$USER
@
$host
:~/oneflow_temp
ssh
$USER
@
$host
"cd ~/oneflow_temp;
$SCHEDULER_CMD
-this_machine_name=
$host
&"
done
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录