anuvaad / OpenNMT-py · Commits

Unverified commit e723f2aa, authored 6 years ago by Vincent Nguyen, committed by GitHub 6 years ago.
Merge pull request #926 from vince62s/fix-bptt

Fix bptt cf #891

Parents: 57df7434, a085d858
Showing 2 changed files with 25 additions and 11 deletions:

    onmt/trainer.py  +22 −11
    train.py          +3 −0
onmt/trainer.py  +22 −11
@@ -254,7 +254,7 @@ class Trainer(object):
 
         for batch in true_batchs:
             target_size = batch.tgt.size(0)
-            # Truncated BPTT
+            # Truncated BPTT: reminder not compatible with accum > 1
             if self.trunc_size:
                 trunc_size = self.trunc_size
             else:
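The comment changed above refers to the truncation loop further down in this method (outside the lines shown in this hunk), which walks the target sequence in windows of trunc_size. A minimal sketch of that windowing, with made-up sizes; the loop bounds mirror the surrounding trainer code, but the concrete numbers are illustrative only:

# Illustration only: how a trunc_size window walks a target sequence.
# target_size stands in for batch.tgt.size(0); the numbers are made up.
target_size = 50
trunc_size = 20
for j in range(0, target_size - 1, trunc_size):
    print(j, min(j + trunc_size, target_size))
# -> 0 20 / 20 40 / 40 50: three truncated-BPTT windows for this batch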
@@ -287,20 +287,31 @@ class Trainer(object):
                 total_stats.update(batch_stats)
                 report_stats.update(batch_stats)
 
-                # 4. Update the parameters and statistics.
-                if self.grad_accum_count == 1:
-                    # Multi GPU gradient gather
-                    if self.n_gpu > 1:
-                        grads = [p.grad.data for p in self.model.parameters()
-                                 if p.requires_grad
-                                 and p.grad is not None]
-                        onmt.utils.distributed.all_reduce_and_rescale_tensors(
-                            grads, float(1))
-                    self.optim.step()
 
                 # If truncated, don't backprop fully.
                 if dec_state is not None:
                     dec_state.detach()
 
+            # 3.bis Multi GPU gradient gather
+            if self.n_gpu > 1:
+                grads = [p.grad.data for p in self.model.parameters()
+                         if p.requires_grad
+                         and p.grad is not None]
+                onmt.utils.distributed.all_reduce_and_rescale_tensors(
+                    grads, float(1))
+
+            # 4. Update the parameters and statistics.
+            self.optim.step()
+
+        # in case of multi step gradient accumulation,
+        # update only after accum batches
+        if self.grad_accum_count > 1:
+            if self.n_gpu > 1:
+                grads = [p.grad.data for p in self.model.parameters()
+                         if p.requires_grad
+                         and p.grad is not None]
+                onmt.utils.distributed.all_reduce_and_rescale_tensors(
+                    grads, float(1))
+            self.optim.step()
 
     def _start_report_manager(self, start_time=None):
         """
train.py  +3 −0
@@ -18,6 +18,9 @@ def main(opt):
     if opt.epochs:
         raise AssertionError("-epochs is deprecated please use -train_steps.")
 
+    if opt.truncated_decoder > 0 and opt.accum_count > 1:
+        raise AssertionError("BPTT is not compatible with -accum > 1")
+
     if len(opt.gpuid) > 1:
         multi_main(opt)
     else: