add amp debug

This commit is contained in:
Colin 2022-01-14 09:16:26 +00:00
parent c8e26afd67
commit c796977b6b
6 changed files with 203 additions and 130 deletions

6
Amp/.gdbinit Normal file
View File

@ -0,0 +1,6 @@
# gdb startup settings used while debugging a local PyTorch build.

# NOTE(review): presumably meant to add the PyTorch checkout to gdb's source
# search path. The gdb manual documents this as `directory <path>` or
# `set directories <path>` -- confirm that this spelling is accepted.
set directory /home/colin/develop/pytorch/
# Create a pending breakpoint automatically when the location cannot be
# resolved yet, instead of asking.
set breakpoint pending on
# Keep the command history in ~/.gdb_history and save it on exit.
set history filename ~/.gdb_history
set history save on
# Do not ask for confirmation before executing commands.
set confirm off

28
Amp/back_trace_autocast Normal file
View File

@ -0,0 +1,28 @@
#0 at::autocast::(anonymous namespace)::is_autocast_eligible (tensor=..., device_type=c10::DeviceType::CUDA)
at /home/colin/develop/pytorch/aten/src/ATen/autocast_mode.h:24
#1 0x00007fffb9799261 in at::autocast::is_eligible (arg=..., device_type=c10::DeviceType::CUDA)
at /home/colin/develop/pytorch/aten/src/ATen/autocast_mode.h:136
#2 0x00007fffb978dcec in at::autocast::cached_cast (to_type=c10::ScalarType::Half, arg=...,
device_type=c10::DeviceType::CUDA) at /home/colin/develop/pytorch/aten/src/ATen/autocast_mode.cpp:112
#3 0x00007fffb979d5d2 in at::autocast::WrapFunction_<(at::autocast::CastPolicy)0, (c10::DeviceType)1, at::Tensor (at::Tensor const&, at::Tensor const&), &at::mm, at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&> >::call(at::Tensor const&, at::Tensor const&) (args#0=..., args#1=...)
at /home/colin/develop/pytorch/aten/src/ATen/autocast_mode.cpp:174
#4 0x00007fffb96a9e2f in c10::impl::detail::WrapFunctionIntoRuntimeFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&> >::operator() (
this=0x555555f60280, args#0=..., args#1=...)
at /home/colin/develop/pytorch/aten/src/ATen/core/boxing/impl/WrapFunctionIntoRuntimeFunctor.h:18
#5 0x00007fffb96b1463 in c10::impl::wrap_kernel_functor_unboxed_<c10::impl::detail::WrapFunctionIntoRuntimeFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&> >, at::Tensor (at::Tensor const&, at::Tensor const&)>::call(c10::OperatorKernel*, c10::DispatchKeySet, at::Tensor const&, at::Tensor const&) (functor=0x555555f60280, args#0=..., args#1=...)
at /home/colin/develop/pytorch/aten/src/ATen/core/boxing/impl/make_boxed_from_unboxed_functor.h:424
#6 0x00007fffba3aeb3c in c10::callUnboxedKernelFunction<at::Tensor, at::Tensor const&, at::Tensor const&> (
unboxed_kernel_func=0x7fffb96b13fa <c10::impl::wrap_kernel_functor_unboxed_<c10::impl::detail::WrapFunctionIntoRuntimeFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&> >, at::Tensor (at::Tensor const&, at::Tensor const&)>::call(c10::OperatorKernel*, c10::DispatchKeySet, at::Tensor const&, at::Tensor const&)>, functor=0x555555f60280, dispatchKeySet=...)
at /home/colin/develop/pytorch/aten/src/ATen/core/boxing/KernelFunction_impl.h:57
#7 0x00007fffba86a05d in c10::KernelFunction::call<at::Tensor, at::Tensor const&, at::Tensor const&> (
dispatchKeySet=..., opHandle=..., this=0x555555f50088)
at /home/colin/develop/pytorch/aten/src/ATen/core/boxing/KernelFunction_impl.h:67
#8 c10::Dispatcher::call<at::Tensor, at::Tensor const&, at::Tensor const&>(c10::TypedOperatorHandle<at::Tensor (at::Tensor const&, at::Tensor const&)> const&, at::Tensor const&, at::Tensor const&) const (op=...,
this=0x7fffc7690520 <c10::Dispatcher::realSingleton()::_singleton>)
at /home/colin/develop/pytorch/aten/src/ATen/core/dispatch/Dispatcher.h:548
#9 c10::TypedOperatorHandle<at::Tensor (at::Tensor const&, at::Tensor const&)>::call(at::Tensor const&, at::Tensor const&) const (args#1=..., args#0=..., this=<optimized out>)
at /home/colin/develop/pytorch/aten/src/ATen/core/dispatch/Dispatcher.h:414
#10 at::_ops::mm::call (self=..., mat2=...) at /home/colin/develop/pytorch/build/aten/src/ATen/Operators_3.cpp:3328
#11 0x00007fffc85c1f2c in at::Tensor::mm (this=0x7fffffffdb00, mat2=...)
at /home/colin/develop/pytorch/build/aten/src/ATen/core/TensorBody.h:2787
#12 0x00007fffc868d046 in torch::autograd::<lambda(const at::Tensor&, const at::Tensor&)>::operator()(const at::Tensor &, const at::Tensor &) const (__closure=0x7fffffffdaf8, self=..., mat2=...)

19
Amp/log Normal file
View File

@ -0,0 +1,19 @@
b autocast_mode.h:23    # checks whether the cast is supported
b Dispatcher.h:548
b autocast_mode.cpp:174    # performs the cast
p dispatchKeySet.highestPriorityTypeId() $4 = c10::DispatchKey::AutocastCUDA
p kernel unboxed_kernel_func_ = 0x7fffbc8d7792 <c10::impl::wrap_kernel_functor_unboxed_
<c10::impl::detail::WrapFunctionIntoFunctor_<c10::CompileTimeFunctionPointer
<at::Tensor(c10::DispatchKeySet, const at::Tensor&, const at::Tensor&),
torch::autograd::VariableType::(anonymous namespace)::mm>, at::Tensor,
c10::guts::typelist::typelist<c10::DispatchKeySet, const at::Tensor&,
const at::Tensor&> >, at::Tensor(c10::DispatchKeySet, const at::Tensor&, const at::Tensor&)>
::call(c10::OperatorKernel *, c10::DispatchKeySet, const at::Tensor &, const at::Tensor &)>}
# /home/colin/develop/pytorch/aten/src/ATen/core/boxing/KernelFunction_impl.h:57
# break pytorch/build/aten/src/ATen/Operators_1.cpp:3641
# break Dispatcher.cpp:75
# autocast_mode.cpp:193
# Dispatcher.h:548

View File

@ -99,28 +99,31 @@ class Net(nn.Module):
model = Net() model = Net()
# model = model.cuda() model = model.cuda()
optimizer = optim.SGD(model.parameters(), lr=0.0005) optimizer = optim.SGD(model.parameters(), lr=0.0005)
for batch_idx, (data, target) in enumerate(train_loader): for batch_idx, (data, target) in enumerate(train_loader):
# data = data.cuda() data = data.cuda()
# target = target.cuda() target = target.cuda()
optimizer.zero_grad() optimizer.zero_grad()
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True, record_shapes=True) as prof: # with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True, record_shapes=True) as prof:
with record_function("model_inference"): # with record_function("model_inference"):
with autocast(): # with autocast():
output = model(data) # output = model(data)
loss = F.nll_loss(output, target) # loss = F.nll_loss(output, target)
# # print(prof.key_averages().table(sort_by="cpu_time_total"))
# print(prof.key_averages().table(sort_by="cpu_time_total")) # print(prof.key_averages().table(sort_by="cpu_time_total"))
print(prof.key_averages().table(sort_by="cpu_time_total")) # prof.export_chrome_trace("trace.json")
prof.export_chrome_trace("trace.json")
exit() with autocast():
# 反向传播在autocast上下文之外 output = model(data)
loss.backward() loss = F.nll_loss(output, target)
optimizer.step()
# # 反向传播在autocast上下文之外
# loss.backward()
# optimizer.step()
# from torch.cuda.amp import autocast as autocast # from torch.cuda.amp import autocast as autocast

17
Amp/test_gdb.py Normal file
View File

@ -0,0 +1,17 @@
import torch
from torch.cuda.amp import autocast as autocast
from torch.profiler import profile, ProfilerActivity, record_function
# Minimal script that runs a single torch.mm on a CUDA tensor inside an
# autocast region (presumably the target attached to gdb, given the file name).
data = torch.ones((300, 300))
data = data.cuda()

# Profiler variant of the same call, kept for reference (disabled):
# with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True, record_shapes=True) as prof:
#     with record_function("model_inference"):
#         with autocast():
#             output = torch.mm(data, data)
# print(prof.key_averages().table(sort_by="cpu_time_total"))

# The matmul has to be the indented body of the `with` statement; a `with`
# header followed by an unindented line is a syntax error.
with autocast():
    output = torch.mm(data, data)
print(output.cpu())

View File

@ -14,8 +14,8 @@
"traceEvents": [ "traceEvents": [
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::empty", "pid": 14436, "tid": 14436, "name": "aten::empty", "pid": 1673, "tid": 1673,
"ts": 1641800282593594, "dur": 7, "ts": 1641986274562095, "dur": 8,
"args": { "args": {
"External id": 2, "External id": 2,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -24,8 +24,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::zero_", "pid": 14436, "tid": 14436, "name": "aten::zero_", "pid": 1673, "tid": 1673,
"ts": 1641800282593614, "dur": 1, "ts": 1641986274562123, "dur": 1,
"args": { "args": {
"External id": 3, "External id": 3,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -34,8 +34,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::zeros", "pid": 14436, "tid": 14436, "name": "aten::zeros", "pid": 1673, "tid": 1673,
"ts": 1641800282593579, "dur": 38, "ts": 1641986274561890, "dur": 238,
"args": { "args": {
"External id": 1, "External id": 1,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -44,8 +44,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::empty", "pid": 14436, "tid": 14436, "name": "aten::empty", "pid": 1673, "tid": 1673,
"ts": 1641800282593648, "dur": 1, "ts": 1641986274562166, "dur": 1,
"args": { "args": {
"External id": 5, "External id": 5,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -54,8 +54,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::empty", "pid": 14436, "tid": 14436, "name": "aten::empty", "pid": 1673, "tid": 1673,
"ts": 1641800282602596, "dur": 6, "ts": 1641986274568374, "dur": 8,
"args": { "args": {
"External id": 10, "External id": 10,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -64,8 +64,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::as_strided_", "pid": 14436, "tid": 14436, "name": "aten::as_strided_", "pid": 1673, "tid": 1673,
"ts": 1641800282602862, "dur": 4, "ts": 1641986274568866, "dur": 5,
"args": { "args": {
"External id": 11, "External id": 11,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -74,8 +74,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::mkldnn_convolution", "pid": 14436, "tid": 14436, "name": "aten::mkldnn_convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282593898, "dur": 8986, "ts": 1641986274562855, "dur": 6026,
"args": { "args": {
"External id": 9, "External id": 9,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -84,8 +84,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::_convolution", "pid": 14436, "tid": 14436, "name": "aten::_convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282593879, "dur": 9007, "ts": 1641986274562686, "dur": 6199,
"args": { "args": {
"External id": 8, "External id": 8,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -94,8 +94,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::convolution", "pid": 14436, "tid": 14436, "name": "aten::convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282593860, "dur": 9027, "ts": 1641986274562654, "dur": 6233,
"args": { "args": {
"External id": 7, "External id": 7,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -104,8 +104,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::conv2d", "pid": 14436, "tid": 14436, "name": "aten::conv2d", "pid": 1673, "tid": 1673,
"ts": 1641800282593839, "dur": 9051, "ts": 1641986274562628, "dur": 6264,
"args": { "args": {
"External id": 6, "External id": 6,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -114,8 +114,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::max_pool2d_with_indices", "pid": 14436, "tid": 14436, "name": "aten::max_pool2d_with_indices", "pid": 1673, "tid": 1673,
"ts": 1641800282603040, "dur": 428, "ts": 1641986274569358, "dur": 613,
"args": { "args": {
"External id": 13, "External id": 13,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -124,8 +124,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::max_pool2d", "pid": 14436, "tid": 14436, "name": "aten::max_pool2d", "pid": 1673, "tid": 1673,
"ts": 1641800282603015, "dur": 461, "ts": 1641986274569259, "dur": 716,
"args": { "args": {
"External id": 12, "External id": 12,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -134,8 +134,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::sigmoid", "pid": 14436, "tid": 14436, "name": "aten::sigmoid", "pid": 1673, "tid": 1673,
"ts": 1641800282603523, "dur": 365, "ts": 1641986274570156, "dur": 301,
"args": { "args": {
"External id": 14, "External id": 14,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -144,8 +144,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::empty", "pid": 14436, "tid": 14436, "name": "aten::empty", "pid": 1673, "tid": 1673,
"ts": 1641800282604168, "dur": 2, "ts": 1641986274570899, "dur": 4,
"args": { "args": {
"External id": 19, "External id": 19,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -154,8 +154,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::as_strided_", "pid": 14436, "tid": 14436, "name": "aten::as_strided_", "pid": 1673, "tid": 1673,
"ts": 1641800282604218, "dur": 0, "ts": 1641986274570972, "dur": 1,
"args": { "args": {
"External id": 20, "External id": 20,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -164,8 +164,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::mkldnn_convolution", "pid": 14436, "tid": 14436, "name": "aten::mkldnn_convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282603942, "dur": 280, "ts": 1641986274570551, "dur": 429,
"args": { "args": {
"External id": 18, "External id": 18,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -174,8 +174,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::_convolution", "pid": 14436, "tid": 14436, "name": "aten::_convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282603931, "dur": 293, "ts": 1641986274570531, "dur": 452,
"args": { "args": {
"External id": 17, "External id": 17,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -184,8 +184,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::convolution", "pid": 14436, "tid": 14436, "name": "aten::convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282603921, "dur": 308, "ts": 1641986274570514, "dur": 476,
"args": { "args": {
"External id": 16, "External id": 16,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -194,8 +194,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::conv2d", "pid": 14436, "tid": 14436, "name": "aten::conv2d", "pid": 1673, "tid": 1673,
"ts": 1641800282603911, "dur": 319, "ts": 1641986274570497, "dur": 495,
"args": { "args": {
"External id": 15, "External id": 15,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -204,8 +204,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::max_pool2d_with_indices", "pid": 14436, "tid": 14436, "name": "aten::max_pool2d_with_indices", "pid": 1673, "tid": 1673,
"ts": 1641800282604261, "dur": 19, "ts": 1641986274571045, "dur": 22,
"args": { "args": {
"External id": 22, "External id": 22,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -214,8 +214,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::max_pool2d", "pid": 14436, "tid": 14436, "name": "aten::max_pool2d", "pid": 1673, "tid": 1673,
"ts": 1641800282604251, "dur": 31, "ts": 1641986274571027, "dur": 42,
"args": { "args": {
"External id": 21, "External id": 21,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -224,8 +224,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::sigmoid", "pid": 14436, "tid": 14436, "name": "aten::sigmoid", "pid": 1673, "tid": 1673,
"ts": 1641800282604295, "dur": 8, "ts": 1641986274571090, "dur": 16,
"args": { "args": {
"External id": 23, "External id": 23,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -234,8 +234,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::empty", "pid": 14436, "tid": 14436, "name": "aten::empty", "pid": 1673, "tid": 1673,
"ts": 1641800282605085, "dur": 2, "ts": 1641986274571929, "dur": 4,
"args": { "args": {
"External id": 28, "External id": 28,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -244,8 +244,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::as_strided_", "pid": 14436, "tid": 14436, "name": "aten::as_strided_", "pid": 1673, "tid": 1673,
"ts": 1641800282606358, "dur": 1, "ts": 1641986274573076, "dur": 1,
"args": { "args": {
"External id": 29, "External id": 29,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -254,8 +254,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::mkldnn_convolution", "pid": 14436, "tid": 14436, "name": "aten::mkldnn_convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282604356, "dur": 2008, "ts": 1641986274571201, "dur": 1884,
"args": { "args": {
"External id": 27, "External id": 27,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -264,8 +264,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::_convolution", "pid": 14436, "tid": 14436, "name": "aten::_convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282604344, "dur": 2021, "ts": 1641986274571182, "dur": 1906,
"args": { "args": {
"External id": 26, "External id": 26,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -274,8 +274,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::convolution", "pid": 14436, "tid": 14436, "name": "aten::convolution", "pid": 1673, "tid": 1673,
"ts": 1641800282604333, "dur": 2034, "ts": 1641986274571163, "dur": 1927,
"args": { "args": {
"External id": 25, "External id": 25,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -284,8 +284,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::conv2d", "pid": 14436, "tid": 14436, "name": "aten::conv2d", "pid": 1673, "tid": 1673,
"ts": 1641800282604322, "dur": 2046, "ts": 1641986274571146, "dur": 1946,
"args": { "args": {
"External id": 24, "External id": 24,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -294,8 +294,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::view", "pid": 14436, "tid": 14436, "name": "aten::view", "pid": 1673, "tid": 1673,
"ts": 1641800282606400, "dur": 13, "ts": 1641986274573139, "dur": 17,
"args": { "args": {
"External id": 30, "External id": 30,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -304,8 +304,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::_log_softmax", "pid": 14436, "tid": 14436, "name": "aten::_log_softmax", "pid": 1673, "tid": 1673,
"ts": 1641800282606479, "dur": 147, "ts": 1641986274573506, "dur": 279,
"args": { "args": {
"External id": 32, "External id": 32,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -314,8 +314,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::log_softmax", "pid": 14436, "tid": 14436, "name": "aten::log_softmax", "pid": 1673, "tid": 1673,
"ts": 1641800282606463, "dur": 164, "ts": 1641986274573487, "dur": 301,
"args": { "args": {
"External id": 31, "External id": 31,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -324,8 +324,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::nll_loss_forward", "pid": 14436, "tid": 14436, "name": "aten::nll_loss_forward", "pid": 1673, "tid": 1673,
"ts": 1641800282606769, "dur": 15, "ts": 1641986274574609, "dur": 20,
"args": { "args": {
"External id": 35, "External id": 35,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -334,8 +334,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::nll_loss", "pid": 14436, "tid": 14436, "name": "aten::nll_loss", "pid": 1673, "tid": 1673,
"ts": 1641800282606751, "dur": 34, "ts": 1641986274574475, "dur": 156,
"args": { "args": {
"External id": 34, "External id": 34,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -344,8 +344,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "aten::nll_loss_nd", "pid": 14436, "tid": 14436, "name": "aten::nll_loss_nd", "pid": 1673, "tid": 1673,
"ts": 1641800282606731, "dur": 63, "ts": 1641986274574185, "dur": 458,
"args": { "args": {
"External id": 33, "External id": 33,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -354,8 +354,8 @@
}, },
{ {
"ph": "X", "cat": "cpu_op", "ph": "X", "cat": "cpu_op",
"name": "model_inference", "pid": 14436, "tid": 14436, "name": "model_inference", "pid": 1673, "tid": 1673,
"ts": 1641800282593642, "dur": 13169, "ts": 1641986274562158, "dur": 12505,
"args": { "args": {
"External id": 4, "External id": 4,
"Trace name": "PyTorch Profiler", "Trace iteration": 0, "Trace name": "PyTorch Profiler", "Trace iteration": 0,
@ -364,8 +364,8 @@
}, },
{ {
"ph": "X", "cat": "Runtime", "ph": "X", "cat": "Runtime",
"name": "cudaGetDeviceCount", "pid": 14436, "tid": 14436, "name": "cudaGetDeviceCount", "pid": 1673, "tid": 1673,
"ts": 1641800282606912, "dur": 328, "ts": 1641986274574776, "dur": 524,
"args": { "args": {
"cbid": 3, "correlation": 1, "cbid": 3, "correlation": 1,
"external id": 0, "external ts": 0 "external id": 0, "external ts": 0
@ -373,8 +373,8 @@
}, },
{ {
"ph": "X", "cat": "Runtime", "ph": "X", "cat": "Runtime",
"name": "cudaGetDeviceProperties", "pid": 14436, "tid": 14436, "name": "cudaGetDeviceProperties", "pid": 1673, "tid": 1673,
"ts": 1641800282607278, "dur": 691, "ts": 1641986274575567, "dur": 176,
"args": { "args": {
"cbid": 4, "correlation": 4, "cbid": 4, "correlation": 4,
"external id": 0, "external ts": 0 "external id": 0, "external ts": 0
@ -382,8 +382,8 @@
}, },
{ {
"ph": "X", "cat": "Runtime", "ph": "X", "cat": "Runtime",
"name": "cudaGetDeviceCount", "pid": 14436, "tid": 14436, "name": "cudaGetDeviceCount", "pid": 1673, "tid": 1673,
"ts": 1641800282607978, "dur": 0, "ts": 1641986274575757, "dur": 0,
"args": { "args": {
"cbid": 3, "correlation": 6, "cbid": 3, "correlation": 6,
"external id": 0, "external ts": 0 "external id": 0, "external ts": 0
@ -391,8 +391,8 @@
}, },
{ {
"ph": "X", "cat": "Runtime", "ph": "X", "cat": "Runtime",
"name": "cudaGetDeviceProperties", "pid": 14436, "tid": 14436, "name": "cudaGetDeviceProperties", "pid": 1673, "tid": 1673,
"ts": 1641800282607980, "dur": 117, "ts": 1641986274575759, "dur": 157,
"args": { "args": {
"cbid": 4, "correlation": 7, "cbid": 4, "correlation": 7,
"external id": 0, "external ts": 0 "external id": 0, "external ts": 0
@ -400,189 +400,189 @@
}, },
{ {
"ph": "X", "cat": "Runtime", "ph": "X", "cat": "Runtime",
"name": "cudaDeviceSynchronize", "pid": 14436, "tid": 14436, "name": "cudaDeviceSynchronize", "pid": 1673, "tid": 1673,
"ts": 1641800282608432, "dur": 3091248, "ts": 1641986274576391, "dur": 3469523,
"args": { "args": {
"cbid": 165, "correlation": 10, "cbid": 165, "correlation": 10,
"external id": 0, "external ts": 0 "external id": 0, "external ts": 0
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 14436, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 1673, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 14436, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 1673, "tid": 0,
"args": { "args": {
"labels": "CPU" "labels": "CPU"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 14436, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 1673, "tid": 0,
"args": { "args": {
"sort_index": 14436 "sort_index": 1673
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 0, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 0, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 0, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 0, "tid": 0,
"args": { "args": {
"labels": "GPU 0" "labels": "GPU 0"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 0, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 0, "tid": 0,
"args": { "args": {
"sort_index": 16777216 "sort_index": 16777216
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 1, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 1, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 1, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 1, "tid": 0,
"args": { "args": {
"labels": "GPU 1" "labels": "GPU 1"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 1, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 1, "tid": 0,
"args": { "args": {
"sort_index": 16777217 "sort_index": 16777217
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 2, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 2, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 2, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 2, "tid": 0,
"args": { "args": {
"labels": "GPU 2" "labels": "GPU 2"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 2, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 2, "tid": 0,
"args": { "args": {
"sort_index": 16777218 "sort_index": 16777218
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 3, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 3, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 3, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 3, "tid": 0,
"args": { "args": {
"labels": "GPU 3" "labels": "GPU 3"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 3, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 3, "tid": 0,
"args": { "args": {
"sort_index": 16777219 "sort_index": 16777219
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 4, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 4, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 4, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 4, "tid": 0,
"args": { "args": {
"labels": "GPU 4" "labels": "GPU 4"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 4, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 4, "tid": 0,
"args": { "args": {
"sort_index": 16777220 "sort_index": 16777220
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 5, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 5, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 5, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 5, "tid": 0,
"args": { "args": {
"labels": "GPU 5" "labels": "GPU 5"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 5, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 5, "tid": 0,
"args": { "args": {
"sort_index": 16777221 "sort_index": 16777221
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 6, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 6, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 6, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 6, "tid": 0,
"args": { "args": {
"labels": "GPU 6" "labels": "GPU 6"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 6, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 6, "tid": 0,
"args": { "args": {
"sort_index": 16777222 "sort_index": 16777222
} }
}, },
{ {
"name": "process_name", "ph": "M", "ts": 1641800282593491, "pid": 7, "tid": 0, "name": "process_name", "ph": "M", "ts": 1641986274561617, "pid": 7, "tid": 0,
"args": { "args": {
"name": "python" "name": "python"
} }
}, },
{ {
"name": "process_labels", "ph": "M", "ts": 1641800282593491, "pid": 7, "tid": 0, "name": "process_labels", "ph": "M", "ts": 1641986274561617, "pid": 7, "tid": 0,
"args": { "args": {
"labels": "GPU 7" "labels": "GPU 7"
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 7, "tid": 0, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 7, "tid": 0,
"args": { "args": {
"sort_index": 16777223 "sort_index": 16777223
} }
}, },
{ {
"name": "thread_name", "ph": "M", "ts": 1641800282593491, "pid": 14436, "tid": 14436, "name": "thread_name", "ph": "M", "ts": 1641986274561617, "pid": 1673, "tid": 1673,
"args": { "args": {
"name": "thread 14436 (python)" "name": "thread 1673 (python)"
} }
}, },
{ {
"name": "thread_sort_index", "ph": "M", "ts": 1641800282593491, "pid": 14436, "tid": 14436, "name": "thread_sort_index", "ph": "M", "ts": 1641986274561617, "pid": 1673, "tid": 1673,
"args": { "args": {
"sort_index": 14436 "sort_index": 1673
} }
}, },
{ {
"ph": "X", "cat": "Trace", "ts": 1641800282593491, "dur": 3106207, "ph": "X", "cat": "Trace", "ts": 1641986274561617, "dur": 3484317,
"pid": "Spans", "tid": "PyTorch Profiler", "pid": "Spans", "tid": "PyTorch Profiler",
"name": "PyTorch Profiler (0)", "name": "PyTorch Profiler (0)",
"args": { "args": {
@ -590,7 +590,7 @@
} }
}, },
{ {
"name": "process_sort_index", "ph": "M", "ts": 1641800282593491, "name": "process_sort_index", "ph": "M", "ts": 1641986274561617,
"pid": "Spans", "tid": 0, "pid": "Spans", "tid": 0,
"args": { "args": {
"sort_index": 536870912 "sort_index": 536870912
@ -598,10 +598,10 @@
}, },
{ {
"name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g",
"pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 1641800282593491 "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 1641986274561617
}, },
{ {
"name": "Record Window End", "ph": "i", "s": "g", "name": "Record Window End", "ph": "i", "s": "g",
"pid": "", "tid": "", "ts": 1641800285699878 "pid": "", "tid": "", "ts": 1641986278046116
} }
]} ]}