fxmarty commited on
Commit
0fbd324
·
verified ·
1 Parent(s): 9154ec9

Upload cuda_matmul_trace_vllm.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. cuda_matmul_trace_vllm.json +629 -0
cuda_matmul_trace_vllm.json ADDED
@@ -0,0 +1,629 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "schemaVersion": 1,
4
+ "deviceProperties": [
5
+ {
6
+ "id": 0, "name": "AMD Instinct MI300X VF", "totalGlobalMem": 205571227648,
7
+ "computeMajor": 9, "computeMinor": 4,
8
+ "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048,
9
+ "regsPerBlock": 65536, "warpSize": 64,
10
+ "sharedMemPerBlock": 65536, "numSms": 304
11
+ , "maxSharedMemoryPerMultiProcessor": 65536
12
+ }
13
+ ],
14
+ "roctracer_version": 4.1,
15
+ "hip_runtime_version": 60342131,
16
+ "hip_driver_version": 60342131,
17
+ "trace_id": "3B55A95D004243E49AFCA3BA14757EE7",
18
+ "traceEvents": [
19
+ {
20
+ "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 455, "tid": 455,
21
+ "ts": 7429064525509.327, "dur": 297396.921,
22
+ "args": {
23
+ "External id": 1,"Record function id": 0, "Fwd thread id": 0, "Sequence number": 0, "finished": true, "Ev Idx": 0
24
+ }
25
+ },
26
+ {
27
+ "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 455, "tid": 455,
28
+ "ts": 7429064525539.719, "dur": 297357.448,
29
+ "args": {
30
+ "External id": 2,"Record function id": 0, "Fwd thread id": 0, "Sequence number": 0, "finished": true, "Ev Idx": 1
31
+ }
32
+ },
33
+ {
34
+ "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 455, "tid": 455,
35
+ "ts": 7429064525887.174, "dur": 39.320,
36
+ "args": {
37
+ "External id": 3,"Record function id": 0, "finished": true, "Ev Idx": 2
38
+ }
39
+ },
40
+ {
41
+ "ph": "X", "cat": "cuda_runtime", "name": "hipStreamIsCapturing", "pid": 455, "tid": 455,
42
+ "ts": 7429064525586.815, "dur": 2.314,
43
+ "args": {
44
+ "External id": 2, "cid": 290, "correlation": 1
45
+ }
46
+ },
47
+ {
48
+ "ph": "f", "id": 1, "pid": 455, "tid": 455, "ts": 7429064525586.815,
49
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
50
+ },
51
+ {
52
+ "ph": "X", "cat": "cuda_runtime", "name": "hipMalloc", "pid": 455, "tid": 455,
53
+ "ts": 7429064525591.437, "dur": 75.397,
54
+ "args": {
55
+ "External id": 2, "bytes": 65011712, "cid": 108, "correlation": 2, "ptr": "0x55fa15629e50"
56
+ }
57
+ },
58
+ {
59
+ "ph": "s", "id": 2, "pid": 455, "tid": 455, "ts": 7429064525591.437,
60
+ "cat": "ac2g", "name": "ac2g"
61
+ },
62
+ {
63
+ "ph": "X", "cat": "cuda_runtime", "name": "hipMalloc", "pid": 455, "tid": 455,
64
+ "ts": 7429064525740.086, "dur": 35.336,
65
+ "args": {
66
+ "External id": 2, "bytes": 2621440, "cid": 108, "correlation": 3, "ptr": "0x7ffe7048af50"
67
+ }
68
+ },
69
+ {
70
+ "ph": "s", "id": 3, "pid": 455, "tid": 455, "ts": 7429064525740.086,
71
+ "cat": "ac2g", "name": "ac2g"
72
+ },
73
+ {
74
+ "ph": "X", "cat": "cuda_runtime", "name": "hipMemset", "pid": 455, "tid": 455,
75
+ "ts": 7429064525782.260, "dur": 14.254,
76
+ "args": {
77
+ "External id": 2, "cid": 154, "correlation": 4
78
+ }
79
+ },
80
+ {
81
+ "ph": "f", "id": 4, "pid": 455, "tid": 455, "ts": 7429064525782.260,
82
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
83
+ },
84
+ {
85
+ "ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
86
+ "ts": 7429064525799.123, "dur": 0.763,
87
+ "args": {
88
+ "External id": 2, "cid": 370, "correlation": 5
89
+ }
90
+ },
91
+ {
92
+ "ph": "f", "id": 5, "pid": 455, "tid": 455, "ts": 7429064525799.123,
93
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
94
+ },
95
+ {
96
+ "ph": "X", "cat": "cuda_runtime", "name": "hipStreamIsCapturing", "pid": 455, "tid": 455,
97
+ "ts": 7429064525892.788, "dur": 0.470,
98
+ "args": {
99
+ "External id": 3, "cid": 290, "correlation": 6
100
+ }
101
+ },
102
+ {
103
+ "ph": "f", "id": 6, "pid": 455, "tid": 455, "ts": 7429064525892.788,
104
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
105
+ },
106
+ {
107
+ "ph": "X", "cat": "cuda_runtime", "name": "hipMalloc", "pid": 455, "tid": 455,
108
+ "ts": 7429064525893.749, "dur": 29.083,
109
+ "args": {
110
+ "External id": 3, "bytes": 79691776, "cid": 108, "correlation": 7, "ptr": "0x7ffe7048a140"
111
+ }
112
+ },
113
+ {
114
+ "ph": "s", "id": 7, "pid": 455, "tid": 455, "ts": 7429064525893.749,
115
+ "cat": "ac2g", "name": "ac2g"
116
+ },
117
+ {
118
+ "ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
119
+ "ts": 7429064525932.854, "dur": 0.304,
120
+ "args": {
121
+ "External id": 2, "cid": 370, "correlation": 8
122
+ }
123
+ },
124
+ {
125
+ "ph": "f", "id": 8, "pid": 455, "tid": 455, "ts": 7429064525932.854,
126
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
127
+ },
128
+ {
129
+ "ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
130
+ "ts": 7429064525994.881, "dur": 0.238,
131
+ "args": {
132
+ "External id": 2, "cid": 370, "correlation": 9
133
+ }
134
+ },
135
+ {
136
+ "ph": "f", "id": 9, "pid": 455, "tid": 455, "ts": 7429064525994.881,
137
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
138
+ },
139
+ {
140
+ "ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
141
+ "ts": 7429064525996.286, "dur": 0.169,
142
+ "args": {
143
+ "External id": 2, "cid": 370, "correlation": 10
144
+ }
145
+ },
146
+ {
147
+ "ph": "f", "id": 10, "pid": 455, "tid": 455, "ts": 7429064525996.286,
148
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
149
+ },
150
+ {
151
+ "ph": "X", "cat": "cuda_runtime", "name": "hipModuleLoad", "pid": 455, "tid": 455,
152
+ "ts": 7429064528594.963, "dur": 103603.691,
153
+ "args": {
154
+ "External id": 2, "cid": 170, "correlation": 11
155
+ }
156
+ },
157
+ {
158
+ "ph": "f", "id": 11, "pid": 455, "tid": 455, "ts": 7429064528594.963,
159
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
160
+ },
161
+ {
162
+ "ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
163
+ "ts": 7429064632214.111, "dur": 1.114,
164
+ "args": {
165
+ "External id": 2, "cid": 370, "correlation": 12
166
+ }
167
+ },
168
+ {
169
+ "ph": "f", "id": 12, "pid": 455, "tid": 455, "ts": 7429064632214.111,
170
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
171
+ },
172
+ {
173
+ "ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
174
+ "ts": 7429064745562.959, "dur": 2.839,
175
+ "args": {
176
+ "External id": 2, "cid": 370, "correlation": 13
177
+ }
178
+ },
179
+ {
180
+ "ph": "f", "id": 13, "pid": 455, "tid": 455, "ts": 7429064745562.959,
181
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
182
+ },
183
+ {
184
+ "ph": "X", "cat": "cuda_runtime", "name": "hipGetDevicePropertiesR0600", "pid": 455, "tid": 455,
185
+ "ts": 7429064745586.023, "dur": 0.696,
186
+ "args": {
187
+ "External id": 2, "cid": 370, "correlation": 14
188
+ }
189
+ },
190
+ {
191
+ "ph": "f", "id": 14, "pid": 455, "tid": 455, "ts": 7429064745586.023,
192
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
193
+ },
194
+ {
195
+ "ph": "X", "cat": "cuda_runtime", "name": "hipModuleLoad", "pid": 455, "tid": 455,
196
+ "ts": 7429064745602.158, "dur": 77217.980,
197
+ "args": {
198
+ "External id": 2, "cid": 170, "correlation": 15
199
+ }
200
+ },
201
+ {
202
+ "ph": "f", "id": 15, "pid": 455, "tid": 455, "ts": 7429064745602.158,
203
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
204
+ },
205
+ {
206
+ "ph": "X", "cat": "cuda_runtime", "name": "hipExtModuleLaunchKernel", "pid": 455, "tid": 455,
207
+ "ts": 7429064822844.054, "dur": 20.100,
208
+ "args": {
209
+ "External id": 2, "kernel": "Cijk_Ailk_Bljk_SB_Bias_AS_SAV_UserArgs_MT128x64x32_MI32x32x1_SN_LDSB0_AFC1_AFEM1_AFEM1_ASEM1_CLR1_CADS0_EPS0_GRVWA4_GRVWB4_GSUAMB_ISA942_IU1_K1_LBSPPA0_LBSPPB128_LBSPPM0_LPA0_LPB4_LPM0_LRVW4_LWPMn1_MIAV0_MIWT2_1_MO1_NTn1_NTA0_NTB0_NTC0_NTD0_NTM0_NEPBS0_NLCA1_NLCB1_ONLL1_PGR2_PLR1_PKA1_SIA3_SS1_SPO0_SRVW0_SSO0_SVW1_TLDS1_ULSGRO0_USL1_UIOFGRO0_USFGROn1_VSn1_VWA1_VWB1_WSGRA0_WSGRB1_WS64_WG64_4_1_WGMXCC1_WGMXCCG0", "cid": 65, "correlation": 16, "grid": [516096, 1, 1], "block": [256, 1, 1], "shared memory": 0
210
+ }
211
+ },
212
+ {
213
+ "ph": "s", "id": 16, "pid": 455, "tid": 455, "ts": 7429064822844.054,
214
+ "cat": "ac2g", "name": "ac2g"
215
+ },
216
+ {
217
+ "ph": "X", "cat": "cuda_runtime", "name": "hipDeviceSynchronize", "pid": 455, "tid": 455,
218
+ "ts": 7429064823009.505, "dur": 1348.012,
219
+ "args": {
220
+ "cid": 48, "correlation": 17
221
+ }
222
+ },
223
+ {
224
+ "ph": "f", "id": 17, "pid": 455, "tid": 455, "ts": 7429064823009.505,
225
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
226
+ },
227
+ {
228
+ "ph": "X", "cat": "cuda_runtime", "name": "hipDeviceSynchronize", "pid": 455, "tid": 455,
229
+ "ts": 7429064824390.626, "dur": 0.870,
230
+ "args": {
231
+ "cid": 48, "correlation": 18
232
+ }
233
+ },
234
+ {
235
+ "ph": "f", "id": 18, "pid": 455, "tid": 455, "ts": 7429064824390.626,
236
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
237
+ },
238
+ {
239
+ "ph": "X", "cat": "gpu_memset", "name": "Memset (Device)", "pid": 2, "tid": 0,
240
+ "ts": 7429064525818.773, "dur": 5.652,
241
+ "args": {
242
+ "External id": 2, "device": 2, "stream": 0, "correlation": 4, "kind": "Device"
243
+ }
244
+ },
245
+ {
246
+ "ph": "f", "id": 4, "pid": 2, "tid": 0, "ts": 7429064525818.773,
247
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
248
+ },
249
+ {
250
+ "ph": "X", "cat": "kernel", "name": "Cijk_Ailk_Bljk_SB_Bias_AS_SAV_UserArgs_MT128x64x32_MI32x32x1_SN_LDSB0_AFC1_AFEM1_AFEM1_ASEM1_CLR1_CADS0_EPS0_GRVWA4_GRVWB4_GSUAMB_ISA942_IU1_K1_LBSPPA0_LBSPPB128_LBSPPM0_LPA0_LPB4_LPM0_LRVW4_LWPMn1_MIAV0_MIWT2_1_MO1_NTn1_NTA0_NTB0_NTC0_NTD0_NTM0_NEPBS0_NLCA1_NLCB1_ONLL1_PGR2_PLR1_PKA1_SIA3_SS1_SPO0_SRVW0_SSO0_SVW1_TLDS1_ULSGRO0_USL1_UIOFGRO0_USFGROn1_VSn1_VWA1_VWB1_WSGRA0_WSGRB1_WS64_WG64_4_1_WGMXCC1_WGMXCCG0", "pid": 2, "tid": 0,
251
+ "ts": 7429064822886.347, "dur": 1475.199,
252
+ "args": {
253
+ "External id": 2, "device": 2, "stream": 0, "correlation": 16, "kind": "Dispatch Kernel", "grid": [516096, 1, 1], "block": [256, 1, 1]
254
+ }
255
+ },
256
+ {
257
+ "ph": "f", "id": 16, "pid": 2, "tid": 0, "ts": 7429064822886.347,
258
+ "cat": "ac2g", "name": "ac2g", "bp": "e"
259
+ },
260
+ {
261
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 0,
262
+ "args": {
263
+ "name": "python"
264
+ }
265
+ },
266
+ {
267
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 0,
268
+ "args": {
269
+ "labels": "CPU"
270
+ }
271
+ },
272
+ {
273
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 0,
274
+ "args": {
275
+ "sort_index": 455
276
+ }
277
+ },
278
+ {
279
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 0, "tid": 0,
280
+ "args": {
281
+ "name": "python"
282
+ }
283
+ },
284
+ {
285
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 0, "tid": 0,
286
+ "args": {
287
+ "labels": "GPU 0"
288
+ }
289
+ },
290
+ {
291
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 0, "tid": 0,
292
+ "args": {
293
+ "sort_index": 5000000
294
+ }
295
+ },
296
+ {
297
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 1, "tid": 0,
298
+ "args": {
299
+ "name": "python"
300
+ }
301
+ },
302
+ {
303
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 1, "tid": 0,
304
+ "args": {
305
+ "labels": "GPU 1"
306
+ }
307
+ },
308
+ {
309
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 1, "tid": 0,
310
+ "args": {
311
+ "sort_index": 5000001
312
+ }
313
+ },
314
+ {
315
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
316
+ "args": {
317
+ "name": "python"
318
+ }
319
+ },
320
+ {
321
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
322
+ "args": {
323
+ "labels": "GPU 2"
324
+ }
325
+ },
326
+ {
327
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
328
+ "args": {
329
+ "sort_index": 5000002
330
+ }
331
+ },
332
+ {
333
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 3, "tid": 0,
334
+ "args": {
335
+ "name": "python"
336
+ }
337
+ },
338
+ {
339
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 3, "tid": 0,
340
+ "args": {
341
+ "labels": "GPU 3"
342
+ }
343
+ },
344
+ {
345
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 3, "tid": 0,
346
+ "args": {
347
+ "sort_index": 5000003
348
+ }
349
+ },
350
+ {
351
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 4, "tid": 0,
352
+ "args": {
353
+ "name": "python"
354
+ }
355
+ },
356
+ {
357
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 4, "tid": 0,
358
+ "args": {
359
+ "labels": "GPU 4"
360
+ }
361
+ },
362
+ {
363
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 4, "tid": 0,
364
+ "args": {
365
+ "sort_index": 5000004
366
+ }
367
+ },
368
+ {
369
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 5, "tid": 0,
370
+ "args": {
371
+ "name": "python"
372
+ }
373
+ },
374
+ {
375
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 5, "tid": 0,
376
+ "args": {
377
+ "labels": "GPU 5"
378
+ }
379
+ },
380
+ {
381
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 5, "tid": 0,
382
+ "args": {
383
+ "sort_index": 5000005
384
+ }
385
+ },
386
+ {
387
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 6, "tid": 0,
388
+ "args": {
389
+ "name": "python"
390
+ }
391
+ },
392
+ {
393
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 6, "tid": 0,
394
+ "args": {
395
+ "labels": "GPU 6"
396
+ }
397
+ },
398
+ {
399
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 6, "tid": 0,
400
+ "args": {
401
+ "sort_index": 5000006
402
+ }
403
+ },
404
+ {
405
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 7, "tid": 0,
406
+ "args": {
407
+ "name": "python"
408
+ }
409
+ },
410
+ {
411
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 7, "tid": 0,
412
+ "args": {
413
+ "labels": "GPU 7"
414
+ }
415
+ },
416
+ {
417
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 7, "tid": 0,
418
+ "args": {
419
+ "sort_index": 5000007
420
+ }
421
+ },
422
+ {
423
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 8, "tid": 0,
424
+ "args": {
425
+ "name": "python"
426
+ }
427
+ },
428
+ {
429
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 8, "tid": 0,
430
+ "args": {
431
+ "labels": "GPU 8"
432
+ }
433
+ },
434
+ {
435
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 8, "tid": 0,
436
+ "args": {
437
+ "sort_index": 5000008
438
+ }
439
+ },
440
+ {
441
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 9, "tid": 0,
442
+ "args": {
443
+ "name": "python"
444
+ }
445
+ },
446
+ {
447
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 9, "tid": 0,
448
+ "args": {
449
+ "labels": "GPU 9"
450
+ }
451
+ },
452
+ {
453
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 9, "tid": 0,
454
+ "args": {
455
+ "sort_index": 5000009
456
+ }
457
+ },
458
+ {
459
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 10, "tid": 0,
460
+ "args": {
461
+ "name": "python"
462
+ }
463
+ },
464
+ {
465
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 10, "tid": 0,
466
+ "args": {
467
+ "labels": "GPU 10"
468
+ }
469
+ },
470
+ {
471
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 10, "tid": 0,
472
+ "args": {
473
+ "sort_index": 5000010
474
+ }
475
+ },
476
+ {
477
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 11, "tid": 0,
478
+ "args": {
479
+ "name": "python"
480
+ }
481
+ },
482
+ {
483
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 11, "tid": 0,
484
+ "args": {
485
+ "labels": "GPU 11"
486
+ }
487
+ },
488
+ {
489
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 11, "tid": 0,
490
+ "args": {
491
+ "sort_index": 5000011
492
+ }
493
+ },
494
+ {
495
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 12, "tid": 0,
496
+ "args": {
497
+ "name": "python"
498
+ }
499
+ },
500
+ {
501
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 12, "tid": 0,
502
+ "args": {
503
+ "labels": "GPU 12"
504
+ }
505
+ },
506
+ {
507
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 12, "tid": 0,
508
+ "args": {
509
+ "sort_index": 5000012
510
+ }
511
+ },
512
+ {
513
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 13, "tid": 0,
514
+ "args": {
515
+ "name": "python"
516
+ }
517
+ },
518
+ {
519
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 13, "tid": 0,
520
+ "args": {
521
+ "labels": "GPU 13"
522
+ }
523
+ },
524
+ {
525
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 13, "tid": 0,
526
+ "args": {
527
+ "sort_index": 5000013
528
+ }
529
+ },
530
+ {
531
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 14, "tid": 0,
532
+ "args": {
533
+ "name": "python"
534
+ }
535
+ },
536
+ {
537
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 14, "tid": 0,
538
+ "args": {
539
+ "labels": "GPU 14"
540
+ }
541
+ },
542
+ {
543
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 14, "tid": 0,
544
+ "args": {
545
+ "sort_index": 5000014
546
+ }
547
+ },
548
+ {
549
+ "name": "process_name", "ph": "M", "ts": 7429064519991.639, "pid": 15, "tid": 0,
550
+ "args": {
551
+ "name": "python"
552
+ }
553
+ },
554
+ {
555
+ "name": "process_labels", "ph": "M", "ts": 7429064519991.639, "pid": 15, "tid": 0,
556
+ "args": {
557
+ "labels": "GPU 15"
558
+ }
559
+ },
560
+ {
561
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 15, "tid": 0,
562
+ "args": {
563
+ "sort_index": 5000015
564
+ }
565
+ },
566
+ {
567
+ "name": "thread_name", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
568
+ "args": {
569
+ "name": "stream 0 "
570
+ }
571
+ },
572
+ {
573
+ "name": "thread_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 2, "tid": 0,
574
+ "args": {
575
+ "sort_index": 0
576
+ }
577
+ },
578
+ {
579
+ "name": "thread_name", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 455,
580
+ "args": {
581
+ "name": "thread 455 (python)"
582
+ }
583
+ },
584
+ {
585
+ "name": "thread_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 455,
586
+ "args": {
587
+ "sort_index": 455
588
+ }
589
+ },
590
+ {
591
+ "name": "thread_name", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 455,
592
+ "args": {
593
+ "name": "thread 455 (python)"
594
+ }
595
+ },
596
+ {
597
+ "name": "thread_sort_index", "ph": "M", "ts": 7429064519991.639, "pid": 455, "tid": 455,
598
+ "args": {
599
+ "sort_index": 455
600
+ }
601
+ },
602
+ {
603
+ "ph": "X", "cat": "Trace", "ts": 7429064519947.431, "dur": 304458.658,
604
+ "pid": "Spans", "tid": "PyTorch Profiler",
605
+ "name": "PyTorch Profiler (0)",
606
+ "args": {
607
+ "Op count": 0
608
+ }
609
+ },
610
+ {
611
+ "name": "process_sort_index", "ph": "M", "ts": 7429064519947.431,
612
+ "pid": "Spans", "tid": 0,
613
+ "args": {
614
+ "sort_index": 536870912
615
+ }
616
+ },
617
+ {
618
+ "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g",
619
+ "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 7429064519947.431
620
+ },
621
+ {
622
+ "name": "Record Window End", "ph": "i", "s": "g",
623
+ "pid": "", "tid": "", "ts": 7429064824552.924
624
+ }
625
+ ],
626
+ "traceName": "cuda_matmul_trace.json",
627
+ "displayTimeUnit": "ms",
628
+ "baseTimeNanoseconds": 1743521598000000000
629
+ }