HPCToolkit
write_data.c
Go to the documentation of this file.
1 // -*-Mode: C++;-*- // technically C99
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // $HeadURL$
6 // $Id$
7 //
8 // --------------------------------------------------------------------------
9 // Part of HPCToolkit (hpctoolkit.org)
10 //
11 // Information about sources of support for research and development of
12 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
13 // --------------------------------------------------------------------------
14 //
15 // Copyright ((c)) 2002-2019, Rice University
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions are
20 // met:
21 //
22 // * Redistributions of source code must retain the above copyright
23 // notice, this list of conditions and the following disclaimer.
24 //
25 // * Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimer in the
27 // documentation and/or other materials provided with the distribution.
28 //
29 // * Neither the name of Rice University (RICE) nor the names of its
30 // contributors may be used to endorse or promote products derived from
31 // this software without specific prior written permission.
32 //
33 // This software is provided by RICE and contributors "as is" and any
34 // express or implied warranties, including, but not limited to, the
35 // implied warranties of merchantability and fitness for a particular
36 // purpose are disclaimed. In no event shall RICE or contributors be
37 // liable for any direct, indirect, incidental, special, exemplary, or
38 // consequential damages (including, but not limited to, procurement of
39 // substitute goods or services; loss of use, data, or profits; or
40 // business interruption) however caused and on any theory of liability,
41 // whether in contract, strict liability, or tort (including negligence
42 // or otherwise) arising in any way out of the use of this software, even
43 // if advised of the possibility of such damage.
44 //
45 // ******************************************************* EndRiceCopyright *
46 
47 //*****************************************************************************
48 // system includes
49 //*****************************************************************************
50 
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <setjmp.h>
54 
55 //*****************************************************************************
56 // local includes
57 //*****************************************************************************
58 
59 #include "fname_max.h"
60 #include "backtrace.h"
61 #include "files.h"
62 #include "epoch.h"
63 #include "rank.h"
64 #include "thread_data.h"
65 #include "cct_bundle.h"
66 #include "hpcrun_return_codes.h"
67 #include "write_data.h"
68 #include "loadmap.h"
69 #include "sample_prob.h"
70 
71 #include <messages/messages.h>
72 
73 #include <lush/lush-backtrace.h>
74 
75 #include <lib/prof-lean/hpcio.h>
76 #include <lib/prof-lean/hpcfmt.h>
78 
80 
81 
82 //*****************************************************************************
83 // structs and types
84 //*****************************************************************************
85 
87  .bits = 0
88 };
89 
90 static const uint64_t default_measurement_granularity = 1;
91 
92 
93 
94 //*****************************************************************************
95 // local utilities
96 //*****************************************************************************
97 
98 
99 //***************************************************************************
100 //
101 // The top level
102 //
103 // hpcrun_fmt_hdr_fwrite()
104 // foreach epoch
105 // hpcrun_epoch_fwrite()
106 //
107 // Writing an epoch
108 //
109 // hpcrun_fmt_epochHdr_fwrite(flags, char-rtn-dst, gran, NVPs) /* char-rtn-dst = 1 for the moment */
110 // hpcrun_fmt_metricTbl_fwrite()
111 // hpcrun_fmt_loadmap_fwrite()
112 // hpcrun_le4_fwrite(# cct_nodes)
113 // foreach cct-node
114 // hpcrun_fmt_cct_node_fwrite(cct_node_t *p)
115 //
116 //***************************************************************************
117 
118 //***************************************************************************
119 //
120 // Above functionality is factored into 2 pieces:
121 //
122 // 1) (Lazily) open the output file, and write the file header
123 // 2) Write the epochs
124 //
125 // This factoring enables the writing of the current set of epochs at any time
126 // during the sampling run.
127 // Currently, there are 2 such situations:
128 // 1) The end of the sampling run. This is the normal place to write profile data
129 // 2) When sample data memory is low. In this case, the profile data is written
130 // out, but the sample memory is reclaimed so that more profile data may be
131 // collected.
132 //
133 //***************************************************************************
134 
135 static FILE *
137 {
138 
139  FILE* fs = cptd->hpcrun_file;
140  if (fs) {
141  return fs;
142  }
143 
144  int rank = hpcrun_get_rank();
145  if (rank < 0) {
146  rank = 0;
147  }
148  int fd = hpcrun_open_profile_file(rank, cptd->id);
149  fs = fdopen(fd, "w");
150  if (fs == NULL) {
151  EEMSG("HPCToolkit: %s: unable to open profile file", __func__);
152  return NULL;
153  }
154  cptd->hpcrun_file = fs;
155 
157  return fs;
158 
159  const uint bufSZ = 32; // sufficient to hold a 64-bit integer in base 10
160 
161  const char* jobIdStr = OSUtil_jobid();
162  if (!jobIdStr) {
163  jobIdStr = "";
164  }
165 
166  char mpiRankStr[bufSZ];
167  mpiRankStr[0] = '\0';
168  snprintf(mpiRankStr, bufSZ, "%d", rank);
169 
170  char tidStr[bufSZ];
171  snprintf(tidStr, bufSZ, "%d", cptd->id);
172 
173  char hostidStr[bufSZ];
174  snprintf(hostidStr, bufSZ, "%lx", OSUtil_hostid());
175 
176  char pidStr[bufSZ];
177  snprintf(pidStr, bufSZ, "%u", OSUtil_pid());
178 
179  char traceMinTimeStr[bufSZ];
180  snprintf(traceMinTimeStr, bufSZ, "%"PRIu64, cptd->trace_min_time_us);
181 
182  char traceMaxTimeStr[bufSZ];
183  snprintf(traceMaxTimeStr, bufSZ, "%"PRIu64, cptd->trace_max_time_us);
184 
185  //
186  // ==== file hdr =====
187  //
188 
189  TMSG(DATA_WRITE,"writing file header");
193  HPCRUN_FMT_NV_envPath, getenv("PATH"),
194  HPCRUN_FMT_NV_jobId, jobIdStr,
195  HPCRUN_FMT_NV_mpiRank, mpiRankStr,
196  HPCRUN_FMT_NV_tid, tidStr,
197  HPCRUN_FMT_NV_hostid, hostidStr,
198  HPCRUN_FMT_NV_pid, pidStr,
199  HPCRUN_FMT_NV_traceMinTime, traceMinTimeStr,
200  HPCRUN_FMT_NV_traceMaxTime, traceMaxTimeStr,
201  NULL);
202  return fs;
203 }
204 
205 
206 static int
208 {
209  uint32_t num_epochs = 0;
210 
212  return HPCRUN_OK;
213 
214  //
215  // === # epochs ===
216  //
217 
218  epoch_t* current_epoch = epoch;
219  for(epoch_t* s = current_epoch; s; s = s->next) {
220  num_epochs++;
221  }
222 
223  TMSG(EPOCH, "Actual # epochs = %d", num_epochs);
224 
225  TMSG(DATA_WRITE, "writing # epochs = %d", num_epochs);
226 
227  //
228  // for each epoch ...
229  //
230 
231  for(epoch_t* s = current_epoch; s; s = s->next) {
232 
233 #if 0
234  if (ENABLED(SKIP_WRITE_EMPTY_EPOCH)){
235  if (hpcrun_empty_cct_bundle(&(s->csdata))){
236  EMSG("Empty cct encountered: it is not written out");
237  continue;
238  }
239  }
240 #endif
241  //
242  // == epoch header ==
243  //
244 
245  TMSG(DATA_WRITE," epoch header");
246  //
247  // set epoch flags before writing
248  //
249 
251  TMSG(LUSH,"epoch lush flag set to %s", epoch_flags.fields.isLogicalUnwind ? "true" : "false");
252 
253  TMSG(DATA_WRITE,"epoch flags = %"PRIx64"", epoch_flags.bits);
254  hpcrun_fmt_epochHdr_fwrite(fs, epoch_flags,
256  "TODO:epoch-name","TODO:epoch-value",
257  NULL);
258 
259  //
260  // == metrics ==
261  //
262 
263  metric_desc_p_tbl_t *metric_tbl = hpcrun_get_metric_tbl();
264 
265  TMSG(DATA_WRITE, "metric tbl len = %d", metric_tbl->len);
266  hpcrun_fmt_metricTbl_fwrite(metric_tbl, cptd->perf_event_info, fs);
267 
268  TMSG(DATA_WRITE, "Done writing metric data");
269 
270  //
271  // == load map ==
272  //
273 
274  TMSG(DATA_WRITE, "Preparing to write loadmap");
275 
276  hpcrun_loadmap_t* current_loadmap = s->loadmap;
277 
278  hpcfmt_int4_fwrite(current_loadmap->size, fs);
279 
280  // N.B.: Write in reverse order to obtain nicely ascending LM ids.
281  for (load_module_t* lm_src = current_loadmap->lm_end;
282  (lm_src); lm_src = lm_src->prev) {
283  loadmap_entry_t lm_entry;
284  lm_entry.id = lm_src->id;
285  lm_entry.name = lm_src->name;
286  lm_entry.flags = 0;
287 
288  hpcrun_fmt_loadmapEntry_fwrite(&lm_entry, fs);
289  }
290 
291  TMSG(DATA_WRITE, "Done writing loadmap");
292 
293  //
294  // == cct ==
295  //
296 
297  cct_bundle_t* cct = &(s->csdata);
298  int ret = hpcrun_cct_bundle_fwrite(fs, epoch_flags, cct, cptd->cct2metrics_map);
299  if(ret != HPCRUN_OK) {
300  TMSG(DATA_WRITE, "Error writing tree %#lx", cct);
301  TMSG(DATA_WRITE, "Number of tree nodes lost: %ld", cct->num_nodes);
302  EMSG("could not save profile data to hpcrun file");
303  perror("write_profile_data");
304  ret = HPCRUN_ERR; // FIXME: return this value now
305  }
306  else {
307  TMSG(DATA_WRITE, "saved profile data to hpcrun file ");
308  }
309  current_loadmap++;
310 
311  } // epoch loop
312 
313  return HPCRUN_OK;
314 }
315 
316 
317 void
319 {
320  FILE *fs = lazy_open_data_file(cptd);
321  if (fs == NULL)
322  return;
323 
324  write_epochs(fs, cptd, cptd->epoch);
326 }
327 
328 int
330 {
331  TMSG(DATA_WRITE,"Writing hpcrun profile data");
332  FILE* fs = lazy_open_data_file(cptd);
333  if (fs == NULL)
334  return HPCRUN_ERR;
335 
336  write_epochs(fs, cptd, cptd->epoch);
337 
338  TMSG(DATA_WRITE,"closing file");
339  hpcio_fclose(fs);
340  TMSG(DATA_WRITE,"Done!");
341 
342  return HPCRUN_OK;
343 }
344 
345 //
346 // DEBUG: fetch and print current loadmap
347 //
348 void
350 {
352 }
#define HPCRUN_FMT_NV_tid
Definition: hpcrun-fmt.h:158
static const uint64_t default_measurement_granularity
Definition: write_data.c:90
metric_aux_info_t * perf_event_info
#define HPCRUN_FMT_NV_progPath
Definition: hpcrun-fmt.h:154
static bool hpcrun_isLogicalUnwind()
void hpcrun_dbg_print_current_loadmap(void)
Definition: write_data.c:349
static int write_epochs(FILE *fs, core_profile_trace_data_t *cptd, epoch_t *epoch)
Definition: write_data.c:207
const char * hpcrun_files_executable_name()
Definition: files.c:376
static int hpcfmt_int4_fwrite(uint32_t val, FILE *outfs)
Definition: hpcfmt.h:217
long OSUtil_hostid()
Definition: OSUtil.c:162
#define hpcrun_get_thread_epoch()
Definition: thread_data.h:278
static epoch_flags_t epoch_flags
Definition: write_data.c:86
metric_desc_p_tbl_t * hpcrun_get_metric_tbl()
Definition: metrics.c:262
int hpcrun_write_profile_data(core_profile_trace_data_t *cptd)
Definition: write_data.c:329
int hpcrun_fmt_hdr_fwrite(FILE *fs,...)
Definition: hpcrun-fmt.c:126
int hpcrun_sample_prob_active(void)
Definition: sample_prob.c:193
#define HPCRUN_ERR
#define HPCRUN_FMT_NV_mpiRank
Definition: hpcrun-fmt.h:157
unsigned int uint
Definition: uint.h:124
void hpcrun_flush_epochs(core_profile_trace_data_t *cptd)
Definition: write_data.c:318
uint16_t id
Definition: hpcrun-fmt.h:454
int hpcrun_fmt_metricTbl_fwrite(metric_desc_p_tbl_t *metric_tbl, metric_aux_info_t *aux_info, FILE *fs)
Definition: hpcrun-fmt.c:307
#define EMSG
Definition: messages.h:70
Definition: hpcrun-fmt.h:452
int hpcrun_open_profile_file(int rank, int thread)
Definition: files.c:469
#define HPCRUN_FMT_NV_pid
Definition: hpcrun-fmt.h:160
uint OSUtil_pid()
Definition: OSUtil.c:92
epoch_flags_bitfield fields
Definition: hpcrun-fmt.h:181
Definition: epoch.h:64
uint64_t flags
Definition: hpcrun-fmt.h:456
void hpcrun_loadmap_print(hpcrun_loadmap_t *loadmap)
Definition: loadmap.c:251
#define HPCRUN_FMT_NV_hostid
Definition: hpcrun-fmt.h:159
#define HPCRUN_FMT_NV_traceMaxTime
Definition: hpcrun-fmt.h:163
const char * hpcrun_files_executable_pathname()
Definition: files.c:367
int hpcrun_get_rank(void)
Definition: rank.c:89
#define TMSG(f,...)
Definition: messages.h:93
load_module_t * lm_end
Definition: loadmap.h:151
#define HPCRUN_FMT_NV_envPath
Definition: hpcrun-fmt.h:155
uint64_t bits
Definition: hpcrun-fmt.h:182
static FILE * lazy_open_data_file(core_profile_trace_data_t *cptd)
Definition: write_data.c:136
#define HPCRUN_FMT_NV_jobId
Definition: hpcrun-fmt.h:156
#define HPCRUN_FMT_NV_traceMinTime
Definition: hpcrun-fmt.h:162
int hpcrun_fmt_loadmapEntry_fwrite(loadmap_entry_t *x, FILE *fs)
Definition: hpcrun-fmt.c:600
const char * OSUtil_jobid()
Definition: OSUtil.c:100
#define EEMSG(...)
Definition: messages.h:90
#define NULL
Definition: ElfHelper.cpp:85
#define HPCRUN_OK
uint16_t size
Definition: loadmap.h:149
int hpcio_fclose(FILE *fs)
Definition: hpcio.c:152
unsigned long num_nodes
Definition: cct_bundle.h:84
int hpcrun_fmt_epochHdr_fwrite(FILE *fs, epoch_flags_t flags, uint64_t measurementGranularity,...)
Definition: hpcrun-fmt.c:201
char * name
Definition: hpcrun-fmt.h:455
int hpcrun_cct_bundle_fwrite(FILE *fs, epoch_flags_t flags, cct_bundle_t *bndl, cct2metrics_t *cct2metrics_map)
Definition: cct_bundle.c:118
void hpcrun_epoch_reset(void)
Definition: epoch.c:143
struct epoch_t * next
Definition: epoch.h:68
static metric_desc_p_tbl_t metric_tbl
Definition: metrics.c:114
struct load_module_t * prev
Definition: loadmap.h:131
#define HPCRUN_FMT_NV_prog
Definition: hpcrun-fmt.h:153
#define ENABLED(f)
Definition: debug-flag.h:76