HPCToolkit
sample_prob.c
Go to the documentation of this file.
1 // -*-Mode: C++;-*- // technically C99
2 
3 // * BeginRiceCopyright *****************************************************
4 //
5 // $HeadURL$
6 // $Id$
7 //
8 // --------------------------------------------------------------------------
9 // Part of HPCToolkit (hpctoolkit.org)
10 //
11 // Information about sources of support for research and development of
12 // HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
13 // --------------------------------------------------------------------------
14 //
15 // Copyright ((c)) 2002-2019, Rice University
16 // All rights reserved.
17 //
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions are
20 // met:
21 //
22 // * Redistributions of source code must retain the above copyright
23 // notice, this list of conditions and the following disclaimer.
24 //
25 // * Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimer in the
27 // documentation and/or other materials provided with the distribution.
28 //
29 // * Neither the name of Rice University (RICE) nor the names of its
30 // contributors may be used to endorse or promote products derived from
31 // this software without specific prior written permission.
32 //
33 // This software is provided by RICE and contributors "as is" and any
34 // express or implied warranties, including, but not limited to, the
35 // implied warranties of merchantability and fitness for a particular
36 // purpose are disclaimed. In no event shall RICE or contributors be
37 // liable for any direct, indirect, incidental, special, exemplary, or
38 // consequential damages (including, but not limited to, procurement of
39 // substitute goods or services; loss of use, data, or profits; or
40 // business interruption) however caused and on any theory of liability,
41 // whether in contract, strict liability, or tort (including negligence
42 // or otherwise) arising in any way out of the use of this software, even
43 // if advised of the possibility of such damage.
44 //
45 // ******************************************************* EndRiceCopyright *
46 
47 #include <sys/stat.h>
48 #include <sys/time.h>
49 #include <sys/types.h>
50 #include <fcntl.h>
51 #include <stdint.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <unistd.h>
56 
57 #include <messages/messages.h>
59 #include "sample_prob.h"
60 
// Name of the environment variable that selects the fraction of
// processes that write out their results.
61 #define HPCRUN_SAMPLE_PROB "HPCRUN_PROCESS_FRACTION"
// Probability used by string_to_prob() when the setting cannot be parsed.
62 #define DEFAULT_PROB 0.1
63 
// Modulus and base of the modular exponentiation in random_hash_prob().
64 #define HASH_PRIME 2001001003
65 #define HASH_GEN 4011
66 
// Set to 1 at the end of the init routine.
67 static int is_init = 0;
// Pid recorded by the init routine and compared with getpid() on later
// calls, so the decision is recomputed in a child after fork.
68 static pid_t orig_pid = 0;
// Cached decision returned by the 'active' query; starts out as 1 (on).
69 static int sample_prob_ans = 1;
70 
// Tested against NULL by the init routine.
// NOTE(review): the assignment is not visible in this listing (line 181
// is missing) -- presumably the value of the environment variable; confirm.
71 static char *sample_prob_str = NULL;
// Set to 1 by string_to_prob() when the setting is malformed.
72 static int prob_str_broken = 0;
// Set to 1 once the "malformed probability" message has been issued.
73 static int prob_str_mesg = 0;
74 
75 
76 // -------------------------------------------------------------------
77 // This file implements probability-based sampling. All processes
78 // continue to take samples, but if HPCRUN_SAMPLE_PROB is set in the
79 // environment, then only a fraction of the processes (based on a
80 // pseudo-random seed) open their .log and .hpcrun files and write out
81 // their results.
82 //
83 // HPCRUN_SAMPLE_PROB may be written as a floating point number or
84 // as a fraction. So, '0.10' and '1/10' are equivalent.
85 //
86 // The decision of which processes are active is process-wide, not
87 // per-thread (for now).
88 // -------------------------------------------------------------------
89 
90 
91 // Accept 0.ddd as floating point or x/y as fraction.
92 // Note: must delay printing any errors.
93 //
94 static float
95 string_to_prob(char *str)
96 {
97  int x, y;
98  float ans;
99 
100  if (strchr(str, '/') != NULL) {
101  if (sscanf(str, "%d/%d", &x, &y) == 2 && y > 0) {
102  ans = (float)x / (float)y;
103  } else {
104  prob_str_broken = 1;
105  ans = DEFAULT_PROB;
106  }
107  }
108  else {
109  if (sscanf(str, "%f", &ans) < 1) {
110  prob_str_broken = 1;
111  ans = DEFAULT_PROB;
112  }
113  }
114 
115  return ans;
116 }
117 
118 
119 // Combine the hostid, the time of day in microseconds and
120 // /dev/urandom (if available), run it through a hash function and
121 // produce a pseudo-random value in the range [0.0, 1.0).
122 //
123 // This is a simple hash function based on the exponential mod
124 // function with good cryptographic properties. MD5 or SHA-1 would
125 // make sense, but those require bringing in extra libraries.
126 //
127 // Anyway, the choice of seed is far more important than the hash
128 // function here.
129 //
130 static float
132 {
133  struct timeval tv;
134  uint64_t a, b, x, rand;
135  int fd;
136 
137  // Add /dev/urandom if available.
138  rand = 0;
139  fd = open("/dev/urandom", O_RDONLY);
140  if (fd >= 0) {
141  read(fd, &rand, sizeof(rand));
142  close(fd);
143  }
144 
145  gettimeofday(&tv, NULL);
146  x = (((uint64_t) OSUtil_hostid()) << 24) + (tv.tv_usec << 4) + rand;
147  x = (x & ~(((uint64_t) 15) << 60)) % HASH_PRIME;
148 
149  // Compute gen^x (mod prime).
150  // Invariant: a * (b ^ x) = gen^(orig x) (mod prime).
151  a = 1;
152  b = HASH_GEN;
153  while (x > 0) {
154  if (x % 2 == 0) {
155  b = (b * b) % HASH_PRIME;
156  x = x/2;
157  } else {
158  a = (a * b) % HASH_PRIME;
159  x = x - 1;
160  }
161  }
162 
163  return (float)a / (float)HASH_PRIME;
164 }
165 
166 
// Compute, at most once per pid, whether this process is active and
// cache the answer in sample_prob_ans; sets is_init when done.
//
// NOTE(review): this listing is missing original lines 168, 181 and
// 183.  The cross-reference index at the end of the listing names the
// function as `void hpcrun_sample_prob_init(void)` (sample_prob.c:168).
// Line 181 presumably assigns sample_prob_str from the environment and
// line 183 presumably derives sample_prob_ans from random_hash_prob()
// and string_to_prob() -- confirm against the upstream file before
// relying on either.
167 void
169 {
170  pid_t cur_pid;
171 
172  // For consistency, don't recompute the sample probability if the
173  // pid hasn't changed. But do recompute in the child after fork.
174  cur_pid = getpid();
175  if (is_init && cur_pid == orig_pid)
176  return;
177  orig_pid = cur_pid;
178 
179  // If HPCRUN_SAMPLE_PROB is not set in the environment, then the
180  // answer is always on.
182  if (sample_prob_str != NULL) {
// NOTE(review): body missing from the listing (line 183).
184  }
185  else {
186  sample_prob_ans = 1;
187  }
188  is_init = 1;
189 }
190 
191 
// Return the cached decision held in sample_prob_ans.
//
// NOTE(review): this listing is missing original lines 193 and 196.
// The cross-reference index at the end of the listing names the
// function as `int hpcrun_sample_prob_active(void)`
// (sample_prob.c:193).  The empty `if (! is_init)` body presumably
// calls the init routine first -- confirm against the upstream file.
192 int
194 {
195  if (! is_init) {
197  }
198  return sample_prob_ans;
199 }
200 
201 
202 // We can't print messages while computing the sample probability
203 // because that would trigger opening the log files and recomputing
204 // the sample probability. Instead, we have to record the failure and
205 // depend on the caller to call us again after the log files are
206 // opened.
207 //
// Report a malformed probability setting via EMSG and record in
// prob_str_mesg that the message has been issued.
//
// NOTE(review): this listing is missing original lines 209, 211 and
// 213.  The cross-reference index at the end of the listing names the
// function as `void hpcrun_sample_prob_mesg(void)`
// (sample_prob.c:209).  Line 211 is the `if` that guards the message
// (presumably testing prob_str_broken and prob_str_mesg) and line 213
// holds the EMSG arguments matching the %s, %s and %f conversions --
// confirm both against the upstream file.
208 void
210 {
212  EMSG("malformed probability in %s (%s), using default value of %f",
214  prob_str_mesg = 1;
215  }
216 }
#define HASH_GEN
Definition: sample_prob.c:65
static char * sample_prob_str
Definition: sample_prob.c:71
static int is_init
Definition: sample_prob.c:67
static float random_hash_prob(void)
Definition: sample_prob.c:131
void hpcrun_sample_prob_mesg(void)
Definition: sample_prob.c:209
static int prob_str_broken
Definition: sample_prob.c:72
static int prob_str_mesg
Definition: sample_prob.c:73
long OSUtil_hostid()
Definition: OSUtil.c:162
static float string_to_prob(char *str)
Definition: sample_prob.c:95
#define HPCRUN_SAMPLE_PROB
Definition: sample_prob.c:61
int hpcrun_sample_prob_active(void)
Definition: sample_prob.c:193
#define EMSG
Definition: messages.h:70
void hpcrun_sample_prob_init(void)
Definition: sample_prob.c:168
#define DEFAULT_PROB
Definition: sample_prob.c:62
static int sample_prob_ans
Definition: sample_prob.c:69
ssize_t MONITOR_EXT_WRAP_NAME() read(int fd, void *buf, size_t count)
Definition: io-over.c:152
#define HASH_PRIME
Definition: sample_prob.c:64
#define NULL
Definition: ElfHelper.cpp:85
static pid_t orig_pid
Definition: sample_prob.c:68