001package io.prometheus.metrics.core.exemplars; 002 003import static java.util.Objects.requireNonNull; 004 005import io.prometheus.metrics.annotations.StableApi; 006import io.prometheus.metrics.core.util.Scheduler; 007import io.prometheus.metrics.model.snapshots.Exemplar; 008import io.prometheus.metrics.model.snapshots.Exemplars; 009import io.prometheus.metrics.model.snapshots.Labels; 010import io.prometheus.metrics.tracer.common.SpanContext; 011import java.util.ArrayList; 012import java.util.List; 013import java.util.concurrent.TimeUnit; 014import java.util.concurrent.atomic.AtomicBoolean; 015import java.util.function.LongSupplier; 016import java.util.function.Supplier; 017import javax.annotation.Nullable; 018 019/** 020 * The ExemplarSampler selects Spans as exemplars. 021 * 022 * <p>There are two types of Exemplars: Regular exemplars are sampled implicitly if a supported 023 * tracing library is detected. Custom exemplars are provided explicitly in code, for example if a 024 * developer wants to make sure an Exemplar is created for a specific code path. 025 * 026 * <p>Spans will be marked as being an Exemplar by calling {@link 027 * SpanContext#markCurrentSpanAsExemplar()}. The tracer implementation should set a Span attribute 028 * to mark the current Span as an Exemplar. This attribute can be used by a trace sampling algorithm 029 * to make sure traces with Exemplars are sampled. 030 * 031 * <p>The ExemplarSample is rate-limited, so only a small fraction of Spans will be marked as 032 * Exemplars in an application with a large number of requests. 033 * 034 * <p>See {@link ExemplarSamplerConfig} for configuration options. 035 */ 036@StableApi 037public class ExemplarSampler { 038 039 @SuppressWarnings("ReferenceEquality") 040 private static boolean sameObject(Object left, Object right) { 041 return left == right; 042 } 043 044 private final ExemplarSamplerConfig config; 045 private final Exemplar[] exemplars; 046 private final Exemplar[] 047 customExemplars; // Separate from exemplars, because we don't want custom exemplars 048 // to be overwritten by automatic exemplar sampling. exemplars.length == customExemplars.length 049 private final AtomicBoolean acceptingNewExemplars = new AtomicBoolean(true); 050 private final AtomicBoolean acceptingNewCustomExemplars = new AtomicBoolean(true); 051 052 @Nullable 053 private final SpanContext 054 spanContext; // may be null, in that case SpanContextSupplier.getSpanContext() is used. 055 056 @Nullable private final Supplier<Labels> additionalLabelsSupplier; 057 058 public ExemplarSampler(ExemplarSamplerConfig config) { 059 this(config, null, null); 060 } 061 062 /** 063 * Constructor with an additional {code spanContext} argument. This is useful for testing, but may 064 * also be useful in some production scenarios. If {@code spanContext != null} that spanContext is 065 * used and {@link io.prometheus.metrics.tracer.initializer.SpanContextSupplier 066 * SpanContextSupplier} is not used. If {@code spanContext == null} {@link 067 * io.prometheus.metrics.tracer.initializer.SpanContextSupplier#getSpanContext() 068 * SpanContextSupplier.getSpanContext()} is called to find a span context. 069 */ 070 public ExemplarSampler(ExemplarSamplerConfig config, @Nullable SpanContext spanContext) { 071 this(config, spanContext, null); 072 } 073 074 /** 075 * Constructor that additionally accepts a supplier of labels to be merged into every 076 * automatically-sampled exemplar. The supplier is called each time an exemplar is sampled from a 077 * span context, so it can return dynamic values (e.g. a request-scoped identifier). The supplier 078 * is only called when a valid, sampled span context is present. 079 */ 080 public ExemplarSampler( 081 ExemplarSamplerConfig config, 082 @Nullable SpanContext spanContext, 083 @Nullable Supplier<Labels> additionalLabelsSupplier) { 084 this.config = config; 085 this.exemplars = new Exemplar[config.getNumberOfExemplars()]; 086 this.customExemplars = new Exemplar[exemplars.length]; 087 this.spanContext = spanContext; 088 this.additionalLabelsSupplier = additionalLabelsSupplier; 089 } 090 091 public Exemplars collect() { 092 // this may run in parallel with observe() 093 long now = System.currentTimeMillis(); 094 List<Exemplar> result = new ArrayList<>(exemplars.length); 095 for (int i = 0; i < customExemplars.length; i++) { 096 Exemplar exemplar = customExemplars[i]; 097 if (exemplar != null) { 098 if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) { 099 customExemplars[i] = null; 100 } else { 101 result.add(exemplar); 102 } 103 } 104 } 105 for (int i = 0; i < exemplars.length && result.size() < exemplars.length; i++) { 106 Exemplar exemplar = exemplars[i]; 107 if (exemplar != null) { 108 if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) { 109 exemplars[i] = null; 110 } else { 111 result.add(exemplar); 112 } 113 } 114 } 115 return Exemplars.of(result); 116 } 117 118 public void reset() { 119 for (int i = 0; i < exemplars.length; i++) { 120 exemplars[i] = null; 121 customExemplars[i] = null; 122 } 123 } 124 125 public void observe(double value) { 126 if (!acceptingNewExemplars.get()) { 127 return; // This is the hot path in a high-throughput application and should be as efficient as 128 // possible. 129 } 130 rateLimitedObserve(acceptingNewExemplars, value, () -> doObserve(value)); 131 } 132 133 public void observeWithExemplar(double value, Labels labels) { 134 if (!acceptingNewCustomExemplars.get()) { 135 return; // This is the hot path in a high-throughput application and should be as efficient as 136 // possible. 137 } 138 rateLimitedObserve( 139 acceptingNewCustomExemplars, value, () -> doObserveWithExemplar(value, labels)); 140 } 141 142 private long doObserve(double value) { 143 if (exemplars.length == 1) { 144 return doObserveSingleExemplar(value); 145 } else { 146 double[] classicUpperBounds = config.getHistogramClassicUpperBounds(); 147 if (classicUpperBounds != null) { 148 return doObserveWithUpperBounds(value, classicUpperBounds); 149 } else { 150 return doObserveWithoutUpperBounds(value); 151 } 152 } 153 } 154 155 private long doObserveSingleExemplar(double value) { 156 long now = System.currentTimeMillis(); 157 Exemplar current = exemplars[0]; 158 if (current == null 159 || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) { 160 return updateExemplar(0, value, now); 161 } 162 return 0; 163 } 164 165 private long doObserveSingleExemplar(double amount, Labels labels) { 166 long now = System.currentTimeMillis(); 167 Exemplar current = customExemplars[0]; 168 if (current == null 169 || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) { 170 return updateCustomExemplar(0, amount, labels, now); 171 } 172 return 0; 173 } 174 175 private long doObserveWithUpperBounds(double value, double[] classicUpperBounds) { 176 long now = System.currentTimeMillis(); 177 for (int i = 0; i < classicUpperBounds.length; i++) { 178 if (value <= classicUpperBounds[i]) { 179 Exemplar previous = exemplars[i]; 180 if (previous == null 181 || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) { 182 return updateExemplar(i, value, now); 183 } else { 184 return 0; 185 } 186 } 187 } 188 return 0; // will never happen, as upperBounds contains +Inf 189 } 190 191 private long doObserveWithoutUpperBounds(double value) { 192 final long now = System.currentTimeMillis(); 193 Exemplar smallest = null; 194 int smallestIndex = -1; 195 Exemplar largest = null; 196 int largestIndex = -1; 197 int nullIndex = -1; 198 for (int i = exemplars.length - 1; i >= 0; i--) { 199 Exemplar exemplar = exemplars[i]; 200 if (exemplar == null) { 201 nullIndex = i; 202 } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) { 203 exemplars[i] = null; 204 nullIndex = i; 205 } else { 206 if (smallest == null || exemplar.getValue() < smallest.getValue()) { 207 smallest = exemplar; 208 smallestIndex = i; 209 } 210 if (largest == null || exemplar.getValue() > largest.getValue()) { 211 largest = exemplar; 212 largestIndex = i; 213 } 214 } 215 } 216 if (nullIndex >= 0) { 217 return updateExemplar(nullIndex, value, now); 218 } 219 if (now - requireNonNull(smallest).getTimestampMillis() > config.getMinRetentionPeriodMillis() 220 && value < smallest.getValue()) { 221 return updateExemplar(smallestIndex, value, now); 222 } 223 if (now - requireNonNull(largest).getTimestampMillis() > config.getMinRetentionPeriodMillis() 224 && value > largest.getValue()) { 225 return updateExemplar(largestIndex, value, now); 226 } 227 long oldestTimestamp = 0; 228 int oldestIndex = -1; 229 for (int i = 0; i < exemplars.length; i++) { 230 Exemplar exemplar = exemplars[i]; 231 if (exemplar != null && !sameObject(exemplar, smallest) && !sameObject(exemplar, largest)) { 232 if (oldestTimestamp == 0 || exemplar.getTimestampMillis() < oldestTimestamp) { 233 oldestTimestamp = exemplar.getTimestampMillis(); 234 oldestIndex = i; 235 } 236 } 237 } 238 if (oldestIndex != -1 && now - oldestTimestamp > config.getMinRetentionPeriodMillis()) { 239 return updateExemplar(oldestIndex, value, now); 240 } 241 return 0; 242 } 243 244 // Returns the timestamp of the newly added Exemplar (which is System.currentTimeMillis()) 245 // or 0 if no Exemplar was added. 246 private long doObserveWithExemplar(double amount, Labels labels) { 247 if (customExemplars.length == 1) { 248 return doObserveSingleExemplar(amount, labels); 249 } else { 250 double[] classicUpperBounds = config.getHistogramClassicUpperBounds(); 251 if (classicUpperBounds != null) { 252 return doObserveWithExemplarWithUpperBounds(amount, labels, classicUpperBounds); 253 } else { 254 return doObserveWithExemplarWithoutUpperBounds(amount, labels); 255 } 256 } 257 } 258 259 private long doObserveWithExemplarWithUpperBounds( 260 double value, Labels labels, double[] classicUpperBounds) { 261 long now = System.currentTimeMillis(); 262 for (int i = 0; i < classicUpperBounds.length; i++) { 263 if (value <= classicUpperBounds[i]) { 264 Exemplar previous = customExemplars[i]; 265 if (previous == null 266 || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) { 267 return updateCustomExemplar(i, value, labels, now); 268 } else { 269 return 0; 270 } 271 } 272 } 273 return 0; // will never happen, as upperBounds contains +Inf 274 } 275 276 private long doObserveWithExemplarWithoutUpperBounds(double amount, Labels labels) { 277 final long now = System.currentTimeMillis(); 278 int nullPos = -1; 279 int oldestPos = -1; 280 Exemplar oldest = null; 281 for (int i = customExemplars.length - 1; i >= 0; i--) { 282 Exemplar exemplar = customExemplars[i]; 283 if (exemplar == null) { 284 nullPos = i; 285 } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) { 286 customExemplars[i] = null; 287 nullPos = i; 288 } else { 289 if (oldest == null || exemplar.getTimestampMillis() < oldest.getTimestampMillis()) { 290 oldest = exemplar; 291 oldestPos = i; 292 } 293 } 294 } 295 if (nullPos != -1) { 296 return updateCustomExemplar(nullPos, amount, labels, now); 297 } else if (now - requireNonNull(oldest).getTimestampMillis() 298 > config.getMinRetentionPeriodMillis()) { 299 return updateCustomExemplar(oldestPos, amount, labels, now); 300 } else { 301 return 0; 302 } 303 } 304 305 /** 306 * Observing requires a system call to {@link System#currentTimeMillis()}, and it requires 307 * iterating over the existing exemplars to check if one of the existing exemplars can be 308 * replaced. 309 * 310 * <p>To avoid performance issues, we rate limit observing exemplars to {@link 311 * ExemplarSamplerConfig#getSampleIntervalMillis()} milliseconds. 312 */ 313 @SuppressWarnings("FutureReturnValueIgnored") 314 private void rateLimitedObserve(AtomicBoolean accepting, double value, LongSupplier observeFunc) { 315 if (Double.isNaN(value)) { 316 return; 317 } 318 if (!accepting.compareAndSet(true, false)) { 319 return; 320 } 321 // observeFunc returns the current timestamp or 0 if no Exemplar was added. 322 long now = observeFunc.getAsLong(); 323 long sleepTime = 324 now == 0 ? config.getSampleIntervalMillis() : durationUntilNextExemplarExpires(now); 325 Scheduler.schedule( 326 () -> accepting.compareAndSet(false, true), sleepTime, TimeUnit.MILLISECONDS); 327 } 328 329 private long durationUntilNextExemplarExpires(long now) { 330 long oldestTimestamp = now; 331 for (Exemplar exemplar : exemplars) { 332 if (exemplar == null) { 333 return config.getSampleIntervalMillis(); 334 } else if (exemplar.getTimestampMillis() < oldestTimestamp) { 335 oldestTimestamp = exemplar.getTimestampMillis(); 336 } 337 } 338 long oldestAge = now - oldestTimestamp; 339 if (oldestAge < config.getMinRetentionPeriodMillis()) { 340 return config.getMinRetentionPeriodMillis() - oldestAge; 341 } 342 return config.getSampleIntervalMillis(); 343 } 344 345 private long updateCustomExemplar(int index, double value, Labels labels, long now) { 346 if (!labels.contains(Exemplar.TRACE_ID) && !labels.contains(Exemplar.SPAN_ID)) { 347 labels = mergeLabels(labels, sampleTraceContextLabels()); 348 } 349 customExemplars[index] = 350 Exemplar.builder().value(value).labels(labels).timestampMillis(now).build(); 351 return now; 352 } 353 354 private long updateExemplar(int index, double value, long now) { 355 Labels traceLabels = doSampleExemplar(); 356 if (!traceLabels.isEmpty()) { 357 exemplars[index] = 358 Exemplar.builder().value(value).labels(traceLabels).timestampMillis(now).build(); 359 return now; 360 } else { 361 return 0; 362 } 363 } 364 365 private Labels doSampleExemplar() { 366 Labels labels = sampleTraceContextLabels(); 367 if (labels.isEmpty()) { 368 return labels; 369 } 370 // Per-metric supplier first (more specific), then the global supplier. On a name 371 // collision the earlier (more specific) value is kept; the reserved trace_id/span_id 372 // labels always win over both. 373 labels = mergeAdditionalLabels(labels, additionalLabelsSupplier); 374 labels = mergeAdditionalLabels(labels, ExemplarLabelsSupplier.getExemplarLabelsSupplier()); 375 return labels; 376 } 377 378 private Labels sampleTraceContextLabels() { 379 // Using the qualified name so that Micrometer can exclude the dependency on 380 // prometheus-metrics-tracer-initializer 381 // as they provide their own implementation of SpanContextSupplier. 382 // If we had an import statement for SpanContextSupplier the dependency would be needed in any 383 // case. 384 SpanContext spanContext = 385 this.spanContext != null 386 ? this.spanContext 387 : io.prometheus.metrics.tracer.initializer.SpanContextSupplier.getSpanContext(); 388 try { 389 if (spanContext != null) { 390 if (spanContext.isCurrentSpanSampled()) { 391 String spanId = spanContext.getCurrentSpanId(); 392 String traceId = spanContext.getCurrentTraceId(); 393 if (spanId != null && traceId != null) { 394 spanContext.markCurrentSpanAsExemplar(); 395 return Labels.of(Exemplar.TRACE_ID, traceId, Exemplar.SPAN_ID, spanId); 396 } 397 } 398 } 399 } catch (NoClassDefFoundError ignored) { 400 // ignore 401 } 402 return Labels.EMPTY; 403 } 404 405 /** 406 * Merge labels from {@code supplier} into {@code base}, dropping any label whose name already 407 * exists in {@code base}. Never throws: a {@code null} supplier, a {@code null}/empty result, a 408 * colliding label name, or an exception thrown by the supplier all result in {@code base} being 409 * returned unchanged (minus the offending labels). A misbehaving supplier must never break metric 410 * collection. 411 */ 412 private static Labels mergeAdditionalLabels(Labels base, @Nullable Supplier<Labels> supplier) { 413 if (supplier == null) { 414 return base; 415 } 416 Labels extra; 417 try { 418 extra = supplier.get(); 419 } catch (Throwable ignored) { 420 // A misbehaving supplier (any RuntimeException or Error) must never break metric collection. 421 return base; 422 } 423 if (extra == null || extra.isEmpty()) { 424 return base; 425 } 426 return mergeLabels(base, extra); 427 } 428 429 /** 430 * Merge {@code extra} into {@code base}, dropping any label whose name already exists in {@code 431 * base}. 432 */ 433 private static Labels mergeLabels(Labels base, Labels extra) { 434 if (extra.isEmpty()) { 435 return base; 436 } 437 // Count name collisions with base in a single pass so we can merge exactly once below: base 438 // (trace_id/span_id and any more-specific supplier) always wins, so colliding labels are 439 // dropped. extra is itself a valid Labels (no internal duplicates), so the surviving labels 440 // never collide with each other and merge() cannot throw on a duplicate name. 441 int size = extra.size(); 442 int collisions = 0; 443 for (int i = 0; i < size; i++) { 444 if (base.contains(extra.getName(i))) { 445 collisions++; 446 } 447 } 448 if (collisions == 0) { 449 return base.merge(extra); 450 } 451 if (collisions == size) { 452 return base; 453 } 454 int kept = size - collisions; 455 String[] names = new String[kept]; 456 String[] values = new String[kept]; 457 int j = 0; 458 for (int i = 0; i < size; i++) { 459 String name = extra.getName(i); 460 if (!base.contains(name)) { 461 names[j] = name; 462 values[j] = extra.getValue(i); 463 j++; 464 } 465 } 466 return base.merge(names, values); 467 } 468}