001package io.prometheus.metrics.core.exemplars;
002
003import static java.util.Objects.requireNonNull;
004
005import io.prometheus.metrics.annotations.StableApi;
006import io.prometheus.metrics.core.util.Scheduler;
007import io.prometheus.metrics.model.snapshots.Exemplar;
008import io.prometheus.metrics.model.snapshots.Exemplars;
009import io.prometheus.metrics.model.snapshots.Labels;
010import io.prometheus.metrics.tracer.common.SpanContext;
011import java.util.ArrayList;
012import java.util.List;
013import java.util.concurrent.TimeUnit;
014import java.util.concurrent.atomic.AtomicBoolean;
015import java.util.function.LongSupplier;
016import javax.annotation.Nullable;
017
018/**
019 * The ExemplarSampler selects Spans as exemplars.
020 *
021 * <p>There are two types of Exemplars: Regular exemplars are sampled implicitly if a supported
022 * tracing library is detected. Custom exemplars are provided explicitly in code, for example if a
023 * developer wants to make sure an Exemplar is created for a specific code path.
024 *
025 * <p>Spans will be marked as being an Exemplar by calling {@link
026 * SpanContext#markCurrentSpanAsExemplar()}. The tracer implementation should set a Span attribute
027 * to mark the current Span as an Exemplar. This attribute can be used by a trace sampling algorithm
028 * to make sure traces with Exemplars are sampled.
029 *
030 * <p>The ExemplarSample is rate-limited, so only a small fraction of Spans will be marked as
031 * Exemplars in an application with a large number of requests.
032 *
033 * <p>See {@link ExemplarSamplerConfig} for configuration options.
034 */
035@StableApi
036public class ExemplarSampler {
037
038  private final ExemplarSamplerConfig config;
039  private final Exemplar[] exemplars;
040  private final Exemplar[]
041      customExemplars; // Separate from exemplars, because we don't want custom exemplars
042  // to be overwritten by automatic exemplar sampling. exemplars.length == customExemplars.length
043  private final AtomicBoolean acceptingNewExemplars = new AtomicBoolean(true);
044  private final AtomicBoolean acceptingNewCustomExemplars = new AtomicBoolean(true);
045
046  @Nullable
047  private final SpanContext
048      spanContext; // may be null, in that case SpanContextSupplier.getSpanContext() is used.
049
050  public ExemplarSampler(ExemplarSamplerConfig config) {
051    this(config, null);
052  }
053
054  /**
055   * Constructor with an additional {code spanContext} argument. This is useful for testing, but may
056   * also be useful in some production scenarios. If {@code spanContext != null} that spanContext is
057   * used and {@link io.prometheus.metrics.tracer.initializer.SpanContextSupplier
058   * SpanContextSupplier} is not used. If {@code spanContext == null} {@link
059   * io.prometheus.metrics.tracer.initializer.SpanContextSupplier#getSpanContext()
060   * SpanContextSupplier.getSpanContext()} is called to find a span context.
061   */
062  public ExemplarSampler(ExemplarSamplerConfig config, @Nullable SpanContext spanContext) {
063    this.config = config;
064    this.exemplars = new Exemplar[config.getNumberOfExemplars()];
065    this.customExemplars = new Exemplar[exemplars.length];
066    this.spanContext = spanContext;
067  }
068
069  public Exemplars collect() {
070    // this may run in parallel with observe()
071    long now = System.currentTimeMillis();
072    List<Exemplar> result = new ArrayList<>(exemplars.length);
073    for (int i = 0; i < customExemplars.length; i++) {
074      Exemplar exemplar = customExemplars[i];
075      if (exemplar != null) {
076        if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
077          customExemplars[i] = null;
078        } else {
079          result.add(exemplar);
080        }
081      }
082    }
083    for (int i = 0; i < exemplars.length && result.size() < exemplars.length; i++) {
084      Exemplar exemplar = exemplars[i];
085      if (exemplar != null) {
086        if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
087          exemplars[i] = null;
088        } else {
089          result.add(exemplar);
090        }
091      }
092    }
093    return Exemplars.of(result);
094  }
095
096  public void reset() {
097    for (int i = 0; i < exemplars.length; i++) {
098      exemplars[i] = null;
099      customExemplars[i] = null;
100    }
101  }
102
103  public void observe(double value) {
104    if (!acceptingNewExemplars.get()) {
105      return; // This is the hot path in a high-throughput application and should be as efficient as
106      // possible.
107    }
108    rateLimitedObserve(acceptingNewExemplars, value, () -> doObserve(value));
109  }
110
111  public void observeWithExemplar(double value, Labels labels) {
112    if (!acceptingNewCustomExemplars.get()) {
113      return; // This is the hot path in a high-throughput application and should be as efficient as
114      // possible.
115    }
116    rateLimitedObserve(
117        acceptingNewCustomExemplars, value, () -> doObserveWithExemplar(value, labels));
118  }
119
120  private long doObserve(double value) {
121    if (exemplars.length == 1) {
122      return doObserveSingleExemplar(value);
123    } else {
124      double[] classicUpperBounds = config.getHistogramClassicUpperBounds();
125      if (classicUpperBounds != null) {
126        return doObserveWithUpperBounds(value, classicUpperBounds);
127      } else {
128        return doObserveWithoutUpperBounds(value);
129      }
130    }
131  }
132
133  private long doObserveSingleExemplar(double value) {
134    long now = System.currentTimeMillis();
135    Exemplar current = exemplars[0];
136    if (current == null
137        || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
138      return updateExemplar(0, value, now);
139    }
140    return 0;
141  }
142
143  private long doObserveSingleExemplar(double amount, Labels labels) {
144    long now = System.currentTimeMillis();
145    Exemplar current = customExemplars[0];
146    if (current == null
147        || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
148      return updateCustomExemplar(0, amount, labels, now);
149    }
150    return 0;
151  }
152
153  private long doObserveWithUpperBounds(double value, double[] classicUpperBounds) {
154    long now = System.currentTimeMillis();
155    for (int i = 0; i < classicUpperBounds.length; i++) {
156      if (value <= classicUpperBounds[i]) {
157        Exemplar previous = exemplars[i];
158        if (previous == null
159            || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
160          return updateExemplar(i, value, now);
161        } else {
162          return 0;
163        }
164      }
165    }
166    return 0; // will never happen, as upperBounds contains +Inf
167  }
168
169  private long doObserveWithoutUpperBounds(double value) {
170    final long now = System.currentTimeMillis();
171    Exemplar smallest = null;
172    int smallestIndex = -1;
173    Exemplar largest = null;
174    int largestIndex = -1;
175    int nullIndex = -1;
176    for (int i = exemplars.length - 1; i >= 0; i--) {
177      Exemplar exemplar = exemplars[i];
178      if (exemplar == null) {
179        nullIndex = i;
180      } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
181        exemplars[i] = null;
182        nullIndex = i;
183      } else {
184        if (smallest == null || exemplar.getValue() < smallest.getValue()) {
185          smallest = exemplar;
186          smallestIndex = i;
187        }
188        if (largest == null || exemplar.getValue() > largest.getValue()) {
189          largest = exemplar;
190          largestIndex = i;
191        }
192      }
193    }
194    if (nullIndex >= 0) {
195      return updateExemplar(nullIndex, value, now);
196    }
197    if (now - requireNonNull(smallest).getTimestampMillis() > config.getMinRetentionPeriodMillis()
198        && value < smallest.getValue()) {
199      return updateExemplar(smallestIndex, value, now);
200    }
201    if (now - requireNonNull(largest).getTimestampMillis() > config.getMinRetentionPeriodMillis()
202        && value > largest.getValue()) {
203      return updateExemplar(largestIndex, value, now);
204    }
205    long oldestTimestamp = 0;
206    int oldestIndex = -1;
207    for (int i = 0; i < exemplars.length; i++) {
208      Exemplar exemplar = exemplars[i];
209      if (exemplar != null && exemplar != smallest && exemplar != largest) {
210        if (oldestTimestamp == 0 || exemplar.getTimestampMillis() < oldestTimestamp) {
211          oldestTimestamp = exemplar.getTimestampMillis();
212          oldestIndex = i;
213        }
214      }
215    }
216    if (oldestIndex != -1 && now - oldestTimestamp > config.getMinRetentionPeriodMillis()) {
217      return updateExemplar(oldestIndex, value, now);
218    }
219    return 0;
220  }
221
222  // Returns the timestamp of the newly added Exemplar (which is System.currentTimeMillis())
223  // or 0 if no Exemplar was added.
224  private long doObserveWithExemplar(double amount, Labels labels) {
225    if (customExemplars.length == 1) {
226      return doObserveSingleExemplar(amount, labels);
227    } else {
228      double[] classicUpperBounds = config.getHistogramClassicUpperBounds();
229      if (classicUpperBounds != null) {
230        return doObserveWithExemplarWithUpperBounds(amount, labels, classicUpperBounds);
231      } else {
232        return doObserveWithExemplarWithoutUpperBounds(amount, labels);
233      }
234    }
235  }
236
237  private long doObserveWithExemplarWithUpperBounds(
238      double value, Labels labels, double[] classicUpperBounds) {
239    long now = System.currentTimeMillis();
240    for (int i = 0; i < classicUpperBounds.length; i++) {
241      if (value <= classicUpperBounds[i]) {
242        Exemplar previous = customExemplars[i];
243        if (previous == null
244            || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
245          return updateCustomExemplar(i, value, labels, now);
246        } else {
247          return 0;
248        }
249      }
250    }
251    return 0; // will never happen, as upperBounds contains +Inf
252  }
253
254  private long doObserveWithExemplarWithoutUpperBounds(double amount, Labels labels) {
255    final long now = System.currentTimeMillis();
256    int nullPos = -1;
257    int oldestPos = -1;
258    Exemplar oldest = null;
259    for (int i = customExemplars.length - 1; i >= 0; i--) {
260      Exemplar exemplar = customExemplars[i];
261      if (exemplar == null) {
262        nullPos = i;
263      } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
264        customExemplars[i] = null;
265        nullPos = i;
266      } else {
267        if (oldest == null || exemplar.getTimestampMillis() < oldest.getTimestampMillis()) {
268          oldest = exemplar;
269          oldestPos = i;
270        }
271      }
272    }
273    if (nullPos != -1) {
274      return updateCustomExemplar(nullPos, amount, labels, now);
275    } else if (now - requireNonNull(oldest).getTimestampMillis()
276        > config.getMinRetentionPeriodMillis()) {
277      return updateCustomExemplar(oldestPos, amount, labels, now);
278    } else {
279      return 0;
280    }
281  }
282
283  /**
284   * Observing requires a system call to {@link System#currentTimeMillis()}, and it requires
285   * iterating over the existing exemplars to check if one of the existing exemplars can be
286   * replaced.
287   *
288   * <p>To avoid performance issues, we rate limit observing exemplars to {@link
289   * ExemplarSamplerConfig#getSampleIntervalMillis()} milliseconds.
290   */
291  @SuppressWarnings("FutureReturnValueIgnored")
292  private void rateLimitedObserve(AtomicBoolean accepting, double value, LongSupplier observeFunc) {
293    if (Double.isNaN(value)) {
294      return;
295    }
296    if (!accepting.compareAndSet(true, false)) {
297      return;
298    }
299    // observeFunc returns the current timestamp or 0 if no Exemplar was added.
300    long now = observeFunc.getAsLong();
301    long sleepTime =
302        now == 0 ? config.getSampleIntervalMillis() : durationUntilNextExemplarExpires(now);
303    Scheduler.schedule(
304        () -> accepting.compareAndSet(false, true), sleepTime, TimeUnit.MILLISECONDS);
305  }
306
307  private long durationUntilNextExemplarExpires(long now) {
308    long oldestTimestamp = now;
309    for (Exemplar exemplar : exemplars) {
310      if (exemplar == null) {
311        return config.getSampleIntervalMillis();
312      } else if (exemplar.getTimestampMillis() < oldestTimestamp) {
313        oldestTimestamp = exemplar.getTimestampMillis();
314      }
315    }
316    long oldestAge = now - oldestTimestamp;
317    if (oldestAge < config.getMinRetentionPeriodMillis()) {
318      return config.getMinRetentionPeriodMillis() - oldestAge;
319    }
320    return config.getSampleIntervalMillis();
321  }
322
323  private long updateCustomExemplar(int index, double value, Labels labels, long now) {
324    if (!labels.contains(Exemplar.TRACE_ID) && !labels.contains(Exemplar.SPAN_ID)) {
325      labels = labels.merge(doSampleExemplar());
326    }
327    customExemplars[index] =
328        Exemplar.builder().value(value).labels(labels).timestampMillis(now).build();
329    return now;
330  }
331
332  private long updateExemplar(int index, double value, long now) {
333    Labels traceLabels = doSampleExemplar();
334    if (!traceLabels.isEmpty()) {
335      exemplars[index] =
336          Exemplar.builder().value(value).labels(traceLabels).timestampMillis(now).build();
337      return now;
338    } else {
339      return 0;
340    }
341  }
342
343  private Labels doSampleExemplar() {
344    // Using the qualified name so that Micrometer can exclude the dependency on
345    // prometheus-metrics-tracer-initializer
346    // as they provide their own implementation of SpanContextSupplier.
347    // If we had an import statement for SpanContextSupplier the dependency would be needed in any
348    // case.
349    SpanContext spanContext =
350        this.spanContext != null
351            ? this.spanContext
352            : io.prometheus.metrics.tracer.initializer.SpanContextSupplier.getSpanContext();
353    try {
354      if (spanContext != null) {
355        if (spanContext.isCurrentSpanSampled()) {
356          String spanId = spanContext.getCurrentSpanId();
357          String traceId = spanContext.getCurrentTraceId();
358          if (spanId != null && traceId != null) {
359            spanContext.markCurrentSpanAsExemplar();
360            return Labels.of(Exemplar.TRACE_ID, traceId, Exemplar.SPAN_ID, spanId);
361          }
362        }
363      }
364    } catch (NoClassDefFoundError ignored) {
365      // ignore
366    }
367    return Labels.EMPTY;
368  }
369}