001package io.prometheus.metrics.core.exemplars;
002
003import static java.util.Objects.requireNonNull;
004
005import io.prometheus.metrics.core.util.Scheduler;
006import io.prometheus.metrics.model.snapshots.Exemplar;
007import io.prometheus.metrics.model.snapshots.Exemplars;
008import io.prometheus.metrics.model.snapshots.Labels;
009import io.prometheus.metrics.tracer.common.SpanContext;
010import java.util.ArrayList;
011import java.util.List;
012import java.util.concurrent.TimeUnit;
013import java.util.concurrent.atomic.AtomicBoolean;
014import java.util.function.LongSupplier;
015import javax.annotation.Nullable;
016
017/**
018 * The ExemplarSampler selects Spans as exemplars.
019 *
020 * <p>There are two types of Exemplars: Regular exemplars are sampled implicitly if a supported
021 * tracing library is detected. Custom exemplars are provided explicitly in code, for example if a
022 * developer wants to make sure an Exemplar is created for a specific code path.
023 *
024 * <p>Spans will be marked as being an Exemplar by calling {@link
025 * SpanContext#markCurrentSpanAsExemplar()}. The tracer implementation should set a Span attribute
026 * to mark the current Span as an Exemplar. This attribute can be used by a trace sampling algorithm
027 * to make sure traces with Exemplars are sampled.
028 *
029 * <p>The ExemplarSample is rate-limited, so only a small fraction of Spans will be marked as
030 * Exemplars in an application with a large number of requests.
031 *
032 * <p>See {@link ExemplarSamplerConfig} for configuration options.
033 */
034public class ExemplarSampler {
035
036  private final ExemplarSamplerConfig config;
037  private final Exemplar[] exemplars;
038  private final Exemplar[]
039      customExemplars; // Separate from exemplars, because we don't want custom exemplars
040  // to be overwritten by automatic exemplar sampling. exemplars.length == customExemplars.length
041  private final AtomicBoolean acceptingNewExemplars = new AtomicBoolean(true);
042  private final AtomicBoolean acceptingNewCustomExemplars = new AtomicBoolean(true);
043
044  @Nullable
045  private final SpanContext
046      spanContext; // may be null, in that case SpanContextSupplier.getSpanContext() is used.
047
048  public ExemplarSampler(ExemplarSamplerConfig config) {
049    this(config, null);
050  }
051
052  /**
053   * Constructor with an additional {code spanContext} argument. This is useful for testing, but may
054   * also be useful in some production scenarios. If {@code spanContext != null} that spanContext is
055   * used and {@link io.prometheus.metrics.tracer.initializer.SpanContextSupplier
056   * SpanContextSupplier} is not used. If {@code spanContext == null} {@link
057   * io.prometheus.metrics.tracer.initializer.SpanContextSupplier#getSpanContext()
058   * SpanContextSupplier.getSpanContext()} is called to find a span context.
059   */
060  public ExemplarSampler(ExemplarSamplerConfig config, @Nullable SpanContext spanContext) {
061    this.config = config;
062    this.exemplars = new Exemplar[config.getNumberOfExemplars()];
063    this.customExemplars = new Exemplar[exemplars.length];
064    this.spanContext = spanContext;
065  }
066
067  public Exemplars collect() {
068    // this may run in parallel with observe()
069    long now = System.currentTimeMillis();
070    List<Exemplar> result = new ArrayList<>(exemplars.length);
071    for (int i = 0; i < customExemplars.length; i++) {
072      Exemplar exemplar = customExemplars[i];
073      if (exemplar != null) {
074        if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
075          customExemplars[i] = null;
076        } else {
077          result.add(exemplar);
078        }
079      }
080    }
081    for (int i = 0; i < exemplars.length && result.size() < exemplars.length; i++) {
082      Exemplar exemplar = exemplars[i];
083      if (exemplar != null) {
084        if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
085          exemplars[i] = null;
086        } else {
087          result.add(exemplar);
088        }
089      }
090    }
091    return Exemplars.of(result);
092  }
093
094  public void reset() {
095    for (int i = 0; i < exemplars.length; i++) {
096      exemplars[i] = null;
097      customExemplars[i] = null;
098    }
099  }
100
101  public void observe(double value) {
102    if (!acceptingNewExemplars.get()) {
103      return; // This is the hot path in a high-throughput application and should be as efficient as
104      // possible.
105    }
106    rateLimitedObserve(acceptingNewExemplars, value, () -> doObserve(value));
107  }
108
109  public void observeWithExemplar(double value, Labels labels) {
110    if (!acceptingNewCustomExemplars.get()) {
111      return; // This is the hot path in a high-throughput application and should be as efficient as
112      // possible.
113    }
114    rateLimitedObserve(
115        acceptingNewCustomExemplars, value, () -> doObserveWithExemplar(value, labels));
116  }
117
118  private long doObserve(double value) {
119    if (exemplars.length == 1) {
120      return doObserveSingleExemplar(value);
121    } else {
122      double[] classicUpperBounds = config.getHistogramClassicUpperBounds();
123      if (classicUpperBounds != null) {
124        return doObserveWithUpperBounds(value, classicUpperBounds);
125      } else {
126        return doObserveWithoutUpperBounds(value);
127      }
128    }
129  }
130
131  private long doObserveSingleExemplar(double value) {
132    long now = System.currentTimeMillis();
133    Exemplar current = exemplars[0];
134    if (current == null
135        || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
136      return updateExemplar(0, value, now);
137    }
138    return 0;
139  }
140
141  private long doObserveSingleExemplar(double amount, Labels labels) {
142    long now = System.currentTimeMillis();
143    Exemplar current = customExemplars[0];
144    if (current == null
145        || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
146      return updateCustomExemplar(0, amount, labels, now);
147    }
148    return 0;
149  }
150
151  private long doObserveWithUpperBounds(double value, double[] classicUpperBounds) {
152    long now = System.currentTimeMillis();
153    for (int i = 0; i < classicUpperBounds.length; i++) {
154      if (value <= classicUpperBounds[i]) {
155        Exemplar previous = exemplars[i];
156        if (previous == null
157            || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
158          return updateExemplar(i, value, now);
159        } else {
160          return 0;
161        }
162      }
163    }
164    return 0; // will never happen, as upperBounds contains +Inf
165  }
166
167  private long doObserveWithoutUpperBounds(double value) {
168    final long now = System.currentTimeMillis();
169    Exemplar smallest = null;
170    int smallestIndex = -1;
171    Exemplar largest = null;
172    int largestIndex = -1;
173    int nullIndex = -1;
174    for (int i = exemplars.length - 1; i >= 0; i--) {
175      Exemplar exemplar = exemplars[i];
176      if (exemplar == null) {
177        nullIndex = i;
178      } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
179        exemplars[i] = null;
180        nullIndex = i;
181      } else {
182        if (smallest == null || exemplar.getValue() < smallest.getValue()) {
183          smallest = exemplar;
184          smallestIndex = i;
185        }
186        if (largest == null || exemplar.getValue() > largest.getValue()) {
187          largest = exemplar;
188          largestIndex = i;
189        }
190      }
191    }
192    if (nullIndex >= 0) {
193      return updateExemplar(nullIndex, value, now);
194    }
195    if (now - requireNonNull(smallest).getTimestampMillis() > config.getMinRetentionPeriodMillis()
196        && value < smallest.getValue()) {
197      return updateExemplar(smallestIndex, value, now);
198    }
199    if (now - requireNonNull(largest).getTimestampMillis() > config.getMinRetentionPeriodMillis()
200        && value > largest.getValue()) {
201      return updateExemplar(largestIndex, value, now);
202    }
203    long oldestTimestamp = 0;
204    int oldestIndex = -1;
205    for (int i = 0; i < exemplars.length; i++) {
206      Exemplar exemplar = exemplars[i];
207      if (exemplar != null && exemplar != smallest && exemplar != largest) {
208        if (oldestTimestamp == 0 || exemplar.getTimestampMillis() < oldestTimestamp) {
209          oldestTimestamp = exemplar.getTimestampMillis();
210          oldestIndex = i;
211        }
212      }
213    }
214    if (oldestIndex != -1 && now - oldestTimestamp > config.getMinRetentionPeriodMillis()) {
215      return updateExemplar(oldestIndex, value, now);
216    }
217    return 0;
218  }
219
220  // Returns the timestamp of the newly added Exemplar (which is System.currentTimeMillis())
221  // or 0 if no Exemplar was added.
222  private long doObserveWithExemplar(double amount, Labels labels) {
223    if (customExemplars.length == 1) {
224      return doObserveSingleExemplar(amount, labels);
225    } else {
226      double[] classicUpperBounds = config.getHistogramClassicUpperBounds();
227      if (classicUpperBounds != null) {
228        return doObserveWithExemplarWithUpperBounds(amount, labels, classicUpperBounds);
229      } else {
230        return doObserveWithExemplarWithoutUpperBounds(amount, labels);
231      }
232    }
233  }
234
235  private long doObserveWithExemplarWithUpperBounds(
236      double value, Labels labels, double[] classicUpperBounds) {
237    long now = System.currentTimeMillis();
238    for (int i = 0; i < classicUpperBounds.length; i++) {
239      if (value <= classicUpperBounds[i]) {
240        Exemplar previous = customExemplars[i];
241        if (previous == null
242            || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
243          return updateCustomExemplar(i, value, labels, now);
244        } else {
245          return 0;
246        }
247      }
248    }
249    return 0; // will never happen, as upperBounds contains +Inf
250  }
251
252  private long doObserveWithExemplarWithoutUpperBounds(double amount, Labels labels) {
253    final long now = System.currentTimeMillis();
254    int nullPos = -1;
255    int oldestPos = -1;
256    Exemplar oldest = null;
257    for (int i = customExemplars.length - 1; i >= 0; i--) {
258      Exemplar exemplar = customExemplars[i];
259      if (exemplar == null) {
260        nullPos = i;
261      } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
262        customExemplars[i] = null;
263        nullPos = i;
264      } else {
265        if (oldest == null || exemplar.getTimestampMillis() < oldest.getTimestampMillis()) {
266          oldest = exemplar;
267          oldestPos = i;
268        }
269      }
270    }
271    if (nullPos != -1) {
272      return updateCustomExemplar(nullPos, amount, labels, now);
273    } else if (now - requireNonNull(oldest).getTimestampMillis()
274        > config.getMinRetentionPeriodMillis()) {
275      return updateCustomExemplar(oldestPos, amount, labels, now);
276    } else {
277      return 0;
278    }
279  }
280
281  /**
282   * Observing requires a system call to {@link System#currentTimeMillis()}, and it requires
283   * iterating over the existing exemplars to check if one of the existing exemplars can be
284   * replaced.
285   *
286   * <p>To avoid performance issues, we rate limit observing exemplars to {@link
287   * ExemplarSamplerConfig#getSampleIntervalMillis()} milliseconds.
288   */
289  @SuppressWarnings("FutureReturnValueIgnored")
290  private void rateLimitedObserve(AtomicBoolean accepting, double value, LongSupplier observeFunc) {
291    if (Double.isNaN(value)) {
292      return;
293    }
294    if (!accepting.compareAndSet(true, false)) {
295      return;
296    }
297    // observeFunc returns the current timestamp or 0 if no Exemplar was added.
298    long now = observeFunc.getAsLong();
299    long sleepTime =
300        now == 0 ? config.getSampleIntervalMillis() : durationUntilNextExemplarExpires(now);
301    Scheduler.schedule(
302        () -> accepting.compareAndSet(false, true), sleepTime, TimeUnit.MILLISECONDS);
303  }
304
305  private long durationUntilNextExemplarExpires(long now) {
306    long oldestTimestamp = now;
307    for (Exemplar exemplar : exemplars) {
308      if (exemplar == null) {
309        return config.getSampleIntervalMillis();
310      } else if (exemplar.getTimestampMillis() < oldestTimestamp) {
311        oldestTimestamp = exemplar.getTimestampMillis();
312      }
313    }
314    long oldestAge = now - oldestTimestamp;
315    if (oldestAge < config.getMinRetentionPeriodMillis()) {
316      return config.getMinRetentionPeriodMillis() - oldestAge;
317    }
318    return config.getSampleIntervalMillis();
319  }
320
321  private long updateCustomExemplar(int index, double value, Labels labels, long now) {
322    if (!labels.contains(Exemplar.TRACE_ID) && !labels.contains(Exemplar.SPAN_ID)) {
323      labels = labels.merge(doSampleExemplar());
324    }
325    customExemplars[index] =
326        Exemplar.builder().value(value).labels(labels).timestampMillis(now).build();
327    return now;
328  }
329
330  private long updateExemplar(int index, double value, long now) {
331    Labels traceLabels = doSampleExemplar();
332    if (!traceLabels.isEmpty()) {
333      exemplars[index] =
334          Exemplar.builder().value(value).labels(traceLabels).timestampMillis(now).build();
335      return now;
336    } else {
337      return 0;
338    }
339  }
340
341  private Labels doSampleExemplar() {
342    // Using the qualified name so that Micrometer can exclude the dependency on
343    // prometheus-metrics-tracer-initializer
344    // as they provide their own implementation of SpanContextSupplier.
345    // If we had an import statement for SpanContextSupplier the dependency would be needed in any
346    // case.
347    SpanContext spanContext =
348        this.spanContext != null
349            ? this.spanContext
350            : io.prometheus.metrics.tracer.initializer.SpanContextSupplier.getSpanContext();
351    try {
352      if (spanContext != null) {
353        if (spanContext.isCurrentSpanSampled()) {
354          String spanId = spanContext.getCurrentSpanId();
355          String traceId = spanContext.getCurrentTraceId();
356          if (spanId != null && traceId != null) {
357            spanContext.markCurrentSpanAsExemplar();
358            return Labels.of(Exemplar.TRACE_ID, traceId, Exemplar.SPAN_ID, spanId);
359          }
360        }
361      }
362    } catch (NoClassDefFoundError ignored) {
363      // ignore
364    }
365    return Labels.EMPTY;
366  }
367}