001package io.prometheus.metrics.core.exemplars;
002
003import io.prometheus.metrics.core.util.Scheduler;
004import io.prometheus.metrics.model.snapshots.Exemplar;
005import io.prometheus.metrics.model.snapshots.Exemplars;
006import io.prometheus.metrics.model.snapshots.Labels;
007import io.prometheus.metrics.tracer.common.SpanContext;
008import java.util.ArrayList;
009import java.util.List;
010import java.util.concurrent.TimeUnit;
011import java.util.concurrent.atomic.AtomicBoolean;
012import java.util.function.LongSupplier;
013
014/**
015 * The ExemplarSampler selects Spans as exemplars.
016 *
017 * <p>There are two types of Exemplars: Regular exemplars are sampled implicitly if a supported
018 * tracing library is detected. Custom exemplars are provided explicitly in code, for example if a
019 * developer wants to make sure an Exemplar is created for a specific code path.
020 *
021 * <p>Spans will be marked as being an Exemplar by calling {@link
022 * SpanContext#markCurrentSpanAsExemplar()}. The tracer implementation should set a Span attribute
023 * to mark the current Span as an Exemplar. This attribute can be used by a trace sampling algorithm
024 * to make sure traces with Exemplars are sampled.
025 *
 * <p>The ExemplarSampler is rate-limited, so only a small fraction of Spans will be marked as
027 * Exemplars in an application with a large number of requests.
028 *
029 * <p>See {@link ExemplarSamplerConfig} for configuration options.
030 */
031public class ExemplarSampler {
032
033  private final ExemplarSamplerConfig config;
034  private final Exemplar[] exemplars;
035  private final Exemplar[]
036      customExemplars; // Separate from exemplars, because we don't want custom exemplars
037  // to be overwritten by automatic exemplar sampling. exemplars.length == customExemplars.length
038  private final AtomicBoolean acceptingNewExemplars = new AtomicBoolean(true);
039  private final AtomicBoolean acceptingNewCustomExemplars = new AtomicBoolean(true);
040  private final SpanContext
041      spanContext; // may be null, in that case SpanContextSupplier.getSpanContext() is used.
042
043  public ExemplarSampler(ExemplarSamplerConfig config) {
044    this(config, null);
045  }
046
047  /**
048   * Constructor with an additional {code spanContext} argument. This is useful for testing, but may
049   * also be useful in some production scenarios. If {@code spanContext != null} that spanContext is
050   * used and {@link io.prometheus.metrics.tracer.initializer.SpanContextSupplier
051   * SpanContextSupplier} is not used. If {@code spanContext == null} {@link
052   * io.prometheus.metrics.tracer.initializer.SpanContextSupplier#getSpanContext()
053   * SpanContextSupplier.getSpanContext()} is called to find a span context.
054   */
055  public ExemplarSampler(ExemplarSamplerConfig config, SpanContext spanContext) {
056    this.config = config;
057    this.exemplars = new Exemplar[config.getNumberOfExemplars()];
058    this.customExemplars = new Exemplar[exemplars.length];
059    this.spanContext = spanContext;
060  }
061
062  public Exemplars collect() {
063    // this may run in parallel with observe()
064    long now = System.currentTimeMillis();
065    List<Exemplar> result = new ArrayList<>(exemplars.length);
066    for (int i = 0; i < customExemplars.length; i++) {
067      Exemplar exemplar = customExemplars[i];
068      if (exemplar != null) {
069        if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
070          customExemplars[i] = null;
071        } else {
072          result.add(exemplar);
073        }
074      }
075    }
076    for (int i = 0; i < exemplars.length && result.size() < exemplars.length; i++) {
077      Exemplar exemplar = exemplars[i];
078      if (exemplar != null) {
079        if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
080          exemplars[i] = null;
081        } else {
082          result.add(exemplar);
083        }
084      }
085    }
086    return Exemplars.of(result);
087  }
088
089  public void reset() {
090    for (int i = 0; i < exemplars.length; i++) {
091      exemplars[i] = null;
092      customExemplars[i] = null;
093    }
094  }
095
096  public void observe(double value) {
097    if (!acceptingNewExemplars.get()) {
098      return; // This is the hot path in a high-throughput application and should be as efficient as
099      // possible.
100    }
101    rateLimitedObserve(acceptingNewExemplars, value, () -> doObserve(value));
102  }
103
104  public void observeWithExemplar(double value, Labels labels) {
105    if (!acceptingNewCustomExemplars.get()) {
106      return; // This is the hot path in a high-throughput application and should be as efficient as
107      // possible.
108    }
109    rateLimitedObserve(
110        acceptingNewCustomExemplars, value, () -> doObserveWithExemplar(value, labels));
111  }
112
113  private long doObserve(double value) {
114    if (exemplars.length == 1) {
115      return doObserveSingleExemplar(value);
116    } else if (config.getHistogramClassicUpperBounds() != null) {
117      return doObserveWithUpperBounds(value);
118    } else {
119      return doObserveWithoutUpperBounds(value);
120    }
121  }
122
123  private long doObserveSingleExemplar(double value) {
124    long now = System.currentTimeMillis();
125    Exemplar current = exemplars[0];
126    if (current == null
127        || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
128      return updateExemplar(0, value, now);
129    }
130    return 0;
131  }
132
133  private long doObserveSingleExemplar(double amount, Labels labels) {
134    long now = System.currentTimeMillis();
135    Exemplar current = customExemplars[0];
136    if (current == null
137        || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
138      return updateCustomExemplar(0, amount, labels, now);
139    }
140    return 0;
141  }
142
143  private long doObserveWithUpperBounds(double value) {
144    long now = System.currentTimeMillis();
145    double[] upperBounds = config.getHistogramClassicUpperBounds();
146    for (int i = 0; i < upperBounds.length; i++) {
147      if (value <= upperBounds[i]) {
148        Exemplar previous = exemplars[i];
149        if (previous == null
150            || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
151          return updateExemplar(i, value, now);
152        } else {
153          return 0;
154        }
155      }
156    }
157    return 0; // will never happen, as upperBounds contains +Inf
158  }
159
160  private long doObserveWithoutUpperBounds(double value) {
161    final long now = System.currentTimeMillis();
162    Exemplar smallest = null;
163    int smallestIndex = -1;
164    Exemplar largest = null;
165    int largestIndex = -1;
166    int nullIndex = -1;
167    for (int i = exemplars.length - 1; i >= 0; i--) {
168      Exemplar exemplar = exemplars[i];
169      if (exemplar == null) {
170        nullIndex = i;
171      } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
172        exemplars[i] = null;
173        nullIndex = i;
174      } else {
175        if (smallest == null || exemplar.getValue() < smallest.getValue()) {
176          smallest = exemplar;
177          smallestIndex = i;
178        }
179        if (largest == null || exemplar.getValue() > largest.getValue()) {
180          largest = exemplar;
181          largestIndex = i;
182        }
183      }
184    }
185    if (nullIndex >= 0) {
186      return updateExemplar(nullIndex, value, now);
187    }
188    if (now - smallest.getTimestampMillis() > config.getMinRetentionPeriodMillis()
189        && value < smallest.getValue()) {
190      return updateExemplar(smallestIndex, value, now);
191    }
192    if (now - largest.getTimestampMillis() > config.getMinRetentionPeriodMillis()
193        && value > largest.getValue()) {
194      return updateExemplar(largestIndex, value, now);
195    }
196    long oldestTimestamp = 0;
197    int oldestIndex = -1;
198    for (int i = 0; i < exemplars.length; i++) {
199      Exemplar exemplar = exemplars[i];
200      if (exemplar != null && exemplar != smallest && exemplar != largest) {
201        if (oldestTimestamp == 0 || exemplar.getTimestampMillis() < oldestTimestamp) {
202          oldestTimestamp = exemplar.getTimestampMillis();
203          oldestIndex = i;
204        }
205      }
206    }
207    if (oldestIndex != -1 && now - oldestTimestamp > config.getMinRetentionPeriodMillis()) {
208      return updateExemplar(oldestIndex, value, now);
209    }
210    return 0;
211  }
212
213  // Returns the timestamp of the newly added Exemplar (which is System.currentTimeMillis())
214  // or 0 if no Exemplar was added.
215  private long doObserveWithExemplar(double amount, Labels labels) {
216    if (customExemplars.length == 1) {
217      return doObserveSingleExemplar(amount, labels);
218    } else if (config.getHistogramClassicUpperBounds() != null) {
219      return doObserveWithExemplarWithUpperBounds(amount, labels);
220    } else {
221      return doObserveWithExemplarWithoutUpperBounds(amount, labels);
222    }
223  }
224
225  private long doObserveWithExemplarWithUpperBounds(double value, Labels labels) {
226    long now = System.currentTimeMillis();
227    double[] upperBounds = config.getHistogramClassicUpperBounds();
228    for (int i = 0; i < upperBounds.length; i++) {
229      if (value <= upperBounds[i]) {
230        Exemplar previous = customExemplars[i];
231        if (previous == null
232            || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
233          return updateCustomExemplar(i, value, labels, now);
234        } else {
235          return 0;
236        }
237      }
238    }
239    return 0; // will never happen, as upperBounds contains +Inf
240  }
241
242  private long doObserveWithExemplarWithoutUpperBounds(double amount, Labels labels) {
243    final long now = System.currentTimeMillis();
244    int nullPos = -1;
245    int oldestPos = -1;
246    Exemplar oldest = null;
247    for (int i = customExemplars.length - 1; i >= 0; i--) {
248      Exemplar exemplar = customExemplars[i];
249      if (exemplar == null) {
250        nullPos = i;
251      } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
252        customExemplars[i] = null;
253        nullPos = i;
254      } else {
255        if (oldest == null || exemplar.getTimestampMillis() < oldest.getTimestampMillis()) {
256          oldest = exemplar;
257          oldestPos = i;
258        }
259      }
260    }
261    if (nullPos != -1) {
262      return updateCustomExemplar(nullPos, amount, labels, now);
263    } else if (now - oldest.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
264      return updateCustomExemplar(oldestPos, amount, labels, now);
265    } else {
266      return 0;
267    }
268  }
269
270  /**
271   * Observing requires a system call to {@link System#currentTimeMillis()}, and it requires
272   * iterating over the existing exemplars to check if one of the existing exemplars can be
273   * replaced.
274   *
275   * <p>To avoid performance issues, we rate limit observing exemplars to {@link
276   * ExemplarSamplerConfig#getSampleIntervalMillis()} milliseconds.
277   */
278  @SuppressWarnings("FutureReturnValueIgnored")
279  private void rateLimitedObserve(AtomicBoolean accepting, double value, LongSupplier observeFunc) {
280    if (Double.isNaN(value)) {
281      return;
282    }
283    if (!accepting.compareAndSet(true, false)) {
284      return;
285    }
286    // observeFunc returns the current timestamp or 0 if no Exemplar was added.
287    long now = observeFunc.getAsLong();
288    long sleepTime =
289        now == 0 ? config.getSampleIntervalMillis() : durationUntilNextExemplarExpires(now);
290    Scheduler.schedule(
291        () -> accepting.compareAndSet(false, true), sleepTime, TimeUnit.MILLISECONDS);
292  }
293
294  private long durationUntilNextExemplarExpires(long now) {
295    long oldestTimestamp = now;
296    for (Exemplar exemplar : exemplars) {
297      if (exemplar == null) {
298        return config.getSampleIntervalMillis();
299      } else if (exemplar.getTimestampMillis() < oldestTimestamp) {
300        oldestTimestamp = exemplar.getTimestampMillis();
301      }
302    }
303    long oldestAge = now - oldestTimestamp;
304    if (oldestAge < config.getMinRetentionPeriodMillis()) {
305      return config.getMinRetentionPeriodMillis() - oldestAge;
306    }
307    return config.getSampleIntervalMillis();
308  }
309
310  private long updateCustomExemplar(int index, double value, Labels labels, long now) {
311    if (!labels.contains(Exemplar.TRACE_ID) && !labels.contains(Exemplar.SPAN_ID)) {
312      labels = labels.merge(doSampleExemplar());
313    }
314    customExemplars[index] =
315        Exemplar.builder().value(value).labels(labels).timestampMillis(now).build();
316    return now;
317  }
318
319  private long updateExemplar(int index, double value, long now) {
320    Labels traceLabels = doSampleExemplar();
321    if (!traceLabels.isEmpty()) {
322      exemplars[index] =
323          Exemplar.builder().value(value).labels(traceLabels).timestampMillis(now).build();
324      return now;
325    } else {
326      return 0;
327    }
328  }
329
330  private Labels doSampleExemplar() {
331    // Using the qualified name so that Micrometer can exclude the dependency on
332    // prometheus-metrics-tracer-initializer
333    // as they provide their own implementation of SpanContextSupplier.
334    // If we had an import statement for SpanContextSupplier the dependency would be needed in any
335    // case.
336    SpanContext spanContext =
337        this.spanContext != null
338            ? this.spanContext
339            : io.prometheus.metrics.tracer.initializer.SpanContextSupplier.getSpanContext();
340    try {
341      if (spanContext != null) {
342        if (spanContext.isCurrentSpanSampled()) {
343          String spanId = spanContext.getCurrentSpanId();
344          String traceId = spanContext.getCurrentTraceId();
345          if (spanId != null && traceId != null) {
346            spanContext.markCurrentSpanAsExemplar();
347            return Labels.of(Exemplar.TRACE_ID, traceId, Exemplar.SPAN_ID, spanId);
348          }
349        }
350      }
351    } catch (NoClassDefFoundError ignored) {
352      // ignore
353    }
354    return Labels.EMPTY;
355  }
356}