001package io.prometheus.metrics.model.snapshots;
002
003import static java.lang.Character.MAX_CODE_POINT;
004import static java.lang.Character.MAX_LOW_SURROGATE;
005import static java.lang.Character.MIN_HIGH_SURROGATE;
006
007import io.prometheus.metrics.annotations.StableApi;
008import io.prometheus.metrics.config.EscapingScheme;
009import java.nio.charset.StandardCharsets;
010import javax.annotation.Nullable;
011
012/**
013 * Utility for Prometheus Metric and Label naming.
014 *
015 * <p>Note that this library allows dots in metric and label names. Dots will automatically be
016 * replaced with underscores in Prometheus exposition formats. However, if metrics are exposed in
017 * OpenTelemetry format the dots are retained.
018 */
019@StableApi
020public class PrometheusNaming {
021
022  /**
023   * Reserved metric name suffixes. These suffixes are automatically appended by Prometheus
024   * exposition format writers for specific metric types: {@code _total} and {@code _created} for
025   * counters, {@code _info} for info metrics, and {@code _bucket} for histograms. Including these
026   * in a base metric name via {@link #sanitizeMetricName(String)} would cause confusion or
027   * double-suffixing, so they are stripped during sanitization.
028   */
029  static final String[] RESERVED_METRIC_NAME_SUFFIXES = {
030    "_total", "_created", "_bucket", "_info", ".total", ".created", ".bucket", ".info"
031  };
032
033  /**
034   * Test if a metric name is valid. Any non-empty valid UTF-8 string is accepted.
035   *
036   * <p>Collision detection for suffixes like {@code _total}, {@code _info}, {@code _bucket}, etc.
037   * is handled at registration time by the {@link
038   * io.prometheus.metrics.model.registry.PrometheusRegistry PrometheusRegistry}, not by name
039   * validation.
040   *
041   * <p>If a metric has a {@link Unit}, the metric name SHOULD end with the unit as a suffix. Note
042   * that <a href="https://openmetrics.io/">OpenMetrics</a> requires metric names to have their unit
043   * as suffix, and we implement this in {@code prometheus-metrics-core}. However, {@code
044   * prometheus-metrics-model} does not enforce Unit suffixes.
045   *
046   * <p>Example: If you create a Counter for a processing time with Unit {@link Unit#SECONDS
047   * SECONDS}, the name should be {@code processing_time_seconds}. When exposed in OpenMetrics Text
048   * format, this will be represented as two values: {@code processing_time_seconds_total} for the
049   * counter value, and the optional {@code processing_time_seconds_created} timestamp.
050   *
051   * <p>Use {@link #sanitizeMetricName(String)} for compatibility-preserving sanitization that
052   * strips reserved suffixes, or {@link #normalizeMetricName(String)} for permissive normalization
053   * that keeps the original suffixes intact.
054   */
055  public static boolean isValidMetricName(String name) {
056    return validateMetricName(name) == null;
057  }
058
059  /**
060   * Same as {@link #isValidMetricName(String)}, but produces an error message.
061   *
062   * <p>The name is valid if the error message is {@code null}.
063   */
064  @Nullable
065  public static String validateMetricName(String name) {
066    if (isValidUtf8(name)) {
067      return null;
068    }
069    return "The metric name contains unsupported characters";
070  }
071
072  public static boolean isValidLegacyMetricName(String name) {
073    if (name.isEmpty()) {
074      return false;
075    }
076    // First character must be [a-zA-Z_:]
077    char first = name.charAt(0);
078    if (!((first >= 'a' && first <= 'z')
079        || (first >= 'A' && first <= 'Z')
080        || first == '_'
081        || first == ':')) {
082      return false;
083    }
084    // Remaining characters must be [a-zA-Z0-9_:]
085    for (int i = 1; i < name.length(); i++) {
086      char c = name.charAt(i);
087      if (!((c >= 'a' && c <= 'z')
088          || (c >= 'A' && c <= 'Z')
089          || (c >= '0' && c <= '9')
090          || c == '_'
091          || c == ':')) {
092        return false;
093      }
094    }
095    return true;
096  }
097
098  public static boolean isValidLabelName(String name) {
099    return isValidUtf8(name)
100        && !(name.startsWith("__")
101            || name.startsWith("._")
102            || name.startsWith("..")
103            || name.startsWith("_."));
104  }
105
106  private static boolean isValidUtf8(String name) {
107    return !name.isEmpty() && StandardCharsets.UTF_8.newEncoder().canEncode(name);
108  }
109
110  public static boolean isValidLegacyLabelName(String name) {
111    if (name.isEmpty()) {
112      return false;
113    }
114    // First character must be [a-zA-Z_]
115    char first = name.charAt(0);
116    if (!((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_')) {
117      return false;
118    }
119    // Remaining characters must be [a-zA-Z0-9_]
120    for (int i = 1; i < name.length(); i++) {
121      char c = name.charAt(i);
122      if (!((c >= 'a' && c <= 'z')
123          || (c >= 'A' && c <= 'Z')
124          || (c >= '0' && c <= '9')
125          || c == '_')) {
126        return false;
127      }
128    }
129    return true;
130  }
131
132  /** Units may not have illegal characters. */
133  public static boolean isValidUnitName(String name) {
134    return validateUnitName(name) == null;
135  }
136
137  /** Same as {@link #isValidUnitName(String)} but returns an error message. */
138  @Nullable
139  public static String validateUnitName(String name) {
140    if (name.isEmpty()) {
141      return "The unit name must not be empty.";
142    }
143    // Check if all characters are [a-zA-Z0-9_.:]+
144    for (int i = 0; i < name.length(); i++) {
145      char c = name.charAt(i);
146      if (!((c >= 'a' && c <= 'z')
147          || (c >= 'A' && c <= 'Z')
148          || (c >= '0' && c <= '9')
149          || c == '_'
150          || c == '.'
151          || c == ':')) {
152        return "The unit name contains unsupported characters";
153      }
154    }
155    return null;
156  }
157
158  /**
159   * Get the metric or label name that is used in Prometheus exposition format.
160   *
161   * @param name must be a valid metric or label name, i.e. {@link #isValidMetricName(String)
162   *     isValidMetricName(name)} or {@link #isValidLabelName(String) isValidLabelName(name)} must
163   *     be true.
164   * @return the name with dots replaced by underscores.
165   */
166  public static String prometheusName(String name) {
167    return escapeName(name, EscapingScheme.UNDERSCORE_ESCAPING);
168  }
169
170  /**
171   * Convert an arbitrary string to a valid metric name.
172   *
173   * <p>Reserved metric name suffixes ({@code _total}, {@code _created}, {@code _bucket}, {@code
174   * _info} and their dot variants) are stripped. These suffixes are appended automatically by
175   * Prometheus exposition format writers, so including them in a base metric name would result in
176   * double-suffixing or unintended type inference. For example, a JMX attribute named {@code
177   * RequestTotal} would be sanitized from {@code kafka_consumer_request_total} to {@code
178   * kafka_consumer_request}, and the counter writer would add {@code _total} back at scrape time.
179   *
180   * <p>This behaviour was present in client_java 1.5.x and is restored here to fix a regression
181   * introduced in 1.6.0 that affected downstream tools (e.g. the JMX Exporter and the simpleclient
182   * bridge) which relied on {@code sanitizeMetricName} to strip these suffixes before passing names
183   * to the snapshot builders.
184   *
185   * <p>If you want permissive normalization that keeps reserved suffixes intact, use {@link
186   * #normalizeMetricName(String)} instead.
187   *
188   * @throws IllegalArgumentException if the input is empty
189   */
190  public static String sanitizeMetricName(String metricName) {
191    if (metricName.isEmpty()) {
192      throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name.");
193    }
194    String sanitizedName = metricName;
195    boolean stripped = true;
196    while (stripped) {
197      stripped = false;
198      // When the name equals the suffix exactly, drop the leading separator character to avoid
199      // returning an empty string (e.g. "_total" → "total", ".info" → "info").
200      for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
201        if (sanitizedName.equals(reservedSuffix)) {
202          return reservedSuffix.substring(1);
203        }
204      }
205      for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
206        if (sanitizedName.endsWith(reservedSuffix)) {
207          sanitizedName =
208              sanitizedName.substring(0, sanitizedName.length() - reservedSuffix.length());
209          stripped = true;
210          break; // restart the outer loop to re-check all suffixes on the shortened name
211        }
212      }
213    }
214    return sanitizedName;
215  }
216
217  /**
218   * Like {@link #sanitizeMetricName(String)}, but also makes sure that the unit is appended as a
219   * suffix if the unit is not {@code null}.
220   */
221  public static String sanitizeMetricName(String metricName, Unit unit) {
222    String result = sanitizeMetricName(metricName);
223    if (unit != null) {
224      if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) {
225        result += "_" + unit;
226      }
227    }
228    return result;
229  }
230
231  /**
232   * Convert an arbitrary string to a valid metric name without stripping reserved suffixes.
233   *
234   * <p>Any non-empty valid UTF-8 string is accepted and returned unchanged. This is the permissive
235   * normalization behavior introduced in 1.6.0. Use this method for new integrations that want to
236   * preserve the original metric name and rely on registration-time collision detection instead of
237   * suffix stripping.
238   *
239   * @throws IllegalArgumentException if the input is empty
240   */
241  public static String normalizeMetricName(String metricName) {
242    if (metricName.isEmpty()) {
243      throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name.");
244    }
245    return metricName;
246  }
247
248  /**
249   * Like {@link #normalizeMetricName(String)}, but also makes sure that the unit is appended as a
250   * suffix if the unit is not {@code null}.
251   */
252  public static String normalizeMetricName(String metricName, Unit unit) {
253    String result = normalizeMetricName(metricName);
254    if (unit != null) {
255      if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) {
256        result += "_" + unit;
257      }
258    }
259    return result;
260  }
261
262  /**
263   * Convert an arbitrary string to a name where {@link #isValidLabelName(String)
264   * isValidLabelName(name)} is true.
265   */
266  public static String sanitizeLabelName(String labelName) {
267    if (labelName.isEmpty()) {
268      throw new IllegalArgumentException("Cannot convert an empty string to a valid label name.");
269    }
270    String sanitizedName = labelName;
271    while (sanitizedName.startsWith("__")
272        || sanitizedName.startsWith("_.")
273        || sanitizedName.startsWith("._")
274        || sanitizedName.startsWith("..")) {
275      sanitizedName = sanitizedName.substring(1);
276    }
277    return sanitizedName;
278  }
279
280  /**
281   * Convert an arbitrary string to a valid unit name by replacing illegal characters.
282   *
283   * @throws IllegalArgumentException if the {@code unitName} cannot be converted, e.g. if you call
284   *     {@code sanitizeUnitName("")}.
285   * @throws NullPointerException if {@code unitName} is null.
286   */
287  public static String sanitizeUnitName(String unitName) {
288    if (unitName.isEmpty()) {
289      throw new IllegalArgumentException("Cannot convert an empty string to a valid unit name.");
290    }
291    String sanitizedName = replaceIllegalCharsInUnitName(unitName);
292    while (sanitizedName.startsWith("_") || sanitizedName.startsWith(".")) {
293      sanitizedName = sanitizedName.substring(1);
294    }
295    while (sanitizedName.endsWith(".") || sanitizedName.endsWith("_")) {
296      sanitizedName = sanitizedName.substring(0, sanitizedName.length() - 1);
297    }
298    if (sanitizedName.isEmpty()) {
299      throw new IllegalArgumentException(
300          "Cannot convert '" + unitName + "' into a valid unit name.");
301    }
302    return sanitizedName;
303  }
304
305  /** Returns a string with only valid unit name characters [a-zA-Z0-9_.:]. */
306  private static String replaceIllegalCharsInUnitName(String name) {
307    int length = name.length();
308    char[] sanitized = new char[length];
309    for (int i = 0; i < length; i++) {
310      char ch = name.charAt(i);
311      if (ch == ':'
312          || ch == '.'
313          || (ch >= 'a' && ch <= 'z')
314          || (ch >= 'A' && ch <= 'Z')
315          || (ch >= '0' && ch <= '9')) {
316        sanitized[i] = ch;
317      } else {
318        sanitized[i] = '_';
319      }
320    }
321    return new String(sanitized);
322  }
323
324  /**
325   * Escapes the incoming name according to the provided escaping scheme. Depending on the rules of
326   * escaping, this may cause no change in the string that is returned (especially NO_ESCAPING,
327   * which by definition is a noop). This method does not do any validation of the name.
328   */
329  public static String escapeName(String name, EscapingScheme scheme) {
330    if (name.isEmpty() || !needsEscaping(name, scheme)) {
331      return name;
332    }
333
334    StringBuilder escaped = new StringBuilder();
335    switch (scheme) {
336      case ALLOW_UTF8:
337        return name;
338      case UNDERSCORE_ESCAPING:
339        for (int i = 0; i < name.length(); ) {
340          int c = name.codePointAt(i);
341          if (isValidLegacyChar(c, i)) {
342            escaped.appendCodePoint(c);
343          } else {
344            escaped.append('_');
345          }
346          i += Character.charCount(c);
347        }
348        return escaped.toString();
349      case DOTS_ESCAPING:
350        // Do not early return for legacy valid names, we still escape underscores.
351        for (int i = 0; i < name.length(); ) {
352          int c = name.codePointAt(i);
353          if (c == '_') {
354            escaped.append("__");
355          } else if (c == '.') {
356            escaped.append("_dot_");
357          } else if (isValidLegacyChar(c, i)) {
358            escaped.appendCodePoint(c);
359          } else {
360            escaped.append("__");
361          }
362          i += Character.charCount(c);
363        }
364        return escaped.toString();
365      case VALUE_ENCODING_ESCAPING:
366        escaped.append("U__");
367        for (int i = 0; i < name.length(); ) {
368          int c = name.codePointAt(i);
369          if (c == '_') {
370            escaped.append("__");
371          } else if (isValidLegacyChar(c, i)) {
372            escaped.appendCodePoint(c);
373          } else if (!isValidUtf8Char(c)) {
374            escaped.append("_FFFD_");
375          } else {
376            escaped.append('_');
377            escaped.append(Integer.toHexString(c));
378            escaped.append('_');
379          }
380          i += Character.charCount(c);
381        }
382        return escaped.toString();
383      default:
384        throw new IllegalArgumentException("Invalid escaping scheme " + scheme);
385    }
386  }
387
388  public static boolean needsEscaping(String name, EscapingScheme scheme) {
389    return !isValidLegacyMetricName(name)
390        || (scheme == EscapingScheme.DOTS_ESCAPING && (name.contains(".") || name.contains("_")));
391  }
392
393  static boolean isValidLegacyChar(int c, int i) {
394    return (c >= 'a' && c <= 'z')
395        || (c >= 'A' && c <= 'Z')
396        || c == '_'
397        || c == ':'
398        || (c >= '0' && c <= '9' && i > 0);
399  }
400
401  private static boolean isValidUtf8Char(int c) {
402    return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT);
403  }
404}