001package io.prometheus.metrics.model.snapshots;
002
003import static java.lang.Character.MAX_CODE_POINT;
004import static java.lang.Character.MAX_LOW_SURROGATE;
005import static java.lang.Character.MIN_HIGH_SURROGATE;
006
007import io.prometheus.metrics.config.EscapingScheme;
008import java.nio.charset.StandardCharsets;
009import java.util.regex.Pattern;
010import javax.annotation.Nullable;
011
012/**
013 * Utility for Prometheus Metric and Label naming.
014 *
015 * <p>Note that this library allows dots in metric and label names. Dots will automatically be
016 * replaced with underscores in Prometheus exposition formats. However, if metrics are exposed in
017 * OpenTelemetry format the dots are retained.
018 */
019public class PrometheusNaming {
020
  /**
   * Legacy metric name syntax: ASCII letters, digits, underscores and colons, where the first
   * character must not be a digit.
   */
  private static final Pattern METRIC_NAME_PATTERN = Pattern.compile("^[a-zA-Z_:][a-zA-Z0-9_:]*$");

  /**
   * Legacy label name syntax: ASCII letters, digits and underscores, where the first character
   * must not be a digit.
   */
  private static final Pattern LEGACY_LABEL_NAME_PATTERN =
      Pattern.compile("^[a-zA-Z_][a-zA-Z0-9_]*$");

  /**
   * Legal characters for unit names: ASCII letters, digits, underscore, dot and colon. At least
   * one character is required.
   */
  private static final Pattern UNIT_NAME_PATTERN = Pattern.compile("^[a-zA-Z0-9_.:]+$");
029
  /**
   * Suffixes that a metric name must not end with.
   *
   * <p>Every suffix is listed twice: once with a leading underscore and once with a leading dot.
   * The unit name helpers in this class drop that first character and compare against the bare
   * word (for example {@code total}).
   *
   * <p>According to OpenMetrics {@code _count} and {@code _sum} (and {@code _gcount}, {@code _gsum})
   * should also be reserved metric name suffixes. However, popular instrumentation libraries have
   * Gauges with names ending in {@code _count}. Examples:
   *
   * <ul>
   *   <li>Micrometer: {@code jvm_buffer_count}
   *   <li>OpenTelemetry: {@code process_runtime_jvm_buffer_count}
   * </ul>
   *
   * We do not treat {@code _count} and {@code _sum} as reserved suffixes here for compatibility
   * with these libraries. However, there is a risk of name conflict if someone creates a gauge
   * named {@code my_data_count} and a histogram or summary named {@code my_data}, because the
   * histogram or summary will implicitly have a sample named {@code my_data_count}.
   */
  private static final String[] RESERVED_METRIC_NAME_SUFFIXES = {
    "_total", "_created", "_bucket", "_info",
    ".total", ".created", ".bucket", ".info"
  };
049
050  /**
051   * Test if a metric name is valid. Rules:
052   *
053   * <ul>
054   *   <li>The name must match {@link #METRIC_NAME_PATTERN}.
055   *   <li>The name MUST NOT end with one of the {@link #RESERVED_METRIC_NAME_SUFFIXES}.
056   * </ul>
057   *
058   * If a metric has a {@link Unit}, the metric name SHOULD end with the unit as a suffix. Note that
059   * <a href="https://openmetrics.io/">OpenMetrics</a> requires metric names to have their unit as
060   * suffix, and we implement this in {@code prometheus-metrics-core}. However, {@code
061   * prometheus-metrics-model} does not enforce Unit suffixes.
062   *
063   * <p>Example: If you create a Counter for a processing time with Unit {@link Unit#SECONDS
064   * SECONDS}, the name should be {@code processing_time_seconds}. When exposed in OpenMetrics Text
065   * format, this will be represented as two values: {@code processing_time_seconds_total} for the
066   * counter value, and the optional {@code processing_time_seconds_created} timestamp.
067   *
068   * <p>Use {@link #sanitizeMetricName(String)} to convert arbitrary Strings to valid metric names.
069   */
070  public static boolean isValidMetricName(String name) {
071    return validateMetricName(name) == null;
072  }
073
074  /**
075   * Same as {@link #isValidMetricName(String)}, but produces an error message.
076   *
077   * <p>The name is valid if the error message is {@code null}.
078   */
079  @Nullable
080  public static String validateMetricName(String name) {
081    for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
082      if (name.endsWith(reservedSuffix)) {
083        return "The metric name must not include the '" + reservedSuffix + "' suffix.";
084      }
085    }
086    if (isValidUtf8(name)) {
087      return null;
088    }
089    return "The metric name contains unsupported characters";
090  }
091
092  public static boolean isValidLegacyMetricName(String name) {
093    return METRIC_NAME_PATTERN.matcher(name).matches();
094  }
095
096  public static boolean isValidLabelName(String name) {
097    return isValidUtf8(name)
098        && !(name.startsWith("__")
099            || name.startsWith("._")
100            || name.startsWith("..")
101            || name.startsWith("_."));
102  }
103
104  private static boolean isValidUtf8(String name) {
105    return !name.isEmpty() && StandardCharsets.UTF_8.newEncoder().canEncode(name);
106  }
107
108  public static boolean isValidLegacyLabelName(String name) {
109    return LEGACY_LABEL_NAME_PATTERN.matcher(name).matches();
110  }
111
112  /**
113   * Units may not have illegal characters, and they may not end with a reserved suffix like
114   * 'total'.
115   */
116  public static boolean isValidUnitName(String name) {
117    return validateUnitName(name) == null;
118  }
119
120  /** Same as {@link #isValidUnitName(String)} but returns an error message. */
121  @Nullable
122  public static String validateUnitName(String name) {
123    if (name.isEmpty()) {
124      return "The unit name must not be empty.";
125    }
126    for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
127      String suffixName = reservedSuffix.substring(1);
128      if (name.endsWith(suffixName)) {
129        return suffixName + " is a reserved suffix in Prometheus";
130      }
131    }
132    if (!UNIT_NAME_PATTERN.matcher(name).matches()) {
133      return "The unit name contains unsupported characters";
134    }
135    return null;
136  }
137
138  /**
139   * Get the metric or label name that is used in Prometheus exposition format.
140   *
141   * @param name must be a valid metric or label name, i.e. {@link #isValidMetricName(String)
142   *     isValidMetricName(name)} or {@link #isValidLabelName(String) isValidLabelName(name)} must
143   *     be true.
144   * @return the name with dots replaced by underscores.
145   */
146  public static String prometheusName(String name) {
147    return escapeName(name, EscapingScheme.UNDERSCORE_ESCAPING);
148  }
149
150  /**
151   * Convert an arbitrary string to a name where {@link #isValidMetricName(String)
152   * isValidMetricName(name)} is true.
153   */
154  public static String sanitizeMetricName(String metricName) {
155    if (metricName.isEmpty()) {
156      throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name.");
157    }
158    String sanitizedName = metricName;
159    boolean modified = true;
160    while (modified) {
161      modified = false;
162      for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
163        if (sanitizedName.equals(reservedSuffix)) {
164          // This is for the corner case when you call sanitizeMetricName("_total").
165          // In that case the result will be "total".
166          return reservedSuffix.substring(1);
167        }
168        if (sanitizedName.endsWith(reservedSuffix)) {
169          sanitizedName =
170              sanitizedName.substring(0, sanitizedName.length() - reservedSuffix.length());
171          modified = true;
172        }
173      }
174    }
175    return sanitizedName;
176  }
177
178  /**
179   * Like {@link #sanitizeMetricName(String)}, but also makes sure that the unit is appended as a
180   * suffix if the unit is not {@code null}.
181   */
182  public static String sanitizeMetricName(String metricName, Unit unit) {
183    String result = sanitizeMetricName(metricName);
184    if (unit != null) {
185      if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) {
186        result += "_" + unit;
187      }
188    }
189    return result;
190  }
191
192  /**
193   * Convert an arbitrary string to a name where {@link #isValidLabelName(String)
194   * isValidLabelName(name)} is true.
195   */
196  public static String sanitizeLabelName(String labelName) {
197    if (labelName.isEmpty()) {
198      throw new IllegalArgumentException("Cannot convert an empty string to a valid label name.");
199    }
200    String sanitizedName = labelName;
201    while (sanitizedName.startsWith("__")
202        || sanitizedName.startsWith("_.")
203        || sanitizedName.startsWith("._")
204        || sanitizedName.startsWith("..")) {
205      sanitizedName = sanitizedName.substring(1);
206    }
207    return sanitizedName;
208  }
209
210  /**
211   * Convert an arbitrary string to a name where {@link #validateUnitName(String)} is {@code null}
212   * (i.e. the name is valid).
213   *
214   * @throws IllegalArgumentException if the {@code unitName} cannot be converted, for example if
215   *     you call {@code sanitizeUnitName("total")} or {@code sanitizeUnitName("")}.
216   * @throws NullPointerException if {@code unitName} is null.
217   */
218  public static String sanitizeUnitName(String unitName) {
219    if (unitName.isEmpty()) {
220      throw new IllegalArgumentException("Cannot convert an empty string to a valid unit name.");
221    }
222    String sanitizedName = replaceIllegalCharsInUnitName(unitName);
223    boolean modified = true;
224    while (modified) {
225      modified = false;
226      while (sanitizedName.startsWith("_") || sanitizedName.startsWith(".")) {
227        sanitizedName = sanitizedName.substring(1);
228        modified = true;
229      }
230      while (sanitizedName.endsWith(".") || sanitizedName.endsWith("_")) {
231        sanitizedName = sanitizedName.substring(0, sanitizedName.length() - 1);
232        modified = true;
233      }
234      for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
235        String suffixName = reservedSuffix.substring(1);
236        if (sanitizedName.endsWith(suffixName)) {
237          sanitizedName = sanitizedName.substring(0, sanitizedName.length() - suffixName.length());
238          modified = true;
239        }
240      }
241    }
242    if (sanitizedName.isEmpty()) {
243      throw new IllegalArgumentException(
244          "Cannot convert '" + unitName + "' into a valid unit name.");
245    }
246    return sanitizedName;
247  }
248
249  /** Returns a string that matches {@link #UNIT_NAME_PATTERN}. */
250  private static String replaceIllegalCharsInUnitName(String name) {
251    int length = name.length();
252    char[] sanitized = new char[length];
253    for (int i = 0; i < length; i++) {
254      char ch = name.charAt(i);
255      if (ch == ':'
256          || ch == '.'
257          || (ch >= 'a' && ch <= 'z')
258          || (ch >= 'A' && ch <= 'Z')
259          || (ch >= '0' && ch <= '9')) {
260        sanitized[i] = ch;
261      } else {
262        sanitized[i] = '_';
263      }
264    }
265    return new String(sanitized);
266  }
267
268  /**
269   * Escapes the incoming name according to the provided escaping scheme. Depending on the rules of
270   * escaping, this may cause no change in the string that is returned (especially NO_ESCAPING,
271   * which by definition is a noop). This method does not do any validation of the name.
272   */
273  public static String escapeName(String name, EscapingScheme scheme) {
274    if (name.isEmpty() || !needsEscaping(name, scheme)) {
275      return name;
276    }
277
278    StringBuilder escaped = new StringBuilder();
279    switch (scheme) {
280      case ALLOW_UTF8:
281        return name;
282      case UNDERSCORE_ESCAPING:
283        for (int i = 0; i < name.length(); ) {
284          int c = name.codePointAt(i);
285          if (isValidLegacyChar(c, i)) {
286            escaped.appendCodePoint(c);
287          } else {
288            escaped.append('_');
289          }
290          i += Character.charCount(c);
291        }
292        return escaped.toString();
293      case DOTS_ESCAPING:
294        // Do not early return for legacy valid names, we still escape underscores.
295        for (int i = 0; i < name.length(); ) {
296          int c = name.codePointAt(i);
297          if (c == '_') {
298            escaped.append("__");
299          } else if (c == '.') {
300            escaped.append("_dot_");
301          } else if (isValidLegacyChar(c, i)) {
302            escaped.appendCodePoint(c);
303          } else {
304            escaped.append("__");
305          }
306          i += Character.charCount(c);
307        }
308        return escaped.toString();
309      case VALUE_ENCODING_ESCAPING:
310        escaped.append("U__");
311        for (int i = 0; i < name.length(); ) {
312          int c = name.codePointAt(i);
313          if (c == '_') {
314            escaped.append("__");
315          } else if (isValidLegacyChar(c, i)) {
316            escaped.appendCodePoint(c);
317          } else if (!isValidUtf8Char(c)) {
318            escaped.append("_FFFD_");
319          } else {
320            escaped.append('_');
321            escaped.append(Integer.toHexString(c));
322            escaped.append('_');
323          }
324          i += Character.charCount(c);
325        }
326        return escaped.toString();
327      default:
328        throw new IllegalArgumentException("Invalid escaping scheme " + scheme);
329    }
330  }
331
332  public static boolean needsEscaping(String name, EscapingScheme scheme) {
333    return !isValidLegacyMetricName(name)
334        || (scheme == EscapingScheme.DOTS_ESCAPING && (name.contains(".") || name.contains("_")));
335  }
336
337  static boolean isValidLegacyChar(int c, int i) {
338    return (c >= 'a' && c <= 'z')
339        || (c >= 'A' && c <= 'Z')
340        || c == '_'
341        || c == ':'
342        || (c >= '0' && c <= '9' && i > 0);
343  }
344
345  private static boolean isValidUtf8Char(int c) {
346    return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT);
347  }
348}