001package io.prometheus.metrics.model.snapshots;
002
003import static java.lang.Character.MAX_CODE_POINT;
004import static java.lang.Character.MAX_LOW_SURROGATE;
005import static java.lang.Character.MIN_HIGH_SURROGATE;
006
007import io.prometheus.metrics.config.EscapingScheme;
008import java.nio.charset.StandardCharsets;
009import javax.annotation.Nullable;
010
011/**
012 * Utility for Prometheus Metric and Label naming.
013 *
014 * <p>Note that this library allows dots in metric and label names. Dots will automatically be
015 * replaced with underscores in Prometheus exposition formats. However, if metrics are exposed in
016 * OpenTelemetry format the dots are retained.
017 */
018public class PrometheusNaming {
019
020  /**
021   * According to OpenMetrics {@code _count} and {@code _sum} (and {@code _gcount}, {@code _gsum})
022   * should also be reserved metric name suffixes. However, popular instrumentation libraries have
023   * Gauges with names ending in {@code _count}. Examples:
024   *
025   * <ul>
026   *   <li>Micrometer: {@code jvm_buffer_count}
027   *   <li>OpenTelemetry: {@code process_runtime_jvm_buffer_count}
028   * </ul>
029   *
030   * We do not treat {@code _count} and {@code _sum} as reserved suffixes here for compatibility
031   * with these libraries. However, there is a risk of name conflict if someone creates a gauge
032   * named {@code my_data_count} and a histogram or summary named {@code my_data}, because the
033   * histogram or summary will implicitly have a sample named {@code my_data_count}.
034   */
035  private static final String[] RESERVED_METRIC_NAME_SUFFIXES = {
036    "_total", "_created", "_bucket", "_info",
037    ".total", ".created", ".bucket", ".info"
038  };
039
040  /**
041   * Test if a metric name is valid. Rules:
042   *
043   * <ul>
044   *   <li>The name must match <a
045   *       href="https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels">Metric
046   *       names</a>.
047   *   <li>The name MUST NOT end with one of the {@link #RESERVED_METRIC_NAME_SUFFIXES}.
048   * </ul>
049   *
050   * If a metric has a {@link Unit}, the metric name SHOULD end with the unit as a suffix. Note that
051   * <a href="https://openmetrics.io/">OpenMetrics</a> requires metric names to have their unit as
052   * suffix, and we implement this in {@code prometheus-metrics-core}. However, {@code
053   * prometheus-metrics-model} does not enforce Unit suffixes.
054   *
055   * <p>Example: If you create a Counter for a processing time with Unit {@link Unit#SECONDS
056   * SECONDS}, the name should be {@code processing_time_seconds}. When exposed in OpenMetrics Text
057   * format, this will be represented as two values: {@code processing_time_seconds_total} for the
058   * counter value, and the optional {@code processing_time_seconds_created} timestamp.
059   *
060   * <p>Use {@link #sanitizeMetricName(String)} to convert arbitrary Strings to valid metric names.
061   */
062  public static boolean isValidMetricName(String name) {
063    return validateMetricName(name) == null;
064  }
065
066  /**
067   * Same as {@link #isValidMetricName(String)}, but produces an error message.
068   *
069   * <p>The name is valid if the error message is {@code null}.
070   */
071  @Nullable
072  public static String validateMetricName(String name) {
073    for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
074      if (name.endsWith(reservedSuffix)) {
075        return "The metric name must not include the '" + reservedSuffix + "' suffix.";
076      }
077    }
078    if (isValidUtf8(name)) {
079      return null;
080    }
081    return "The metric name contains unsupported characters";
082  }
083
084  public static boolean isValidLegacyMetricName(String name) {
085    if (name.isEmpty()) {
086      return false;
087    }
088    // First character must be [a-zA-Z_:]
089    char first = name.charAt(0);
090    if (!((first >= 'a' && first <= 'z')
091        || (first >= 'A' && first <= 'Z')
092        || first == '_'
093        || first == ':')) {
094      return false;
095    }
096    // Remaining characters must be [a-zA-Z0-9_:]
097    for (int i = 1; i < name.length(); i++) {
098      char c = name.charAt(i);
099      if (!((c >= 'a' && c <= 'z')
100          || (c >= 'A' && c <= 'Z')
101          || (c >= '0' && c <= '9')
102          || c == '_'
103          || c == ':')) {
104        return false;
105      }
106    }
107    return true;
108  }
109
110  public static boolean isValidLabelName(String name) {
111    return isValidUtf8(name)
112        && !(name.startsWith("__")
113            || name.startsWith("._")
114            || name.startsWith("..")
115            || name.startsWith("_."));
116  }
117
118  private static boolean isValidUtf8(String name) {
119    return !name.isEmpty() && StandardCharsets.UTF_8.newEncoder().canEncode(name);
120  }
121
122  public static boolean isValidLegacyLabelName(String name) {
123    if (name.isEmpty()) {
124      return false;
125    }
126    // First character must be [a-zA-Z_]
127    char first = name.charAt(0);
128    if (!((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_')) {
129      return false;
130    }
131    // Remaining characters must be [a-zA-Z0-9_]
132    for (int i = 1; i < name.length(); i++) {
133      char c = name.charAt(i);
134      if (!((c >= 'a' && c <= 'z')
135          || (c >= 'A' && c <= 'Z')
136          || (c >= '0' && c <= '9')
137          || c == '_')) {
138        return false;
139      }
140    }
141    return true;
142  }
143
144  /**
145   * Units may not have illegal characters, and they may not end with a reserved suffix like
146   * 'total'.
147   */
148  public static boolean isValidUnitName(String name) {
149    return validateUnitName(name) == null;
150  }
151
152  /** Same as {@link #isValidUnitName(String)} but returns an error message. */
153  @Nullable
154  public static String validateUnitName(String name) {
155    if (name.isEmpty()) {
156      return "The unit name must not be empty.";
157    }
158    for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
159      String suffixName = reservedSuffix.substring(1);
160      if (name.endsWith(suffixName)) {
161        return suffixName + " is a reserved suffix in Prometheus";
162      }
163    }
164    // Check if all characters are [a-zA-Z0-9_.:]+
165    for (int i = 0; i < name.length(); i++) {
166      char c = name.charAt(i);
167      if (!((c >= 'a' && c <= 'z')
168          || (c >= 'A' && c <= 'Z')
169          || (c >= '0' && c <= '9')
170          || c == '_'
171          || c == '.'
172          || c == ':')) {
173        return "The unit name contains unsupported characters";
174      }
175    }
176    return null;
177  }
178
179  /**
180   * Get the metric or label name that is used in Prometheus exposition format.
181   *
182   * @param name must be a valid metric or label name, i.e. {@link #isValidMetricName(String)
183   *     isValidMetricName(name)} or {@link #isValidLabelName(String) isValidLabelName(name)} must
184   *     be true.
185   * @return the name with dots replaced by underscores.
186   */
187  public static String prometheusName(String name) {
188    return escapeName(name, EscapingScheme.UNDERSCORE_ESCAPING);
189  }
190
191  /**
192   * Convert an arbitrary string to a name where {@link #isValidMetricName(String)
193   * isValidMetricName(name)} is true.
194   */
195  public static String sanitizeMetricName(String metricName) {
196    if (metricName.isEmpty()) {
197      throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name.");
198    }
199    String sanitizedName = metricName;
200    boolean modified = true;
201    while (modified) {
202      modified = false;
203      for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
204        if (sanitizedName.equals(reservedSuffix)) {
205          // This is for the corner case when you call sanitizeMetricName("_total").
206          // In that case the result will be "total".
207          return reservedSuffix.substring(1);
208        }
209        if (sanitizedName.endsWith(reservedSuffix)) {
210          sanitizedName =
211              sanitizedName.substring(0, sanitizedName.length() - reservedSuffix.length());
212          modified = true;
213        }
214      }
215    }
216    return sanitizedName;
217  }
218
219  /**
220   * Like {@link #sanitizeMetricName(String)}, but also makes sure that the unit is appended as a
221   * suffix if the unit is not {@code null}.
222   */
223  public static String sanitizeMetricName(String metricName, Unit unit) {
224    String result = sanitizeMetricName(metricName);
225    if (unit != null) {
226      if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) {
227        result += "_" + unit;
228      }
229    }
230    return result;
231  }
232
233  /**
234   * Convert an arbitrary string to a name where {@link #isValidLabelName(String)
235   * isValidLabelName(name)} is true.
236   */
237  public static String sanitizeLabelName(String labelName) {
238    if (labelName.isEmpty()) {
239      throw new IllegalArgumentException("Cannot convert an empty string to a valid label name.");
240    }
241    String sanitizedName = labelName;
242    while (sanitizedName.startsWith("__")
243        || sanitizedName.startsWith("_.")
244        || sanitizedName.startsWith("._")
245        || sanitizedName.startsWith("..")) {
246      sanitizedName = sanitizedName.substring(1);
247    }
248    return sanitizedName;
249  }
250
251  /**
252   * Convert an arbitrary string to a name where {@link #validateUnitName(String)} is {@code null}
253   * (i.e. the name is valid).
254   *
255   * @throws IllegalArgumentException if the {@code unitName} cannot be converted, for example if
256   *     you call {@code sanitizeUnitName("total")} or {@code sanitizeUnitName("")}.
257   * @throws NullPointerException if {@code unitName} is null.
258   */
259  public static String sanitizeUnitName(String unitName) {
260    if (unitName.isEmpty()) {
261      throw new IllegalArgumentException("Cannot convert an empty string to a valid unit name.");
262    }
263    String sanitizedName = replaceIllegalCharsInUnitName(unitName);
264    boolean modified = true;
265    while (modified) {
266      modified = false;
267      while (sanitizedName.startsWith("_") || sanitizedName.startsWith(".")) {
268        sanitizedName = sanitizedName.substring(1);
269        modified = true;
270      }
271      while (sanitizedName.endsWith(".") || sanitizedName.endsWith("_")) {
272        sanitizedName = sanitizedName.substring(0, sanitizedName.length() - 1);
273        modified = true;
274      }
275      for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
276        String suffixName = reservedSuffix.substring(1);
277        if (sanitizedName.endsWith(suffixName)) {
278          sanitizedName = sanitizedName.substring(0, sanitizedName.length() - suffixName.length());
279          modified = true;
280        }
281      }
282    }
283    if (sanitizedName.isEmpty()) {
284      throw new IllegalArgumentException(
285          "Cannot convert '" + unitName + "' into a valid unit name.");
286    }
287    return sanitizedName;
288  }
289
290  /** Returns a string with only valid unit name characters [a-zA-Z0-9_.:]. */
291  private static String replaceIllegalCharsInUnitName(String name) {
292    int length = name.length();
293    char[] sanitized = new char[length];
294    for (int i = 0; i < length; i++) {
295      char ch = name.charAt(i);
296      if (ch == ':'
297          || ch == '.'
298          || (ch >= 'a' && ch <= 'z')
299          || (ch >= 'A' && ch <= 'Z')
300          || (ch >= '0' && ch <= '9')) {
301        sanitized[i] = ch;
302      } else {
303        sanitized[i] = '_';
304      }
305    }
306    return new String(sanitized);
307  }
308
309  /**
310   * Escapes the incoming name according to the provided escaping scheme. Depending on the rules of
311   * escaping, this may cause no change in the string that is returned (especially NO_ESCAPING,
312   * which by definition is a noop). This method does not do any validation of the name.
313   */
314  public static String escapeName(String name, EscapingScheme scheme) {
315    if (name.isEmpty() || !needsEscaping(name, scheme)) {
316      return name;
317    }
318
319    StringBuilder escaped = new StringBuilder();
320    switch (scheme) {
321      case ALLOW_UTF8:
322        return name;
323      case UNDERSCORE_ESCAPING:
324        for (int i = 0; i < name.length(); ) {
325          int c = name.codePointAt(i);
326          if (isValidLegacyChar(c, i)) {
327            escaped.appendCodePoint(c);
328          } else {
329            escaped.append('_');
330          }
331          i += Character.charCount(c);
332        }
333        return escaped.toString();
334      case DOTS_ESCAPING:
335        // Do not early return for legacy valid names, we still escape underscores.
336        for (int i = 0; i < name.length(); ) {
337          int c = name.codePointAt(i);
338          if (c == '_') {
339            escaped.append("__");
340          } else if (c == '.') {
341            escaped.append("_dot_");
342          } else if (isValidLegacyChar(c, i)) {
343            escaped.appendCodePoint(c);
344          } else {
345            escaped.append("__");
346          }
347          i += Character.charCount(c);
348        }
349        return escaped.toString();
350      case VALUE_ENCODING_ESCAPING:
351        escaped.append("U__");
352        for (int i = 0; i < name.length(); ) {
353          int c = name.codePointAt(i);
354          if (c == '_') {
355            escaped.append("__");
356          } else if (isValidLegacyChar(c, i)) {
357            escaped.appendCodePoint(c);
358          } else if (!isValidUtf8Char(c)) {
359            escaped.append("_FFFD_");
360          } else {
361            escaped.append('_');
362            escaped.append(Integer.toHexString(c));
363            escaped.append('_');
364          }
365          i += Character.charCount(c);
366        }
367        return escaped.toString();
368      default:
369        throw new IllegalArgumentException("Invalid escaping scheme " + scheme);
370    }
371  }
372
373  public static boolean needsEscaping(String name, EscapingScheme scheme) {
374    return !isValidLegacyMetricName(name)
375        || (scheme == EscapingScheme.DOTS_ESCAPING && (name.contains(".") || name.contains("_")));
376  }
377
378  static boolean isValidLegacyChar(int c, int i) {
379    return (c >= 'a' && c <= 'z')
380        || (c >= 'A' && c <= 'Z')
381        || c == '_'
382        || c == ':'
383        || (c >= '0' && c <= '9' && i > 0);
384  }
385
386  private static boolean isValidUtf8Char(int c) {
387    return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT);
388  }
389}