001package io.prometheus.metrics.model.snapshots;
002
003import static java.lang.Character.MAX_CODE_POINT;
004import static java.lang.Character.MAX_LOW_SURROGATE;
005import static java.lang.Character.MIN_HIGH_SURROGATE;
006
007import io.prometheus.metrics.config.EscapingScheme;
008import java.nio.charset.StandardCharsets;
009import javax.annotation.Nullable;
010
011/**
012 * Utility for Prometheus Metric and Label naming.
013 *
014 * <p>Note that this library allows dots in metric and label names. Dots will automatically be
015 * replaced with underscores in Prometheus exposition formats. However, if metrics are exposed in
016 * OpenTelemetry format the dots are retained.
017 */
018public class PrometheusNaming {
019
020  /**
021   * Test if a metric name is valid. Any non-empty valid UTF-8 string is accepted.
022   *
023   * <p>Collision detection for suffixes like {@code _total}, {@code _info}, {@code _bucket}, etc.
024   * is handled at registration time by the {@link
025   * io.prometheus.metrics.model.registry.PrometheusRegistry PrometheusRegistry}, not by name
026   * validation.
027   *
028   * <p>If a metric has a {@link Unit}, the metric name SHOULD end with the unit as a suffix. Note
029   * that <a href="https://openmetrics.io/">OpenMetrics</a> requires metric names to have their unit
030   * as suffix, and we implement this in {@code prometheus-metrics-core}. However, {@code
031   * prometheus-metrics-model} does not enforce Unit suffixes.
032   *
033   * <p>Example: If you create a Counter for a processing time with Unit {@link Unit#SECONDS
034   * SECONDS}, the name should be {@code processing_time_seconds}. When exposed in OpenMetrics Text
035   * format, this will be represented as two values: {@code processing_time_seconds_total} for the
036   * counter value, and the optional {@code processing_time_seconds_created} timestamp.
037   *
038   * <p>Use {@link #sanitizeMetricName(String)} to convert arbitrary Strings to valid metric names.
039   */
040  public static boolean isValidMetricName(String name) {
041    return validateMetricName(name) == null;
042  }
043
044  /**
045   * Same as {@link #isValidMetricName(String)}, but produces an error message.
046   *
047   * <p>The name is valid if the error message is {@code null}.
048   */
049  @Nullable
050  public static String validateMetricName(String name) {
051    if (isValidUtf8(name)) {
052      return null;
053    }
054    return "The metric name contains unsupported characters";
055  }
056
057  public static boolean isValidLegacyMetricName(String name) {
058    if (name.isEmpty()) {
059      return false;
060    }
061    // First character must be [a-zA-Z_:]
062    char first = name.charAt(0);
063    if (!((first >= 'a' && first <= 'z')
064        || (first >= 'A' && first <= 'Z')
065        || first == '_'
066        || first == ':')) {
067      return false;
068    }
069    // Remaining characters must be [a-zA-Z0-9_:]
070    for (int i = 1; i < name.length(); i++) {
071      char c = name.charAt(i);
072      if (!((c >= 'a' && c <= 'z')
073          || (c >= 'A' && c <= 'Z')
074          || (c >= '0' && c <= '9')
075          || c == '_'
076          || c == ':')) {
077        return false;
078      }
079    }
080    return true;
081  }
082
083  public static boolean isValidLabelName(String name) {
084    return isValidUtf8(name)
085        && !(name.startsWith("__")
086            || name.startsWith("._")
087            || name.startsWith("..")
088            || name.startsWith("_."));
089  }
090
091  private static boolean isValidUtf8(String name) {
092    return !name.isEmpty() && StandardCharsets.UTF_8.newEncoder().canEncode(name);
093  }
094
095  public static boolean isValidLegacyLabelName(String name) {
096    if (name.isEmpty()) {
097      return false;
098    }
099    // First character must be [a-zA-Z_]
100    char first = name.charAt(0);
101    if (!((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_')) {
102      return false;
103    }
104    // Remaining characters must be [a-zA-Z0-9_]
105    for (int i = 1; i < name.length(); i++) {
106      char c = name.charAt(i);
107      if (!((c >= 'a' && c <= 'z')
108          || (c >= 'A' && c <= 'Z')
109          || (c >= '0' && c <= '9')
110          || c == '_')) {
111        return false;
112      }
113    }
114    return true;
115  }
116
117  /** Units may not have illegal characters. */
118  public static boolean isValidUnitName(String name) {
119    return validateUnitName(name) == null;
120  }
121
122  /** Same as {@link #isValidUnitName(String)} but returns an error message. */
123  @Nullable
124  public static String validateUnitName(String name) {
125    if (name.isEmpty()) {
126      return "The unit name must not be empty.";
127    }
128    // Check if all characters are [a-zA-Z0-9_.:]+
129    for (int i = 0; i < name.length(); i++) {
130      char c = name.charAt(i);
131      if (!((c >= 'a' && c <= 'z')
132          || (c >= 'A' && c <= 'Z')
133          || (c >= '0' && c <= '9')
134          || c == '_'
135          || c == '.'
136          || c == ':')) {
137        return "The unit name contains unsupported characters";
138      }
139    }
140    return null;
141  }
142
143  /**
144   * Get the metric or label name that is used in Prometheus exposition format.
145   *
146   * @param name must be a valid metric or label name, i.e. {@link #isValidMetricName(String)
147   *     isValidMetricName(name)} or {@link #isValidLabelName(String) isValidLabelName(name)} must
148   *     be true.
149   * @return the name with dots replaced by underscores.
150   */
151  public static String prometheusName(String name) {
152    return escapeName(name, EscapingScheme.UNDERSCORE_ESCAPING);
153  }
154
155  /**
156   * Convert an arbitrary string to a valid metric name. Since any non-empty valid UTF-8 string is a
157   * valid metric name, this simply returns the input unchanged.
158   *
159   * @throws IllegalArgumentException if the input is empty
160   */
161  public static String sanitizeMetricName(String metricName) {
162    if (metricName.isEmpty()) {
163      throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name.");
164    }
165    return metricName;
166  }
167
168  /**
169   * Like {@link #sanitizeMetricName(String)}, but also makes sure that the unit is appended as a
170   * suffix if the unit is not {@code null}.
171   */
172  public static String sanitizeMetricName(String metricName, Unit unit) {
173    String result = sanitizeMetricName(metricName);
174    if (unit != null) {
175      if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) {
176        result += "_" + unit;
177      }
178    }
179    return result;
180  }
181
182  /**
183   * Convert an arbitrary string to a name where {@link #isValidLabelName(String)
184   * isValidLabelName(name)} is true.
185   */
186  public static String sanitizeLabelName(String labelName) {
187    if (labelName.isEmpty()) {
188      throw new IllegalArgumentException("Cannot convert an empty string to a valid label name.");
189    }
190    String sanitizedName = labelName;
191    while (sanitizedName.startsWith("__")
192        || sanitizedName.startsWith("_.")
193        || sanitizedName.startsWith("._")
194        || sanitizedName.startsWith("..")) {
195      sanitizedName = sanitizedName.substring(1);
196    }
197    return sanitizedName;
198  }
199
200  /**
201   * Convert an arbitrary string to a valid unit name by replacing illegal characters.
202   *
203   * @throws IllegalArgumentException if the {@code unitName} cannot be converted, e.g. if you call
204   *     {@code sanitizeUnitName("")}.
205   * @throws NullPointerException if {@code unitName} is null.
206   */
207  public static String sanitizeUnitName(String unitName) {
208    if (unitName.isEmpty()) {
209      throw new IllegalArgumentException("Cannot convert an empty string to a valid unit name.");
210    }
211    String sanitizedName = replaceIllegalCharsInUnitName(unitName);
212    while (sanitizedName.startsWith("_") || sanitizedName.startsWith(".")) {
213      sanitizedName = sanitizedName.substring(1);
214    }
215    while (sanitizedName.endsWith(".") || sanitizedName.endsWith("_")) {
216      sanitizedName = sanitizedName.substring(0, sanitizedName.length() - 1);
217    }
218    if (sanitizedName.isEmpty()) {
219      throw new IllegalArgumentException(
220          "Cannot convert '" + unitName + "' into a valid unit name.");
221    }
222    return sanitizedName;
223  }
224
225  /** Returns a string with only valid unit name characters [a-zA-Z0-9_.:]. */
226  private static String replaceIllegalCharsInUnitName(String name) {
227    int length = name.length();
228    char[] sanitized = new char[length];
229    for (int i = 0; i < length; i++) {
230      char ch = name.charAt(i);
231      if (ch == ':'
232          || ch == '.'
233          || (ch >= 'a' && ch <= 'z')
234          || (ch >= 'A' && ch <= 'Z')
235          || (ch >= '0' && ch <= '9')) {
236        sanitized[i] = ch;
237      } else {
238        sanitized[i] = '_';
239      }
240    }
241    return new String(sanitized);
242  }
243
244  /**
245   * Escapes the incoming name according to the provided escaping scheme. Depending on the rules of
246   * escaping, this may cause no change in the string that is returned (especially NO_ESCAPING,
247   * which by definition is a noop). This method does not do any validation of the name.
248   */
249  public static String escapeName(String name, EscapingScheme scheme) {
250    if (name.isEmpty() || !needsEscaping(name, scheme)) {
251      return name;
252    }
253
254    StringBuilder escaped = new StringBuilder();
255    switch (scheme) {
256      case ALLOW_UTF8:
257        return name;
258      case UNDERSCORE_ESCAPING:
259        for (int i = 0; i < name.length(); ) {
260          int c = name.codePointAt(i);
261          if (isValidLegacyChar(c, i)) {
262            escaped.appendCodePoint(c);
263          } else {
264            escaped.append('_');
265          }
266          i += Character.charCount(c);
267        }
268        return escaped.toString();
269      case DOTS_ESCAPING:
270        // Do not early return for legacy valid names, we still escape underscores.
271        for (int i = 0; i < name.length(); ) {
272          int c = name.codePointAt(i);
273          if (c == '_') {
274            escaped.append("__");
275          } else if (c == '.') {
276            escaped.append("_dot_");
277          } else if (isValidLegacyChar(c, i)) {
278            escaped.appendCodePoint(c);
279          } else {
280            escaped.append("__");
281          }
282          i += Character.charCount(c);
283        }
284        return escaped.toString();
285      case VALUE_ENCODING_ESCAPING:
286        escaped.append("U__");
287        for (int i = 0; i < name.length(); ) {
288          int c = name.codePointAt(i);
289          if (c == '_') {
290            escaped.append("__");
291          } else if (isValidLegacyChar(c, i)) {
292            escaped.appendCodePoint(c);
293          } else if (!isValidUtf8Char(c)) {
294            escaped.append("_FFFD_");
295          } else {
296            escaped.append('_');
297            escaped.append(Integer.toHexString(c));
298            escaped.append('_');
299          }
300          i += Character.charCount(c);
301        }
302        return escaped.toString();
303      default:
304        throw new IllegalArgumentException("Invalid escaping scheme " + scheme);
305    }
306  }
307
308  public static boolean needsEscaping(String name, EscapingScheme scheme) {
309    return !isValidLegacyMetricName(name)
310        || (scheme == EscapingScheme.DOTS_ESCAPING && (name.contains(".") || name.contains("_")));
311  }
312
313  static boolean isValidLegacyChar(int c, int i) {
314    return (c >= 'a' && c <= 'z')
315        || (c >= 'A' && c <= 'Z')
316        || c == '_'
317        || c == ':'
318        || (c >= '0' && c <= '9' && i > 0);
319  }
320
321  private static boolean isValidUtf8Char(int c) {
322    return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT);
323  }
324}