package io.prometheus.metrics.model.snapshots;

import static java.lang.Character.MAX_CODE_POINT;
import static java.lang.Character.MAX_LOW_SURROGATE;
import static java.lang.Character.MIN_HIGH_SURROGATE;

import io.prometheus.metrics.config.EscapingScheme;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import javax.annotation.Nullable;

/**
 * Utility for Prometheus Metric and Label naming.
 *
 * <p>Note that this library allows dots in metric and label names. Dots will automatically be
 * replaced with underscores in Prometheus exposition formats. However, if metrics are exposed in
 * OpenTelemetry format the dots are retained.
 */
public class PrometheusNaming {

  /** Legal characters for legacy (non-UTF-8) metric names. */
  private static final Pattern METRIC_NAME_PATTERN = Pattern.compile("^[a-zA-Z_:][a-zA-Z0-9_:]*$");

  /** Legal characters for legacy (non-UTF-8) label names. */
  private static final Pattern LEGACY_LABEL_NAME_PATTERN =
      Pattern.compile("^[a-zA-Z_][a-zA-Z0-9_]*$");

  /** Legal characters for unit names, including dot. */
  private static final Pattern UNIT_NAME_PATTERN = Pattern.compile("^[a-zA-Z0-9_.:]+$");

  /**
   * According to OpenMetrics {@code _count} and {@code _sum} (and {@code _gcount}, {@code _gsum})
   * should also be reserved metric name suffixes. However, popular instrumentation libraries have
   * Gauges with names ending in {@code _count}. Examples:
   *
   * <ul>
   *   <li>Micrometer: {@code jvm_buffer_count}
   *   <li>OpenTelemetry: {@code process_runtime_jvm_buffer_count}
   * </ul>
   *
   * <p>We do not treat {@code _count} and {@code _sum} as reserved suffixes here for compatibility
   * with these libraries. However, there is a risk of name conflict if someone creates a gauge
   * named {@code my_data_count} and a histogram or summary named {@code my_data}, because the
   * histogram or summary will implicitly have a sample named {@code my_data_count}.
   */
  private static final String[] RESERVED_METRIC_NAME_SUFFIXES = {
    "_total", "_created", "_bucket", "_info",
    ".total", ".created", ".bucket", ".info"
  };

  /**
   * Test if a metric name is valid. Rules:
   *
   * <ul>
   *   <li>The name must not be empty and must be encodable as UTF-8 (in particular, it must not
   *       contain unpaired surrogates). Use {@link #isValidLegacyMetricName(String)} to check
   *       against the restricted legacy character set instead.
   *   <li>The name MUST NOT end with one of the {@link #RESERVED_METRIC_NAME_SUFFIXES}.
   * </ul>
   *
   * If a metric has a {@link Unit}, the metric name SHOULD end with the unit as a suffix. Note that
   * <a href="https://openmetrics.io/">OpenMetrics</a> requires metric names to have their unit as
   * suffix, and we implement this in {@code prometheus-metrics-core}. However, {@code
   * prometheus-metrics-model} does not enforce Unit suffixes.
   *
   * <p>Example: If you create a Counter for a processing time with Unit {@link Unit#SECONDS
   * SECONDS}, the name should be {@code processing_time_seconds}. When exposed in OpenMetrics Text
   * format, this will be represented as two values: {@code processing_time_seconds_total} for the
   * counter value, and the optional {@code processing_time_seconds_created} timestamp.
   *
   * <p>Use {@link #sanitizeMetricName(String)} to convert arbitrary Strings to valid metric names.
   *
   * @param name the metric name to check; must not be {@code null}
   * @return {@code true} if {@link #validateMetricName(String)} reports no error
   */
  public static boolean isValidMetricName(String name) {
    return validateMetricName(name) == null;
  }

  /**
   * Same as {@link #isValidMetricName(String)}, but produces an error message.
   *
   * <p>The name is valid if the error message is {@code null}.
   *
   * @param name the metric name to check; must not be {@code null}
   * @return {@code null} if the name is valid, otherwise a description of the first problem found
   */
  @Nullable
  public static String validateMetricName(String name) {
    // Reserved suffixes are checked first so that they get the more specific error message.
    for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
      if (name.endsWith(reservedSuffix)) {
        return "The metric name must not include the '" + reservedSuffix + "' suffix.";
      }
    }
    if (isValidUtf8(name)) {
      return null;
    }
    return "The metric name contains unsupported characters";
  }

  /**
   * Test if a metric name only uses the legacy character set, i.e. it matches {@code
   * [a-zA-Z_:][a-zA-Z0-9_:]*}. Reserved suffixes are not checked here.
   */
  public static boolean isValidLegacyMetricName(String name) {
    return METRIC_NAME_PATTERN.matcher(name).matches();
  }

  /**
   * Test if a label name is valid: it must be non-empty, encodable as UTF-8, and must not start
   * with two consecutive characters out of {@code _} and {@code .} (i.e. {@code __}, {@code ._},
   * {@code ..}, or {@code _.}).
   *
   * <p>Use {@link #sanitizeLabelName(String)} to convert arbitrary Strings to valid label names.
   */
  public static boolean isValidLabelName(String name) {
    return isValidUtf8(name)
        && !(name.startsWith("__")
            || name.startsWith("._")
            || name.startsWith("..")
            || name.startsWith("_."));
  }

  /**
   * Returns {@code true} if {@code name} is non-empty and can be encoded as UTF-8. A Java String
   * fails this check if it contains an unpaired surrogate.
   */
  private static boolean isValidUtf8(String name) {
    // CharsetEncoder is stateful and not thread-safe, so a fresh encoder is created per call.
    return !name.isEmpty() && StandardCharsets.UTF_8.newEncoder().canEncode(name);
  }

  /**
   * Test if a label name only uses the legacy character set, i.e. it matches {@code
   * [a-zA-Z_][a-zA-Z0-9_]*}.
   */
  public static boolean isValidLegacyLabelName(String name) {
    return LEGACY_LABEL_NAME_PATTERN.matcher(name).matches();
  }

  /**
   * Units may not have illegal characters, and they may not end with a reserved suffix like
   * 'total'.
   */
  public static boolean isValidUnitName(String name) {
    return validateUnitName(name) == null;
  }

  /**
   * Same as {@link #isValidUnitName(String)} but returns an error message.
   *
   * @return {@code null} if the unit name is valid, otherwise a description of the problem
   */
  @Nullable
  public static String validateUnitName(String name) {
    if (name.isEmpty()) {
      return "The unit name must not be empty.";
    }
    for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
      // Drop the leading '_' or '.' so that e.g. both "x_total" and "total" are rejected.
      String suffixName = reservedSuffix.substring(1);
      if (name.endsWith(suffixName)) {
        return suffixName + " is a reserved suffix in Prometheus";
      }
    }
    if (!UNIT_NAME_PATTERN.matcher(name).matches()) {
      return "The unit name contains unsupported characters";
    }
    return null;
  }

  /**
   * Get the metric or label name that is used in Prometheus exposition format.
   *
   * @param name must be a valid metric or label name, i.e. {@link #isValidMetricName(String)
   *     isValidMetricName(name)} or {@link #isValidLabelName(String) isValidLabelName(name)} must
   *     be true.
   * @return the name escaped with {@link EscapingScheme#UNDERSCORE_ESCAPING}: every character
   *     that is not allowed in a legacy name (including dots) is replaced by an underscore.
   */
  public static String prometheusName(String name) {
    return escapeName(name, EscapingScheme.UNDERSCORE_ESCAPING);
  }

  /**
   * Convert an arbitrary string to a name where {@link #isValidMetricName(String)
   * isValidMetricName(name)} is true.
   *
   * <p>Unpaired surrogates (which cannot be encoded as UTF-8) are replaced by underscores, and
   * reserved suffixes are stripped repeatedly until none is left.
   *
   * @throws IllegalArgumentException if {@code metricName} is empty.
   */
  public static String sanitizeMetricName(String metricName) {
    if (metricName.isEmpty()) {
      throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name.");
    }
    // Without this step an ill-formed input would be returned unchanged and still be invalid.
    String sanitizedName = replaceUnpairedSurrogates(metricName);
    boolean modified = true;
    while (modified) {
      modified = false;
      for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
        if (sanitizedName.equals(reservedSuffix)) {
          // This is for the corner case when you call sanitizeMetricName("_total").
          // In that case the result will be "total".
          return reservedSuffix.substring(1);
        }
        if (sanitizedName.endsWith(reservedSuffix)) {
          sanitizedName =
              sanitizedName.substring(0, sanitizedName.length() - reservedSuffix.length());
          modified = true;
        }
      }
    }
    return sanitizedName;
  }

  /**
   * Like {@link #sanitizeMetricName(String)}, but also makes sure that the unit is appended as a
   * suffix if the unit is not {@code null}.
   *
   * @param metricName the name to sanitize; must not be empty
   * @param unit the unit to append as {@code _<unit>} unless the sanitized name already ends with
   *     {@code _<unit>} or {@code .<unit>}; may be {@code null}
   */
  public static String sanitizeMetricName(String metricName, @Nullable Unit unit) {
    String result = sanitizeMetricName(metricName);
    if (unit != null) {
      if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) {
        result += "_" + unit;
      }
    }
    return result;
  }

  /**
   * Convert an arbitrary string to a name where {@link #isValidLabelName(String)
   * isValidLabelName(name)} is true.
   *
   * <p>Unpaired surrogates (which cannot be encoded as UTF-8) are replaced by underscores, and
   * leading characters are dropped until the name no longer starts with two consecutive
   * characters out of {@code _} and {@code .}.
   *
   * @throws IllegalArgumentException if {@code labelName} is empty.
   */
  public static String sanitizeLabelName(String labelName) {
    if (labelName.isEmpty()) {
      throw new IllegalArgumentException("Cannot convert an empty string to a valid label name.");
    }
    // Repair first: a replaced surrogate becomes '_' and may itself form a reserved prefix.
    String sanitizedName = replaceUnpairedSurrogates(labelName);
    while (sanitizedName.startsWith("__")
        || sanitizedName.startsWith("_.")
        || sanitizedName.startsWith("._")
        || sanitizedName.startsWith("..")) {
      sanitizedName = sanitizedName.substring(1);
    }
    return sanitizedName;
  }

  /**
   * Returns {@code name} with every unpaired surrogate replaced by an underscore, so that the
   * result can be encoded as UTF-8. Well-formed input is returned as the same instance.
   */
  private static String replaceUnpairedSurrogates(String name) {
    StringBuilder repaired = null; // allocated lazily, only if something has to be replaced
    for (int i = 0; i < name.length(); ) {
      // codePointAt() yields the surrogate itself if it is not part of a valid pair.
      int codePoint = name.codePointAt(i);
      if (!isValidUtf8Char(codePoint)) {
        if (repaired == null) {
          repaired = new StringBuilder(name.length()).append(name, 0, i);
        }
        repaired.append('_');
      } else if (repaired != null) {
        repaired.appendCodePoint(codePoint);
      }
      i += Character.charCount(codePoint);
    }
    return repaired == null ? name : repaired.toString();
  }

  /**
   * Convert an arbitrary string to a name where {@link #validateUnitName(String)} is {@code null}
   * (i.e. the name is valid).
   *
   * @throws IllegalArgumentException if the {@code unitName} cannot be converted, for example if
   *     you call {@code sanitizeUnitName("total")} or {@code sanitizeUnitName("")}.
   * @throws NullPointerException if {@code unitName} is null.
   */
  public static String sanitizeUnitName(String unitName) {
    if (unitName.isEmpty()) {
      throw new IllegalArgumentException("Cannot convert an empty string to a valid unit name.");
    }
    String sanitizedName = replaceIllegalCharsInUnitName(unitName);
    // Repeat until stable: stripping a suffix can expose a new trailing separator and vice versa.
    boolean modified = true;
    while (modified) {
      modified = false;
      while (sanitizedName.startsWith("_") || sanitizedName.startsWith(".")) {
        sanitizedName = sanitizedName.substring(1);
        modified = true;
      }
      while (sanitizedName.endsWith(".") || sanitizedName.endsWith("_")) {
        sanitizedName = sanitizedName.substring(0, sanitizedName.length() - 1);
        modified = true;
      }
      for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) {
        String suffixName = reservedSuffix.substring(1);
        if (sanitizedName.endsWith(suffixName)) {
          sanitizedName = sanitizedName.substring(0, sanitizedName.length() - suffixName.length());
          modified = true;
        }
      }
    }
    if (sanitizedName.isEmpty()) {
      throw new IllegalArgumentException(
          "Cannot convert '" + unitName + "' into a valid unit name.");
    }
    return sanitizedName;
  }

  /**
   * Returns a string that matches {@link #UNIT_NAME_PATTERN} by replacing every other character
   * with an underscore. The replacement works on UTF-16 code units, so the length is preserved.
   */
  private static String replaceIllegalCharsInUnitName(String name) {
    int length = name.length();
    char[] sanitized = new char[length];
    for (int i = 0; i < length; i++) {
      char ch = name.charAt(i);
      if (ch == ':'
          || ch == '.'
          || (ch >= 'a' && ch <= 'z')
          || (ch >= 'A' && ch <= 'Z')
          || (ch >= '0' && ch <= '9')) {
        sanitized[i] = ch;
      } else {
        sanitized[i] = '_';
      }
    }
    return new String(sanitized);
  }

  /**
   * Escapes the incoming name according to the provided escaping scheme. Depending on the rules of
   * escaping, this may cause no change in the string that is returned (especially {@code
   * ALLOW_UTF8}, which always returns the name unchanged). This method does not do any validation
   * of the name.
   *
   * <ul>
   *   <li>{@code UNDERSCORE_ESCAPING}: every code point that is not legal in a legacy name is
   *       replaced by {@code _}.
   *   <li>{@code DOTS_ESCAPING}: {@code _} becomes {@code __}, {@code .} becomes {@code _dot_},
   *       and any other code point that is not legal in a legacy name becomes {@code __}.
   *   <li>{@code VALUE_ENCODING_ESCAPING}: the result is prefixed with {@code U__}, {@code _}
   *       becomes {@code __}, and any other code point that is not legal in a legacy name becomes
   *       {@code _<hex code point>_} (or {@code _FFFD_} for an unpaired surrogate).
   * </ul>
   *
   * @param name the name to escape
   * @param scheme the escaping scheme to apply
   * @return the escaped name, or {@code name} itself if it is empty or needs no escaping
   * @throws IllegalArgumentException if {@code scheme} is not one of the schemes handled here
   */
  public static String escapeName(String name, EscapingScheme scheme) {
    if (name.isEmpty() || !needsEscaping(name, scheme)) {
      return name;
    }

    StringBuilder escaped = new StringBuilder();
    switch (scheme) {
      case ALLOW_UTF8:
        return name;
      case UNDERSCORE_ESCAPING:
        for (int i = 0; i < name.length(); ) {
          int c = name.codePointAt(i);
          if (isValidLegacyChar(c, i)) {
            escaped.appendCodePoint(c);
          } else {
            escaped.append('_');
          }
          i += Character.charCount(c);
        }
        return escaped.toString();
      case DOTS_ESCAPING:
        // Do not early return for legacy valid names, we still escape underscores.
        for (int i = 0; i < name.length(); ) {
          int c = name.codePointAt(i);
          if (c == '_') {
            escaped.append("__");
          } else if (c == '.') {
            escaped.append("_dot_");
          } else if (isValidLegacyChar(c, i)) {
            escaped.appendCodePoint(c);
          } else {
            escaped.append("__");
          }
          i += Character.charCount(c);
        }
        return escaped.toString();
      case VALUE_ENCODING_ESCAPING:
        escaped.append("U__");
        for (int i = 0; i < name.length(); ) {
          int c = name.codePointAt(i);
          if (c == '_') {
            escaped.append("__");
          } else if (isValidLegacyChar(c, i)) {
            escaped.appendCodePoint(c);
          } else if (!isValidUtf8Char(c)) {
            // Unpaired surrogate: encode as the Unicode replacement character.
            escaped.append("_FFFD_");
          } else {
            escaped.append('_');
            escaped.append(Integer.toHexString(c));
            escaped.append('_');
          }
          i += Character.charCount(c);
        }
        return escaped.toString();
      default:
        throw new IllegalArgumentException("Invalid escaping scheme " + scheme);
    }
  }

  /**
   * Returns {@code true} if {@link #escapeName(String, EscapingScheme)} has to process the name:
   * either the name is not a valid legacy metric name, or the scheme is {@code DOTS_ESCAPING} and
   * the name contains a dot or an underscore (which that scheme always rewrites).
   */
  public static boolean needsEscaping(String name, EscapingScheme scheme) {
    return !isValidLegacyMetricName(name)
        || (scheme == EscapingScheme.DOTS_ESCAPING && (name.contains(".") || name.contains("_")));
  }

  /**
   * Returns {@code true} if code point {@code c} is legal at index {@code i} of a legacy metric
   * name: letters, {@code _} and {@code :} are legal everywhere, digits only when {@code i > 0}.
   */
  static boolean isValidLegacyChar(int c, int i) {
    return (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || c == '_'
        || c == ':'
        || (c >= '0' && c <= '9' && i > 0);
  }

  /**
   * Returns {@code true} if {@code c} is a Unicode code point outside the surrogate range, i.e. a
   * value that can be encoded as UTF-8.
   */
  private static boolean isValidUtf8Char(int c) {
    return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT);
  }
}