001package io.prometheus.metrics.model.snapshots; 002 003import static java.lang.Character.MAX_CODE_POINT; 004import static java.lang.Character.MAX_LOW_SURROGATE; 005import static java.lang.Character.MIN_HIGH_SURROGATE; 006 007import io.prometheus.metrics.annotations.StableApi; 008import io.prometheus.metrics.config.EscapingScheme; 009import java.nio.charset.StandardCharsets; 010import javax.annotation.Nullable; 011 012/** 013 * Utility for Prometheus Metric and Label naming. 014 * 015 * <p>Note that this library allows dots in metric and label names. Dots will automatically be 016 * replaced with underscores in Prometheus exposition formats. However, if metrics are exposed in 017 * OpenTelemetry format the dots are retained. 018 */ 019@StableApi 020public class PrometheusNaming { 021 022 /** 023 * Reserved metric name suffixes. These suffixes are automatically appended by Prometheus 024 * exposition format writers for specific metric types: {@code _total} and {@code _created} for 025 * counters, {@code _info} for info metrics, and {@code _bucket} for histograms. Including these 026 * in a base metric name via {@link #sanitizeMetricName(String)} would cause confusion or 027 * double-suffixing, so they are stripped during sanitization. 028 */ 029 static final String[] RESERVED_METRIC_NAME_SUFFIXES = { 030 "_total", "_created", "_bucket", "_info", ".total", ".created", ".bucket", ".info" 031 }; 032 033 /** 034 * Test if a metric name is valid. Any non-empty valid UTF-8 string is accepted. 035 * 036 * <p>Collision detection for suffixes like {@code _total}, {@code _info}, {@code _bucket}, etc. 037 * is handled at registration time by the {@link 038 * io.prometheus.metrics.model.registry.PrometheusRegistry PrometheusRegistry}, not by name 039 * validation. 040 * 041 * <p>If a metric has a {@link Unit}, the metric name SHOULD end with the unit as a suffix. Note 042 * that <a href="https://openmetrics.io/">OpenMetrics</a> requires metric names to have their unit 043 * as suffix, and we implement this in {@code prometheus-metrics-core}. However, {@code 044 * prometheus-metrics-model} does not enforce Unit suffixes. 045 * 046 * <p>Example: If you create a Counter for a processing time with Unit {@link Unit#SECONDS 047 * SECONDS}, the name should be {@code processing_time_seconds}. When exposed in OpenMetrics Text 048 * format, this will be represented as two values: {@code processing_time_seconds_total} for the 049 * counter value, and the optional {@code processing_time_seconds_created} timestamp. 050 * 051 * <p>Use {@link #sanitizeMetricName(String)} for compatibility-preserving sanitization that 052 * strips reserved suffixes, or {@link #normalizeMetricName(String)} for permissive normalization 053 * that keeps the original suffixes intact. 054 */ 055 public static boolean isValidMetricName(String name) { 056 return validateMetricName(name) == null; 057 } 058 059 /** 060 * Same as {@link #isValidMetricName(String)}, but produces an error message. 061 * 062 * <p>The name is valid if the error message is {@code null}. 063 */ 064 @Nullable 065 public static String validateMetricName(String name) { 066 if (isValidUtf8(name)) { 067 return null; 068 } 069 return "The metric name contains unsupported characters"; 070 } 071 072 public static boolean isValidLegacyMetricName(String name) { 073 if (name.isEmpty()) { 074 return false; 075 } 076 // First character must be [a-zA-Z_:] 077 char first = name.charAt(0); 078 if (!((first >= 'a' && first <= 'z') 079 || (first >= 'A' && first <= 'Z') 080 || first == '_' 081 || first == ':')) { 082 return false; 083 } 084 // Remaining characters must be [a-zA-Z0-9_:] 085 for (int i = 1; i < name.length(); i++) { 086 char c = name.charAt(i); 087 if (!((c >= 'a' && c <= 'z') 088 || (c >= 'A' && c <= 'Z') 089 || (c >= '0' && c <= '9') 090 || c == '_' 091 || c == ':')) { 092 return false; 093 } 094 } 095 return true; 096 } 097 098 public static boolean isValidLabelName(String name) { 099 return isValidUtf8(name) 100 && !(name.startsWith("__") 101 || name.startsWith("._") 102 || name.startsWith("..") 103 || name.startsWith("_.")); 104 } 105 106 private static boolean isValidUtf8(String name) { 107 return !name.isEmpty() && StandardCharsets.UTF_8.newEncoder().canEncode(name); 108 } 109 110 public static boolean isValidLegacyLabelName(String name) { 111 if (name.isEmpty()) { 112 return false; 113 } 114 // First character must be [a-zA-Z_] 115 char first = name.charAt(0); 116 if (!((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_')) { 117 return false; 118 } 119 // Remaining characters must be [a-zA-Z0-9_] 120 for (int i = 1; i < name.length(); i++) { 121 char c = name.charAt(i); 122 if (!((c >= 'a' && c <= 'z') 123 || (c >= 'A' && c <= 'Z') 124 || (c >= '0' && c <= '9') 125 || c == '_')) { 126 return false; 127 } 128 } 129 return true; 130 } 131 132 /** Units may not have illegal characters. */ 133 public static boolean isValidUnitName(String name) { 134 return validateUnitName(name) == null; 135 } 136 137 /** Same as {@link #isValidUnitName(String)} but returns an error message. */ 138 @Nullable 139 public static String validateUnitName(String name) { 140 if (name.isEmpty()) { 141 return "The unit name must not be empty."; 142 } 143 // Check if all characters are [a-zA-Z0-9_.:]+ 144 for (int i = 0; i < name.length(); i++) { 145 char c = name.charAt(i); 146 if (!((c >= 'a' && c <= 'z') 147 || (c >= 'A' && c <= 'Z') 148 || (c >= '0' && c <= '9') 149 || c == '_' 150 || c == '.' 151 || c == ':')) { 152 return "The unit name contains unsupported characters"; 153 } 154 } 155 return null; 156 } 157 158 /** 159 * Get the metric or label name that is used in Prometheus exposition format. 160 * 161 * @param name must be a valid metric or label name, i.e. {@link #isValidMetricName(String) 162 * isValidMetricName(name)} or {@link #isValidLabelName(String) isValidLabelName(name)} must 163 * be true. 164 * @return the name with dots replaced by underscores. 165 */ 166 public static String prometheusName(String name) { 167 return escapeName(name, EscapingScheme.UNDERSCORE_ESCAPING); 168 } 169 170 /** 171 * Convert an arbitrary string to a valid metric name. 172 * 173 * <p>Reserved metric name suffixes ({@code _total}, {@code _created}, {@code _bucket}, {@code 174 * _info} and their dot variants) are stripped. These suffixes are appended automatically by 175 * Prometheus exposition format writers, so including them in a base metric name would result in 176 * double-suffixing or unintended type inference. For example, a JMX attribute named {@code 177 * RequestTotal} would be sanitized from {@code kafka_consumer_request_total} to {@code 178 * kafka_consumer_request}, and the counter writer would add {@code _total} back at scrape time. 179 * 180 * <p>This behaviour was present in client_java 1.5.x and is restored here to fix a regression 181 * introduced in 1.6.0 that affected downstream tools (e.g. the JMX Exporter and the simpleclient 182 * bridge) which relied on {@code sanitizeMetricName} to strip these suffixes before passing names 183 * to the snapshot builders. 184 * 185 * <p>If you want permissive normalization that keeps reserved suffixes intact, use {@link 186 * #normalizeMetricName(String)} instead. 187 * 188 * @throws IllegalArgumentException if the input is empty 189 */ 190 public static String sanitizeMetricName(String metricName) { 191 if (metricName.isEmpty()) { 192 throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name."); 193 } 194 String sanitizedName = metricName; 195 boolean stripped = true; 196 while (stripped) { 197 stripped = false; 198 // When the name equals the suffix exactly, drop the leading separator character to avoid 199 // returning an empty string (e.g. "_total" → "total", ".info" → "info"). 200 for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) { 201 if (sanitizedName.equals(reservedSuffix)) { 202 return reservedSuffix.substring(1); 203 } 204 } 205 for (String reservedSuffix : RESERVED_METRIC_NAME_SUFFIXES) { 206 if (sanitizedName.endsWith(reservedSuffix)) { 207 sanitizedName = 208 sanitizedName.substring(0, sanitizedName.length() - reservedSuffix.length()); 209 stripped = true; 210 break; // restart the outer loop to re-check all suffixes on the shortened name 211 } 212 } 213 } 214 return sanitizedName; 215 } 216 217 /** 218 * Like {@link #sanitizeMetricName(String)}, but also makes sure that the unit is appended as a 219 * suffix if the unit is not {@code null}. 220 */ 221 public static String sanitizeMetricName(String metricName, Unit unit) { 222 String result = sanitizeMetricName(metricName); 223 if (unit != null) { 224 if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) { 225 result += "_" + unit; 226 } 227 } 228 return result; 229 } 230 231 /** 232 * Convert an arbitrary string to a valid metric name without stripping reserved suffixes. 233 * 234 * <p>Any non-empty valid UTF-8 string is accepted and returned unchanged. This is the permissive 235 * normalization behavior introduced in 1.6.0. Use this method for new integrations that want to 236 * preserve the original metric name and rely on registration-time collision detection instead of 237 * suffix stripping. 238 * 239 * @throws IllegalArgumentException if the input is empty 240 */ 241 public static String normalizeMetricName(String metricName) { 242 if (metricName.isEmpty()) { 243 throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name."); 244 } 245 return metricName; 246 } 247 248 /** 249 * Like {@link #normalizeMetricName(String)}, but also makes sure that the unit is appended as a 250 * suffix if the unit is not {@code null}. 251 */ 252 public static String normalizeMetricName(String metricName, Unit unit) { 253 String result = normalizeMetricName(metricName); 254 if (unit != null) { 255 if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) { 256 result += "_" + unit; 257 } 258 } 259 return result; 260 } 261 262 /** 263 * Convert an arbitrary string to a name where {@link #isValidLabelName(String) 264 * isValidLabelName(name)} is true. 265 */ 266 public static String sanitizeLabelName(String labelName) { 267 if (labelName.isEmpty()) { 268 throw new IllegalArgumentException("Cannot convert an empty string to a valid label name."); 269 } 270 String sanitizedName = labelName; 271 while (sanitizedName.startsWith("__") 272 || sanitizedName.startsWith("_.") 273 || sanitizedName.startsWith("._") 274 || sanitizedName.startsWith("..")) { 275 sanitizedName = sanitizedName.substring(1); 276 } 277 return sanitizedName; 278 } 279 280 /** 281 * Convert an arbitrary string to a valid unit name by replacing illegal characters. 282 * 283 * @throws IllegalArgumentException if the {@code unitName} cannot be converted, e.g. if you call 284 * {@code sanitizeUnitName("")}. 285 * @throws NullPointerException if {@code unitName} is null. 286 */ 287 public static String sanitizeUnitName(String unitName) { 288 if (unitName.isEmpty()) { 289 throw new IllegalArgumentException("Cannot convert an empty string to a valid unit name."); 290 } 291 String sanitizedName = replaceIllegalCharsInUnitName(unitName); 292 while (sanitizedName.startsWith("_") || sanitizedName.startsWith(".")) { 293 sanitizedName = sanitizedName.substring(1); 294 } 295 while (sanitizedName.endsWith(".") || sanitizedName.endsWith("_")) { 296 sanitizedName = sanitizedName.substring(0, sanitizedName.length() - 1); 297 } 298 if (sanitizedName.isEmpty()) { 299 throw new IllegalArgumentException( 300 "Cannot convert '" + unitName + "' into a valid unit name."); 301 } 302 return sanitizedName; 303 } 304 305 /** Returns a string with only valid unit name characters [a-zA-Z0-9_.:]. */ 306 private static String replaceIllegalCharsInUnitName(String name) { 307 int length = name.length(); 308 char[] sanitized = new char[length]; 309 for (int i = 0; i < length; i++) { 310 char ch = name.charAt(i); 311 if (ch == ':' 312 || ch == '.' 313 || (ch >= 'a' && ch <= 'z') 314 || (ch >= 'A' && ch <= 'Z') 315 || (ch >= '0' && ch <= '9')) { 316 sanitized[i] = ch; 317 } else { 318 sanitized[i] = '_'; 319 } 320 } 321 return new String(sanitized); 322 } 323 324 /** 325 * Escapes the incoming name according to the provided escaping scheme. Depending on the rules of 326 * escaping, this may cause no change in the string that is returned (especially NO_ESCAPING, 327 * which by definition is a noop). This method does not do any validation of the name. 328 */ 329 public static String escapeName(String name, EscapingScheme scheme) { 330 if (name.isEmpty() || !needsEscaping(name, scheme)) { 331 return name; 332 } 333 334 StringBuilder escaped = new StringBuilder(); 335 switch (scheme) { 336 case ALLOW_UTF8: 337 return name; 338 case UNDERSCORE_ESCAPING: 339 for (int i = 0; i < name.length(); ) { 340 int c = name.codePointAt(i); 341 if (isValidLegacyChar(c, i)) { 342 escaped.appendCodePoint(c); 343 } else { 344 escaped.append('_'); 345 } 346 i += Character.charCount(c); 347 } 348 return escaped.toString(); 349 case DOTS_ESCAPING: 350 // Do not early return for legacy valid names, we still escape underscores. 351 for (int i = 0; i < name.length(); ) { 352 int c = name.codePointAt(i); 353 if (c == '_') { 354 escaped.append("__"); 355 } else if (c == '.') { 356 escaped.append("_dot_"); 357 } else if (isValidLegacyChar(c, i)) { 358 escaped.appendCodePoint(c); 359 } else { 360 escaped.append("__"); 361 } 362 i += Character.charCount(c); 363 } 364 return escaped.toString(); 365 case VALUE_ENCODING_ESCAPING: 366 escaped.append("U__"); 367 for (int i = 0; i < name.length(); ) { 368 int c = name.codePointAt(i); 369 if (c == '_') { 370 escaped.append("__"); 371 } else if (isValidLegacyChar(c, i)) { 372 escaped.appendCodePoint(c); 373 } else if (!isValidUtf8Char(c)) { 374 escaped.append("_FFFD_"); 375 } else { 376 escaped.append('_'); 377 escaped.append(Integer.toHexString(c)); 378 escaped.append('_'); 379 } 380 i += Character.charCount(c); 381 } 382 return escaped.toString(); 383 default: 384 throw new IllegalArgumentException("Invalid escaping scheme " + scheme); 385 } 386 } 387 388 public static boolean needsEscaping(String name, EscapingScheme scheme) { 389 return !isValidLegacyMetricName(name) 390 || (scheme == EscapingScheme.DOTS_ESCAPING && (name.contains(".") || name.contains("_"))); 391 } 392 393 static boolean isValidLegacyChar(int c, int i) { 394 return (c >= 'a' && c <= 'z') 395 || (c >= 'A' && c <= 'Z') 396 || c == '_' 397 || c == ':' 398 || (c >= '0' && c <= '9' && i > 0); 399 } 400 401 private static boolean isValidUtf8Char(int c) { 402 return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT); 403 } 404}