package io.prometheus.metrics.model.snapshots;

import static java.lang.Character.MAX_CODE_POINT;
import static java.lang.Character.MAX_LOW_SURROGATE;
import static java.lang.Character.MIN_HIGH_SURROGATE;

import io.prometheus.metrics.config.EscapingScheme;
import java.nio.charset.StandardCharsets;
import javax.annotation.Nullable;

/**
 * Utility for Prometheus Metric and Label naming.
 *
 * <p>Note that this library allows dots in metric and label names. Dots will automatically be
 * replaced with underscores in Prometheus exposition formats. However, if metrics are exposed in
 * OpenTelemetry format the dots are retained.
 */
public class PrometheusNaming {

  /**
   * Test if a metric name is valid. Any non-empty valid UTF-8 string is accepted.
   *
   * <p>Collision detection for suffixes like {@code _total}, {@code _info}, {@code _bucket}, etc.
   * is handled at registration time by the {@link
   * io.prometheus.metrics.model.registry.PrometheusRegistry PrometheusRegistry}, not by name
   * validation.
   *
   * <p>If a metric has a {@link Unit}, the metric name SHOULD end with the unit as a suffix. Note
   * that <a href="https://openmetrics.io/">OpenMetrics</a> requires metric names to have their unit
   * as suffix, and we implement this in {@code prometheus-metrics-core}. However, {@code
   * prometheus-metrics-model} does not enforce Unit suffixes.
   *
   * <p>Example: If you create a Counter for a processing time with Unit {@link Unit#SECONDS
   * SECONDS}, the name should be {@code processing_time_seconds}. When exposed in OpenMetrics Text
   * format, this will be represented as two values: {@code processing_time_seconds_total} for the
   * counter value, and the optional {@code processing_time_seconds_created} timestamp.
   *
   * <p>Use {@link #sanitizeMetricName(String)} to convert arbitrary Strings to valid metric names.
   *
   * @param name the candidate metric name; must not be {@code null}
   * @return {@code true} if {@link #validateMetricName(String)} reports no error
   */
  public static boolean isValidMetricName(String name) {
    return validateMetricName(name) == null;
  }

  /**
   * Same as {@link #isValidMetricName(String)}, but produces an error message.
   *
   * <p>The name is valid if the error message is {@code null}. An empty name and a name that
   * cannot be encoded as UTF-8 are reported with different messages.
   *
   * @param name the candidate metric name; must not be {@code null}
   * @return {@code null} if the name is valid, otherwise a description of the problem
   */
  @Nullable
  public static String validateMetricName(String name) {
    if (name.isEmpty()) {
      // Report emptiness explicitly instead of blaming characters that are not there.
      return "The metric name must not be empty.";
    }
    if (!isValidUtf8(name)) {
      return "The metric name contains unsupported characters";
    }
    return null;
  }

  /**
   * Test if a metric name matches the legacy character set.
   *
   * @param name the candidate metric name; must not be {@code null}
   * @return {@code true} if the name is non-empty, starts with {@code [a-zA-Z_:]} and continues
   *     with {@code [a-zA-Z0-9_:]} only
   */
  public static boolean isValidLegacyMetricName(String name) {
    if (name.isEmpty()) {
      return false;
    }
    for (int i = 0; i < name.length(); i++) {
      if (!isValidLegacyChar(name.charAt(i), i)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Test if a label name is valid.
   *
   * @param name the candidate label name; must not be {@code null}
   * @return {@code true} if the name is non-empty, can be encoded as UTF-8, and does not start
   *     with one of the prefixes {@code __}, {@code ._}, {@code ..} or {@code _.}
   */
  public static boolean isValidLabelName(String name) {
    return isValidUtf8(name) && !hasReservedPrefixAt(name, 0);
  }

  /** Returns true if the name is non-empty and can be encoded as UTF-8. */
  private static boolean isValidUtf8(String name) {
    // A fresh encoder is created per call; the check fails for input the encoder cannot encode.
    return !name.isEmpty() && StandardCharsets.UTF_8.newEncoder().canEncode(name);
  }

  /**
   * Test if a label name matches the legacy character set.
   *
   * @param name the candidate label name; must not be {@code null}
   * @return {@code true} if the name is non-empty, starts with {@code [a-zA-Z_]} and continues with
   *     {@code [a-zA-Z0-9_]} only
   */
  public static boolean isValidLegacyLabelName(String name) {
    if (name.isEmpty()) {
      return false;
    }
    for (int i = 0; i < name.length(); i++) {
      char c = name.charAt(i);
      // Same rules as legacy metric names, except that the colon is not allowed.
      if (c == ':' || !isValidLegacyChar(c, i)) {
        return false;
      }
    }
    return true;
  }

  /** Units may not have illegal characters. */
  public static boolean isValidUnitName(String name) {
    return validateUnitName(name) == null;
  }

  /**
   * Same as {@link #isValidUnitName(String)} but returns an error message.
   *
   * @param name the candidate unit name; must not be {@code null}
   * @return {@code null} if the name is non-empty and consists of {@code [a-zA-Z0-9_.:]} only,
   *     otherwise a description of the problem
   */
  @Nullable
  public static String validateUnitName(String name) {
    if (name.isEmpty()) {
      return "The unit name must not be empty.";
    }
    for (int i = 0; i < name.length(); i++) {
      if (!isUnitNameChar(name.charAt(i))) {
        return "The unit name contains unsupported characters";
      }
    }
    return null;
  }

  /**
   * Get the metric or label name that is used in Prometheus exposition format.
   *
   * @param name must be a valid metric or label name, i.e. {@link #isValidMetricName(String)
   *     isValidMetricName(name)} or {@link #isValidLabelName(String) isValidLabelName(name)} must
   *     be true.
   * @return the name escaped with {@link EscapingScheme#UNDERSCORE_ESCAPING}, i.e. with dots and
   *     every other character outside the legacy character set replaced by underscores.
   */
  public static String prometheusName(String name) {
    return escapeName(name, EscapingScheme.UNDERSCORE_ESCAPING);
  }

  /**
   * Convert an arbitrary string to a valid metric name. Since any non-empty valid UTF-8 string is a
   * valid metric name, this simply returns the input unchanged.
   *
   * @throws IllegalArgumentException if the input is empty
   */
  public static String sanitizeMetricName(String metricName) {
    if (metricName.isEmpty()) {
      throw new IllegalArgumentException("Cannot convert an empty string to a valid metric name.");
    }
    return metricName;
  }

  /**
   * Like {@link #sanitizeMetricName(String)}, but also makes sure that the unit is appended as a
   * suffix if the unit is not {@code null}.
   *
   * @param metricName the name to sanitize
   * @param unit the unit to append as {@code _<unit>}, or {@code null} to append nothing; the
   *     suffix is not added if the name already ends with {@code _<unit>} or {@code .<unit>}
   * @throws IllegalArgumentException if {@code metricName} is empty
   */
  public static String sanitizeMetricName(String metricName, Unit unit) {
    String result = sanitizeMetricName(metricName);
    if (unit != null) {
      if (!result.endsWith("_" + unit) && !result.endsWith("." + unit)) {
        result += "_" + unit;
      }
    }
    return result;
  }

  /**
   * Convert an arbitrary string to a name where {@link #isValidLabelName(String)
   * isValidLabelName(name)} is true.
   *
   * <p>Only leading characters are dropped, until the name no longer starts with {@code __},
   * {@code _.}, {@code ._} or {@code ..}. The remaining characters are not rewritten, so the result
   * is a valid label name provided the input can be encoded as UTF-8.
   *
   * @throws IllegalArgumentException if the input is empty
   */
  public static String sanitizeLabelName(String labelName) {
    if (labelName.isEmpty()) {
      throw new IllegalArgumentException("Cannot convert an empty string to a valid label name.");
    }
    // Advance past the reserved prefix by index so the string is copied at most once.
    int start = 0;
    while (hasReservedPrefixAt(labelName, start)) {
      start++;
    }
    return labelName.substring(start);
  }

  /**
   * Convert an arbitrary string to a valid unit name by replacing illegal characters.
   *
   * <p>Every character outside {@code [a-zA-Z0-9_.:]} is replaced by an underscore, then leading
   * and trailing underscores and dots are removed.
   *
   * @throws IllegalArgumentException if the {@code unitName} cannot be converted, e.g. if you call
   *     {@code sanitizeUnitName("")}.
   * @throws NullPointerException if {@code unitName} is null.
   */
  public static String sanitizeUnitName(String unitName) {
    if (unitName.isEmpty()) {
      throw new IllegalArgumentException("Cannot convert an empty string to a valid unit name.");
    }
    String replaced = replaceIllegalCharsInUnitName(unitName);
    int start = 0;
    int end = replaced.length();
    while (start < end && isUnderscoreOrDot(replaced.charAt(start))) {
      start++;
    }
    while (end > start && isUnderscoreOrDot(replaced.charAt(end - 1))) {
      end--;
    }
    if (start == end) {
      // Nothing but underscores and dots was left after replacing the illegal characters.
      throw new IllegalArgumentException(
          "Cannot convert '" + unitName + "' into a valid unit name.");
    }
    return replaced.substring(start, end);
  }

  /** Returns a string with only valid unit name characters [a-zA-Z0-9_.:]. */
  private static String replaceIllegalCharsInUnitName(String name) {
    int length = name.length();
    char[] sanitized = new char[length];
    for (int i = 0; i < length; i++) {
      char ch = name.charAt(i);
      sanitized[i] = isUnitNameChar(ch) ? ch : '_';
    }
    return new String(sanitized);
  }

  /**
   * Escapes the incoming name according to the provided escaping scheme. Depending on the rules of
   * escaping, this may cause no change in the string that is returned (especially {@code
   * ALLOW_UTF8}, which always returns the name as is). This method does not do any validation of
   * the name.
   *
   * <p>An empty name, and any name for which {@link #needsEscaping(String, EscapingScheme)} is
   * false, is returned unchanged. Otherwise:
   *
   * <ul>
   *   <li>{@code UNDERSCORE_ESCAPING} replaces every code point outside the legacy character set
   *       with {@code _}.
   *   <li>{@code DOTS_ESCAPING} writes {@code _} as {@code __}, {@code .} as {@code _dot_} and
   *       every other code point outside the legacy character set as {@code __}.
   *   <li>{@code VALUE_ENCODING_ESCAPING} prefixes the result with {@code U__}, writes {@code _}
   *       as {@code __} and every other code point outside the legacy character set as {@code
   *       _<hex>_}; unpaired surrogates are written as {@code _FFFD_}.
   * </ul>
   *
   * @param name the name to escape; must not be {@code null}
   * @param scheme the escaping scheme to apply
   * @return the escaped name
   * @throws IllegalArgumentException if the scheme is not one of the schemes listed above
   */
  public static String escapeName(String name, EscapingScheme scheme) {
    if (name.isEmpty() || !needsEscaping(name, scheme)) {
      return name;
    }
    switch (scheme) {
      case ALLOW_UTF8:
        return name;
      case UNDERSCORE_ESCAPING:
        return escapeWithUnderscores(name);
      case DOTS_ESCAPING:
        return escapeWithDots(name);
      case VALUE_ENCODING_ESCAPING:
        return escapeWithValueEncoding(name);
      default:
        throw new IllegalArgumentException("Invalid escaping scheme " + scheme);
    }
  }

  /** Replaces every code point outside the legacy character set with a single underscore. */
  private static String escapeWithUnderscores(String name) {
    StringBuilder escaped = new StringBuilder(name.length());
    for (int i = 0; i < name.length(); ) {
      int c = name.codePointAt(i);
      if (isValidLegacyChar(c, i)) {
        escaped.appendCodePoint(c);
      } else {
        escaped.append('_');
      }
      i += Character.charCount(c);
    }
    return escaped.toString();
  }

  /** Doubles underscores, spells dots as {@code _dot_} and maps other illegal code points to __. */
  private static String escapeWithDots(String name) {
    StringBuilder escaped = new StringBuilder(name.length() + 8);
    for (int i = 0; i < name.length(); ) {
      int c = name.codePointAt(i);
      if (c == '_') {
        escaped.append("__");
      } else if (c == '.') {
        escaped.append("_dot_");
      } else if (isValidLegacyChar(c, i)) {
        escaped.appendCodePoint(c);
      } else {
        escaped.append("__");
      }
      i += Character.charCount(c);
    }
    return escaped.toString();
  }

  /** Prefixes {@code U__}, doubles underscores and hex-encodes other illegal code points. */
  private static String escapeWithValueEncoding(String name) {
    StringBuilder escaped = new StringBuilder(name.length() + 8);
    escaped.append("U__");
    for (int i = 0; i < name.length(); ) {
      int c = name.codePointAt(i);
      if (c == '_') {
        escaped.append("__");
      } else if (isValidLegacyChar(c, i)) {
        escaped.appendCodePoint(c);
      } else if (!isValidUtf8Char(c)) {
        // codePointAt returns an unpaired surrogate as is; it is written as U+FFFD instead.
        escaped.append("_FFFD_");
      } else {
        escaped.append('_');
        escaped.append(Integer.toHexString(c));
        escaped.append('_');
      }
      i += Character.charCount(c);
    }
    return escaped.toString();
  }

  /**
   * Test if {@link #escapeName(String, EscapingScheme)} has to process the name.
   *
   * @param name the name to check; must not be {@code null}
   * @param scheme the escaping scheme that would be applied
   * @return {@code true} if the name is not a valid legacy metric name, or if the scheme is {@code
   *     DOTS_ESCAPING} and the name contains an underscore
   */
  public static boolean needsEscaping(String name, EscapingScheme scheme) {
    // A valid legacy metric name cannot contain a dot, so for DOTS_ESCAPING only the underscore
    // has to be checked once the name is known to be legacy-valid.
    return !isValidLegacyMetricName(name)
        || (scheme == EscapingScheme.DOTS_ESCAPING && name.contains("_"));
  }

  /**
   * Test if a code point is allowed at the given position of a legacy metric name.
   *
   * @param c the code point to check
   * @param i the index of the code point within the name; digits are rejected at index 0
   * @return {@code true} for {@code [a-zA-Z_:]}, and for {@code [0-9]} if {@code i > 0}
   */
  static boolean isValidLegacyChar(int c, int i) {
    return isAsciiLetter(c) || c == '_' || c == ':' || (i > 0 && isAsciiDigit(c));
  }

  /** Returns true if the code point is within the Unicode range and is not a surrogate. */
  private static boolean isValidUtf8Char(int c) {
    return (0 <= c && c < MIN_HIGH_SURROGATE) || (MAX_LOW_SURROGATE < c && c <= MAX_CODE_POINT);
  }

  /** Returns true for {@code [a-zA-Z]}. */
  private static boolean isAsciiLetter(int c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  }

  /** Returns true for {@code [0-9]}. */
  private static boolean isAsciiDigit(int c) {
    return c >= '0' && c <= '9';
  }

  /** Returns true for the underscore and the dot. */
  private static boolean isUnderscoreOrDot(char c) {
    return c == '_' || c == '.';
  }

  /** Returns true for the unit name characters {@code [a-zA-Z0-9_.:]}. */
  private static boolean isUnitNameChar(char c) {
    return isAsciiLetter(c) || isAsciiDigit(c) || c == '_' || c == '.' || c == ':';
  }

  /** Returns true if the two characters starting at offset are each an underscore or a dot. */
  private static boolean hasReservedPrefixAt(String name, int offset) {
    return offset + 1 < name.length()
        && isUnderscoreOrDot(name.charAt(offset))
        && isUnderscoreOrDot(name.charAt(offset + 1));
  }
}