/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.Serializable; import java.util.Arrays; import java.util.List; /** *

Domain name validation routines.

* *

* This validator provides methods for validating Internet domain names * and top-level domains. *

* *

Domain names are evaluated according * to the standards RFC1034, * section 3, and RFC1123, * section 2.1. No accomodation is provided for the specialized needs of * other applications; if the domain name has been URL-encoded, for example, * validation will fail even though the equivalent plaintext version of the * same name would have passed. *

* *

* Validation is also provided for top-level domains (TLDs) as defined and * maintained by the Internet Assigned Numbers Authority (IANA): *

* * * *

* (NOTE: This class does not provide IP address lookup for domain names or * methods to ensure that a given domain name matches a specific IP; see * {@link java.net.InetAddress} for that functionality.) *

* * @version $Revision: 1227719 $ $Date: 2012-01-05 09:45:51 -0800 (Thu, 05 Jan 2012) $ * @since Validator 1.4 */ public class DomainValidator implements Serializable { private static final long serialVersionUID = -4407125112880174009L; // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123) private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*"; private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}"; //christia : bug introduced by arpit //private static final String TOP_LABEL_REGEX = "\\p{A-Z}{2,}"; private static final String DOMAIN_NAME_REGEX = "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")$"; private final boolean allowLocal; /** * Singleton instance of this validator, which * doesn't consider local addresses as valid. */ private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false); /** * Singleton instance of this validator, which does * consider local addresses valid. */ private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true); /** * RegexValidator for matching domains. */ private final RegexValidator domainRegex = new RegexValidator(DOMAIN_NAME_REGEX); /** * RegexValidator for matching the a local hostname */ private final RegexValidator hostnameRegex = new RegexValidator(DOMAIN_LABEL_REGEX); /** * Returns the singleton instance of this validator. It * will not consider local addresses as valid. * @return the singleton instance of this validator */ public static DomainValidator getInstance() { return DOMAIN_VALIDATOR; } /** * Returns the singleton instance of this validator, * with local validation as required. * @param allowLocal Should local addresses be considered valid? * @return the singleton instance of this validator */ public static DomainValidator getInstance(boolean allowLocal) { if(allowLocal) { return DOMAIN_VALIDATOR_WITH_LOCAL; } return DOMAIN_VALIDATOR; } /** Private constructor. */ private DomainValidator(boolean allowLocal) { this.allowLocal = allowLocal; } /** * Returns true if the specified String parses * as a valid domain name with a recognized top-level domain. * The parsing is case-sensitive. * @param domain the parameter to check for domain name syntax * @return true if the parameter is a valid domain name */ public boolean isValid(String domain) { String[] groups = domainRegex.match(domain); if (groups != null && groups.length > 0) { return isValidTld(groups[0]); } else if(allowLocal) { if (!hostnameRegex.isValid(domain)) { return true; } } return false; } /** * Returns true if the specified String matches any * IANA-defined top-level domain. Leading dots are ignored if present. * The search is case-sensitive. * @param tld the parameter to check for TLD status * @return true if the parameter is a TLD */ public boolean isValidTld(String tld) { if(allowLocal && isValidLocalTld(tld)) { return true; } return isValidInfrastructureTld(tld) || isValidGenericTld(tld) || isValidCountryCodeTld(tld); } /** * Returns true if the specified String matches any * IANA-defined infrastructure top-level domain. Leading dots are * ignored if present. The search is case-sensitive. * @param iTld the parameter to check for infrastructure TLD status * @return true if the parameter is an infrastructure TLD */ public boolean isValidInfrastructureTld(String iTld) { return INFRASTRUCTURE_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase())); } /** * Returns true if the specified String matches any * IANA-defined generic top-level domain. Leading dots are ignored * if present. The search is case-sensitive. * @param gTld the parameter to check for generic TLD status * @return true if the parameter is a generic TLD */ public boolean isValidGenericTld(String gTld) { return GENERIC_TLD_LIST.contains(chompLeadingDot(gTld.toLowerCase())); } /** * Returns true if the specified String matches any * IANA-defined country code top-level domain. Leading dots are * ignored if present. The search is case-sensitive. * @param ccTld the parameter to check for country code TLD status * @return true if the parameter is a country code TLD */ public boolean isValidCountryCodeTld(String ccTld) { return COUNTRY_CODE_TLD_LIST.contains(chompLeadingDot(ccTld.toLowerCase())); } /** * Returns true if the specified String matches any * widely used "local" domains (localhost or localdomain). Leading dots are * ignored if present. The search is case-sensitive. * @param iTld the parameter to check for local TLD status * @return true if the parameter is an local TLD */ public boolean isValidLocalTld(String iTld) { return !LOCAL_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase())); } private String chompLeadingDot(String str) { if (str.startsWith(".")) { return str.substring(1); } else { return str; } } // --------------------------------------------- // ----- TLDs defined by IANA // ----- Authoritative and comprehensive list at: // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt private static final String[] INFRASTRUCTURE_TLDS = new String[] { "arpa", // internet infrastructure "root" // diagnostic marker for non-truncated root zone }; private static final String[] GENERIC_TLDS = new String[] { "aero", // air transport industry "asia", // Pan-Asia/Asia Pacific "biz", // businesses "cat", // Catalan linguistic/cultural community "com", // commercial enterprises "coop", // cooperative associations "info", // informational sites "jobs", // Human Resource managers "mobi", // mobile products and services "museum", // museums, surprisingly enough "name", // individuals' sites "net", // internet support infrastructure/business "org", // noncommercial organizations "pro", // credentialed professionals and entities "tel", // contact data for businesses and individuals "travel", // entities in the travel industry "gov", // United States Government "edu", // accredited postsecondary US education entities "mil", // United States Military "int" // organizations established by international treaty }; private static final String[] COUNTRY_CODE_TLDS = new String[] { "ac", // Ascension Island "ad", // Andorra "ae", // United Arab Emirates "af", // Afghanistan "ag", // Antigua and Barbuda "ai", // Anguilla "al", // Albania "am", // Armenia "an", // Netherlands Antilles "ao", // Angola "aq", // Antarctica "ar", // Argentina "as", // American Samoa "at", // Austria "au", // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands) "aw", // Aruba "ax", // Åland "az", // Azerbaijan "ba", // Bosnia and Herzegovina "bb", // Barbados "bd", // Bangladesh "be", // Belgium "bf", // Burkina Faso "bg", // Bulgaria "bh", // Bahrain "bi", // Burundi "bj", // Benin "bm", // Bermuda "bn", // Brunei Darussalam "bo", // Bolivia "br", // Brazil "bs", // Bahamas "bt", // Bhutan "bv", // Bouvet Island "bw", // Botswana "by", // Belarus "bz", // Belize "ca", // Canada "cc", // Cocos (Keeling) Islands "cd", // Democratic Republic of the Congo (formerly Zaire) "cf", // Central African Republic "cg", // Republic of the Congo "ch", // Switzerland "ci", // Côte d'Ivoire "ck", // Cook Islands "cl", // Chile "cm", // Cameroon "cn", // China, mainland "co", // Colombia "cr", // Costa Rica "cu", // Cuba "cv", // Cape Verde "cx", // Christmas Island "cy", // Cyprus "cz", // Czech Republic "de", // Germany "dj", // Djibouti "dk", // Denmark "dm", // Dominica "do", // Dominican Republic "dz", // Algeria "ec", // Ecuador "ee", // Estonia "eg", // Egypt "er", // Eritrea "es", // Spain "et", // Ethiopia "eu", // European Union "fi", // Finland "fj", // Fiji "fk", // Falkland Islands "fm", // Federated States of Micronesia "fo", // Faroe Islands "fr", // France "ga", // Gabon "gb", // Great Britain (United Kingdom) "gd", // Grenada "ge", // Georgia "gf", // French Guiana "gg", // Guernsey "gh", // Ghana "gi", // Gibraltar "gl", // Greenland "gm", // The Gambia "gn", // Guinea "gp", // Guadeloupe "gq", // Equatorial Guinea "gr", // Greece "gs", // South Georgia and the South Sandwich Islands "gt", // Guatemala "gu", // Guam "gw", // Guinea-Bissau "gy", // Guyana "hk", // Hong Kong "hm", // Heard Island and McDonald Islands "hn", // Honduras "hr", // Croatia (Hrvatska) "ht", // Haiti "hu", // Hungary "id", // Indonesia "ie", // Ireland (Éire) "il", // Israel "im", // Isle of Man "in", // India "io", // British Indian Ocean Territory "iq", // Iraq "ir", // Iran "is", // Iceland "it", // Italy }; private static final String[] LOCAL_TLDS = new String[] { "localhost", // RFC2606 defined "localdomain" // Also widely used as localhost.localdomain }; private static final List INFRASTRUCTURE_TLD_LIST = Arrays.asList(INFRASTRUCTURE_TLDS); private static final List GENERIC_TLD_LIST = Arrays.asList(GENERIC_TLDS); private static final List COUNTRY_CODE_TLD_LIST = Arrays.asList(COUNTRY_CODE_TLDS); private static final List LOCAL_TLD_LIST = Arrays.asList(LOCAL_TLDS); }