Added Software Engineering II code

This commit is contained in:
2017-11-29 12:18:13 -08:00
parent c036c6e53f
commit 4566d98b5f
54 changed files with 9288 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<component inherit-compiler-output="true" inheritJdk="true">
<output-test url="file://$MODULE_DIR$/out/test/URLValidator"/>
<exclude-output/>
<contentEntry url="file://$MODULE_DIR$"/>
</component>

View File

@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module classpath="eclipse" classpath-dir="$MODULE_DIR$" type="JAVA_MODULE" version="4" />

View File

@@ -0,0 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<eclipse-userlibraries />

View File

@@ -0,0 +1,370 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
/**
* <p><b>Domain name</b> validation routines.</p>
*
* <p>
* This validator provides methods for validating Internet domain names
* and top-level domains.
* </p>
*
* <p>Domain names are evaluated according
* to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
* section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
* section 2.1. No accommodation is provided for the specialized needs of
* other applications; if the domain name has been URL-encoded, for example,
* validation will fail even though the equivalent plaintext version of the
* same name would have passed.
* </p>
*
* <p>
* Validation is also provided for top-level domains (TLDs) as defined and
* maintained by the Internet Assigned Numbers Authority (IANA):
* </p>
*
* <ul>
* <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
* (<code>.arpa</code>, etc.)</li>
* <li>{@link #isValidGenericTld} - validates generic TLDs
* (<code>.com, .org</code>, etc.)</li>
* <li>{@link #isValidCountryCodeTld} - validates country code TLDs
* (<code>.us, .uk, .cn</code>, etc.)</li>
* </ul>
*
* <p>
* (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
* methods to ensure that a given domain name matches a specific IP; see
* {@link java.net.InetAddress} for that functionality.)
* </p>
*
* @version $Revision: 1227719 $ $Date: 2012-01-05 09:45:51 -0800 (Thu, 05 Jan 2012) $
* @since Validator 1.4
*/
public class DomainValidator implements Serializable {

    private static final long serialVersionUID = -4407125112880174009L;

    // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123).
    // A label is alphanumeric at both ends and may contain hyphens in between.
    private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*";
    // The top-level label is purely alphabetic and at least two characters long.
    private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}";
    // One or more dot-terminated labels followed by the top-level label; the
    // top-level label is the only capturing group.
    private static final String DOMAIN_NAME_REGEX =
            "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")$";

    /** Whether local names (bare hostnames and the local TLDs) are accepted. */
    private final boolean allowLocal;

    /**
     * Singleton instance of this validator, which
     * doesn't consider local addresses as valid.
     */
    private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);

    /**
     * Singleton instance of this validator, which does
     * consider local addresses valid.
     */
    private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);

    /**
     * RegexValidator for matching fully qualified domain names.
     */
    private final RegexValidator domainRegex =
            new RegexValidator(DOMAIN_NAME_REGEX);

    /**
     * RegexValidator for matching a local (single-label) hostname.
     */
    private final RegexValidator hostnameRegex =
            new RegexValidator(DOMAIN_LABEL_REGEX);

    /**
     * Returns the singleton instance of this validator. It
     * will not consider local addresses as valid.
     * @return the singleton instance of this validator
     */
    public static DomainValidator getInstance() {
        return DOMAIN_VALIDATOR;
    }

    /**
     * Returns the singleton instance of this validator,
     * with local validation as required.
     * @param allowLocal Should local addresses be considered valid?
     * @return the singleton instance of this validator
     */
    public static DomainValidator getInstance(boolean allowLocal) {
        if (allowLocal) {
            return DOMAIN_VALIDATOR_WITH_LOCAL;
        }
        return DOMAIN_VALIDATOR;
    }

    /** Private constructor. */
    private DomainValidator(boolean allowLocal) {
        this.allowLocal = allowLocal;
    }

    /**
     * Returns true if the specified <code>String</code> parses
     * as a valid domain name with a recognized top-level domain.
     * Letter case is not significant: the top-level label is lower-cased
     * before it is looked up.
     * <p>
     * When local names are allowed, a single label that is a syntactically
     * valid hostname (for example <code>localhost</code>) is accepted as well.
     * </p>
     * @param domain the parameter to check for domain name syntax;
     *        <code>null</code> is never valid
     * @return true if the parameter is a valid domain name
     */
    public boolean isValid(String domain) {
        String[] groups = domainRegex.match(domain);
        if (groups != null && groups.length > 0) {
            // groups[0] is the top-level label captured by DOMAIN_NAME_REGEX
            return isValidTld(groups[0]);
        }
        // Not a dotted name: only acceptable as a bare hostname, and only
        // when local names were requested.
        return allowLocal && hostnameRegex.isValid(domain);
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * IANA-defined top-level domain (or, when local names are allowed,
     * a local TLD). Leading dots are ignored if present.
     * The search is case-insensitive.
     * @param tld the parameter to check for TLD status
     * @return true if the parameter is a TLD
     */
    public boolean isValidTld(String tld) {
        if (allowLocal && isValidLocalTld(tld)) {
            return true;
        }
        return isValidInfrastructureTld(tld)
                || isValidGenericTld(tld)
                || isValidCountryCodeTld(tld);
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * IANA-defined infrastructure top-level domain. Leading dots are
     * ignored if present. The search is case-insensitive.
     * @param iTld the parameter to check for infrastructure TLD status
     * @return true if the parameter is an infrastructure TLD
     */
    public boolean isValidInfrastructureTld(String iTld) {
        return INFRASTRUCTURE_TLD_LIST.contains(normalizeTld(iTld));
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * IANA-defined generic top-level domain. Leading dots are ignored
     * if present. The search is case-insensitive.
     * @param gTld the parameter to check for generic TLD status
     * @return true if the parameter is a generic TLD
     */
    public boolean isValidGenericTld(String gTld) {
        return GENERIC_TLD_LIST.contains(normalizeTld(gTld));
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * IANA-defined country code top-level domain. Leading dots are
     * ignored if present. The search is case-insensitive.
     * @param ccTld the parameter to check for country code TLD status
     * @return true if the parameter is a country code TLD
     */
    public boolean isValidCountryCodeTld(String ccTld) {
        return COUNTRY_CODE_TLD_LIST.contains(normalizeTld(ccTld));
    }

    /**
     * Returns true if the specified <code>String</code> matches any
     * widely used "local" domains (localhost or localdomain). Leading dots are
     * ignored if present. The search is case-insensitive.
     * @param iTld the parameter to check for local TLD status
     * @return true if the parameter is a local TLD
     */
    public boolean isValidLocalTld(String iTld) {
        return LOCAL_TLD_LIST.contains(normalizeTld(iTld));
    }

    /**
     * Lower-cases a TLD and strips one leading dot so it can be looked up in
     * the TLD lists. A fixed locale is used so that the mapping of ASCII
     * letters does not depend on the JVM default locale (under a Turkish
     * locale "INFO" would otherwise not lower-case to "info").
     */
    private String normalizeTld(String tld) {
        return chompLeadingDot(tld.toLowerCase(java.util.Locale.ENGLISH));
    }

    /** Removes a single leading '.' from the string, if there is one. */
    private String chompLeadingDot(String str) {
        if (str.startsWith(".")) {
            return str.substring(1);
        } else {
            return str;
        }
    }

    // ---------------------------------------------
    // ----- TLDs defined by IANA
    // ----- Authoritative and comprehensive list at:
    // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt

    private static final String[] INFRASTRUCTURE_TLDS = new String[] {
        "arpa", // internet infrastructure
        "root" // diagnostic marker for non-truncated root zone
    };

    private static final String[] GENERIC_TLDS = new String[] {
        "aero", // air transport industry
        "asia", // Pan-Asia/Asia Pacific
        "biz", // businesses
        "cat", // Catalan linguistic/cultural community
        "com", // commercial enterprises
        "coop", // cooperative associations
        "info", // informational sites
        "jobs", // Human Resource managers
        "mobi", // mobile products and services
        "museum", // museums, surprisingly enough
        "name", // individuals' sites
        "net", // internet support infrastructure/business
        "org", // noncommercial organizations
        "pro", // credentialed professionals and entities
        "tel", // contact data for businesses and individuals
        "travel", // entities in the travel industry
        "gov", // United States Government
        "edu", // accredited postsecondary US education entities
        "mil", // United States Military
        "int" // organizations established by international treaty
    };

    private static final String[] COUNTRY_CODE_TLDS = new String[] {
        "ac", // Ascension Island
        "ad", // Andorra
        "ae", // United Arab Emirates
        "af", // Afghanistan
        "ag", // Antigua and Barbuda
        "ai", // Anguilla
        "al", // Albania
        "am", // Armenia
        "an", // Netherlands Antilles
        "ao", // Angola
        "aq", // Antarctica
        "ar", // Argentina
        "as", // American Samoa
        "at", // Austria
        "au", // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
        "aw", // Aruba
        "ax", // Åland
        "az", // Azerbaijan
        "ba", // Bosnia and Herzegovina
        "bb", // Barbados
        "bd", // Bangladesh
        "be", // Belgium
        "bf", // Burkina Faso
        "bg", // Bulgaria
        "bh", // Bahrain
        "bi", // Burundi
        "bj", // Benin
        "bm", // Bermuda
        "bn", // Brunei Darussalam
        "bo", // Bolivia
        "br", // Brazil
        "bs", // Bahamas
        "bt", // Bhutan
        "bv", // Bouvet Island
        "bw", // Botswana
        "by", // Belarus
        "bz", // Belize
        "ca", // Canada
        "cc", // Cocos (Keeling) Islands
        "cd", // Democratic Republic of the Congo (formerly Zaire)
        "cf", // Central African Republic
        "cg", // Republic of the Congo
        "ch", // Switzerland
        "ci", // Côte d'Ivoire
        "ck", // Cook Islands
        "cl", // Chile
        "cm", // Cameroon
        "cn", // China, mainland
        "co", // Colombia
        "cr", // Costa Rica
        "cu", // Cuba
        "cv", // Cape Verde
        "cx", // Christmas Island
        "cy", // Cyprus
        "cz", // Czech Republic
        "de", // Germany
        "dj", // Djibouti
        "dk", // Denmark
        "dm", // Dominica
        "do", // Dominican Republic
        "dz", // Algeria
        "ec", // Ecuador
        "ee", // Estonia
        "eg", // Egypt
        "er", // Eritrea
        "es", // Spain
        "et", // Ethiopia
        "eu", // European Union
        "fi", // Finland
        "fj", // Fiji
        "fk", // Falkland Islands
        "fm", // Federated States of Micronesia
        "fo", // Faroe Islands
        "fr", // France
        "ga", // Gabon
        "gb", // Great Britain (United Kingdom)
        "gd", // Grenada
        "ge", // Georgia
        "gf", // French Guiana
        "gg", // Guernsey
        "gh", // Ghana
        "gi", // Gibraltar
        "gl", // Greenland
        "gm", // The Gambia
        "gn", // Guinea
        "gp", // Guadeloupe
        "gq", // Equatorial Guinea
        "gr", // Greece
        "gs", // South Georgia and the South Sandwich Islands
        "gt", // Guatemala
        "gu", // Guam
        "gw", // Guinea-Bissau
        "gy", // Guyana
        "hk", // Hong Kong
        "hm", // Heard Island and McDonald Islands
        "hn", // Honduras
        "hr", // Croatia (Hrvatska)
        "ht", // Haiti
        "hu", // Hungary
        "id", // Indonesia
        "ie", // Ireland (Éire)
        "il", // Israel
        "im", // Isle of Man
        "in", // India
        "io", // British Indian Ocean Territory
        "iq", // Iraq
        "ir", // Iran
        "is", // Iceland
        "it", // Italy
        "je", // Jersey
        "jm", // Jamaica
        "jo", // Jordan
        "jp", // Japan
        "ke", // Kenya
        "kg", // Kyrgyzstan
        "kh", // Cambodia (Khmer)
        "ki", // Kiribati
        "km", // Comoros
        "kn", // Saint Kitts and Nevis
        "kp", // North Korea
        "kr", // South Korea
        "kw", // Kuwait
        "ky", // Cayman Islands
        "kz", // Kazakhstan
        "la", // Laos
        "lb", // Lebanon
        "lc", // Saint Lucia
        "li", // Liechtenstein
        "lk", // Sri Lanka
        "lr", // Liberia
        "ls", // Lesotho
        "lt", // Lithuania
        "lu", // Luxembourg
        "lv", // Latvia
        "ly", // Libya
        "ma", // Morocco
        "mc", // Monaco
        "md", // Moldova
        "me", // Montenegro
        "mg", // Madagascar
        "mh", // Marshall Islands
        "mk", // Republic of Macedonia
        "ml", // Mali
        "mm", // Myanmar
        "mn", // Mongolia
        "mo", // Macau
        "mp", // Northern Mariana Islands
        "mq", // Martinique
        "mr", // Mauritania
        "ms", // Montserrat
        "mt", // Malta
        "mu", // Mauritius
        "mv", // Maldives
        "mw", // Malawi
        "mx", // Mexico
        "my", // Malaysia
        "mz", // Mozambique
        "na", // Namibia
        "nc", // New Caledonia
        "ne", // Niger
        "nf", // Norfolk Island
        "ng", // Nigeria
        "ni", // Nicaragua
        "nl", // Netherlands
        "no", // Norway
        "np", // Nepal
        "nr", // Nauru
        "nu", // Niue
        "nz", // New Zealand
        "om", // Oman
        "pa", // Panama
        "pe", // Peru
        "pf", // French Polynesia
        "pg", // Papua New Guinea
        "ph", // Philippines
        "pk", // Pakistan
        "pl", // Poland
        "pm", // Saint-Pierre and Miquelon
        "pn", // Pitcairn Islands
        "pr", // Puerto Rico
        "ps", // Palestinian territories
        "pt", // Portugal
        "pw", // Palau
        "py", // Paraguay
        "qa", // Qatar
        "re", // Reunion
        "ro", // Romania
        "rs", // Serbia
        "ru", // Russia
        "rw", // Rwanda
        "sa", // Saudi Arabia
        "sb", // Solomon Islands
        "sc", // Seychelles
        "sd", // Sudan
        "se", // Sweden
        "sg", // Singapore
        "sh", // Saint Helena
        "si", // Slovenia
        "sj", // Svalbard and Jan Mayen Islands
        "sk", // Slovakia
        "sl", // Sierra Leone
        "sm", // San Marino
        "sn", // Senegal
        "so", // Somalia
        "sr", // Suriname
        "st", // Sao Tome and Principe
        "su", // Soviet Union (deprecated)
        "sv", // El Salvador
        "sy", // Syria
        "sz", // Swaziland
        "tc", // Turks and Caicos Islands
        "td", // Chad
        "tf", // French Southern and Antarctic Lands
        "tg", // Togo
        "th", // Thailand
        "tj", // Tajikistan
        "tk", // Tokelau
        "tl", // East Timor
        "tm", // Turkmenistan
        "tn", // Tunisia
        "to", // Tonga
        "tp", // East Timor (older code)
        "tr", // Turkey
        "tt", // Trinidad and Tobago
        "tv", // Tuvalu
        "tw", // Taiwan
        "tz", // Tanzania
        "ua", // Ukraine
        "ug", // Uganda
        "uk", // United Kingdom
        "um", // United States Minor Outlying Islands
        "us", // United States of America
        "uy", // Uruguay
        "uz", // Uzbekistan
        "va", // Vatican City State
        "vc", // Saint Vincent and the Grenadines
        "ve", // Venezuela
        "vg", // British Virgin Islands
        "vi", // U.S. Virgin Islands
        "vn", // Vietnam
        "vu", // Vanuatu
        "wf", // Wallis and Futuna
        "ws", // Samoa (formerly Western Samoa)
        "ye", // Yemen
        "yt", // Mayotte
        "yu", // Serbia and Montenegro (originally Yugoslavia)
        "za", // South Africa
        "zm", // Zambia
        "zw" // Zimbabwe
    };

    private static final String[] LOCAL_TLDS = new String[] {
        "localhost", // RFC2606 defined
        "localdomain" // Also widely used as localhost.localdomain
    };

    // List views over the arrays above, used for the membership tests.
    private static final List INFRASTRUCTURE_TLD_LIST = Arrays.asList(INFRASTRUCTURE_TLDS);
    private static final List GENERIC_TLD_LIST = Arrays.asList(GENERIC_TLDS);
    private static final List COUNTRY_CODE_TLD_LIST = Arrays.asList(COUNTRY_CODE_TLDS);
    private static final List LOCAL_TLD_LIST = Arrays.asList(LOCAL_TLDS);
}

View File

@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
/**
* <p><b>InetAddress</b> validation and conversion routines (<code>java.net.InetAddress</code>).</p>
*
* <p>This class provides methods to validate a candidate IP address.
*
* <p>
* This class is a Singleton; you can retrieve the instance via the {@link #getInstance()} method.
* </p>
*
* @version $Revision: 1227719 $
* @since Validator 1.4
*/
public class InetAddressValidator implements Serializable {

    private static final long serialVersionUID = -919201640201914789L;

    // Four dot-separated groups of one to three decimal digits; each group is captured.
    private static final String IPV4_REGEX =
            "^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$";

    /** Largest value a single dotted-quad segment may take. */
    private static final int IPV4_MAX_OCTET_VALUE = 255;

    /**
     * Singleton instance of this class.
     */
    private static final InetAddressValidator VALIDATOR = new InetAddressValidator();

    /** IPv4 RegexValidator */
    private final RegexValidator ipv4Validator = new RegexValidator(IPV4_REGEX);

    /**
     * Returns the singleton instance of this validator.
     * @return the singleton instance of this validator
     */
    public static InetAddressValidator getInstance() {
        return VALIDATOR;
    }

    /**
     * Checks if the specified string is a valid IP address.
     * Only the IPv4 dotted-quad form is checked.
     * @param inetAddress the string to validate
     * @return true if the string validates as an IP address
     */
    public boolean isValid(String inetAddress) {
        return isValidInet4Address(inetAddress);
    }

    /**
     * Validates an IPv4 address. Returns true if valid.
     * <p>
     * The address must consist of exactly four dot-separated decimal
     * segments, each in the range 0 to 255 inclusive.
     * </p>
     * @param inet4Address the IPv4 address to validate; <code>null</code> is invalid
     * @return true if the argument contains a valid IPv4 address
     */
    public boolean isValidInet4Address(String inet4Address) {
        // verify that address conforms to generic IPv4 format
        String[] groups = ipv4Validator.match(inet4Address);
        if (groups == null) {
            return false;
        }
        // verify that address subgroups are legal
        for (int i = 0; i < groups.length; i++) {
            String ipSegment = groups[i];
            if (ipSegment == null || ipSegment.length() <= 0) {
                return false;
            }
            int iIpSegment;
            try {
                iIpSegment = Integer.parseInt(ipSegment);
            } catch (NumberFormatException e) {
                return false;
            }
            // A segment such as "256" or "999" fits the three-digit pattern
            // but is not a legal octet, so the whole address is rejected.
            if (iIpSegment > IPV4_MAX_OCTET_VALUE) {
                return false;
            }
        }
        return true;
    }
}

View File

@@ -0,0 +1,216 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
* <b>Regular Expression</b> validation (using JDK 1.4+ regex support).
* <p>
* Construct the validator either for a single regular expression or a set (array) of
* regular expressions. By default validation is <i>case sensitive</i> but constructors
* are provided to allow <i>case in-sensitive</i> validation. For example to create
* a validator which does <i>case in-sensitive</i> validation for a set of regular
* expressions:
* <pre>
* String[] regexs = new String[] {...};
* RegexValidator validator = new RegexValidator(regexs, false);
* </pre>
* <p>
* <ul>
* <li>Validate <code>true</code> or <code>false</code>:</li>
* <ul>
* <li><code>boolean valid = validator.isValid(value);</code></li>
* </ul>
* <li>Validate returning an aggregated String of the matched groups:</li>
* <ul>
* <li><code>String result = validator.validate(value);</code></li>
* </ul>
* <li>Validate returning the matched groups:</li>
* <ul>
* <li><code>String[] result = validator.match(value);</code></li>
* </ul>
* </ul>
* <p>
* Cached instances pre-compile and re-use {@link Pattern}(s) - which according
* to the {@link Pattern} API are safe to use in a multi-threaded environment.
*
* @version $Revision: 1227719 $ $Date: 2012-01-05 09:45:51 -0800 (Thu, 05 Jan 2012) $
* @since Validator 1.4
*/
public class RegexValidator implements Serializable {

    private static final long serialVersionUID = -8832409930574867162L;

    /** Compiled forms of the expressions, in the order they were supplied. */
    private final Pattern[] patterns;

    /**
     * Creates a <i>case sensitive</i> validator backed by one regular expression.
     *
     * @param regex the regular expression values are matched against
     */
    public RegexValidator(String regex) {
        this(new String[] {regex}, true);
    }

    /**
     * Creates a validator backed by one regular expression, with the
     * requested case sensitivity.
     *
     * @param regex the regular expression values are matched against
     * @param caseSensitive <code>true</code> for <i>case sensitive</i>
     *        matching, <code>false</code> for <i>case in-sensitive</i> matching
     */
    public RegexValidator(String regex, boolean caseSensitive) {
        this(new String[] {regex}, caseSensitive);
    }

    /**
     * Creates a <i>case sensitive</i> validator that accepts a value when
     * any one of the given regular expressions matches it.
     *
     * @param regexs the regular expressions values are matched against
     */
    public RegexValidator(String[] regexs) {
        this(regexs, true);
    }

    /**
     * Creates a validator that accepts a value when any one of the given
     * regular expressions matches it, with the requested case sensitivity.
     *
     * @param regexs the regular expressions values are matched against
     * @param caseSensitive <code>true</code> for <i>case sensitive</i>
     *        matching, <code>false</code> for <i>case in-sensitive</i> matching
     * @throws IllegalArgumentException if the array is <code>null</code> or
     *         empty, or if any element is <code>null</code> or empty
     */
    public RegexValidator(String[] regexs, boolean caseSensitive) {
        if (regexs == null || regexs.length == 0) {
            throw new IllegalArgumentException("Regular expressions are missing");
        }
        final int compileFlags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE;
        final Pattern[] compiled = new Pattern[regexs.length];
        for (int index = 0; index < regexs.length; index++) {
            final String expression = regexs[index];
            if (expression == null || expression.length() == 0) {
                throw new IllegalArgumentException("Regular expression[" + index + "] is missing");
            }
            compiled[index] = Pattern.compile(expression, compileFlags);
        }
        this.patterns = compiled;
    }

    /**
     * Tests a value against the regular expressions.
     *
     * @param value the value to test
     * @return <code>true</code> if at least one expression matches the whole
     *         value, <code>false</code> otherwise (including for <code>null</code>)
     */
    public boolean isValid(String value) {
        return firstMatch(value) != null;
    }

    /**
     * Tests a value against the regular expressions and returns the
     * capturing groups of the first expression that matches.
     *
     * @param value the value to test
     * @return the matched <i>groups</i> (group 1 onwards), or
     *         <code>null</code> if no expression matches
     */
    public String[] match(String value) {
        final Matcher hit = firstMatch(value);
        if (hit == null) {
            return null;
        }
        final String[] captured = new String[hit.groupCount()];
        for (int group = 1; group <= captured.length; group++) {
            captured[group - 1] = hit.group(group);
        }
        return captured;
    }

    /**
     * Tests a value against the regular expressions and returns the
     * capturing groups of the first matching expression joined into one String.
     *
     * @param value the value to test
     * @return the concatenation of the matched <i>groups</i>, or
     *         <code>null</code> if no expression matches
     */
    public String validate(String value) {
        final Matcher hit = firstMatch(value);
        if (hit == null) {
            return null;
        }
        final int total = hit.groupCount();
        if (total == 1) {
            // A lone group is handed back as-is, even when it did not participate.
            return hit.group(1);
        }
        final StringBuffer joined = new StringBuffer();
        for (int group = 1; group <= total; group++) {
            final String piece = hit.group(group);
            if (piece != null) {
                joined.append(piece);
            }
        }
        return joined.toString();
    }

    /**
     * Provides a String representation of this validator.
     * @return the expressions, comma separated, wrapped in <code>RegexValidator{...}</code>
     */
    public String toString() {
        final StringBuffer text = new StringBuffer("RegexValidator{");
        String separator = "";
        for (int index = 0; index < patterns.length; index++) {
            text.append(separator).append(patterns[index].pattern());
            separator = ",";
        }
        return text.append("}").toString();
    }

    /**
     * Finds the first expression, in declaration order, that matches the
     * entire value.
     *
     * @param value the value to test; may be <code>null</code>
     * @return the successful matcher, or <code>null</code> when the value is
     *         <code>null</code> or nothing matches
     */
    private Matcher firstMatch(String value) {
        if (value == null) {
            return null;
        }
        for (int index = 0; index < patterns.length; index++) {
            final Matcher candidate = patterns[index].matcher(value);
            if (candidate.matches()) {
                return candidate;
            }
        }
        return null;
    }
}

View File

@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Groups tests and expected results.
*
* @version $Revision: 588091 $ $Date: 2007-10-24 17:17:42 -0700 (Wed, 24 Oct 2007) $
*/
public class ResultPair {

    /** The URL, or the individual part of a URL, under test. */
    public String item;

    /** Whether {@link #item} is expected to be valid. */
    public boolean valid;

    /**
     * Pairs a test input with its expected validation outcome.
     *
     * @param item the URL (or URL part) to test
     * @param valid whether that item is expected to be valid
     */
    public ResultPair(String item, boolean valid) {
        this.valid = valid;
        this.item = item;
    }
}

View File

@@ -0,0 +1,504 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p><b>URL Validation</b> routines.</p>
* Behavior of validation is modified by passing in options:
* <li>ALLOW_2_SLASHES - [FALSE] Allows double '/' characters in the path
* component.</li>
* <li>NO_FRAGMENTS - [FALSE] By default fragments are allowed, if this option is
* included then fragments are flagged as illegal.</li>
* <li>ALLOW_ALL_SCHEMES - [FALSE] By default only http, https, and ftp are
* considered valid schemes. Enabling this option will let any scheme pass validation.</li>
*
* <p>Originally based on a PHP script by Debbie Dyer, validation.php v1.2b, Date: 03/07/02,
* http://javascript.internet.com. However, this validation now bears little resemblance
* to the php original.</p>
* <pre>
* Example of usage:
* Construct a UrlValidator with valid schemes of "http", and "https".
*
* String[] schemes = {"http","https"};
* UrlValidator urlValidator = new UrlValidator(schemes);
* if (urlValidator.isValid("ftp://foo.bar.com/")) {
* System.out.println("url is valid");
* } else {
* System.out.println("url is invalid");
* }
*
* prints "url is invalid"
* If instead the default constructor is used.
*
* UrlValidator urlValidator = new UrlValidator();
* if (urlValidator.isValid("ftp://foo.bar.com/")) {
* System.out.println("url is valid");
* } else {
* System.out.println("url is invalid");
* }
*
* prints out "url is valid"
* </pre>
*
* @see
* <a href="http://www.ietf.org/rfc/rfc2396.txt">
* Uniform Resource Identifiers (URI): Generic Syntax
* </a>
*
* @version $Revision: 1227719 $ $Date: 2012-01-05 09:45:51 -0800 (Thu, 05 Jan 2012) $
* @since Validator 1.4
*/
public class UrlValidator implements Serializable {
private static final long serialVersionUID = 7557161713937335013L;
// ----- Option flags. Each flag occupies its own bit so that several
// ----- flags can be combined into a single long options value.
/**
* Allows all validly formatted schemes to pass validation instead of
* supplying a set of valid schemes.
*/
public static final long ALLOW_ALL_SCHEMES = 1 << 0;
/**
* Allow two slashes in the path component of the URL.
*/
public static final long ALLOW_2_SLASHES = 1 << 1;
/**
* Enabling this option disallows any URL fragments.
*/
public static final long NO_FRAGMENTS = 1 << 2;
/**
* Allow local URLs, such as http://localhost/ or http://machine/ .
* This enables a broad-brush check; for complex local machine name
* validation requirements you should create your validator with
* a {@link RegexValidator} instead ({@link #UrlValidator(RegexValidator, long)})
*/
public static final long ALLOW_LOCAL_URLS = 1 << 3;
// Characters accepted in the host part of an authority: ASCII letters and
// digits, '-' and '.'. This string is spliced into a character class in
// AUTHORITY_REGEX below.
private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\.";
/**
* This expression derived/taken from the BNF for URI (RFC2396).
*/
private static final String URL_REGEX =
"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
// Capturing groups used by this class: 2 = scheme, 4 = authority, 5 = path,
// 7 = query (without the '?'), 9 = fragment (without the '#').
// Groups 1, 3, 6 and 8 are the same parts including their delimiters.
private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
/**
* Index of the scheme/protocol group in URL_REGEX (the text before the first ':',
* e.g. http, ftp, file).
*/
private static final int PARSE_URL_SCHEME = 2;
/**
* Index of the authority group in URL_REGEX. Includes hostname/ip and port number.
*/
private static final int PARSE_URL_AUTHORITY = 4;
/** Index of the path group in URL_REGEX. */
private static final int PARSE_URL_PATH = 5;
/** Index of the query group in URL_REGEX. */
private static final int PARSE_URL_QUERY = 7;
/** Index of the fragment group in URL_REGEX. */
private static final int PARSE_URL_FRAGMENT = 9;
/**
* Syntax of a scheme/protocol name (e.g. http, ftp, https): a letter followed
* by any number of letters, digits, '+', '-' or '.'.
*/
private static final String SCHEME_REGEX = "^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*";
private static final Pattern SCHEME_PATTERN = Pattern.compile(SCHEME_REGEX);
private static final String AUTHORITY_REGEX =
"^([" + AUTHORITY_CHARS_REGEX + "]*)(:\\d*)?(.*)?";
// Capturing groups: 1 = host name or IP, 2 = port including the leading ':',
// 3 = whatever follows (expected to be empty).
private static final Pattern AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEX);
/** Index of the host/IP group in AUTHORITY_REGEX. */
private static final int PARSE_AUTHORITY_HOST_IP = 1;
/** Index of the port group (including the leading ':') in AUTHORITY_REGEX. */
private static final int PARSE_AUTHORITY_PORT = 2;
/**
* Index of the trailing group in AUTHORITY_REGEX. Should always be empty.
*/
private static final int PARSE_AUTHORITY_EXTRA = 3;
// A path is either empty or starts with '/' followed by the listed characters.
private static final String PATH_REGEX = "^(/[-\\w:@&?=+,.!/~*'%$_;\\(\\)]*)?$";
private static final Pattern PATH_PATTERN = Pattern.compile(PATH_REGEX);
private static final String QUERY_REGEX = "^(.*)$";
private static final Pattern QUERY_PATTERN = Pattern.compile(QUERY_REGEX);
// Whole-URL pre-check: one or more characters, all of them ASCII.
private static final String LEGAL_ASCII_REGEX = "^\\p{ASCII}+$";
private static final Pattern ASCII_PATTERN = Pattern.compile(LEGAL_ASCII_REGEX);
// Port part of an authority: a ':' followed by one to five decimal digits.
// Five digits are required to express the highest port number, 65535; a
// narrower bound rejects common ports such as ":8080".
// NOTE: only the digit count is bounded here, so 65536-99999 still match.
private static final String PORT_REGEX = "^:(\\d{1,5})$";
private static final Pattern PORT_PATTERN = Pattern.compile(PORT_REGEX);
/**
* Holds the set of current validation options (a combination of the
* public option constants of this class).
*/
private final long options;
/**
* The set of schemes that are allowed to be in a URL. This is the shared
* empty set when ALLOW_ALL_SCHEMES is on.
*/
private final Set allowedSchemes;
/**
* Regular expressions used to manually validate authorities if IANA
* domain name validation isn't desired. May be <code>null</code>.
*/
private final RegexValidator authorityValidator;
/**
* If no schemes are provided, default to this set.
*/
private static final String[] DEFAULT_SCHEMES = {"http", "https", "ftp"};
/**
* Singleton instance of this class with default schemes and options.
* Declared after DEFAULT_SCHEMES because the constructor reads that array.
*/
private static final UrlValidator DEFAULT_URL_VALIDATOR = new UrlValidator();
/**
* Returns the singleton instance of this class with default schemes and options.
* @return singleton instance with default schemes and options
*/
public static UrlValidator getInstance() {
// The shared instance is created once, when the class is initialized.
return DEFAULT_URL_VALIDATOR;
}
/**
* Create a UrlValidator with default properties.
*/
public UrlValidator() {
    // Default schemes, no custom authority validator, no option flags.
    this(null, null, 0L);
}

/**
 * Creates a validator for the given schemes with no option flags set.
 * @param schemes One or more url schemes to consider valid; passing
 *        <code>null</code> selects the defaults "http,https,ftp".
 *        A non-null array must list every scheme that is to be accepted.
 *        Setting the ALLOW_ALL_SCHEMES option will
 *        ignore the contents of schemes.
 */
public UrlValidator(String[] schemes) {
    this(schemes, null, 0L);
}

/**
 * Creates a validator for the default schemes with the given validation options.
 * @param options The options should be set using the public constants declared in
 *        this class. To set multiple options you simply add them together. For example,
 *        ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options.
 */
public UrlValidator(long options) {
    this(null, null, options);
}

/**
 * Creates a validator for the given schemes and validation options.
 * @param schemes The set of valid schemes; <code>null</code> selects the defaults.
 * @param options The options should be set using the public constants declared in
 *        this class. To set multiple options you simply add them together. For example,
 *        ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options.
 */
public UrlValidator(String[] schemes, long options) {
    this(schemes, null, options);
}

/**
 * Creates a validator for the default schemes that consults a custom
 * authority validator.
 * @param authorityValidator Regular expression validator used to validate the authority part
 * @param options Validation options. Set using the public constants of this class.
 *        To set multiple options, simply add them together:
 *        <p><code>ALLOW_2_SLASHES + NO_FRAGMENTS</code></p>
 *        enables both of those options.
 */
public UrlValidator(RegexValidator authorityValidator, long options) {
    this(null, authorityValidator, options);
}
/**
* Customizable constructor. Validation behavior is modified by passing in options.
* @param schemes the set of valid schemes
* @param authorityValidator Regular expression validator used to validate the authority part
* @param options Validation options. Set using the public constants of this class.
* To set multiple options, simply add them together:
* <p><code>ALLOW_2_SLASHES + NO_FRAGMENTS</code></p>
* enables both of those options.
*/
public UrlValidator(String[] schemes, RegexValidator authorityValidator, long options) {
    // Store the options before anything else; isOn(...) below presumably
    // reads this field (its definition is outside this section - confirm).
    this.options = options;
    if (!isOn(ALLOW_ALL_SCHEMES)) {
        // Fall back to the built-in scheme list when the caller supplied none.
        String[] accepted = (schemes != null) ? schemes : DEFAULT_SCHEMES;
        this.allowedSchemes = new HashSet(Arrays.asList(accepted));
    } else {
        // Every well-formed scheme is accepted, so no list is kept.
        this.allowedSchemes = Collections.EMPTY_SET;
    }
    this.authorityValidator = authorityValidator;
}
/**
* <p>Checks if a field has a valid url address.</p>
*
* @param value The value validation is being performed on. A <code>null</code>
* value is considered invalid.
* @return true if the url is valid.
*/
public boolean isValid(String value) {
    // null and anything containing non-ASCII characters is rejected outright.
    if (value == null || !ASCII_PATTERN.matcher(value).matches()) {
        return false;
    }

    // Split the URL into scheme, authority, path, query and fragment.
    Matcher parts = URL_PATTERN.matcher(value);
    if (!parts.matches()) {
        return false;
    }

    String scheme = parts.group(PARSE_URL_SCHEME);
    if (!isValidScheme(scheme)) {
        return false;
    }

    // Special case - file: allows an empty authority; everything else must
    // carry a valid one.
    String authority = parts.group(PARSE_URL_AUTHORITY);
    boolean fileWithEmptyAuthority = "file".equals(scheme) && "".equals(authority);
    if (!fileWithEmptyAuthority && !isValidAuthority(authority)) {
        return false;
    }

    // The remaining components are checked in order; the first failure
    // short-circuits the rest.
    return isValidPath(parts.group(PARSE_URL_PATH))
            && isValidQuery(parts.group(PARSE_URL_QUERY))
            && isValidFragment(parts.group(PARSE_URL_FRAGMENT));
}
/**
 * Validates a scheme. The scheme must match the scheme pattern and, unless
 * the ALLOW_ALL_SCHEMES option is set, must also be one of the schemes this
 * validator was constructed with.
 * @param scheme The scheme to validate. A <code>null</code> value is considered
 * invalid.
 * @return true if valid.
 */
protected boolean isValidScheme(String scheme) {
    boolean wellFormed = scheme != null && SCHEME_PATTERN.matcher(scheme).matches();
    if (!wellFormed) {
        return false;
    }

    // The allowed-scheme set is consulted only while ALLOW_ALL_SCHEMES is off.
    boolean restricted = isOff(ALLOW_ALL_SCHEMES);
    return !restricted || this.allowedSchemes.contains(scheme);
}
/**
 * Returns true if the authority is properly formatted. An authority is the combination
 * of hostname and port. A <code>null</code> authority value is considered invalid.
 *
 * <p>If a custom authority validator was supplied and it accepts the value, the
 * authority is valid without further checks; if it rejects the value, the
 * built-in host, port and trailing-text checks still run.</p>
 * @param authority Authority value to validate.
 * @return true if authority (hostname and port) is valid.
 */
protected boolean isValidAuthority(String authority) {
    if (authority == null) {
        return false;
    }

    // The manual validator can only accept; a rejection falls through to the checks below.
    if (authorityValidator != null && authorityValidator.isValid(authority)) {
        return true;
    }

    Matcher parts = AUTHORITY_PATTERN.matcher(authority);
    if (!parts.matches()) {
        return false;
    }

    // The host is tried as a domain name first (the more likely case) and only
    // then as an IP address; it must be one of the two.
    String host = parts.group(PARSE_AUTHORITY_HOST_IP);
    boolean hostOk = DomainValidator.getInstance(isOn(ALLOW_LOCAL_URLS)).isValid(host)
            || InetAddressValidator.getInstance().isValid(host);
    if (!hostOk) {
        return false;
    }

    // The port is optional, but when present it must match the port pattern.
    String port = parts.group(PARSE_AUTHORITY_PORT);
    if (port != null && !PORT_PATTERN.matcher(port).matches()) {
        return false;
    }

    // Anything other than whitespace after the host and port is an error.
    String extra = parts.group(PARSE_AUTHORITY_EXTRA);
    return extra == null || extra.trim().length() == 0;
}
/**
 * Returns true if the path is valid. A <code>null</code> value is considered invalid.
 *
 * <p>The path must match the path pattern, may contain "//" only when the
 * ALLOW_2_SLASHES option is set, and may contain ".." only while the number
 * of ".." occurrences stays below the number of single slashes minus one.</p>
 * @param path Path value to validate.
 * @return true if path is valid.
 */
protected boolean isValidPath(String path) {
    if (path == null || !PATH_PATTERN.matcher(path).matches()) {
        return false;
    }

    int doubleSlashes = countToken("//", path);
    if (doubleSlashes > 0 && isOff(ALLOW_2_SLASHES)) {
        return false;
    }

    int slashes = countToken("/", path);
    int dotPairs = countToken("..", path);

    // Without any ".." the path is fine; otherwise the slash count (less the
    // double slashes and one) must exceed the number of ".." occurrences.
    return dotPairs <= 0 || (slashes - doubleSlashes - 1) > dotPairs;
}
/**
 * Returns true if the query is null or it's a properly formatted query string.
 *
 * <p>A URL without a query part yields a <code>null</code> query, which is
 * valid. Any other value is valid exactly when it matches the query
 * pattern.</p>
 * @param query Query value to validate.
 * @return true if query is valid.
 */
protected boolean isValidQuery(String query) {
    if (query == null) {
        return true;
    }

    // The result was previously negated, which rejected every query that
    // matched the pattern and accepted only those that did not.
    return QUERY_PATTERN.matcher(query).matches();
}
/**
 * Returns true if the given fragment is null or fragments are allowed.
 * The fragment's content is not inspected; only its presence matters, and a
 * present fragment is accepted while the NO_FRAGMENTS option is off.
 * @param fragment Fragment value to validate.
 * @return true if fragment is valid.
 */
protected boolean isValidFragment(String fragment) {
    return fragment == null || isOff(NO_FRAGMENTS);
}
/**
 * Returns the number of times the token appears in the target.
 *
 * <p>Occurrences may overlap: the search resumes one character after the
 * start of each match, so "//" is found twice in "///". An empty token is
 * reported as occurring zero times.</p>
 * @param token Token value to be counted.
 * @param target Target value to count tokens in.
 * @return the number of tokens.
 */
protected int countToken(String token, String target) {
    // String.indexOf never returns -1 for an empty token, so without this
    // guard the search loop below would never terminate.
    if (token.length() == 0) {
        return 0;
    }

    int count = 0;
    for (int at = target.indexOf(token); at != -1; at = target.indexOf(token, at + 1)) {
        count++;
    }
    return count;
}
/**
 * Tests whether the given flag is on, i.e. whether masking the configured
 * options with the flag leaves a positive value. For a flag that combines
 * several bits (ie. 3) a single shared bit is enough to produce a non-zero
 * masked value.
 *
 * @param flag Flag value to check.
 *
 * @return whether the specified flag value is on.
 */
private boolean isOn(long flag) {
    long masked = this.options & flag;
    return masked > 0;
}
/**
 * Tests whether the given flag is off, i.e. whether the configured options
 * share no bit with the flag. For a flag that combines several bits (ie. 3)
 * every one of those bits must be unset.
 *
 * @param flag Flag value to check.
 *
 * @return whether the specified flag value is off.
 */
private boolean isOff(long flag) {
    long masked = this.options & flag;
    return masked == 0;
}
}

View File

@@ -0,0 +1,338 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import junit.framework.TestCase;
/**
 * Exploratory tests for {@link UrlValidator}.
 *
 * <p>The manual and partition tests print the validator's verdicts instead of
 * asserting them, so an unexpected verdict shows up in the console output
 * rather than as a JUnit failure. {@link #testIsValid()} assembles random URLs
 * from tab-separated data files and reports every URL whose verdict differs
 * from the expectation recorded in those files.</p>
 *
 * @version $Revision: 1128446 $ $Date: 2011-05-27 13:29:27 -0700 (Fri, 27 May
 *          2011) $
 */
public class UrlValidatorTest extends TestCase {

    /** Number of random URLs assembled and checked by {@link #testIsValid()}. */
    private static final int RANDOM_URL_COUNT = 100000;

    public UrlValidatorTest(String testName) {
        super(testName);
    }

    /**
     * Prints the validator's verdict for a handful of hand-picked URLs, grouped
     * by the verdict the author expected.
     */
    public void testManualTest() {
        UrlValidator urlVal = new UrlValidator(null, null,
                UrlValidator.ALLOW_ALL_SCHEMES);

        System.out.println("The following should return true:");
        printVerdicts(urlVal, new String[] {
            "http://www.amazon.com",
            "http://www.google.com",
            "http://www.com", // oddly enough this is a legit website
            "http://www.GOOGLE.com",
            "HTTP://WWW.GOOGLE.COM",
            "http://WWW.google.COM",
            "reddit.com", // observed by the author to return false
        });

        System.out.println("The following should return false:");
        printVerdicts(urlVal, new String[] {
            "http://www.invalid",
            "reddit",
            "www.reddit",
            "al;skdjfals;kdjgas",
            "www.:).com",
        });
    }

    /*
     * The URLValidator can be split up into two disjoint sets of tests, tests
     * that should pass and tests that should fail. This is consistent with an
     * explanation of Input Domain Partitioning as found in an article written
     * by Tomas Vagoun for the Annual Hawaii International Conference on System
     * Sciences (https://goo.gl/acd0IL).
     *
     * This verifies that the basics of the validator do work, and approaching
     * this problem as black box testing is some of the best that we can do.
     */

    /** First partition: URLs that the validator is expected to accept. */
    public void testYourFirstPartition() {
        UrlValidator urlVal = new UrlValidator(null, null,
                UrlValidator.ALLOW_ALL_SCHEMES);

        // A ubiquitous URL that any working validator should accept.
        reportExpectedValid(urlVal, "http://www.google.com");

        // The author observed both .xxx hosts being rejected; they are the
        // subject of the accompanying bug report.
        reportExpectedValid(urlVal, "http://www.google.xxx");
        reportExpectedValid(urlVal, "http://www.peta.xxx");
    }

    /** Second partition: URLs that the validator is expected to reject. */
    public void testYourSecondPartition() {
        UrlValidator urlVal = new UrlValidator(null, null,
                UrlValidator.ALLOW_ALL_SCHEMES);

        // A known-bad URL, to show that invalid input is still detected.
        String test = "http://www.google.government";
        if (!urlVal.isValid(test)) {
            System.out.println("Failed correctly at " + test + ".");
        } else {
            // The validator accepted an invalid URL, so this is an incorrect
            // pass (the message used to call it a failure).
            System.out.println("Passed incorrectly at " + test + ".");
        }
    }

    /**
     * Builds random URLs by concatenating one scheme, authority, port and path
     * drawn from the test data files. A URL is expected to be valid only when
     * every chosen part is marked valid. Each mismatch between the expected and
     * the actual verdict is printed, followed by a summary.
     */
    public void testIsValid() {
        System.out.println("Starting programmatically generated tests!");

        UrlValidator urlValidator = new UrlValidator(null, null,
                UrlValidator.ALLOW_ALL_SCHEMES);

        // The seed is remembered so that a run with mismatches can be replayed.
        long seed = System.nanoTime();
        Random random = new Random(seed);

        // Parts are listed in the order in which they are concatenated.
        ResultPair[][] urlParts = {
            loadResultPairs("test_files/test_schemes.txt"),
            loadResultPairs("test_files/test_authorities.txt"),
            loadResultPairs("test_files/test_ports.txt"),
            loadResultPairs("test_files/test_paths.txt"),
        };

        int matchCount = 0;
        int failCount = 0;
        for (int i = 0; i < RANDOM_URL_COUNT; i++) {
            StringBuilder url = new StringBuilder();
            boolean expectedValidity = true;
            for (int p = 0; p < urlParts.length; p++) {
                ResultPair pick = urlParts[p][random.nextInt(urlParts[p].length)];
                url.append(pick.item);
                // A single invalid part makes the whole URL invalid.
                expectedValidity &= pick.valid;
            }

            String fullUrl = url.toString();
            // Primitive booleans: comparing boxed Boolean objects with ==
            // would compare references, not values.
            boolean actualValidity = urlValidator.isValid(fullUrl);
            if (actualValidity == expectedValidity) {
                matchCount++;
            } else {
                System.out.println("Failed on url: " + fullUrl);
                failCount++;
            }
        }

        System.out.println("Programmatically generated tests complete!");
        System.out.println(matchCount + " tests succeeded!");
        if (failCount > 0) {
            System.out.println("SOME TESTS FAILED!!!!");
            System.out.println(failCount + " tests failed!");
            System.out.println("Random seed for this run: " + seed);
        }
    }

    /** Prints the validator's verdict for each URL, one per line. */
    private static void printVerdicts(UrlValidator urlVal, String[] urls) {
        for (int i = 0; i < urls.length; i++) {
            System.out.println(urlVal.isValid(urls[i]));
        }
    }

    /** Prints whether a URL that is expected to be valid was accepted. */
    private static void reportExpectedValid(UrlValidator urlVal, String url) {
        if (urlVal.isValid(url)) {
            System.out.println("Passed correctly at " + url + ".");
        } else {
            System.out.println("Failed incorrectly at " + url + ".");
        }
    }

    /**
     * Reads a test data file into result pairs.
     *
     * <p>Each line holds an item and a validity word separated by a tab; the
     * item is valid only when the word is exactly "valid". Completely empty
     * lines (such as the one produced by a trailing newline) are skipped. The
     * file is read in a single pass, so the number of pairs always equals the
     * number of data lines.</p>
     *
     * @param filename path of the data file
     * @return the pairs in file order; never empty
     * @throws IllegalStateException if the file is missing, unreadable or
     *         contains no data lines
     * @throws IllegalArgumentException if a non-empty line has no validity
     *         column
     */
    private static ResultPair[] loadResultPairs(String filename) {
        BufferedReader reader;
        try {
            reader = new BufferedReader(new FileReader(filename));
        } catch (FileNotFoundException e) {
            throw new IllegalStateException("Test data file not found: " + filename, e);
        }

        List<ResultPair> pairs = new ArrayList<ResultPair>();
        try {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.length() == 0) {
                    continue;
                }
                // An item may itself be empty (the line then starts with the
                // tab), so the line is split as-is and never trimmed.
                String[] fields = line.split("\\t");
                if (fields.length < 2) {
                    throw new IllegalArgumentException(filename + ":" + lineNumber
                            + ": expected <item><TAB><validity> but found \"" + line + "\"");
                }
                boolean isValid = fields[1].equals("valid");
                pairs.add(new ResultPair(fields[0], isValid));
            }
        } catch (IOException e) {
            throw new IllegalStateException("Could not read test data file: " + filename, e);
        } finally {
            try {
                reader.close();
            } catch (IOException ignored) {
                // The stream was only read from; a failed close loses nothing.
            }
        }

        if (pairs.isEmpty()) {
            throw new IllegalStateException("Test data file has no entries: " + filename);
        }
        return pairs.toArray(new ResultPair[pairs.size()]);
    }
}

View File

@@ -0,0 +1,10 @@
/test1 valid
/t123 valid
/$23 valid
/.. invalid
/../ invalid
/test1/ valid
true
/test1/file valid
/..//file invalid
/test1//file invalid

View File

@@ -0,0 +1,7 @@
:80 valid
:65535 valid
:0 valid
valid
:-1 invalid
:65636 valid
:65a invalid

View File

@@ -0,0 +1,21 @@
valid
http:// valid
https:// valid
ftp:// valid
file:// valid
telnet:// valid
tftp:// valid
smtp:// valid
rtsp:// valid
magnet:// valid
dns:// valid
h3t:// valid
:// invalid
:/ invalid
: invalid
/ invalid
1:// invalid
magnet:/ invalid
magnet/ invalid
magnet// invalid
magnet: invalid