您的位置:首页 > 编程语言 > Java开发

Java正则达式引起死循环导致服务器负载过高

2015-10-14 13:45 891 查看
今天例行top检查服务器的时候,忽然发现负载竟然到了30多!我勒个去啊!


进程16319的cpu负载到了198.3%,出现这么高的负载很有可能有死循环!

jstack打印堆栈,里面有大量的这样的线程在RUNNABLE:

"http-bio-8080-exec-17" daemon prio=10 tid=0x00007f21b06b5800 nid=0x43cd runnable [0x00007f21a4efa000]
java.lang.Thread.State: RUNNABLE
at java.util.regex.Pattern$Curly.match0(Pattern.java:4148)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4177)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.match(Pattern.java:4683)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4615)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$Ques.match(Pattern.java:4079)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4170)
at java.util.regex.Pattern$Curly.match(Pattern.java:4132)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4556)
at java.util.regex.Pattern$Loop.matchInit(Pattern.java:4702)
at java.util.regex.Pattern$Prolog.match(Pattern.java:4639)
at java.util.regex.Pattern$Begin.match(Pattern.java:3472)
at java.util.regex.Matcher.match(Matcher.java:1221)
at java.util.regex.Matcher.matches(Matcher.java:559)
这个是一个判断邮箱是否合法的方法,里面用了一个正则表达式:

/**
* 验证邮箱
*
* @param email
* @return
*/
public static boolean isEmail(final String email) {
final String str = "^([a-zA-Z0-9]*[-_]?[a-zA-Z0-9]+)*@([a-zA-Z0-9]*[-_]?[a-zA-Z0-9]+)+[\\.][A-Za-z]{2,3}([\\.][A-Za-z]{2})?$";
final Pattern p = Pattern.compile(str);
final Matcher m = p.matcher(email);
return m.matches();
}
应该是这个表达是有问题!

apache的commons-validator已经提供了验证邮箱的方法,就不要再自己造轮子:

EmailValidator.getInstance().isValid(email);
</pre>看些具体的实现:<p></p><p></p><pre name="code" class="java">public boolean isValid(String email) {
if (email == null) {
return false;
}

if (email.endsWith(".")) { // check this first - it's cheap!
return false;
}

// Check the whole email address structure
Matcher emailMatcher = EMAIL_PATTERN.matcher(email);
if (!emailMatcher.matches()) {
return false;
}

if (!isValidUser(emailMatcher.group(1))) {
return false;
}

if (!isValidDomain(emailMatcher.group(2))) {
return false;
}

return true;
}

private static final Pattern EMAIL_PATTERN = Pattern.compile(EMAIL_REGEX);
private static final String EMAIL_REGEX = "^\\s*?(.+)@(.+?)\\s*$";

首先用EMAIL_PATTERN做第一次检验,检验规则是:@前面至少一个字符,@后面至少一个字符

然后分别交验@之前的部分和@之后的部分:

protected boolean isValidUser(String user) {
return USER_PATTERN.matcher(user).matches();
}
private static final Pattern USER_PATTERN = Pattern.compile(USER_REGEX);
private static final String USER_REGEX = "^\\s*" + WORD + "(\\." + WORD + ")*$";
private static final String WORD = "((" + VALID_CHARS + "|')+|" + QUOTED_USER + ")";
private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
private static final String SPECIAL_CHARS = "\\p{Cntrl}\\(\\)<>@,;:'\\\\\\\"\\.\\[\\]";
private static final String QUOTED_USER = "(\"[^\"]*\")";
卧槽,太复杂了,先不看了!

protected boolean isValidDomain(String domain) {
// see if domain is an IP address in brackets
Matcher ipDomainMatcher = IP_DOMAIN_PATTERN.matcher(domain);

if (ipDomainMatcher.matches()) {
InetAddressValidator inetAddressValidator =
InetAddressValidator.getInstance();
return inetAddressValidator.isValid(ipDomainMatcher.group(1));
}
// Domain is symbolic name
DomainValidator domainValidator =
DomainValidator.getInstance(allowLocal);
return domainValidator.isValid(domain) ||
domainValidator.isValidTld(domain);
}
private static final Pattern IP_DOMAIN_PATTERN = Pattern.compile(IP_DOMAIN_REGEX);
private static final String IP_DOMAIN_REGEX = "^\\[(.*)\\]$";


分别调用了InetAddressValidator和DomainValidator来验证ip或者是domain。

基本的思想是:先用正则简单验证格式,再用代码验证是否符合逻辑,比如:ip是先验证是否是3个数字一组三个数字一组的四组数字,然后判断是否在0-255之间。

private static final String IPV4_REGEX =
"^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$";

public boolean isValidInet4Address(String inet4Address) {
// verify that address conforms to generic IPv4 format
String[] groups = ipv4Validator.match(inet4Address);

if (groups == null) {
return false;
}

// verify that address subgroups are legal
for (int i = 0; i <= 3; i++) {
String ipSegment = groups[i];
if (ipSegment == null || ipSegment.length() == 0) {
return false;
}

int iIpSegment = 0;

try {
iIpSegment = Integer.parseInt(ipSegment);
} catch(NumberFormatException e) {
return false;
}

if (iIpSegment > 255) {
return false;
}

if (ipSegment.length() > 1 && ipSegment.startsWith("0")) {
return false;
}

}

return true;
}


这说明一个问题:正则虽好,切莫乱用!正则还有一个问题就是效率太低,切忌用它来做业务逻辑验证!

上网查了下,这还真不是个例:

http://blog.csdn.net/shixing_11/article/details/5997567

貌似是JDK的一个bug。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: