您的位置:首页 > 其它

获取网页上的所有QQ号码,并生成Excel报表

2017-05-01 21:03 447 查看
需要的jar如下:



package jsoup.zr.com.utils;
/**
 * Central configuration values for the QQ-number scraper: the page to read,
 * where the generated report is written, and the regular expressions used
 * for matching.
 *
 * <p>All values are compile-time constants and cannot be reassigned.
 *
 * @author LF
 */
public class Constant {

    /**
     * Address of the web page to scan.
     */
    public static final String URL = "http://bbs.tianya.cn/post-enterprise-758850-1.shtml";

    /**
     * Directory in which the generated report file is placed.
     * The value is concatenated directly with the file name, so it must end
     * with a separator. Use forward slashes, not backslashes.
     */
    public static final String PATH = "/Users/apple/Desktop/";

    /**
     * Base name (without extension) of the generated Excel file.
     */
    public static final String NAME = "QQ";

    /**
     * Regular expression for a QQ number: one leading digit 1-9 followed by
     * 4 to 14 further digits, i.e. 5 to 15 digits in total.
     * The pattern is not anchored, so when used with a "find"-style search it
     * also matches such a digit run embedded in longer text.
     */
    public static final String QQREGEX = "[1-9][0-9]{4,14}";

    /**
     * Regular expression for a QQ / WeChat / phone identifier: the whole input
     * must consist of 5 to 19 letters, digits, underscores or hyphens.
     */
    public static final String QQ_WEIXIN_PHONE = "^[a-zA-Z0-9_-]{5,19}$";
}


package jsoup.zr.com.utils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Downloads a web page and extracts every substring that matches a regular
 * expression, then hands the matches to {@link ProduceExel} for reporting.
 *
 * @author LF
 */
public class ParseHTML {

    /**
     * Scans the page configured in {@link Constant#URL} for QQ numbers and
     * writes them to the Excel report.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = getInfoByURL(Constant.URL, Constant.QQREGEX);
        ProduceExel.writeByList(list, "QQ号码");
    }

    /**
     * Reads the content behind {@code urlString} line by line and collects
     * every match of {@code regexString}, in order of appearance. Duplicates
     * are kept.
     *
     * <p>Failures are reported on standard error and never propagate as
     * checked exceptions: a malformed URL yields an empty list, and an I/O
     * error yields whatever was collected before the error occurred.
     *
     * @param urlString   address of the page to read
     * @param regexString regular expression searched for in each line
     * @return the matches found; never {@code null}
     */
    public static List<String> getInfoByURL(String urlString, String regexString) {
        List<String> list = new ArrayList<String>();

        URL url;
        try {
            url = new URL(urlString);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            // Without a valid URL there is nothing to read; previously this
            // fell through and dereferenced a null URL.
            return list;
        }

        // Compile before opening the connection so an invalid pattern cannot
        // leave an open stream behind.
        Pattern pattern = Pattern.compile(regexString);

        BufferedReader bReader = null;
        try {
            URLConnection connection = url.openConnection();
            // NOTE(review): the stream is decoded with the platform default
            // charset, as before; confirm the page encoding if non-ASCII
            // patterns are ever used.
            bReader = new BufferedReader(new InputStreamReader(connection.getInputStream()));

            String line;
            while ((line = bReader.readLine()) != null) {
                Matcher matcher = pattern.matcher(line);
                // A single line may contain several matches.
                while (matcher.find()) {
                    list.add(matcher.group());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (bReader != null) {
                try {
                    bReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        return list;
    }

}


package jsoup.zr.com.utils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
 * Generates a single-column Excel (.xls) report from a collection of strings.
 * The file is written to {@code Constant.PATH + Constant.NAME + ".xls"}.
 *
 * @author LF
 */
public class ProduceExel {

    /**
     * Writes the elements of a list into the report, one per row, below a
     * header row. Does nothing when the list is {@code null} or empty.
     * The list size and every written value are echoed to standard output.
     *
     * @param list     values to write
     * @param lineName text of the header (first) row
     */
    public static void writeByList(List<String> list, String lineName) {
        // Check for null before touching the list; the size used to be
        // printed first, which threw on a null argument.
        if (list == null) {
            return;
        }
        System.out.println(list.size());
        if (list.size() == 0) {
            return;
        }
        writeColumn(list, lineName, true);
    }

    /**
     * Writes the values of a map into the report, one per row, below a
     * header row. Does nothing when the map is {@code null} or empty.
     * Keys are ignored; rows follow the map's own iteration order.
     *
     * @param map      map whose values are written
     * @param lineName text of the header (first) row
     */
    public static void write(Map<String, String> map, String lineName) {
        if (map == null || map.size() == 0) {
            return;
        }
        writeColumn(map.values(), lineName, false);
    }

    /**
     * Builds a one-sheet workbook with a header row followed by one row per
     * value and saves it to the configured path. I/O failures are reported on
     * standard error; the output stream is always closed.
     *
     * @param values     values to place in column 0, starting at row 1
     * @param lineName   text of the header cell in row 0
     * @param echoValues whether to print each value to standard output
     */
    private static void writeColumn(Iterable<String> values, String lineName, boolean echoValues) {
        String path = Constant.PATH + Constant.NAME + ".xls";
        System.out.println(path);

        HSSFWorkbook workbook = new HSSFWorkbook();
        HSSFSheet sheet = workbook.createSheet("lf");

        // Header row.
        HSSFRow headerRow = sheet.createRow(0);
        HSSFCell headerCell = headerRow.createCell(0);
        headerCell.setCellValue(new HSSFRichTextString(lineName));

        // Data rows start right below the header.
        int rowIndex = 1;
        for (String value : values) {
            if (echoValues) {
                System.out.println("=====" + value);
            }
            HSSFRow dataRow = sheet.createRow(rowIndex);
            HSSFCell dataCell = dataRow.createCell(0);
            dataCell.setCellValue(new HSSFRichTextString(value));
            rowIndex++;
        }

        OutputStream outputStream = null;
        try {
            // FileNotFoundException is an IOException, so one handler covers
            // both opening and writing; a failed open no longer leads to a
            // write on a null stream.
            outputStream = new FileOutputStream(new File(path));
            workbook.write(outputStream);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (outputStream != null) {
                try {
                    outputStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

}


package jsoup.zr.com.utils;
/**
*
* @author LF
*
*/
public class Verify {

/**
* 验证是否是QQ号码
* @param QQNumber
* @return
*/
public static boolean verifyQQNumber(String QQNumber){
System.out.println(QQNumber.matches(Constant.QQREGEX));
return QQNumber.matches(Constant.QQREGEX);
}

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: