您的位置:首页 > Web前端 > HTML

CSDN简易客户端Demo(解析HTML)

2016-03-24 09:19 615 查看
依然是从鸿洋大神的博客学习到的内容(http://blog.csdn.net/lmj623565791/article/details/26676137),由于是原博主14年的内容,有些东西自己做了修改更新,整理如下。

主要的难点就是HTML的解析。

先看最终实现效果:





主界面ViewPager+Fragment(ListView) 点击后跳转到另一个Activity。

直接来看代码

/**
 * Main screen: a ViewPager holding four {@link FragmentForViewPager} pages plus a row of
 * TextView tabs. The tab matching the selected page is drawn in black, all others in white.
 */
public class MainActivity extends AppCompatActivity implements ViewPager.OnPageChangeListener, View.OnClickListener {

    ViewPager viewPager;
    List<FragmentForViewPager> fragmentList;
    MyFragmentAdapter myFragmentAdapter;
    List<TextView> tagList;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // Pager: one fragment per page index, each created through newInstance(index).
        viewPager = (ViewPager) findViewById(R.id.id_view_pager);
        fragmentList = new ArrayList<>();
        int pageIndex = 0;
        while (pageIndex < 4) {
            fragmentList.add(FragmentForViewPager.newInstance(pageIndex));
            pageIndex++;
        }
        myFragmentAdapter = new MyFragmentAdapter(getSupportFragmentManager(), fragmentList);
        viewPager.setAdapter(myFragmentAdapter);
        viewPager.addOnPageChangeListener(this);

        // Tabs: every TextView stores its own position as its tag so onClick can read it back.
        tagList = new ArrayList<>();
        int tabIndex = 0;
        for (int viewId : new int[]{R.id.id_0, R.id.id_1, R.id.id_2, R.id.id_3}) {
            TextView tab = (TextView) findViewById(viewId);
            tab.setOnClickListener(this);
            tab.setTag(tabIndex);
            tagList.add(tab);
            tabIndex++;
        }

        // Highlight the tab of the page that is shown initially.
        TextView initialTab = tagList.get(viewPager.getCurrentItem());
        initialTab.setTextColor(Color.BLACK);
    }

    @Override
    public void onPageScrolled(int position, float positionOffset, int positionOffsetPixels) {
        // Nothing to do while a page is being dragged.
    }

    /** Moves the highlight to the tab of the newly selected page. */
    @Override
    public void onPageSelected(int position) {
        restoreText();
        TextView selectedTab = tagList.get(position);
        selectedTab.setTextColor(Color.BLACK);
    }

    @Override
    public void onPageScrollStateChanged(int state) {
        // Scroll state changes are not used.
    }

    /** Highlights the clicked tab and switches the pager to the position stored in its tag. */
    @Override
    public void onClick(View v) {
        restoreText();
        int target = (Integer) v.getTag();
        TextView clickedTab = tagList.get(target);
        clickedTab.setTextColor(Color.BLACK);
        viewPager.setCurrentItem(target);
    }

    /** Resets every tab to the non-selected (white) text color. */
    private void restoreText() {
        for (int i = 0; i < tagList.size(); i++) {
            tagList.get(i).setTextColor(Color.WHITE);
        }
    }
}


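MainActivity算是常规的,在我之前的文章里(http://blog.csdn.net/yin_zihao/article/details/50843346)有比较详细的讲解了,这里不再赘述。唯一值得注意的是我们用FragmentForViewPager.newInstance(int newsType)的方式创建自定义的Fragment,并把newsType存入arguments,作为每个Fragment的标记。原因是官方不推荐通过带参数的构造方法实例化Fragment:系统重建Fragment时只会调用无参构造方法,构造参数会丢失,而通过setArguments()传入的Bundle会被保留。这么做的好处网上也已经有详细的讲解了。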

这里主要的关键内容都在我们自定义的Fragment里:

/**
 * One ViewPager page: a ListView of {@link NewsItem}s for a single news type.
 *
 * <p>The first page of items is requested when the view is created; further pages are
 * requested whenever the list is scrolled to its end. Every request runs in a
 * {@link NewsItemTask}, which downloads the list page with Jsoup and parses each element
 * of class "unit" into a NewsItem.
 */
public class FragmentForViewPager extends Fragment implements AbsListView.OnScrollListener {
    /** Key under which the news type is stored in the fragment arguments. */
    public static final String typeKey = "newsType";

    private List<NewsItem> newsItemList;
    private NewsItemAdapter newsItemAdapter;
    private NewsItemURl newsItemURL = new NewsItemURl();
    /** Number of the most recently requested list page. */
    private int page = 1;
    private int newsType;
    /** Item count at the moment of the last "load more" request; prevents duplicate requests. */
    private int currentNum = 0;

    /**
     * Creates a fragment whose news type is carried in its arguments bundle.
     *
     * @param newsType news type this page should display
     * @return the configured fragment
     */
    public static FragmentForViewPager newInstance(int newsType) {
        Bundle bundle = new Bundle();
        bundle.putInt(typeKey, newsType);
        FragmentForViewPager fragment = new FragmentForViewPager();
        fragment.setArguments(bundle);
        return fragment;
    }

    @Override
    public View onCreateView(LayoutInflater inflater, ViewGroup container, Bundle savedInstanceState) {
        // Inflate against the container (without attaching) so the layout params declared on
        // the root of fragment_layout are honoured.
        View view = inflater.inflate(R.layout.fragment_layout, container, false);
        newsType = getArguments().getInt(typeKey);

        // The list starts empty every time the view is (re)created, so paging state must
        // start over too; otherwise a recreated view would begin at a later page.
        newsItemList = new ArrayList<>();
        page = 1;
        currentNum = 0;

        ListView itemListView = (ListView) view.findViewById(R.id.id_item_list_view);
        newsItemAdapter = new NewsItemAdapter(getContext(), R.layout.news_item_layout, newsItemList);
        itemListView.setAdapter(newsItemAdapter);
        itemListView.setOnScrollListener(this);
        itemListView.setOnItemClickListener(new AdapterView.OnItemClickListener() {
            @Override
            public void onItemClick(AdapterView<?> parent, View view, int position, long id) {
                // Open the detail screen for the article link of the clicked row.
                NewsItem newsItem = newsItemList.get(position);
                Intent intent = new Intent(getContext(), NewsActivity.class);
                intent.putExtra("href", newsItem.getHref());
                startActivity(intent);
            }
        });

        // Load the first page in the background.
        new NewsItemTask().execute(newsType, page);
        return view;
    }

    @Override
    public void onScrollStateChanged(AbsListView view, int scrollState) {
        // Not used.
    }

    @Override
    public void onScroll(AbsListView view, int firstVisibleItem, int visibleItemCount, int totalItemCount) {
        // When the end of the current list is visible, load the next page. currentNum
        // remembers the item count of the last request so that the many onScroll callbacks
        // fired at the same position trigger only one download.
        if ((firstVisibleItem + visibleItemCount == totalItemCount) && currentNum != totalItemCount) {
            page++;
            new NewsItemTask().execute(newsType, page);
            currentNum = totalItemCount;
        }
    }

    /**
     * Downloads and parses one list page. Parameters: {@code params[0]} is the news type,
     * {@code params[1]} the page number. The parsed items are handed to
     * {@link #onPostExecute} as the task result instead of through a shared field, so a
     * failed download can never re-append the items of an earlier page.
     */
    private class NewsItemTask extends AsyncTask<Integer, Void, List<NewsItem>> {

        @Override
        protected List<NewsItem> doInBackground(Integer... params) {
            int type = params[0];
            // The page number is passed in rather than read from the fragment field, which
            // the UI thread may already have advanced.
            int pageToLoad = params[1];
            // getNewsItemURL() maps the news type and page number to the list URL.
            String urlString = newsItemURL.getNewsItemURL(type, pageToLoad);

            try {
                return downLoadItem(urlString);
            } catch (IOException e) {
                e.printStackTrace();
                // Nothing was downloaded; the list is left unchanged.
                return new ArrayList<NewsItem>();
            }
        }

        @Override
        protected void onPostExecute(List<NewsItem> items) {
            newsItemList.addAll(items);
            newsItemAdapter.notifyDataSetChanged();
        }

        /**
         * Fetches {@code url} with Jsoup and converts every element of class "unit" into a
         * NewsItem.
         *
         * @param url list page to download
         * @return the parsed items, in document order
         * @throws IOException if the page cannot be fetched
         */
        private List<NewsItem> downLoadItem(String url) throws IOException {
            Document document = Jsoup.connect(url).get();
            Elements units = document.getElementsByClass("unit");

            List<NewsItem> items = new ArrayList<>();
            for (Element unit : units) {
                NewsItem item = parseUnit(unit);
                if (item != null) {
                    items.add(item);
                }
            }
            return items;
        }

        /**
         * Builds a NewsItem from one "unit" element.
         *
         * @return the item, or {@code null} when the unit has no {@code h1 > a} title link
         */
        private NewsItem parseUnit(Element unit) {
            Element h1Element = unit.getElementsByTag("h1").first();
            Element titleLink = h1Element == null ? null : h1Element.getElementsByTag("a").first();
            if (titleLink == null) {
                // Without title and link the row could neither be shown nor opened.
                return null;
            }

            NewsItem newsItem = new NewsItem();
            newsItem.setTitle(titleLink.text());
            // Link of the article this item points to.
            newsItem.setHref(titleLink.attr("href"));

            // Optional image: first child of the first child of <dt>, if both exist.
            Element dtElement = unit.getElementsByTag("dt").first();
            if (dtElement != null && !dtElement.children().isEmpty()) {
                Element dtChild = dtElement.child(0);
                if (!dtChild.children().isEmpty()) {
                    newsItem.setImgUrl(dtChild.child(0).attr("src"));
                }
            }

            Element h4Element = unit.getElementsByTag("h4").first();
            Element dateElement = h4Element == null ? null : h4Element.getElementsByClass("ago").first();
            if (dateElement != null) {
                newsItem.setDate(dateElement.text());
            }

            Element dlElement = unit.getElementsByTag("dl").first();
            Element ddElement = dlElement == null ? null : dlElement.getElementsByTag("dd").first();
            if (ddElement != null) {
                newsItem.setContent(ddElement.text());
            }

            return newsItem;
        }
    }
}


我们先用了一个自定义类中的方法返回我们需要的数据页网址,这个类如下:

/**
 * Maps a news type and a page number to the URL of the corresponding CSDN list page.
 */
public class NewsItemURl {
    public static final String NEWS_LIST_URL = "http://www.csdn.net/headlines.html/";
    public static final String NEWS_LIST_URL_YIDONG = "http://mobile.csdn.net/mobile/";
    public static final String NEWS_LIST_URL_YANFA = "http://sd.csdn.net/sd/";
    public static final String NEWS_LIST_URL_YUNJISUAN = "http://cloud.csdn.net/cloud/";
    public static final String NEWS_LIST_URL_YEJIE = "http://news.csdn.net/news/";

    /**
     * Returns the list URL for the given news type with the page number appended.
     *
     * @param newsType one of the {@code NewsType} constants handled below
     * @param page     page number, appended verbatim to the base URL
     * @return base URL of the news type followed by {@code page}
     * @throws IllegalArgumentException if {@code newsType} is not one of the handled
     *         constants (previously this produced the unusable string "null" + page)
     */
    public String getNewsItemURL(int newsType, int page) {
        String baseUrl;

        switch (newsType) {
            case NewsType.YEJIE:
                baseUrl = NEWS_LIST_URL_YEJIE;
                break;
            case NewsType.YIDONG:
                baseUrl = NEWS_LIST_URL_YIDONG;
                break;
            case NewsType.YANFA:
                baseUrl = NEWS_LIST_URL_YANFA;
                break;
            case NewsType.YUNJISUAN:
                baseUrl = NEWS_LIST_URL_YUNJISUAN;
                break;
            default:
                throw new IllegalArgumentException("Unknown news type: " + newsType);
        }

        return baseUrl + page;
    }
}


然后就根据具体的HTML解析我们想要得到的数据,观察一下网页的HTML源代码



可以看到每个名为“unit”的class包含了一个NewsItem的全部内容,我们遍历所有的unit用Jsoup给我们提供的API得到我们想要的所有内容存储到每个NewsItem中。每个NewsItem类代表一个新闻项,包含了标题,摘要,链接等信息。

下载完成后就把 newsItemList设置给Adapter进行列表显示。看一下Adapter:

/**
 * ListView adapter that binds a {@link NewsItem} to a row showing title, summary, date and
 * an optional image loaded through ImageLoader.
 */
public class NewsItemAdapter extends ArrayAdapter<NewsItem> {

    private final List<NewsItem> newsItemList;
    private final int resource;
    private final ImageLoader imageLoader = ImageLoader.getInstance();
    private final DisplayImageOptions options;

    /**
     * @param context  context used to inflate rows and configure the image loader
     * @param resource layout resource of a single row
     * @param objects  backing list; the adapter displays it directly, without copying
     */
    public NewsItemAdapter(Context context, int resource, List<NewsItem> objects) {
        super(context, resource, objects);
        this.newsItemList = objects;
        this.resource = resource;

        // ImageLoader is a singleton and one adapter is created per pager page, so only the
        // first adapter may configure it.
        if (!imageLoader.isInited()) {
            imageLoader.init(ImageLoaderConfiguration.createDefault(getContext()));
        }
        options = new DisplayImageOptions.Builder().showImageForEmptyUri(R.drawable.ic_launcher)
                .showImageOnFail(R.drawable.ic_launcher).build();
    }

    @Override
    public View getView(int position, View convertView, ViewGroup parent) {
        View view;
        ViewHolder viewHolder;
        if (convertView == null) {
            // Inflate against the parent (without attaching) so the layout params declared
            // on the row's root element are honoured.
            view = LayoutInflater.from(getContext()).inflate(resource, parent, false);
            viewHolder = new ViewHolder();
            viewHolder.title = (TextView) view.findViewById(R.id.id_item_title);
            viewHolder.content = (TextView) view.findViewById(R.id.id_item_content);
            viewHolder.date = (TextView) view.findViewById(R.id.id_item_date);
            viewHolder.image = (ImageView) view.findViewById(R.id.id_image);
            view.setTag(viewHolder);
        } else {
            // Recycled row: reuse the child-view lookups cached in its tag.
            view = convertView;
            viewHolder = (ViewHolder) view.getTag();
        }

        NewsItem newsItem = newsItemList.get(position);
        viewHolder.title.setText(newsItem.getTitle());
        viewHolder.content.setText(newsItem.getContent());
        viewHolder.date.setText(newsItem.getDate());
        if (newsItem.getImgUrl() != null) {
            viewHolder.image.setVisibility(View.VISIBLE);
            imageLoader.displayImage(newsItem.getImgUrl(), viewHolder.image, options);
        } else {
            // No image for this item: collapse the ImageView so the text uses the full width.
            viewHolder.image.setVisibility(View.GONE);
        }
        return view;
    }

    /** Caches the child views of one row; static because it needs no adapter reference. */
    static class ViewHolder {
        TextView title;
        TextView content;
        TextView date;
        ImageView image;
    }
}


这里我们导入了开源的ImageLoader加载图片,如果当前NewsItem在之前解析的时候存在src(即图片对应链接)则get出来,加载显示图片。布局文件如下:

<!-- Row layout for one news item: title on top, below it the image next to summary and date. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:orientation="vertical"
    android:layout_width="match_parent"
    android:layout_height="wrap_content">

    <!-- Headline of the news item. -->
    <TextView
        android:id="@+id/id_item_title"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:paddingLeft="6dp"
        android:paddingTop="6dp"
        android:textSize="20sp"
        android:text="title"/>

    <!-- Image on the left, text column on the right. -->
    <LinearLayout
        android:orientation="horizontal"
        android:layout_width="match_parent"
        android:layout_height="wrap_content">

        <!-- Item image; NewsItemAdapter sets it to GONE when the item has no image URL. -->
        <ImageView
            android:id="@+id/id_image"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_gravity="center"
            android:padding="6dp"
            android:src="@drawable/ic_launcher"
            android:visibility="visible"/>

        <!-- Text column: summary above, date below. -->
        <LinearLayout
            android:orientation="vertical"
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:paddingTop="6dp"
            android:paddingLeft="6dp"
            android:paddingStart="6dp"
            android:paddingRight="10dp"
            android:paddingEnd="10dp">

            <!-- Summary text, limited to two lines. -->
            <TextView
                android:id="@+id/id_item_content"
                android:layout_width="match_parent"
                android:layout_height="0dp"
                android:layout_weight="1"
                android:maxLines="2"
                android:text="3D打印机一直以来只能进行单向操作,任务一旦开始便无法反悔。不过最近一批研究生研发了一种新型打印机,让你在打印的同时,可以修改重塑之前的设计。让我们一起来看看这个神奇的设备究竟是怎样的吧。"/>

            <!-- Date text, placed at the bottom end of the text column. -->
            <TextView
                android:id="@+id/id_item_date"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_gravity="bottom|end"
                android:paddingBottom="6dp"
                android:paddingRight="16dp"
                android:text="date"/>

        </LinearLayout>
    </LinearLayout>

</LinearLayout>


至此ListView基本完成,可以正常显示了。

然后是对应每个NewsItem的点击跳转,我们之前已经给ListView设置了OnItemClickListener,并且把每个NewsItem对应的链接也用intent.putExtra("href", newsItem.getHref()) 的方法传递过去了。那么在启动对应Activity的时候取出链接并加载相应的内容就可以了。我们来看一下具体的代码:

/**
 * Detail screen for one article. Reads the article link from the "href" intent extra,
 * downloads the page in the background with Jsoup and shows its first {@code <h1>} as the
 * title and the text of all {@code <p>} elements as the body.
 */
public class NewsActivity extends AppCompatActivity {

    private TextView titleTextView;
    private TextView contentTextView;
    private NewsDetail newsDetail;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.news_detail_layout);

        titleTextView = (TextView) findViewById(R.id.id_news_title);
        contentTextView = (TextView) findViewById(R.id.id_news_content);

        // getExtras() is null when the activity was started without any extras.
        Bundle extra = getIntent().getExtras();
        String href = extra == null ? null : extra.getString("href");
        if (href == null) {
            // Nothing to download: show the placeholder detail instead of crashing.
            showDetail(new NewsDetail());
            return;
        }
        new NewsDetailTask().execute(href);
    }

    /** Remembers {@code detail} and displays its title and content. */
    private void showDetail(NewsDetail detail) {
        newsDetail = detail;
        titleTextView.setText(detail.getTitle());
        contentTextView.setText(detail.getContent());
    }

    /**
     * Downloads and parses the article at {@code params[0]}. The result is always non-null:
     * when the download fails, the returned detail keeps its default title and empty
     * content, so {@link #onPostExecute} never dereferences null.
     */
    class NewsDetailTask extends AsyncTask<String, Void, NewsDetail> {

        @Override
        protected NewsDetail doInBackground(String... params) {
            String url = params[0];
            NewsDetail detail = new NewsDetail();

            try {
                Document document = Jsoup.connect(url).get();

                Element h1Element = document.getElementsByTag("h1").first();
                if (h1Element != null) {
                    detail.setTitle(h1Element.text() + "\n");
                }

                // Collect all paragraphs in one builder and hand them over once.
                StringBuilder body = new StringBuilder();
                Elements contentElements = document.getElementsByTag("p");
                for (Element contentElement : contentElements) {
                    // Indent every paragraph by two full-width spaces.
                    body.append("\u3000\u3000").append(contentElement.text()).append("\n\n");
                }
                detail.setContent(body.toString());
            } catch (IOException | IllegalArgumentException e) {
                // IllegalArgumentException: Jsoup rejects malformed URLs, and the link comes
                // from a scraped page, so it cannot be trusted to be well-formed.
                e.printStackTrace();
            }
            return detail;
        }

        @Override
        protected void onPostExecute(NewsDetail detail) {
            showDetail(detail);
        }
    }

    /** Title and body text of one article. */
    class NewsDetail {
        private String title = "default title";
        private String content = "";

        public String getTitle() {
            return title;
        }

        public void setTitle(String title) {
            this.title = title;
        }

        public String getContent() {
            return content;
        }

        /** Appends {@code content} to the text collected so far (it does not replace it). */
        public void setContent(String content) {
            this.content += content;
        }
    }

}


做法和之前加载列表类似,开启线程解析HTML,加载内容。

在尝试解析的时候出了一些问题:尝试还是根据网页源码去用getElementsByClass()的方法传入源代码中的class(见图片)得到Element,结果得到的总是空。



后来先用getElementsByTag()得到Elements再用.parent()得到它的父级Element,打印出的Log(见下图)发现实际只存在两个class,并且与网页上的并不一致。这样我们去通过class解析当然什么都得不到。至于为什么会这样,希望有了解HTML的朋友留言解答。



另外还有一个缺陷就是这里我只用了ScrollView中设置两个TextView来显示标题和具体新闻内容,并没有显示图片。因为图片要加载到文字中间,并且不同文章图片的位置也不确定,就不能确定ImageView怎么放置,可能需要用到动态加载的方式来实现……鸿洋大神用的是ListView,每个子项根据内容来决定布局的方法。总之比较麻烦,这里我先偷懒了。有时间再来完善。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息