为了把自己在其他网站上的博客或新闻导入到当前站点,需要实现 RSS 订阅。RSS 订阅的核心是解析 XML,本身很简单;但用户往往并不知道自己博客的 RSS 地址,所以要先根据博客地址抓取网页并解析出 RSS 地址,然后再解析 XML,并将内容导入自己的站点。这里不贴图了,直接发代码:
package com.jyeba.core.rss;
import java.util.ArrayList;
import java.util.List;
/**
 * Mutable value holder for RSS data.
 *
 * <p>The same type is used for nested entries: {@link #getItems()} holds further
 * {@code RssBean} instances.
 */
public class RssBean {

    /** Channel title. */
    private String title;

    /** Channel link. */
    private String link;

    /** Channel description. */
    private String description;

    /** Date text, stored exactly as it was supplied. */
    private String date;

    /** Nested entries; starts out as an empty, mutable list. */
    private List<RssBean> items = new ArrayList<RssBean>();

    /**
     * @return the title, or {@code null} if none has been set
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @param title the title to store
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the link, or {@code null} if none has been set
     */
    public String getLink() {
        return this.link;
    }

    /**
     * @param link the link to store
     */
    public void setLink(String link) {
        this.link = link;
    }

    /**
     * @return the description, or {@code null} if none has been set
     */
    public String getDescription() {
        return this.description;
    }

    /**
     * @param description the description to store
     */
    public void setDescription(String description) {
        this.description = description;
    }

    /**
     * @return the date text, or {@code null} if none has been set
     */
    public String getDate() {
        return this.date;
    }

    /**
     * @param date the date text to store
     */
    public void setDate(String date) {
        this.date = date;
    }

    /**
     * @return the live list of nested entries (not a copy)
     */
    public List<RssBean> getItems() {
        return this.items;
    }

    /**
     * Replaces the list of nested entries; the given list is stored as-is.
     *
     * @param items the new list
     */
    public void setItems(List<RssBean> items) {
        this.items = items;
    }
}
package com.jyeba.core.rss;
/**
 * RSS handling class: loads a feed with dom4j and maps its channel and items onto
 * {@link RssBean} objects.
 *
 * <p>The most recently parsed document is kept in a plain instance field, so one instance
 * should not be used from several threads at the same time.
 *
 * @author hanfei
 */
public class Rss {

    /** XPath of the channel title, evaluated from the document root. */
    public static String RSS_DOM_ROOT_TITLE = "//channel/title";

    /** XPath of the channel link, evaluated from the document root. */
    public static String RSS_DOM_ROOT_LINK = "//channel/link";

    /** XPath of the channel description, evaluated from the document root. */
    public static String RSS_DOM_ROOT_DESCRIPTION = "//channel/description";

    /** XPath selecting every item of the channel. */
    public static String RSS_DOM_CHILRDEN_ROOT = "//channel/item";

    /** Item-relative XPath of the item title. */
    public static String RSS_DOM_CHILRDEN_ROOT_TITLE = "title";

    /** Item-relative XPath of the item link. */
    public static String RSS_DOM_CHILRDEN_ROOT_LINK = "link";

    /** Item-relative XPath of the item publication date. */
    public static String RSS_DOM_CHILRDEN_ROOT_PUBDATE = "pubDate";

    /** Item-relative XPath of the item description. */
    public static String RSS_DOM_CHILRDEN_ROOT_DESCRIPTION = "description";

    /** Not referenced anywhere in this class. */
    public static String DATABASES_PATH = "#rss_databases.mdb";

    /** Document produced by the last successful parse, or injected via {@link #setDocument}. */
    private Document document;

    /**
     * Downloads and parses the XML at {@code url} and remembers it as the current document.
     *
     * @param url location of the XML document
     * @return the parsed document
     * @throws DocumentException if the document cannot be read or is not well-formed
     */
    public Document parse(URL url) throws DocumentException {
        return load(url);
    }

    /**
     * Same as {@link #parse(URL)} but returns nothing and prints the root element name
     * to standard output.
     *
     * @param url location of the XML document
     * @throws DocumentException if the document cannot be read or is not well-formed
     */
    public void parseUrl(URL url) throws DocumentException {
        load(url);
        System.out.print("文档全文"
                + document.getDocument().getRootElement().getName());
    }

    /**
     * Parses {@code url} and evaluates the XPath {@code path} against it.
     *
     * @param path XPath expression
     * @param url  location of the XML document
     * @return the matching nodes; an empty list if the document could not be parsed
     */
    public List getXmlInfo(String path, URL url) {
        try {
            return parse(url).selectNodes(path);
        } catch (DocumentException e) {
            e.printStackTrace();
            return new ArrayList();
        }
    }

    /**
     * Evaluates the XPath {@code path} against the current document.
     *
     * @param path XPath expression
     * @return the matching nodes
     * @throws DocumentException if no document has been parsed or set yet
     */
    public List getXmlInfo(String path) throws DocumentException {
        if (document == null) {
            // Previously a NullPointerException; report the misuse through the declared exception.
            throw new DocumentException(
                    "No document loaded; call parse(URL), parseUrl(URL) or setDocument(Document) first");
        }
        return document.selectNodes(path);
    }

    /**
     * Parses {@code url} and returns the first element matched by {@code path}.
     *
     * @param path XPath expression
     * @param url  location of the XML document
     * @return the first match, or {@code null} if nothing matched or parsing failed
     */
    public Element getFirstNodeTitle(String path, URL url) {
        return firstElement(getXmlInfo(path, url));
    }

    /**
     * Returns the first element of the current document matched by {@code path}.
     *
     * @param path XPath expression
     * @return the first match, or {@code null} if nothing matched
     * @throws DocumentException if no document has been parsed or set yet
     */
    public Element getFirstNodeTitle(String path) throws DocumentException {
        return firstElement(getXmlInfo(path));
    }

    /**
     * Fetches the RSS content behind an RSS address.
     *
     * <p>Elements missing from the feed (for example an item without {@code pubDate})
     * leave the corresponding bean property {@code null} instead of failing the whole feed.
     *
     * @param uri address of the RSS document
     * @return the populated bean, or {@code null} if the address is invalid or the
     *         document cannot be read
     */
    public RssBean getRssBean(String uri) {
        try {
            parseUrl(new URL(uri));

            RssBean rss = new RssBean();

            // The channel paths are absolute, so they are evaluated once against the document.
            Node description = document.selectSingleNode(RSS_DOM_ROOT_DESCRIPTION);
            if (description != null) {
                System.out.println(description.getText()
                        + description.getText());
            }
            rss.setTitle(textOf(document.selectSingleNode(RSS_DOM_ROOT_TITLE)));
            rss.setDescription(textOf(description));
            rss.setLink(textOf(document.selectSingleNode(RSS_DOM_ROOT_LINK)));

            System.out.println("----------------------");

            // Collect the content items.
            for (Object node : getXmlInfo(RSS_DOM_CHILRDEN_ROOT)) {
                Element item = (Element) node;
                RssBean entry = new RssBean();
                entry.setTitle(textOf(item.selectSingleNode(RSS_DOM_CHILRDEN_ROOT_TITLE)));
                entry.setDate(textOf(item.selectSingleNode(RSS_DOM_CHILRDEN_ROOT_PUBDATE)));
                entry.setDescription(textOf(item.selectSingleNode(RSS_DOM_CHILRDEN_ROOT_DESCRIPTION)));
                entry.setLink(textOf(item.selectSingleNode(RSS_DOM_CHILRDEN_ROOT_LINK)));
                rss.getItems().add(entry);
            }
            return rss;
        } catch (Exception e) {
            // Boundary catch kept on purpose: this method reports every failure as null.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Resolves {@code url} to an RSS feed and reads it. The address may be the feed itself
     * or an HTML page; {@link RssUtil#isHtmlOrXml(String)} decides which.
     *
     * @param url feed address or address of a page that links to a feed
     * @return the feed content, or {@code null} if no feed was found or it could not be read
     * @throws IOException if fetching {@code url} fails
     */
    public RssBean getRSSrsult(String url) throws IOException {
        String result = new RssUtil().isHtmlOrXml(url);
        if ("eorr".equals(result)) {
            System.out.print("不支持rss");
            return null;
        }
        // "xml" means the address already is the feed; anything else is the discovered feed address.
        String feedUrl = "xml".equals(result) ? url : result;
        RssBean rs = getRssBean(feedUrl);
        if (rs != null) {
            System.out.print(rs.getTitle());
        }
        return rs;
    }

    /**
     * Manual smoke test against a fixed blog address.
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String args[]) throws IOException {
        String url = "http://blog.sina.com.cn/s/articlelist_1914306010_1_1.html";
        RssBean rs = new Rss().getRSSrsult(url);
        if (rs == null) {
            System.out.println("No RSS feed could be read from " + url);
            return;
        }
        System.out.println(rs.getTitle());
    }

    /**
     * @param document the document later XPath queries should run against
     */
    public void setDocument(Document document) {
        this.document = document;
    }

    /**
     * @return the current document, or {@code null} if none has been parsed or set
     */
    public Document getDocument() {
        return document;
    }

    /** Reads {@code url} with a hardened reader and stores the result as the current document. */
    private Document load(URL url) throws DocumentException {
        document = newReader().read(url);
        return document;
    }

    /** Creates a reader with external entity and external DTD loading switched off. */
    private static SAXReader newReader() {
        SAXReader reader = new SAXReader();
        // Feeds come from arbitrary remote hosts; do not let them pull in external entities.
        disableFeature(reader, "http://xml.org/sax/features/external-general-entities");
        disableFeature(reader, "http://xml.org/sax/features/external-parameter-entities");
        disableFeature(reader, "http://apache.org/xml/features/nonvalidating/load-external-dtd");
        return reader;
    }

    /** Turns one parser feature off; a parser that does not know the feature is only reported. */
    private static void disableFeature(SAXReader reader, String feature) {
        try {
            reader.setFeature(feature, false);
        } catch (org.xml.sax.SAXException e) {
            System.err.println("XML parser does not support feature " + feature
                    + ": " + e.getMessage());
        }
    }

    /** Returns the first entry of {@code nodes} as an element, or {@code null} if there is none. */
    private static Element firstElement(List nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        return (Element) nodes.get(0);
    }

    /** Returns the text of {@code node}, or {@code null} when the node is absent. */
    private static String textOf(Node node) {
        return node == null ? null : node.getText();
    }
}
package com.jyeba.core.rss;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Helper that inspects a URL and reports whether it is an RSS document itself or an
 * HTML page that advertises an RSS feed.
 */
public class RssUtil {

    /**
     * Fetches {@code url} and classifies it.
     *
     * @param url address to inspect
     * @return {@code "xml"} if the fetched document contains an {@code rss} element;
     *         {@code "eorr"} if it neither is a feed nor links to one; otherwise the
     *         address of the first {@code <link type="application/rss+xml">} on the page,
     *         made absolute against the page address when possible
     * @throws IOException if the page cannot be fetched
     */
    public String isHtmlOrXml(String url) throws IOException {
        // Only a user agent and a timeout are set: the former sample query parameter and
        // cookie were sent to every site and could alter or break the request.
        Document page = Jsoup.connect(url)
                .userAgent("Mozilla")
                .timeout(6000)
                .get();

        if (!page.select("rss").isEmpty()) {
            return "xml";
        }

        Elements feedLinks = page.select("link[type=application/rss+xml]");
        if (feedLinks.isEmpty()) {
            System.out.println("该地址不支持rss");
            return "eorr";
        }

        // Only the first advertised feed is used, even when the page lists several.
        Element firstLink = feedLinks.get(0);
        // Pages often use relative hrefs; callers need an address they can open directly.
        String feedUrl = firstLink.absUrl("href");
        if (feedUrl.length() == 0) {
            // absUrl yields an empty string when it cannot resolve; fall back to the raw value.
            feedUrl = firstLink.attr("href");
        }
        System.out.println("this a html RSS地址:"
                + feedUrl);
        return feedUrl;
    }

    /**
     * Manual smoke test against a fixed blog address.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        RssUtil r = new RssUtil();
        try {
            r.isHtmlOrXml("http://hi.baidu.com/ybhanxiao/home");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
分享到:
相关推荐
java实现rss的订阅
介绍了如何解析rss的方法 附加例子,可直接使用
java实现的RSS 值得一看,google阅读器可以订阅
Rss格式书写规范和java中解析dom的四种方式
java使用Rome解析Rss的实例
得到了博客的 RSS 地址后,就可以在 RSS 订阅软件里新建博客的 RSS 频道,很快就能获取博主在博客上更新的日志、发布的图片等信息。
基于Java实现RSS的阅读器,有更新加载等功能。
使用java实现的RSS阅读器,初级水平,可供同辈伙伴进行交流。
java读取RSS-SAX解析XML的实例,java语言编写,使用时只需要在main方法中将参数换成自己的RSS地址即可。
javascript 解析 rss 实例 订阅 xml
FeedEk 是个 jQuery 插件,解析和显示 RSS 和 Atom 订阅。FeedEk 使用 Google Feed API 来检索订阅。用户可以很方便的从任意的 domain 中获取订阅,不需要服务器端脚本。在线演示 标签:FeedEk
基于JAVA SWING的RSS阅读器,使用JDOM解析本地XML文件
rome实现百度,搜狐,新浪等各大网站新闻rss消息订阅
用JAVA写的RSS订阅器,使用了Spring MVC框架,可在tomcat上运行。
C# RSS阅读器 能添加和阅读订阅RSS网址 并且提供订阅网址的访问 能把订阅的内容详细展示出来
java 开发rss阅读器,带ui界面,自主开发,童叟无欺。
Java如何实现RSS.pdf
java课程大作业rss阅读器,可根据输入的url进行定向读取,并可将数据存入数据库,也可进行超链接直接跳转网页
java代码-使用java解决RSS阅读器的源代码 ——学习参考资料:仅用于个人学习使用!
Rss Rom servlet 订阅:灵活实现了 Rss 订阅功能,能够很好地生成 xml 文件