java - 如何使用 jsoup 在 android studio 上抓取图像
问题描述
我正在 Android Studio 上创建应用程序,使用下面的代码从新闻网站抓取数据。我已经能够从页面抓取文本并将其放入列表中,但我想知道如何从该站点检索图像并把它们也放入列表中。
片段类
// Declared by the surrounding code; nothing in this snippet writes to it.
final StringBuilder builder = new StringBuilder();
// Receives one News entry per search result for which all four fields were found.
ArrayList<News> webnewss = new ArrayList<>();
try {
    // Download and parse the search results page.
    Document doc = Jsoup.connect("https://www.bbc.co.uk/search?q=little+aston&page=1")
            .userAgent("Jsoup Scraper")
            .get();

    // Headline text of every element matched by the headline selector.
    ArrayList<String> headerTitles = new ArrayList<>();
    Elements headerElements = doc.select("div.ssrcss-v4rel9-PromoContent.e1f5wbog0 > div.ssrcss-l100ew-PromoContentSummary.e1f5wbog1 > p.ssrcss-1uw1j0b-PromoHeadline.e1f5wbog2");
    for (int k = 0; k < headerElements.size(); k++) {
        headerTitles.add(headerElements.get(k).text());
    }

    // Summary text: the second child paragraph of the summary container.
    ArrayList<String> descriptionTitles = new ArrayList<>();
    Elements descriptionElements = doc.select("div.ssrcss-v4rel9-PromoContent.e1f5wbog0 > div.ssrcss-l100ew-PromoContentSummary.e1f5wbog1 > p:nth-child(2)");
    for (int k = 0; k < descriptionElements.size(); k++) {
        descriptionTitles.add(descriptionElements.get(k).text());
    }

    // Text of the span matched inside the metadata container (used as the published value).
    ArrayList<String> publishedTitles = new ArrayList<>();
    Elements publishedElements = doc.select("div.ssrcss-v4rel9-PromoContent.e1f5wbog0 > div.ssrcss-3r6h34-PromoContentMetadata.e1f5wbog9 > div > dl > div:nth-child(1) > dd > span > span:nth-child(2)");
    for (int k = 0; k < publishedElements.size(); k++) {
        publishedTitles.add(publishedElements.get(k).text());
    }

    // Raw href attribute of the anchor nested in each headline.
    ArrayList<String> urlTitles = new ArrayList<>();
    Elements urlElements = doc.select("div.ssrcss-v4rel9-PromoContent.e1f5wbog0 > div.ssrcss-l100ew-PromoContentSummary.e1f5wbog1 > p.ssrcss-1uw1j0b-PromoHeadline.e1f5wbog2 > a");
    for (int k = 0; k < urlElements.size(); k++) {
        urlTitles.add(urlElements.get(k).attr("href"));
    }

    // The four lists are paired by index, so stop at the shortest one.
    int completeRows = Math.min(
            Math.min(headerTitles.size(), descriptionTitles.size()),
            Math.min(publishedTitles.size(), urlTitles.size()));
    for (int i = 0; i < completeRows; i++) {
        webnewss.add(new News(
                headerTitles.get(i),
                descriptionTitles.get(i),
                publishedTitles.get(i),
                urlTitles.get(i)));
    }
} catch (IOException fetchFailure) {
    fetchFailure.printStackTrace();
}
新闻类
/**
 * Plain data holder for one scraped news item.
 *
 * <p>Fields are public because other code in this file reads them directly
 * (for example {@code scraping.heading}).
 */
public class News {
    /** Headline text of the article. */
    public String heading;
    /** Summary text of the article. */
    public String description;
    /** Published value as scraped from the page (kept as text). */
    public String published;
    /** Value of the headline anchor's href attribute. */
    public String link;

    /**
     * Creates a news item from its four scraped text fields.
     *
     * @param heading     headline text
     * @param description summary text
     * @param published   published value as text
     * @param link        article link
     */
    public News(String heading, String description, String published, String link) {
        this.heading = heading;
        this.description = description;
        this.published = published;
        this.link = link;
    }
}
列表项布局 xml(news_layout)
<!--
    Row layout for one news item: four label/value TextView pairs stacked vertically
    (heading, description, published, link).
    Text sizes are given in sp so they follow the user's font-size setting;
    at the default font scale sp and dp render identically.
-->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical"
    android:gravity="center_horizontal"
    android:background="@drawable/customshape"
    >
    <!-- Label for the heading value -->
    <TextView
        android:id="@+id/textNews_headingTitle"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#000000"
        android:textStyle="bold"
        android:paddingTop="10dp"
        />
    <!-- Heading value -->
    <TextView
        android:id="@+id/textNews_heading"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#FFFFFF"
        android:paddingBottom="2dp"/>
    <!-- Label for the description value -->
    <TextView
        android:id="@+id/textNews_descriptionTitle"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#000000"
        android:textStyle="bold"/>
    <!-- Description value -->
    <TextView
        android:id="@+id/textNews_description"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#FFFFFF"
        android:paddingBottom="2dp"/>
    <!-- Label for the published value -->
    <TextView
        android:id="@+id/textNews_publishedTitle"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#000000"
        android:textStyle="bold"/>
    <!-- Published value -->
    <TextView
        android:id="@+id/textNews_published"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#FFFFFF"
        android:paddingBottom="2dp"/>
    <!-- Centered label for the link -->
    <TextView
        android:id="@+id/textNews_linkTitle"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="18sp"
        android:gravity="center"
        android:textColor="#000000"
        android:textStyle="bold"/>
    <!-- Link value -->
    <TextView
        android:id="@+id/textNews_link"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#FFFFFF"
        android:paddingBottom="10dp"/>
</LinearLayout>
适配器类
/**
 * Adapter that shows each {@link News} entry in a {@code news_layout} row and
 * opens the entry's link when the row is tapped.
 */
public class ScrapingAdapter extends ArrayAdapter<News> {

    /** Holds the row's TextView references so a recycled row skips the id lookups. */
    private static class ViewHolder {
        TextView newsTitleHeading;
        TextView newsHeading;
        TextView newsTitleDescription;
        TextView newsDescription;
        TextView newsTitlePublished;
        TextView newsPublished;
        TextView newsTitleLink;
        TextView newsLink;
    }

    /**
     * @param context  context passed to the ArrayAdapter superclass
     * @param newsInfo items to display
     */
    public ScrapingAdapter(Context context, ArrayList<News> newsInfo) {
        super(context, R.layout.news_layout, newsInfo);
    }

    /** Looks up every TextView of a freshly inflated row and stores it in a new holder. */
    private static ViewHolder createHolder(View row) {
        ViewHolder holder = new ViewHolder();
        holder.newsTitleHeading = (TextView) row.findViewById(R.id.textNews_headingTitle);
        holder.newsHeading = (TextView) row.findViewById(R.id.textNews_heading);
        holder.newsTitleDescription = (TextView) row.findViewById(R.id.textNews_descriptionTitle);
        holder.newsDescription = (TextView) row.findViewById(R.id.textNews_description);
        holder.newsTitlePublished = (TextView) row.findViewById(R.id.textNews_publishedTitle);
        holder.newsPublished = (TextView) row.findViewById(R.id.textNews_published);
        holder.newsTitleLink = (TextView) row.findViewById(R.id.textNews_linkTitle);
        holder.newsLink = (TextView) row.findViewById(R.id.textNews_link);
        return holder;
    }

    /**
     * Returns the row for {@code position}, inflating a new one only when no
     * recyclable row was supplied.
     */
    @Override
    public View getView(int position, @Nullable View view, @NonNull ViewGroup parent) {
        final News scraping = getItem(position);

        final ViewHolder holder;
        if (view != null) {
            // Recycled row: the holder was attached as its tag when it was created.
            holder = (ViewHolder) view.getTag();
        } else {
            view = LayoutInflater.from(getContext()).inflate(R.layout.news_layout, parent, false);
            holder = createHolder(view);
            view.setTag(holder);
        }

        // Fixed captions.
        holder.newsTitleHeading.setText("Title:");
        holder.newsTitleDescription.setText("Description:");
        holder.newsTitlePublished.setText("Date Published:");
        holder.newsTitleLink.setText("Click here to open link");

        // Values of the current item.
        holder.newsHeading.setText(scraping.heading);
        holder.newsDescription.setText(scraping.description);
        holder.newsPublished.setText(scraping.published);

        // Tapping anywhere on the row hands the item's link to an ACTION_VIEW intent.
        view.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                Uri target = Uri.parse(scraping.link);
                Intent openLinksIntent = new Intent(Intent.ACTION_VIEW, target);
                getContext().startActivity(openLinksIntent);
            }
        });
        return view;
    }
}
解决方案
你可以用下面的代码获取图像 URL:读取页面上每个 img 标签的 src 属性,即可得到该页面上所有图像的 URL。
// Collect the absolute URL of every <img> element in the parsed document.
// NOTE(review): this takes all images in document order; pairing them by index with
// the scraped articles assumes the page has exactly one image per article, in the
// same order. Confirm, or select the <img> inside each article container instead.
ArrayList<String> imageLinkList = new ArrayList<>();
Elements imageElements = doc.getElementsByTag("img");
for (Element imageElement : imageElements) {
    // Named imageUrl (not url) so it cannot clash with a `url` variable already
    // declared in the enclosing block.
    String imageUrl = imageElement.absUrl("src");
    imageLinkList.add(imageUrl);
}
// Every list is indexed with i, so each one, including imageLinkList, needs its own bound check.
for (int i = 0; i < headerTitles.size() && i < descriptionTitles.size() && i < publishedTitles.size() && i < urlTitles.size() && i < imageLinkList.size(); i++) {
    News news = new News(headerTitles.get(i), descriptionTitles.get(i), publishedTitles.get(i), urlTitles.get(i), imageLinkList.get(i));
    webnewss.add(news);
}
/**
 * Plain data holder for one scraped news item, including the URL of its image.
 */
public class News {
    /** Headline text of the article. */
    public String heading;
    /** Summary text of the article. */
    public String description;
    /** Published value as scraped from the page (kept as text). */
    public String published;
    /** Article link. */
    public String link;
    /** URL of the image to show for this item. */
    public String imageLink;

    /**
     * Creates a news item from its scraped fields.
     *
     * @param heading     headline text
     * @param description summary text
     * @param published   published value as text
     * @param link        article link
     * @param imageLink   image URL
     */
    public News(String heading, String description, String published, String link, String imageLink) {
        this.heading = heading;
        this.description = description;
        this.published = published;
        this.link = link;
        this.imageLink = imageLink;
    }
}
现在您的新闻对象将有一个图像链接。在 news_layout 中添加 imageView。在适配器类中使用 Glide/Picasso 渲染。
推荐阅读
- react-native - 有没有办法更改 Image ( base 64 ) 大小,以便可以通过 axios 调用上传?
- google-apps-script - 消息错误:“无法读取 null 的属性 'getSheetByName'”(ligne 4, fichier "code")
- math - 数学符号和泛化问题
- javascript - p5js:如何在调整窗口大小时使我随机生成的圆形包装画布响应?
- css - Leaflet Sidebar-V2 宽度 CSS
- javascript - Angular 缓存 - Chrome 正在频繁使用的路由上加载旧的 main.js 文件
- vue.js - Vue Storybook 不能与 Pug 组件一起编译
- svn - 是什么让 .svn/pristine 成长?
- java - 为 Kafka 运行 Springboot 应用程序时出现 java.lang.NullPointerException
- sockets - 取消 netty 中正在进行的连接尝试