首页 > 解决方案 > 如何使用 jsoup 在 android studio 上抓取图像

问题描述

我正在用 Android Studio 开发一个应用,使用下面的代码从新闻网站抓取数据。目前我已经能够抓取页面上的文本并将其放入列表中,但我想知道如何从该网站获取图片并同样放入列表中。

片段类

// Scrapes the BBC search page and fills webnewss with one News item per result.
// builder is declared here but not used anywhere within this snippet.
final StringBuilder builder = new StringBuilder();
ArrayList<News> webnewss = new ArrayList<>();

try {
    // Download and parse the search results page.
    Document doc = Jsoup.connect("https://www.bbc.co.uk/search?q=little+aston&page=1")
            .userAgent("Jsoup Scraper")
            .get();

    // Headline text of every result.
    String headlineSelector = "div.ssrcss-v4rel9-PromoContent.e1f5wbog0 > div.ssrcss-l100ew-PromoContentSummary.e1f5wbog1 > p.ssrcss-1uw1j0b-PromoHeadline.e1f5wbog2";
    ArrayList<String> headerTitles = new ArrayList<>();
    for (Element headline : doc.select(headlineSelector)) {
        headerTitles.add(headline.text());
    }

    // Summary paragraph of every result.
    String summarySelector = "div.ssrcss-v4rel9-PromoContent.e1f5wbog0 > div.ssrcss-l100ew-PromoContentSummary.e1f5wbog1 > p:nth-child(2)";
    ArrayList<String> descriptionTitles = new ArrayList<>();
    for (Element summary : doc.select(summarySelector)) {
        descriptionTitles.add(summary.text());
    }

    // Publication date text of every result.
    String dateSelector = "div.ssrcss-v4rel9-PromoContent.e1f5wbog0 > div.ssrcss-3r6h34-PromoContentMetadata.e1f5wbog9 > div > dl > div:nth-child(1) > dd > span > span:nth-child(2)";
    ArrayList<String> publishedTitles = new ArrayList<>();
    for (Element date : doc.select(dateSelector)) {
        publishedTitles.add(date.text());
    }

    // Target of the anchor inside every headline.
    String anchorSelector = "div.ssrcss-v4rel9-PromoContent.e1f5wbog0 > div.ssrcss-l100ew-PromoContentSummary.e1f5wbog1 > p.ssrcss-1uw1j0b-PromoHeadline.e1f5wbog2 > a";
    ArrayList<String> urlTitles = new ArrayList<>();
    for (Element anchor : doc.select(anchorSelector)) {
        urlTitles.add(anchor.attr("href"));
    }

    // The four lists are combined by position, so only as many items as the
    // shortest list holds can be built.
    int itemCount = Math.min(
            Math.min(headerTitles.size(), descriptionTitles.size()),
            Math.min(publishedTitles.size(), urlTitles.size()));
    for (int index = 0; index < itemCount; index++) {
        webnewss.add(new News(
                headerTitles.get(index),
                descriptionTitles.get(index),
                publishedTitles.get(index),
                urlTitles.get(index)));
    }
} catch (IOException fetchError) {
    fetchError.printStackTrace();
}

新闻类

/**
 * Plain data holder for one scraped news entry.
 *
 * <p>The fields are public because the adapter reads them directly.
 */
public class News {
    /** Headline text of the entry. */
    public String heading;
    /** Summary text of the entry. */
    public String description;
    /** Publication date text as scraped from the page. */
    public String published;
    /** URL of the article the entry points to. */
    public String link;

    /**
     * Creates a news entry from the four scraped values.
     *
     * @param heading     headline text
     * @param description summary text
     * @param published   publication date text
     * @param link        article URL
     */
    public News(String heading, String description, String published, String link) {
        this.heading = heading;
        this.description = description;
        this.published = published;
        this.link = link;
    }
}

列表项布局 xml(news_layout)

<!-- Row layout for one news entry: four label/value TextView pairs stacked vertically.
     Text sizes use sp (not dp) so they follow the user's font-size setting. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical"
    android:gravity="center_horizontal"
    android:background="@drawable/customshape"
    >

    <!-- Label shown above the headline -->
    <TextView
        android:id="@+id/textNews_headingTitle"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#000000"
        android:textStyle="bold"
        android:paddingTop="10dp"
        />

    <!-- Headline text -->
    <TextView
        android:id="@+id/textNews_heading"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#FFFFFF"
        android:paddingBottom="2dp"/>

    <!-- Label shown above the description -->
    <TextView
        android:id="@+id/textNews_descriptionTitle"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#000000"
        android:textStyle="bold"/>

    <!-- Description text -->
    <TextView
        android:id="@+id/textNews_description"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#FFFFFF"
        android:paddingBottom="2dp"/>

    <!-- Label shown above the publication date -->
    <TextView
        android:id="@+id/textNews_publishedTitle"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#000000"
        android:textStyle="bold"/>

    <!-- Publication date text -->
    <TextView
        android:id="@+id/textNews_published"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#FFFFFF"
        android:paddingBottom="2dp"/>

    <!-- Centered prompt for opening the article link -->
    <TextView
        android:id="@+id/textNews_linkTitle"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="18sp"
        android:gravity="center"
        android:textColor="#000000"
        android:textStyle="bold"/>

    <!-- Link text view -->
    <TextView
        android:id="@+id/textNews_link"
        android:layout_width="360dp"
        android:layout_height="wrap_content"
        android:textSize="16sp"
        android:textColor="#FFFFFF"
        android:paddingBottom="10dp"/>


</LinearLayout>

适配器类

public class ScrapingAdapter extends ArrayAdapter<News> {
    private static class ViewHolder{
        TextView newsTitleHeading;
        TextView newsHeading;
        TextView newsTitleDescription;
        TextView newsDescription;
        TextView newsTitlePublished;
        TextView newsPublished;
        TextView newsTitleLink;
        TextView newsLink;
    }

    public ScrapingAdapter(Context context, ArrayList<News> newsInfo){
        super(context, R.layout.news_layout, newsInfo);
    }

    @Override

    public View getView(int position, @Nullable View view, @NonNull ViewGroup parent) {
        News scraping = getItem(position);

        ViewHolder viewHolder;
        if(view == null){
            viewHolder = new ViewHolder();
            LayoutInflater inflater = LayoutInflater.from(getContext());
            view = inflater.inflate(R.layout.news_layout, parent, false);
            viewHolder.newsTitleHeading = (TextView) view.findViewById(R.id.textNews_headingTitle);
            viewHolder.newsHeading = (TextView) view.findViewById(R.id.textNews_heading);
            viewHolder.newsTitleDescription = (TextView) view.findViewById(R.id.textNews_descriptionTitle);
            viewHolder.newsDescription = (TextView) view.findViewById(R.id.textNews_description);
            viewHolder.newsTitlePublished = (TextView) view.findViewById(R.id.textNews_publishedTitle);
            viewHolder.newsPublished = (TextView) view.findViewById(R.id.textNews_published);
            viewHolder.newsTitleLink = (TextView) view.findViewById(R.id.textNews_linkTitle);
            viewHolder.newsLink = (TextView) view.findViewById(R.id.textNews_link);
            view.setTag(viewHolder);
        } else {
            viewHolder = (ViewHolder) view.getTag();
        }

        viewHolder.newsTitleHeading.setText("Title:");
        viewHolder.newsHeading.setText(scraping.heading);
        viewHolder.newsTitleDescription.setText("Description:");
        viewHolder.newsDescription.setText(scraping.description);
        viewHolder.newsTitlePublished.setText("Date Published:");
        viewHolder.newsPublished.setText(scraping.published);
        viewHolder.newsTitleLink.setText("Click here to open link");
        view.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                //Toast.makeText(getContext(), "You clicked" + scraping.heading, Toast.LENGTH_SHORT).show();
                Intent openLinksIntent = new Intent(Intent.ACTION_VIEW, Uri.parse(scraping.link));
                getContext().startActivity(openLinksIntent);
            }
        });
        return view;

    }
}

标签: java, android, jsoup

解决方案


你可以试试下面的代码来获取图片 URL。读取每个 img 元素的 src 属性,即可得到该页面上所有图片的 URL。

// Collect the absolute URL of every <img> element in the parsed document.
// NOTE(review): this gathers all images in document order, which may not line up
// one-to-one with the news results — confirm, or narrow the selector.
ArrayList<String> imageLinkList = new ArrayList<>();
Elements imageElements = doc.getElementsByTag("img");
for (Element imageElement : imageElements) {
    // absUrl resolves a relative src against the document's base URI.
    String imageUrl = imageElement.absUrl("src");
    imageLinkList.add(imageUrl);
}

// Combine the lists by position; every list, including imageLinkList, must be
// bounds-checked so that each get(i) stays in range.
for (int i = 0; i < headerTitles.size() && i < descriptionTitles.size() && i < publishedTitles.size() && i < urlTitles.size() && i < imageLinkList.size(); i++) {
    News news = new News(headerTitles.get(i), descriptionTitles.get(i), publishedTitles.get(i), urlTitles.get(i), imageLinkList.get(i));
    webnewss.add(news);
}

/**
 * Plain data holder for one scraped news entry, including its image URL.
 */
public class News {
    /** Headline text of the entry. */
    public String heading;
    /** Summary text of the entry. */
    public String description;
    /** Publication date text as scraped from the page. */
    public String published;
    /** URL of the article the entry points to. */
    public String link;
    /** URL of the image associated with the entry. */
    public String imageLink;

    /**
     * Creates a news entry from the five scraped values.
     *
     * @param heading     headline text
     * @param description summary text
     * @param published   publication date text
     * @param link        article URL
     * @param imageLink   image URL
     */
    public News(String heading, String description, String published, String link, String imageLink) {
        this.heading = heading;
        this.description = description;
        this.published = published;
        this.link = link;
        this.imageLink = imageLink;
    }
}

现在您的 News 对象将包含一个图片链接。在 news_layout 中添加一个 ImageView,并在适配器类中使用 Glide 或 Picasso 加载并显示图片。


推荐阅读