首页 > 解决方案 > 网站抓取html类识别问题

问题描述

我想从 MAL(MyAnimeList)抓取角色的图片地址(imgurl)和角色名称,以便稍后用 Picasso 显示,但在定位正确的 HTML 类(class)时遇到了麻烦。下面是我正在检查的网站。先谢谢大家! 检查 MAL

这是我的 CharacterList 类,其中包含用于定位 HTML 类的选择器代码

// List widget that displays the scraped items; looked up in onCreate.
private RecyclerView recyclerView;
// Adapter attached to recyclerView; constructed in onCreate over parseItems.
private ParseAdapter adapter;
// Backing data handed to the adapter.
private ArrayList<ParseItem> parseItems = new ArrayList<>();
// Progress indicator made visible when the Content task starts and hidden when it finishes.
private ProgressBar progressBar;

/**
 * Inflates the screen, wires the RecyclerView to a {@link ParseAdapter} over
 * {@code parseItems}, and starts the {@code Content} task that fills the list.
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_to_watch_list);
    configureBackButton();

    progressBar = findViewById(R.id.progressBar);

    // Build the adapter first, then configure the list view that hosts it.
    adapter = new ParseAdapter(parseItems, this);
    recyclerView = findViewById(R.id.recyclerView_character);
    recyclerView.setLayoutManager(new LinearLayoutManager(this));
    recyclerView.setHasFixedSize(true);
    recyclerView.setAdapter(adapter);

    // Kick off the page download and parsing.
    new Content().execute();
}

/**
 * Background task that downloads the MyAnimeList character page, extracts an image URL and a
 * name per matched row, and publishes the result to the adapter.
 *
 * <p>Rows are collected into a task-local list in {@link #doInBackground} and appended to
 * {@code parseItems} only in {@link #onPostExecute}, so the list backing the adapter is never
 * modified while the background thread is running.
 */
private class Content extends AsyncTask<Void, Void, ArrayList<ParseItem>> {

    @Override
    protected void onPreExecute() {
        super.onPreExecute();
        progressBar.setVisibility(View.VISIBLE);
        progressBar.setAnimation(AnimationUtils.loadAnimation(CharacterList.this, android.R.anim.fade_in));
    }

    /**
     * Fetches and parses the page.
     *
     * @return the parsed items; empty when the download fails or nothing matches
     */
    @Override
    protected ArrayList<ParseItem> doInBackground(Void... voids) {
        ArrayList<ParseItem> items = new ArrayList<>();
        //website url
        String url = "https://myanimelist.net/character.php";
        try {
            Document doc = Jsoup.connect(url).get();

            // NOTE(review): these selectors are the ones the question is asking about;
            // whether they match the live markup cannot be verified from here.
            Elements data = doc.select("tr.people");

            // The two selections do not depend on the loop index, so resolve them once
            // instead of re-running them on every iteration.
            Elements images = data.select("a.mr8.ml12.fl-l").select("img");
            Elements names = data.select("tr.mt24.di-ib.information").select("a.fw-b.fs14");

            int size = data.size();
            for (int i = 0; i < size; i++) {
                // eq(i) yields an empty selection when there is no i-th match, in which
                // case attr()/text() return "".
                String imgUrl = images.eq(i).attr("src");
                String title = names.eq(i).text();

                items.add(new ParseItem(imgUrl, title));
                Log.d("items", "img: " + imgUrl + " .title: " + title);
            }
        } catch (IOException e) {
            // Report the failure through the Android log instead of printStackTrace().
            Log.e("items", "Failed to load " + url, e);
        }
        return items;
    }

    /**
     * Hides the progress bar, appends the parsed items to the adapter's list, and refreshes
     * the list view.
     */
    @Override
    protected void onPostExecute(ArrayList<ParseItem> items) {
        super.onPostExecute(items);
        progressBar.setVisibility(View.GONE);
        progressBar.setAnimation(AnimationUtils.loadAnimation(CharacterList.this, android.R.anim.fade_out));
        parseItems.addAll(items);
        adapter.notifyDataSetChanged();
    }
}

我认为我的 ParseAdapter 和 ParseItem 工作正常,但以防万一,也把它们贴在这里

/**
 * RecyclerView adapter that renders each {@link ParseItem} as a title plus an image loaded
 * with Picasso.
 */
public class ParseAdapter extends RecyclerView.Adapter<ParseAdapter.ViewHolder> {

    private final ArrayList<ParseItem> parseItems;
    private final Context context;

    /**
     * @param parseItems list to display; the adapter keeps a reference to it, not a copy
     * @param context    context passed to Picasso when loading images
     */
    public ParseAdapter(ArrayList<ParseItem> parseItems, Context context) {
        this.parseItems = parseItems;
        this.context = context;
    }

    @NonNull
    @Override
    public ParseAdapter.ViewHolder onCreateViewHolder(@NonNull ViewGroup parent, int viewType) {
        View view = LayoutInflater.from(parent.getContext()).inflate(R.layout.parse_item, parent, false);
        return new ViewHolder(view);
    }

    /**
     * Binds the title and image of the item at {@code position}.
     *
     * <p>A missing or blank image URL clears the image instead of being passed to Picasso,
     * because {@code load(String)} rejects an empty path.
     */
    @Override
    public void onBindViewHolder(@NonNull ParseAdapter.ViewHolder holder, int position) {
        ParseItem parseItem = parseItems.get(position);
        holder.textView.setText(parseItem.getTitle());

        String imgUrl = parseItem.getImgurl();
        if (imgUrl == null || imgUrl.trim().isEmpty()) {
            // The view may be recycled: drop any request still targeting it and clear
            // whatever image it showed before.
            Picasso.with(this.context).cancelRequest(holder.imageView);
            holder.imageView.setImageDrawable(null);
            return;
        }

        Picasso
                // with(context) vs get(): which entry point exists depends on the Picasso
                // version in use; this code is written against with(context).
                .with(this.context)
                .load(imgUrl)
                .into(holder.imageView);
    }

    @Override
    public int getItemCount() {
        return parseItems.size();
    }

    /** Holds the image and text views of one inflated {@code parse_item} row. */
    public static class ViewHolder extends RecyclerView.ViewHolder {

        final ImageView imageView;
        final TextView textView;

        public ViewHolder(@NonNull View itemView) {
            super(itemView);
            imageView = itemView.findViewById(R.id.imageView_character);
            textView = itemView.findViewById(R.id.textView_character);
        }
    }
}



public class ParseItem {

private String imgurl;
private String title;

public ParseItem(){

}

public ParseItem(String imgurl, String title) {
    this.imgurl = imgurl;
    this.title = title;
}

public String getImgurl() {
    return imgurl;
}

public void setImgurl(String imgurl) {
    this.imgurl = imgurl;
}

public String getTitle() {
    return title;
}

public void setTitle(String title) {
    this.title = title;
}}

标签: android-studio web-scraping android-recyclerview jsoup

解决方案


没关系,我自己弄明白了。下面是 HTML 类识别部分的代码,以防以后有人卡在同样的地方。

// Corrected selector section posted as the answer. The snippet is a fragment: the loop
// body is not closed here.
// Rows are selected as <td class="people"> elements.
Elements data = doc.select("td.people");
            int size = data.size();
for (int i = 0; i < size; i++){
              
                // Image URL: the i-th <img> found under the matched anchors, read from the
                // "data-src" attribute.
                // NOTE(review): presumably the page lazy-loads images so "src" is not the
                // real URL — confirm against the live markup.
                String imgUrl = data.select("a.fl-l.ml12.mr8")
                       
                        .select("img")
                        .eq(i)
                    
                        .attr("data-src");

          
                // Name: text of the i-th anchor with classes fs14 and fw-b found under the
                // div with classes information, di-ib and mt24.
                String title = data.select("div.information.di-ib.mt24")
                
                        .select("a.fs14.fw-b")
                        .eq(i)
                        .text();

推荐阅读