首页 > 解决方案 > httpClient.GetStringAsync 返回 403,但同一个路由在 Postman 中可以正常工作

问题描述

// 我第一次在 .NET 中学习 C#,尝试针对美国每个城市的 craigslist URL 列表发起请求并取回 HTML。拼接之后我的 URL 格式如下:https://annarbor.craigslist.org/search/sss?query=searchTermFromInput&purveyor-input=all&srchType=T 。每个城市的 URL 从本地文本文件中读取。我在 Postman 中运行同样的请求可以正常取回数据,但用 httpClient 却总是得到 403。我从上午 10 点开始就一直卡在这个问题上,非常感谢任何建议。谢谢

using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using System;
using System.IO;
using System.Net.Http;
using System.Reflection;
using System.Text.RegularExpressions;

namespace ScraperOne
{
    /// <summary>
    /// Console front-end that prompts the user for search terms, builds a
    /// Craigslist search URL for every site listed in siteListCl.txt, and
    /// prints each response body to the console.
    /// </summary>
    public partial class Interface : IInterface
    {
        private readonly ILogger<Interface> _log;
        private readonly IConfiguration _config;
        private readonly string filePathToURL = Environment.CurrentDirectory;
        private readonly string startPath = @"/search/sss?query=";
        private readonly string endPath = @"&purveyor-input=all&srchType=T";

        // One shared HttpClient for the whole app. Creating a new HttpClient
        // per request (as the original loop did) leaks sockets and can cause
        // port exhaustion; HttpClient is designed to be reused.
        private static readonly HttpClient httpClient = CreateClient();

        // Builds the shared client with a well-formed browser User-Agent.
        private static HttpClient CreateClient()
        {
            var client = new HttpClient();
            // The original UA string contained stray spaces ("rv: 85.0",
            // "Gecko / 20100101", "Firefox / 85.0"), which yields a malformed
            // header — Craigslist's bot filtering answers such requests with
            // 403, while Postman (clean UA) succeeds.
            client.DefaultRequestHeaders.UserAgent.ParseAdd(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0");
            client.DefaultRequestHeaders.Add("Connection", "keep-alive");
            return client;
        }

        public Interface(ILogger<Interface> log, IConfiguration config)
        {
            _log = log;
            _config = config;
        }

        /// <summary>
        /// Reads comma-separated search terms from the console, then requests
        /// the search page of every Craigslist site listed in siteListCl.txt
        /// and writes each response (or the failure) to the console.
        /// </summary>
        public async System.Threading.Tasks.Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("{0}: Version{1} by {2}",
                _config.GetValue<string>("appName"),
                _config.GetValue<int>("version"),
                _config.GetValue<string>("Author"));
            Console.ResetColor();

            Console.WriteLine("Enter your key terms separated by commas:");
            // ReadLine returns null on EOF; guard so Regex.Split never sees null.
            string keyTerms = Console.ReadLine() ?? string.Empty;

            // Split on commas (trimming surrounding whitespace) and rejoin
            // with '+' to form a single query-string value.
            var searchTermsList = Regex.Split(keyTerms, @"\s*,\s*");
            var searchString = string.Join('+', searchTermsList);

            // Path.Combine instead of concatenating "\" so the path also
            // works on non-Windows hosts.
            string[] list = File.ReadAllLines(Path.Combine(filePathToURL, "siteListCl.txt"));

            foreach (string query in list)
            {
                var urlHit = query + startPath + searchString + endPath;
                try
                {
                    Console.WriteLine(urlHit);
                    string responseBody = await httpClient.GetStringAsync(urlHit);
                    Console.WriteLine(responseBody);
                }
                catch (Exception e)
                {
                    // GetStringAsync throws HttpRequestException on non-2xx
                    // (e.g. the 403 under investigation); log it via the
                    // injected logger as well as to the console.
                    _log.LogError(e, "Request failed for {Url}", urlHit);
                    Console.WriteLine("exception caught!!");
                    Console.WriteLine(e);
                }
            }

            // Keep the console window open until the user presses Enter.
            string wait = Console.ReadLine();
        }

    }

}

标签: c#、.net、web-scraping

解决方案


推荐阅读