首页 > 解决方案 > 抓取 API 的下一页时,for 循环不断重复同一页

问题描述

我正在从以下 API 抓取数据:https://content.osu.edu/v2/classes/search?q=&campus=col&academic-career=ugrd

JSON 格式如下所示:

{
  "data":{
     "totalItems":10000,
     "currentItemCount":200,
     "page":1,
     "totalPages":50,
     "refineQueryTemplate":"q=_QUERY_&campus=col&academic-career=ugrd&p=1",
     "nextPageLink":"?q=&campus=col&academic-career=ugrd&p=2",
     "prevPageLink":null,
     "activeSort":"",
     
     "courses":[
        {
           "course":{
              "term":"Summer 2021",
              "effectiveDate":"2019-01-06",
              "effectiveStatus":"A",
              "title":"Dental Hygiene Practicum",
              "shortDescription":"DHY Practicum",
              "description":"Supervised practice outside the traditional clinic in a setting similar to one in which the dental hygiene student may practice, teach, or conduct research upon graduation.\nPrereq: Sr standing in DHY or BDCP major. Repeatable to a maximum of 4 cr hrs or 4 completions. This course is graded S/U.",
              "equivalentId":"S1989",
              "allowMultiEnroll":"N",
              "maxUnits":4,
              "minUnits":1,
              "repeatUnitsLimit":4,
              "grading":"Satisfactory/Unsatisfactory",
              "component":"Field Experience",
              "primaryComponent":"FLD",
              "offeringNumber":"1",
              "academicGroup":"Dentistry",
              "subject":"DENTHYG",
              "catalogNumber":"4430",
              "campus":"Columbus",
              "academicOrg":"D2120",
              "academicCareer":"Undergraduate",
              "cipCode":"51.0602",
              "courseAttributes":[
                 {
                    "name":"CCP",
                    "value":"NON-CCP",
                    "description":"Not eligible for College Credit Plus program"
                 }
              ],
              "campusCode":"COL",
              "catalogLevel":"4xxx",
              "subjectDesc":"Dental Hygiene",
              "courseId":"152909"
           },
           "sections":[
              {
                 "classNumber":"20850",
                 "section":"10",
                 "component":"Field Experience",
                 "instructionMode":"Distance Learning",
                 "meetings":[
                    {
                       "meetingNumber":1,
                       "facilityId":null,
                       "facilityType":null,
                       "facilityDescription":null,
                       "facilityDescriptionShort":null,
                       "facilityGroup":null,
                       "facilityCapacity":0,
                       "buildingCode":null,
                       "room":null,
                       "buildingDescription":null,
                       "buildingDescriptionShort":null,
                       "startTime":null,
                       "endTime":null,
                       "startDate":"2021-05-12",
                       "endDate":"2021-07-30",
                       "monday":false,
                       "tuesday":false,
                       "wednesday":false,
                       "thursday":false,
                       "friday":false,
                       "saturday":false,
                       "sunday":false,
                       "standingMeetingPattern":null,
                       "instructors":[
                          {
                             "displayName":"Irina A Novopoltseva",
                             "role":"PI",
                             "email":"novopoltseva.1@osu.edu"
                          }
                       ]
                    }
                 ],
                 "courseOfferingNumber":1,
                 "courseId":"152909",
                 "academicGroup":"DEN",
                 "subject":"Dental Hygiene",
                 "catalogNumber":"4430",
                 "career":"UGRD",
                 "description":"DHY Practicum",
                 "enrollmentStatus":"Open",
                 "status":"A",
                 "type":"E",
                 "associatedClass":"10",
                 "autoEnrollWaitlist":true,
                 "autoEnrollSection1":null,
                 "autoEnrollSection2":null,
                 "consent":"D",
                 "waitlistCapacity":5,
                 "minimumEnrollment":0,
                 "enrollmentTotal":1,
                 "waitlistTotal":0,
                 "academicOrg":"D2120",
                 "location":"CS-COLMBUS",
                 "equivalentCourseId":null,
                 "startDate":"2021-05-12",
                 "endDate":"2021-07-30",
                 "cancelDate":null,
                 "primaryInstructorSection":"10",
                 "combinedSection":null,
                 "holidaySchedule":"OSUSIS",
                 "sessionCode":"1S",
                 "sessionDescription":"Summer Term",
                 "term":"Summer 2021",
                 "campus":"Columbus",
                 "attributes":[
                    {
                       "name":"CCP",
                       "value":"NON-CCP",
                       "description":"Not eligible for College Credit Plus program"
                    }
                 ],
                 "secCampus":"COL",
                 "secAcademicGroup":"DEN",
                 "secCatalogNumber":"4430",
                 "meetingDays":"",
                 "_parent":"152909-1-1214",
                 "subjectDesc":"Dental Hygiene",
                 "courseTitle":"Dental Hygiene Practicum",
                 "courseDescription":"Supervised practice outside the traditional clinic in a setting similar to one in which the dental hygiene student may practice, teach, or conduct research upon graduation.\nPrereq: Sr standing in DHY or BDCP major. Repeatable to a maximum of 4 cr hrs or 4 completions. This course is graded S/U.",
                 "catalogLevel":"4xxx",
                 "termCode":"1214"
              }
           ]
        },
        {
           "course":{
              "term":"Spring 2021",
              "effectiveDate":"2020-08-24",
              "effectiveStatus":"A",
              "title":"Undergraduate Research in Public Health",
              "shortDescription":"Res Pub Hlth",
              "description":"Undergraduate research under the guidance of a faculty mentor in a basic or applied area of public health.\nPrereq: Jr or Sr standing, and enrollment in BSPH major, and permission of advisor. Students who are not junior or senior standing may be eligible with faculty mentor approval. Repeatable to a maximum of 6 cr hrs. This course is graded S/U.",
              "equivalentId":"",
              "allowMultiEnroll":"N",
              "maxUnits":6,
              "minUnits":1,
              "repeatUnitsLimit":6,
              "grading":"Satisfactory/Unsatisfactory",
              "component":"Independent Study",
              "primaryComponent":"IND",
              "offeringNumber":"1",
              "subject":"PUBHLTH",
              "catalogNumber":"4998",
              "campus":"Columbus",
              "academicOrg":"D2505",
              "academicCareer":"Undergraduate",
              "cipCode":"51.2201",
              "courseAttributes":[
                 {
                    "name":"CCP",
                    "value":"NON-CCP",
                    "description":"Not eligible for College Credit Plus program"
                 }
              ],
              "campusCode":"COL",
              "catalogLevel":"4xxx",
              "subjectDesc":"Public Health",
              "courseId":"160532"
           },
           "sections":[
              {
                 "classNumber":"3557",
                 "section":"0030",
                 "component":"Independent Study",
                 "instructionMode":"In Person",
                 "meetings":[
                    {
                       "meetingNumber":1,
                       "facilityId":null,
                       "facilityType":null,
                       "facilityDescription":null,
                       "facilityDescriptionShort":null,
                       "facilityGroup":null,
                       "facilityCapacity":0,
                       "buildingCode":null,
                       "room":null,
                       "buildingDescription":null,
                       "buildingDescriptionShort":null,
                       "startTime":null,
                       "endTime":null,
                       "startDate":"2021-01-11",
                       "endDate":"2021-04-23",
                       "monday":false,
                       "tuesday":false,
                       "wednesday":false,
                       "thursday":false,
                       "friday":false,
                       "saturday":false,
                       "sunday":false,
                       "standingMeetingPattern":null,
                       "instructors":[
                          {
                             "displayName":"Abigail Norris Turner",
                             "role":"PI",
                             "email":"norris-turner.1@osu.edu"
                          }
                       ]
                    }
                 ],
                 "courseOfferingNumber":1,
                 "courseId":"160532",
                 "academicGroup":"PBH",
                 "subject":"Public Health",
                 "catalogNumber":"4998",
                 "career":"UGRD",
                 "description":"Res Pub Hlth",
                 "enrollmentStatus":"Open",
                 "status":"A",
                 "type":"E",
                 "associatedClass":"1",
                 "autoEnrollWaitlist":true,
                 "autoEnrollSection1":null,
                 "autoEnrollSection2":null,
                 "consent":"I",
                 "waitlistCapacity":99,
                 "minimumEnrollment":0,
                 "enrollmentTotal":0,
                 "waitlistTotal":0,
                 "academicOrg":"D2505",
                 "location":"CS-COLMBUS",
                 "equivalentCourseId":null,
                 "startDate":"2021-01-11",
                 "endDate":"2021-04-23",
                 "cancelDate":null,
                 "primaryInstructorSection":"0010",
                 "combinedSection":null,
                 "holidaySchedule":"OSUSIS",
                 "sessionCode":"1",
                 "sessionDescription":"Regular Academic Term",
                 "term":"Spring 2021",
                 "campus":"Columbus",
                 "attributes":[
                    {
                       "name":"CCP",
                       "value":"NON-CCP",
                       "description":"Not eligible for College Credit Plus program"
                    }
                 ],
                 "secCampus":"COL",
                 "secAcademicGroup":"PBH",
                 "secCatalogNumber":"4998",
                 "meetingDays":"",
                 "_parent":"160532-1-1212",
                 "subjectDesc":"Public Health",
                 "courseTitle":"Undergraduate Research in Public Health",
                 "courseDescription":"Undergraduate research under the guidance of a faculty mentor in a basic or applied area of public health.\nPrereq: Jr or Sr standing, and enrollment in BSPH major, and permission of advisor. Students who are not junior or senior standing may be eligible with faculty mentor approval. Repeatable to a maximum of 6 cr hrs. This course is graded S/U.",
                 "catalogLevel":"4xxx",
                 "termCode":"1212"
              }
           ]
        },
        {
           "course":{
              "term":"Spring 2021",
              "effectiveDate":"2013-05-05",
              "effectiveStatus":"A",
              "title":"Individual Studies in Public Health",
              "shortDescription":"Ind Study Pub Hlth"

但是,当我用下面的代码抓取这些页面时,它只是不断重复同一页。

import requests
session = requests.Session()

def get_classes():
    """Yield each page of the OSU class-search API as parsed JSON.

    The first page is requested to learn ``data.totalPages``; the remaining
    pages are then requested one by one. Uses the module-level ``session``
    (a ``requests.Session``) for all requests.

    Yields:
        dict: the decoded JSON body of one result page, in page order.
    """
    # No space inside "academic-career": a stray space changes the parameter
    # name and the filter would not be sent as intended.
    url = "https://content.osu.edu/v2/classes/search?q=&campus=col&academic-career=ugrd"
    first_page = session.get(url).json()
    yield first_page
    num_pages = first_page['data']['totalPages']

    # The API paginates with the query parameter ``p`` (see ``nextPageLink``
    # in the response), not ``page``. Page 1 has already been yielded above,
    # so continue from page 2 up to and including the last page.
    for page in range(2, num_pages + 1):
        next_page = session.get(url, params={'p': page}).json()
        yield next_page

# Walk every result page and pull the catalogue fields out of each course.
for page in get_classes():
    data = page['data']['courses']

    # Iterate the course entries directly; an index plus a bounds check is
    # unnecessary because the loop can never run past the end of the list.
    for entry in data:
        # Each entry wraps the catalogue record under the "course" key.
        course_key = entry['course']
        subject = course_key['subject']
        number = course_key['catalogNumber']
        title = course_key['title']
        units = course_key['minUnits']
        component = course_key['component']
        attributes = course_key['courseAttributes']
        description = course_key['description']

我想抓取当前页面的所有数据,然后继续抓取下一页,直到抓取完所有页面。但实际上,它只是一遍又一遍地返回同一页。

标签: python json api for-loop python-requests

解决方案


您可以在响应中看到下一页链接: "nextPageLink":"?q=&campus=col&academic-career=ugrd&p=2",

所以分页参数应该使用 p 而不是 page,例如 params={'p': page}。由于接口不识别 page 参数,每次请求返回的都是第一页,这就是结果不断重复的原因。


推荐阅读