python - 尝试将 API 抓取到下一页时,for 循环不断重复
问题描述
我正在从以下 API 抓取数据:https ://content.osu.edu/v2/classes/search?q=&campus=col&academic-career=ugrd
JSON 格式如下所示:
{
"data":{
"totalItems":10000,
"currentItemCount":200,
"page":1,
"totalPages":50,
"refineQueryTemplate":"q=_QUERY_&campus=col&academic-career=ugrd&p=1",
"nextPageLink":"?q=&campus=col&academic-career=ugrd&p=2",
"prevPageLink":null,
"activeSort":"",
"courses":[
{
"course":{
"term":"Summer 2021",
"effectiveDate":"2019-01-06",
"effectiveStatus":"A",
"title":"Dental Hygiene Practicum",
"shortDescription":"DHY Practicum",
"description":"Supervised practice outside the traditional clinic in a setting similar to one in which the dental hygiene student may practice, teach, or conduct research upon graduation.\nPrereq: Sr standing in DHY or BDCP major. Repeatable to a maximum of 4 cr hrs or 4 completions. This course is graded S/U.",
"equivalentId":"S1989",
"allowMultiEnroll":"N",
"maxUnits":4,
"minUnits":1,
"repeatUnitsLimit":4,
"grading":"Satisfactory/Unsatisfactory",
"component":"Field Experience",
"primaryComponent":"FLD",
"offeringNumber":"1",
"academicGroup":"Dentistry",
"subject":"DENTHYG",
"catalogNumber":"4430",
"campus":"Columbus",
"academicOrg":"D2120",
"academicCareer":"Undergraduate",
"cipCode":"51.0602",
"courseAttributes":[
{
"name":"CCP",
"value":"NON-CCP",
"description":"Not eligible for College Credit Plus program"
}
],
"campusCode":"COL",
"catalogLevel":"4xxx",
"subjectDesc":"Dental Hygiene",
"courseId":"152909"
},
"sections":[
{
"classNumber":"20850",
"section":"10",
"component":"Field Experience",
"instructionMode":"Distance Learning",
"meetings":[
{
"meetingNumber":1,
"facilityId":null,
"facilityType":null,
"facilityDescription":null,
"facilityDescriptionShort":null,
"facilityGroup":null,
"facilityCapacity":0,
"buildingCode":null,
"room":null,
"buildingDescription":null,
"buildingDescriptionShort":null,
"startTime":null,
"endTime":null,
"startDate":"2021-05-12",
"endDate":"2021-07-30",
"monday":false,
"tuesday":false,
"wednesday":false,
"thursday":false,
"friday":false,
"saturday":false,
"sunday":false,
"standingMeetingPattern":null,
"instructors":[
{
"displayName":"Irina A Novopoltseva",
"role":"PI",
"email":"novopoltseva.1@osu.edu"
}
]
}
],
"courseOfferingNumber":1,
"courseId":"152909",
"academicGroup":"DEN",
"subject":"Dental Hygiene",
"catalogNumber":"4430",
"career":"UGRD",
"description":"DHY Practicum",
"enrollmentStatus":"Open",
"status":"A",
"type":"E",
"associatedClass":"10",
"autoEnrollWaitlist":true,
"autoEnrollSection1":null,
"autoEnrollSection2":null,
"consent":"D",
"waitlistCapacity":5,
"minimumEnrollment":0,
"enrollmentTotal":1,
"waitlistTotal":0,
"academicOrg":"D2120",
"location":"CS-COLMBUS",
"equivalentCourseId":null,
"startDate":"2021-05-12",
"endDate":"2021-07-30",
"cancelDate":null,
"primaryInstructorSection":"10",
"combinedSection":null,
"holidaySchedule":"OSUSIS",
"sessionCode":"1S",
"sessionDescription":"Summer Term",
"term":"Summer 2021",
"campus":"Columbus",
"attributes":[
{
"name":"CCP",
"value":"NON-CCP",
"description":"Not eligible for College Credit Plus program"
}
],
"secCampus":"COL",
"secAcademicGroup":"DEN",
"secCatalogNumber":"4430",
"meetingDays":"",
"_parent":"152909-1-1214",
"subjectDesc":"Dental Hygiene",
"courseTitle":"Dental Hygiene Practicum",
"courseDescription":"Supervised practice outside the traditional clinic in a setting similar to one in which the dental hygiene student may practice, teach, or conduct research upon graduation.\nPrereq: Sr standing in DHY or BDCP major. Repeatable to a maximum of 4 cr hrs or 4 completions. This course is graded S/U.",
"catalogLevel":"4xxx",
"termCode":"1214"
}
]
},
{
"course":{
"term":"Spring 2021",
"effectiveDate":"2020-08-24",
"effectiveStatus":"A",
"title":"Undergraduate Research in Public Health",
"shortDescription":"Res Pub Hlth",
"description":"Undergraduate research under the guidance of a faculty mentor in a basic or applied area of public health.\nPrereq: Jr or Sr standing, and enrollment in BSPH major, and permission of advisor. Students who are not junior or senior standing may be eligible with faculty mentor approval. Repeatable to a maximum of 6 cr hrs. This course is graded S/U.",
"equivalentId":"",
"allowMultiEnroll":"N",
"maxUnits":6,
"minUnits":1,
"repeatUnitsLimit":6,
"grading":"Satisfactory/Unsatisfactory",
"component":"Independent Study",
"primaryComponent":"IND",
"offeringNumber":"1",
"subject":"PUBHLTH",
"catalogNumber":"4998",
"campus":"Columbus",
"academicOrg":"D2505",
"academicCareer":"Undergraduate",
"cipCode":"51.2201",
"courseAttributes":[
{
"name":"CCP",
"value":"NON-CCP",
"description":"Not eligible for College Credit Plus program"
}
],
"campusCode":"COL",
"catalogLevel":"4xxx",
"subjectDesc":"Public Health",
"courseId":"160532"
},
"sections":[
{
"classNumber":"3557",
"section":"0030",
"component":"Independent Study",
"instructionMode":"In Person",
"meetings":[
{
"meetingNumber":1,
"facilityId":null,
"facilityType":null,
"facilityDescription":null,
"facilityDescriptionShort":null,
"facilityGroup":null,
"facilityCapacity":0,
"buildingCode":null,
"room":null,
"buildingDescription":null,
"buildingDescriptionShort":null,
"startTime":null,
"endTime":null,
"startDate":"2021-01-11",
"endDate":"2021-04-23",
"monday":false,
"tuesday":false,
"wednesday":false,
"thursday":false,
"friday":false,
"saturday":false,
"sunday":false,
"standingMeetingPattern":null,
"instructors":[
{
"displayName":"Abigail Norris Turner",
"role":"PI",
"email":"norris-turner.1@osu.edu"
}
]
}
],
"courseOfferingNumber":1,
"courseId":"160532",
"academicGroup":"PBH",
"subject":"Public Health",
"catalogNumber":"4998",
"career":"UGRD",
"description":"Res Pub Hlth",
"enrollmentStatus":"Open",
"status":"A",
"type":"E",
"associatedClass":"1",
"autoEnrollWaitlist":true,
"autoEnrollSection1":null,
"autoEnrollSection2":null,
"consent":"I",
"waitlistCapacity":99,
"minimumEnrollment":0,
"enrollmentTotal":0,
"waitlistTotal":0,
"academicOrg":"D2505",
"location":"CS-COLMBUS",
"equivalentCourseId":null,
"startDate":"2021-01-11",
"endDate":"2021-04-23",
"cancelDate":null,
"primaryInstructorSection":"0010",
"combinedSection":null,
"holidaySchedule":"OSUSIS",
"sessionCode":"1",
"sessionDescription":"Regular Academic Term",
"term":"Spring 2021",
"campus":"Columbus",
"attributes":[
{
"name":"CCP",
"value":"NON-CCP",
"description":"Not eligible for College Credit Plus program"
}
],
"secCampus":"COL",
"secAcademicGroup":"PBH",
"secCatalogNumber":"4998",
"meetingDays":"",
"_parent":"160532-1-1212",
"subjectDesc":"Public Health",
"courseTitle":"Undergraduate Research in Public Health",
"courseDescription":"Undergraduate research under the guidance of a faculty mentor in a basic or applied area of public health.\nPrereq: Jr or Sr standing, and enrollment in BSPH major, and permission of advisor. Students who are not junior or senior standing may be eligible with faculty mentor approval. Repeatable to a maximum of 6 cr hrs. This course is graded S/U.",
"catalogLevel":"4xxx",
"termCode":"1212"
}
]
},
{
"course":{
"term":"Spring 2021",
"effectiveDate":"2013-05-05",
"effectiveStatus":"A",
"title":"Individual Studies in Public Health",
"shortDescription":"Ind Study Pub Hlth" ```
But when I use this code to scrape the pages, it just repeats.
import requests
session = requests.Session()
def get_classes():
url = "https://content.osu.edu/v2/classes/search?q=&campus=col&academic- career=ugrd"
first_page = session.get(url).json()
yield first_page
num_pages = first_page['data']['totalPages']
for page in range(0, num_pages + 1):
next_page = session.get(url, params={'page': page}).json()
yield next_page
for page in get_classes():
data = page['data']['courses']
array_length = len(data)
for i in range(array_length):
if (i <= array_length):
course_key = data[i]['course']
subject = course_key['subject']
number = course_key['catalogNumber']
title = course_key['title']
units = course_key['minUnits']
component = course_key['component']
attributes = course_key['courseAttributes']
description = course_key['description']
else:
break
我想从页面中刮掉所有数据,然后继续到下一页,直到我刮掉所有页面。相反,它只是一遍又一遍地打印同一页。
解决方案
您可以在响应中看到下一页链接:
"nextPageLink":"?q=&campus=col&academic-career=ugrd&p=2",
所以你应该使用p
而不是page
.
推荐阅读
- algorithm - 在二进制字符串中找到包含相同数量的 0 和 1 的最大子字符串
- php - 如果表mysqli中的列相同,如何计算行x和行y
- dynamics-crm - 如何在 Dynamics CRM 的计算字段中使用查找字段中的货币字段
- redirect - 将 www 重定向到裸域的问题(Gitlab 页面,Hugo 静态网站,namecheap,让我们加密)
- r - 用 nleqslv packain R 求解两个非线性方程
- html - CSS 文本抖动 firefox
- reactjs - React:组件正在更改要控制的文本类型的不受控制的输入
- clojure - 试图理解clojure
- angular - Angular Interceptor 读取响应标头
- java - 使用 Gson 从 json 文件中获取项目的值