javascript - 抓取脚本标签内的数据并隔离元素
问题描述
我在脚本标签内抓取数据时遇到了一些问题。我想获取 jsonSwatchConfig [ ] 中的数据,尤其是每个 'option_id' 和 'us' 属性。
// Dependencies and module-level state for the scraper script.
// NOTE(review): `request` and `cheerio` are each declared twice in this block.
// `var` allows re-declaration, so the later plain `require("request")`
// replaces the jar-enabled instance created with `.defaults({ jar: jar })`.
var jar = require('request').jar();
var request = require('request').defaults({ jar: jar });
var cheerio = require("cheerio");
var cloudscraper = require('cloudscraper');
var fs = require('fs');
const logT = require('log-timestamp');
var open = require('open');
var today = new Date(); // captured once, when this module is loaded
var randomsize; // declared without a value; nothing in this block assigns it
var webdriver = require('selenium-webdriver');
var request = require("request") // overrides the jar-enabled `request` declared above
var cheerio = require("cheerio") // duplicate of the earlier `cheerio` declaration
var sizes =[]
/**
 * Downloads the product page and prints the raw text that follows the
 * "Svd_Catalog/js/svd.swatch.renderer" key inside the inline script that
 * mentions `jsonSwatchConfig`.
 *
 * The result is reported through the console only; nothing is returned.
 * Transport errors, non-string bodies and pages that lack the expected script
 * or marker are reported with console.error instead of throwing from inside
 * the request callback.
 */
function product() {
  console.log("Obtaining Product Page..")
  request.get({
    url: 'https://www.sivasdescalzo.com/en/nike-air-max-270-react-eng-cd0113-400',
    jar: request.jar(),
    headers: {
      'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      // NOTE(review): 'br' is advertised here; confirm the HTTP client can decode Brotli responses.
      'accept-encoding': 'gzip, deflate, br',
      'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7,pt;q=0.6,fr;q=0.5,de;q=0.4',
      'cache-control': 'max-age=0',
      'dnt': '1',
      'referer': 'https://www.sivasdescalzo.com/en/lifestyle',
      'sec-fetch-dest': 'document',
      'sec-fetch-mode': 'navigate',
      'sec-fetch-site': 'same-origin',
      'sec-fetch-user': '?1',
      'upgrade-insecure-requests': '1',
      'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'},
    // NOTE(review): the cloudflare*/challengesToSolve/decodeEmails options look like
    // cloudscraper settings; confirm that `request` actually uses them.
    cloudflareTimeout: 5000,
    cloudflareMaxTimeout: 30000,
    followAllRedirects: true,
    //proxy: getproxy(),
    challengesToSolve: 3,
    json: true,
    decodeEmails: false,
    gzip: true,
  }, function(error, response, body) {
    // Bail out early on transport failures; `body` is not usable in that case.
    if (error) {
      console.error("Failed to obtain product page:", error.message || error);
      return;
    }
    // With `json: true` the body may arrive already parsed; only HTML text can be scraped.
    if (typeof body !== 'string') {
      console.error("Unexpected response body: expected an HTML string");
      return;
    }

    const $ = cheerio.load(body);
    // Take the first child node of each <script> directly under <body> and keep
    // the first one whose text mentions jsonSwatchConfig.
    const script = $('body > script')
      .map((i, el) => el.children[0])
      .filter((i, node) => Boolean(node) &&
        typeof node.data === 'string' &&
        node.data.includes('jsonSwatchConfig'))
      .get(0);
    if (!script) {
      console.error("No inline script containing jsonSwatchConfig was found");
      return;
    }

    // Strip all line breaks, then keep what follows the renderer key.
    const scriptText = script.data.replace(/\r?\n|\r/g, "");
    const rendererText = scriptText.split('"Svd_Catalog/js/svd.swatch.renderer"')[1];
    if (rendererText === undefined) {
      console.error('Marker "Svd_Catalog/js/svd.swatch.renderer" was not found in the script');
      return;
    }
    // Only the first space is removed, exactly as before.
    console.log(rendererText.replace(" ", ""));
  });
}
我的实际输出是:
:{ "jsonSwatchConfig":
[{"option_id":"18","us":"7","eu":"40","uk":"6","cm":"25"},
{"option_id":"16","us":"8","eu":"41","uk":"7","cm":"26"},
{"option_id":"14","us":"9","eu":"42.5","uk":"8","cm":"27"},
{"option_id":"12","us":"10","eu":"44","uk":"9","cm":"28"},
{"option_id":"10","us":"11","eu":"45","uk":"10","cm":"29"},
{"option_id":"8","us":"12","eu":"46","uk":"11","cm":"30"},
{"option_id":"105","us":"10.5","eu":"44.5","uk":"9.5","cm":"28.5"},
{"option_id":"117","us":"11.5","eu":"45.5","uk":"10.5","cm":"29.5"},
{"option_id":"110","us":"7.5","eu":"40.5","uk":"6.5","cm":"25.5"},
{"option_id":"121","us":"8.5","eu":"42","uk":"7.5","cm":"26.5"},
{"option_id":"114","us":"9.5","eu":"43","uk":"8.5","cm":"27.5"}] } },
"*" : { "Magento_Swatches/js/catalog-add-to-cart": {} } }
我用拆分(split)和删除(replace)的方式尝试了很多次,但始终没有找到办法得到我想要的结果。
这是 html 脚本标记源:
<script type="text/x-magento-init">
{
"[data-role=swatch-options]": {
"Magento_Swatches/js/swatch-renderer": {
"jsonConfig": {"attributes":{"138":{"id":"138","code":"size_us","label":"Size","options":[{"id":"24","label":"4","products":[]},{"id":"172","label":"4.5","products":[]},{"id":"22","label":"5","products":[]},{"id":"160","label":"5.5","products":[]},{"id":"20","label":"6","products":[]},{"id":"165","label":"6.5","products":[]},{"id":"18","label":"7","products":["276814"]},{"id":"110","label":"7.5","products":[]},{"id":"16","label":"8","products":[]},{"id":"121","label":"8.5","products":[]},{"id":"14","label":"9","products":[]},{"id":"114","label":"9.5","products":["242798"]},{"id":"12","label":"10","products":["242796"]},{"id":"105","label":"10.5","products":["242793"]},{"id":"10","label":"11","products":["242794"]},{"id":"117","label":"11.5","products":["276810"]},{"id":"8","label":"12","products":[]}],"position":"0"}},"template":"\u20ac<%- data.price %>","currencyFormat":"\u20ac%s","optionPrices":{"242796":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"242793":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"242794":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"242795":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"242792":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"242797":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"242798":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276810":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276815":{"oldPrice":{"am
ount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276809":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276811":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276816":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276812":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276813":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276814":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"276817":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}},"310604":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90},"tierPrices":[],"msrpPrice":{"amount":null}}},"priceFormat":{"pattern":"\u20ac%s","precision":2,"requiredPrecision":2,"decimalSymbol":".","groupSymbol":",","groupLength":3,"integerRequired":false},"prices":{"oldPrice":{"amount":90},"basePrice":{"amount":90},"finalPrice":{"amount":90}},"productId":"242799","chooseText":"Choose an Option...","images":[],"index":{"242796":{"138":"12"},"242793":{"138":"105"},"242794":{"138":"10"},"242795":{"138":"16"},"242792":{"138":"121"},"242797":{"138":"14"},"242798":{"138":"114"},"276810":{"138":"117"},"276815":{"138":"24"},"276809":{"138":"172"},"276811":{"138":"22"},"276816":{"138":"160"},"276812":{"138":"20"},"276813":{"138":"165"},"276814":{"138":"18"},"276817":{"138":"110"},"310604":{"138":"8"}}},
"jsonSwatchConfig": {"138":{"24":{"type":"0","value":"4","label":"4"},"172":{"type":"0","value":"4.5","label":"4.5"},"22":{"type":"0","value":"5","label":"5"},"160":{"type":"0","value":"5.5","label":"5.5"},"20":{"type":"0","value":"6","label":"6"},"165":{"type":"0","value":"6.5","label":"6.5"},"18":{"type":"0","value":"7","label":"7"},"110":{"type":"0","value":"7.5","label":"7.5"},"16":{"type":"0","value":"8","label":"8"},"121":{"type":"0","value":"8.5","label":"8.5"},"14":{"type":"0","value":"9","label":"9"},"114":{"type":"0","value":"9.5","label":"9.5"},"12":{"type":"0","value":"10","label":"10"},"105":{"type":"0","value":"10.5","label":"10.5"},"10":{"type":"0","value":"11","label":"11"},"117":{"type":"0","value":"11.5","label":"11.5"},"8":{"type":"0","value":"12","label":"12"},"additional_data":"{\"swatch_input_type\":\"text\",\"update_product_preview_image\":\"0\",\"use_product_image_for_swatch\":0}"}},
"mediaCallback": "https://www.sivasdescalzo.com/en/swatches/ajax/media/",
"gallerySwitchStrategy": "replace",
"jsonSwatchImageSizeConfig": {"swatchImage":{"width":30,"height":20},"swatchThumb":{"height":90,"width":110}} },
"Svd_Catalog/js/svd.swatch.renderer": {
"jsonSwatchConfig": [{"option_id":"24","us":"4","eu":"36","uk":"3","cm":"23"},{"option_id":"22","us":"5","eu":"37.5","uk":"4.5","cm":"23.5"},{"option_id":"20","us":"6","eu":"38.5","uk":"5.5","cm":"24"},{"option_id":"18","us":"7","eu":"40","uk":"6","cm":"25"},{"option_id":"16","us":"8","eu":"41","uk":"7","cm":"26"},{"option_id":"14","us":"9","eu":"42.5","uk":"8","cm":"27"},{"option_id":"12","us":"10","eu":"44","uk":"9","cm":"28"},{"option_id":"10","us":"11","eu":"45","uk":"10","cm":"29"},{"option_id":"8","us":"12","eu":"46","uk":"11","cm":"30"},{"option_id":"105","us":"10.5","eu":"44.5","uk":"9.5","cm":"28.5"},{"option_id":"117","us":"11.5","eu":"45.5","uk":"10.5","cm":"29.5"},{"option_id":"172","us":"4.5","eu":"36.5","uk":"4","cm":"23.5"},{"option_id":"160","us":"5.5","eu":"38","uk":"5","cm":"24"},{"option_id":"165","us":"6.5","eu":"39","uk":"6","cm":"24.5"},{"option_id":"110","us":"7.5","eu":"40.5","uk":"6.5","cm":"25.5"},{"option_id":"121","us":"8.5","eu":"42","uk":"7.5","cm":"26.5"},{"option_id":"114","us":"9.5","eu":"43","uk":"8.5","cm":"27.5"}] }
},
"*" : {
"Magento_Swatches/js/catalog-add-to-cart": {}
}
}</script>
我想知道应该怎么做,以及最稳妥的做法是什么。我可以继续用拆分和删除的方式处理,但我认为这不是完成此类任务的正确方法。
解决方案
使用 RegExp(正则表达式)怎么样?
// Capture the array literal that follows a "jsonSwatchConfig" key. Any
// "jsonSwatchConfig" key whose value starts with `{` cannot match `\[`.
// Whitespace around the colon is tolerated so formatting changes do not break the match.
const pattern = /"jsonSwatchConfig"\s*:\s*(\[.*\])/;
const found = scriptText.match(pattern);
if (found === null) {
  // Fail with a descriptive message instead of an opaque TypeError on `null[1]`.
  throw new Error('jsonSwatchConfig array not found in script text');
}
// `results` is still a JSON string here; `var` keeps it reassignable for later parsing.
var results = found[1];
编辑:要提取 option_id 和 us 属性,首先必须把结果从字符串转换为对象。上面的代码得到的 jsonSwatchConfig 数组是一个字符串,而不是真正的对象数组,因此可以接着上面的代码执行:
results = JSON.parse(results);
这样 results 就是一个对象数组了。要访问特定属性,可以执行以下操作:
// Print the option id (as `x`) and the US size (as `y`) of every swatch entry.
for (const { option_id: x, us: y } of results) {
  console.log({ x, y });
}
推荐阅读
- python - 在python中计算单词中字母之间的距离
- linux - Fedora 28 KDE 上的 Docker 容器没有互联网连接
- javascript - ng-view 未显示在 index html 页面中
- pulumi - pulumi:在`pulumi up`期间忽略手动删除的资源
- c# - 如何在 Xamarin 中恢复 Picker 的 ID
- python - multiprocessing.Pool:如何在旧进程完成时启动新进程?
- database - postgres delete 然后 pg_dump 转储已删除的数据
- python - 使用 Python 写入 HTML 文件
- javascript - 在 VSCode 中如何正确配置 jsconfig.json 以使带有 index.js 导入的绝对路径起作用?
- r - dplyr 用第一行替换 colnames 并使用管道函数删除第一行