javascript - Webpack 构建不适用于爬虫
问题描述
我的构建在浏览器中运行良好(甚至是旧版 v40 < chrome)。它不适用于我迄今为止尝试过的任何爬虫。奇怪的是,同样的错误只发生在爬虫(例如 googlebot)中,这使得查明问题变得异常困难。
我试过了
- 禁用生产构建以查看未缩小的错误,但爬虫随后拒绝加载 js 文件,因为它太大了
- 在尽可能多的浏览器中运行该站点(IE 除外),它适用于所有浏览器。
- 禁用爬虫的预加载(使用无头 chrome 呈现应用程序的 html)
这是 googlebot 看到的错误:
您可以在https://wavedistrict.com上自行测试
网络包配置:
const { resolve } = require("path")
const ForkTsCheckerWebpackPlugin = require("fork-ts-checker-webpack-plugin")
const CleanWebpackPlugin = require("clean-webpack-plugin")
const CopyWebpackPlugin = require("copy-webpack-plugin")
const HtmlWebpackPlugin = require("html-webpack-plugin")
const HtmlWebpackInlineSourcePlugin = require("html-webpack-inline-source-plugin")
const MiniCssExtractPlugin = require("mini-css-extract-plugin")
const OptimizeCssAssetsWebpackPlugin = require("optimize-css-assets-webpack-plugin")
const WebpackPwaManifest = require("webpack-pwa-manifest")
const webpackMerge = require("webpack-merge")
const Visualizer = require("webpack-visualizer-plugin")
/** True when NODE_ENV is exactly "production". */
const isProduction = process.env.NODE_ENV === "production"

/**
 * Variable for the project root.
 * Change this when moving the configuration files
 */
const projectRoot = resolve(__dirname)

/** Resolves the given path segments against the project root. */
const fromRoot = (...segments) => resolve(projectRoot, ...segments)

// Folders directly below the project root.
const sourceFolder = fromRoot("src")
const buildFolder = fromRoot("build")
const publicFolder = fromRoot("public")

// Locations derived from the folders above.
const tsFolder = resolve(sourceFolder, "ts")
const htmlTemplateFile = resolve(publicFolder, "index.html")

// Tooling configuration files that sit in the project root.
const tsconfigPath = fromRoot("tsconfig.json")
const tslintPath = fromRoot("tslint.json")
/**
 * Loader descriptor for "ts-loader".
 *
 * NOTE(review): none of the rules listed in `baseConfig.module.rules` in this
 * file reference this descriptor — confirm whether it is still needed.
 */
const tsLoader = {
  loader: "ts-loader",
  options: {
    configFile: tsconfigPath,
    transpileOnly: true,
    allowTsInNodeModules: true,
    // Compiler options handed to ts-loader alongside the config file.
    compilerOptions: {
      target: "es5",
      module: "esnext",
      allowSyntheticDefaultImports: true,
    },
  },
}
/** Loader descriptor for "babel-loader"; it carries no inline options. */
const babelLoader = { loader: "babel-loader" }
/** Rule that hands files ending in ".worker.ts" to "worker-loader". */
const workerRule = {
  test: /\.worker\.ts$/,
  use: { loader: "worker-loader" },
}
/** Rule that sends every .js, .ts and .tsx file through `babelLoader`. */
const babelRule = {
  test: /\.(js|ts|tsx)$/,
  use: [babelLoader],
}
/** style-loader descriptor used as the first .scss loader outside production. */
const devStyleLoader = {
  loader: "style-loader",
  options: { singleton: true },
}

/** sass-loader descriptor; `data` and `includePaths` are passed as loader options. */
const sassLoader = {
  loader: "sass-loader",
  options: {
    data: "@import './ts/modules/core/styles/_.scss';",
    includePaths: [sourceFolder],
  },
}

/**
 * Rule for .scss files. The first entry of `use` depends on the build:
 * MiniCssExtractPlugin.loader in production, style-loader otherwise.
 */
const sassRule = {
  test: /\.scss$/,
  use: [
    isProduction ? MiniCssExtractPlugin.loader : devStyleLoader,
    { loader: "css-loader" },
    sassLoader,
  ],
}
/**
 * Settings shared by the development and production configurations.
 *
 * @type {import('webpack').Configuration}
 */
const baseConfig = {
  context: projectRoot,

  // The two polyfill packages are listed ahead of the application entry module.
  entry: [
    "babel-polyfill",
    "url-search-params-polyfill",
    resolve(tsFolder, "init"),
  ],

  output: {
    path: buildFolder,
    publicPath: "/",
    filename: "js/[name].js",
  },

  module: {
    rules: [workerRule, babelRule, sassRule],
  },

  resolve: {
    modules: ["node_modules"],
    extensions: [".js", ".ts", ".tsx", ".scss"],
    // Bare "modules/..." and "common/..." imports map into the ts folder.
    alias: {
      modules: resolve(tsFolder, "modules"),
      common: resolve(tsFolder, "common"),
    },
    // NOTE(review): "browser" is not part of this list — confirm that no
    // dependency relies on its `browser` package.json field.
    mainFields: ["jsnext:main", "module", "main"],
  },

  plugins: [
    // Copies from the public folder; the HTML template is listed under `ignore`.
    new CopyWebpackPlugin([{ from: publicFolder, ignore: [htmlTemplateFile] }]),
    new CleanWebpackPlugin(buildFolder, { root: projectRoot, verbose: false }),
    // ForkTsCheckerWebpackPlugin is currently commented out:
    // new ForkTsCheckerWebpackPlugin({
    //   tslint: tslintPath,
    //   tsconfig: tsconfigPath,
    // }),
  ],

  stats: {
    children: false,
    entrypoints: false,
    modules: false,
  },
}
// Register the visualizer plugin only when "--stats" is among the process
// arguments and the base configuration has a plugins array to push onto.
if (process.argv.includes("--stats") && baseConfig.plugins) {
  baseConfig.plugins.push(new Visualizer())
}
/**
 * Development configuration: `baseConfig` merged with the HTML template
 * plugin, inline source maps and the dev-server settings.
 */
const devConfig = webpackMerge(baseConfig, {
  mode: "development",
  devtool: "inline-source-map",
  devServer: {
    hot: false,
    historyApiFallback: true,
  },
  plugins: [
    new HtmlWebpackPlugin({
      template: htmlTemplateFile,
      chunksSortMode: "dependency",
    }),
  ],
})
/** Icon edge lengths requested for each PWA manifest logo. */
const pwaIconSizes = [48, 72, 96, 128, 144, 192, 256, 512]

/** One manifest icon entry per logo file; each entry gets its own sizes array. */
const pwaIcons = ["img/logo.svg", "img/logo.png"].map((logoFile) => ({
  src: resolve(publicFolder, logoFile),
  sizes: [...pwaIconSizes],
  destination: "icons",
}))

/** `minify` options passed to HtmlWebpackPlugin in the production build. */
const htmlMinifyOptions = {
  removeComments: true,
  collapseWhitespace: true,
  removeRedundantAttributes: true,
  useShortDoctype: true,
  removeEmptyAttributes: true,
  removeStyleLinkTypeAttributes: true,
  keepClosingSlash: true,
  minifyJS: true,
  minifyCSS: true,
  minifyURLs: true,
}

/**
 * Production configuration: `baseConfig` merged with minimization, the PWA
 * manifest, CSS extraction/optimization, the minified HTML template and
 * external source maps.
 */
const prodConfig = webpackMerge(baseConfig, {
  mode: "production",
  devtool: "source-map",
  optimization: {
    minimize: true,
    nodeEnv: "production",
  },
  performance: {
    maxAssetSize: 500000,
  },
  plugins: [
    new WebpackPwaManifest({
      name: "WaveDistrict",
      short_name: "WaveDistrict",
      description: "",
      background_color: "#091F35",
      theme_color: "#00ba8c",
      orientation: "any",
      icons: pwaIcons,
    }),
    new MiniCssExtractPlugin({ filename: "css/[name].css" }),
    new OptimizeCssAssetsWebpackPlugin(),
    new HtmlWebpackPlugin({
      template: htmlTemplateFile,
      minify: htmlMinifyOptions,
      inject: true,
    }),
    // NOTE(review): no `inlineSource` option is set on the HtmlWebpackPlugin
    // instance above — confirm this plugin is meant to inline anything.
    new HtmlWebpackInlineSourcePlugin(),
  ],
})
// Export the production configuration when isProduction is set, otherwise
// the development configuration.
if (isProduction) {
  module.exports = prodConfig
} else {
  module.exports = devConfig
}
Babel 配置(需要将 ES6 node_modules 转换为 ES5):
/** Options object passed to "@babel/preset-env" in the presets list. */
const babelEnv = {
  // Chrome 41 is targeted for googlebot.
  targets: { chrome: "41" },
}
/** Keep track of all conflicting dependencies here */
const nonES5Deps = ["qs", "querystring", "query-string", "decko"]
module.exports = function(api) {
api.cache(true)
return {
exclude: [],
include: (path) => {
if (nonES5Deps.some((p) => path.match(p))) {
return true
}
if (path.match(/node_modules/)) return false
return true
},
presets: [
["@babel/preset-env", babelEnv],
"@babel/preset-react",
"@babel/preset-typescript",
],
plugins: [
"@babel/plugin-syntax-dynamic-import",
"@babel/plugin-transform-arrow-functions",
[
"@babel/plugin-proposal-decorators",
{
legacy: true,
},
],
[
"@babel/plugin-proposal-class-properties",
{
loose: true,
},
],
],
}
}
那么这里发生了什么?当问题仅出现在爬虫中时,如何调试问题?
解决方案
我在朋友的帮助下发现了这个问题。看来 Googlebot(和其他爬虫)不支持该AudioContext
对象,因此undefined is not a function
.
修改我的代码以检查它并在它不存在时安全地禁用功能已经解决了问题。
推荐阅读
- ios - Mantle:忽略数组内嵌套字典中的键
- idris - 在这种情况下,为什么 rewrite 不会改变表达式的类型?
- three.js - 第一人称游戏 Three.js - 控制 - 移动相机
- database - 设置 datestyle = 'SQL, DMY' 失败
- c# - 在 xamrin 表单上复制工具栏
- javascript - 如何在另一个函数中调用reduce函数
- c# - 为什么将 SelectedItem 作为自定义对象传递到新页面后没有内容显示?
- javascript - 使用 JavaScript 在后台播放 Spotify 客户端上的 Spotify 歌曲
- package-managers - LXC - 无法从 linux 容器访问包管理器
- javascript - 正文中的 HTML 代码