首页 > 解决方案 > Webpack 构建不适用于爬虫

问题描述

我的构建在浏览器中运行良好(甚至包括低于 v40 的旧版 Chrome)。但在我迄今为止尝试过的所有爬虫中,它都无法正常运行。奇怪的是,同样的错误只出现在爬虫(例如 Googlebot)中,这使得查明问题变得异常困难。

我试过了

这是 googlebot 看到的错误:

[图片:Googlebot 报错截图(原图缺失)]

您可以在 https://wavedistrict.com 上自行测试。

Webpack 配置:

const { resolve } = require("path")
const ForkTsCheckerWebpackPlugin = require("fork-ts-checker-webpack-plugin")
const CleanWebpackPlugin = require("clean-webpack-plugin")
const CopyWebpackPlugin = require("copy-webpack-plugin")
const HtmlWebpackPlugin = require("html-webpack-plugin")
const HtmlWebpackInlineSourcePlugin = require("html-webpack-inline-source-plugin")
const MiniCssExtractPlugin = require("mini-css-extract-plugin")
const OptimizeCssAssetsWebpackPlugin = require("optimize-css-assets-webpack-plugin")
const WebpackPwaManifest = require("webpack-pwa-manifest")
const webpackMerge = require("webpack-merge")
const Visualizer = require("webpack-visualizer-plugin")

/** True when the NODE_ENV environment variable is exactly "production". */
const isProduction = process.env.NODE_ENV === "production"

/**
 * Absolute path of the project root (the directory holding this file).
 * Update this when the configuration files are moved.
 */
const projectRoot = resolve(__dirname)

/** Resolves the given path segments relative to the project root. */
const fromRoot = (...segments) => resolve(projectRoot, ...segments)

// Directories
const sourceFolder = fromRoot("src")
const tsFolder = fromRoot("src", "ts")
const buildFolder = fromRoot("build")
const publicFolder = fromRoot("public")

// Individual files
const htmlTemplateFile = fromRoot("public", "index.html")
const tsconfigPath = fromRoot("tsconfig.json")
const tslintPath = fromRoot("tslint.json")

/**
 * Loader descriptor for ts-loader, pointing at the project's tsconfig.json
 * and requesting ES5 output with ESNext modules.
 *
 * NOTE(review): none of the rules visible in this configuration reference
 * this descriptor — confirm whether it is still needed.
 */
const tsLoader = {
  loader: "ts-loader",
  options: {
    configFile: tsconfigPath,
    transpileOnly: true,
    allowTsInNodeModules: true,
    compilerOptions: {
      target: "es5",
      module: "esnext",
      allowSyntheticDefaultImports: true,
    },
  },
}

/** Loader descriptor for babel-loader; no inline options are set here. */
const babelLoader = { loader: "babel-loader" }

/** Rule that hands files whose name ends in `.worker.ts` to worker-loader. */
const workerRule = {
  use: { loader: "worker-loader" },
  test: /\.worker\.ts$/,
}

/** Rule that sends every `.js`, `.ts` and `.tsx` file through babel-loader. */
const babelRule = {
  use: [babelLoader],
  test: /\.(js|ts|tsx)$/,
}

/**
 * Rule for `.scss` files: sass-loader, css-loader, and then either the
 * MiniCssExtractPlugin loader (production) or style-loader (otherwise).
 */
const sassRule = {
  test: /\.scss$/,
  use: [
    // First entry depends on the build kind.
    isProduction
      ? MiniCssExtractPlugin.loader
      : { loader: "style-loader", options: { singleton: true } },

    { loader: "css-loader" },

    {
      loader: "sass-loader",
      options: {
        includePaths: [sourceFolder],
        // NOTE(review): presumably prepended to every stylesheet by
        // sass-loader — confirm against the installed version.
        data: "@import './ts/modules/core/styles/_.scss';",
      },
    },
  ],
}

/**
 * Settings shared by the development and production configurations.
 *
 * @type {import('webpack').Configuration}
 */
const baseConfig = {
  context: projectRoot,

  // Polyfills are listed ahead of the application's own init module.
  entry: [
    "babel-polyfill",
    "url-search-params-polyfill",
    resolve(tsFolder, "init"),
  ],

  output: {
    path: buildFolder,
    publicPath: "/",
    filename: "js/[name].js",
  },

  module: {
    rules: [workerRule, babelRule, sassRule],
  },

  resolve: {
    modules: ["node_modules"],
    extensions: [".js", ".ts", ".tsx", ".scss"],
    mainFields: ["jsnext:main", "module", "main"],
    // Bare `modules/...` and `common/...` imports map into the ts folder.
    alias: {
      modules: resolve(tsFolder, "modules"),
      common: resolve(tsFolder, "common"),
    },
  },

  plugins: [
    // Copies the public folder; the HTML template is listed under `ignore`.
    new CopyWebpackPlugin([
      {
        from: publicFolder,
        ignore: [htmlTemplateFile],
      },
    ]),

    new CleanWebpackPlugin(buildFolder, { root: projectRoot, verbose: false }),

    // Currently disabled:
    // new ForkTsCheckerWebpackPlugin({
    //   tslint: tslintPath,
    //   tsconfig: tsconfigPath,
    // }),
  ],

  stats: {
    children: false,
    entrypoints: false,
    modules: false,
  },
}

// Add the bundle visualizer only when the process was started with `--stats`
// and the base configuration actually has a plugin list to append to.
if (process.argv.includes("--stats") && baseConfig.plugins) {
  baseConfig.plugins.push(new Visualizer())
}

/**
 * Development configuration: the base configuration merged with an inline
 * source map, dev-server settings and the HTML template plugin.
 */
const devConfig = webpackMerge(baseConfig, {
  mode: "development",
  devtool: "inline-source-map",

  devServer: {
    historyApiFallback: true,
    hot: false,
  },

  plugins: [
    new HtmlWebpackPlugin({
      template: htmlTemplateFile,
      chunksSortMode: "dependency",
    }),
  ],
})

/**
 * Production configuration: the base configuration merged with minification,
 * a PWA manifest, CSS extraction/optimisation, a minified HTML page and an
 * external source map.
 */
const prodConfig = webpackMerge(baseConfig, {
  mode: "production",
  devtool: "source-map",

  performance: {
    maxAssetSize: 500000,
  },

  optimization: {
    minimize: true,
    nodeEnv: "production",
  },

  plugins: [
    // Web app manifest; both logo files are requested in the same set of sizes.
    new WebpackPwaManifest({
      name: "WaveDistrict",
      short_name: "WaveDistrict",
      description: "",
      background_color: "#091F35",
      theme_color: "#00ba8c",
      orientation: "any",
      icons: [
        {
          src: resolve(publicFolder, "img/logo.svg"),
          sizes: [48, 72, 96, 128, 144, 192, 256, 512],
          destination: "icons",
        },
        {
          src: resolve(publicFolder, "img/logo.png"),
          sizes: [48, 72, 96, 128, 144, 192, 256, 512],
          destination: "icons",
        },
      ],
    }),

    // Stylesheets are written to css/<chunk name>.css.
    new MiniCssExtractPlugin({ filename: "css/[name].css" }),

    new OptimizeCssAssetsWebpackPlugin(),

    // HTML page built from the template, with minification switched on.
    new HtmlWebpackPlugin({
      template: htmlTemplateFile,
      inject: true,
      minify: {
        removeComments: true,
        collapseWhitespace: true,
        removeRedundantAttributes: true,
        useShortDoctype: true,
        removeEmptyAttributes: true,
        removeStyleLinkTypeAttributes: true,
        keepClosingSlash: true,
        minifyJS: true,
        minifyCSS: true,
        minifyURLs: true,
      },
    }),

    new HtmlWebpackInlineSourcePlugin(),
  ],
})

// Export the configuration that matches the current build kind.
if (isProduction) {
  module.exports = prodConfig
} else {
  module.exports = devConfig
}

Babel 配置(需要将 ES6 node_modules 转换为 ES5):

/**
 * Options passed to @babel/preset-env.
 * Chrome 41 is targeted for the sake of Googlebot.
 */
const babelEnv = { targets: { chrome: "41" } }

/**
 * Names of dependencies that the Babel `include` filter lets through even
 * though they live in node_modules.
 * Keep track of all conflicting dependencies here.
 */
const nonES5Deps = [
  "qs",
  "querystring",
  "query-string",
  "decko",
]

module.exports = function(api) {
  api.cache(true)

  return {
    exclude: [],
    include: (path) => {
      if (nonES5Deps.some((p) => path.match(p))) {
        return true
      }
      if (path.match(/node_modules/)) return false

      return true
    },
    presets: [
      ["@babel/preset-env", babelEnv],
      "@babel/preset-react",
      "@babel/preset-typescript",
    ],
    plugins: [
      "@babel/plugin-syntax-dynamic-import",
      "@babel/plugin-transform-arrow-functions",
      [
        "@babel/plugin-proposal-decorators",
        {
          legacy: true,
        },
      ],
      [
        "@babel/plugin-proposal-class-properties",
        {
          loose: true,
        },
      ],
    ],
  }
}

那么这里发生了什么?当问题仅出现在爬虫中时,如何调试问题?

标签: javascript, webpack, babeljs

解决方案


我在朋友的帮助下找到了问题所在。看来 Googlebot(和其他爬虫)不支持 AudioContext 对象,因此才会出现 `undefined is not a function` 错误。

我修改了代码,先检查 AudioContext 是否存在,并在它不存在时安全地禁用相关功能,问题就解决了。


推荐阅读