首页 > 解决方案 > SQL中的正则表达式提取函数实现?

问题描述

我需要查询 GitHub 上的文件内容,以找到特定函数 add 的实现。以下是我用 SQL 查询提取到的内容:

(function(f){if(typeof exports==="object"&&typeof module!=="undefined")

{module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.ngContextmenu = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(_dereq_,module,exports){
    'use strict';

    angular.module('io.dennis.contextmenu')
    .directive('contextmenuContainer', Container);

    function Container() {
      return {
        scope: {
          contextmenu: '=contextmenuContainer'
        },
        restrict: 'A',
        controller: ['$scope', ContainerCtrl]
      };
    .
    .
    .
    .
      function add(entry) {
        if (!isSelected(entry)) {
          selected.unshift(entry);
          toggleSelected(entry.element, true);
        }
        pub.item = selected[0].item;
      }

      function remove(entry) {
        var index = selected.indexOf(entry);
        if (index > -1) {
          selected.splice(index, 1);
        }
        toggleSelected(entry.element, false);
      }

     .
    .
    .
      function toggle(entry, multi) {
        multi = multi || false;
        var isEntrySelected = isSelected(entry);

        if (isEntrySelected) {
          remove(entry);

        } else {

          if (!multi) { clear(); }
          add(entry);
        }
      }

      function clear() {
        angular.forEach(selected, function(entry) {
          toggleSelected(entry.element, false);
        });
        selected = [];
      }

      function getListOfIds(limit, path) {
        path = path || 'item.id';
        limit = Math.min(limit || selected.length, selected.length);
        var list = selected.slice(0, limit).map(function(entry) {
          return safeGet(entry, path, '');
        });
        var asString = list.join(', ');
        return (limit < selected.length) ? asString + '..' : asString;
      }

      function toggleSelected(element, forceState) {
        element.toggleClass('selected', forceState);
      }

      function safeGet(obj, path, _default) {

        if (!obj) {
          return _default;
        }

        if (!path || !String(path).length) {
          return obj;
        }

        var keys = (angular.isArray(path)) ? path : path.split('.');
        var next = keys.shift();
        return get(obj[next], keys, _default);
      }
    }

    },{}]},{},[4])(4)
    });

类似这样的行还有很多。我正在使用的查询是:

-- BigQuery legacy SQL (bracketed [project:dataset.table] reference, CONTAINS operator).
-- Returns the whole file content and id of at most 40 rows whose content
-- contains 'function add(' and whose sample_path ends in '.js'.
SELECT
  content,
  id
FROM
  [bigquery-public-data:github_repos.sample_contents]
  WHERE
  content CONTAINS 'function add('
  AND sample_path LIKE '%.js'
  LIMIT  40;

如何修改上述查询,使输出中只包含函数“add”的实现,即:

function add(entry) {
    if (!isSelected(entry)) {
      selected.unshift(entry);
      toggleSelected(entry.element, true);
    }
    pub.item = selected[0].item;
  }

提前致谢!

标签: sql regex github google-bigquery

解决方案


这是一个过度简化的 JavaScript UDF 版本,它甚至没有尝试匹配大括号,但我相信你能明白其中的思路。我在 JavaScript 代码中留下了注释,方便您自行完成“查找函数结尾”这一并不简单的工作。祝使用愉快!

#standardSQL
-- Temporary JavaScript UDF: returns the slice of `code` that starts at the
-- first occurrence of 'function add(' and ends at the first '}' found after it.
-- Braces are not balanced, so a function body containing a nested block is
-- cut off at that inner block's closing brace (see the TODOs in the body).
CREATE TEMP FUNCTION
  filter_add(code STRING)
  RETURNS STRING
  LANGUAGE js AS """
  // find start, TODO: there may be multiple function add() in single file, handle in a loop
  var start = code.indexOf('function add(');
  // TODO: count open braces and close braces to decide where function definition ends.
  // Note that braces may be in 1) single-line comment; 2) multi-line comments; 3) part of string or char literal
  var end = code.indexOf('}', start);
  return code.substr(start, end-start+1);
  """ ;
-- Apply the UDF to at most 10 rows whose path ends in '.js' and whose
-- content contains 'function add('.
SELECT
  filter_add(content), id
FROM
  `bigquery-public-data`.github_repos.sample_contents
WHERE
  -- STRPOS returns 0 when the substring is absent, so != 0 keeps only rows
  -- where the UDF's indexOf('function add(') will find a match.
  STRPOS(content, 'function add(') != 0
  AND sample_path LIKE '%.js'
LIMIT 10;

推荐阅读