首页 > 解决方案 > 如何使用 HASH 对象进行 1=1 左连接

问题描述

我所拥有和想要改变的

proc sql;
create table indirect_costs as
select a.*, b.*, c.*
from dpd a
    left join efforts&costprod. b
on a.fnum = b.client_ref 
    left join (select * from costs where _name_= 'avg_effort_cost') c
on 1=1;
quit;

第一部分很简单,正是 1=1 困扰着我,因为实际上没有可以使用的密钥,或者至少没有我理解的密钥。

data indirect_costs;
    if 0 then set efforts&costprod. dpd costs(where _name_= 'avg_effort_cost');
    if _n_ eq 1 then do;
        dc="efforts&costprod.";
        declare hash h_cf(dataset:dc, hashexp:15, multidata:'Y');
        h_cf.defineKey('client_ref');
        h_cf.defineData(all:'Y');
        h_cf.defineDone(); 
        df="costs";
        declare hash h_df(dataset:df, hashexp:15, multidata:'Y');
        h_df.defineKey(all:'Y');
        h_df.defineData(all:'Y');
        h_df.defineDone(); 
        end;
    set dpd;
        rc_cf = h_cf.find(key:fnum);
        rc_df=h_df.find();
        do while (rc_cf=0 and rc_df=0);
        output;
        rc_cf =h_cf.find_next();
        rc_df=h_df.find_next();
        call missing(of _all_);
        end;
run;

任何帮助或指导将不胜感激。

我想要什么:让这个 sql 加入一个散列连接。

是什么阻止了我:语句 1=1。

我需要什么:语句 1=1 的哈希等效项。

标签: hashsas

解决方案


考虑这三个示例表。YMMV

data vendors(label='vetted vendors');
  do vendorid = 1 to 12;
    output;
  end;
run;

data projects(label='may reference unvetted vendors');
  call streaminit(2021);
  do projectid = 11 to 13;
    do vendorseq = 1 to rand('integer', 1, 6);
      /* mock up, no project has vendor 3, 5, or 7 */
      do until (vendorid not in (3 5 7));
        vendorid = rand('integer', 1, 12);
      end;
      output;
    end;
  end;
run;

data costs_across;
  call streaminit (2021);
  do method = 1 to 5;
    array c cost1-cost7;
    do over c;
      c = rand('integer', 10,50);
    end;
    output;
  end;
run;

proc transpose data=costs_across out=costs;
  by method;
  var cost:;
run;

内在和 1:1

data threeway;

  if 0 then set vendors projects costs;
 
  declare hash vendors (dataset:'vendors', ordered:'a');
  vendors.defineKey('vendorid');
  vendors.defineData(all:'yes');
  vendors.defineDone();

  declare hash projects (dataset:'projects', ordered:'a');
  projects.defineKey('projectid', 'vendorseq');
  projects.defineData(all:'yes');
  projects.defineDone();

  declare hash costs (dataset: 'costs(where=(_name_="cost3"))');
  costs.defineKey('method','_name_');
  costs.defineData(all:'yes');
  costs.defineDone();

  declare hiter p_iter('projects');
  declare hiter c_iter('costs');

  do while(p_iter.next() = 0);
    if vendors.check() = 0 then do;  /* same as criteria projects.vendorid=vendors.vendorid */
      matchcount+1;
      put 'INFO: ' matchcount= projectid= vendorseq= vendorid=;
      do while(c_iter.next() = 0);  /* same as criteria 1=1 */
        output;
      end;
    end;
  end;

  stop;
run;

LEFT JOIN 模仿需要更多的迭代


data threeway_left;
  if 0 then set vendors projects costs;
 
  declare hash vendors (dataset:'vendors', ordered:'a');
  vendors.defineKey('vendorid');
  vendors.defineData(all:'yes');
  vendors.defineDone();

  declare hash projects (dataset:'projects', ordered:'a', multidata:'y');
  projects.defineKey('vendorid');
  projects.defineData(all:'yes');
  projects.defineDone();

  declare hash costs (dataset: 'costs(where=(_name_="cost3"))', ordered:'a');
  costs.defineKey('method','_name_');
  costs.defineData(all:'yes');
  costs.defineDone();

  declare hiter v_iter('vendors');
  declare hiter c_iter('costs');

  call missing(of _all_);
  do while (v_iter.next() = 0);  /* full iteration mimics left of join */

    rcp = projects.find();

    if rcp ne 0 then do;
      /* no match in right, all missing */
      do while(c_iter.next() = 0);  /* same as criteria 1=1 */
        output;
      end;
    end;
    else do;
      /* iterate over the right matches */
      do until (rcv = 0);
        /* iterate over the costs */
        do while(c_iter.next() = 0);  /* same as criteria 1=1 */
          output;
        end;

        projects.has_next(result:rcv);
        if rcv then projects.find_next();
      end;
    end;

    call missing(of _all_);
  end;

  stop;
  drop rcv rcp;
run;

推荐阅读