mysql - 需要将数据从一个表加载到另一个表
问题描述
初始行数据在下表中 -
-- Source ("wide") table: each row carries a customer_key plus four repeated
-- (id, cd, brand_nm) offer slots stored side by side. No keys or NOT NULL
-- constraints are declared, so every column is nullable.
CREATE TABLE offer_row_data (
    customer_key     CHAR(20),

    -- offer slot 1
    offer1_id        CHAR(20),
    offer1_cd        CHAR(20),
    offer1_brand_nm  CHAR(20),

    -- offer slot 2
    offer2_id        CHAR(20),
    offer2_cd        CHAR(20),
    offer2_brand_nm  CHAR(20),

    -- offer slot 3
    offer3_id        CHAR(20),
    offer3_cd        CHAR(20),
    offer3_brand_nm  CHAR(20),

    -- offer slot 4
    offer4_id        CHAR(20),
    offer4_cd        CHAR(20),
    offer4_brand_nm  CHAR(20)
);
我需要对其进行转换并加载到下表中...
-- Target ("long") table: one offer per row, identified by an
-- auto-generated surrogate key.
CREATE TABLE offer_data (
    offer_key       INTEGER  NOT NULL AUTO_INCREMENT,
    offer_id        CHAR(20) NOT NULL,
    offer_cd        CHAR(20) NOT NULL,
    offer_brand_nm  CHAR(20),
    PRIMARY KEY (offer_key)
);
这是一些随机数据-
-- Sample data: three customers with four offers each.
-- The column list is explicit so the statement does not depend on the
-- physical column order of offer_row_data. Values are kept verbatim.
INSERT INTO offer_row_data (
    customer_key,
    offer1_id, offer1_cd, offer1_brand_nm,
    offer2_id, offer2_cd, offer2_brand_nm,
    offer3_id, offer3_cd, offer3_brand_nm,
    offer4_id, offer4_cd, offer4_brand_nm
)
VALUES
    ('1',
     'offer_id_1a', 'offe3_cd_1a', 'offer_nm_1a',
     'offer_id_1b', 'offe3_cd_1b', 'offer_nm_1b',
     'offer_id_1c', 'offe3_cd_1c', 'offer_nm_1c',
     'offer_id_1d', 'offe3_cd_1d', 'offer_nm_1d'),
    ('2',
     'offer_id_2a', 'offe3_cd_2a', 'offer_nm_2a',
     'offer_id_2b', 'offe3_cd_2b', 'offer_nm_2b',
     'offer_id_2c', 'offe3_cd_2c', 'offer_nm_2c',
     'offer_id_2d', 'offe3_cd_2d', 'offer_nm_2d'),
    ('3',
     'offer_id_3a', 'offe3_cd_3a', 'offer_nm_3a',
     'offer_id_3b', 'offe3_cd_3b', 'offer_nm_3b',
     'offer_id_3c', 'offe3_cd_3c', 'offer_nm_3c',
     'offer_id_3d', 'offe3_cd_3d', 'offer_nm_3d');
这是我的解决方案 -
-- Unpivot the four offer slots of offer_row_data into offer_data,
-- keeping each distinct (id, cd, brand_nm) combination once.
-- UNION (i.e. UNION DISTINCT) already removes duplicate rows from the
-- combined result, so a per-branch DISTINCT is redundant and is omitted.
INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer1_id, offer1_cd, offer1_brand_nm
FROM offer_row_data
UNION
SELECT offer2_id, offer2_cd, offer2_brand_nm
FROM offer_row_data
UNION
SELECT offer3_id, offer3_cd, offer3_brand_nm
FROM offer_row_data
UNION
SELECT offer4_id, offer4_cd, offer4_brand_nm
FROM offer_row_data;
由于我有 2500 万条数据,这种写法会带来性能负担,因此希望找到一个更高效的解决方案
例子:
输入:
+--------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+
| customer_key | offer1_id | offer1_cd | offer1_brand_nm | offer2_id | offer2_cd | offer2_brand_nm | offer3_id | offer3_cd | offer3_brand_nm | offer4_id | offer4_cd | offer4_brand_nm |
+--------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+
| 1 | offer_id_1a | offe3_cd_1a | offer_nm_1a | offer_id_1b | offe3_cd_1b | offer_nm_1b | offer_id_1c | offe3_cd_1c | offer_nm_1c | offer_id_1d | offe3_cd_1d | offer_nm_1d |
| 2 | offer_id_2a | offe3_cd_2a | offer_nm_2a | offer_id_2b | offe3_cd_2b | offer_nm_2b | offer_id_2c | offe3_cd_2c | offer_nm_2c | offer_id_2d | offe3_cd_2d | offer_nm_2d |
| 3 | offer_id_3a | offe3_cd_3a | offer_nm_3a | offer_id_3b | offe3_cd_3b | offer_nm_3b | offer_id_3c | offe3_cd_3c | offer_nm_3c | offer_id_3d | offe3_cd_3d | offer_nm_3d |
+--------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+-------------+-------------+-----------------+
预期输出:
+-----------+-------------+-------------+----------------+
| offer_key | offer_id | offer_cd | offer_brand_nm |
+-----------+-------------+-------------+----------------+
| 1 | offer_id_1a | offe3_cd_1a | offer_nm_1a |
| 2 | offer_id_2a | offe3_cd_2a | offer_nm_2a |
| 3 | offer_id_3a | offe3_cd_3a | offer_nm_3a |
| 4 | offer_id_1b | offe3_cd_1b | offer_nm_1b |
| 5 | offer_id_2b | offe3_cd_2b | offer_nm_2b |
| 6 | offer_id_3b | offe3_cd_3b | offer_nm_3b |
| 7 | offer_id_1c | offe3_cd_1c | offer_nm_1c |
| 8 | offer_id_2c | offe3_cd_2c | offer_nm_2c |
| 9 | offer_id_3c | offe3_cd_3c | offer_nm_3c |
| 10 | offer_id_1d | offe3_cd_1d | offer_nm_1d |
| 11 | offer_id_2d | offe3_cd_2d | offer_nm_2d |
| 12 | offer_id_3d | offe3_cd_3d | offer_nm_3d |
+-----------+-------------+-------------+----------------+
解决方案
我会选择下面这种简单、而且可能是最快的做法;即使您使用的不是 MySQL 8.0 / MariaDB 10.2(支持 CTE 的版本),它也能工作:
-- Load offer_data one offer slot at a time: four plain INSERT ... SELECT
-- statements, each a single scan of offer_row_data with no de-duplication.

-- offer slot 1
INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer1_id, offer1_cd, offer1_brand_nm
FROM offer_row_data;

-- offer slot 2
INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer2_id, offer2_cd, offer2_brand_nm
FROM offer_row_data;

-- offer slot 3
INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer3_id, offer3_cd, offer3_brand_nm
FROM offer_row_data;

-- offer slot 4
INSERT INTO offer_data (offer_id, offer_cd, offer_brand_nm)
SELECT offer4_id, offer4_cd, offer4_brand_nm
FROM offer_row_data;
如果每行“最多有 4 个”优惠(即某些优惠位可能为空),我建议在第 2 个 SELECT 中加上如下条件:
-- Clause fragment: append to the SELECT that reads offer slot 2 so that rows
-- whose slot-2 columns are all NULL are skipped.
-- NOTE(review): a slot with a NULL id or cd but a non-NULL sibling column
-- still passes this filter, while offer_data declares offer_id and offer_cd
-- NOT NULL -- confirm how such partially filled slots should be handled.
WHERE offer2_id IS NOT NULL
OR offer2_cd IS NOT NULL
OR offer2_brand_nm IS NOT NULL
(并且对其他选择做类似的事情。)
DISTINCT 会减慢速度,但如果确实需要去重,就加上它。或者,使用 INSERT IGNORE,并在目标表上建立某种 UNIQUE 键来拦截重复行。
您可能需要重新考虑新表是否真的需要一个 AUTO_INCREMENT 列。也许新表中的某一列(或几列的组合)可以作为“自然”的 PRIMARY KEY?
这也是检查数据类型的好时机。也许您不需要 8 字节的 BIGINT,而可以使用 4 字节的 INT 或 3 字节的 MEDIUMINT。char(20) 列真的是固定长度吗?它们是 utf8 吗?配合合适的 CHARACTER SET 使用 VARCHAR(20),在空间和性能上可能会明显更好。
新表加载完成后,删除旧表。把一个数组拆散到多个列中存放是不好的做法。
推荐阅读
- ios - SwiftUI:如何“滑动删除”嵌套 ForEach 中的元素(分组)
- javascript - 流星上的时间线不渲染
- excel - 在 excel 公式中使用数据库连接 - 没有 VBA
- java - 即使变量更新,whlie 循环也不会退出
- python-3.x - 如何找到VGG16预测的父类?
- html - ngx-bootstrap bs-sortable 在将菜单项拖动到可排序部分时显示先前拖动的项目
- visual-studio-code - VS Code - 在目录中的所有仅 c++ (.cpp) 和标头 (.h) 文件中搜索文本
- elasticsearch - 我可以在 Elasticsearch 7.x 中从 get geo_point 存储 geo_shape 吗?
- javascript - 如何提取页面的实际href,在检查该页面时显示为#
- command-line - 使用命令行或 WMI 查询(无 powershell)获取分配给 AD 中安全组的打印机列表