PgSQL,表基础探查脚本
表情况
表字段信息
SELECT attname FROM pg_attribute
WHERE attrelid = 'spfjbasepop.b01_gjjgrzhxx'::regclass -- 表名
AND attnum > 0 LIMIT 1;
表数据量
-- 表数据量
SELECT COUNT(1)
FROM "spfjbasepop".b01_gjjgrzhxx ;
-- 表去重数据量
SELECT DISTINCT COUNT(1)
FROM "spfjbasepop".b01_gjjgrzhxx ;
数据时间访问统计
-- 数据最晚交易日期:
SELECT MAX(zxjysj)
FROM"spfjbasepop".b01_gjjgrzhxx
-- 数据最早交易日期:
SELECT MIN(zxjysj) FROM (
SELECT * FROM "spfjbasepop".b01_gjjgrzhxx
WHERE
zxjysj > '1900-01-01 00:00:00.0'
);
字段情况
字段基础信息
-- 查询最小值/最大值/各分点/空值率/去重条数:
SELECT
'(dwb_source_table) ', -- 字段名
MIN(dwb_source_table) AS min_value,-- 最小值
PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY dwb_source_table) AS value1_4, -- 25%
PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY dwb_source_table) AS value1_2, -- 50%
PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY dwb_source_table) AS value3_4, -- 75%
MAX(dwb_source_table) AS max_value, -- 最大值
(SUM(CASE WHEN (TRIM(dwb_source_table::VARCHAR) ='' OR dwb_source_table IS NULL ) THEN 1 ELSE 0 END)/COUNT(*)::numeric) AS null_percent, -- 空值率
COUNT(DISTINCT dwb_source_table) AS distincit_num, -- 去重记录数
AVG(CASE WHEN pg_typeof(dwb_source_table)::VARCHAR IN ('numeric','integer','float') THEN dwb_source_table ELSE NULL END) AS average_value -- 平均值
FROM
"spfjbasepop".b01_gjjgrzhxx;
自动化探查脚本
|
A |
B |
C |
D |
E |
F |
G |
H |
I |
1 |
库中文名 |
库英文名 |
表中文名 |
表英文名 |
字段中文名 |
字段英文名 |
字段类型 |
单字段探查脚本 |
全表探查脚本 |
2 |
人口库基础层 |
spfjbasepop |
公积金个人账户信息 |
b01_gjjgrzhxx |
个人账户编号 |
grzhbh |
string |
=CONCAT("SELECT '",F2,"',","MIN(",F2,")::VARCHAR AS min_value",",PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY ",F2,")::VARCHAR AS value1_4",",PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY ",F2," )::VARCHAR AS value1_2",",PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY ",F2," )::VARCHAR AS value3_4",",MAX(",F2,")::VARCHAR AS max_value",",AVG(CASE WHEN pg_typeof(",F2,")::VARCHAR IN ('numeric','integer','float') THEN ",F2,"::VARCHAR ELSE NULL END)::NUMBER AS average_value",",(SUM(CASE WHEN (TRIM(",F2,"::VARCHAR) ='' OR ",F2," IS NULL) THEN 1 ELSE 0 END)/COUNT(*)::numeric) AS null_percent",",COUNT(DISTINCT ",F2,")AS distincit_num"," from ",B2,".",D2) |
=TEXTJOIN(" UNION ALL ",TRUE,H:H) |
-- 单字段探查脚本
=CONCAT("SELECT '",F2,"',","MIN(",F2,")::VARCHAR AS min_value",",PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY ",F2,")::VARCHAR AS value1_4",",PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY ",F2," )::VARCHAR AS value1_2",",PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY ",F2," )::VARCHAR AS value3_4",",MAX(",F2,")::VARCHAR AS max_value",",AVG(CASE WHEN pg_typeof(",F2,")::VARCHAR IN ('numeric','integer','float') THEN ",F2,"::VARCHAR ELSE NULL END)::NUMBER AS average_value",",(SUM(CASE WHEN (TRIM(",F2,"::VARCHAR) ='' OR ",F2," IS NULL) THEN 1 ELSE 0 END)/COUNT(*)::numeric) AS null_percent",",COUNT(DISTINCT ",F2,")AS distincit_num"," from ",B2,".",D2)
-- 全表探查脚本
=TEXTJOIN(" UNION ALL ",TRUE,H:H)
维度字段频次统计
SELECT
grzhztdm,
COUNT(1) AS frequence
FROM
"spfjbasepop".b01_gjjgrzhxx
GROUP BY
grzhztdm
ORDER BY
grzhztdm;
时间字段按年按月统计
-- 时间维度按年的频次统计:
SELECT
time2year,
COUNT(1)
FROM
(
SELECT
to_char (khrq, 'yyyy') AS time2year -- 时间转为年份
FROM
"spfjbasepop".b01_gjjgrzhxx
)
GROUP BY
time2year
ORDER BY
time2year;
-- 时间维度按月的频次统计:
SELECT
time2month,
COUNT(1)
FROM
(
SELECT
to_char (khrq, 'yyyy-mm') AS time2month -- 时间转为月份
FROM
"spfjbasepop".b01_gjjgrzhxx
)
GROUP BY
time2month
ORDER BY
time2month;
度量字段分区间分段统计
-- 度量值分区间分段统计:
SELECT
range_min,
range_max,
COUNT(*)
FROM (
SELECT
min_value + (max_value - min_value) *seq/part AS range_min, -- 最小区间
min_value + (max_value - min_value) *(seq+1)/part AS range_max -- 最大区间
FROM
LATERAL (SELECT 0 AS min_value) AS min_value, -- **分段的最小值**
LATERAL (SELECT 10000 AS max_value) AS max_value, -- **分段的最大值**
LATERAL (SELECT 500 AS part) AS part, -- 分段数
LATERAL(SELECT generate_series(0,part-1) AS seq)AS seq -- 递增序列
)AS ranges
JOIN "spfjbasepop".b01_gjjgrzhxx -- **表 名**
ON gryjce >= ranges.range_min -- **字段名**
AND gryjce < ranges.range_max -- **字段名**
GROUP BY range_min, range_max
ORDER BY range_min;
自动化探查脚本
|
A |
B |
C |
D |
E |
F |
G |
H |
I |
J |
K |
1 |
库中文名 |
库英文名 |
表中文名 |
表英文名 |
字段中文名 |
字段英文名 |
字段类型 |
单字段探查脚本 |
全表探查脚本 |
单字段维度/度量探查脚本 |
全表维度/度量探查脚本 |
1 |
库中文名 |
库英文名 |
表中文名 |
表英文名 |
字段中文名 |
字段英文名 |
字段类型 |
单字段探查脚本 |
全表探查脚本 |
|
|
2 |
人口库基础层 |
spfjbasepop |
公积金个人账户信息 |
b01_gjjgrzhxx |
个人账户编号 |
grzhbh |
string |
见字段情况章节 |
见字段情况章节 |
见下方自动脚本 |
见下方自动脚本 |
-- 单字段维度/度量探查脚本
= @IFS(OR(G2="string"),CONCAT("SELECT ",F2," AS ",D2,"_",F2,",COUNT(1) AS ",D2,"_",F2,"_frequence FROM ",B2,".",D2," GROUP BY ",F2," ORDER BY ",F2,";
","SELECT frequence AS ",D2,"_",F2,"_frequence,COUNT(1) AS ",D2,"_",F2,"_frequence_frequence from (","SELECT ",F2,",COUNT(1) AS frequence FROM ",B2,".",D2," GROUP BY ",F2," ORDER BY ",F2,")GROUP BY frequence;"),OR(G2="date"),CONCAT("SELECT time2year AS ",D2,"_",F2,"2year, COUNT(1) AS ",D2,"_",F2,"2year_frequence FROM (SELECT to_char(",F2,",'yyyy') AS time2year FROM ",B2,".",D2,") GROUP BY time2year ORDER BY time2year;","SELECT time2month AS ",D2,"_",F2,"2month, COUNT(1) AS ",D2,"_",F2,"2month_frequence FROM (SELECT to_char(",F2,",'yyyy-mm') AS time2month FROM ",B2,".",D2,") GROUP BY time2month ORDER BY time2month;"),OR(G2="number"),CONCAT("SELECT range_min AS ",D2,"_",F2,"_range_min, range_max AS ",D2,"_",F2,"_range_max,COUNT(*) AS ",D2,"_",F2,"_frequence FROM(SELECT min_value+(max_value-min_value)*seq/part AS range_min,min_value+(max_value-min_value)*(seq+1)/part AS range_max FROM LATERAL (SELECT min(",F2,") AS min_value FROM ",B2,".",D2,") AS min_value,LATERAL(SELECT max(",F2,") AS max_value FROM ",B2,".",D2,") AS max_value,LATERAL (SELECT 500 AS part) AS part,LATERAL(SELECT generate_series(0,part-1) AS seq)AS seq ) AS ranges JOIN ",B2,".",D2," ON ",F2,">=ranges.range_min AND ",F2,"< ranges.range_max GROUP BY range_min,range_max ORDER BY range_min;
","SELECT range_min AS ",D2,"_",F2,"_range_min, range_max AS ",D2,"_",F2,"_range_max,COUNT(*) AS ",D2,"_",F2,"_frequence FROM(SELECT min_value+(max_value-min_value)*seq/part AS range_min,min_value+(max_value-min_value)*(seq+1)/part AS range_max FROM LATERAL (SELECT PERCENTILE_DISC(0.20) WITHIN GROUP (ORDER BY ",F2,") AS min_value FROM ",B2,".",D2,") AS min_value,LATERAL(SELECT PERCENTILE_DISC(0.80) WITHIN GROUP (ORDER BY ",F2,") AS max_value FROM ",B2,".",D2,") AS max_value,LATERAL (SELECT 500 AS part) AS part,LATERAL(SELECT generate_series(0,part-1) AS seq)AS seq ) AS ranges JOIN ",B2,".",D2," ON ",F2,">=ranges.range_min AND ",F2,"< ranges.range_max GROUP BY range_min,range_max ORDER BY range_min;"))
-- 全表维度/度量探查脚本
= =TEXTJOIN("
",TRUE,R:R)