MPP 환경의 hash partition table에서
잘못된 hash key 선정으로 인해 skew가 발생하는 현황을 조회하려면
다음 쿼리로 조회할 수 있다.
with COL as
(
select
tabschema
, tabname
, colname
, colno
, colcard
, partkeyseq
, remarks
from syscat.columns
where 1=1
and partkeyseq > 0
)
select TABINF.tabschema, TABINF.tabname
, max(COL.remarks) as remarks
, substr(xmlcast(xmlgroup(', '||COL.colname as colnm order by COL.partkeyseq) as varchar(128)), 2) as "파티션 키 컬럼"
, max(TBL.remarks) as "컬럼 설명"
--, max(COL.partkeyseq) "파티션 키 순번"
, max(TBL.card) as "테이블 건수"
, max(COL.colcard) as "파티션 키 컬럼 distinct 개수"
, stddev((data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)/1024) as "표준편차_MB"
,
stddev(data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)
/
sum(data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)
* 1024 as "사이즈 고려 표준편차지수"
, sum((data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)/1024) as size_mb
,
max(decode(dbpartitionnum, 0,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node0
, max(decode(dbpartitionnum, 1,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node1
, max(decode(dbpartitionnum, 2,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node2
, max(decode(dbpartitionnum, 3,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node3
, max(decode(dbpartitionnum, 4,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node4
, max(decode(dbpartitionnum, 5,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node5
, max(decode(dbpartitionnum, 6,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node6
, max(decode(dbpartitionnum, 7,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node7
, max(decode(dbpartitionnum, 8,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node8
, max(decode(dbpartitionnum, 9,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node9
, max(decode(dbpartitionnum, 10,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node10
, max(decode(dbpartitionnum, 11,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node11
, max(decode(dbpartitionnum, 12,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node12
, max(decode(dbpartitionnum, 13,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node13
, max(decode(dbpartitionnum, 14,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node14
, max(decode(dbpartitionnum, 15,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node15
, max(decode(dbpartitionnum, 16,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node16
, max(decode(dbpartitionnum, 17,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node17
, max(decode(dbpartitionnum, 18,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node18
, max(decode(dbpartitionnum, 19,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node19
, max(decode(dbpartitionnum, 20,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node20
, max(decode(dbpartitionnum, 21,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node21
, max(decode(dbpartitionnum, 22,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node22
, max(decode(dbpartitionnum, 23,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node23
, max(decode(dbpartitionnum, 24,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node24
, max(decode(dbpartitionnum, 25,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node25
, max(decode(dbpartitionnum, 26,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node26
, max(decode(dbpartitionnum, 27,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node27
, max(decode(dbpartitionnum, 28,
data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size))
as node28
, max(TBL.lastused) as "테이블 마지막 사용일"
from SYSIBMADM.ADMINTABINFO TABINF
right join COL on TABINF.tabschema=COL.tabschema and TABINF.tabname=COL.tabname
right join syscat.tables TBL on TABINF.tabschema=TBL.tabschema and TABINF.tabname=TBL.tabname
where TABINF.tabtype='T'
group by TABINF.tabschema, TABINF.tabname
order by TABINF.tabschema, TABINF.tabname
;
hash key를 여러 칼럼으로 구성한 경우에 대해서는 결과가 제대로 안나올 수 있다.
수행 결과는 다음과 같으며
노드 간 데이터 분산 표준편차는 노드별로 고르지 않은 데이터 분포를 나타내는 값으로 크면 클 수록 노드별 분산이 잘 안되어있다는 의미이고
skew로 인해 발생하는 수행 속도 이슈나 용량 이슈는 테이블이 클 수록 영향이 크므로,
테이블의 용량 가중치를 준 '사이즈 대비 표준편차' 를 기준으로 hash key 재지정 대상을 선정한 후
업무적으로 변경이 가능한 지를 판단하면 되겠다.