원문 : http://use-the-index-luke.com/sql/explain-plan/db2/getting-an-execution-plan


DB2의 실행계획을 처음 접하는 사람들은 아스키 아트 형태의 실행계획을 잘 읽지 못한다.

사실 이렇게 보기 좋게 실행계획을 아스키 아트로 만들어준 DB2 개발자들의 노력에 감사하지만
쿼리가 조금만 길어져도 한 화면에 실행계획 트리가 다 나오지 않아, 그 노력이 무색하다.


DB2에서는 db2expln, db2exfmt, SYSTOOLS.EXPLAIN* 세 종류 방식으로 실행계획을 조회해볼 수 있는데
이 중 SYSTOOLS.EXPLAIN*을 사용하여, 비교적 오라클 스타일의 실행계획을 보여주는 View가 있어서 소개한다.


1. 다음 프로시저를 실행하여 EXPLAIN 정보를 담을 SYSTOOLS.EXPLAIN_STREAM 등 EXPLAIN 테이블들을 생성한다.


db2 "CALL SYSPROC.SYSINSTALLOBJECTS('EXPLAIN', 'C', CAST (NULL AS VARCHAR(128)), CAST (NULL AS VARCHAR(128)))"


2. 다음 프로시저를 생성한다.


db2 -tvm +p <<EOF
-- Copyright (c) 2014-2017, Markus Winand - NO WARRANTY
-- Modifications by Ember Crooks - NO WARRANTY
-- Info & license: http://use-the-index-luke.com/s/last_explained
--
--#SET TERMINATOR ;

-- View that renders the most recent EXPLAIN of the current SESSION_USER as a
-- single-column, Oracle-style plan listing: SELECT * FROM last_explained.
-- Requires the EXPLAIN tables created by SYSPROC.SYSINSTALLOBJECTS (step 1).
CREATE OR REPLACE VIEW last_explained AS
-- Recursive CTE: walks the operator tree of the latest plan. For every operator
-- it builds a zero-padded materialized path ('001/002/...') that is later used
-- to print the plan in tree order, plus a cycle marker (non-zero when an
-- operator id re-appears on its own path).
WITH tree(operator_ID, level, path, explain_time, cycle)
AS
(
-- Anchor member: operator 1 of the most recent explain of this session user.
SELECT 1 operator_id
     , 0 level
     , CAST('001' AS VARCHAR(1000)) path
     , max(explain_time) explain_time
     , 0
  FROM SYSTOOLS.EXPLAIN_OPERATOR O
 WHERE O.EXPLAIN_REQUESTER = SESSION_USER

UNION ALL

-- Recursive member: follow EXPLAIN_STREAM edges (target -> source) downwards.
SELECT s.source_id
     , level + 1
     , tree.path || '/' || LPAD(CAST(s.source_id AS VARCHAR(3)), 3, '0')  path
     , tree.explain_time
       -- POSITION(...) > 0 means this operator id already occurs on the path,
       -- i.e. we found a cycle; the flag stops further recursion below
     , POSITION('/' || LPAD(CAST(s.source_id AS VARCHAR(3)), 3, '0')  || '/' IN path USING OCTETS)
  FROM tree
     , SYSTOOLS.EXPLAIN_STREAM S
 WHERE s.target_id    = tree.operator_id
   AND s.explain_time = tree.explain_time
   AND S.Object_Name IS NULL
   AND S.explain_requester = SESSION_USER
   AND tree.cycle = 0
   AND level < 100
)
SELECT *
  FROM (
SELECT "Explain Plan"
  FROM (
-- Render "ID | Operation | Rows [| ActualRows] | Cost"; each column is padded
-- to the maximum width across all plan lines via OVER() window aggregates.
SELECT CAST(   LPAD(id,        MAX(LENGTH(id))        OVER(), ' ')
            || ' | '
            || RPAD(operation, MAX(LENGTH(operation)) OVER(), ' ')
            || ' | '
            || LPAD(rows,      MAX(LENGTH(rows))      OVER(), ' ')
            || ' | '
            -- Don't show ActualRows columns if there are no actuals available at all
            || CASE WHEN COUNT(ActualRows) OVER () > 1 -- the heading 'ActualRows' is always present, so "1" means no OTHER values
                    THEN LPAD(ActualRows, MAX(LENGTH(ActualRows)) OVER(), ' ') || ' | '
                    ELSE ''
               END
            || LPAD(cost,      MAX(LENGTH(cost))      OVER(), ' ')
         AS VARCHAR(100)) "Explain Plan"
     , path
  FROM (
-- Heading row; path '0' sorts it before every real operator path ('001...').
SELECT 'ID' ID
     , 'Operation' Operation
     , 'Rows' Rows
     , 'ActualRows' ActualRows
     , 'Cost' Cost
     , '0' Path
  FROM SYSIBM.SYSDUMMY1
-- TODO: UNION ALL yields duplicate. where do they come from?
UNION
-- One line per operator, indented by its depth in the tree.
SELECT CAST(tree.operator_id as VARCHAR(254)) ID
     , CAST(LPAD(' ', tree.level, ' ')
       || CASE WHEN tree.cycle = 1
               THEN '(cycle) '
               ELSE ''
          END    
       || COALESCE (
             TRIM(O.Operator_Type)
          || COALESCE(' (' || argument || ')', '')
          || ' '
          || COALESCE(S.Object_Name,'')
          , ''
          )
       AS VARCHAR(254)) AS OPERATION
     , COALESCE(CAST(rows AS VARCHAR(254)), '') Rows
     , CAST(ActualRows as VARCHAR(254)) ActualRows -- note: no coalesce
     , COALESCE(CAST(CAST(O.Total_Cost AS BIGINT) AS VARCHAR(254)), '') Cost
     , path
  FROM tree
  -- s: estimated (and, when captured, actual) row counts per operator,
  -- formatted as "<rows> of <total> (  pct%)"
  LEFT JOIN ( SELECT i.source_id
              , i.target_id
              , CAST(CAST(ROUND(o.stream_count) AS BIGINT) AS VARCHAR(12))
                || ' of '
                || CAST (total_rows AS VARCHAR(12))
                || CASE WHEN total_rows > 0
                         AND ROUND(o.stream_count) <= total_rows THEN
                   ' ('
                   || LPAD(CAST (ROUND(ROUND(o.stream_count)/total_rows*100,2)
                          AS NUMERIC(5,2)), 6, ' ')
                   || '%)'
                   ELSE ''
                   END rows
              , CASE WHEN act.actual_value is not null then
                CAST(CAST(ROUND(act.actual_value) AS BIGINT) AS VARCHAR(12))
                || ' of '
                || CAST (total_rows AS VARCHAR(12))
                || CASE WHEN total_rows > 0 THEN
                   ' ('
                   || LPAD(CAST (ROUND(ROUND(act.actual_value)/total_rows*100,2)
                          AS NUMERIC(5,2)), 6, ' ')
                   || '%)'
                   ELSE NULL
                   END END ActualRows
              , i.object_name
              , i.explain_time
              -- i: per-operator input totals of the latest plan only
         FROM (SELECT MAX(source_id) source_id
                    , target_id
                    , MIN(CAST(ROUND(stream_count,0) AS BIGINT)) total_rows
                    , CAST(LISTAGG(object_name) AS VARCHAR(50)) object_name
                    , explain_time
                 FROM SYSTOOLS.EXPLAIN_STREAM
                WHERE explain_time = (SELECT MAX(explain_time)
                                        FROM SYSTOOLS.EXPLAIN_OPERATOR
                                       WHERE EXPLAIN_REQUESTER = SESSION_USER
                                     )
                GROUP BY target_id, explain_time
              ) I
         LEFT JOIN SYSTOOLS.EXPLAIN_STREAM O
           ON (    I.target_id=o.source_id
               AND I.explain_time = o.explain_time
               AND O.EXPLAIN_REQUESTER = SESSION_USER
              )
         -- actual cardinalities exist only when section actuals were captured
         LEFT JOIN SYSTOOLS.EXPLAIN_ACTUALS act
           ON (    act.operator_id  = i.target_id
               AND act.explain_time = i.explain_time
               AND act.explain_requester = SESSION_USER
               AND act.ACTUAL_TYPE  like 'CARDINALITY%'
              )
       ) s
    ON (    s.target_id    = tree.operator_id
        AND s.explain_time = tree.explain_time
       )
  LEFT JOIN SYSTOOLS.EXPLAIN_OPERATOR O
    ON (    o.operator_id  = tree.operator_id
        AND o.explain_time = tree.explain_time
        AND o.explain_requester = SESSION_USER
       )
  -- a: selected operator arguments (uniqueness, top-n sort, scan direction, ...)
  -- shown in parentheses after the operator name
  LEFT JOIN (SELECT LISTAGG (CASE argument_type
                             WHEN 'UNIQUE' THEN
                                  CASE WHEN argument_value = 'TRUE'
                                       THEN 'UNIQUE'
                                  ELSE NULL
                                  END
                             WHEN 'TRUNCSRT' THEN
                                  CASE WHEN argument_value = 'TRUE'
                                       THEN 'TOP-N'
                                  ELSE NULL
                                  END  
                             WHEN 'SCANDIR' THEN
                                  CASE WHEN argument_value != 'FORWARD'
                                       THEN argument_value
                                  ELSE NULL
                                  END                    
                             ELSE argument_value    
                             END
                           , ' ') argument
                  , operator_id
                  , explain_time
               FROM SYSTOOLS.EXPLAIN_ARGUMENT EA
              WHERE argument_type IN ('AGGMODE'   -- GRPBY
                                     , 'UNIQUE', 'TRUNCSRT' -- SORT
                                     , 'SCANDIR' -- IXSCAN, TBSCAN
                                     , 'OUTERJN' -- JOINs
                                     )
                AND explain_requester = SESSION_USER
              GROUP BY explain_time, operator_id

            ) A
    ON (    a.operator_id  = tree.operator_id
        AND a.explain_time = tree.explain_time
       )
     ) O
UNION ALL
-- Footer / separator lines; their 'Z*', 'A', 'Y', 'AA' paths sort them
-- before/after the plan body and the predicate section respectively.
VALUES ('Explain plan (c) 2014-2017 by Markus Winand - NO WARRANTY - V20171102','Z0')
    ,  ('Modifications by Ember Crooks - NO WARRANTY','Z1')
    ,  ('http://use-the-index-luke.com/s/last_explained','Z2')
    ,  ('', 'A')
    ,  ('', 'Y')
    ,  ('Predicate Information', 'AA')
UNION ALL
-- Predicate section: one line per predicate; the operator id is printed only
-- on the first line of each operator's group (LAG window check).
SELECT CAST (LPAD(CASE WHEN operator_id = LAG  (operator_id)
                                          OVER (PARTITION BY operator_id
                                                    ORDER BY pred_order
                                               )
                       THEN ''
                       ELSE operator_id || ' - '
                  END
                , MAX(LENGTH(operator_id )+4) OVER()
                , ' ')
             || how_applied
             || ' '
             || predicate_text
          AS VARCHAR(100)) "Predicate Information"
     , 'P' || LPAD(id_order, 5, '0') || pred_order path
  FROM (SELECT CAST(operator_id AS VARCHAR(254)) operator_id
             , LPAD(trim(how_applied)
                  ,  MAX (LENGTH(TRIM(how_applied)))
                    OVER (PARTITION BY operator_id)
                  , ' '
               ) how_applied
               -- next: capped to length 80 to avoid
               -- SQL0445W  Value "..." has been truncated.  SQLSTATE=01004
               -- error when long literal values may appear (space padded!)
             , CAST(substr(predicate_text, 1, 80) AS VARCHAR(80)) predicate_text
               -- START/STOP/SARG predicates first, anything else last
             , CASE how_applied WHEN 'START' THEN '1'
                                WHEN 'STOP'  THEN '2'
                                WHEN 'SARG'  THEN '3'
                                ELSE '9'
               END pred_order
             , operator_id id_order
          FROM systools.explain_predicate p
         WHERE explain_time = (SELECT MAX(explain_time)
                                 FROM systools.explain_operator)
       )
)
-- the materialized path puts headings, operators and predicates in tree order
ORDER BY path
);
EOF



3. 플랜 조회할 쿼리를 explain 뜬다.


db2 "explain plan for select * from sysibm.sysdummy1"



4. 오라클 스타일 플랜을 조회한다.

db2 "select * from last_explained"



Posted by in0de
,

MPP 환경의 hash partition table에서

잘못된 hash key 선정으로 인해 skew가 발생하는 현황을 조회하려면

다음 쿼리로 조회할 수 있다.


-- Hash-partitioning skew report for a DB2 DPF (MPP) database.
-- Lists every hash-partitioned table with its distribution key, per-node
-- physical size (nodes 0-28 hard-coded) and skew indicators; a large stddev
-- or skew index means the key distributes data badly across partitions.
-- ADMINTABINFO sizes are in KB, hence the /1024 conversions to MB.
with COL as
(
    -- distribution (hash) key columns: PARTKEYSEQ > 0 marks membership,
    -- its value gives the column's position inside the key
    select
        tabschema
        , tabname
        , colname
        , colno
        , colcard      -- distinct-value count of the key column (RUNSTATS based)
        , partkeyseq
        , remarks
    from syscat.columns
    where partkeyseq > 0
)
select TABINF.tabschema, TABINF.tabname
-- fixed: TBL.remarks is the TABLE comment, COL.remarks the key-COLUMN comment
-- (the original query had the two sources swapped against their labels)
, max(TBL.remarks) as remarks
, substr(xmlcast(xmlgroup(', '||COL.colname as colnm order by COL.partkeyseq) as varchar(128)), 2) as "파티션 키 컬럼"
, max(COL.remarks) as "컬럼 설명"
--, max(COL.partkeyseq) "파티션 키 순번"
, max(TBL.card) as "테이블 건수"
, max(COL.colcard) as "파티션 키 컬럼 distinct 개수"
, stddev((data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)/1024) as "표준편차_MB"
, stddev(data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size) / sum(data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size) * 1024 as "사이즈 고려 표준편차지수"
, sum((data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)/1024) as size_mb
-- per-node size pivot; DECODE is the Oracle-compatibility shorthand for CASE
-- (built in since DB2 9.7). Extend the list if the cluster has more partitions.
, max(decode(dbpartitionnum, 0, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node0
, max(decode(dbpartitionnum, 1, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node1
, max(decode(dbpartitionnum, 2, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node2
, max(decode(dbpartitionnum, 3, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node3
, max(decode(dbpartitionnum, 4, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node4
, max(decode(dbpartitionnum, 5, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node5
, max(decode(dbpartitionnum, 6, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node6
, max(decode(dbpartitionnum, 7, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node7
, max(decode(dbpartitionnum, 8, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node8
, max(decode(dbpartitionnum, 9, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node9
, max(decode(dbpartitionnum, 10, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node10
, max(decode(dbpartitionnum, 11, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node11
, max(decode(dbpartitionnum, 12, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node12
, max(decode(dbpartitionnum, 13, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node13
, max(decode(dbpartitionnum, 14, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node14
, max(decode(dbpartitionnum, 15, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node15
, max(decode(dbpartitionnum, 16, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node16
, max(decode(dbpartitionnum, 17, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node17
, max(decode(dbpartitionnum, 18, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node18
, max(decode(dbpartitionnum, 19, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node19
, max(decode(dbpartitionnum, 20, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node20
, max(decode(dbpartitionnum, 21, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node21
, max(decode(dbpartitionnum, 22, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node22
, max(decode(dbpartitionnum, 23, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node23
, max(decode(dbpartitionnum, 24, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node24
, max(decode(dbpartitionnum, 25, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node25
, max(decode(dbpartitionnum, 26, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node26
, max(decode(dbpartitionnum, 27, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node27
, max(decode(dbpartitionnum, 28, data_object_p_size+index_object_p_size+long_object_p_size+lob_object_p_size+xml_object_p_size)) as node28
, max(TBL.lastused) as "테이블 마지막 사용일"
from COL
-- one ADMINTABINFO row per table per database partition.
-- The original used RIGHT JOINs, but the WHERE filter on TABINF turned them
-- into inner joins anyway; explicit INNER JOINs state the real intent.
inner join SYSIBMADM.ADMINTABINFO TABINF
        on TABINF.tabschema = COL.tabschema
       and TABINF.tabname   = COL.tabname
       -- ADMINTABINFO's type column is TAB_TYPE ('T' = regular table);
       -- the original predicate referenced a non-existent TABINF.tabtype
       and TABINF.tab_type  = 'T'
inner join syscat.tables TBL
        on TBL.tabschema = COL.tabschema
       and TBL.tabname   = COL.tabname
group by TABINF.tabschema, TABINF.tabname
order by TABINF.tabschema, TABINF.tabname
;


hash key를 여러 칼럼으로 구성한 경우에 대해서는 결과가 제대로 안나올 수 있다.

수행 결과는 다음과 같으며


노드 간 데이터 분산 표준편차는 노드별로 고르지 않은 데이터 분포를 나타내는 값으로 크면 클 수록 노드별 분산이 잘 안되어있다는 의미이고


skew로 인해 발생하는 수행 속도 이슈나 용량 이슈는 테이블이 클 수록 영향이 크므로,

테이블의 용량 가중치를 준 '사이즈 고려 표준편차지수' 를 기준으로 hash key 재지정 대상을 선정한 후

업무적으로 변경이 가능한 지를 판단하면 되겠다.

Posted by in0de
,

데이터를 export 하여 가령 다른 서버로 전송한 후 sqlloader로 import할 때

특히 네트워크 속도가 좋지 않은 경우

덤프파일을 압축하면 전체적으로 시간을 절감할 수 있다.


데이터를 export할 공간이 넉넉하지 않은 경우에도 도움이 될 수 있다.



데이터 export 수행할 쿼리 파일을 생성한다.

-- in export.sql
-- SQL*Plus export script: spools the query result into the named pipe
-- export.pipe as '@@'-delimited text. The SET commands below strip every
-- decoration (headings, paging, feedback) so the spool contains data rows only.
SET term on
SET echo off
SET UNDERLINE off
SET HEADSEP off
SET TRIMSPOOL on
SET TRIMOUT on
SET COLSEP ","
SET SQLPROMPT ''
SET PAGESIZE 0
SET LINESIZE 32767
SET LONG 999999999
SET TERMOUT OFF
SET FEEDBACK OFF

-- spool target is the FIFO created by the wrapper shell script
SPOOL export.pipe;
-- pin the DATE rendering so the dump is unambiguous when reloaded
alter session set nls_date_format='yyyy-mm-dd hh24:mi:ss';
-- one concatenated column, '@@'-delimited to match the sqlldr control file
-- NOTE(review): SQL*Plus ends a SQL statement at a blank line by default
-- (SQLBLANKLINES is OFF) - confirm the blank lines below are intentional
SELECT

    col1

    ||'@@'|| col2

    ||'@@'|| col3

    ||'@@'|| col4

    ||'@@'||  col5
FROM TBL WHERE 1=1;
SPOOL OFF;
-- quit is required; without it sqlplus keeps sitting idle after the query
quit;
/

맨 마지막에 quit; / 부분이 없으면, 쿼리를 수행한 후 sqlplus가 종료되지 않고 멍하니 멈춰있게 되므로 유의한다.


sqlplus로 데이터 export → pipe 파일 → gzip 으로 연결하여 export와 동시에 압축되도록 한다.

# Export through a named pipe so the dump is compressed on the fly:
# gzip reads the FIFO in the background while sqlplus spools into it,
# so no uncompressed copy of the data ever touches disk.
rm -f ./export.pipe                 # remove any stale FIFO from a previous run

mkfifo ./export.pipe
chmod 770 ./export.pipe
gzip < ./export.pipe > ./export.csv.gz &      # reader side (was: cat | gzip - useless use of cat)
sqlplus -s ${DBUSER}/${password} @export.sql  # NOTE(review): password is visible in `ps` output
wait                                # ensure gzip has fully flushed export.csv.gz before going on
rm -f ./export.pipe




반대로, 이 파일을 sqlload할 때는 다음과 같이 수행한다.

-- in import.ctl

-- SQL*Loader control file: loads the '@@'-delimited dump fed through import.pipe.
LOAD DATA
INFILE import.pipe
BADFILE import.bad
APPEND INTO TABLE TBL
FIELDS TERMINATED BY "@@"
TRAILING NULLCOLS
(

col1

,col2

,col3

,col4 CHAR              -- explicit form when the target column is VARCHAR2 (CHAR is the default anyway)

,col5 INTEGER EXTERNAL  -- target column is NUMBER; text data needs INTEGER EXTERNAL.
                        -- Bare INTEGER means BINARY machine integers and would corrupt the load.

)


# Recreate the FIFO used to feed sqlldr with decompressed data.
rm -f ./import.pipe

mkfifo ./import.pipe
chmod 770 ./import.pipe
# Writer side: decompress the dump into the pipe in the background;
# gunzip finishes when sqlldr has consumed everything.
gunzip < ./export.csv.gz > ./import.pipe &

# Reader side: sqlldr consumes the pipe as it is written.
# NOTE(review): password is visible to other users via `ps` while this runs.
sqlldr userid=${DBUSER}/${password} control=import.ctl errors=100 rows=5000

rm -f ./import.pipe

INFILE을 지정할 때 현재 디렉토리임을 명시하려고 ./import.pipe로 쓰면 오류가 발생하므로

그냥 파일이름만 써줘야 한다.

마찬가지로 sqlldr 커맨드에서 control 파일을 control=./import.ctl 로 지정하면 오류 발생한다.


컨트롤 파일을 통해 여러 옵션을 조정할 필요가 없다면

간단히 sqlldr userid=${DBUSER}/${password} data=import.pipe 로 수행할 수도 있다.

5000건 단위로 commit을 위해 rows=5000을 지정했지만, 실제 수행될 때는 5천 건 단위로 커밋을 찍지 않는데, 이런 경우에는 bindsize 옵션을 추가해볼 수 있다.

Posted by in0de
,