data-dictionary2excel/gen_datadic4dm.py at main · pangpang20/data-dictionary2excel · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# -*- coding: utf-8 -*-
import pandas as pd
import dmPython
from openpyxl import Workbook
from openpyxl.styles import Font, Border, Side
from openpyxl.utils.dataframe import dataframe_to_rows
import argparse


# 达梦数据库连接配置
DM_DEFAULT_PORT = 5236
DM_DEFAULT_USER = "SYSDBA"


def generate_data_dictionary(db_name, user, password, host, port):
    # Create dmPython connection
    # dmPython 连接参数：user, password, host, port, database(可选，用于指定数据库实例)
    conn = dmPython.connect(
        user=user,
        password=password,
        host=host,
        port=port
    )
    cursor = conn.cursor()

    # Get all table names and comments from specified schema
    # 达梦数据库使用 DBA_TABLES、DBA_TAB_COMMENTS，需要指定 OWNER
    table_query = f"""
    SELECT
        t.TABLE_NAME,
        c.COMMENTS AS TABLE_COMMENT
    FROM DBA_TABLES t
    LEFT JOIN DBA_TAB_COMMENTS c ON t.TABLE_NAME = c.TABLE_NAME AND c.OWNER = t.OWNER AND c.TABLE_TYPE = 'TABLE'
    WHERE t.OWNER = '{db_name}'
    """
    cursor.execute(table_query)
    tables_df = pd.DataFrame(cursor.fetchall(), columns=['table_name', 'table_comment'])

    # Initialize DataFrame with initial rows
    data = pd.DataFrame([
        ["1.1.", f"数据库：{db_name}", "", "", "", "", ""],
        ["", f"列出的数据库对象：{len(tables_df)} 表", "", "", "", "", ""],
        ["", "", "", "", "", "", ""]  # Empty row
    ], columns=["编号", "序号", "字段名称", "类型", "是否允许为空", "是否主键", "中文注释"])

    # Query each table's structure
    for table_idx, table_row in tables_df.iterrows():
        table_name = table_row['table_name']
        table_comment = table_row['table_comment'] if table_row['table_comment'] else ""

        # Add table header and field header
        new_rows = pd.DataFrame([
            [f"", f"{table_idx + 1}.", f"表：{table_name}", "", "", "", f"表注释：{table_comment}"],
            ["", "字段", "", "", "", "", ""]
        ], columns=["编号", "序号", "字段名称", "类型", "是否允许为空", "是否主键", "中文注释"])

        # Query table structure - get column info from DBA_TAB_COLUMNS
        # 达梦数据库使用 DBA_TAB_COLUMNS、DBA_CONSTRAINTS、DBA_CONS_COLUMNS、DBA_COL_COMMENTS
        column_query = f"""
        SELECT
            c.COLUMN_NAME AS "字段名称",
            c.DATA_TYPE AS "类型",
            c.DATA_LENGTH AS "长度",
            c.DATA_PRECISION AS "精度",
            c.DATA_SCALE AS "小数位",
            c.NULLABLE AS "是否允许为空",
            CASE
                WHEN pk.COLUMN_NAME IS NOT NULL THEN 'YES'
                ELSE 'NO'
            END AS "是否主键",
            cm.COMMENTS AS "中文注释"
        FROM
            DBA_TAB_COLUMNS c
        LEFT JOIN DBA_COL_COMMENTS cm ON c.TABLE_NAME = cm.TABLE_NAME AND c.COLUMN_NAME = cm.COLUMN_NAME AND c.OWNER = cm.OWNER
        LEFT JOIN (
            SELECT cc.COLUMN_NAME
            FROM DBA_CONSTRAINTS cs
            JOIN DBA_CONS_COLUMNS cc ON cs.CONSTRAINT_NAME = cc.CONSTRAINT_NAME AND cs.OWNER = cc.OWNER
            WHERE cc.OWNER = '{db_name}' AND cc.TABLE_NAME = '{table_name}' AND cs.CONSTRAINT_TYPE = 'P'
        ) pk ON c.COLUMN_NAME = pk.COLUMN_NAME
        WHERE
            c.OWNER = '{db_name}'
            AND c.TABLE_NAME = '{table_name}'
        ORDER BY
            c.COLUMN_ID
        """
        cursor.execute(column_query)
        df = pd.DataFrame(
            cursor.fetchall(),
            columns=["字段名称", "类型", "长度", "精度", "小数位", "是否允许为空", "是否主键", "中文注释"]
        )
        print(f"正在处理表：{table_name}")

        # Add position column and combine type with length/precision
        df['序号'] = range(1, len(df) + 1)
        df['类型'] = df.apply(
            lambda row: f"{row['类型']}({int(row['长度'])})"
            if pd.notnull(row['长度']) and row['类型'] not in ['TEXT', 'LONGTEXT', 'MEDIUMTEXT', 'BLOB', 'TINYBLOB', 'MEDIUMBLOB', 'LONGBLOB', 'CLOB', 'BFILE']
            else (f"{row['类型']}({int(row['精度'])},{int(row['小数位'])})"
                  if pd.notnull(row['精度']) and pd.notnull(row['小数位'])
                  else row['类型']),
            axis=1
        )
        df = df.drop(columns=['长度', '精度', '小数位'])

        # Transform 是否允许为空 from 'Y'/'N' to '是'/'否'
        df['是否允许为空'] = df['是否允许为空'].apply(lambda x: '是' if x == 'Y' else '否')

        # Reorder and reconstruct df with exact column order
        columns_order = ["编号", "序号", "字段名称", "类型", "是否允许为空", "是否主键", "中文注释"]
        df = df[["序号", "字段名称", "类型", "是否允许为空", "是否主键", "中文注释"]].copy()
        df.insert(0, "编号", "")
        df = df[columns_order]

        # Add field headers and data
        field_headers = pd.DataFrame([["", "序号", "字段名称", "类型", "是否允许为空", "是否主键", "中文注释"]],
                                   columns=columns_order)
        new_rows = pd.concat([new_rows, field_headers, df], ignore_index=True)
        new_rows = pd.concat([new_rows, pd.DataFrame([["", "", "", "", "", "", ""]],
                                                    columns=columns_order)],
                            ignore_index=True)

        # Append new rows to data with correct column order
        data = pd.concat([data, new_rows], ignore_index=True)

    # Close cursor and connection
    cursor.close()
    conn.close()

    # Create Excel workbook
    wb = Workbook()
    ws = wb.active
    ws.title = "数据字典"

    # Write DataFrame to worksheet without header row
    for row in data.itertuples(index=False):
        ws.append(list(row))

    # Apply styling
    bold_font = Font(bold=True)
    border = Border(
        left=Side(style='thin'),
        right=Side(style='thin'),
        top=Side(style='thin'),
        bottom=Side(style='thin')
    )

    # Track rows for borders and bold fonts
    for row_idx, row in enumerate(ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=7), 1):
        row_data = [cell.value for cell in row]
        if row_data[0] and row_data[0].endswith('.') or row_data[1] == "序号":
            for cell in row:
                cell.font = bold_font
        if row_data[1] == "序号" or (row_data[0] == "" and isinstance(row_data[1], (int, float))):
            for cell_idx, cell in enumerate(row, 1):
                if cell_idx > 1:
                    cell.border = border

    # Save to Excel
    output_file = f"{db_name}_data_dictionary.xlsx"
    wb.save(output_file)

    print(f"数据字典已生成到 {output_file}")

def main():
    parser = argparse.ArgumentParser(description="生成达梦数据库数据字典.")
    parser.add_argument("db_name", help="要生成字典的数据库名称")
    parser.add_argument("--user", default=DM_DEFAULT_USER, help="数据库用户名")
    parser.add_argument("--password", default="", help="数据库密码")
    parser.add_argument("--host", default="", help="数据库主机")
    parser.add_argument("--port", default=str(DM_DEFAULT_PORT), help="数据库端口")
    args = parser.parse_args()

    generate_data_dictionary(args.db_name, args.user, args.password, args.host, args.port)

if __name__ == "__main__":
    main()