pg2dm-converter/converter.js
dengqichen 20bff2b6cd init
2025-11-15 15:34:19 +08:00

536 lines
16 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
const config = require('./config');
/**
 * Converts PostgreSQL DDL text into SQL accepted by the DaMeng database.
 *
 * The converter is a pipeline of regex-based text rewrites (see `convert`).
 * Every instance collects its own log entries, warnings and counters so a
 * summary can be printed or written to disk after a run.
 */
class PG2DMConverter {
  constructor() {
    this.conversionLog = [];
    this.warnings = [];
    this.stats = {
      dataTypes: 0,
      sequences: 0,
      collates: 0,
      indexes: 0,
      coalesceIndexes: 0,
    };
  }

  /**
   * Records a log entry and echoes it to stdout.
   *
   * @param {string} message - Text to record.
   * @param {string} [type='INFO'] - Severity label stored with the entry.
   */
  log(message, type = 'INFO') {
    const timestamp = new Date().toISOString();
    this.conversionLog.push({ timestamp, type, message });
    console.log(`[${type}] ${message}`);
  }

  /**
   * Records a warning (kept in `warnings` and also logged as WARN).
   *
   * @param {string} message - Warning text.
   */
  warn(message) {
    this.warnings.push(message);
    this.log(message, 'WARN');
  }

  /**
   * Rewrites PostgreSQL data types.
   *
   * Steps: map basic type names through `config.dataTypeMapping`, drop the
   * precision argument of `timestamp(n)`, drop `with/without time zone`, and
   * clamp DECIMAL precision to 38.
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} Converted SQL text.
   */
  convertDataTypes(sql) {
    let converted = sql;

    // 1. Basic type names (including float and timestamp variants).
    const typePattern = /\b(int8|int4|int2|numeric|bool|float8|float4|float|timestamptz|text)\b/gi;
    converted = converted.replace(typePattern, (match) => {
      const mapped = config.dataTypeMapping[match.toLowerCase()];
      if (mapped) {
        this.stats.dataTypes++;
        return mapped;
      }
      return match;
    });

    // 2. timestamp(6) / timestamp(0) -> TIMESTAMP (precision argument removed).
    converted = converted.replace(/\btimestamp\s*\(\s*\d+\s*\)/gi, (match) => {
      this.log(`移除timestamp精度参数: ${match} -> TIMESTAMP`);
      return 'TIMESTAMP';
    });

    // 3. Drop the time-zone clause. The count is taken from the replacements
    //    actually performed, so clauses that only became matchable after
    //    steps 1-2 (e.g. "timestamp(6) without time zone") are reported too.
    let timezoneCount = 0;
    converted = converted.replace(/TIMESTAMP\s+(without|with)\s+time\s+zone/gi, () => {
      timezoneCount++;
      return 'TIMESTAMP';
    });
    if (timezoneCount > 0) {
      this.log(`移除 ${timezoneCount} 个TIMESTAMP时区子句`);
    }

    // 4. Clamp DECIMAL precision to 38. The scale is optional so that
    //    DECIMAL(50) is clamped as well as DECIMAL(50,2).
    const decimalPattern = /DECIMAL\s*\(\s*(\d+)\s*(?:,\s*(\d+)\s*)?\)/gi;
    converted = converted.replace(decimalPattern, (match, precision, scale) => {
      const p = Number.parseInt(precision, 10);
      if (p <= 38) {
        return match;
      }
      const render = (prec) =>
        scale === undefined
          ? `DECIMAL(${prec})`
          : `DECIMAL(${prec},${Number.parseInt(scale, 10)})`;
      this.warn(`${render(p)} 精度超出达梦限制(最大38)已调整为${render(38)}`);
      return render(38);
    });

    return converted;
  }

  /**
   * Replaces `DEFAULT nextval(...)` sequence defaults with IDENTITY(1, 1).
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} Converted SQL text.
   */
  convertSequences(sql) {
    let converted = sql;

    // Pass 1: full column definitions of the form
    //   "id" BIGINT NOT NULL DEFAULT nextval(...)
    // The type part is deliberately loose so any type name matches.
    const fullPattern = /"(\w+)"\s+([A-Z]+(?:\([^)]+\))?)\s+NOT\s+NULL\s+DEFAULT\s+nextval\s*\([^)]+\)/gi;
    converted = converted.replace(fullPattern, (match, colName, dataType) => {
      this.stats.sequences++;
      this.log(`转换列定义: ${colName} ${dataType} -> IDENTITY(1,1)`);
      return `"${colName}" ${dataType} IDENTITY(1, 1) NOT NULL`;
    });

    // Pass 2: any remaining DEFAULT nextval(...) is swapped for IDENTITY(1, 1).
    const defaultPattern = /DEFAULT\s+nextval\s*\([^)]+\)/gi;
    converted = converted.replace(defaultPattern, (match) => {
      this.stats.sequences++;
      this.log(`移除序列DEFAULT: ${match.substring(0, 50)}...`);
      return 'IDENTITY(1, 1)';
    });

    return converted;
  }

  /**
   * Removes COLLATE clauses.
   *
   * Handles `COLLATE "pg_catalog"."default"`, `COLLATE "default"`,
   * `COLLATE pg_catalog."default"` and `COLLATE name`. A single pattern covers
   * the optional schema qualifier, so a qualified collation is removed as a
   * whole (no dangling `."default"`) and counted exactly once.
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} SQL text without COLLATE clauses.
   */
  removeCollate(sql) {
    const collatePattern = /COLLATE\s+(?:"[^"]+"|\w+)(?:\.(?:"[^"]+"|\w+))?/gi;
    let removed = 0;
    const converted = sql.replace(collatePattern, () => {
      removed++;
      return '';
    });
    if (removed > 0) {
      this.stats.collates += removed;
      this.log(`移除 ${removed} 个COLLATE子句`);
    }
    return converted;
  }

  /**
   * Removes PostgreSQL `::type` casts.
   *
   * A length/precision argument directly attached to the cast type, such as
   * `::numeric(10,2)` or `::character varying(255)`, is removed with it.
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} SQL text without the listed casts.
   */
  removeTypeCasts(sql) {
    const typeCastPattern = /::(character\s+varying|varchar|text|integer|bigint|smallint|numeric|decimal|timestamp|date|time|boolean|regclass)(?:\(\s*\d+(?:\s*,\s*\d+)?\s*\))?/gi;
    const matches = sql.match(typeCastPattern);
    if (matches) {
      this.log(`移除 ${matches.length} 个PostgreSQL类型转换`);
    }
    return sql.replace(typeCastPattern, '');
  }

  /**
   * Removes the `"pg_catalog".` schema prefix and rewrites boolean defaults
   * (`DEFAULT false` -> `DEFAULT 0`, `DEFAULT true` -> `DEFAULT 1`).
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} Converted SQL text.
   */
  removePgCatalog(sql) {
    const catalogPattern = /"pg_catalog"\./gi;
    const matches = sql.match(catalogPattern);
    if (matches) {
      this.log(`移除 ${matches.length} 个pg_catalog前缀`);
    }
    return sql
      .replace(catalogPattern, '')
      .replace(/\bDEFAULT\s+false\b/gi, 'DEFAULT 0')
      .replace(/\bDEFAULT\s+true\b/gi, 'DEFAULT 1');
  }

  /**
   * Removes double quotes placed around data type names.
   *
   * A quoted type is only touched when it is preceded by whitespace and
   * followed by whitespace or a comma. The whole parameterized type is
   * captured, so `"VARCHAR(8000)"` becomes `VARCHAR(8000)` with its argument
   * kept, and the surrounding whitespace is left as it was.
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} SQL text with unquoted type names.
   */
  removeTypeQuotes(sql) {
    // 1. Parameterized types first: VARCHAR(8000), DECIMAL(20,6), ...
    const parameterized = /(\s)"((?:VARCHAR|CHAR|DECIMAL|NUMERIC)\s*\([^)]+\))"(?=[\s,])/gi;
    // 2. Then plain type names.
    const simple = /(\s)"(BIGINT|INT|SMALLINT|TINYINT|VARCHAR|CHAR|TEXT|DATE|TIME|TIMESTAMP|BIT|BOOLEAN|BOOL|BLOB|CLOB)"(?=[\s,])/gi;
    const converted = sql.replace(parameterized, '$1$2').replace(simple, '$1$2');
    this.log('移除数据类型引号');
    return converted;
  }

  /**
   * Removes a trailing `PARTITION BY ( ... )` clause that closes a statement,
   * e.g. `)\nPARTITION BY (\n)\n;` or `) PARTITION BY ();`.
   *
   * The log entry reports the number of clauses actually removed.
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} Converted SQL text.
   */
  removeEmptyPartition(sql) {
    let removed = 0;
    const converted = sql.replace(/\)\s*PARTITION\s+BY\s+\([^)]*\)\s*;/gi, () => {
      removed++;
      return ');\n';
    });
    if (removed > 0) {
      this.log(`移除 ${removed} 个空的PARTITION BY子句`);
    }
    return converted;
  }

  /**
   * Strips PostgreSQL-specific index syntax: `USING <method>`, operator
   * classes (`"pg_catalog"."text_ops"`, `"text_ops"`) and `NULLS FIRST/LAST`.
   * ASC/DESC is kept.
   *
   * `stats.indexes` is increased by the number of CREATE [UNIQUE] INDEX
   * statements found in the input.
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} Converted SQL text.
   */
  simplifyIndexSyntax(sql) {
    const indexStatements = sql.match(/CREATE\s+(?:UNIQUE\s+)?INDEX\b/gi);
    this.stats.indexes += indexStatements ? indexStatements.length : 0;

    return sql
      .replace(/USING\s+\w+/gi, '')
      .replace(/"pg_catalog"\."[^"]+_ops"/gi, '')
      .replace(/\s+"[^"]+_ops"/gi, '')
      .replace(/\s+NULLS\s+(FIRST|LAST)/gi, '');
  }

  /**
   * Splits a column list on commas that are outside parentheses and outside
   * quoted text, so `COALESCE("a", '-999')` stays one entry.
   *
   * @param {string} text - Raw text between the parentheses of an index.
   * @returns {string[]} The untrimmed entries, in order.
   */
  splitTopLevelCommas(text) {
    const parts = [];
    let current = '';
    let depth = 0;
    let quote = null;
    for (const ch of text) {
      if (quote !== null) {
        if (ch === quote) {
          quote = null;
        }
      } else if (ch === "'" || ch === '"') {
        quote = ch;
      } else if (ch === '(') {
        depth++;
      } else if (ch === ')') {
        depth = Math.max(0, depth - 1);
      } else if (ch === ',' && depth === 0) {
        parts.push(current);
        current = '';
        continue;
      }
      current += ch;
    }
    parts.push(current);
    return parts;
  }

  /**
   * Removes repeated columns from CREATE [UNIQUE] INDEX column lists.
   *
   * Entries are compared case-insensitively by their first double-quoted
   * identifier; the first occurrence wins and later ones are dropped with a
   * warning. Entries without a quoted identifier are kept unchanged. Because
   * the list is split on top-level commas only, dropping a duplicate
   * expression removes the whole expression.
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} Converted SQL text.
   */
  removeDuplicateIndexColumns(sql) {
    const indexPattern = /(CREATE\s+(?:UNIQUE\s+)?INDEX\s+"[^"]+"\s+ON\s+"[^"]+"\."[^"]+"\s*\()([\s\S]*?)(\);)/gi;
    return sql.replace(indexPattern, (match, prefix, columns, suffix) => {
      const seen = new Set();
      const uniqueColumns = [];
      for (const col of this.splitTopLevelCommas(columns).map((part) => part.trim())) {
        const colNameMatch = col.match(/"(\w+)"/);
        if (!colNameMatch) {
          // No quoted identifier: keep the entry as it is.
          uniqueColumns.push(col);
          continue;
        }
        const key = colNameMatch[1].toLowerCase();
        if (seen.has(key)) {
          this.warn(`索引中发现重复列: ${colNameMatch[1]},已自动移除重复项`);
          continue;
        }
        seen.add(key);
        uniqueColumns.push(col);
      }
      return prefix + '\n ' + uniqueColumns.join(',\n ') + '\n' + suffix;
    });
  }

  /**
   * Simplifies COALESCE expressions inside index column lists.
   *
   * `COALESCE("col", <literal>)` is replaced by `"col"`, where the literal is
   * a single-quoted string (possibly empty) or a plain number. A warning is
   * issued when an index holds more than `config.coalesceThreshold` COALESCE
   * calls.
   *
   * @param {string} sql - SQL text to convert.
   * @returns {string} Converted SQL text.
   */
  processCoalesceIndexes(sql) {
    // Step 1: drop ::type casts so the literal argument is a bare literal.
    const withoutCasts = sql.replace(/::(character\s+varying|varchar|text|integer|bigint|smallint)/gi, '');

    // Step 2: rewrite the column list of every index that uses COALESCE.
    const coalesceIndexPattern = /CREATE\s+(?:UNIQUE\s+)?INDEX\s+"([^"]+)"\s+ON\s+"[^"]+"\."[^"]+"\s*\(([\s\S]*?)\);/gi;
    return withoutCasts.replace(coalesceIndexPattern, (match, indexName, columns) => {
      const coalesceCount = (columns.match(/COALESCE\s*\(/gi) || []).length;
      if (coalesceCount === 0) {
        return match;
      }

      this.stats.coalesceIndexes++;
      if (coalesceCount > config.coalesceThreshold) {
        this.warn(
          `索引 ${indexName} 包含 ${coalesceCount} 个COALESCE函数可能超过达梦816字符限制已自动简化`
        );
      } else {
        this.log(`处理索引 ${indexName} 中的 ${coalesceCount} 个COALESCE函数`);
      }

      // Accepts COALESCE("col", '-999'), COALESCE(col, ''), COALESCE(col, 0).
      const simplifiedColumns = columns
        .replace(
          /COALESCE\s*\(\s*"?(\w+)"?\s*,\s*(?:'[^']*'|[+-]?\d+(?:\.\d+)?)\s*\)/gi,
          '"$1"'
        )
        .replace(/\s+/g, ' ')
        .trim();

      // A replacer function keeps any "$" in the column text literal.
      return match.replace(columns, () => simplifiedColumns);
    });
  }

  /**
   * Prepends a comment header describing the conversion and its counters.
   *
   * @param {string} sql - Converted SQL text.
   * @param {string} originalFile - Path of the source file (only its base name is printed).
   * @returns {string} Header followed by `sql`.
   */
  addConversionHeader(sql, originalFile) {
    const header = `/*
Converted to DaMeng SQL by pg2dm-converter
Source File : ${path.basename(originalFile)}
Source Server Type : PostgreSQL
Target Server Type : DaMeng 8
Conversion Date : ${new Date().toLocaleString('zh-CN')}
Conversion Summary:
- Data Types Converted: ${this.stats.dataTypes}
- Sequences -> IDENTITY: ${this.stats.sequences}
- COLLATE Clauses Removed: ${this.stats.collates}
- Indexes Simplified: ${this.stats.indexes}
- COALESCE Indexes Processed: ${this.stats.coalesceIndexes}
*/
`;
    return header + sql;
  }

  /**
   * Runs the full conversion pipeline.
   *
   * The step order matters: the pg_catalog prefix is removed first, and the
   * header is added last when `config.output.addConversionComment` is set.
   *
   * @param {string} sql - PostgreSQL SQL text.
   * @param {string} [originalFile='input.sql'] - Source file name used in the header.
   * @returns {string} Converted SQL text.
   */
  convert(sql, originalFile = 'input.sql') {
    this.log('开始转换PostgreSQL SQL到达梦语法');

    const steps = [
      ['步骤1: 移除pg_catalog模式前缀...', (text) => this.removePgCatalog(text)],
      ['步骤2: 转换数据类型...', (text) => this.convertDataTypes(text)],
      ['步骤3: 转换序列为IDENTITY...', (text) => this.convertSequences(text)],
      ['步骤4: 移除PostgreSQL类型转换...', (text) => this.removeTypeCasts(text)],
      ['步骤5: 移除COLLATE子句...', (text) => this.removeCollate(text)],
      ['步骤6: 移除数据类型引号...', (text) => this.removeTypeQuotes(text)],
      ['步骤7: 移除空的PARTITION BY子句...', (text) => this.removeEmptyPartition(text)],
      ['步骤8: 简化索引语法...', (text) => this.simplifyIndexSyntax(text)],
      ['步骤9: 移除索引中的重复列...', (text) => this.removeDuplicateIndexColumns(text)],
      ['步骤10: 处理COALESCE函数索引...', (text) => this.processCoalesceIndexes(text)],
    ];

    let converted = sql;
    for (const [label, run] of steps) {
      this.log(label);
      converted = run(converted);
    }

    // Step 11: optional header comment.
    if (config.output.addConversionComment) {
      converted = this.addConversionHeader(converted, originalFile);
    }

    this.log('转换完成!');
    return converted;
  }

  /**
   * Writes stats, warnings and log entries as JSON next to the output file.
   *
   * The log path is the output path with a trailing `.sql` extension removed
   * and `_conversion.log.json` appended. Only a trailing extension is
   * stripped, so the log path always differs from `outputPath` and the
   * converted SQL is never overwritten.
   *
   * @param {string} outputPath - Path of the converted SQL file.
   */
  generateLogFile(outputPath) {
    const logContent = {
      timestamp: new Date().toISOString(),
      stats: this.stats,
      warnings: this.warnings,
      logs: this.conversionLog,
    };
    const logFile = `${outputPath.replace(/\.sql$/i, '')}_conversion.log.json`;
    fs.writeFileSync(logFile, JSON.stringify(logContent, null, 2));
    this.log(`转换日志已保存: ${logFile}`);
  }
}
/**
 * Creates a directory (including missing parents) unless the path already exists.
 *
 * @param {string} dirPath - Directory path to create.
 */
function ensureDir(dirPath) {
  const alreadyPresent = fs.existsSync(dirPath);
  if (alreadyPresent) {
    return;
  }
  fs.mkdirSync(dirPath, { recursive: true });
}
/**
 * Converts a single SQL file, writes the result and prints its summary.
 *
 * @param {string} inputFile - Path of the PostgreSQL SQL file to read.
 * @param {string} [outputFile] - Target path; when omitted the result goes to
 *   ./output/<input base name>_dm.sql.
 */
function convertFile(inputFile, outputFile) {
  // Read the input file.
  console.log(`\n读取文件: ${inputFile}`);
  const sqlContent = fs.readFileSync(inputFile, 'utf8');

  // Convert with a fresh converter so stats and warnings are per file.
  const converter = new PG2DMConverter();
  const convertedSql = converter.convert(sqlContent, inputFile);

  // Resolve the output path.
  const targetFile = outputFile || path.join(
    './output',
    path.basename(inputFile, '.sql') + '_dm.sql'
  );

  // Write the converted SQL.
  ensureDir(path.dirname(targetFile));
  fs.writeFileSync(targetFile, convertedSql, 'utf8');
  console.log(`\n✓ 转换完成,输出文件: ${targetFile}`);

  // Optional JSON log next to the output file.
  if (config.output.generateLog) {
    converter.generateLogFile(targetFile);
  }

  // Warnings.
  if (converter.warnings.length > 0) {
    console.log('\n⚠ 警告信息:');
    converter.warnings.forEach((warn, i) => {
      console.log(` ${i + 1}. ${warn}`);
    });
  }

  // Counters.
  console.log('\n转换统计:');
  console.log(` - 数据类型转换: ${converter.stats.dataTypes}`);
  console.log(` - 序列转IDENTITY: ${converter.stats.sequences}`);
  console.log(` - COLLATE移除: ${converter.stats.collates}`);
  console.log(` - 索引简化: ${converter.stats.indexes}`);
  console.log(` - COALESCE索引处理: ${converter.stats.coalesceIndexes}`);
}

/**
 * Command-line entry point.
 *
 * Argument handling:
 *   - no arguments: print usage and exit with code 0;
 *   - one argument: convert that file into ./output;
 *   - two arguments: <input> <output>, as documented in the usage text;
 *   - three or more arguments: batch mode, every argument is an input file
 *     (this is what a shell produces for `input/*.sql`), each converted into
 *     ./output. Previously the second file of such a list was used as the
 *     output path and overwritten.
 *
 * NOTE(review): a glob that expands to exactly two files is still read as
 * <input> <output>; the two forms cannot be told apart from argv alone.
 *
 * Exits with code 1 when an input file does not exist.
 */
function main() {
  const args = process.argv.slice(2);
  if (args.length === 0) {
    console.log(`
PostgreSQL到达梦数据库SQL转换器
======================================
使用方法:
node converter.js <input-file.sql> [output-file.sql]
node converter.js input/*.sql
示例:
node converter.js input/schema.sql
node converter.js input/schema.sql output/schema_dm.sql
node converter.js input/*.sql
说明:
- 如果不指定输出文件将自动在output目录生成 *_dm.sql 文件
- 支持通配符批量处理多个文件
- 会自动生成转换日志文件 *_conversion.log.json
`);
    process.exit(0);
  }

  // Make sure the input and output directories exist.
  ensureDir('./input');
  ensureDir('./output');

  const isBatch = args.length > 2;
  const inputFiles = isBatch ? args : [args[0]];

  // Validate every input before writing anything.
  const missingFile = inputFiles.find((file) => !fs.existsSync(file));
  if (missingFile !== undefined) {
    console.error(`错误: 文件不存在: ${missingFile}`);
    process.exit(1);
  }

  for (const inputFile of inputFiles) {
    convertFile(inputFile, isBatch ? undefined : args[1]);
  }
}
// Run the CLI only when this file is executed directly, not when it is
// loaded by another module through require().
if (require.main === module) {
main();
}
// Export the converter class for programmatic use.
module.exports = PG2DMConverter;