整理下初始化数据表

This commit is contained in:
dengqichen 2025-12-08 15:53:43 +08:00
parent dc56a1e3c0
commit b5fe3c96f6
4 changed files with 254 additions and 28 deletions

View File

@ -2,6 +2,7 @@ package com.qqchen.deploy.backend.deploy.entity;
import com.qqchen.deploy.backend.framework.enums.MonitorAlertLevelEnum;
import com.qqchen.deploy.backend.framework.enums.MonitorMetricEnum;
import com.qqchen.deploy.backend.framework.enums.ServerAlertStatusEnum;
import jakarta.persistence.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
@ -71,10 +72,11 @@ public class ServerAlertLog {
private String alertMessage;
/**
* 状态: ACTIVE/RESOLVED
* 状态: PENDING/ACTIVE/RESOLVED
*/
@Enumerated(EnumType.STRING)
@Column(name = "status", length = 20)
private String status = "ACTIVE";
private ServerAlertStatusEnum status;
/**
* 告警时间

View File

@ -1,11 +1,16 @@
package com.qqchen.deploy.backend.deploy.repository;
import com.qqchen.deploy.backend.deploy.entity.ServerAlertLog;
import com.qqchen.deploy.backend.framework.enums.MonitorMetricEnum;
import com.qqchen.deploy.backend.framework.enums.ServerAlertStatusEnum;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Optional;
/**
* 服务器告警记录Repository
@ -14,13 +19,29 @@ import java.util.List;
public interface IServerAlertLogRepository extends JpaRepository<ServerAlertLog, Long> {
/**
* 查询指定服务器的活跃告警
* 查询指定服务器的指定状态告警
*/
List<ServerAlertLog> findByServerIdAndStatus(Long serverId, String status);
List<ServerAlertLog> findByServerIdAndStatus(Long serverId, ServerAlertStatusEnum status);
/**
 * Query derived from the method name: returns the alert records of one server whose
 * {@code alertTime} falls between {@code startTime} and {@code endTime}, ordered by
 * {@code alertTime} descending (newest first).
 *
 * @param serverId  id of the server whose alerts are requested
 * @param startTime lower bound of the alert time range
 * @param endTime   upper bound of the alert time range
 * @return matching alert records, newest first
 */
List<ServerAlertLog> findByServerIdAndAlertTimeBetweenOrderByAlertTimeDesc(
Long serverId, LocalDateTime startTime, LocalDateTime endTime);
/**
 * Finds the most recent unresolved (PENDING or ACTIVE) alert for the given server, rule and
 * metric type. Used to decide whether an unresolved alert already exists.
 *
 * <p>Several unresolved rows may match the same server/rule/type. A query method that declares
 * a single {@code Optional} result but matches more than one row fails with
 * {@code IncorrectResultSizeDataAccessException}, so this method delegates to a
 * {@code findFirst...} derived query that only returns the newest row.
 *
 * @param serverId  id of the monitored server
 * @param ruleId    id of the alert rule
 * @param alertType metric type the rule watches
 * @return the newest PENDING/ACTIVE alert, or an empty Optional when none exists
 */
default Optional<ServerAlertLog> findActiveOrPendingAlert(
        Long serverId,
        Long ruleId,
        MonitorMetricEnum alertType
) {
    return findFirstByServerIdAndRuleIdAndAlertTypeAndStatusInOrderByAlertTimeDesc(
            serverId, ruleId, alertType,
            List.of(ServerAlertStatusEnum.PENDING, ServerAlertStatusEnum.ACTIVE));
}

/**
 * Derived query backing {@link #findActiveOrPendingAlert}: newest alert (by {@code alertTime})
 * for the server/rule/type whose status is one of {@code statuses}.
 *
 * @param serverId  id of the monitored server
 * @param ruleId    id of the alert rule
 * @param alertType metric type the rule watches
 * @param statuses  statuses to match
 * @return the newest matching alert, or an empty Optional when none exists
 */
Optional<ServerAlertLog> findFirstByServerIdAndRuleIdAndAlertTypeAndStatusInOrderByAlertTimeDesc(
        Long serverId,
        Long ruleId,
        MonitorMetricEnum alertType,
        List<ServerAlertStatusEnum> statuses);
}

View File

@ -7,6 +7,7 @@ import com.qqchen.deploy.backend.deploy.entity.ServerAlertLog;
import com.qqchen.deploy.backend.deploy.entity.ServerAlertRule;
import com.qqchen.deploy.backend.framework.enums.MonitorAlertLevelEnum;
import com.qqchen.deploy.backend.framework.enums.MonitorMetricEnum;
import com.qqchen.deploy.backend.framework.enums.ServerAlertStatusEnum;
import com.qqchen.deploy.backend.deploy.repository.IServerAlertLogRepository;
import com.qqchen.deploy.backend.deploy.repository.IServerAlertRuleRepository;
import com.qqchen.deploy.backend.deploy.repository.IServerRepository;
@ -23,6 +24,7 @@ import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
/**
* 服务器告警服务实现
@ -109,9 +111,18 @@ public class ServerAlertServiceImpl implements IServerAlertService {
threshold = rule.getWarningThreshold();
}
// 触发告警
// 查询是否已有活跃或待处理的告警
Optional<ServerAlertLog> existingAlertOpt = alertLogRepository.findActiveOrPendingAlert(
serverId, rule.getId(), rule.getAlertType());
if (alertLevel != null) {
triggerAlert(serverId, rule, alertLevel, currentValue, threshold, resourceInfo, config);
// 当前超过阈值
handleAlertTriggered(existingAlertOpt, serverId, rule, alertLevel, currentValue, threshold, resourceInfo, config);
} else {
// 当前值正常检查是否需要解除告警
if (existingAlertOpt.isPresent()) {
resolveAlert(existingAlertOpt.get());
}
}
}
@ -158,21 +169,109 @@ public class ServerAlertServiceImpl implements IServerAlertService {
threshold = rule.getWarningThreshold();
}
// 触发告警
// 查询是否已有活跃或待处理的告警
Optional<ServerAlertLog> existingAlertOpt = alertLogRepository.findActiveOrPendingAlert(
serverId, rule.getId(), rule.getAlertType());
if (alertLevel != null) {
// 当前超过阈值
String resourceInfo = String.format("总磁盘(%d个分区总容量%.0fGB,已用%.0fGB",
diskUsageList.size(), totalCapacity.doubleValue(), totalUsed.doubleValue());
triggerAlert(serverId, rule, alertLevel, totalUsagePercent, threshold, resourceInfo, config);
handleAlertTriggered(existingAlertOpt, serverId, rule, alertLevel, totalUsagePercent, threshold, resourceInfo, config);
} else {
// 当前值正常检查是否需要解除告警
if (existingAlertOpt.isPresent()) {
resolveAlert(existingAlertOpt.get());
}
}
}
/**
* 触发告警
* 处理告警触发逻辑
*/
private void triggerAlert(Long serverId, ServerAlertRule rule, MonitorAlertLevelEnum level,
BigDecimal currentValue, BigDecimal threshold, String resourceInfo,
ServerMonitorNotificationConfig config) {
// 1. 记录告警日志到数据库
/**
 * Handles a measurement that is currently above an alert threshold.
 *
 * <ul>
 *   <li>No unresolved alert yet: create an ACTIVE alert and notify immediately when the rule has
 *       no duration requirement, otherwise create a PENDING alert.</li>
 *   <li>Existing PENDING alert: activate and notify once it has lasted the required minutes;
 *       until then only refresh the stored level/value when the level changed.</li>
 *   <li>Existing ACTIVE alert: re-notify only on escalation; any other level change is stored
 *       silently; an unchanged level does nothing.</li>
 * </ul>
 *
 * @param existingAlertOpt unresolved alert for this server/rule/type, if any
 * @param serverId         id of the monitored server
 * @param rule             alert rule that was evaluated
 * @param level            alert level reached by the current value
 * @param currentValue     current measured value
 * @param threshold        threshold that was exceeded
 * @param resourceInfo     human-readable resource description used in messages
 * @param config           notification configuration, may be incomplete
 */
private void handleAlertTriggered(Optional<ServerAlertLog> existingAlertOpt, Long serverId,
        ServerAlertRule rule, MonitorAlertLevelEnum level,
        BigDecimal currentValue, BigDecimal threshold,
        String resourceInfo, ServerMonitorNotificationConfig config) {
    // A missing duration means "no sustained-breach requirement". Normalising it once keeps the
    // PENDING comparison below from unboxing null when the rule has no duration configured.
    long requiredMinutes = rule.getDurationMinutes() == null ? 0L : rule.getDurationMinutes();

    if (!existingAlertOpt.isPresent()) {
        // First time the threshold is exceeded.
        if (requiredMinutes == 0) {
            // Alert immediately.
            createActiveAlertAndNotify(serverId, rule, level, currentValue, threshold, resourceInfo, config);
        } else {
            // Record as PENDING and wait for the duration check.
            createPendingAlert(serverId, rule, level, currentValue, threshold, resourceInfo);
        }
        return;
    }

    ServerAlertLog existingAlert = existingAlertOpt.get();
    if (ServerAlertStatusEnum.PENDING.equals(existingAlert.getStatus())) {
        // Existing PENDING alert: check whether the breach has lasted long enough.
        long durationMinutes = java.time.Duration.between(
                existingAlert.getAlertTime(), LocalDateTime.now()).toMinutes();
        if (durationMinutes >= requiredMinutes) {
            // Duration reached: activate and notify.
            activateAndNotify(existingAlert, rule, config);
        } else {
            // Not long enough yet: only refresh the stored data when the level changed.
            if (existingAlert.getAlertLevel() != level) {
                updateAlertLevel(existingAlert, level, currentValue, threshold, resourceInfo);
            }
            log.debug("⏳ 告警持续时间不足: serverId={}, 已持续{}分钟/需要{}分钟",
                    serverId, durationMinutes, requiredMinutes);
        }
    } else if (ServerAlertStatusEnum.ACTIVE.equals(existingAlert.getStatus())) {
        // Existing ACTIVE alert: never repeat a notification for an unchanged level.
        if (existingAlert.getAlertLevel() != level) {
            // NOTE(review): escalation is detected through enum ordinals, which presumably relies
            // on MonitorAlertLevelEnum declaring its levels in ascending severity — confirm.
            if (level.ordinal() > existingAlert.getAlertLevel().ordinal()) {
                // Escalation (e.g. WARNING -> CRITICAL): store the new level and notify again.
                updateAlertLevelAndNotify(existingAlert, rule, level, currentValue, threshold, resourceInfo, config);
            } else {
                // Level changed without escalating: store the new data, no notification.
                updateAlertLevel(existingAlert, level, currentValue, threshold, resourceInfo);
            }
        }
    }
}
/**
 * Stores a new alert record in PENDING state. No notification is sent here; the record is
 * created so that a later check can verify the breach lasted the rule's duration.
 * A failure while saving is logged and not propagated.
 *
 * @param serverId     id of the monitored server
 * @param rule         alert rule that was evaluated
 * @param level        alert level reached by the current value
 * @param currentValue current measured value
 * @param threshold    threshold that was exceeded
 * @param resourceInfo human-readable resource description used in the message
 */
private void createPendingAlert(Long serverId, ServerAlertRule rule, MonitorAlertLevelEnum level,
        BigDecimal currentValue, BigDecimal threshold, String resourceInfo) {
    final String pendingMessage = String.format(
            "%s使用率达到%s级别: 当前值=%.2f%%, 阈值=%.2f%% (等待持续%d分钟验证)",
            resourceInfo, level.getDescription(), currentValue, threshold, rule.getDurationMinutes());

    final ServerAlertLog pendingAlert = ServerAlertLog.builder()
            .status(ServerAlertStatusEnum.PENDING)
            .notified(false)
            .serverId(serverId)
            .ruleId(rule.getId())
            .alertType(rule.getAlertType())
            .alertLevel(level)
            .alertValue(currentValue)
            .thresholdValue(threshold)
            .alertMessage(pendingMessage)
            .alertTime(LocalDateTime.now())
            .build();

    try {
        alertLogRepository.save(pendingAlert);
        log.info("📝 PENDING 告警已创建: id={}, serverId={}, 需持续{}分钟",
                pendingAlert.getId(), serverId, rule.getDurationMinutes());
    } catch (Exception saveFailure) {
        // Best effort: a failed insert must not break the monitoring run.
        log.error("创建 PENDING 告警失败", saveFailure);
    }
}
/**
* 创建 ACTIVE 状态的告警并立即发送通知
*/
private void createActiveAlertAndNotify(Long serverId, ServerAlertRule rule, MonitorAlertLevelEnum level,
BigDecimal currentValue, BigDecimal threshold,
String resourceInfo, ServerMonitorNotificationConfig config) {
String alertMessage = String.format("%s使用率达到%s级别: 当前值=%.2f%%, 阈值=%.2f%%",
resourceInfo, level.getDescription(), currentValue, threshold);
@ -184,32 +283,136 @@ public class ServerAlertServiceImpl implements IServerAlertService {
.alertValue(currentValue)
.thresholdValue(threshold)
.alertMessage(alertMessage)
.status("ACTIVE")
.status(ServerAlertStatusEnum.ACTIVE)
.alertTime(LocalDateTime.now())
.notified(false)
.build();
try {
alertLogRepository.save(alertLog);
log.info("✅ 告警记录已保存: id={}, serverId={}, message={}",
log.warn("🚨 告警已触发: id={}, serverId={}, message={}",
alertLog.getId(), serverId, alertMessage);
} catch (Exception e) {
log.error("保存告警记录失败", e);
return;
}
// 2. 记录日志
log.warn("⚠️ 服务器告警触发: serverId={}, ruleName={}, type={}, level={}, resource={}, " +
"current={}%, threshold={}%",
serverId, rule.getRuleName(), rule.getAlertType(), level,
resourceInfo, currentValue, threshold);
// 发送通知
sendNotificationAndUpdateStatus(alertLog, rule, resourceInfo, config);
}
/**
 * Activates a PENDING alert and sends the notification.
 *
 * Sets the status to ACTIVE, rewrites the alert message from the values already stored on the
 * record plus the rule's duration, saves the record, and then hands over to
 * sendNotificationAndUpdateStatus. If saving fails the error is logged and no notification is
 * attempted.
 *
 * @param alertLog the alert record to activate (mutated and saved)
 * @param rule     the rule that produced the alert; supplies alert type and duration
 * @param config   notification configuration passed on to the notification step
 */
private void activateAndNotify(ServerAlertLog alertLog, ServerAlertRule rule,
ServerMonitorNotificationConfig config) {
alertLog.setStatus(ServerAlertStatusEnum.ACTIVE);
// The resource label is derived from the rule's metric type because the caller does not pass one.
String resourceInfo = getResourceInfo(rule.getAlertType());
alertLog.setAlertMessage(String.format("%s使用率达到%s级别: 当前值=%.2f%%, 阈值=%.2f%% (已持续%d分钟)",
resourceInfo, alertLog.getAlertLevel().getDescription(),
alertLog.getAlertValue(), alertLog.getThresholdValue(), rule.getDurationMinutes()));
// 3. Send the alert notification
// NOTE(review): the statements from here to the next `try` use serverId, level, currentValue and
// threshold, none of which are parameters or locals of this method, and the `if` is not closed
// before the next `try` — this looks like a leftover of the former triggerAlert body; confirm
// against the actual file.
if (config != null && config.getNotificationChannelId() != null && config.getResourceAlertTemplateId() != null) {
try {
sendAlertNotification(serverId, rule, level, currentValue, threshold, resourceInfo, config);
} catch (Exception e) {
log.error("发送告警通知失败: serverId={}, error={}", serverId, e.getMessage(), e);
}
try {
alertLogRepository.save(alertLog);
log.warn("🚨 告警已激活: id={}, serverId={}, 已持续{}分钟",
alertLog.getId(), alertLog.getServerId(), rule.getDurationMinutes());
} catch (Exception e) {
// Without a persisted ACTIVE record the notification is skipped.
log.error("激活告警失败", e);
return;
}
// Send the notification
sendNotificationAndUpdateStatus(alertLog, rule, resourceInfo, config);
}
/**
 * Overwrites level, measured value, threshold and message of an existing alert record and
 * saves it. A failure while saving is logged and not propagated.
 *
 * @param alertLog     alert record to update (mutated and saved)
 * @param newLevel     level to store
 * @param currentValue measured value to store
 * @param threshold    threshold to store
 * @param resourceInfo human-readable resource description used in the message
 */
private void updateAlertLevel(ServerAlertLog alertLog, MonitorAlertLevelEnum newLevel,
        BigDecimal currentValue, BigDecimal threshold, String resourceInfo) {
    alertLog.setAlertLevel(newLevel);
    alertLog.setAlertValue(currentValue);
    alertLog.setThresholdValue(threshold);

    final String refreshedMessage = String.format("%s使用率达到%s级别: 当前值=%.2f%%, 阈值=%.2f%%",
            resourceInfo, newLevel.getDescription(), currentValue, threshold);
    alertLog.setAlertMessage(refreshedMessage);

    try {
        alertLogRepository.save(alertLog);
        log.info("📊 告警级别已更新: id={}, 新级别={}", alertLog.getId(), newLevel);
    } catch (Exception saveFailure) {
        log.error("更新告警级别失败", saveFailure);
    }
}
/**
 * Stores a new alert level on an existing record and sends the notification again.
 * Called when the level of an alert changes to a higher one.
 *
 * @param alertLog     alert record to update
 * @param rule         rule that produced the alert
 * @param newLevel     level to store
 * @param currentValue measured value to store
 * @param threshold    threshold to store
 * @param resourceInfo human-readable resource description used in the message
 * @param config       notification configuration
 */
private void updateAlertLevelAndNotify(ServerAlertLog alertLog, ServerAlertRule rule,
        MonitorAlertLevelEnum newLevel, BigDecimal currentValue,
        BigDecimal threshold, String resourceInfo,
        ServerMonitorNotificationConfig config) {
    // Capture the level before it is overwritten so the log can show the transition.
    final MonitorAlertLevelEnum previousLevel = alertLog.getAlertLevel();

    updateAlertLevel(alertLog, newLevel, currentValue, threshold, resourceInfo);
    log.warn("⬆️ 告警级别升级: id={}, {} → {}, 重新发送通知",
            alertLog.getId(), previousLevel, newLevel);

    sendNotificationAndUpdateStatus(alertLog, rule, resourceInfo, config);
}
/**
 * Marks an alert as RESOLVED, stamps the resolve time with the current time and saves it.
 * A failure while saving is logged and not propagated.
 *
 * @param alertLog alert record to resolve (mutated and saved)
 */
private void resolveAlert(ServerAlertLog alertLog) {
    alertLog.setStatus(ServerAlertStatusEnum.RESOLVED);
    final LocalDateTime resolvedAt = LocalDateTime.now();
    alertLog.setResolveTime(resolvedAt);

    try {
        alertLogRepository.save(alertLog);
        log.info("✅ 告警已解除: id={}, serverId={}", alertLog.getId(), alertLog.getServerId());
    } catch (Exception saveFailure) {
        log.error("解除告警失败", saveFailure);
    }
}
/**
 * Sends the alert notification and, when that succeeds, records on the alert that it was
 * notified (flag plus timestamp) and saves it.
 *
 * <p>Nothing is sent when the configuration is missing or lacks the channel id or the
 * resource-alert template id. Any exception raised while sending or saving is logged and not
 * propagated.
 *
 * @param alertLog     alert whose stored server id, level, value and threshold are sent
 * @param rule         rule that produced the alert
 * @param resourceInfo human-readable resource description
 * @param config       notification configuration, may be null or incomplete
 */
private void sendNotificationAndUpdateStatus(ServerAlertLog alertLog, ServerAlertRule rule,
        String resourceInfo, ServerMonitorNotificationConfig config) {
    final boolean configComplete = config != null
            && config.getNotificationChannelId() != null
            && config.getResourceAlertTemplateId() != null;
    if (!configComplete) {
        log.debug("通知配置不完整,跳过发送通知");
        return;
    }

    try {
        sendAlertNotification(alertLog.getServerId(), rule, alertLog.getAlertLevel(),
                alertLog.getAlertValue(), alertLog.getThresholdValue(), resourceInfo, config);

        // Only reached when sending did not throw: persist the notification state.
        alertLog.setNotified(true);
        alertLog.setNotifyTime(LocalDateTime.now());
        alertLogRepository.save(alertLog);

        log.info("✅ 告警通知已发送: alertLogId={}, serverId={}",
                alertLog.getId(), alertLog.getServerId());
    } catch (Exception failure) {
        log.error("发送告警通知失败: serverId={}, error={}",
                alertLog.getServerId(), failure.getMessage(), failure);
    }
}
/**
 * Returns the display label of the resource a metric type refers to.
 *
 * <p>A {@code null} type is mapped to the same "unknown" label as an unrecognised type;
 * switching on a {@code null} enum would otherwise throw a {@code NullPointerException}.
 *
 * @param alertType metric type, may be {@code null}
 * @return label for CPU, MEMORY or DISK, or the "unknown" label for anything else
 */
private String getResourceInfo(MonitorMetricEnum alertType) {
    if (alertType == null) {
        return "未知";
    }
    switch (alertType) {
        case CPU:
            return "CPU";
        case MEMORY:
            return "内存";
        case DISK:
            return "磁盘";
        default:
            return "未知";
    }
}

View File

@ -765,7 +765,7 @@ INSERT INTO `deploy-ease-platform`.`schedule_job` (`id`, `create_by`, `create_ti
-- NOTE(review): job_description of job 14 says it runs daily at 2 AM, but cron_expression is
-- '0 0 3 * * ?' (03:00 daily, assuming Quartz-style 6-field cron) — confirm which is intended.
INSERT INTO `deploy-ease-platform`.`schedule_job` (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `version`, `deleted`, `job_name`, `job_description`, `category_id`, `bean_name`, `method_name`, `form_definition_id`, `method_params`, `cron_expression`, `status`, `concurrent`, `last_execute_time`, `next_execute_time`, `execute_count`, `success_count`, `fail_count`, `timeout_seconds`, `retry_count`, `alert_email`) VALUES (14, 'admin', NOW(), 'admin', NOW(), 26, b'0', '隆基Git仓库组同步', '定期同步Git仓库组信息每天凌晨2点执行', 2, 'repositoryGroupServiceImpl', 'syncGroups', NULL, '{\"externalSystemId\": 4}', '0 0 3 * * ?', 'ENABLED', b'0', NOW(), NOW(), 0, 0, 0, 3600, 2, '');
-- NOTE(review): job_description of job 15 says it runs daily at 3 AM, but cron_expression is
-- '0 */5 * * * ?' (every 5 minutes, assuming Quartz-style 6-field cron) — confirm which is intended.
INSERT INTO `deploy-ease-platform`.`schedule_job` (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `version`, `deleted`, `job_name`, `job_description`, `category_id`, `bean_name`, `method_name`, `form_definition_id`, `method_params`, `cron_expression`, `status`, `concurrent`, `last_execute_time`, `next_execute_time`, `execute_count`, `success_count`, `fail_count`, `timeout_seconds`, `retry_count`, `alert_email`) VALUES (15, 'admin', NOW(), 'admin', NOW(), 1204, b'0', '隆基Git项目同步', '定期同步Git项目信息每天凌晨3点执行', 2, 'repositoryProjectServiceImpl', 'syncProjects', NULL, '{\"externalSystemId\": 4}', '0 */5 * * * ?', 'ENABLED', b'0', NOW(), NOW(), 0, 0, 0, 3600, 2, '');
INSERT INTO `deploy-ease-platform`.`schedule_job` (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `version`, `deleted`, `job_name`, `job_description`, `category_id`, `bean_name`, `method_name`, `form_definition_id`, `method_params`, `cron_expression`, `status`, `concurrent`, `last_execute_time`, `next_execute_time`, `execute_count`, `success_count`, `fail_count`, `timeout_seconds`, `retry_count`, `alert_email`) VALUES (16, 'admin', NOW(), 'admin', NOW(), 5719, b'0', '隆基Git分支同步', '定期同步Git仓库分支信息每5分钟执行一次', 2, 'repositoryBranchServiceImpl', 'syncBranches', NULL, '{\"externalSystemId\": 4}', '0 */5 * * * ?', 'ENABLED', b'0', NOW(), NOW(), 0, 0, 0, 3600, 2, '');
INSERT INTO `deploy-ease-platform`.`schedule_job` (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `version`, `deleted`, `job_name`, `job_description`, `category_id`, `bean_name`, `method_name`, `form_definition_id`, `method_params`, `cron_expression`, `status`, `concurrent`, `last_execute_time`, `next_execute_time`, `execute_count`, `success_count`, `fail_count`, `timeout_seconds`, `retry_count`, `alert_email`) VALUES (17, 'admin', NOW(), 'admin', NOW(), 2, b'0', '服务器预警', '', 4, 'serverMonitorScheduler', 'collectServerMetrics', NULL, '{\n \"notificationChannelId\": 1,\n \"resourceAlertTemplateId\": 11\n \"serverOfflineTemplateId\": 12,\n}', '0 */5 * * * ?', 'DISABLED', b'0', NULL, NULL, 0, 0, 0, 300, 0, '');
INSERT INTO `deploy-ease-platform`.`schedule_job` (`id`, `create_by`, `create_time`, `update_by`, `update_time`, `version`, `deleted`, `job_name`, `job_description`, `category_id`, `bean_name`, `method_name`, `form_definition_id`, `method_params`, `cron_expression`, `status`, `concurrent`, `last_execute_time`, `next_execute_time`, `execute_count`, `success_count`, `fail_count`, `timeout_seconds`, `retry_count`, `alert_email`) VALUES (17, 'admin', NOW(), 'admin', NOW(), 2, b'0', '服务器预警', '', 4, 'serverMonitorScheduler', 'collectServerMetrics', NULL, '{"notificationChannelId": 1, "serverOfflineTemplateId": 12, "resourceAlertTemplateId": 11}', '0 */5 * * * ?', 'DISABLED', b'0', NULL, NULL, 0, 0, 0, 300, 0, '');