整理下初始化数据表
This commit is contained in:
parent
c36ee0808c
commit
279c19ad7a
@ -12,6 +12,7 @@ import jakarta.servlet.http.HttpServletResponse;
|
|||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.data.domain.Page;
|
import org.springframework.data.domain.Page;
|
||||||
import org.springframework.validation.annotation.Validated;
|
import org.springframework.validation.annotation.Validated;
|
||||||
|
import org.springframework.web.bind.annotation.PathVariable;
|
||||||
import org.springframework.web.bind.annotation.RequestBody;
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
import org.springframework.web.bind.annotation.RequestMapping;
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
import org.springframework.web.bind.annotation.RestController;
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
@ -38,18 +39,18 @@ public class ServerCategoryApiController
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Response<ServerCategoryDTO> update(Long aLong, ServerCategoryDTO dto) {
|
public Response<ServerCategoryDTO> update(@PathVariable Long id, @Validated @RequestBody ServerCategoryDTO dto) {
|
||||||
return super.update(aLong, dto);
|
return super.update(id, dto);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Response<Void> delete(Long aLong) {
|
public Response<Void> delete(@PathVariable Long id) {
|
||||||
return super.delete(aLong);
|
return super.delete(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Response<ServerCategoryDTO> findById(Long aLong) {
|
public Response<ServerCategoryDTO> findById(@PathVariable Long id) {
|
||||||
return super.findById(aLong);
|
return super.findById(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@ -29,16 +29,16 @@ public class ServerAlertRuleDTO extends BaseDTO {
|
|||||||
@NotNull(message = "监控指标类型不能为空")
|
@NotNull(message = "监控指标类型不能为空")
|
||||||
private MonitorMetricEnum alertType;
|
private MonitorMetricEnum alertType;
|
||||||
|
|
||||||
@Schema(description = "警告阈值(%)", required = true, example = "80.00")
|
@Schema(description = "警告阈值(CPU/MEMORY/DISK为%,NETWORK为MB/s)", required = true, example = "80.00")
|
||||||
@NotNull(message = "警告阈值不能为空")
|
@NotNull(message = "警告阈值不能为空")
|
||||||
@DecimalMin(value = "0.00", message = "警告阈值必须大于等于0")
|
@DecimalMin(value = "0.00", message = "警告阈值必须大于等于0")
|
||||||
@DecimalMax(value = "100.00", message = "警告阈值不能超过100")
|
@DecimalMax(value = "10000.00", message = "警告阈值不能超过10000")
|
||||||
private BigDecimal warningThreshold;
|
private BigDecimal warningThreshold;
|
||||||
|
|
||||||
@Schema(description = "严重阈值(%)", required = true, example = "90.00")
|
@Schema(description = "严重阈值(CPU/MEMORY/DISK为%,NETWORK为MB/s)", required = true, example = "90.00")
|
||||||
@NotNull(message = "严重阈值不能为空")
|
@NotNull(message = "严重阈值不能为空")
|
||||||
@DecimalMin(value = "0.00", message = "严重阈值必须大于等于0")
|
@DecimalMin(value = "0.00", message = "严重阈值必须大于等于0")
|
||||||
@DecimalMax(value = "100.00", message = "严重阈值不能超过100")
|
@DecimalMax(value = "10000.00", message = "严重阈值不能超过10000")
|
||||||
private BigDecimal criticalThreshold;
|
private BigDecimal criticalThreshold;
|
||||||
|
|
||||||
@Schema(description = "持续时长(分钟)", example = "5")
|
@Schema(description = "持续时长(分钟)", example = "5")
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
package com.qqchen.deploy.backend.deploy.scheduler;
|
package com.qqchen.deploy.backend.deploy.scheduler;
|
||||||
|
|
||||||
|
import com.qqchen.deploy.backend.deploy.dto.ServerInfoDTO;
|
||||||
import com.qqchen.deploy.backend.deploy.dto.ServerMonitorDataDTO;
|
import com.qqchen.deploy.backend.deploy.dto.ServerMonitorDataDTO;
|
||||||
import com.qqchen.deploy.backend.deploy.dto.ServerMonitorNotificationConfig;
|
import com.qqchen.deploy.backend.deploy.dto.ServerMonitorNotificationConfig;
|
||||||
import com.qqchen.deploy.backend.deploy.entity.Server;
|
import com.qqchen.deploy.backend.deploy.entity.Server;
|
||||||
@ -9,6 +10,7 @@ import com.qqchen.deploy.backend.deploy.repository.IServerAlertRuleRepository;
|
|||||||
import com.qqchen.deploy.backend.deploy.repository.IServerRepository;
|
import com.qqchen.deploy.backend.deploy.repository.IServerRepository;
|
||||||
import com.qqchen.deploy.backend.deploy.service.IServerAlertService;
|
import com.qqchen.deploy.backend.deploy.service.IServerAlertService;
|
||||||
import com.qqchen.deploy.backend.deploy.service.IServerMonitorService;
|
import com.qqchen.deploy.backend.deploy.service.IServerMonitorService;
|
||||||
|
import com.qqchen.deploy.backend.deploy.service.IServerService;
|
||||||
import com.qqchen.deploy.backend.framework.dto.DiskUsageInfo;
|
import com.qqchen.deploy.backend.framework.dto.DiskUsageInfo;
|
||||||
import com.qqchen.deploy.backend.framework.ssh.ISSHCommandService;
|
import com.qqchen.deploy.backend.framework.ssh.ISSHCommandService;
|
||||||
import com.qqchen.deploy.backend.framework.ssh.SSHCommandServiceFactory;
|
import com.qqchen.deploy.backend.framework.ssh.SSHCommandServiceFactory;
|
||||||
@ -34,35 +36,38 @@ import java.util.stream.Collectors;
|
|||||||
@Slf4j
|
@Slf4j
|
||||||
@Component
|
@Component
|
||||||
public class ServerMonitorScheduler {
|
public class ServerMonitorScheduler {
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private IServerRepository serverRepository;
|
private IServerRepository serverRepository;
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private SSHCommandServiceFactory sshCommandServiceFactory;
|
private SSHCommandServiceFactory sshCommandServiceFactory;
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private IServerMonitorService monitorService;
|
private IServerMonitorService monitorService;
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private IServerAlertService alertService;
|
private IServerAlertService alertService;
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private IServerAlertRuleRepository alertRuleRepository;
|
private IServerAlertRuleRepository alertRuleRepository;
|
||||||
|
|
||||||
@Resource
|
@Resource
|
||||||
private INotificationService notificationService;
|
private INotificationService notificationService;
|
||||||
|
|
||||||
|
@Resource
|
||||||
|
private IServerService serverService;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 采集所有在线服务器的监控数据
|
* 采集所有在线服务器的监控数据
|
||||||
* 此方法由定时任务管理系统调用
|
* 此方法由定时任务管理系统调用
|
||||||
*
|
*
|
||||||
* @param notificationChannelId 通知渠道ID(可选,为null则不发送通知)
|
* @param notificationChannelId 通知渠道ID(可选,为null则不发送通知)
|
||||||
* @param serverOfflineTemplateId 服务器离线通知模板ID(可选)
|
* @param serverOfflineTemplateId 服务器离线通知模板ID(可选)
|
||||||
* @param resourceAlertTemplateId 资源告警通知模板ID(可选)
|
* @param resourceAlertTemplateId 资源告警通知模板ID(可选)
|
||||||
*/
|
*/
|
||||||
public void collectServerMetrics(Long notificationChannelId,
|
public void collectServerMetrics(Long notificationChannelId,
|
||||||
Long serverOfflineTemplateId,
|
Long serverOfflineTemplateId,
|
||||||
Long resourceAlertTemplateId) {
|
Long resourceAlertTemplateId) {
|
||||||
// 构建通知配置对象
|
// 构建通知配置对象
|
||||||
ServerMonitorNotificationConfig config = null;
|
ServerMonitorNotificationConfig config = null;
|
||||||
@ -71,63 +76,63 @@ public class ServerMonitorScheduler {
|
|||||||
config.setNotificationChannelId(notificationChannelId);
|
config.setNotificationChannelId(notificationChannelId);
|
||||||
config.setServerOfflineTemplateId(serverOfflineTemplateId);
|
config.setServerOfflineTemplateId(serverOfflineTemplateId);
|
||||||
config.setResourceAlertTemplateId(resourceAlertTemplateId);
|
config.setResourceAlertTemplateId(resourceAlertTemplateId);
|
||||||
|
|
||||||
log.info("========== 开始采集服务器监控数据 ========== channelId={}, offlineTemplateId={}, alertTemplateId={}",
|
log.info("========== 开始采集服务器监控数据 ========== channelId={}, offlineTemplateId={}, alertTemplateId={}",
|
||||||
notificationChannelId, serverOfflineTemplateId, resourceAlertTemplateId);
|
notificationChannelId, serverOfflineTemplateId, resourceAlertTemplateId);
|
||||||
} else {
|
} else {
|
||||||
log.info("========== 开始采集服务器监控数据(不发送通知) ==========");
|
log.info("========== 开始采集服务器监控数据(不发送通知) ==========");
|
||||||
}
|
}
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 1. 查询所有服务器(不管当前状态),准备检测在线状态
|
// 1. 查询所有服务器(不管当前状态),准备检测在线状态
|
||||||
List<Server> allServers = serverRepository.findAll();
|
List<Server> allServers = serverRepository.findAll();
|
||||||
|
|
||||||
if (allServers.isEmpty()) {
|
if (allServers.isEmpty()) {
|
||||||
log.debug("没有需要监控的服务器,跳过监控采集");
|
log.debug("没有需要监控的服务器,跳过监控采集");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("发现 {} 台服务器,开始检测在线状态并采集监控数据", allServers.size());
|
log.info("发现 {} 台服务器,开始检测在线状态并采集监控数据", allServers.size());
|
||||||
|
|
||||||
// 2. 并发检测所有服务器的连接状态并采集监控数据
|
// 2. 并发检测所有服务器的连接状态并采集监控数据
|
||||||
// - 连接失败 → 发送离线通知
|
// - 连接失败 → 发送离线通知
|
||||||
// - 连接成功 → 采集数据,检查阈值告警
|
// - 连接成功 → 采集数据,检查阈值告警
|
||||||
final ServerMonitorNotificationConfig finalConfig = config;
|
final ServerMonitorNotificationConfig finalConfig = config;
|
||||||
List<CompletableFuture<ServerMonitorDataDTO>> futures = allServers.stream()
|
List<CompletableFuture<ServerMonitorDataDTO>> futures = allServers.stream()
|
||||||
.map(server -> CompletableFuture.supplyAsync(() ->
|
.map(server -> CompletableFuture.supplyAsync(() ->
|
||||||
collectSingleServerWithStatusCheck(server, finalConfig)))
|
collectSingleServerWithStatusCheck(server, finalConfig)))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
// 3. 等待所有任务完成
|
// 3. 等待所有任务完成
|
||||||
CompletableFuture<Void> allFutures = CompletableFuture.allOf(
|
CompletableFuture<Void> allFutures = CompletableFuture.allOf(
|
||||||
futures.toArray(new CompletableFuture[0])
|
futures.toArray(new CompletableFuture[0])
|
||||||
);
|
);
|
||||||
allFutures.join();
|
allFutures.join();
|
||||||
|
|
||||||
// 4. 收集结果
|
// 4. 收集结果
|
||||||
List<ServerMonitorDataDTO> monitorDataList = futures.stream()
|
List<ServerMonitorDataDTO> monitorDataList = futures.stream()
|
||||||
.map(CompletableFuture::join)
|
.map(CompletableFuture::join)
|
||||||
.filter(data -> data != null)
|
.filter(data -> data != null)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
long duration = System.currentTimeMillis() - startTime;
|
long duration = System.currentTimeMillis() - startTime;
|
||||||
log.info("========== 监控数据采集完成: 在线={}/{}, 耗时={}ms ==========",
|
log.info("========== 监控数据采集完成: 在线={}/{}, 耗时={}ms ==========",
|
||||||
monitorDataList.size(), allServers.size(), duration);
|
monitorDataList.size(), allServers.size(), duration);
|
||||||
|
|
||||||
// 5. 批量保存监控数据到数据库
|
// 5. 批量保存监控数据到数据库
|
||||||
if (!monitorDataList.isEmpty()) {
|
if (!monitorDataList.isEmpty()) {
|
||||||
monitorService.batchSaveMonitorData(monitorDataList);
|
monitorService.batchSaveMonitorData(monitorDataList);
|
||||||
log.info("监控数据已保存到数据库: count={}", monitorDataList.size());
|
log.info("监控数据已保存到数据库: count={}", monitorDataList.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// 6. 检查告警规则(优化:只查询一次规则)
|
// 6. 检查告警规则(优化:只查询一次规则)
|
||||||
if (!monitorDataList.isEmpty()) {
|
if (!monitorDataList.isEmpty()) {
|
||||||
// 一次性查询所有规则,避免 N 次数据库查询
|
// 一次性查询所有规则,避免 N 次数据库查询
|
||||||
List<ServerAlertRule> allRules = alertRuleRepository.findAll();
|
List<ServerAlertRule> allRules = alertRuleRepository.findAll();
|
||||||
log.debug("开始检查告警规则: 服务器数={}, 规则数={}",
|
log.debug("开始检查告警规则: 服务器数={}, 规则数={}",
|
||||||
monitorDataList.size(), allRules.size());
|
monitorDataList.size(), allRules.size());
|
||||||
|
|
||||||
for (ServerMonitorDataDTO data : monitorDataList) {
|
for (ServerMonitorDataDTO data : monitorDataList) {
|
||||||
try {
|
try {
|
||||||
alertService.checkAlertRules(data.getServerId(), data, allRules, config);
|
alertService.checkAlertRules(data.getServerId(), data, allRules, config);
|
||||||
@ -136,7 +141,7 @@ public class ServerMonitorScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("服务器监控数据采集失败", e);
|
log.error("服务器监控数据采集失败", e);
|
||||||
} finally {
|
} finally {
|
||||||
@ -148,20 +153,41 @@ public class ServerMonitorScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 检测服务器连接状态并采集监控数据
|
* 检测服务器连接状态并采集监控数据
|
||||||
|
* 统一使用 ServerService.testConnection() 方法进行连接测试和状态更新
|
||||||
*/
|
*/
|
||||||
private ServerMonitorDataDTO collectSingleServerWithStatusCheck(Server server, ServerMonitorNotificationConfig config) {
|
private ServerMonitorDataDTO collectSingleServerWithStatusCheck(Server server, ServerMonitorNotificationConfig config) {
|
||||||
try {
|
try {
|
||||||
// 尝试采集监控数据
|
// 1. 调用统一的连接测试方法(会自动更新服务器状态、硬件信息等)
|
||||||
return collectSingleServer(server);
|
ServerInfoDTO info = serverService.testConnection(server.getId());
|
||||||
|
|
||||||
|
// 2. 检查连接状态
|
||||||
|
if (!info.getConnected()) {
|
||||||
|
// 连接失败(离线),发送离线通知
|
||||||
|
log.error("服务器连接失败(离线): serverId={}, name={}, ip={}, error={}",
|
||||||
|
server.getId(), server.getServerName(), server.getHostIp(), info.getErrorMessage());
|
||||||
|
|
||||||
|
if (config != null && config.getNotificationChannelId() != null && config.getServerOfflineTemplateId() != null) {
|
||||||
|
try {
|
||||||
|
sendServerOfflineNotification(server, config);
|
||||||
|
} catch (Exception notifyError) {
|
||||||
|
log.error("发送服务器离线通知失败: serverId={}", server.getId(), notifyError);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. 连接成功,采集监控数据
|
||||||
|
return collectServerMonitorData(server);
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// 采集失败,说明服务器无法连接(离线)
|
// 异常情况,发送离线通知
|
||||||
log.error("服务器连接失败(离线): serverId={}, name={}, ip={}, error={}",
|
log.error("服务器连接测试异常: serverId={}, name={}, ip={}, error={}",
|
||||||
server.getId(), server.getServerName(), server.getHostIp(), e.getMessage());
|
server.getId(), server.getServerName(), server.getHostIp(), e.getMessage());
|
||||||
|
|
||||||
// 发送离线通知
|
|
||||||
if (config != null && config.getNotificationChannelId() != null && config.getServerOfflineTemplateId() != null) {
|
if (config != null && config.getNotificationChannelId() != null && config.getServerOfflineTemplateId() != null) {
|
||||||
try {
|
try {
|
||||||
sendServerOfflineNotification(server, config);
|
sendServerOfflineNotification(server, config);
|
||||||
@ -169,11 +195,11 @@ public class ServerMonitorScheduler {
|
|||||||
log.error("发送服务器离线通知失败: serverId={}", server.getId(), notifyError);
|
log.error("发送服务器离线通知失败: serverId={}", server.getId(), notifyError);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 发送服务器离线通知
|
* 发送服务器离线通知
|
||||||
*/
|
*/
|
||||||
@ -185,40 +211,41 @@ public class ServerMonitorScheduler {
|
|||||||
templateParams.put("serverIp", server.getHostIp());
|
templateParams.put("serverIp", server.getHostIp());
|
||||||
templateParams.put("offlineTime", LocalDateTime.now().format(
|
templateParams.put("offlineTime", LocalDateTime.now().format(
|
||||||
java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")));
|
java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")));
|
||||||
|
|
||||||
// 2. 构建SendNotificationRequest
|
// 2. 构建SendNotificationRequest
|
||||||
SendNotificationRequest request = new SendNotificationRequest();
|
SendNotificationRequest request = new SendNotificationRequest();
|
||||||
request.setChannelId(config.getNotificationChannelId());
|
request.setChannelId(config.getNotificationChannelId());
|
||||||
request.setNotificationTemplateId(config.getServerOfflineTemplateId());
|
request.setNotificationTemplateId(config.getServerOfflineTemplateId());
|
||||||
request.setTemplateParams(templateParams);
|
request.setTemplateParams(templateParams);
|
||||||
|
|
||||||
// 3. 发送通知(NotificationService会自动根据渠道类型创建请求对象)
|
// 3. 发送通知(NotificationService会自动根据渠道类型创建请求对象)
|
||||||
notificationService.send(request);
|
notificationService.send(request);
|
||||||
|
|
||||||
log.info("✅ 服务器离线通知已发送: serverId={}, serverName={}, ip={}",
|
log.info("✅ 服务器离线通知已发送: serverId={}, serverName={}, ip={}",
|
||||||
server.getId(), server.getServerName(), server.getHostIp());
|
server.getId(), server.getServerName(), server.getHostIp());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("发送服务器离线通知异常: serverId={}", server.getId(), e);
|
log.error("发送服务器离线通知异常: serverId={}", server.getId(), e);
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 采集单台服务器的监控数据
|
* 采集服务器监控数据(CPU、内存、磁盘使用率)
|
||||||
|
* 注意:此方法仅负责采集监控数据,不负责连接测试和状态更新
|
||||||
*/
|
*/
|
||||||
private ServerMonitorDataDTO collectSingleServer(Server server) {
|
private ServerMonitorDataDTO collectServerMonitorData(Server server) throws Exception {
|
||||||
SSHClient sshClient = null;
|
SSHClient sshClient = null;
|
||||||
ISSHCommandService sshService = null;
|
ISSHCommandService sshService = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 1. 获取对应OS的SSH服务
|
// 1. 获取对应OS的SSH服务
|
||||||
sshService = sshCommandServiceFactory.getService(server.getOsType());
|
sshService = sshCommandServiceFactory.getService(server.getOsType());
|
||||||
|
|
||||||
// 2. 创建SSH连接
|
// 2. 创建SSH连接
|
||||||
String password = null;
|
String password = null;
|
||||||
String privateKey = null;
|
String privateKey = null;
|
||||||
String passphrase = null;
|
String passphrase = null;
|
||||||
|
|
||||||
switch (server.getAuthType()) {
|
switch (server.getAuthType()) {
|
||||||
case PASSWORD:
|
case PASSWORD:
|
||||||
password = server.getSshPassword();
|
password = server.getSshPassword();
|
||||||
@ -228,7 +255,7 @@ public class ServerMonitorScheduler {
|
|||||||
passphrase = server.getSshPassphrase();
|
passphrase = server.getSshPassphrase();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
sshClient = sshService.createConnection(
|
sshClient = sshService.createConnection(
|
||||||
server.getHostIp(),
|
server.getHostIp(),
|
||||||
server.getSshPort(),
|
server.getSshPort(),
|
||||||
@ -237,12 +264,12 @@ public class ServerMonitorScheduler {
|
|||||||
privateKey,
|
privateKey,
|
||||||
passphrase
|
passphrase
|
||||||
);
|
);
|
||||||
|
|
||||||
// 3. 采集监控数据
|
// 3. 采集监控数据
|
||||||
BigDecimal cpuUsage = sshService.getCpuUsage(sshClient);
|
BigDecimal cpuUsage = sshService.getCpuUsage(sshClient);
|
||||||
BigDecimal memoryUsage = sshService.getMemoryUsage(sshClient);
|
BigDecimal memoryUsage = sshService.getMemoryUsage(sshClient);
|
||||||
List<DiskUsageInfo> diskUsage = sshService.getDiskUsage(sshClient);
|
List<DiskUsageInfo> diskUsage = sshService.getDiskUsage(sshClient);
|
||||||
|
|
||||||
// 4. 计算已用内存(基于内存使用率和总内存)
|
// 4. 计算已用内存(基于内存使用率和总内存)
|
||||||
Integer memoryUsed = null;
|
Integer memoryUsed = null;
|
||||||
if (memoryUsage != null && server.getMemorySize() != null) {
|
if (memoryUsage != null && server.getMemorySize() != null) {
|
||||||
@ -250,7 +277,7 @@ public class ServerMonitorScheduler {
|
|||||||
.divide(new BigDecimal(100), 0, BigDecimal.ROUND_HALF_UP)
|
.divide(new BigDecimal(100), 0, BigDecimal.ROUND_HALF_UP)
|
||||||
.intValue();
|
.intValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5. 构建监控数据
|
// 5. 构建监控数据
|
||||||
ServerMonitorDataDTO data = ServerMonitorDataDTO.builder()
|
ServerMonitorDataDTO data = ServerMonitorDataDTO.builder()
|
||||||
.serverId(server.getId())
|
.serverId(server.getId())
|
||||||
@ -260,17 +287,17 @@ public class ServerMonitorScheduler {
|
|||||||
.diskUsage(diskUsage)
|
.diskUsage(diskUsage)
|
||||||
.collectTime(LocalDateTime.now())
|
.collectTime(LocalDateTime.now())
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
log.debug("服务器监控数据采集成功: serverId={}, cpu={}%, mem={}%, diskCount={}",
|
log.debug("服务器监控数据采集成功: serverId={}, cpu={}%, mem={}%, diskCount={}",
|
||||||
server.getId(), cpuUsage, memoryUsage,
|
server.getId(), cpuUsage, memoryUsage,
|
||||||
diskUsage != null ? diskUsage.size() : 0);
|
diskUsage != null ? diskUsage.size() : 0);
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("采集服务器监控数据失败: serverId={}, serverName={}, error={}",
|
log.error("采集服务器监控数据失败: serverId={}, serverName={}, error={}",
|
||||||
server.getId(), server.getServerName(), e.getMessage());
|
server.getId(), server.getServerName(), e.getMessage());
|
||||||
return null;
|
throw e; // 抛出异常让上层处理
|
||||||
} finally {
|
} finally {
|
||||||
// 6. 关闭SSH连接
|
// 6. 关闭SSH连接
|
||||||
if (sshService != null && sshClient != null) {
|
if (sshService != null && sshClient != null) {
|
||||||
@ -278,19 +305,19 @@ public class ServerMonitorScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 清理历史监控数据
|
* 清理历史监控数据
|
||||||
* 此方法由定时任务管理系统调用,建议每天凌晨执行
|
* 此方法由定时任务管理系统调用,建议每天凌晨执行
|
||||||
*/
|
*/
|
||||||
public void cleanOldMonitorData() {
|
public void cleanOldMonitorData() {
|
||||||
log.info("========== 开始清理历史监控数据 ==========");
|
log.info("========== 开始清理历史监控数据 ==========");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 删除30天前的数据
|
// 删除30天前的数据
|
||||||
LocalDateTime thirtyDaysAgo = LocalDateTime.now().minusDays(30);
|
LocalDateTime thirtyDaysAgo = LocalDateTime.now().minusDays(30);
|
||||||
int deletedCount = monitorService.deleteOldData(thirtyDaysAgo);
|
int deletedCount = monitorService.deleteOldData(thirtyDaysAgo);
|
||||||
|
|
||||||
log.info("========== 历史监控数据清理完成: count={} ==========", deletedCount);
|
log.info("========== 历史监控数据清理完成: count={} ==========", deletedCount);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("清理历史监控数据失败", e);
|
log.error("清理历史监控数据失败", e);
|
||||||
|
|||||||
@ -1200,9 +1200,9 @@ CREATE TABLE deploy_server_alert_rule
|
|||||||
-- 告警类型
|
-- 告警类型
|
||||||
alert_type VARCHAR(20) NOT NULL COMMENT '告警类型: CPU/MEMORY/DISK',
|
alert_type VARCHAR(20) NOT NULL COMMENT '告警类型: CPU/MEMORY/DISK',
|
||||||
|
|
||||||
-- 阈值
|
-- 阈值(支持百分比和绝对值:CPU/MEMORY/DISK为%,NETWORK为MB/s)
|
||||||
warning_threshold DECIMAL(5,2) NOT NULL COMMENT '警告阈值(%)',
|
warning_threshold DECIMAL(10,2) NOT NULL COMMENT '警告阈值',
|
||||||
critical_threshold DECIMAL(5,2) NOT NULL COMMENT '严重阈值(%)',
|
critical_threshold DECIMAL(10,2) NOT NULL COMMENT '严重阈值',
|
||||||
|
|
||||||
-- 持续时间(避免误报)
|
-- 持续时间(避免误报)
|
||||||
duration_minutes INT DEFAULT 5 COMMENT '持续时长(分钟)',
|
duration_minutes INT DEFAULT 5 COMMENT '持续时长(分钟)',
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user