|
| 1 | +import { Inject, Injectable, Logger } from "@nestjs/common"; |
| 2 | +import { |
| 3 | + CoreV1Api, |
| 4 | + KubeConfig, |
| 5 | + Metrics, |
| 6 | + PodMetric, |
| 7 | + V1Node, |
| 8 | + FetchError, |
| 9 | +} from "@kubernetes/client-node"; |
| 10 | +import * as child_process from "node:child_process"; |
| 11 | +import { NetworkService } from "src/system/network.service"; |
| 12 | +import { ConfigService } from "@nestjs/config"; |
| 13 | +import { NodeConfig } from "src/configs/types/NodeConfig"; |
| 14 | + |
| 15 | +@Injectable() |
| 16 | +export class KubeneretesService { |
| 17 | + private apiClient: CoreV1Api; |
| 18 | + private metricsClient: Metrics; |
| 19 | + private nodeName: string; |
| 20 | + private cpuInfo: { |
| 21 | + coresPerSocket: number; |
| 22 | + threadsPerCore: number; |
| 23 | + }; |
| 24 | + |
| 25 | + constructor( |
| 26 | + @Inject(ConfigService) private readonly configService: ConfigService, |
| 27 | + @Inject(NetworkService) private networkService: NetworkService, |
| 28 | + private readonly logger: Logger, |
| 29 | + ) { |
| 30 | + this.nodeName = this.configService.get<NodeConfig>("node")!.nodeName; |
| 31 | + |
| 32 | + const kc = new KubeConfig(); |
| 33 | + kc.loadFromDefault(); |
| 34 | + this.apiClient = kc.makeApiClient(CoreV1Api); |
| 35 | + this.metricsClient = new Metrics(kc); |
| 36 | + this.cpuInfo = this.getCpuInfo(); |
| 37 | + } |
| 38 | + |
| 39 | + public async getNodeIP(node: V1Node) { |
| 40 | + return node.status?.addresses?.find( |
| 41 | + (address) => address.type === "InternalIP", |
| 42 | + )?.address; |
| 43 | + } |
| 44 | + |
| 45 | + public async getNodeSupportsCpuPinning(node: V1Node) { |
| 46 | + return node.metadata?.annotations?.["k3s.io/node-args"].includes( |
| 47 | + "cpu-manager-policy=static", |
| 48 | + ); |
| 49 | + } |
| 50 | + |
| 51 | + public async getNodeLabels(node: V1Node) { |
| 52 | + try { |
| 53 | + const _labels = node.metadata?.labels || {}; |
| 54 | + |
| 55 | + const labels: Record<string, string> = {}; |
| 56 | + |
| 57 | + for (const label in _labels) { |
| 58 | + if (label.includes("5stack")) { |
| 59 | + labels[label] = _labels[label]; |
| 60 | + } |
| 61 | + } |
| 62 | + |
| 63 | + return labels; |
| 64 | + } catch (error) { |
| 65 | + this.logger.error("error fetching node metadata:", error); |
| 66 | + } |
| 67 | + } |
| 68 | + |
| 69 | + public async getNode() { |
| 70 | + return await this.apiClient.readNode({ |
| 71 | + name: this.nodeName, |
| 72 | + }); |
| 73 | + } |
| 74 | + |
| 75 | + public async getNodeStats(node: V1Node) { |
| 76 | + try { |
| 77 | + const allocatable = node.status?.allocatable; |
| 78 | + const capacity = node.status?.capacity; |
| 79 | + |
| 80 | + if (!allocatable || !capacity) { |
| 81 | + throw new Error("Could not get node allocatable or capacity"); |
| 82 | + } |
| 83 | + |
| 84 | + if (!node.metadata?.name) { |
| 85 | + throw new Error("Could not get node name"); |
| 86 | + } |
| 87 | + |
| 88 | + const metrics = await this.metricsClient.getNodeMetrics(); |
| 89 | + |
| 90 | + return { |
| 91 | + disks: this.getDiskStats(), |
| 92 | + network: this.networkService.getNetworkStats(), |
| 93 | + memoryAllocatable: allocatable.memory, |
| 94 | + memoryCapacity: capacity.memory, |
| 95 | + cpuInfo: this.cpuInfo, |
| 96 | + cpuCapacity: parseInt(capacity.cpu), |
| 97 | + nvidiaGPU: allocatable["nvidia.com/gpu"] ? true : false, |
| 98 | + metrics: metrics.items.find( |
| 99 | + (nodeMetric) => nodeMetric.metadata.name === node.metadata?.name, |
| 100 | + ), |
| 101 | + }; |
| 102 | + } catch (error) { |
| 103 | + if (error instanceof FetchError && error.code !== "404") { |
| 104 | + this.logger.error("Error getting node metrics:", error.message); |
| 105 | + } |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + public async getPodStats() { |
| 110 | + try { |
| 111 | + const podList = await this.apiClient.listNamespacedPod({ |
| 112 | + namespace: "5stack", |
| 113 | + fieldSelector: `spec.nodeName=${this.nodeName}`, |
| 114 | + }); |
| 115 | + |
| 116 | + const stats: Array<{ |
| 117 | + name: string; |
| 118 | + metrics: PodMetric; |
| 119 | + }> = []; |
| 120 | + |
| 121 | + const { items: podMetrics } = |
| 122 | + await this.metricsClient.getPodMetrics("5stack"); |
| 123 | + |
| 124 | + for (const pod of podList.items) { |
| 125 | + if (!pod.metadata?.namespace || !pod.metadata?.name) { |
| 126 | + continue; |
| 127 | + } |
| 128 | + |
| 129 | + const podMetric = podMetrics.find( |
| 130 | + (podMetric) => podMetric.metadata.name === pod.metadata?.name, |
| 131 | + ); |
| 132 | + |
| 133 | + if (!podMetric) { |
| 134 | + continue; |
| 135 | + } |
| 136 | + |
| 137 | + stats.push({ |
| 138 | + name: pod.metadata?.labels?.app!, |
| 139 | + metrics: podMetric, |
| 140 | + }); |
| 141 | + } |
| 142 | + |
| 143 | + return stats; |
| 144 | + } catch (error) { |
| 145 | + this.logger.error("Error listing pods:", error); |
| 146 | + } |
| 147 | + } |
| 148 | + |
| 149 | + public async getNodeLowLatency(node: V1Node) { |
| 150 | + try { |
| 151 | + const nodeInfo = node.status?.nodeInfo; |
| 152 | + if (!nodeInfo) { |
| 153 | + throw new Error("Could not get node info"); |
| 154 | + } |
| 155 | + |
| 156 | + return nodeInfo.kernelVersion.includes("lowlatency"); |
| 157 | + } catch (error) { |
| 158 | + this.logger.error("Error getting node kernel information:", error); |
| 159 | + throw error; |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + private getDiskStats() { |
| 164 | + try { |
| 165 | + const output = child_process.execSync( |
| 166 | + "df -P / /demos 2>/dev/null || true", |
| 167 | + { encoding: "utf8" }, |
| 168 | + ); |
| 169 | + |
| 170 | + return output |
| 171 | + .split("\n") |
| 172 | + .map((line) => line.trim()) |
| 173 | + .filter((line) => { |
| 174 | + return line.length > 0 && !line.startsWith("Filesystem"); |
| 175 | + }) |
| 176 | + .map((line) => { |
| 177 | + const [filesystem, size, used, available, usedPercent, mountpoint] = |
| 178 | + line.split(/\s+/); |
| 179 | + return { |
| 180 | + filesystem, |
| 181 | + size, |
| 182 | + used, |
| 183 | + available, |
| 184 | + usedPercent, |
| 185 | + mountpoint, |
| 186 | + } as { |
| 187 | + filesystem: string; |
| 188 | + size: string; |
| 189 | + used: string; |
| 190 | + available: string; |
| 191 | + usedPercent: string; |
| 192 | + mountpoint: string; |
| 193 | + }; |
| 194 | + }) |
| 195 | + .filter((disk) => { |
| 196 | + return disk.mountpoint === "/" || disk.mountpoint === "/demos"; |
| 197 | + }); |
| 198 | + } catch (error) { |
| 199 | + this.logger.error("Error getting disk summary:", error); |
| 200 | + } |
| 201 | + } |
| 202 | + |
| 203 | + private getCpuInfo() { |
| 204 | + const json = child_process.execSync("lscpu -J", { encoding: "utf8" }); |
| 205 | + const parsed = JSON.parse(json) as { |
| 206 | + lscpu: Array<{ field: string; data: string }>; |
| 207 | + }; |
| 208 | + |
| 209 | + const map: Record<string, string> = {}; |
| 210 | + |
| 211 | + for (const item of parsed.lscpu) { |
| 212 | + map[item.field.replace(/:/g, "")] = item.data; |
| 213 | + } |
| 214 | + |
| 215 | + return { |
| 216 | + sockets: parseInt(map["Socket(s)"]), |
| 217 | + coresPerSocket: parseInt(map["Core(s) per socket"], 10), |
| 218 | + threadsPerCore: parseInt(map["Thread(s) per core"], 10), |
| 219 | + }; |
| 220 | + } |
| 221 | + |
| 222 | + public async hasGameServerImage() { |
| 223 | + const output = child_process.execSync( |
| 224 | + `ctr -a /containerd.sock -n k8s.io images ls | grep -q 'ghcr.io/5stackgg/game-server:latest' && echo "true" || echo "false"`, |
| 225 | + { encoding: "utf8" }, |
| 226 | + ); |
| 227 | + return output.trim() === "true"; |
| 228 | + } |
| 229 | +} |
0 commit comments