import os
import time
from threading import Thread
import cpuinfo
import GPUtil
import psutil
from tensorboardX import SummaryWriter
class Monitor(Thread):
    """Background thread that logs CPU, RAM and GPU usage to TensorBoard.

    The constructor writes static device information to ``log_dir`` and
    starts the thread.  The thread then keeps sampling the current process
    (and, when a GPU is usable, the GPU selected by ``gpu_id``) until
    :meth:`stop` is called.  Every sample is written through a
    ``tensorboardX.SummaryWriter`` and also appended to the ``CPU_load``,
    ``memoryUsed``, ``GPU_memoryUsed`` and ``GPU_memoryFree`` lists.
    """

    def __init__(self, log_dir, delay=1, gpu_id=0, verbose=False):
        """Initialize the monitor, write device info and start sampling.

        Args:
            log_dir: directory handed to ``SummaryWriter`` for the event files.
            delay: sampling interval in seconds used for the CPU measurement;
                a missing or non-positive value falls back to 1 second.
            gpu_id: index into ``GPUtil.getGPUs()`` of the GPU to monitor;
                ``None`` disables GPU monitoring.
            verbose: when true, print the current device status once before
                the thread starts.
        """
        super(Monitor, self).__init__()

        # GPU monitoring is enabled only if GPUtil reports at least one
        # available GPU (ordered by memory usage) and a gpu_id was given.
        available_ids = GPUtil.getAvailable(order="memory", limit=1)
        self.hasgpu = len(available_ids) >= 1 and gpu_id is not None
        self.gpu_id = gpu_id

        self.start_time = time.time()  # Start time
        self.verbose = verbose  # whether device status was requested at start
        self.stopped = False  # flag polled by run() to end the loop
        self.delay = delay  # seconds between two samples
        self.pid = os.getpid()
        self.writer = SummaryWriter(log_dir=log_dir)  # tensorboard writer

        # get_cpu_info() is expensive, so query it once and reuse the result.
        cpu_info = cpuinfo.get_cpu_info()
        label = "brand_raw" if "brand_raw" in cpu_info else "brand"
        self.writer.add_text(
            "device/CPU",
            "cpu count: {:d} \t brand: {:s}".format(os.cpu_count(), cpu_info[label]),
            0,
        )
        self.writer.add_text(
            "device/RAM",
            "Current RAM - total:\t {:.3f}GB;".format(
                psutil.virtual_memory().total / 2.0 ** 30
            ),
            0,
        )

        self.count = 0  # number of samples taken, used as the global step
        self.GPU_memoryUsed = []
        self.GPU_memoryFree = []
        self.CPU_load = []
        self.memoryUsed = []

        if self.hasgpu:
            self.GPU = GPUtil.getGPUs()[self.gpu_id]
            # GPUtil reports memory in MB; keep the total in GB for display.
            self.GPU_memoryTotal = self.GPU.memoryTotal / 2.0 ** 10
            self.writer.add_text(
                "device/GPU",
                "Current GPU (ID:{:d}) name:{:s} ".format(self.gpu_id, self.GPU.name)
                + "Total_GPU_memory: {:.3f}GB;".format(self.GPU_memoryTotal),
                0,
            )

        if verbose:
            devices_status()
        self.start()

    @staticmethod
    def _percent_of(part, total):
        """Return ``part`` as a percentage of ``total`` (0.0 if total is 0)."""
        if not total:
            return 0.0
        return 100.0 * part / total

    def write_cpu_status(self):
        """Write the CPU load of the current process and record it.

        The call blocks for the sampling interval (``self.delay`` seconds,
        or 1 second when ``delay`` is missing or not positive), which is what
        paces the loop in :meth:`run`.
        """
        interval = self.delay if self.delay and self.delay > 0 else 1
        CPU_load = psutil.Process(self.pid).cpu_percent(interval=interval)
        self.writer.add_scalars(
            "device/cpu",
            {"CPU_load (%)": CPU_load},
            self.count,
        )
        self.CPU_load.append(CPU_load)

    def write_mem_status(self):
        """Write the memory used by the current process (GB) and record it."""
        # First field of memory_info(), converted from bytes to GB.
        memoryUsed = psutil.Process(self.pid).memory_info()[0] / 2.0 ** 30
        self.writer.add_scalars(
            "device/mem",
            {"memory_used (GB)": memoryUsed},
            self.count,
        )
        self.memoryUsed.append(memoryUsed)

    def write_gpu_status(self):
        """Write GPU load and memory usage (all in percent) and record them."""
        self.GPU = GPUtil.getGPUs()[self.gpu_id]
        GPU_load = self.GPU.load * 100
        # Used/free/total are all in MB here, so the ratio is a true
        # percentage (dividing by the GB total would be off by 1024x).
        GPU_memoryUsed = self._percent_of(self.GPU.memoryUsed, self.GPU.memoryTotal)
        GPU_memoryFree = self._percent_of(self.GPU.memoryFree, self.GPU.memoryTotal)
        self.writer.add_scalars(
            "device/GPU",
            {
                "GPU_load (%)": GPU_load,
                "GPU_memory_used (%)": GPU_memoryUsed,
                "GPU_memory_free (%)": GPU_memoryFree,
            },
            self.count,
        )
        self.GPU_memoryUsed.append(GPU_memoryUsed)
        self.GPU_memoryFree.append(GPU_memoryFree)

    def run(self):
        """Sample CPU, memory and (if enabled) GPU until :meth:`stop` is called."""
        try:
            while not self.stopped:
                self.count += 1
                self.write_cpu_status()
                self.write_mem_status()
                if self.hasgpu:
                    self.write_gpu_status()
        finally:
            # Flush and release the event file once sampling has ended so
            # buffered samples are not lost when the program exits.
            self.writer.close()

    def stop(self):
        """Ask the sampling loop to end and report the elapsed time.

        Returns:
            Seconds elapsed between construction and this call.
        """
        self.run_time = time.time() - self.start_time
        print("Program running time:%d seconds" % self.run_time)
        self.stopped = True
        return self.run_time
def print_gpu_stat(gpu_id=None):
    """Print GPU status.

    Memory sizes are printed in GB (GPUtil reports them in MB); memory
    utilisation and load are printed in percent.

    Args:
        gpu_id: index into ``GPUtil.getGPUs()`` of the GPU to report.  When
            ``None``, every GPU id returned by ``GPUtil.getAvailable(limit=10)``
            is reported.
    """
    mb_per_gb = 2.0 ** 10
    gpus = GPUtil.getGPUs()  # query the driver once instead of once per GPU
    if gpu_id is None:
        for gpu_id in GPUtil.getAvailable(limit=10):
            GPU = gpus[gpu_id]
            print("Current GPU (ID:{:d}) name:\t{:s}".format(gpu_id, GPU.name))
            print("Total_GPU_memory:\t{:.3f}GB;".format(GPU.memoryTotal / mb_per_gb))
            # memoryUtil and load are fractions, so they are shown in percent.
            print("GPU_memoryUtil:\t{:.3f}%;".format(GPU.memoryUtil * 100))
            print("GPU_memoryUsed:\t{:.3f}GB;".format(GPU.memoryUsed / mb_per_gb))
            print("GPU_memoryFree:\t{:.3f}GB;".format(GPU.memoryFree / mb_per_gb))
            print("GPU_load:\t{:.3f}%;".format(GPU.load * 100))
    else:
        GPU = gpus[gpu_id]
        print("Current GPU (ID:{:d}) name:{:s}".format(gpu_id, GPU.name))
        print("Total_GPU_memory: {:.3f}GB;".format(GPU.memoryTotal / mb_per_gb))
        print("GPU_memoryUsed:{:.3f}GB;".format(GPU.memoryUsed / mb_per_gb))
        print("GPU_memoryFree:{:.3f}GB;".format(GPU.memoryFree / mb_per_gb))
        print("GPU_load:{:.3f}%;".format(GPU.load * 100))
"""
Module-level helper functions for printing device status.
"""
def print_cpu_stat():
    """Print CPU status.

    Prints the CPU count and brand string, followed by the 1, 5 and 15
    minute system load averages expressed as a percentage of the total
    CPU capacity (load average divided by the CPU count, times 100).
    """
    # get_cpu_info() is expensive, so query it once and reuse the result.
    cpu_info = cpuinfo.get_cpu_info()
    label = "brand_raw" if "brand_raw" in cpu_info else "brand"
    # os.cpu_count() may return None; treat that as a single CPU.
    n_cpus = os.cpu_count() or 1
    print("Cpu count: {:d} \t brand: {:s}".format(n_cpus, cpu_info[label]))

    # Take one snapshot so the three figures are consistent with each other.
    load_1m, load_5m, load_15m = (
        100.0 * load / n_cpus for load in os.getloadavg()
    )
    # str.format has no "%%" escape: a single "%" prints one percent sign.
    print("Avg_load_1m: \t{:.3f}%;".format(load_1m))
    print("Avg_load_5m:\t{:.3f}%;".format(load_5m))
    print("Avg_load_15m:\t{:.3f}%;".format(load_15m))
def print_mem_stat(memoryInfo=None):
    """Print total, available, used and free main memory in GB.

    Args:
        memoryInfo: object exposing ``total``, ``available``, ``used`` and
            ``free`` attributes in bytes.  When ``None``, the values are
            taken from ``psutil.virtual_memory()``.
    """
    bytes_per_gb = 2.0 ** 30
    stats = psutil.virtual_memory() if memoryInfo is None else memoryInfo
    # One (template, attribute) pair per printed line, in output order.
    rows = (
        ("Current RAM - total:\t {:.3f}GB;", "total"),
        ("Current RAM - available:\t{:.3f}GB;", "available"),
        ("Current RAM - used:\t{:.3f}GB;", "used"),
        ("Current RAM - free:\t{:.3f}GB;", "free"),
    )
    for template, field in rows:
        print(template.format(getattr(stats, field) / bytes_per_gb))
# print current devices available
def devices_status():
    """Print the current CPU, main-memory and GPU status, in that order."""
    for report in (print_cpu_stat, print_mem_stat, print_gpu_stat):
        report()