mirror of
https://github.com/ostris/ai-toolkit.git
synced 2026-01-26 16:39:47 +00:00
Added queuing system to the UI
This commit is contained in:
@@ -124,6 +124,19 @@ class DiffusionTrainer(SDTrainer):
|
||||
|
||||
return _check_stop()
|
||||
|
||||
def should_return_to_queue(self):
    """Report whether this job has been asked to go back to the queue.

    Always returns False when the trainer is not a UI trainer. Otherwise the
    ``return_to_queue`` column of this job's row in the ``Job`` table is read.

    Returns:
        True only when a row exists for ``self.job_id`` and its
        ``return_to_queue`` value equals 1; False in every other case.
    """
    if not self.is_ui_trainer:
        return False

    with self._db_connect() as conn:
        cur = conn.cursor()
        cur.execute(
            "SELECT return_to_queue FROM Job WHERE id = ?", (self.job_id,))
        row = cur.fetchone()
        # No matching row means there is nothing asking us to requeue.
        if row is None:
            return False
        return row[0] == 1
|
||||
|
||||
def maybe_stop(self):
|
||||
if not self.is_ui_trainer:
|
||||
return
|
||||
@@ -132,6 +145,11 @@ class DiffusionTrainer(SDTrainer):
|
||||
self._update_status("stopped", "Job stopped"))
|
||||
self.is_stopping = True
|
||||
raise Exception("Job stopped")
|
||||
if self.should_return_to_queue():
|
||||
self._run_async_operation(
|
||||
self._update_status("queued", "Job queued"))
|
||||
self.is_stopping = True
|
||||
raise Exception("Job returning to queue")
|
||||
|
||||
async def _update_key(self, key, value):
|
||||
if not self.accelerator.is_main_process:
|
||||
|
||||
@@ -115,6 +115,17 @@ class UITrainer(SDTrainer):
|
||||
return False if stop is None else stop[0] == 1
|
||||
|
||||
return _check_stop()
|
||||
|
||||
def should_return_to_queue(self):
    """Report whether this job has been asked to go back to the queue.

    Reads the ``return_to_queue`` column of this job's row in the ``Job``
    table.

    Returns:
        True only when a row exists for ``self.job_id`` and its
        ``return_to_queue`` value equals 1; False otherwise.
    """
    with self._db_connect() as conn:
        cur = conn.cursor()
        cur.execute(
            "SELECT return_to_queue FROM Job WHERE id = ?", (self.job_id,))
        row = cur.fetchone()
        # No matching row means there is nothing asking us to requeue.
        if row is None:
            return False
        return row[0] == 1
|
||||
|
||||
def maybe_stop(self):
|
||||
if self.should_stop():
|
||||
@@ -122,6 +133,11 @@ class UITrainer(SDTrainer):
|
||||
self._update_status("stopped", "Job stopped"))
|
||||
self.is_stopping = True
|
||||
raise Exception("Job stopped")
|
||||
if self.should_return_to_queue():
|
||||
self._run_async_operation(
|
||||
self._update_status("queued", "Job queued"))
|
||||
self.is_stopping = True
|
||||
raise Exception("Job returning to queue")
|
||||
|
||||
async def _update_key(self, key, value):
|
||||
if not self.accelerator.is_main_process:
|
||||
|
||||
71
ui/cron/actions/processQueue.ts
Normal file
71
ui/cron/actions/processQueue.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import prisma from '../prisma';
|
||||
|
||||
import { Job, Queue } from '@prisma/client';
|
||||
import startJob from './startJob';
|
||||
|
||||
/**
 * Flags every job still running on a stopped queue's GPU(s) so it hands itself back to the queue.
 *
 * Only jobs that have not been flagged yet are touched; without that filter every cron tick
 * would rewrite `info` and log the same "Stopping job" line until the trainer notices the flag.
 */
async function returnRunningJobsToQueue(queue: Queue): Promise<void> {
  const runningJobs: Job[] = await prisma.job.findMany({
    where: {
      status: 'running',
      gpu_ids: queue.gpu_ids,
      return_to_queue: false,
    },
  });

  for (const job of runningJobs) {
    console.log(`Stopping job ${job.id} on GPU(s) ${job.gpu_ids}`);
    await prisma.job.update({
      where: { id: job.id },
      data: {
        return_to_queue: true,
        info: 'Stopping job...',
      },
    });
  }
}

/**
 * Starts the next queued job for a running queue, or switches the queue off when it is empty.
 * Does nothing while a job for the same GPU(s) is still running or stopping.
 */
async function advanceQueue(queue: Queue): Promise<void> {
  // first see if one is already running, status of running or stopping
  const activeJob: Job | null = await prisma.job.findFirst({
    where: {
      status: { in: ['running', 'stopping'] },
      gpu_ids: queue.gpu_ids,
    },
  });
  if (activeJob) {
    // already running, nothing to do
    return;
  }

  // find the next job in the queue (lowest queue_position first)
  const nextJob: Job | null = await prisma.job.findFirst({
    where: {
      status: 'queued',
      gpu_ids: queue.gpu_ids,
    },
    orderBy: {
      queue_position: 'asc',
    },
  });

  if (nextJob) {
    console.log(`Starting job ${nextJob.id} on GPU(s) ${nextJob.gpu_ids}`);
    await startJob(nextJob.id);
    return;
  }

  // no more jobs, stop the queue
  console.log(`No more jobs in queue for GPU(s) ${queue.gpu_ids}, stopping queue`);
  await prisma.queue.update({
    where: { id: queue.id },
    data: { is_running: false },
  });
}

/**
 * One pass over every queue, in ascending id order.
 *
 * - Stopped queue: running jobs on its GPU(s) are asked to return to the queue.
 * - Running queue: the next queued job is started unless one is already active;
 *   an empty queue is switched off.
 */
export default async function processQueue(): Promise<void> {
  const queues: Queue[] = await prisma.queue.findMany({
    orderBy: {
      id: 'asc',
    },
  });

  for (const queue of queues) {
    if (queue.is_running) {
      await advanceQueue(queue);
    } else {
      await returnRunningJobsToQueue(queue);
    }
  }
}
|
||||
179
ui/cron/actions/startJob.ts
Normal file
179
ui/cron/actions/startJob.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
import prisma from '../prisma';
|
||||
import { Job } from '@prisma/client';
|
||||
import { spawn } from 'child_process';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
import { TOOLKIT_ROOT, getTrainingFolder, getHFToken } from '../paths';
|
||||
const isWindows = process.platform === 'win32';
|
||||
|
||||
/**
 * Best-effort: marks the job as failed so a job that never launched does not stay
 * in the 'running' state. A failure to write the status is logged, not rethrown.
 */
const markJobLaunchError = async (jobID: string, message: string): Promise<void> => {
  try {
    await prisma.job.update({
      where: { id: jobID },
      data: {
        status: 'error',
        info: `Error launching job: ${message}`,
      },
    });
  } catch (e) {
    console.error(`Error recording launch failure for job ${jobID}:`, e);
  }
};

/**
 * If log.txt already exists, moves it to logs/{num}_log.txt using the first unused num.
 * Failures are logged and ignored so a stale log never prevents a launch.
 */
const archivePreviousLog = (trainingFolder: string, logPath: string): void => {
  try {
    if (!fs.existsSync(logPath)) {
      return;
    }
    const logsFolder = path.join(trainingFolder, 'logs');
    fs.mkdirSync(logsFolder, { recursive: true });

    let num = 0;
    while (fs.existsSync(path.join(logsFolder, `${num}_log.txt`))) {
      num++;
    }
    fs.renameSync(logPath, path.join(logsFolder, `${num}_log.txt`));
  } catch (e) {
    console.error('Error moving log file:', e);
  }
};

/** Picks the interpreter from .venv, then venv, under TOOLKIT_ROOT; falls back to 'python' on PATH. */
const resolvePythonPath = (): string => {
  for (const venvDir of ['.venv', 'venv']) {
    if (fs.existsSync(path.join(TOOLKIT_ROOT, venvDir))) {
      return isWindows
        ? path.join(TOOLKIT_ROOT, venvDir, 'Scripts', 'python.exe')
        : path.join(TOOLKIT_ROOT, venvDir, 'bin', 'python');
    }
  }
  return 'python';
};

/**
 * Prepares the training folder and config for `job`, then launches run.py as a detached
 * process that outlives this worker.
 *
 * The returned promise always settles: it resolves once the process has been spawned or
 * once the failure has been recorded on the job (status 'error'). Nothing is monitored
 * afterwards except the child's 'error' event, which reports asynchronous spawn failures.
 *
 * @param job The job row to launch; `job_config` must be a JSON string.
 */
const startAndWatchJob = async (job: Job): Promise<void> => {
  const jobID = job.id;

  try {
    // setup the training
    const trainingRoot = await getTrainingFolder();
    const trainingFolder = path.join(trainingRoot, job.name);
    fs.mkdirSync(trainingFolder, { recursive: true });

    const configPath = path.join(trainingFolder, '.job_config.json');
    const logPath = path.join(trainingFolder, 'log.txt');
    archivePreviousLog(trainingFolder, logPath);

    // point the job config at the toolkit database and write it next to the outputs
    const jobConfig = JSON.parse(job.job_config);
    jobConfig.config.process[0].sqlite_db_path = path.join(TOOLKIT_ROOT, 'aitk_db.db');
    fs.writeFileSync(configPath, JSON.stringify(jobConfig, null, 2));

    const runFilePath = path.join(TOOLKIT_ROOT, 'run.py');
    if (!fs.existsSync(runFilePath)) {
      console.error(`run.py not found at path: ${runFilePath}`);
      await markJobLaunchError(jobID, 'run.py not found');
      return;
    }

    const additionalEnv: Record<string, string> = {
      AITK_JOB_ID: jobID,
      CUDA_DEVICE_ORDER: 'PCI_BUS_ID',
      CUDA_VISIBLE_DEVICES: `${job.gpu_ids}`,
      IS_AI_TOOLKIT_UI: '1',
    };

    // HF_TOKEN
    const hfToken = await getHFToken();
    if (hfToken && hfToken.trim() !== '') {
      additionalEnv.HF_TOKEN = hfToken;
    }

    // Add the --log argument to the command
    const args = [runFilePath, configPath, '--log', logPath];

    // Fully detach and ignore stdio so the process survives this worker.
    // windowsHide only has an effect on Windows.
    const subprocess = spawn(resolvePythonPath(), args, {
      env: {
        ...process.env,
        ...additionalEnv,
      },
      cwd: TOOLKIT_ROOT,
      detached: true,
      windowsHide: true,
      stdio: 'ignore',
    });

    // A missing or non-executable interpreter is reported asynchronously through 'error';
    // without a listener that event would be thrown as an uncaught exception.
    subprocess.on('error', err => {
      console.error('Error launching process:', err);
      void markJobLaunchError(jobID, err.message || 'Unknown error');
    });

    // Important: let the child run independently of this Node process.
    subprocess.unref();

    // Write a pid file for future management (stop/inspect) without keeping streams open
    try {
      fs.writeFileSync(path.join(trainingFolder, 'pid.txt'), String(subprocess.pid ?? ''), { flag: 'w' });
    } catch (e) {
      console.error('Error writing pid file:', e);
    }

    // (No stdout/stderr listeners: logging goes to the --log file handled by Python)
  } catch (error: unknown) {
    // Any failure during setup or launch ends up here so the job never stays stuck in 'running'
    console.error('Error launching process:', error);
    const message = error instanceof Error && error.message ? error.message : 'Unknown error';
    await markJobLaunchError(jobID, message);
  }
};
|
||||
|
||||
/**
 * Marks the job as running and launches it in the background.
 *
 * The status update is awaited before launching so a second cron tick sees the job as
 * running and does not start another one. The launch itself is not awaited, which lets
 * the cron loop continue.
 *
 * @param jobID Id of the job to start; an unknown id is logged and ignored.
 */
export default async function startJob(jobID: string) {
  const job: Job | null = await prisma.job.findUnique({
    where: { id: jobID },
  });
  if (!job) {
    console.error(`Job with ID ${jobID} not found`);
    return;
  }

  // update job status to 'running', this will run sync so we don't start multiple jobs.
  await prisma.job.update({
    where: { id: jobID },
    data: {
      status: 'running',
      stop: false,
      // Clear any leftover requeue request (set when a queue is stopped); otherwise the
      // trainer would see the flag on its first check and hand the job straight back.
      return_to_queue: false,
      info: 'Starting job...',
    },
  });

  // start the job asynchronously so the cron can continue; never leave the promise floating
  startAndWatchJob(job).catch(err => {
    console.error(`Error starting job ${jobID}:`, err);
  });
}
|
||||
37
ui/cron/paths.ts
Normal file
37
ui/cron/paths.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import path from 'path';
|
||||
import prisma from './prisma';
|
||||
|
||||
export const TOOLKIT_ROOT = path.resolve('@', '..', '..');
|
||||
export const defaultTrainFolder = path.join(TOOLKIT_ROOT, 'output');
|
||||
export const defaultDatasetsFolder = path.join(TOOLKIT_ROOT, 'datasets');
|
||||
export const defaultDataRoot = path.join(TOOLKIT_ROOT, 'data');
|
||||
|
||||
console.log('TOOLKIT_ROOT:', TOOLKIT_ROOT);
|
||||
|
||||
/**
 * Resolves the folder training output is written to.
 *
 * Uses the value of the `TRAINING_FOLDER` settings row when it exists and is non-empty,
 * otherwise `defaultTrainFolder`.
 */
export const getTrainingFolder = async () => {
  const setting = await prisma.settings.findFirst({
    where: { key: 'TRAINING_FOLDER' },
  });

  const configured = setting?.value;
  // A missing row and an empty value both fall back to the default.
  if (!configured) {
    return defaultTrainFolder as string;
  }
  return configured as string;
};
|
||||
|
||||
/**
 * Reads the `HF_TOKEN` settings row.
 *
 * Returns the stored value, or an empty string when the row is missing or its value is empty.
 */
export const getHFToken = async () => {
  const setting = await prisma.settings.findFirst({
    where: { key: 'HF_TOKEN' },
  });

  const stored = setting?.value;
  if (!stored) {
    return '';
  }
  return stored;
};
|
||||
4
ui/cron/prisma.ts
Normal file
4
ui/cron/prisma.ts
Normal file
@@ -0,0 +1,4 @@
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
export default prisma;
|
||||
@@ -1,3 +1,4 @@
|
||||
import processQueue from './actions/processQueue';
|
||||
class CronWorker {
|
||||
interval: number;
|
||||
is_running: boolean;
|
||||
@@ -23,7 +24,9 @@ class CronWorker {
|
||||
this.is_running = false;
|
||||
}
|
||||
|
||||
async loop() {}
|
||||
async loop() {
|
||||
await processQueue();
|
||||
}
|
||||
}
|
||||
|
||||
// it automatically starts the loop
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "concurrently -k -n WORKER,UI \"ts-node-dev --respawn --watch cron --transpile-only cron/worker.ts\" \"next dev --turbopack\"",
|
||||
"dev": "concurrently -k -n WORKER,UI \"ts-node-dev --project tsconfig.worker.json --respawn --watch cron --transpile-only cron/worker.ts\" \"next dev --turbopack\"",
|
||||
"build": "tsc -p tsconfig.worker.json && next build",
|
||||
"start": "concurrently --restart-tries -1 --restart-after 1000 -n WORKER,UI \"node dist/worker.js\" \"next start --port 8675\"",
|
||||
"start": "concurrently --restart-tries -1 --restart-after 1000 -n WORKER,UI \"node dist/cron/worker.js\" \"next start --port 8675\"",
|
||||
"build_and_start": "npm install && npm run update_db && npm run build && npm run start",
|
||||
"lint": "next lint",
|
||||
"update_db": "npx prisma generate && npx prisma db push",
|
||||
|
||||
@@ -13,26 +13,29 @@ model Settings {
|
||||
value String
|
||||
}
|
||||
|
||||
model Job {
|
||||
id String @id @default(uuid())
|
||||
name String @unique
|
||||
gpu_ids String
|
||||
job_config String // JSON string
|
||||
created_at DateTime @default(now())
|
||||
updated_at DateTime @updatedAt
|
||||
status String @default("stopped")
|
||||
stop Boolean @default(false)
|
||||
step Int @default(0)
|
||||
info String @default("")
|
||||
speed_string String @default("")
|
||||
model Queue {
|
||||
id Int @id @default(autoincrement())
|
||||
gpu_ids String @unique
|
||||
is_running Boolean @default(false)
|
||||
|
||||
@@index([gpu_ids])
|
||||
}
|
||||
|
||||
model Queue {
|
||||
id String @id @default(uuid())
|
||||
channel String
|
||||
job_id String
|
||||
created_at DateTime @default(now())
|
||||
updated_at DateTime @updatedAt
|
||||
status String @default("waiting")
|
||||
@@index([job_id, channel])
|
||||
}
|
||||
model Job {
|
||||
id String @id @default(uuid())
|
||||
name String @unique
|
||||
gpu_ids String
|
||||
job_config String // JSON string
|
||||
created_at DateTime @default(now())
|
||||
updated_at DateTime @updatedAt
|
||||
status String @default("stopped")
|
||||
stop Boolean @default(false)
|
||||
return_to_queue Boolean @default(false) // same as stop, but will be set to 'queued' when stopped
|
||||
step Int @default(0)
|
||||
info String @default("")
|
||||
speed_string String @default("")
|
||||
queue_position Int @default(0)
|
||||
|
||||
@@index([status])
|
||||
@@index([gpu_ids])
|
||||
}
|
||||
|
||||
@@ -65,7 +65,9 @@ async function getGpuStats(isWindows: boolean) {
|
||||
'nvidia-smi --query-gpu=index,name,driver_version,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used,power.draw,power.limit,clocks.current.graphics,clocks.current.memory,fan.speed --format=csv,noheader,nounits';
|
||||
|
||||
// Execute command
|
||||
const { stdout } = await execAsync(command);
|
||||
const { stdout } = await execAsync(command, {
|
||||
env: { ...process.env, CUDA_DEVICE_ORDER: 'PCI_BUS_ID' },
|
||||
});
|
||||
|
||||
// Parse CSV output
|
||||
const gpus = stdout
|
||||
|
||||
@@ -1,12 +1,5 @@
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
import { TOOLKIT_ROOT } from '@/paths';
|
||||
import { spawn } from 'child_process';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import { getTrainingFolder, getHFToken } from '@/server/settings';
|
||||
const isWindows = process.platform === 'win32';
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
@@ -21,195 +14,46 @@ export async function GET(request: NextRequest, { params }: { params: { jobID: s
|
||||
return NextResponse.json({ error: 'Job not found' }, { status: 404 });
|
||||
}
|
||||
|
||||
// update job status to 'running'
|
||||
// get highest queue position
|
||||
const highestQueuePosition = await prisma.job.aggregate({
|
||||
_max: {
|
||||
queue_position: true,
|
||||
},
|
||||
});
|
||||
const newQueuePosition = (highestQueuePosition._max.queue_position || 0) + 1000;
|
||||
|
||||
await prisma.job.update({
|
||||
where: { id: jobID },
|
||||
data: {
|
||||
status: 'running',
|
||||
stop: false,
|
||||
info: 'Starting job...',
|
||||
data: { queue_position: newQueuePosition },
|
||||
});
|
||||
|
||||
// make sure the queue is running
|
||||
const queue = await prisma.queue.findFirst({
|
||||
where: {
|
||||
gpu_ids: job.gpu_ids,
|
||||
},
|
||||
});
|
||||
|
||||
// setup the training
|
||||
const trainingRoot = await getTrainingFolder();
|
||||
|
||||
const trainingFolder = path.join(trainingRoot, job.name);
|
||||
if (!fs.existsSync(trainingFolder)) {
|
||||
fs.mkdirSync(trainingFolder, { recursive: true });
|
||||
}
|
||||
|
||||
// make the config file
|
||||
const configPath = path.join(trainingFolder, '.job_config.json');
|
||||
|
||||
//log to path
|
||||
const logPath = path.join(trainingFolder, 'log.txt');
|
||||
|
||||
try {
|
||||
// if the log path exists, move it to a folder called logs and rename it {num}_log.txt, looking for the highest num
|
||||
// if the log path does not exist, create it
|
||||
if (fs.existsSync(logPath)) {
|
||||
const logsFolder = path.join(trainingFolder, 'logs');
|
||||
if (!fs.existsSync(logsFolder)) {
|
||||
fs.mkdirSync(logsFolder, { recursive: true });
|
||||
}
|
||||
|
||||
let num = 0;
|
||||
while (fs.existsSync(path.join(logsFolder, `${num}_log.txt`))) {
|
||||
num++;
|
||||
}
|
||||
|
||||
fs.renameSync(logPath, path.join(logsFolder, `${num}_log.txt`));
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Error moving log file:', e);
|
||||
}
|
||||
|
||||
// update the config dataset path
|
||||
const jobConfig = JSON.parse(job.job_config);
|
||||
jobConfig.config.process[0].sqlite_db_path = path.join(TOOLKIT_ROOT, 'aitk_db.db');
|
||||
|
||||
// write the config file
|
||||
fs.writeFileSync(configPath, JSON.stringify(jobConfig, null, 2));
|
||||
|
||||
let pythonPath = 'python';
|
||||
// use .venv or venv if it exists
|
||||
if (fs.existsSync(path.join(TOOLKIT_ROOT, '.venv'))) {
|
||||
if (isWindows) {
|
||||
pythonPath = path.join(TOOLKIT_ROOT, '.venv', 'Scripts', 'python.exe');
|
||||
} else {
|
||||
pythonPath = path.join(TOOLKIT_ROOT, '.venv', 'bin', 'python');
|
||||
}
|
||||
} else if (fs.existsSync(path.join(TOOLKIT_ROOT, 'venv'))) {
|
||||
if (isWindows) {
|
||||
pythonPath = path.join(TOOLKIT_ROOT, 'venv', 'Scripts', 'python.exe');
|
||||
} else {
|
||||
pythonPath = path.join(TOOLKIT_ROOT, 'venv', 'bin', 'python');
|
||||
}
|
||||
}
|
||||
|
||||
const runFilePath = path.join(TOOLKIT_ROOT, 'run.py');
|
||||
if (!fs.existsSync(runFilePath)) {
|
||||
return NextResponse.json({ error: 'run.py not found' }, { status: 500 });
|
||||
}
|
||||
|
||||
const additionalEnv: any = {
|
||||
AITK_JOB_ID: jobID,
|
||||
CUDA_VISIBLE_DEVICES: `${job.gpu_ids}`,
|
||||
IS_AI_TOOLKIT_UI: '1'
|
||||
};
|
||||
|
||||
// HF_TOKEN
|
||||
const hfToken = await getHFToken();
|
||||
if (hfToken && hfToken.trim() !== '') {
|
||||
additionalEnv.HF_TOKEN = hfToken;
|
||||
}
|
||||
|
||||
// Add the --log argument to the command
|
||||
const args = [runFilePath, configPath, '--log', logPath];
|
||||
|
||||
try {
|
||||
let subprocess;
|
||||
|
||||
if (isWindows) {
|
||||
// For Windows, use 'cmd.exe' to open a new command window
|
||||
subprocess = spawn('cmd.exe', ['/c', 'start', 'cmd.exe', '/k', pythonPath, ...args], {
|
||||
env: {
|
||||
...process.env,
|
||||
...additionalEnv,
|
||||
},
|
||||
cwd: TOOLKIT_ROOT,
|
||||
windowsHide: false,
|
||||
});
|
||||
} else {
|
||||
// For non-Windows platforms
|
||||
subprocess = spawn(pythonPath, args, {
|
||||
detached: true,
|
||||
stdio: ['ignore', 'pipe', 'pipe'], // Changed from 'ignore' to capture output
|
||||
env: {
|
||||
...process.env,
|
||||
...additionalEnv,
|
||||
},
|
||||
cwd: TOOLKIT_ROOT,
|
||||
});
|
||||
}
|
||||
|
||||
// Start monitoring in the background without blocking the response
|
||||
const monitorProcess = async () => {
|
||||
const startTime = Date.now();
|
||||
let errorOutput = '';
|
||||
let stdoutput = '';
|
||||
|
||||
if (subprocess.stderr) {
|
||||
subprocess.stderr.on('data', data => {
|
||||
errorOutput += data.toString();
|
||||
});
|
||||
subprocess.stdout.on('data', data => {
|
||||
stdoutput += data.toString();
|
||||
// truncate to only get the last 500 characters
|
||||
if (stdoutput.length > 500) {
|
||||
stdoutput = stdoutput.substring(stdoutput.length - 500);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
subprocess.on('exit', async code => {
|
||||
const currentTime = Date.now();
|
||||
const duration = (currentTime - startTime) / 1000;
|
||||
console.log(`Job ${jobID} exited with code ${code} after ${duration} seconds.`);
|
||||
// wait for 5 seconds to give it time to stop itself. It id still has a status of running in the db, update it to stopped
|
||||
await new Promise(resolve => setTimeout(resolve, 5000));
|
||||
const updatedJob = await prisma.job.findUnique({
|
||||
where: { id: jobID },
|
||||
});
|
||||
if (updatedJob?.status === 'running') {
|
||||
let errorString = errorOutput;
|
||||
if (errorString.trim() === '') {
|
||||
errorString = stdoutput;
|
||||
}
|
||||
await prisma.job.update({
|
||||
where: { id: jobID },
|
||||
data: {
|
||||
status: 'error',
|
||||
info: `Error launching job: ${errorString.substring(0, 500)}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Wait 30 seconds before releasing the process
|
||||
await new Promise(resolve => setTimeout(resolve, 30000));
|
||||
// Detach the process for non-Windows systems
|
||||
if (!isWindows && subprocess.unref) {
|
||||
subprocess.unref();
|
||||
}
|
||||
};
|
||||
|
||||
// Start the monitoring without awaiting it
|
||||
monitorProcess().catch(err => {
|
||||
console.error(`Error in process monitoring for job ${jobID}:`, err);
|
||||
});
|
||||
|
||||
// Return the response immediately
|
||||
return NextResponse.json(job);
|
||||
} catch (error: any) {
|
||||
// Handle any exceptions during process launch
|
||||
console.error('Error launching process:', error);
|
||||
|
||||
await prisma.job.update({
|
||||
where: { id: jobID },
|
||||
// if queue doesn't exist, create it
|
||||
if (!queue) {
|
||||
await prisma.queue.create({
|
||||
data: {
|
||||
status: 'error',
|
||||
info: `Error launching job: ${error?.message || 'Unknown error'}`,
|
||||
gpu_ids: job.gpu_ids,
|
||||
is_running: false,
|
||||
},
|
||||
});
|
||||
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: 'Failed to launch job process',
|
||||
details: error?.message || 'Unknown error',
|
||||
},
|
||||
{ status: 500 },
|
||||
);
|
||||
}
|
||||
|
||||
await prisma.job.update({
|
||||
where: { id: jobID },
|
||||
data: {
|
||||
status: 'queued',
|
||||
stop: false,
|
||||
return_to_queue: false,
|
||||
info: 'Job queued',
|
||||
},
|
||||
});
|
||||
|
||||
// Return the response immediately
|
||||
return NextResponse.json(job);
|
||||
}
|
||||
|
||||
@@ -42,12 +42,21 @@ export async function POST(request: Request) {
|
||||
});
|
||||
return NextResponse.json(training);
|
||||
} else {
|
||||
// find the highest queue position and add 1000
|
||||
const highestQueuePosition = await prisma.job.aggregate({
|
||||
_max: {
|
||||
queue_position: true,
|
||||
},
|
||||
});
|
||||
const newQueuePosition = (highestQueuePosition._max.queue_position || 0) + 1000;
|
||||
|
||||
// Create new training
|
||||
const training = await prisma.job.create({
|
||||
data: {
|
||||
name,
|
||||
gpu_ids,
|
||||
job_config: JSON.stringify(job_config),
|
||||
queue_position: newQueuePosition,
|
||||
},
|
||||
});
|
||||
return NextResponse.json(training);
|
||||
|
||||
27
ui/src/app/api/queue/[queueID]/start/route.ts
Normal file
27
ui/src/app/api/queue/[queueID]/start/route.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
/**
 * Starts the queue identified by `queueID` (the queue's `gpu_ids` value), creating it
 * if it does not exist yet.
 *
 * A single upsert replaces the previous find-then-create/update sequence, so two
 * concurrent requests cannot both try to create the same unique `gpu_ids` row, and the
 * response carries the row as stored (`is_running: true`) rather than the pre-update copy.
 */
export async function GET(request: NextRequest, { params }: { params: { queueID: string } }) {
  const { queueID } = await params;

  const queue = await prisma.queue.upsert({
    where: { gpu_ids: queueID },
    update: { is_running: true },
    create: { gpu_ids: queueID, is_running: true },
  });

  return NextResponse.json(queue);
}
|
||||
23
ui/src/app/api/queue/[queueID]/stop/route.ts
Normal file
23
ui/src/app/api/queue/[queueID]/stop/route.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
/**
 * Stops the queue identified by `queueID` (the queue's `gpu_ids` value).
 *
 * Responds with 404 when no such queue exists; otherwise responds with the updated
 * queue row, so `is_running` in the response is the stored value (false) rather than
 * the value read before the update.
 */
export async function GET(request: NextRequest, { params }: { params: { queueID: string } }) {
  const { queueID } = await params;

  const queue = await prisma.queue.findUnique({
    where: { gpu_ids: queueID },
  });

  if (!queue) {
    return NextResponse.json({ error: 'Queue not found' }, { status: 404 });
  }

  const stoppedQueue = await prisma.queue.update({
    where: { id: queue.id },
    data: { is_running: false },
  });

  return NextResponse.json(stoppedQueue);
}
|
||||
18
ui/src/app/api/queue/route.ts
Normal file
18
ui/src/app/api/queue/route.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import { NextResponse } from 'next/server';
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
/**
 * Lists every queue ordered by `gpu_ids` ascending, as `{ queues: [...] }`.
 * Any failure is logged and reported as a 500 with a generic error message.
 */
export async function GET(request: Request) {
  // NOTE(review): searchParams is never used below. Reading request.url may be what keeps
  // this handler from being treated as static by Next.js; confirm before removing it.
  const { searchParams } = new URL(request.url);

  let allQueues;
  try {
    allQueues = await prisma.queue.findMany({
      orderBy: { gpu_ids: 'asc' },
    });
  } catch (error) {
    console.error(error);
    return NextResponse.json({ error: 'Failed to fetch queue' }, { status: 500 });
  }
  return NextResponse.json({ queues: allQueues });
}
|
||||
@@ -18,7 +18,7 @@ export default function Dashboard() {
|
||||
<GpuMonitor />
|
||||
<div className="w-full mt-4">
|
||||
<div className="flex justify-between items-center mb-2">
|
||||
<h1 className="text-md">Active Jobs</h1>
|
||||
<h1 className="text-md">Queues</h1>
|
||||
<div className="text-xs text-gray-500">
|
||||
<Link href="/jobs">View All</Link>
|
||||
</div>
|
||||
|
||||
@@ -5,7 +5,7 @@ import { FaChevronLeft } from 'react-icons/fa';
|
||||
import { Button } from '@headlessui/react';
|
||||
import { TopBar, MainContent } from '@/components/layout';
|
||||
import useJob from '@/hooks/useJob';
|
||||
import SampleImages, {SampleImagesMenu} from '@/components/SampleImages';
|
||||
import SampleImages, { SampleImagesMenu } from '@/components/SampleImages';
|
||||
import JobOverview from '@/components/JobOverview';
|
||||
import { redirect } from 'next/navigation';
|
||||
import JobActionBar from '@/components/JobActionBar';
|
||||
@@ -73,6 +73,7 @@ export default function JobPage({ params }: { params: { jobID: string } }) {
|
||||
afterDelete={() => {
|
||||
redirect('/jobs');
|
||||
}}
|
||||
autoStartQueue={true}
|
||||
/>
|
||||
)}
|
||||
</TopBar>
|
||||
@@ -98,15 +99,12 @@ export default function JobPage({ params }: { params: { jobID: string } }) {
|
||||
{page.name}
|
||||
</Button>
|
||||
))}
|
||||
{
|
||||
page?.menuItem && (
|
||||
<>
|
||||
<div className='flex-grow'>
|
||||
</div>
|
||||
<page.menuItem job={job} />
|
||||
</>
|
||||
)
|
||||
}
|
||||
{page?.menuItem && (
|
||||
<>
|
||||
<div className="flex-grow"></div>
|
||||
<page.menuItem job={job} />
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
|
||||
@@ -9,7 +9,7 @@ export default function Dashboard() {
|
||||
<>
|
||||
<TopBar>
|
||||
<div>
|
||||
<h1 className="text-lg">Training Jobs</h1>
|
||||
<h1 className="text-lg">Training Queue</h1>
|
||||
</div>
|
||||
<div className="flex-1"></div>
|
||||
<div>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import React from 'react';
|
||||
import { GpuInfo } from '@/types';
|
||||
import { ChevronRight, Thermometer, Zap, Clock, HardDrive, Fan, Cpu } from 'lucide-react';
|
||||
import { Thermometer, Zap, Clock, HardDrive, Fan, Cpu } from 'lucide-react';
|
||||
|
||||
interface GPUWidgetProps {
|
||||
gpu: GpuInfo;
|
||||
@@ -24,7 +24,7 @@ export default function GPUWidget({ gpu }: GPUWidgetProps) {
|
||||
<div className="bg-gray-800 px-4 py-3 flex items-center justify-between">
|
||||
<div className="flex items-center space-x-2">
|
||||
<h2 className="font-semibold text-gray-100">{gpu.name}</h2>
|
||||
<span className="px-2 py-0.5 bg-gray-700 rounded-full text-xs text-gray-300">#{gpu.index}</span>
|
||||
<span className="px-2 py-0.5 bg-gray-700 rounded-full text-xs text-gray-300"># {gpu.index}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import Link from 'next/link';
|
||||
import { Eye, Trash2, Pen, Play, Pause, Cog } from 'lucide-react';
|
||||
import { Eye, Trash2, Pen, Play, Pause, Cog, X } from 'lucide-react';
|
||||
import { Button } from '@headlessui/react';
|
||||
import { openConfirm } from '@/components/ConfirmModal';
|
||||
import { Job } from '@prisma/client';
|
||||
import { startJob, stopJob, deleteJob, getAvaliableJobActions, markJobAsStopped } from '@/utils/jobs';
|
||||
import { startQueue } from '@/utils/queue';
|
||||
import { Menu, MenuButton, MenuItem, MenuItems } from '@headlessui/react';
|
||||
|
||||
interface JobActionBarProps {
|
||||
@@ -12,10 +13,18 @@ interface JobActionBarProps {
|
||||
afterDelete?: () => void;
|
||||
hideView?: boolean;
|
||||
className?: string;
|
||||
autoStartQueue?: boolean;
|
||||
}
|
||||
|
||||
export default function JobActionBar({ job, onRefresh, afterDelete, className, hideView }: JobActionBarProps) {
|
||||
const { canStart, canStop, canDelete, canEdit } = getAvaliableJobActions(job);
|
||||
export default function JobActionBar({
|
||||
job,
|
||||
onRefresh,
|
||||
afterDelete,
|
||||
className,
|
||||
hideView,
|
||||
autoStartQueue = false,
|
||||
}: JobActionBarProps) {
|
||||
const { canStart, canStop, canDelete, canEdit, canRemoveFromQueue } = getAvaliableJobActions(job);
|
||||
|
||||
if (!afterDelete) afterDelete = onRefresh;
|
||||
|
||||
@@ -26,6 +35,10 @@ export default function JobActionBar({ job, onRefresh, afterDelete, className, h
|
||||
onClick={async () => {
|
||||
if (!canStart) return;
|
||||
await startJob(job.id);
|
||||
// start the queue as well
|
||||
if (autoStartQueue) {
|
||||
await startQueue(job.gpu_ids);
|
||||
}
|
||||
if (onRefresh) onRefresh();
|
||||
}}
|
||||
className={`ml-2 opacity-100`}
|
||||
@@ -33,6 +46,18 @@ export default function JobActionBar({ job, onRefresh, afterDelete, className, h
|
||||
<Play />
|
||||
</Button>
|
||||
)}
|
||||
{canRemoveFromQueue && (
|
||||
<Button
|
||||
onClick={async () => {
|
||||
if (!canRemoveFromQueue) return;
|
||||
await markJobAsStopped(job.id);
|
||||
if (onRefresh) onRefresh();
|
||||
}}
|
||||
className={`ml-2 opacity-100`}
|
||||
>
|
||||
<X />
|
||||
</Button>
|
||||
)}
|
||||
{canStop && (
|
||||
<Button
|
||||
onClick={() => {
|
||||
|
||||
@@ -1,16 +1,30 @@
|
||||
import { useMemo } from 'react';
|
||||
import useJobsList from '@/hooks/useJobsList';
|
||||
import Link from 'next/link';
|
||||
import UniversalTable, { TableColumn } from '@/components/UniversalTable';
|
||||
import { JobConfig } from '@/types';
|
||||
import { GpuInfo, JobConfig } from '@/types';
|
||||
import JobActionBar from './JobActionBar';
|
||||
import { Job, Queue } from '@prisma/client';
|
||||
import useQueueList from '@/hooks/useQueueList';
|
||||
import classNames from 'classnames';
|
||||
import { startQueue, stopQueue } from '@/utils/queue';
|
||||
import { CgSpinner } from 'react-icons/cg';
|
||||
import useGPUInfo from '@/hooks/useGPUInfo';
|
||||
|
||||
interface JobsTableProps {
|
||||
autoStartQueue?: boolean;
|
||||
onlyActive?: boolean;
|
||||
}
|
||||
|
||||
export default function JobsTable({ onlyActive = false }: JobsTableProps) {
|
||||
const { jobs, status, refreshJobs } = useJobsList(onlyActive);
|
||||
const isLoading = status === 'loading';
|
||||
const { jobs, status, refreshJobs } = useJobsList(onlyActive, 5000);
|
||||
const { queues, status: queueStatus, refreshQueues } = useQueueList();
|
||||
const { gpuList, isGPUInfoLoaded } = useGPUInfo();
|
||||
|
||||
const refresh = () => {
|
||||
refreshJobs();
|
||||
refreshQueues();
|
||||
};
|
||||
|
||||
const columns: TableColumn[] = [
|
||||
{
|
||||
@@ -18,6 +32,9 @@ export default function JobsTable({ onlyActive = false }: JobsTableProps) {
|
||||
key: 'name',
|
||||
render: row => (
|
||||
<Link href={`/jobs/${row.id}`} className="font-medium whitespace-nowrap">
|
||||
{['running', 'stopping'].includes(row.status) ? (
|
||||
<CgSpinner className="inline animate-spin mr-2 text-blue-400" />
|
||||
) : null}
|
||||
{row.name}
|
||||
</Link>
|
||||
),
|
||||
@@ -30,11 +47,11 @@ export default function JobsTable({ onlyActive = false }: JobsTableProps) {
|
||||
const totalSteps = jobConfig.config.process[0].train.steps;
|
||||
|
||||
return (
|
||||
<div className="flex items-center">
|
||||
<span>
|
||||
<div>
|
||||
<div className="text-xs text-gray-400">
|
||||
{row.step} / {totalSteps}
|
||||
</span>
|
||||
<div className="w-16 bg-gray-700 rounded-full h-1.5 ml-2">
|
||||
</div>
|
||||
<div className="bg-gray-700 rounded-full h-1.5">
|
||||
<div
|
||||
className="bg-blue-500 h-1.5 rounded-full"
|
||||
style={{ width: `${(row.step / totalSteps) * 100}%` }}
|
||||
@@ -70,10 +87,115 @@ export default function JobsTable({ onlyActive = false }: JobsTableProps) {
|
||||
key: 'actions',
|
||||
className: 'text-right',
|
||||
render: row => {
|
||||
return <JobActionBar job={row} onRefresh={refreshJobs} />;
|
||||
return <JobActionBar job={row} onRefresh={refreshJobs} autoStartQueue={false} />;
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
return <UniversalTable columns={columns} rows={jobs} isLoading={isLoading} onRefresh={refreshJobs} />;
|
||||
// Groups the jobs into one bucket per GPU (keyed by the GPU index as a string)
// plus a catch-all 'Idle' bucket. A job lands in a GPU bucket only when it is
// queued, running or stopping AND one of its comma-separated gpu_ids matches a
// GPU in gpuList; every other job goes to 'Idle'.
// Returns an empty object until GPU info is loaded or while there are no jobs.
const jobsDict = useMemo(() => {
  if (!isGPUInfoLoaded) return {};
  if (jobs.length === 0) return {};

  const jd: { [key: string]: { name: string; jobs: Job[] } } = {};
  gpuList.forEach(gpu => {
    jd[`${gpu.index}`] = { name: `${gpu.name}`, jobs: [] };
  });
  jd['Idle'] = { name: 'Idle', jobs: [] };

  const activeStatuses = ['queued', 'running', 'stopping'];
  jobs.forEach(job => {
    const jobGpuIds = job.gpu_ids?.split(',') ?? [];
    // find() yields undefined when the job has no gpu_ids or names a GPU that
    // is not in gpuList; such jobs must fall through to 'Idle' instead of
    // crashing on gpu.index.
    const gpu: GpuInfo | undefined = gpuList.find(g => jobGpuIds.includes(g.index.toString()));
    const key = gpu === undefined ? null : `${gpu.index}`;
    if (key !== null && key in jd && activeStatuses.includes(job.status)) {
      jd[key].jobs.push(job);
    } else {
      jd['Idle'].jobs.push(job);
    }
  });

  // sort the queued/running jobs by queue position; jobs without a position go last
  Object.keys(jd).forEach(key => {
    if (key === 'Idle') return;
    jd[key].jobs.sort((a, b) => {
      // Two jobs without a position compare equal so the comparator stays consistent.
      if (a.queue_position === null && b.queue_position === null) return 0;
      if (a.queue_position === null) return 1;
      if (b.queue_position === null) return -1;
      return a.queue_position - b.queue_position;
    });
  });
  return jd;
}, [jobs, queues, gpuList, isGPUInfoLoaded]);
|
||||
|
||||
let isLoading = status === 'loading' || queueStatus === 'loading' || !isGPUInfoLoaded;
|
||||
|
||||
// if job dict is populated, we are always loaded
|
||||
if (Object.keys(jobsDict).length > 0) isLoading = false;
|
||||
|
||||
return (
|
||||
<div>
|
||||
{Object.keys(jobsDict)
|
||||
.sort()
|
||||
.filter(key => key !== 'Idle')
|
||||
.map(gpuKey => {
|
||||
const queue = queues.find(q => `${q.gpu_ids}` === gpuKey) as Queue;
|
||||
return (
|
||||
<div key={gpuKey} className="mb-6">
|
||||
<div
|
||||
className={classNames(
|
||||
'text-md flex px-4 py-1 rounded-t-lg',
|
||||
{ 'bg-green-900': queue?.is_running },
|
||||
{ 'bg-red-900': !queue?.is_running },
|
||||
)}
|
||||
>
|
||||
<div className="flex items-center space-x-2 flex-1 py-2">
|
||||
<h2 className="font-semibold text-gray-100">{jobsDict[gpuKey].name}</h2>
|
||||
<span className="px-2 py-0.5 bg-gray-700 rounded-full text-xs text-gray-300"># {queue?.gpu_ids}</span>
|
||||
</div>
|
||||
<div className="text-sm text-gray-300 italic flex items-center">
|
||||
{queue?.is_running ? (
|
||||
<>
|
||||
<span className="text-green-400 mr-2">Queue Running</span>
|
||||
<button
|
||||
onClick={async () => {
|
||||
await stopQueue(queue.gpu_ids as string);
|
||||
refresh();
|
||||
}}
|
||||
className="ml-4 text-xs bg-red-900 hover:bg-red-800 px-2 py-1 rounded"
|
||||
>
|
||||
STOP
|
||||
</button>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<span className="text-red-400 mr-2">Queue Stopped</span>
|
||||
<button
|
||||
onClick={async () => {
|
||||
await startQueue(queue.gpu_ids as string);
|
||||
refresh();
|
||||
}}
|
||||
className="ml-4 text-xs bg-green-700 hover:bg-green-600 px-2 py-1 rounded"
|
||||
>
|
||||
START
|
||||
</button>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<UniversalTable
|
||||
columns={columns}
|
||||
rows={jobsDict[gpuKey].jobs}
|
||||
isLoading={isLoading}
|
||||
onRefresh={refresh}
|
||||
theadClassName={queue?.is_running ? 'bg-green-950' : 'bg-red-950'}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
{!onlyActive && Object.keys(jobsDict).includes('Idle') && (
|
||||
<div className="mb-6 opacity-50">
|
||||
<div className="text-md flex px-4 py-1 rounded-t-lg bg-slate-600">
|
||||
<div className="flex items-center space-x-2 flex-1 py-2">
|
||||
<h2 className="font-semibold text-gray-100">Idle</h2>
|
||||
</div>
|
||||
</div>
|
||||
<UniversalTable columns={columns} rows={jobsDict['Idle'].jobs} isLoading={isLoading} onRefresh={refresh} />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
import Link from 'next/link';
|
||||
import { Home, Settings, BrainCircuit, Images, Plus} from 'lucide-react';
|
||||
import { FaXTwitter, FaDiscord, FaYoutube } from "react-icons/fa6";
|
||||
import { Home, Settings, BrainCircuit, Images, Plus } from 'lucide-react';
|
||||
import { FaXTwitter, FaDiscord, FaYoutube } from 'react-icons/fa6';
|
||||
|
||||
const Sidebar = () => {
|
||||
const navigation = [
|
||||
{ name: 'Dashboard', href: '/dashboard', icon: Home },
|
||||
{ name: 'New Job', href: '/jobs/new', icon: Plus },
|
||||
{ name: 'Training Jobs', href: '/jobs', icon: BrainCircuit },
|
||||
{ name: 'Training Queue', href: '/jobs', icon: BrainCircuit },
|
||||
{ name: 'Datasets', href: '/datasets', icon: Images },
|
||||
{ name: 'Settings', href: '/settings', icon: Settings },
|
||||
];
|
||||
|
||||
const socialsBoxClass = 'flex flex-col items-center justify-center p-1 hover:bg-gray-800 rounded-lg transition-colors';
|
||||
const socialsBoxClass =
|
||||
'flex flex-col items-center justify-center p-1 hover:bg-gray-800 rounded-lg transition-colors';
|
||||
const socialIconClass = 'w-5 h-5 text-gray-400 hover:text-white';
|
||||
|
||||
return (
|
||||
@@ -60,30 +61,15 @@ const Sidebar = () => {
|
||||
{/* Social links grid */}
|
||||
<div className="px-1 py-1 border-t border-gray-800">
|
||||
<div className="grid grid-cols-3 gap-4">
|
||||
<a
|
||||
href="https://discord.gg/VXmU2f5WEU"
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
className={socialsBoxClass}
|
||||
>
|
||||
<a href="https://discord.gg/VXmU2f5WEU" target="_blank" rel="noreferrer" className={socialsBoxClass}>
|
||||
<FaDiscord className={socialIconClass} />
|
||||
{/* <span className="text-xs text-gray-500 mt-1">Discord</span> */}
|
||||
</a>
|
||||
<a
|
||||
href="https://www.youtube.com/@ostrisai"
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
className={socialsBoxClass}
|
||||
>
|
||||
<a href="https://www.youtube.com/@ostrisai" target="_blank" rel="noreferrer" className={socialsBoxClass}>
|
||||
<FaYoutube className={socialIconClass} />
|
||||
{/* <span className="text-xs text-gray-500 mt-1">YouTube</span> */}
|
||||
</a>
|
||||
<a
|
||||
href="https://x.com/ostrisai"
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
className={socialsBoxClass}
|
||||
>
|
||||
<a href="https://x.com/ostrisai" target="_blank" rel="noreferrer" className={socialsBoxClass}>
|
||||
<FaXTwitter className={socialIconClass} />
|
||||
{/* <span className="text-xs text-gray-500 mt-1">X</span> */}
|
||||
</a>
|
||||
|
||||
@@ -16,10 +16,17 @@ interface TableProps {
|
||||
columns: TableColumn[];
|
||||
rows: TableRow[];
|
||||
isLoading: boolean;
|
||||
theadClassName?: string;
|
||||
onRefresh: () => void;
|
||||
}
|
||||
|
||||
export default function UniversalTable({ columns, rows, isLoading, onRefresh = () => {} }: TableProps) {
|
||||
export default function UniversalTable({
|
||||
columns,
|
||||
rows,
|
||||
isLoading,
|
||||
theadClassName = 'text-gray-400',
|
||||
onRefresh = () => {},
|
||||
}: TableProps) {
|
||||
return (
|
||||
<div className="w-full bg-gray-900 rounded-md shadow-md">
|
||||
{isLoading ? (
|
||||
@@ -39,7 +46,7 @@ export default function UniversalTable({ columns, rows, isLoading, onRefresh = (
|
||||
) : (
|
||||
<div className="overflow-x-auto">
|
||||
<table className="w-full text-sm text-left text-gray-300">
|
||||
<thead className="text-xs uppercase bg-gray-800 text-gray-400">
|
||||
<thead className={classNames('text-xs uppercase bg-gray-800', theadClassName)}>
|
||||
<tr>
|
||||
{columns.map(column => (
|
||||
<th key={column.key} className="px-3 py-2">
|
||||
|
||||
@@ -4,7 +4,7 @@ import { useEffect, useState } from 'react';
|
||||
import { Job } from '@prisma/client';
|
||||
import { apiClient } from '@/utils/api';
|
||||
|
||||
export default function useJobsList(onlyActive = false) {
|
||||
export default function useJobsList(onlyActive = false, reloadInterval: null | number = null) {
|
||||
const [jobs, setJobs] = useState<Job[]>([]);
|
||||
const [status, setStatus] = useState<'idle' | 'loading' | 'success' | 'error'>('idle');
|
||||
|
||||
@@ -20,19 +20,26 @@ export default function useJobsList(onlyActive = false) {
|
||||
setStatus('error');
|
||||
} else {
|
||||
if (onlyActive) {
|
||||
data.jobs = data.jobs.filter((job: Job) => job.status === 'running');
|
||||
data.jobs = data.jobs.filter((job: Job) => ['running', 'queued', 'stopping'].includes(job.status));
|
||||
}
|
||||
setJobs(data.jobs);
|
||||
setStatus('success');
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Error fetching datasets:', error);
|
||||
console.error('Error fetching jobs:', error);
|
||||
setStatus('error');
|
||||
});
|
||||
};
|
||||
useEffect(() => {
|
||||
refreshJobs();
|
||||
|
||||
if (reloadInterval) {
|
||||
const interval = setInterval(() => {
|
||||
refreshJobs();
|
||||
}, reloadInterval);
|
||||
return () => clearInterval(interval);
|
||||
}
|
||||
}, []);
|
||||
|
||||
return { jobs, setJobs, status, refreshJobs };
|
||||
|
||||
36
ui/src/hooks/useQueueList.tsx
Normal file
36
ui/src/hooks/useQueueList.tsx
Normal file
@@ -0,0 +1,36 @@
|
||||
'use client';
|
||||
|
||||
import { useEffect, useState } from 'react';
|
||||
import { Queue } from '@prisma/client';
|
||||
import { apiClient } from '@/utils/api';
|
||||
|
||||
/**
 * React hook that loads the queue list from `GET /api/queue`.
 *
 * The list is fetched once when the component mounts and again whenever
 * `refreshQueues` is called. `status` moves from 'idle' to 'loading' and then
 * to 'success' or 'error'.
 *
 * @returns the current queues, the raw state setter, the request status and a
 *          function that re-fetches the list.
 */
export default function useQueueList() {
  const [queues, setQueues] = useState<Queue[]>([]);
  const [status, setStatus] = useState<'idle' | 'loading' | 'success' | 'error'>('idle');

  const refreshQueues = () => {
    setStatus('loading');
    apiClient
      .get('/api/queue')
      .then(res => res.data)
      .then(data => {
        console.log('Queues:', data);
        if (data.error) {
          // The API reported a failure in the payload: log it at error level,
          // the same as a transport failure below.
          console.error('Error fetching queues:', data.error);
          setStatus('error');
        } else {
          // Keep the state an array even if the payload has no `queues` field,
          // so consumers can always call array methods on it.
          setQueues(Array.isArray(data.queues) ? data.queues : []);
          setStatus('success');
        }
      })
      .catch(error => {
        console.error('Error fetching queues:', error);
        setStatus('error');
      });
  };

  // Initial load on mount only.
  useEffect(() => {
    refreshQueues();
  }, []);

  return { queues, setQueues, status, refreshQueues };
}
|
||||
@@ -246,3 +246,5 @@ export interface GroupedSelectOption {
|
||||
readonly label: string;
|
||||
readonly options: SelectOption[];
|
||||
}
|
||||
|
||||
export type JobStatus = 'queued' | 'running' | 'stopping' | 'stopped' | 'completed' | 'error';
|
||||
|
||||
@@ -73,15 +73,16 @@ export const getJobConfig = (job: Job) => {
|
||||
/**
 * Works out which UI actions are currently allowed for a job, based on its
 * status, its `stop` flag and its configured step count.
 *
 * @param job the job record to inspect.
 * @returns flags telling whether the job can be deleted, edited, stopped,
 *          started, or removed from the queue.
 */
export const getAvaliableJobActions = (job: Job) => {
  const jobConfig = getJobConfig(job);
  // A stop was requested but the job still reports itself as running.
  const isStopping = job.stop && job.status === 'running';
  const canDelete = ['queued', 'completed', 'stopped', 'error'].includes(job.status) && !isStopping;
  const canEdit = ['queued', 'completed', 'stopped', 'error'].includes(job.status) && !isStopping;
  const canRemoveFromQueue = job.status === 'queued';
  const canStop = job.status === 'running' && !isStopping;
  let canStart = ['stopped', 'error'].includes(job.status) && !isStopping;
  // can resume if more steps were added
  if (job.status === 'completed' && jobConfig.config.process[0].train.steps > job.step && !isStopping) {
    canStart = true;
  }
  return { canDelete, canEdit, canStop, canStart, canRemoveFromQueue };
};
|
||||
|
||||
export const getNumberOfSamples = (job: Job) => {
|
||||
|
||||
32
ui/src/utils/queue.ts
Normal file
32
ui/src/utils/queue.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import { apiClient } from '@/utils/api';
|
||||
|
||||
/**
 * Asks the backend to start a queue via `GET /api/queue/{queueID}/start`.
 *
 * @param queueID identifier placed in the request path.
 * @returns a promise that resolves once the request succeeds.
 * @throws rethrows the request error after logging it.
 */
export const startQueue = async (queueID: string): Promise<void> => {
  try {
    const res = await apiClient.get(`/api/queue/${queueID}/start`);
    console.log('Queue started:', res.data);
  } catch (error) {
    console.error('Error starting queue:', error);
    throw error;
  }
};
|
||||
/**
 * Asks the backend to stop a queue via `GET /api/queue/{queueID}/stop`.
 *
 * @param queueID identifier placed in the request path.
 * @returns a promise that resolves once the request succeeds.
 * @throws rethrows the request error after logging it.
 */
export const stopQueue = async (queueID: string): Promise<void> => {
  try {
    const res = await apiClient.get(`/api/queue/${queueID}/stop`);
    console.log('Queue stopped:', res.data);
  } catch (error) {
    console.error('Error stopping queue:', error);
    throw error;
  }
};
|
||||
@@ -3,11 +3,18 @@
|
||||
"compilerOptions": {
|
||||
"module": "commonjs",
|
||||
"target": "es2020",
|
||||
"outDir": "dist",
|
||||
"outDir": "dist/cron",
|
||||
"moduleResolution": "node",
|
||||
"types": [
|
||||
"node"
|
||||
]
|
||||
],
|
||||
"esModuleInterop": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"paths": {
|
||||
"@/*": [
|
||||
"./cron/*"
|
||||
]
|
||||
}
|
||||
},
|
||||
"include": [
|
||||
"cron/**/*.ts"
|
||||
|
||||
@@ -1 +1 @@
|
||||
VERSION = "0.6.5"
|
||||
VERSION = "0.7.0"
|
||||
Reference in New Issue
Block a user