Skip to content

Commit

Permalink
nsfs fixes
Browse files Browse the repository at this point in the history
adding special handling in case of getting too many fork exits in a given time frame
adding read_bucket_sdk_config_info to remove errors when running nsfs in simple mode

Signed-off-by: jackyalbo <[email protected]>
  • Loading branch information
jackyalbo committed Feb 14, 2024
1 parent f665bd6 commit c9f50f3
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 2 deletions.
3 changes: 3 additions & 0 deletions config.js
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,9 @@ config.NSFS_RENAME_RETRIES = 3;
config.NSFS_VERSIONING_ENABLED = true;
config.NSFS_UPDATE_ISSUES_REPORT_ENABLED = true;

// Length, in minutes, of the time frame over which worker (fork) exit events are counted;
// exit events older than this are dropped from the count (24 * 60 minutes = one day).
config.NSFS_EXIT_EVENTS_TIME_FRAME_MIN = 24 * 60; // per day
// Maximum number of fork exit events tolerated within that time frame; once the count
// exceeds this value the primary process exits instead of forking a replacement worker.
config.NSFS_MAX_EXIT_EVENTS_PER_TIME_FRAME = 10; // allow max 10 failed forks per day

////////////////////////////
// NSFS NON CONTAINERIZED //
////////////////////////////
Expand Down
1 change: 1 addition & 0 deletions src/cmd/nsfs.js
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ class NsfsObjectSDK extends ObjectSDK {
this._get_bucket_namespace = bucket_name => this._simple_get_single_bucket_namespace(bucket_name);
this.load_requesting_account = auth_req => this._simple_load_requesting_account(auth_req);
this.read_bucket_sdk_policy_info = bucket_name => this._simple_read_bucket_sdk_policy_info(bucket_name);
this.read_bucket_sdk_config_info = () => undefined;
this.read_bucket_usage_info = () => undefined;
this.read_bucket_sdk_website_info = () => undefined;
this.read_bucket_sdk_namespace_info = () => undefined;
Expand Down
22 changes: 20 additions & 2 deletions src/util/fork_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const cluster = /** @type {import('node:cluster').Cluster} */ (
const dbg = require('../util/debug_module')(__filename);
const prom_reporting = require('../server/analytic_services/prometheus_reporting');
const NoobaaEvent = require('../manage_nsfs/manage_nsfs_events_utils').NoobaaEvent;
const config = require('../../config');


const io_stats = {
Expand All @@ -29,6 +30,7 @@ const op_stats = {};
* @returns {boolean} true if workers were started.
*/
function start_workers(metrics_port, count = 0) {
const exit_events = [];
if (cluster.isPrimary && count > 0) {
for (let i = 0; i < count; ++i) {
const worker = cluster.fork();
Expand All @@ -38,11 +40,27 @@ function start_workers(metrics_port, count = 0) {
// We don't want to leave the process with a partial set of workers,
// so if any worker exits, we will print an error message in the logs and start a new one.
cluster.on('exit', (worker, code, signal) => {
console.warn('WORKER exit', { id: worker.id, pid: worker.process.pid, code, signal }, 'starting a new one.');
console.warn('WORKER exit', { id: worker.id, pid: worker.process.pid, code, signal });
new NoobaaEvent(NoobaaEvent.FORK_EXIT).create_event(undefined, { id: worker.id, pid: worker.process.pid,
code: code, signal: signal}, undefined);
// This part checks whether we got too many exit events from forks being killed.
// If we get more than NSFS_MAX_EXIT_EVENTS_PER_TIME_FRAME exit events within a time frame of
// NSFS_EXIT_EVENTS_TIME_FRAME_MIN minutes, we will kill the main process and stop creating new forks.
const now = Date.now();
while (exit_events.length && now - exit_events[0] > config.NSFS_EXIT_EVENTS_TIME_FRAME_MIN * 60 * 1000) {
exit_events.shift();
}
exit_events.push(now);
if (exit_events.length > config.NSFS_MAX_EXIT_EVENTS_PER_TIME_FRAME) {
const error = `too many forks exited: ${exit_events.length} in a given time frame: ${config.NSFS_EXIT_EVENTS_TIME_FRAME_MIN} minutes`;
console.error('EXIT ON WORKER ERROR - ', error);
new NoobaaEvent(NoobaaEvent.ENDPOINT_CRASHED).create_event(undefined, undefined, error);
process.exit(1);
}
console.warn(`${exit_events.length} exit events in the last ${config.NSFS_EXIT_EVENTS_TIME_FRAME_MIN} minutes,` +
` max allowed are: ${config.NSFS_MAX_EXIT_EVENTS_PER_TIME_FRAME}`);
const new_worker = cluster.fork();
console.warn('WORKER started', { id: new_worker.id, pid: new_worker.process.pid });
console.warn('WORKER re-started', { id: new_worker.id, pid: new_worker.process.pid });
});
for (const id in cluster.workers) {
if (id) {
Expand Down

0 comments on commit c9f50f3

Please sign in to comment.