Job Scheduling Guide on Elastic Beanstalk (pm2 included)

whoami · January 15, 2024, 3:13pm

Hi everyone,

Today I managed to get Cloud Jobs working with my parse-server instance hosted on AWS Elastic Beanstalk. It took me a while, so I wanted to share the code in case it comes in handy for anyone.

I have my parse-server hosted on Elastic Beanstalk with a Load Balancer to scale the server. I also use pm2 to run one instance per thread.

Dependencies:

npm install pm2 --save
npm install node-schedule --save
npm install axios --save
npm install aws-sdk --save

Then, in package.json set the following scripts:

"scripts": {
    "start": "node ./node_modules/.bin/pm2 delete all ; node ./node_modules/.bin/pm2 start index.js --name YOURAPPNAME -i max --update-env --watch --merge-logs --ignore-watch='.git .ebextensions .elasticbeanstalk .github .platform aws node_modules logs modules postman'",
    "poststart": "node ./node_modules/.bin/pm2 logs",
    "start_local": "node index.js"
  },

Feel free to change the arguments as you wish.

Create a file called AWS.js and place the following content:

import AWS from "aws-sdk";
import axios from "axios";

/**
 * Issue an HTTP GET through axios.
 *
 * @param {string} url - Address to request.
 * @param {Object} headers - Passed to axios as the `headers` config option.
 * @returns {Promise<*>} Whatever `axios.get` resolves with; rejections propagate to the caller.
 */
export async function getRequest(url, headers) {
	const requestConfig = {headers};
	const response = await axios.get(url, requestConfig);
	return response;
}

/**
 * Issue an HTTP PUT through axios.
 *
 * @param {string} url - Address to request.
 * @param {*} data - Request body handed to axios unchanged.
 * @param {Object} headers - Passed to axios as the `headers` config option.
 * @returns {Promise<*>} Whatever `axios.put` resolves with; rejections propagate to the caller.
 */
export async function putRequest(url, data, headers) {
	const requestConfig = {headers};
	const response = await axios.put(url, data, requestConfig);
	return response;
}

/**
 * Report whether the IS_RUNNING_ON_AWS environment variable is exactly the string "true".
 *
 * @returns {boolean} true only for the exact value "true"; false for any other value or when unset.
 */
export function isRunningOnAWS() {
	const {IS_RUNNING_ON_AWS: awsFlag} = process.env;
	return awsFlag === "true";
}

/**
 * Request a session token from the metadata endpoint at 169.254.169.254.
 *
 * Sends a PUT to /latest/api/token with a TTL header of 21600 seconds.
 *
 * @returns {Promise<*>} The `data` field of the response, or null when the request fails
 *   (the failure is logged, not rethrown).
 */
async function fetchMetadataToken() {
	const tokenUrl = "http://169.254.169.254/latest/api/token";
	const ttlHeaders = {"X-aws-ec2-metadata-token-ttl-seconds": "21600"};

	try {
		const tokenResponse = await putRequest(tokenUrl, null, ttlHeaders);
		return tokenResponse.data;
	} catch (error) {
		// Prefer a compact "<status> <statusText>" summary when the error carries an HTTP response.
		const httpResponse = error.response;
		let detail = error;
		if (httpResponse && httpResponse.status && httpResponse.statusText) {
			detail = `${httpResponse.status} ${httpResponse.statusText}`;
		}
		console.error("Error fetching metadata token:", detail);
		return null;
	}
}

/**
 * Look up the ID of the instance this process runs on via the metadata endpoint.
 *
 * @returns {Promise<*>} The `data` field of the instance-id response, or null when not
 *   running on AWS, when no metadata token could be obtained, or when the request fails.
 */
async function getInstanceId() {
	// No token is requested at all unless the environment says we are on AWS.
	const token = isRunningOnAWS() ? await fetchMetadataToken() : null;
	if (!token) {
		return null;
	}

	const instanceIdUrl = "http://169.254.169.254/latest/meta-data/instance-id";
	const tokenHeaders = {"X-aws-ec2-metadata-token": token};

	try {
		const idResponse = await getRequest(instanceIdUrl, tokenHeaders);
		return idResponse.data;
	} catch (error) {
		const httpResponse = error.response;
		const detail = httpResponse && httpResponse.status && httpResponse.statusText
			? `${httpResponse.status} ${httpResponse.statusText}`
			: error;
		console.error("Error fetching instance ID:", detail);
		return null;
	}
}

/**
 * Fetch the tags attached to a resource through the EC2 `describeTags` call.
 *
 * @param {string} instanceId - Value used for the "resource-id" filter.
 * @returns {Promise<*>} The `Tags` field of the describeTags result, or an empty array
 *   when the call fails (the failure is logged, not rethrown).
 */
async function getInstanceTags(instanceId) {
	const ec2Client = new AWS.EC2();
	const resourceFilter = {Name: "resource-id", Values: [instanceId]};
	const request = {Filters: [resourceFilter]};

	try {
		const described = await ec2Client.describeTags(request).promise();
		return described.Tags;
	} catch (error) {
		const httpResponse = error.response;
		let detail = error;
		if (httpResponse && httpResponse.status && httpResponse.statusText) {
			detail = `${httpResponse.status} ${httpResponse.statusText}`;
		}
		console.error("Error fetching instance tags:", detail);
		return [];
	}
}

/**
 * Resolve the Elastic Beanstalk environment name of the current instance.
 *
 * Reads the value of the instance tag "elasticbeanstalk:environment-name".
 *
 * @returns {Promise<*>} The tag's `Value`, or null when the instance ID cannot be
 *   determined or the instance carries no such tag.
 */
async function getELBEnvName() {
	const instanceId = await getInstanceId();
	if (!instanceId) {
		return null;
	}

	const tags = await getInstanceTags(instanceId);

	// The tag can be absent: getInstanceTags yields an empty list when the lookup fails,
	// and instances outside Elastic Beanstalk never carry it. Return null instead of
	// dereferencing an undefined match.
	const envTag = (tags || []).find((tag) => tag.Key === "elasticbeanstalk:environment-name");
	return envTag ? envTag.Value : null;
}

/**
 * Leader election: decide whether this instance should act as the environment's leader.
 *
 * Outside AWS the answer is always true. On AWS, if no other instance is tagged as
 * leader, this instance tags itself; the result is then whether this instance shows
 * up as tagged leader.
 *
 * NOTE(review): the check-then-tag sequence is not atomic, so two instances starting
 * at the same moment could both tag themselves — confirm whether that matters for
 * your jobs.
 *
 * @returns {Promise<boolean>} true when this instance is the leader; false otherwise,
 *   including when the instance ID or environment name cannot be determined.
 */
export async function checkIfLeader() {
	if (!isRunningOnAWS()) {
		return true;
	}

	const instanceId = await getInstanceId();
	// Without an instance ID the environment lookup cannot succeed either, so skip it.
	const environmentName = instanceId ? await getELBEnvName() : null;
	if (!instanceId || !environmentName) {
		console.error("Failed to retrieve instance ID or environment name.");
		// Always resolve to a boolean rather than falling through with undefined.
		return false;
	}

	// If there is no leader, assign leadership to the current instance.
	const leaderExists = await isAnyInstanceLeader(instanceId, environmentName);
	if (!leaderExists) {
		await assignLeadership(instanceId);
	}

	// Read the tag back rather than assuming the assignment succeeded.
	return isCurrentInstanceLeader(instanceId, environmentName);
}

/**
 * Check whether an instance OTHER than the current one holds the "is_leader" tag in
 * the given Elastic Beanstalk environment.
 *
 * Only instances in the "pending" or "running" state are considered: without the state
 * filter, an instance that is no longer running but is still returned with its tags
 * would count as a leader and block election of a replacement.
 *
 * @param {string} currentInstanceId - ID of the instance running this code; excluded from the match.
 * @param {string} environmentName - Value of the "elasticbeanstalk:environment-name" tag to filter on.
 * @returns {Promise<boolean>} true when another live instance is tagged leader; false when
 *   none is, or when the lookup fails (the failure is logged, not rethrown).
 */
async function isAnyInstanceLeader(currentInstanceId, environmentName) {
	const ec2 = new AWS.EC2(); // Create an EC2 client

	try {
		const result = await ec2
			.describeInstances({
				Filters: [
					{
						Name: "tag:elasticbeanstalk:environment-name",
						Values: [environmentName],
					},
					{
						Name: "tag:is_leader",
						Values: ["true"],
					},
					{
						// Ignore instances that are no longer (or not yet going to be) running.
						Name: "instance-state-name",
						Values: ["pending", "running"],
					},
				],
			})
			.promise();

		// Any tagged instance other than the current one counts as an existing leader.
		const instances = result.Reservations.flatMap((reservation) => reservation.Instances);
		return instances.some((instance) => instance.InstanceId !== currentInstanceId);
	} catch (error) {
		console.error("Error checking leader status:", error.response && error.response.status && error.response.statusText ? `${error.response.status} ${error.response.statusText}` : error);
		// A failed lookup is reported as "no other leader", so the caller may try to claim leadership.
		return false;
	}
}

/**
 * Mark an instance as leader by adding the tag is_leader="true" to it.
 *
 * Failures are logged and swallowed; nothing is returned either way.
 *
 * @param {string} instanceId - ID of the instance to tag.
 * @returns {Promise<void>}
 */
async function assignLeadership(instanceId) {
	const ec2Client = new AWS.EC2();
	const leaderTag = {Key: "is_leader", Value: "true"};

	try {
		const request = {Resources: [instanceId], Tags: [leaderTag]};
		await ec2Client.createTags(request).promise();
	} catch (error) {
		const httpResponse = error.response;
		const detail = httpResponse && httpResponse.status && httpResponse.statusText
			? `${httpResponse.status} ${httpResponse.statusText}`
			: error;
		console.error("Error assigning leadership:", detail);
	}
}

/**
 * Check whether the current instance is among the instances of the environment that
 * carry the tag is_leader="true".
 *
 * @param {string} currentInstanceId - ID of the instance running this code.
 * @param {string} environmentName - Value of the "elasticbeanstalk:environment-name" tag to filter on.
 * @returns {Promise<boolean>} true when the current instance is in the tagged set; false when
 *   it is not, or when the lookup fails (the failure is logged, not rethrown).
 */
async function isCurrentInstanceLeader(currentInstanceId, environmentName) {
	const ec2Client = new AWS.EC2();

	try {
		const environmentFilter = {Name: "tag:elasticbeanstalk:environment-name", Values: [environmentName]};
		const leaderFilter = {Name: "tag:is_leader", Values: ["true"]};
		const request = {Filters: [environmentFilter, leaderFilter]};

		const {Reservations: reservations} = await ec2Client.describeInstances(request).promise();

		// Flatten the per-reservation instance lists, then look for our own ID.
		const taggedInstances = reservations.flatMap((reservation) => reservation.Instances);
		return taggedInstances.some((instance) => instance.InstanceId === currentInstanceId);
	} catch (error) {
		const httpResponse = error.response;
		let detail = error;
		if (httpResponse && httpResponse.status && httpResponse.statusText) {
			detail = `${httpResponse.status} ${httpResponse.statusText}`;
		}
		console.error("Error checking leader status:", detail);
		return false;
	}
}

Now, create another file called JobsScheduler.js and place the following:

import {scheduleJob} from "node-schedule";
import axios from "axios";

/**
 * Issue an HTTP POST through axios.
 *
 * @param {string} url - Address to request.
 * @param {*} data - Request body handed to axios unchanged.
 * @param {Object} headers - Passed to axios as the `headers` config option.
 * @returns {Promise<*>} Whatever `axios.post` resolves with; rejections propagate to the caller.
 */
export async function postRequest(url, data, headers) {
	const requestConfig = {headers};
	const response = await axios.post(url, data, requestConfig);
	return response;
}

/**
 * Schedule a cloud job.
 *
 * On every tick of the schedule, sends a POST to `${SERVER_URL}/jobs/${jobName}` with the
 * application ID and master key taken from the APP_ID and MASTER_KEY environment variables.
 * Scheduling and trigger failures are logged, never thrown.
 *
 * @param {String} jobName - Name of the job to schedule
 * @param {String} cronConfig - Cron config to schedule the job
 * @returns {void}
 *
 * @example scheduleCloudJob("sampleJob", "0 0 * * *"); // Every day at midnight
 */
export function scheduleCloudJob(jobName, cronConfig) {
	try {
		const job = scheduleJob(cronConfig, async function () {
			const url = `${process.env.SERVER_URL}/jobs/${jobName}`;
			const headers = {
				"X-Parse-Application-Id": process.env.APP_ID,
				"X-Parse-Master-Key": process.env.MASTER_KEY,
			};

			// Await the request so the callback's promise settles only once the trigger has finished.
			try {
				await postRequest(url, {}, headers);
				console.log(`Successfully triggered job ${jobName} with cron config ${cronConfig}`);
			} catch (error) {
				console.error(`Error triggering job ${jobName}:`, error);
			}
		});

		// scheduleJob signals an unusable spec by returning null rather than throwing,
		// so the success message must depend on the return value.
		if (!job) {
			console.error(`Error scheduling job ${jobName}:`, new Error(`Invalid cron config: ${cronConfig}`));
			return;
		}

		console.log(`Successfully scheduled job ${jobName} with cron config ${cronConfig}`);
	} catch (error) {
		console.error(`Error scheduling job ${jobName}:`, error);
	}
}

Now, you can add the following at the end of your index.js file:

import {checkIfLeader, isRunningOnAWS} from "./AWS.js";
import AWS from "aws-sdk";
import {scheduleCloudJob} from "./JobsScheduler.js";

// Configure AWS SDK
AWS.config.update({
	region: "YOUR-AWS-REGION-HERE",
});

/**
 * Cloud Jobs Scheduler bootstrap (only runs on the first pm2 instance and on the AWS ELB leader).
 *
 * checkIfLeader() is asynchronous, so its result must be awaited: using the returned
 * Promise directly in a condition is always truthy and would start the scheduler on
 * every instance.
 *
 * @returns {Promise<void>}
 */
async function startCloudJobsScheduler() {
	const isFirstPm2Instance = process.env.NODE_APP_INSTANCE === "0";

	// The leader check (which talks to AWS) is only reached when the cheaper checks pass.
	if (isFirstPm2Instance && isRunningOnAWS() && (await checkIfLeader())) {
		console.log("Starting Cloud Jobs Scheduler...");

		// Schedule 'sampleJob' job to run at 4 AM every Sunday
		scheduleCloudJob("sampleJob", "0 0 4 * * 0");
		// TODO: ADD MORE JOBS TO BE SCHEDULED
	} else {
		console.log("Cloud Jobs Scheduler doesn't run on this instance.");
	}
}

// A failed leader check must not take the server down; log it and carry on without the scheduler.
startCloudJobsScheduler().catch((error) => {
	console.error("Error starting Cloud Jobs Scheduler:", error);
});

This ensures that the jobs are only scheduled on the leader instance of the Elastic Beanstalk environment and, within that leader instance, only on pm2 instance number 0 when more than one instance is running via pm2.

If you don’t use pm2, you can simply remove the process.env.NODE_APP_INSTANCE === "0" check.

To detect whether the server is running on AWS, I created a GitHub Actions pipeline that sets the env variable IS_RUNNING_ON_AWS to true; locally I set it to false. You can also set it from the Elastic Beanstalk dashboard.

Also, make sure your app ID is in the env var APP_ID, your server URL in SERVER_URL, and your master key in MASTER_KEY; if you use different names, change the references in the code.

And that is everything. You should now have your Cloud Jobs scheduled. If you have any questions, let me know!