Pupeteer Cluster - PDF Responses getting Exchanged
jestinosam opened this issue · 1 comments
I am running a Puppeteer Cluster with maxConcurrency as 10. I am using Puppeteer here to receive HTML content via a POST endpoint exposed from this program. Also, the HTML content received via POST endpoint is 2-3 pages max. However while doing some performance tests, I am observing that the generated PDFs are getting exchanged when the response is sent from the endpoint i.e. Assume 3 requests are triggered parallelly to the endpoint. So the PDF of the 3rd request at times goes as the response of the 2nd request & vice versa. Can someone explain or point out how the PDF responses are getting exchanged for the incoming requests with the below implementation. Below is the code for the same:
Puppeteer Cluster Configuration
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 10,
puppeteerOptions: {
headless: true,
args: [
"--no-sandbox",
"--disabled-setupid-sandbox",
"--font-render-hinting=none",
"--force-color-profile=srgb",
],
},
});
POST Controller
app.post("/pdfengine/v1/api/generatePdf", async (request, response) => {
logger.info("Document Name is %s :: Correlation Id is :: %s", request.body.documentName, request.body.correlationId);
logger.info("Invoking the Generate PDF method!");
try {
const generatedPdf = await cluster.execute(
generatePdf(
request.body.documentName,
request.body.correlationId,
request.body.htmlContent
)
);
logger.info(
"PDF generated successfully for Document Name :: %s and CorrelationId :: %s",
request.body.documentName,
request.body.correlationId
);
response.contentType("application/pdf");
response.status(200).send(generatedPdf);
} catch (err) {
logger.error(err);
response.status(500).send("Request Failed!");
next(err);
}
});
Generate PDF Method
async function generatePdf(documentName, correlationId, htmlContent) {
await cluster.task(async ({ page }) => {
let hrstart = process.hrtime();
let startTime;
try {
await page.addScriptTag({
path: "jquery-3.6.0.min.js",
});
startTime = process.hrtime();
await page.setContent(htmlContent);
calculateExecutionTime(startTime, "HTML Content was set in :: %d ms for DocumentName :: %s and CorrelationId :: %s");
await page.emulateMediaType("print");
await page.evaluateHandle("document.fonts.ready");
startTime = process.hrtime();
const pdf = await page.pdf({
format: "Letter",
printBackground: true,
preferCSSPageSize: true,
});
calculateExecutionTime(
startTime,
"PDF generated by Puppeteer in :: %d ms for DocumentName :: %s and CorrelationId :: %s",
documentName,
correlationId
);
calculateExecutionTime(
hrstart,
"Total Time taken to generate PDF is :: %d ms for DocumentName :: %s and CorrelationId :: %s",
documentName,
correlationId
);
return pdf;
} catch (err) {
logger.error("Error in PDF Generation");
logger.error(err);
}
});
}
Full Code
(async () => {
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 10,
puppeteerOptions: {
headless: true,
args: [
"--no-sandbox",
"--disabled-setupid-sandbox",
"--font-render-hinting=none",
"--force-color-profile=srgb",
],
},
});
/**
* Register additional utilities with Express
*/
app.use(
rTracer.expressMiddleware(),
express.json({ limit: "50mb" }),
express.static(process.env.ASSETS_ENV_PATH)
);
/**
* Node JS Request Interceptor
*/
app.all("/pdfengine/v1/api/generatePdf", function (request, response, next) {
requestMetadata = metadata(request);
traceId = request.get("traceId");
parentSpanId = request.get("parentSpanId");
next();
});
/**
* Custom Logger Format
*/
const myFormat = printf(({ level, message, timestamp }) => {
var spanId = rTracer.id();
var logMetadata = Object.assign({}, requestMetadata);
let msg = `{"level": "${level}", "timestamp": "${timestamp}", "parentSpanId": "${parentSpanId}", "spanId": "${spanId}", "traceId": "${spanId}", "message": "${message}", "metadata": ${JSON.stringify(
logMetadata
)}}`;
if (!spanId) {
msg = `{"level": "${level}", "timestamp": "${timestamp}", "message": "${message}"}`;
}
return msg;
});
/**
* Set Metadata for Logging
* @param {*} request
* @returns {JSON} Object
*/
const metadata = function setMetadata(request) {
return {
documentName: request.body.documentName,
correlationId: request.body.correlationId,
};
};
/**
* Create a Custom Logger
*/
const logger = winston.createLogger({
format: combine(timestamp(), splat(), myFormat), //format.json() myFormat
transports: [new winston.transports.Console()],
});
/**
* Post Endpoint
*/
app.post("/pdfengine/v1/api/generatePdf", async (request, response) => {
logger.info("Document Name is %s :: Correlation Id is :: %s", request.body.documentName, request.body.correlationId);
logger.info("Invoking the Generate PDF method!");
try {
const generatedPdf = await cluster.execute(
generatePdf(
request.body.documentName,
request.body.correlationId,
request.body.htmlContent
)
);
logger.info(
"PDF generated successfully for Document Name :: %s and CorrelationId :: %s",
request.body.documentName,
request.body.correlationId
);
response.contentType("application/pdf");
response.status(200).send(generatedPdf);
} catch (err) {
logger.error(err);
response.status(500).send("Request Failed!");
next(err);
}
});
/**
* Create a new browser instance and launch the same.
* @returns browser
*/
async function generatePdf(documentName, correlationId, htmlContent) {
await cluster.task(async ({ page }) => {
let hrstart = process.hrtime();
let startTime;
try {
await page.addScriptTag({
path: "jquery-3.6.0.min.js",
});
startTime = process.hrtime();
await page.setContent(htmlContent);
calculateExecutionTime(startTime, "HTML Content was set in :: %d ms for DocumentName :: %s and CorrelationId :: %s");
await page.emulateMediaType("print");
await page.evaluateHandle("document.fonts.ready");
startTime = process.hrtime();
const pdf = await page.pdf({
format: "Letter",
printBackground: true,
preferCSSPageSize: true,
});
calculateExecutionTime(
startTime,
"PDF generated by Puppeteer in :: %d ms for DocumentName :: %s and CorrelationId :: %s",
documentName,
correlationId
);
calculateExecutionTime(
hrstart,
"Total Time taken to generate PDF is :: %d ms for DocumentName :: %s and CorrelationId :: %s",
documentName,
correlationId
);
return pdf;
} catch (err) {
logger.error("Error in PDF Generation");
logger.error(err);
}
});
}
/**
* Method to calculate the time difference for any execution
*
* @param {NodeJS.Process} startTime
* @param {String} message
*/
function calculateExecutionTime(startTime, message, documentName, correlationId) {
var precision = 2;
var timeDifference = (process.hrtime(startTime)[1] / 1000000).toFixed(
precision
);
logger.info(message, timeDifference, documentName, correlationId);
}
/**
* Expose Application
*/
app.listen(3000, function () {
logger.info("Application started! Server running on port 3000");
logger.info("Welcome To Node JS App!");
console.log("Environment is :: ", process.env.NODE_ENV);
console.log("Assets path is ::", process.env.ASSETS_ENV_PATH);
});
})();
Hi @jestinosam
Does it still happen? Exchange of responses amongst the requests?
If so, how did you fix it? Can you please let me know this? Thanks.