Detect Metadata About Pdf From Javascript
If my JavaScript code is running in a Chrome extension, and the browser has loaded a PDF file, can I detect metadata about the loaded PDF (number of pages, etc.)? Extra challenge:
Solution 1:
After some quick Google-fu I learned that PDFs store metadata in XMP format (XML), so you can read the raw file data and pick out the metadata with a simple regex.
Select a PDF file from your computer to see the demo:
// When the user picks a file in the #f input, extract its PDF details and log them.
// A regular function (not an arrow) is used so that `this` is the input element.
document.getElementById('f').oninput = async function () {
  const pdf = this.files[0];
  // `files` is empty when the selection is cleared; there is nothing to read.
  if (!pdf) {
    return;
  }
  try {
    const details = await pdfDetails(pdf);
    console.log(details);
  } catch (err) {
    // Surface read/parse failures instead of leaving an unhandled rejection.
    console.error(err);
  }
};
/**
 * Reads a PDF as a binary string and pulls out a page count plus any
 * `<xmp…:Name>value<` entries found in the raw data.
 *
 * @param {Blob} pdfBlob - The PDF to inspect (e.g. a File from a file input).
 * @returns {Promise<Array<Object>>} Resolves with an array whose first element
 *   is `{ Pages: <count> }`, followed by one single-key object
 *   `{ <tagName>: <text> }` per regex match, in document order. Rejects with
 *   the reader's error if the read fails.
 */
function pdfDetails(pdfBlob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = function () {
      const raw = reader.result;

      // Count "/Type /Page" markers; the trailing [^s] excludes "/Type /Pages".
      // match() returns null when nothing matches, so fall back to an empty list
      // rather than throwing on `.length`.
      const pageMarkers = raw.match(/\/Type[\s]*\/Page[^s]/g) || [];
      const Pages = pageMarkers.length;

      const meta = [{ Pages }];

      // Group 1 is the tag name after the namespace prefix, group 2 is its text.
      const regex = /<xmp.*?:(.*?)>(.*?)</g;
      let match = regex.exec(raw);
      while (match !== null) {
        const [, key, value] = match;
        meta.push({ [key]: value });
        match = regex.exec(raw);
      }

      resolve(meta);
    };

    // Without this, a failed read would leave the promise pending forever.
    reader.onerror = function () {
      reject(reader.error);
    };

    reader.readAsBinaryString(pdfBlob);
  });
}
<input type="file" id="f" accept=".pdf">
Solution 2:
A PDF document doesn't list the number of pages in its metadata. Even if you added some custom metadata to track that information, it wouldn't be stored in a standard way that PDF readers would be expected to understand.
Post a Comment for "Detect Metadata About Pdf From Javascript"