Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add PDF URL validation and content type check #1896

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 35 additions & 5 deletions public/js/extra.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import './lib/renderer/lightbox'
import { renderCSVPreview } from './lib/renderer/csvpreview'

import { escapeAttrValue } from './render'
import { sanitizeUrl } from './utils'
import { sanitizeUrl, isPdfUrl } from './utils'

import markdownit from 'markdown-it'
import markdownitContainer from 'markdown-it-container'
Expand Down Expand Up @@ -634,11 +634,41 @@ export function finishView (view) {
const cleanUrl = sanitizeUrl(url)
const inner = $('<div></div>')
$(this).append(inner)
setTimeout(() => {
PDFObject.embed(cleanUrl, inner, {
height: '400px'

// First check URL format
const isPDFByExtension = /\.pdf(\?.*)?$/i.test(cleanUrl) || cleanUrl.includes('pdf')

if (isPDFByExtension) {
// Show loading message while we check content type
const loadingMessage = $('<div class="alert alert-info">Verifying PDF file...</div>')
inner.html(loadingMessage)

// Perform additional validation with HEAD request
isPdfUrl(cleanUrl).then(isPDFByContentType => {
if (isPDFByContentType) {
// Valid PDF by content type, embed it
PDFObject.embed(cleanUrl, inner, {
height: '400px'
})
} else {
// URL format looks like PDF but content type doesn't match
inner.html('<div class="alert alert-warning">The URL looks like a PDF but the server didn\'t confirm it has a PDF content type.</div>')
console.warn('URL has PDF extension but content type is not application/pdf:', cleanUrl)

// Try to embed anyway as a fallback
setTimeout(() => {
PDFObject.embed(cleanUrl, inner, {
height: '400px',
fallbackLink: 'This doesn\'t appear to be a valid PDF. <a href="[url]">Click here to try downloading it directly</a>.'
})
}, 1)
}
})
}, 1)
} else {
// Not a valid PDF URL by extension
inner.html('<div class="alert alert-danger">Invalid PDF URL. The URL must point to a PDF file.</div>')
console.warn('Invalid PDF URL format:', cleanUrl)
}
})
// syntax highlighting
view.find('code.raw').removeClass('raw')
Expand Down
13 changes: 13 additions & 0 deletions public/js/utils.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/* global fetch */
import base64url from 'base64url'

const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i
Expand Down Expand Up @@ -46,3 +47,15 @@ export function sanitizeUrl (rawUrl) {
return 'about:blank'
}
}

/**
 * Check whether a URL serves a PDF, judged by the Content-Type header
 * returned for a HEAD request.
 *
 * @param {string} url - URL to probe.
 * @returns {Promise<boolean>} Resolves to true only when the request succeeds
 *   and the response media type is application/pdf. Resolves to false
 *   otherwise, including when the request itself fails (the error is logged,
 *   never rethrown).
 */
export async function isPdfUrl (url) {
  try {
    const response = await fetch(url, { method: 'HEAD' })
    // An error response (404, 500, ...) is not a usable PDF even if it
    // advertises a PDF content type.
    if (!response.ok) {
      return false
    }
    // Content-Type may carry parameters (e.g. "application/pdf; charset=binary")
    // and media types are case-insensitive, so compare only the normalized
    // type/subtype. A missing header yields null, hence the '' fallback.
    const contentType = response.headers.get('Content-Type') || ''
    const mediaType = contentType.split(';')[0].trim().toLowerCase()
    return mediaType === 'application/pdf'
  } catch (error) {
    console.warn('Error checking PDF content type:', error)
    return false
  }
}