const { randomUUID } = require('crypto');
const { EventEmitter } = require('events');

const cors = require('cors');
const express = require('express');
const puppeteerExtra = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
| |
|
// HTTP application and the port it listens on.
const port = 7860;
const app = express();

// Allow cross-origin callers and parse JSON request bodies.
app.use(cors());
app.use(express.json());

// Register the stealth plugin on the shared puppeteer-extra instance.
puppeteerExtra.use(StealthPlugin());

// In-memory state, keyed by session id:
//   downloadJobs     - job record: { status, buffer?, message? }
//   progressTrackers - ProgressTracker of each job that is still running
const downloadJobs = new Map();
const progressTrackers = new Map();
| |
|
/**
 * Holds the most recent progress snapshot of one download session and
 * broadcasts every change as a `progress` event.
 */
class ProgressTracker extends EventEmitter {
  /**
   * @param {string} sessionId - Identifier of the session being tracked.
   */
  constructor(sessionId) {
    super();
    Object.assign(this, {
      sessionId,
      progress: 0,
      status: 'initializing',
      message: '',
    });
  }

  /**
   * Stores the new snapshot, emits it as a `progress` event (with an ISO
   * timestamp added) and writes one log line.
   *
   * @param {number} progress - Percentage value reported by the caller.
   * @param {string} status - Short machine-readable stage name.
   * @param {string} message - Human-readable description of the stage.
   */
  updateProgress(progress, status, message) {
    Object.assign(this, { progress, status, message });

    this.emit('progress', {
      sessionId: this.sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString(),
    });

    console.log(`π [${this.sessionId}] ${progress}% - ${status}: ${message}`);
  }
}
| |
|
| | |
/**
 * Moves the mouse to five random points, scrolls the page by a small random
 * offset and pauses between the steps, then bumps the tracker by one point.
 *
 * @param {object} page - Puppeteer page to drive.
 * @param {ProgressTracker|null} [progressTracker] - Optional progress sink.
 * @returns {Promise<void>}
 */
const simulateHumanBehavior = async (page, progressTracker) => {
  console.log("π§ Simulating human-like mouse movements and delays...");

  // page.viewport() yields null when no viewport was configured; fall back to
  // the same 1920x1080 area the per-dimension defaults already assume.
  const { width, height } = page.viewport() ?? {};
  const maxX = width || 1920;
  const maxY = height || 1080;

  for (let i = 0; i < 5; i++) {
    const x = Math.random() * maxX;
    const y = Math.random() * maxY;
    await page.mouse.move(x, y, { steps: 10 });
    // NOTE(review): page.waitForTimeout appears to be gone from recent
    // Puppeteer releases - confirm against the installed version.
    await page.waitForTimeout(Math.random() * 1000 + 500);
  }

  // Scroll by a random offset in the range [-100, 100) pixels.
  await page.evaluate(() => {
    window.scrollBy(0, Math.random() * 200 - 100);
  });
  await page.waitForTimeout(Math.random() * 2000 + 1000);

  progressTracker?.updateProgress(progressTracker.progress + 1, 'humanizing', 'Human behavior simulated');
};
| |
|
| | |
/**
 * Probes the current page for Cloudflare challenge markers and, when one is
 * found, waits for the markers to disappear, falling back to a click on the
 * Turnstile widget followed by a second wait.
 *
 * @param {object} page - Puppeteer page that has already navigated.
 * @param {ProgressTracker|null} [progressTracker] - Optional progress sink.
 * @returns {Promise<void>}
 * @throws {Error} When the fallback attempt (Turnstile click plus second
 *   wait) fails.
 */
const handleCloudflareChallenge = async (page, progressTracker) => {
  progressTracker?.updateProgress(35, 'cloudflare', 'Detecting and bypassing Cloudflare...');
  console.log("βοΈ Checking for Cloudflare challenge...");

  // Markers probed, in order, to decide whether a challenge is showing.
  const cloudflareSelectors = [
    '#challenge-running',
    '.cf-browser-verification',
    '[data-ray]',
    '#cf-challenge-running',
    '.under-attack',
    'iframe[src*="cloudflare"]',
    '#challenge-form',
    '.cf-turnstile'
  ];

  // The "challenge is gone" check uses every marker except the iframe one.
  const clearanceSelectors = cloudflareSelectors.filter(
    (selector) => selector !== 'iframe[src*="cloudflare"]'
  );

  // Resolves once none of the clearance markers is left in the document.
  const waitForChallengeToClear = (timeout) =>
    page.waitForFunction(
      (selectors) => !selectors.some((sel) => document.querySelector(sel)),
      { timeout },
      clearanceSelectors
    );

  let challengeDetected = false;
  for (const selector of cloudflareSelectors) {
    try {
      await page.waitForSelector(selector, { timeout: 5000 });
      challengeDetected = true;
      console.log(`βοΈ Cloudflare challenge detected with selector: ${selector}`);
      break;
    } catch (e) {
      // Marker did not appear within its 5 s budget; probe the next one.
    }
  }

  if (!challengeDetected) {
    console.log("✅ No Cloudflare challenge detected.");
    return;
  }

  await simulateHumanBehavior(page, progressTracker);

  console.log("β³ Waiting for Cloudflare challenge to complete...");
  try {
    await waitForChallengeToClear(90000);
  } catch (e) {
    console.log("β οΈ Standard wait failed, attempting Turnstile click...");
    try {
      const cfInput = await page.$('[name="cf-turnstile-response"]');
      if (cfInput) {
        const parentItem = await cfInput.evaluateHandle((element) => element.parentElement);
        const coordinates = await parentItem.boundingBox();
        if (coordinates) {
          // Click 25 px in from the left edge, vertically centred.
          await page.mouse.click(coordinates.x + 25, coordinates.y + coordinates.height / 2);
          console.log("π±οΈ Clicked on Turnstile CAPTCHA");
          await page.waitForTimeout(3000);
        }
      }

      await waitForChallengeToClear(60000);
    } catch (clickError) {
      console.error("β Turnstile click failed:", clickError.message);
      throw new Error(
        "Failed to bypass Cloudflare challenge. Try again later or use a proxy.",
        { cause: clickError }
      );
    }
  }

  // Pause for a random 3-7 s before handing the page back to the caller.
  const randomDelay = (min, max) => Math.floor(Math.random() * (max - min + 1) + min);
  await page.waitForTimeout(randomDelay(3000, 7000));
  console.log("✅ Cloudflare challenge bypassed successfully.");
  progressTracker?.updateProgress(38, 'cloudflare', 'Cloudflare bypassed');
};
| |
|
| | |
/**
 * Prepares a page before navigation: sets consent cookies for `.studocu.com`,
 * injects a stylesheet into the current document and registers a script that
 * runs in every new document to remove cookie/consent elements.
 *
 * @param {object} page - Puppeteer page to configure.
 * @param {ProgressTracker|null} [progressTracker] - Optional progress sink.
 * @returns {Promise<boolean>} Always resolves to `true`.
 */
const bypassCookiesAndRestrictions = async (page, progressTracker) => {
  progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
  console.log("πͺ Starting comprehensive cookie and restriction bypass...");

  const preCookies = [
    { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
    { name: 'cookie_consent', value: 'true', domain: '.studocu.com' },
    { name: 'gdpr_consent', value: 'accepted', domain: '.studocu.com' },
    { name: 'privacy_policy_accepted', value: 'true', domain: '.studocu.com' },
    { name: 'user_consent', value: '1', domain: '.studocu.com' },
    { name: 'analytics_consent', value: 'false', domain: '.studocu.com' },
    { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
    { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
  ];
  for (const cookie of preCookies) {
    try {
      await page.setCookie(cookie);
    } catch (e) {
      // Best effort: one rejected cookie must not abort the setup.
      console.log(`Failed to set cookie ${cookie.name}:`, e.message);
    }
  }

  // The cookie-banner rule no longer lists `:has-text(...)` selectors: that
  // pseudo-class is not CSS, and one invalid selector in a list makes the
  // browser drop the whole rule.
  // NOTE(review): the tag is added to whatever document is loaded when this
  // runs; confirm it is still present after the caller navigates.
  await page.addStyleTag({
    content: `
      /* Hide all possible cookie banners */
      [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i], [aria-label*="cookie" i],
      .gdpr-banner, .gdpr-popup, .gdpr-modal, .consent-banner, .consent-popup, .consent-modal, .privacy-banner, .privacy-popup, .privacy-modal,
      .cookie-law, .cookie-policy, .cookie-compliance, .onetrust-banner-sdk, #onetrust-consent-sdk, .cmp-banner, .cmp-popup, .cmp-modal,
      [class*="CookieBanner"], [class*="CookieNotice"], [class*="ConsentBanner"], [class*="ConsentManager"], .cc-banner, .cc-window, .cc-compliance {
        display: none !important;
        visibility: hidden !important;
        opacity: 0 !important;
        z-index: -9999 !important;
        pointer-events: none !important;
      }
      /* Remove blur and premium overlays, including previews */
      [class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i], [class*="preview" i], [class*="blurred-container" i], [class*="blurred" i] {
        display: none !important;
        filter: none !important;
        backdrop-filter: none !important;
        opacity: 1 !important;
        visibility: visible !important;
      }
      /* Ensure document content is visible */
      .document-content, .page-content, [data-page] {
        filter: none !important;
        opacity: 1 !important;
        visibility: visible !important;
        pointer-events: auto !important;
      }
      /* Remove fixed overlays */
      .fixed-overlay, .sticky-overlay, .content-overlay {
        display: none !important;
      }
      /* Restore scrolling */
      html, body {
        overflow: auto !important;
        position: static !important;
      }
      /* Hide Cloudflare elements if they persist */
      #challenge-running, .cf-browser-verification, [data-ray], .under-attack {
        display: none !important;
      }
    `
  });

  await page.evaluateOnNewDocument(() => {
    // Pre-seed consent state and stub out analytics globals.
    window.cookieConsent = { accepted: true };
    window.gtag = () => { };
    window.ga = () => { };
    window.dataLayer = [];

    // Remove newly added elements whose text, class or id mentions
    // cookies/consent.
    const observer = new MutationObserver((mutations) => {
      mutations.forEach((mutation) => {
        mutation.addedNodes.forEach((node) => {
          if (node.nodeType !== 1) return;

          const element = node;
          const text = (element.textContent || '').toLowerCase();
          // SVG elements expose className as an object, not a string.
          const rawClass = typeof element.className === 'string'
            ? element.className
            : (element.getAttribute('class') || '');
          const className = rawClass.toLowerCase();
          const id = (element.id || '').toLowerCase();

          if (
            text.includes('cookie') ||
            text.includes('consent') ||
            text.includes('privacy policy') ||
            className.includes('cookie') ||
            className.includes('consent') ||
            className.includes('gdpr') ||
            id.includes('cookie') ||
            id.includes('consent')
          ) {
            console.log('Removing detected cookie banner:', element);
            element.remove();
          }
        });
      });
    });

    // This script runs before the document is parsed, so <body> may not exist
    // yet; observing a null target throws and would abort everything below.
    const startObserving = () => {
      if (document.body) {
        observer.observe(document.body, { childList: true, subtree: true });
      }
    };
    if (document.body) {
      startObserving();
    } else {
      document.addEventListener('DOMContentLoaded', startObserving, { once: true });
    }

    // Once per second, sweep banners by selector and re-enable scrolling.
    setInterval(() => {
      const cookieElements = document.querySelectorAll(`
        [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i],
        .gdpr-banner, .consent-banner, .privacy-banner, .onetrust-banner-sdk, #onetrust-consent-sdk,
        .cmp-banner, .cc-banner
      `);
      cookieElements.forEach(el => el.remove());

      if (document.body) {
        document.body.style.overflow = 'auto';
      }
      if (document.documentElement) {
        document.documentElement.style.overflow = 'auto';
      }
    }, 1000);
  });

  progressTracker?.updateProgress(10, 'bypassing', 'Cookie bypass configured successfully');
  return true;
};
| |
|
/**
 * Runs an in-page sweep that removes ad/premium/preview elements, writes
 * inline filter/opacity values on elements that look blurred and forces the
 * main content containers to be displayed. The sweep runs once immediately
 * and then every second for 30 seconds.
 *
 * @param {object} page - Puppeteer page to modify.
 * @param {ProgressTracker|null} [progressTracker] - Optional progress sink.
 * @returns {Promise<void>}
 */
const unblurContent = async (page, progressTracker) => {
  progressTracker?.updateProgress(15, 'unblurring', 'Removing content restrictions...');
  console.log("π Unblurring content and bypassing premium restrictions...");

  await page.evaluate(() => {
    // Selector groups whose matches are removed from the DOM outright.
    const removalSelectors = [
      "#adbox, .adsbox, .ad-box, .banner-ads, .advert",
      ".PremiumBannerBlobWrapper_overflow-wrapper__xsaS8",
      '[class*="preview" i], [class*="blurred-container" i], [class*="blurred" i]:not(img)',
    ];

    // Containers that get forced inline display properties.
    const contentSelectors = [
      '.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
      '[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
    ];
    const forcedProperties = [
      ['filter', 'none'],
      ['opacity', '1'],
      ['visibility', 'visible'],
      ['display', 'block'],
      ['pointer-events', 'auto'],
    ];

    // True when computed style or class name suggests blur/premium styling.
    const looksRestricted = (el) => {
      const computed = window.getComputedStyle(el);
      const classText = el.className ? el.className.toString().toLowerCase() : '';
      return (
        computed.filter?.includes("blur") ||
        computed.backdropFilter?.includes("blur") ||
        parseFloat(computed.opacity) < 1 ||
        classText.includes("blur") ||
        classText.includes("premium")
      );
    };

    const sweep = () => {
      for (const selector of removalSelectors) {
        for (const el of document.querySelectorAll(selector)) {
          el.remove();
        }
      }

      for (const el of document.querySelectorAll("*")) {
        if (!looksRestricted(el)) continue;
        el.style.filter = "none !important";
        el.style.backdropFilter = "none !important";
        el.style.opacity = "1 !important";
        if (el.classList) {
          el.classList.remove("blur", "blurred", "premium-blur");
        }
      }

      for (const selector of contentSelectors) {
        for (const el of document.querySelectorAll(selector)) {
          for (const [property, value] of forcedProperties) {
            el.style.setProperty(property, value, 'important');
          }
        }
      }
    };

    sweep();
    const timer = setInterval(sweep, 1000);
    setTimeout(() => clearInterval(timer), 30000);
  });

  progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
};
| |
|
| | |
/**
 * Rewrites the `src` of every image whose URL contains `/blurred/` by
 * dropping that path segment, waits for all images on the page to settle
 * (load, error, or 10 s each) and then pauses another 3 s.
 *
 * @param {object} page - Puppeteer page to modify.
 * @param {ProgressTracker|null} [progressTracker] - Optional progress sink.
 * @returns {Promise<void>}
 */
const fetchClearImages = async (page, progressTracker) => {
  progressTracker?.updateProgress(65, 'unblurring_images', 'Fetching clear page images...');
  console.log("πΌοΈ Modifying blurred image URLs to fetch clear versions...");

  await page.evaluate(() => {
    for (const img of document.querySelectorAll('img[src*="/blurred/"]')) {
      img.src = img.src.replace(/\/blurred\//, '/');
      console.log(`Modified image src: ${img.src}`);
    }
  });

  await page.evaluate(async () => {
    // Resolves when the image has loaded, failed, or 10 s have elapsed.
    const settled = (img) => {
      if (img.complete) return Promise.resolve();
      return new Promise((resolve) => {
        img.addEventListener('load', resolve);
        img.addEventListener('error', resolve);
        setTimeout(resolve, 10000);
      });
    };
    const pending = Array.from(document.querySelectorAll('img')).map((img) => settled(img));
    await Promise.all(pending);
  });

  // Fixed settle time after the in-page wait.
  await new Promise((done) => setTimeout(done, 3000));
  progressTracker?.updateProgress(70, 'unblurring_images', 'Clear images loaded');
};
| |
|
/**
 * Appends a `<style id="print-style-extension">` element to the page's head
 * containing the A4 print stylesheet used for PDF output.
 *
 * @param {object} page - Puppeteer page to style.
 * @param {ProgressTracker|null} [progressTracker] - Optional progress sink.
 * @returns {Promise<void>}
 */
const applyPrintStyles = async (page, progressTracker) => {
  progressTracker?.updateProgress(85, 'styling', 'Applying print styles...');
  console.log("π¨οΈ Applying print styles for clean PDF...");

  // Built on the Node side and handed to the page as an evaluate() argument.
  const printCss = `
@page {
/* Set page size to A4 and remove default margins */
size: A4 portrait;
margin: 0mm;
}
@media print {
html, body {
/* Ensure the body takes the full width and has no extra padding/margin */
width: 210mm !important;
height: auto !important;
margin: 0 !important;
padding: 0 !important;
overflow: visible !important;
background: white !important;
color: black !important;
display: flex;
justify-content: center;
}
/* Remove all unwanted elements like headers, footers, sidebars, etc. */
header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
[class*="Header"], [class*="Footer"], [class*="Sidebar"], [id*="Header"],
.ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
.HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
.Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
.InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper, #top-bar-wrapper,
.Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
display: none !important;
}
/* Force all elements to have a transparent background and no shadow */
* {
box-shadow: none !important;
background: transparent !important;
color: inherit !important;
}
/*
* KEY FIX: Target the main document container.
* Force it to be a block element, remove any transforms or max-widths,
* and center it perfectly within the page.
*/
.Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
.Viewer_document-wrapper__XsO4j, .page-content, .document-viewer, #page-container {
position: static !important;
display: block !important;
width: 100% !important;
max-width: none !important;
margin: 0 auto !important; /* Center horizontally */
padding: 0 !important;
box-sizing: border-box; /* Include padding in width calculation */
transform: none !important;
}
/* Ensure individual pages and images within the document use the full width */
[data-page], .page, .document-page, img {
page-break-after: always !important;
page-break-inside: avoid !important;
page-break-before: avoid !important;
width: 100% !important;
max-width: 100% !important;
height: auto !important;
display: block !important;
margin: 0 !important;
padding: 0 !important;
}
}
`;

  await page.evaluate((css) => {
    const styleEl = document.createElement("style");
    styleEl.id = "print-style-extension";
    styleEl.innerHTML = css;
    document.head.appendChild(styleEl);
  }, printCss);

  progressTracker?.updateProgress(88, 'styling', 'Print styles applied successfully');
};
/**
 * Opens `url` in a headless browser, prepares the page and renders it to PDF.
 *
 * Order of operations: launch browser, configure page, register the
 * cookie/consent setup and request filter, optionally log in, navigate (up to
 * three attempts), handle a Cloudflare challenge, unblur, scroll through the
 * document, unblur again, rewrite image URLs, wait for images, apply print
 * styles, and call `page.pdf()`. The browser is always closed afterwards.
 *
 * @param {string} url - Document URL to render.
 * @param {{email?: string, password?: string}} [options] - A login is
 *   attempted only when both fields are set.
 * @param {ProgressTracker|null} [progressTracker] - Optional progress sink;
 *   receives progress -1 with the error message on failure.
 * @returns {Promise<*>} The PDF bytes exactly as returned by `page.pdf()`.
 * @throws Rethrows any error raised during the run.
 */
const studocuDownloader = async (url, options = {}, progressTracker = null) => {
  let browser;
  try {
    progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
    console.log("π Launching browser with enhanced stealth configuration...");

    browser = await puppeteerExtra.launch({
      headless: true,
      // NOTE(review): several switches are repeated (notably
      // --disable-features); Chromium may honour only the last occurrence of
      // a switch - confirm before relying on the earlier ones.
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
        '--disable-features=VizDisplayCompositor',
        '--disable-background-networking',
        '--disable-background-timer-throttling',
        '--disable-renderer-backgrounding',
        '--disable-backgrounding-occluded-windows',
        '--disable-ipc-flooding-protection',
        '--disable-web-security',
        '--disable-features=site-per-process',
        '--disable-blink-features=AutomationControlled',
        '--disable-extensions',
        '--ignore-certificate-errors',

        '--disable-features=TranslateUI',
        '--disable-ipc-flooding',
        '--disable-backgrounding-occluded-windows',
        '--disable-renderer-backgrounding',
        '--disable-features=TranslateUI,BlinkGenPropertyTrees',
        '--metrics-recording-only',
        '--no-default-browser-check',
        '--safebrowsing-disable-auto-update',
        '--password-store=basic',
        '--use-mock-keychain'
      ],
      ignoreHTTPSErrors: true,
      timeout: 300000,
    });

    const page = await browser.newPage();

    progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1920, height: 1080 });

    // Override automation-revealing navigator properties in every new document.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
      Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
      Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });

      Object.defineProperty(navigator, 'permissions', {
        get: () => ({
          query: () => Promise.resolve({ state: 'granted' })
        })
      });
      window.chrome = {
        runtime: {},
        loadTimes: function () { },
        csi: function () { },
        app: {}
      };
    });

    await bypassCookiesAndRestrictions(page, progressTracker);

    // Request filter: documents and Cloudflare traffic always pass; third-party
    // assets/scripts and known tracker URLs are aborted.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const resourceType = req.resourceType();
      const reqUrl = req.url().toLowerCase();

      if (resourceType === 'document') {
        req.continue();
        return;
      }

      if (reqUrl.includes('cloudflare') || reqUrl.includes('cf-')) {
        req.continue();
        return;
      }

      // Static assets whose URL mentions neither the document nor the site.
      const isUnrelatedAsset =
        ['image', 'media', 'font', 'stylesheet'].includes(resourceType) &&
        !reqUrl.includes('document') && !reqUrl.includes('page') && !reqUrl.includes('studocu');
      // Scripts that come from neither the site nor Cloudflare.
      const isForeignScript =
        resourceType === 'script' && !reqUrl.includes('studocu') && !reqUrl.includes('cloudflare');
      // URLs containing one of the tracker/consent-vendor fragments.
      const isTracker =
        reqUrl.includes('doubleclick') ||
        reqUrl.includes('googletagmanager') ||
        reqUrl.includes('facebook.com') ||
        reqUrl.includes('twitter.com') ||
        reqUrl.includes('analytics') ||
        reqUrl.includes('gtm') ||
        reqUrl.includes('hotjar') ||
        reqUrl.includes('mixpanel') ||
        reqUrl.includes('onetrust') ||
        reqUrl.includes('cookielaw') ||
        (resourceType === 'other' && reqUrl.includes('/track/'));

      if (isUnrelatedAsset || isForeignScript || isTracker) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Optional login, attempted only when both credentials were supplied.
    if (options.email && options.password) {
      progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
      console.log("π Logging in to StuDocu...");

      await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 120000 });

      await handleCloudflareChallenge(page, progressTracker);
      await page.waitForSelector('#email', { timeout: 15000 });
      await page.type('#email', options.email);
      await page.type('#password', options.password);
      await page.click('button[type="submit"]');
      try {
        await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 });
        await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
        console.log("✅ Login successful.");
        progressTracker?.updateProgress(18, 'authenticated', 'Login successful');
      } catch (e) {
        console.error("β Login failed:", e.message);
        throw new Error("Login failed. Check credentials or try again.", { cause: e });
      }
    }

    progressTracker?.updateProgress(30, 'navigating', 'Navigating to document...');
    console.log(`π Navigating to ${url}...`);

    // Up to three navigation attempts, 10 s apart; the last failure is rethrown.
    let navigationSuccess = false;
    let attempts = 0;
    const maxAttempts = 3;
    while (!navigationSuccess && attempts < maxAttempts) {
      try {
        attempts++;
        progressTracker?.updateProgress(30 + (attempts * 5), 'navigating', `Navigation attempt ${attempts}/${maxAttempts}`);
        console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 120000 });
        navigationSuccess = true;
      } catch (e) {
        console.log(`Navigation attempt ${attempts} failed:`, e.message);
        if (attempts >= maxAttempts) throw e;
        await new Promise(resolve => setTimeout(resolve, 10000));
      }
    }

    await handleCloudflareChallenge(page, progressTracker);

    progressTracker?.updateProgress(40, 'loading', 'Page loaded, waiting for content...');
    await new Promise(resolve => setTimeout(resolve, 5000));

    await unblurContent(page, progressTracker);

    progressTracker?.updateProgress(45, 'loading', 'Waiting for document content...');
    console.log("β³ Waiting for document content to load...");

    // Try each content selector in turn (20 s each); stop at the first match.
    const contentSelectors = [
      '.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
      'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
    ];
    let contentFound = false;
    for (const selector of contentSelectors) {
      try {
        await page.waitForSelector(selector, { timeout: 20000 });
        console.log(`✅ Found content with selector: ${selector}`);
        contentFound = true;
        break;
      } catch (e) {
        console.log(`β Selector ${selector} not found, trying next...`);
      }
    }

    if (!contentFound) {
      console.log("β οΈ No specific content selector found, proceeding with page content...");
    }

    progressTracker?.updateProgress(50, 'scrolling', 'Loading all document pages...');
    console.log("π Loading all document pages with enhanced slow scroll...");

    // Scroll down in 600 px steps; repeat the pass until the body height stops
    // growing, then return to the top.
    await page.evaluate(async () => {
      const delay = (ms) => new Promise((res) => setTimeout(res, ms));
      let scrollHeight = document.body.scrollHeight;
      while (true) {
        let totalHeight = 0;
        const distance = 600;
        while (totalHeight < scrollHeight) {
          window.scrollBy(0, distance);
          totalHeight += distance;
          await delay(300);
        }
        await delay(2000);
        const newHeight = document.body.scrollHeight;
        if (newHeight === scrollHeight) break;
        scrollHeight = newHeight;
      }
      window.scrollTo({ top: 0, behavior: "smooth" });
      await delay(1000);
    });

    await unblurContent(page, progressTracker);

    await fetchClearImages(page, progressTracker);

    progressTracker?.updateProgress(75, 'loading_images', 'Loading images...');
    console.log("πΌοΈ Waiting for all images to load...");

    // Wait for every image to load, fail, or hit a 10 s cap.
    await page.evaluate(async () => {
      const images = Array.from(document.querySelectorAll('img'));
      await Promise.all(images.map(img => {
        if (img.complete) return Promise.resolve();
        return new Promise((resolve) => {
          img.addEventListener('load', resolve);
          img.addEventListener('error', resolve);
          setTimeout(resolve, 10000);
        });
      }));
    });

    await new Promise(resolve => setTimeout(resolve, 5000));
    progressTracker?.updateProgress(80, 'finalizing', 'Preparing document for PDF generation...');

    // NOTE(review): these inline values carry "!important" inside the value
    // string; browsers may ignore such assignments - verify they take effect.
    await page.evaluate(() => {
      const getDocumentHeight = () => Math.max(
        document.body.scrollHeight, document.body.offsetHeight,
        document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight
      );
      const height = getDocumentHeight();
      document.body.style.height = `${height}px !important`;
      document.documentElement.style.height = `${height}px !important`;
      document.body.style.overflow = 'hidden !important';
    });

    // Diagnostic only: counts text and images; the result does not gate the run.
    const contentCheck = await page.evaluate(() => {
      const textContent = document.body.textContent || '';
      const images = document.querySelectorAll('img');
      const documentImages = Array.from(images).filter(img =>
        img.src.includes('document') || img.src.includes('page') ||
        img.alt.includes('document') || img.alt.includes('page')
      );
      return {
        totalText: textContent.length,
        totalImages: images.length,
        documentImages: documentImages.length,
        hasDocumentContent: documentImages.length > 0 || textContent.length > 1000
      };
    });

    console.log("π Content verification:", {
      textLength: contentCheck.totalText,
      images: contentCheck.totalImages,
      documentImages: contentCheck.documentImages,
      hasContent: contentCheck.hasDocumentContent
    });

    if (!contentCheck.hasDocumentContent) {
      console.warn("β οΈ Warning: Limited document content detected.");
    }

    await applyPrintStyles(page, progressTracker);
    await page.emulateMediaType('print');

    progressTracker?.updateProgress(90, 'generating', 'Generating PDF...');
    console.log("π Generating PDF...");

    const pdfBuffer = await page.pdf({
      printBackground: true,
      preferCSSPageSize: true,
      displayHeaderFooter: false,
      timeout: 180000,
      scale: 1,
      omitBackground: false
    });

    progressTracker?.updateProgress(100, 'completed', 'PDF generated successfully!');
    console.log(`✅ PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
    return pdfBuffer;

  } catch (error) {
    progressTracker?.updateProgress(-1, 'error', error.message);
    console.error("β Error during PDF generation:", error);
    throw error;
  } finally {
    // Always release the browser, whatever the outcome above.
    if (browser) {
      console.log("π Closing browser...");
      try {
        await browser.close();
      } catch (e) {
        console.log("Error closing browser:", e.message);
      }
    }
  }
};
| |
|
| | |
// How long a finished job (including its PDF bytes) stays retrievable.
const JOB_TTL_MS = 15 * 60 * 1000;

/**
 * POST /api/request-download
 *
 * Validates the requested URL, registers a new session, replies immediately
 * with `{ sessionId }` and runs the download in the background. The outcome
 * is stored in `downloadJobs` and evicted after `JOB_TTL_MS`.
 *
 * Body: `{ url: string, email?: string, password?: string }`.
 * Responds 400 when `url` is not an http(s) URL on studocu.com.
 */
app.post('/api/request-download', (req, res) => {
  const { url, email, password } = req.body ?? {};

  // Parse the URL rather than substring-matching it: a plain
  // includes('studocu.com') also accepts e.g. "http://other.host/?studocu.com"
  // and throws on non-string input.
  let target = null;
  if (typeof url === 'string') {
    try {
      target = new URL(url);
    } catch {
      target = null;
    }
  }
  const isStudocuUrl =
    target !== null &&
    (target.protocol === 'https:' || target.protocol === 'http:') &&
    (target.hostname === 'studocu.com' || target.hostname.endsWith('.studocu.com'));
  if (!isStudocuUrl) {
    return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
  }

  // Random ids cannot collide the way same-millisecond timestamps can.
  const sessionId = randomUUID();
  const progressTracker = new ProgressTracker(sessionId);

  progressTrackers.set(sessionId, progressTracker);
  downloadJobs.set(sessionId, { status: 'processing' });

  console.log(`π― Processing request for: ${url} [Session: ${sessionId}]`);

  // Reply right away; the client polls the progress endpoint afterwards.
  res.json({ sessionId });

  studocuDownloader(url, { email, password }, progressTracker)
    .then((pdfBuffer) => {
      downloadJobs.set(sessionId, { status: 'completed', buffer: pdfBuffer });
    })
    .catch((error) => {
      downloadJobs.set(sessionId, { status: 'error', message: error.message });
    })
    .finally(() => {
      progressTrackers.delete(sessionId);
      // Evict the finished job later so PDF buffers do not pile up in memory;
      // unref() keeps this timer from holding the process open.
      setTimeout(() => downloadJobs.delete(sessionId), JOB_TTL_MS).unref();
    });
});
| |
|
/**
 * GET /api/progress/:sessionId
 *
 * Returns the live tracker snapshot while the job is running, otherwise a
 * final `completed` / `error` record from `downloadJobs`, otherwise 404.
 */
app.get('/api/progress/:sessionId', (req, res) => {
  const sessionId = req.params.sessionId;

  // A live tracker exists only while the job is still running.
  const liveTracker = progressTrackers.get(sessionId);
  if (liveTracker) {
    const { progress, status, message } = liveTracker;
    return res.json({
      sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString()
    });
  }

  const finishedJob = downloadJobs.get(sessionId);
  switch (finishedJob?.status) {
    case 'completed':
      return res.json({ sessionId, progress: 100, status: 'completed', message: 'PDF generated successfully!' });
    case 'error':
      return res.json({ sessionId, progress: -1, status: 'error', message: finishedJob.message });
    default:
      return res.status(404).json({ error: 'Session not found' });
  }
});
| |
|
/**
 * GET /api/download/:sessionId
 *
 * Sends the finished PDF as an attachment.
 * Responds 404 for an unknown session, 400 while the job is still
 * processing, and 500 when the job failed or has no PDF data.
 */
app.get('/api/download/:sessionId', (req, res) => {
  const { sessionId } = req.params;
  const job = downloadJobs.get(sessionId);

  if (!job) {
    return res.status(404).json({ error: 'Download session not found or expired.' });
  }

  if (job.status === 'processing') {
    return res.status(400).json({ error: 'Download is still processing.' });
  }

  if (job.status === 'error') {
    return res.status(500).json({ error: `Failed to generate PDF: ${job.message}` });
  }

  if (job.status === 'completed' && job.buffer) {
    // page.pdf() may hand back a plain Uint8Array rather than a Buffer, and
    // Express 4's res.send() serialises non-Buffer objects as JSON.
    const pdf = Buffer.isBuffer(job.buffer) ? job.buffer : Buffer.from(job.buffer);

    res.setHeader('Content-Type', 'application/pdf');
    res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
    res.send(pdf);
  } else {
    res.status(500).json({ error: 'An unknown error occurred.' });
  }
});
| |
|
| | |
/**
 * GET /health
 *
 * Reports liveness, process uptime and the number of jobs that currently
 * have a live progress tracker.
 */
app.get('/health', (req, res) => {
  const report = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    activeDownloads: progressTrackers.size
  };
  res.json(report);
});
| |
|
/**
 * GET /
 *
 * Returns a static description of the API: banner message, version, feature
 * list and endpoint summary.
 */
app.get('/', (req, res) => {
  const features = [
    'πͺ Advanced cookie banner bypass',
    'π Premium content unblurring',
    'π Login support for full access',
    'π Real-time progress tracking via polling',
    'π Clean PDF generation with print styles',
    'π΅οΈ Enhanced stealth to evade bot detection',
    'βοΈ Automatic Cloudflare challenge handling',
    'π§ Human-like behavior simulation'
  ];

  const endpoints = {
    request: 'POST /api/request-download (body: {url, filename?, email?, password?})',
    progress: 'GET /api/progress/:sessionId',
    download: 'GET /api/download/:sessionId',
    health: 'GET /health'
  };

  res.json({
    message: 'π Enhanced StuDocu Downloader API v5.3 - Real-time Progress Tracking with Cloudflare Bypass',
    version: '5.3.0',
    features,
    endpoints
  });
});
| |
|
// On SIGTERM or SIGINT, log the signal name and exit with status 0.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, () => {
    console.log(`${signal} received, shutting down gracefully...`);
    process.exit(0);
  });
}
| |
|
// Start the HTTP server and print the startup banner once it is listening.
const announceStartup = () => {
  console.log(`π Enhanced StuDocu Downloader v5.3.0 running on http://localhost:${port}`);
  console.log(`β¨ Features: Real-time progress tracking, enhanced stealth, Cloudflare bypass, and user feedback`);
};

app.listen(port, announceStartup);