From 66f501edb9e0d04ec25384c7788dd37d55ac4fa4 Mon Sep 17 00:00:00 2001 From: IgorA100 Date: Sun, 15 Oct 2023 01:40:52 +0300 Subject: [PATCH 1/3] Fix: Set CURLOPT_USERAGENT Some sites do not serve content without a User Agent Set CURLOPT_USERAGENT= Google Chrome Signed-off-by: IgorA100 --- lib/Scraper/Scraper.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Scraper/Scraper.php b/lib/Scraper/Scraper.php index 998c4464c..b766a00b8 100644 --- a/lib/Scraper/Scraper.php +++ b/lib/Scraper/Scraper.php @@ -37,7 +37,7 @@ public function __construct(LoggerInterface $logger) CURLOPT_RETURNTRANSFER => true, // return web page CURLOPT_HEADER => false, // do not return headers CURLOPT_FOLLOWLOCATION => true, // follow redirects - //CURLOPT_USERAGENT => "php-news", // who am i + CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36", // who am i CURLOPT_AUTOREFERER => true, // set referer on redirect CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect CURLOPT_TIMEOUT => 120, // timeout on response From 1667d24e4af27e9e57112f54999591242da57ac9 Mon Sep 17 00:00:00 2001 From: IgorA100 Date: Sun, 15 Oct 2023 21:20:01 +0300 Subject: [PATCH 2/3] Update Scraper.php Use FetcherConfig::DEFAULT_USER_AGENT for Curl Signed-off-by: IgorA100 --- lib/Scraper/Scraper.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Scraper/Scraper.php b/lib/Scraper/Scraper.php index b766a00b8..c5ae401f9 100644 --- a/lib/Scraper/Scraper.php +++ b/lib/Scraper/Scraper.php @@ -16,6 +16,7 @@ use fivefilters\Readability\ParseException; use League\Uri\Exceptions\SyntaxError; use Psr\Log\LoggerInterface; +use OCA\News\Config\FetcherConfig; class Scraper implements IScraper { @@ -37,7 +38,7 @@ public function __construct(LoggerInterface $logger) CURLOPT_RETURNTRANSFER => true, // return web page CURLOPT_HEADER => false, // do not return headers CURLOPT_FOLLOWLOCATION => true, // follow redirects - CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36", // who am i + CURLOPT_USERAGENT => FetcherConfig::DEFAULT_USER_AGENT, // who am i CURLOPT_AUTOREFERER => true, // set referer on redirect CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect CURLOPT_TIMEOUT => 120, // timeout on response From d8052ab745ce9ed79b72a9f7560c82bcbb1b8824 Mon Sep 17 00:00:00 2001 From: IgorA100 Date: Mon, 16 Oct 2023 12:46:12 +0300 Subject: [PATCH 3/3] Update CHANGELOG.md - Set User Agent for curl in Scraper Signed-off-by: IgorA100 --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 98cbf3b4d..7920e15cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ The format is mostly based on [Keep a Changelog](https://keepachangelog.com/en/1 # Unreleased ## [24.x.x] ### Changed - +- Set User Agent for curl in Scraper + ### Fixed # Releases