From 4cff688057cf0996aa5ab5295ad1b23297d08456 Mon Sep 17 00:00:00 2001 From: flashwave Date: Fri, 9 Jun 2023 19:56:38 +0000 Subject: [PATCH] Replaced Twitter API with Nitter crawling. --- .gitmodules | 3 + lib/html5-php | 1 + src/Lookup/TwitterLookup.php | 106 +++++++++++++++++------- src/Lookup/TwitterLookupTweetResult.php | 14 ++-- src/Lookup/TwitterLookupUserResult.php | 14 ++-- uiharu.php | 3 +- 6 files changed, 96 insertions(+), 45 deletions(-) create mode 160000 lib/html5-php diff --git a/.gitmodules b/.gitmodules index 525bff2..2cdb6aa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "lib/index"] path = lib/index url = https://git.flash.moe/flash/index.git +[submodule "lib/html5-php"] + path = lib/html5-php + url = https://github.com/Masterminds/html5-php.git diff --git a/lib/html5-php b/lib/html5-php new file mode 160000 index 0000000..3c5d5a5 --- /dev/null +++ b/lib/html5-php @@ -0,0 +1 @@ +Subproject commit 3c5d5a56d56f48a1ca08a0670f0f80c1dad368f3 diff --git a/src/Lookup/TwitterLookup.php b/src/Lookup/TwitterLookup.php index a139ddb..373c48f 100644 --- a/src/Lookup/TwitterLookup.php +++ b/src/Lookup/TwitterLookup.php @@ -1,10 +1,14 @@ getPath()); } - private function lookupUser(string $userName): ?object { - $curl = curl_init("https://api.twitter.com/2/users/by?usernames={$userName}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified"); - curl_setopt_array($curl, [ - CURLOPT_AUTOREFERER => false, - CURLOPT_CERTINFO => false, - CURLOPT_FAILONERROR => false, - CURLOPT_FOLLOWLOCATION => false, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TCP_FASTOPEN => true, - CURLOPT_CONNECTTIMEOUT => 2, - CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, - CURLOPT_TIMEOUT => 2, - CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, - CURLOPT_HTTPHEADER => [ - 'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'), - 'Accept: application/json', - ], - ]); - $resp = curl_exec($curl); - curl_close($curl); - return json_decode($resp); - } - - private function lookupTweet(string $tweetId): ?object { - $curl = curl_init("https://api.twitter.com/2/tweets?ids={$tweetId}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified"); + private function getString(string $path): string { + $curl = curl_init(Config::get('Nitter', 'endpoint') . $path); curl_setopt_array($curl, [ CURLOPT_AUTOREFERER => false, CURLOPT_CERTINFO => false, @@ -57,14 +38,79 @@ final class TwitterLookup implements \Uiharu\ILookup { CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, CURLOPT_TIMEOUT => 5, CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, - CURLOPT_HTTPHEADER => [ - 'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'), - 'Accept: application/json', - ], ]); $resp = curl_exec($curl); curl_close($curl); - return json_decode($resp); + return $resp; + } + + private function getDocument(string $path): DOMDocument { + $string = $this->getString($path); + if(empty($string)) + throw new RuntimeException('Failed to download Nitter page.'); + + return (new HTML5)->loadHTML($string); + } + + private static function convertNitterMediaURL(string $path): string { + if($path === '') + return $path; + + [,,$url] = explode('/', $path); + $url = rawurldecode($url); + + if(!str_starts_with($url, 'pbs.twimg.com')) + $url = 'pbs.twimg.com/' . str_replace('_bigger', '', $url); + + return 'https://' . $url; + } + + private function lookupUser(string $userName): ?object { + $document = $this->getDocument("/{$userName}"); + $xpath = new DOMXpath($document); + + $out = new stdClass; + + $userNameElems = $xpath->query('//*[@class="profile-card-username"]'); + $out->userName = $userNameElems->length < 1 ? '' : trim($userNameElems[0]->textContent); + + $profileNameElems = $xpath->query('//*[@class="profile-card-fullname"]'); + $out->profileName = $profileNameElems->length < 1 ? '' : trim($profileNameElems[0]->textContent); + + $profileBioElems = $xpath->query('//*[@class="profile-bio"]/*'); + $out->profileBio = $profileBioElems->length < 1 ? '' : trim($profileBioElems[0]->textContent); + + $profilePictureElems = $xpath->query('//*[@class="profile-card-avatar"]'); + $out->profilePicture = $profilePictureElems->length < 1 ? '' : $profilePictureElems[0]->getAttribute('href'); + $out->profilePicture = self::convertNitterMediaURL($out->profilePicture); + + return $out; + } + + private function lookupTweet(string $tweetId): ?object { + $document = $this->getDocument("/i/status/{$tweetId}"); + $xpath = new DOMXpath($document); + + $out = new stdClass; + + $tweetDateElems = $xpath->query('//*[@class="tweet-date"]/*'); + $out->tweetId = $tweetDateElems->length < 1 ? '' : trim($tweetDateElems[0]->getAttribute('href')); + if($out->tweetId !== '') { + [,,,$out->tweetId] = explode('/', $out->tweetId); + [$out->tweetId] = explode('#', $out->tweetId); + } + + $tweetTextElems = $xpath->query('//*[@class="tweet-content media-body"]'); + $out->tweetText = $tweetTextElems->length < 1 ? '' : trim($tweetTextElems[0]->textContent); + + $profileNameElems = $xpath->query('//*[@class="fullname"]'); + $out->profileName = $profileNameElems->length < 1 ? '' : trim($profileNameElems[0]->textContent); + + $profilePictureElems = $xpath->query('//*[@class="tweet-avatar"]/*'); + $out->profilePicture = $profilePictureElems->length < 1 ? '' : $profilePictureElems[0]->getAttribute('src'); + $out->profilePicture = self::convertNitterMediaURL($out->profilePicture); + + return $out; } public function lookup(Url $url): TwitterLookupResult { diff --git a/src/Lookup/TwitterLookupTweetResult.php b/src/Lookup/TwitterLookupTweetResult.php index a036e7e..9837f4f 100644 --- a/src/Lookup/TwitterLookupTweetResult.php +++ b/src/Lookup/TwitterLookupTweetResult.php @@ -16,28 +16,28 @@ class TwitterLookupTweetResult extends TwitterLookupResult { } public function getTwitterTweetId(): string { - return $this->tweetInfo->data[0]->id; + return $this->tweetInfo->tweetId; } public function hasTitle(): bool { - return isset($this->tweetInfo->includes->users[0]->name); + return $this->tweetInfo->profileName !== ''; } public function getTitle(): string { - return $this->tweetInfo->includes->users[0]->name; + return $this->tweetInfo->profileName; } public function hasDescription(): bool { - return isset($this->tweetInfo->data[0]->text); + return $this->tweetInfo->tweetText !== ''; } public function getDescription(): string { - return $this->tweetInfo->data[0]->text; + return $this->tweetInfo->tweetText; } public function hasPreviewImage(): bool { - return isset($this->tweetInfo->includes->users[0]->profile_image_url); + return $this->tweetInfo->profilePicture !== ''; } public function getPreviewImage(): string { - return $this->tweetInfo->includes->users[0]->profile_image_url; + return $this->tweetInfo->profilePicture; } public function getTwitterResult(): object { diff --git a/src/Lookup/TwitterLookupUserResult.php b/src/Lookup/TwitterLookupUserResult.php index 13d8f6f..92aac15 100644 --- a/src/Lookup/TwitterLookupUserResult.php +++ b/src/Lookup/TwitterLookupUserResult.php @@ -16,28 +16,28 @@ class TwitterLookupUserResult extends TwitterLookupResult { } public function getTwitterUserName(): string { - return $this->userInfo->data[0]->username; + return $this->userInfo->userName; } public function hasTitle(): bool { - return isset($this->userInfo->data[0]->name); + return $this->userInfo->profileName !== ''; } public function getTitle(): string { - return $this->userInfo->data[0]->name; + return $this->userInfo->profileName; } public function hasDescription(): bool { - return isset($this->userInfo->data[0]->description); + return $this->userInfo->profileBio !== ''; } public function getDescription(): string { - return $this->userInfo->data[0]->description; + return $this->userInfo->profileBio; } public function hasPreviewImage(): bool { - return isset($this->userInfo->data[0]->profile_image_url); + return $this->userInfo->profilePicture !== ''; } public function getPreviewImage(): string { - return $this->userInfo->data[0]->profile_image_url; + return $this->userInfo->profilePicture; } public function getTwitterResult(): object { diff --git a/uiharu.php b/uiharu.php index 0ff1574..cbff564 100644 --- a/uiharu.php +++ b/uiharu.php @@ -12,13 +12,14 @@ define('UIH_DEBUG', is_file(UIH_ROOT . '/.debug')); define('UIH_PUBLIC', UIH_ROOT . '/public'); define('UIH_SOURCE', UIH_ROOT . '/src'); define('UIH_LIBRARY', UIH_ROOT . '/lib'); -define('UIH_VERSION', '20230125'); +define('UIH_VERSION', '20230609'); define('UIH_NDX_PATH', UIH_LIBRARY . '/index'); define('UIH_NDX_PATH_DEV', UIH_LIBRARY . '/index-dev'); require_once (UIH_DEBUG && is_dir(UIH_NDX_PATH_DEV) ? UIH_NDX_PATH_DEV : UIH_NDX_PATH) . '/index.php'; +Autoloader::addNamespace('Masterminds', UIH_LIBRARY . '/html5-php/src'); Autoloader::addNamespace(__NAMESPACE__, UIH_SOURCE); Environment::setDebug(UIH_DEBUG);