Replaced Twitter API with Nitter crawling.

This commit is contained in:
flash 2023-06-09 19:56:38 +00:00
parent 2be425eab3
commit 4cff688057
6 changed files with 96 additions and 45 deletions

3
.gitmodules vendored
View file

@ -1,3 +1,6 @@
[submodule "lib/index"] [submodule "lib/index"]
path = lib/index path = lib/index
url = https://git.flash.moe/flash/index.git url = https://git.flash.moe/flash/index.git
[submodule "lib/html5-php"]
path = lib/html5-php
url = https://github.com/Masterminds/html5-php.git

1
lib/html5-php Submodule

@ -0,0 +1 @@
Subproject commit 3c5d5a56d56f48a1ca08a0670f0f80c1dad368f3

View file

@ -1,10 +1,14 @@
<?php <?php
namespace Uiharu\Lookup; namespace Uiharu\Lookup;
use stdClass;
use DOMDocument;
use DOMXpath;
use RuntimeException; use RuntimeException;
use Uiharu\Config; use Uiharu\Config;
use Uiharu\Url; use Uiharu\Url;
use Index\MediaType; use Index\MediaType;
use Masterminds\HTML5;
final class TwitterLookup implements \Uiharu\ILookup { final class TwitterLookup implements \Uiharu\ILookup {
private const TWITTER_DOMAINS = [ private const TWITTER_DOMAINS = [
@ -21,31 +25,8 @@ final class TwitterLookup implements \Uiharu\ILookup {
|| preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $url->getPath()); || preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $url->getPath());
} }
private function lookupUser(string $userName): ?object { private function getString(string $path): string {
$curl = curl_init("https://api.twitter.com/2/users/by?usernames={$userName}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified"); $curl = curl_init(Config::get('Nitter', 'endpoint') . $path);
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 2,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'),
'Accept: application/json',
],
]);
$resp = curl_exec($curl);
curl_close($curl);
return json_decode($resp);
}
private function lookupTweet(string $tweetId): ?object {
$curl = curl_init("https://api.twitter.com/2/tweets?ids={$tweetId}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified");
curl_setopt_array($curl, [ curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false, CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false, CURLOPT_CERTINFO => false,
@ -57,14 +38,79 @@ final class TwitterLookup implements \Uiharu\ILookup {
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5, CURLOPT_TIMEOUT => 5,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'),
'Accept: application/json',
],
]); ]);
$resp = curl_exec($curl); $resp = curl_exec($curl);
curl_close($curl); curl_close($curl);
return json_decode($resp); return $resp;
}
/**
 * Downloads a page from the configured Nitter endpoint and parses it into a DOM tree.
 *
 * @param string $path Path that getString() appends to the endpoint.
 * @return DOMDocument Document produced by the Masterminds HTML5 parser.
 * @throws RuntimeException If the downloaded body is empty() (nothing usable came back).
 */
private function getDocument(string $path): DOMDocument {
    $html = $this->getString($path);

    // Parse only when something was actually downloaded; otherwise fall through to the failure.
    if(!empty($html)) {
        $parser = new HTML5;
        return $parser->loadHTML($html);
    }

    throw new RuntimeException('Failed to download Nitter page.');
}
/**
 * Turns a Nitter media proxy path into a direct https://pbs.twimg.com/ URL.
 *
 * The third '/'-separated piece of the path is taken as the URL-encoded media
 * location. If it does not already begin with the pbs.twimg.com host, the host
 * is prepended and any "_bigger" marker is removed from the location.
 *
 * @param string $path Proxy path as found in the page markup; may be empty.
 * @return string Absolute https URL, or an empty string when $path is empty or
 *                carries no media location.
 */
private static function convertNitterMediaURL(string $path): string {
    if($path === '')
        return $path;

    // Not every path has a third piece; fall back to '' instead of reading an
    // undefined offset (which used to produce the bogus "https://pbs.twimg.com/").
    $pieces = explode('/', $path);
    $url = rawurldecode($pieces[2] ?? '');
    if($url === '')
        return '';

    if(!str_starts_with($url, 'pbs.twimg.com'))
        $url = 'pbs.twimg.com/' . str_replace('_bigger', '', $url);

    return 'https://' . $url;
}
/**
 * Scrapes the Nitter page at "/{$userName}" and collects the profile fields.
 *
 * Each field comes from the first node matching its XPath expression; a field
 * whose expression matches nothing is set to an empty string.
 *
 * @param string $userName User name placed directly into the request path.
 * @return object|null stdClass with userName, profileName, profileBio and
 *                     profilePicture (the latter passed through convertNitterMediaURL()).
 * @throws RuntimeException Propagated from getDocument() when the page cannot be downloaded.
 */
private function lookupUser(string $userName): ?object {
    $xpath = new DOMXpath($this->getDocument("/{$userName}"));

    // First node matching the expression, or null when nothing matched.
    $first = fn(string $expr) => $xpath->query($expr)->item(0);

    $userNameNode = $first('//*[@class="profile-card-username"]');
    $fullNameNode = $first('//*[@class="profile-card-fullname"]');
    $bioNode = $first('//*[@class="profile-bio"]/*');
    $avatarNode = $first('//*[@class="profile-card-avatar"]');

    $avatarPath = '';
    if($avatarNode !== null)
        $avatarPath = $avatarNode->getAttribute('href');

    $info = new stdClass;
    $info->userName = $userNameNode === null ? '' : trim($userNameNode->textContent);
    $info->profileName = $fullNameNode === null ? '' : trim($fullNameNode->textContent);
    $info->profileBio = $bioNode === null ? '' : trim($bioNode->textContent);
    $info->profilePicture = self::convertNitterMediaURL($avatarPath);

    return $info;
}
/**
 * Scrapes the Nitter page at "/i/status/{$tweetId}" and collects the tweet fields.
 *
 * Each field comes from the first node matching its XPath expression; a field
 * whose expression matches nothing is set to an empty string.
 *
 * NOTE(review): "first match" is in document order. If the status page renders
 * other tweets of the conversation above the requested one, these fields may
 * describe a different tweet — confirm against Nitter's markup.
 *
 * @param string $tweetId Tweet id placed directly into the request path.
 * @return object|null stdClass with tweetId, tweetText, profileName and
 *                     profilePicture (the latter passed through convertNitterMediaURL()).
 * @throws RuntimeException Propagated from getDocument() when the page cannot be downloaded.
 */
private function lookupTweet(string $tweetId): ?object {
    $document = $this->getDocument("/i/status/{$tweetId}");
    $xpath = new DOMXpath($document);
    $out = new stdClass;

    $tweetDateElems = $xpath->query('//*[@class="tweet-date"]/*');
    $tweetLink = $tweetDateElems->length < 1 ? '' : trim($tweetDateElems[0]->getAttribute('href'));

    // The id is the fourth '/'-separated piece of the link, up to any '#fragment'.
    // A link with fewer pieces yields '' rather than an undefined-offset warning
    // and a null id leaking into the string-typed accessors.
    $linkPieces = explode('/', $tweetLink);
    [$out->tweetId] = explode('#', $linkPieces[3] ?? '');

    $tweetTextElems = $xpath->query('//*[@class="tweet-content media-body"]');
    $out->tweetText = $tweetTextElems->length < 1 ? '' : trim($tweetTextElems[0]->textContent);

    $profileNameElems = $xpath->query('//*[@class="fullname"]');
    $out->profileName = $profileNameElems->length < 1 ? '' : trim($profileNameElems[0]->textContent);

    $profilePictureElems = $xpath->query('//*[@class="tweet-avatar"]/*');
    $out->profilePicture = $profilePictureElems->length < 1 ? '' : $profilePictureElems[0]->getAttribute('src');
    $out->profilePicture = self::convertNitterMediaURL($out->profilePicture);

    return $out;
}
public function lookup(Url $url): TwitterLookupResult { public function lookup(Url $url): TwitterLookupResult {

View file

@ -16,28 +16,28 @@ class TwitterLookupTweetResult extends TwitterLookupResult {
} }
public function getTwitterTweetId(): string { public function getTwitterTweetId(): string {
return $this->tweetInfo->data[0]->id; return $this->tweetInfo->tweetId;
} }
public function hasTitle(): bool { public function hasTitle(): bool {
return isset($this->tweetInfo->includes->users[0]->name); return $this->tweetInfo->profileName !== '';
} }
public function getTitle(): string { public function getTitle(): string {
return $this->tweetInfo->includes->users[0]->name; return $this->tweetInfo->profileName;
} }
public function hasDescription(): bool { public function hasDescription(): bool {
return isset($this->tweetInfo->data[0]->text); return $this->tweetInfo->tweetText !== '';
} }
public function getDescription(): string { public function getDescription(): string {
return $this->tweetInfo->data[0]->text; return $this->tweetInfo->tweetText;
} }
public function hasPreviewImage(): bool { public function hasPreviewImage(): bool {
return isset($this->tweetInfo->includes->users[0]->profile_image_url); return $this->tweetInfo->profilePicture !== '';
} }
public function getPreviewImage(): string { public function getPreviewImage(): string {
return $this->tweetInfo->includes->users[0]->profile_image_url; return $this->tweetInfo->profilePicture;
} }
public function getTwitterResult(): object { public function getTwitterResult(): object {

View file

@ -16,28 +16,28 @@ class TwitterLookupUserResult extends TwitterLookupResult {
} }
public function getTwitterUserName(): string { public function getTwitterUserName(): string {
return $this->userInfo->data[0]->username; return $this->userInfo->userName;
} }
public function hasTitle(): bool { public function hasTitle(): bool {
return isset($this->userInfo->data[0]->name); return $this->userInfo->profileName !== '';
} }
public function getTitle(): string { public function getTitle(): string {
return $this->userInfo->data[0]->name; return $this->userInfo->profileName;
} }
public function hasDescription(): bool { public function hasDescription(): bool {
return isset($this->userInfo->data[0]->description); return $this->userInfo->profileBio !== '';
} }
public function getDescription(): string { public function getDescription(): string {
return $this->userInfo->data[0]->description; return $this->userInfo->profileBio;
} }
public function hasPreviewImage(): bool { public function hasPreviewImage(): bool {
return isset($this->userInfo->data[0]->profile_image_url); return $this->userInfo->profilePicture !== '';
} }
public function getPreviewImage(): string { public function getPreviewImage(): string {
return $this->userInfo->data[0]->profile_image_url; return $this->userInfo->profilePicture;
} }
public function getTwitterResult(): object { public function getTwitterResult(): object {

View file

@ -12,13 +12,14 @@ define('UIH_DEBUG', is_file(UIH_ROOT . '/.debug'));
define('UIH_PUBLIC', UIH_ROOT . '/public'); define('UIH_PUBLIC', UIH_ROOT . '/public');
define('UIH_SOURCE', UIH_ROOT . '/src'); define('UIH_SOURCE', UIH_ROOT . '/src');
define('UIH_LIBRARY', UIH_ROOT . '/lib'); define('UIH_LIBRARY', UIH_ROOT . '/lib');
define('UIH_VERSION', '20230125'); define('UIH_VERSION', '20230609');
define('UIH_NDX_PATH', UIH_LIBRARY . '/index'); define('UIH_NDX_PATH', UIH_LIBRARY . '/index');
define('UIH_NDX_PATH_DEV', UIH_LIBRARY . '/index-dev'); define('UIH_NDX_PATH_DEV', UIH_LIBRARY . '/index-dev');
require_once (UIH_DEBUG && is_dir(UIH_NDX_PATH_DEV) ? UIH_NDX_PATH_DEV : UIH_NDX_PATH) . '/index.php'; require_once (UIH_DEBUG && is_dir(UIH_NDX_PATH_DEV) ? UIH_NDX_PATH_DEV : UIH_NDX_PATH) . '/index.php';
Autoloader::addNamespace('Masterminds', UIH_LIBRARY . '/html5-php/src');
Autoloader::addNamespace(__NAMESPACE__, UIH_SOURCE); Autoloader::addNamespace(__NAMESPACE__, UIH_SOURCE);
Environment::setDebug(UIH_DEBUG); Environment::setDebug(UIH_DEBUG);