136 lines
5.1 KiB
PHP
136 lines
5.1 KiB
PHP
<?php
|
|
namespace Uiharu\Lookup;
|
|
|
|
use stdClass;
|
|
use DOMDocument;
|
|
use DOMXpath;
|
|
use RuntimeException;
|
|
use Uiharu\Config;
|
|
use Uiharu\Url;
|
|
use Index\MediaType;
|
|
use Masterminds\HTML5;
|
|
|
|
/**
 * Lookup handler for Twitter-style URLs.
 *
 * Accepts profile URLs (/name) and tweet URLs (/name/status/id) on the hosts
 * listed in TWITTER_DOMAINS. The data itself is scraped from the HTML pages of
 * the Nitter endpoint configured under Config::get('Nitter', 'endpoint').
 */
final class TwitterLookup implements \Uiharu\ILookup {
    /** Lowercase host names this lookup is willing to handle. */
    private const TWITTER_DOMAINS = [
        'twitter.com', 'www.twitter.com',
        'm.twitter.com', 'mobile.twitter.com',
        'nitter.net', 'www.nitter.net',
    ];

    /** Path of a single tweet; capture group 1 is the numeric tweet id. */
    private const TWEET_PATH_PATTERN = '#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#';

    /** Path of a profile page; capture group 1 is the user name. */
    private const USER_PATH_PATTERN = '#^/@?([A-Za-z0-9_]{1,20})/?$#';

    /**
     * Tells whether the given URL is a tweet or profile URL on a known host.
     *
     * @param Url $url URL to test.
     * @return bool true when the URL is a web URL, its host is in
     *              TWITTER_DOMAINS and its path matches one of the two patterns.
     */
    public function match(Url $url): bool {
        // Strict comparison: both sides are strings, no type juggling wanted.
        if(!$url->isWeb() || !in_array(strtolower($url->getHost()), self::TWITTER_DOMAINS, true))
            return false;

        $path = $url->getPath();

        return preg_match(self::TWEET_PATH_PATTERN, $path) === 1
            || preg_match(self::USER_PATH_PATTERN, $path) === 1;
    }

    /**
     * Downloads a page from the configured Nitter endpoint.
     *
     * Only HTTPS is allowed, redirects are not followed, and the request is
     * bounded by a 2 second connect timeout and a 5 second total timeout.
     *
     * @param string $path Path appended verbatim to the configured endpoint.
     * @return string Response body, or an empty string when the handle could
     *                not be created or the transfer failed.
     */
    private function getString(string $path): string {
        $curl = curl_init(Config::get('Nitter', 'endpoint') . $path);
        if($curl === false)
            return '';

        curl_setopt_array($curl, [
            CURLOPT_AUTOREFERER => false,
            CURLOPT_CERTINFO => false,
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TCP_FASTOPEN => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
        ]);

        $resp = curl_exec($curl);
        curl_close($curl);

        // curl_exec() yields false on failure; normalise that to '' so the
        // declared string return type holds without relying on coercion.
        return is_string($resp) ? $resp : '';
    }

    /**
     * Downloads a Nitter page and parses it into a DOM document.
     *
     * @param string $path Path appended to the configured endpoint.
     * @return DOMDocument Parsed page.
     * @throws RuntimeException When nothing could be downloaded.
     */
    private function getDocument(string $path): DOMDocument {
        $html = $this->getString($path);
        if($html === '')
            throw new RuntimeException('Failed to download Nitter page.');

        return (new HTML5)->loadHTML($html);
    }

    /**
     * Turns a media path taken from a Nitter page into an https URL on pbs.twimg.com.
     *
     * The third slash-separated segment of the path is URL-decoded and used as
     * the media location. When it does not already start with the
     * pbs.twimg.com host, the host is prepended and any "_bigger" marker is
     * removed (presumably the avatar size suffix - confirm against Nitter output).
     *
     * @param string $path Media path as found in the page; may be empty.
     * @return string Absolute https URL, or an empty string when the path is
     *                empty or carries no usable third segment.
     */
    private static function convertNitterMediaURL(string $path): string {
        if($path === '')
            return $path;

        $segments = explode('/', $path);
        $url = rawurldecode($segments[2] ?? '');

        // Without a media segment there is nothing meaningful to point at;
        // avoid emitting a bare "https://pbs.twimg.com/".
        if($url === '')
            return '';

        if(!str_starts_with($url, 'pbs.twimg.com'))
            $url = 'pbs.twimg.com/' . str_replace('_bigger', '', $url);

        return 'https://' . $url;
    }

    /**
     * Returns the trimmed text content of the first node matched by an XPath query.
     *
     * @param DOMXpath $xpath Evaluator bound to the document.
     * @param string   $query XPath expression.
     * @return string Trimmed text, or an empty string when nothing matched.
     */
    private static function queryText(DOMXpath $xpath, string $query): string {
        $nodes = $xpath->query($query);
        if($nodes === false || $nodes->length < 1)
            return '';

        return trim($nodes->item(0)->textContent);
    }

    /**
     * Returns an attribute of the first element matched by an XPath query.
     *
     * @param DOMXpath $xpath Evaluator bound to the document.
     * @param string   $query XPath expression selecting elements.
     * @param string   $name  Attribute name.
     * @return string Attribute value, or an empty string when nothing matched.
     */
    private static function queryAttribute(DOMXpath $xpath, string $query, string $name): string {
        $nodes = $xpath->query($query);
        if($nodes === false || $nodes->length < 1)
            return '';

        return $nodes->item(0)->getAttribute($name);
    }

    /**
     * Scrapes a profile page.
     *
     * @param string $userName User name, used as the page path.
     * @return object|null Object with string properties userName, profileName,
     *                     profileBio and profilePicture; each is '' when the
     *                     corresponding element is missing from the page.
     * @throws RuntimeException When the page could not be downloaded.
     */
    private function lookupUser(string $userName): ?object {
        $document = $this->getDocument("/{$userName}");
        $xpath = new DOMXpath($document);

        $out = new stdClass;
        $out->userName = self::queryText($xpath, '//*[@class="profile-card-username"]');
        $out->profileName = self::queryText($xpath, '//*[@class="profile-card-fullname"]');
        $out->profileBio = self::queryText($xpath, '//*[@class="profile-bio"]/*');
        $out->profilePicture = self::convertNitterMediaURL(
            self::queryAttribute($xpath, '//*[@class="profile-card-avatar"]', 'href')
        );

        return $out;
    }

    /**
     * Scrapes a single tweet page.
     *
     * @param string $tweetId Numeric tweet id.
     * @return object|null Object with string properties tweetId, tweetText,
     *                     profileName and profilePicture; each is '' when the
     *                     corresponding element is missing from the page.
     * @throws RuntimeException When the page could not be downloaded.
     */
    private function lookupTweet(string $tweetId): ?object {
        $document = $this->getDocument("/i/status/{$tweetId}");
        $xpath = new DOMXpath($document);

        $out = new stdClass;

        // The id is read back from the date link: it is the fourth
        // slash-separated segment of the href, with any "#fragment" removed.
        $out->tweetId = '';
        $dateHref = trim(self::queryAttribute($xpath, '//*[@class="tweet-date"]/*', 'href'));
        if($dateHref !== '') {
            $segments = explode('/', $dateHref);
            [$out->tweetId] = explode('#', $segments[3] ?? '');
        }

        $out->tweetText = self::queryText($xpath, '//*[@class="tweet-content media-body"]');
        $out->profileName = self::queryText($xpath, '//*[@class="fullname"]');
        $out->profilePicture = self::convertNitterMediaURL(
            self::queryAttribute($xpath, '//*[@class="tweet-avatar"]/*', 'src')
        );

        return $out;
    }

    /**
     * Resolves a tweet or profile URL into a lookup result.
     *
     * Tweet paths are tried first, then profile paths.
     *
     * @param Url $url URL to resolve; only its path is inspected here.
     * @return TwitterLookupResult Tweet or user result wrapping the scraped data.
     * @throws RuntimeException When the path matches neither pattern, the page
     *                          could not be downloaded, or the lookup yielded null.
     */
    public function lookup(Url $url): TwitterLookupResult {
        $path = $url->getPath();

        if(preg_match(self::TWEET_PATH_PATTERN, $path, $matches) === 1) {
            $tweetInfo = $this->lookupTweet($matches[1]);
            if($tweetInfo === null)
                throw new RuntimeException('Tweet lookup failed.');

            return new TwitterLookupTweetResult($url, $tweetInfo);
        }

        if(preg_match(self::USER_PATH_PATTERN, $path, $matches) === 1) {
            $userInfo = $this->lookupUser($matches[1]);
            if($userInfo === null)
                throw new RuntimeException('Twitter user lookup failed.');

            return new TwitterLookupUserResult($url, $userInfo);
        }

        throw new RuntimeException('Unknown Twitter URL format.');
    }
}
|