uiharu/src/Lookup/TwitterLookup.php

136 lines
5.1 KiB
PHP

<?php
namespace Uiharu\Lookup;
use stdClass;
use DOMDocument;
use DOMXpath;
use RuntimeException;
use Uiharu\Config;
use Uiharu\Url;
use Index\MediaType;
use Masterminds\HTML5;
final class TwitterLookup implements \Uiharu\ILookup {
/**
 * Lower-case host names accepted by match() as Twitter or Nitter front-ends.
 */
private const TWITTER_DOMAINS = [
    'twitter.com',
    'www.twitter.com',
    'm.twitter.com',
    'mobile.twitter.com',
    'nitter.net',
    'www.nitter.net',
];
/**
 * Tells whether the URL is a tweet or profile link on one of the known hosts.
 *
 * The host is compared case-insensitively against TWITTER_DOMAINS. The path must be
 * either "/<user>/status/<digits>" ("statuses" is accepted too) or "/<user>", where
 * <user> is 1-20 characters of [A-Za-z0-9_] with an optional leading "@"; a single
 * trailing slash is tolerated in both forms.
 *
 * @param Url $url URL to test.
 * @return bool True when both the host and the path shape are recognised.
 */
public function match(Url $url): bool {
    if(!$url->isWeb())
        return false;

    // Strict comparison so that host strings are never subject to numeric-string juggling.
    if(!in_array(strtolower($url->getHost()), self::TWITTER_DOMAINS, true))
        return false;

    $path = $url->getPath();

    return preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $path) === 1
        || preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $path) === 1;
}
/**
 * Downloads a page from the Nitter endpoint and returns its raw body.
 *
 * The request URL is the value of Config::get('Nitter', 'endpoint') with $path appended
 * verbatim. Only HTTPS is allowed, redirects are not followed, and the transfer is
 * limited to a 2 second connect timeout and a 5 second total timeout.
 *
 * @param string $path Path (including the leading slash) to request.
 * @return string Response body, or an empty string when the handle could not be created
 *                or the transfer failed.
 */
private function getString(string $path): string {
    $curl = curl_init(Config::get('Nitter', 'endpoint') . $path);
    if($curl === false)
        return '';

    try {
        curl_setopt_array($curl, [
            CURLOPT_AUTOREFERER => false,
            CURLOPT_CERTINFO => false,
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TCP_FASTOPEN => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
        ]);
        $resp = curl_exec($curl);
    } finally {
        curl_close($curl);
    }

    // curl_exec() returns false on failure; report that as an empty body explicitly
    // instead of relying on implicit bool-to-string coercion of the return value.
    return is_string($resp) ? $resp : '';
}
/**
 * Fetches a Nitter page and parses it into a DOM tree.
 *
 * @param string $path Path handed to getString().
 * @return DOMDocument Document produced by the Masterminds HTML5 parser.
 * @throws RuntimeException When the downloaded body is empty.
 */
private function getDocument(string $path): DOMDocument {
    $markup = $this->getString($path);

    if(!empty($markup)) {
        $parser = new HTML5;
        return $parser->loadHTML($markup);
    }

    throw new RuntimeException('Failed to download Nitter page.');
}
/**
 * Converts a Nitter picture path into a direct https:// media URL.
 *
 * The third "/"-separated segment of $path is URL-decoded and used as the media
 * location; the first two segments are ignored (presumably the empty string before the
 * leading slash and Nitter's picture-proxy prefix, as in "/pic/<encoded>" — confirm
 * against the Nitter instance in use). When the decoded value does not already start
 * with "pbs.twimg.com", that host is prepended and every "_bigger" occurrence is removed.
 *
 * @param string $path Picture path taken from the Nitter markup; may be empty.
 * @return string Absolute HTTPS URL, or an empty string when $path carries no third segment.
 */
private static function convertNitterMediaURL(string $path): string {
    // A missing or empty third segment means there is nothing to convert; bail out rather
    // than decoding null and returning a bare "https://pbs.twimg.com/".
    $encoded = explode('/', $path)[2] ?? '';
    if($encoded === '')
        return '';

    $media = rawurldecode($encoded);
    if(!str_starts_with($media, 'pbs.twimg.com'))
        $media = 'pbs.twimg.com/' . str_replace('_bigger', '', $media);

    return 'https://' . $media;
}
/**
 * Scrapes the Nitter profile page of a user into a plain object.
 *
 * Each property is taken from the first element matching its class-based XPath query
 * and falls back to an empty string when no element matches.
 *
 * @param string $userName Screen name; requested as "/<userName>".
 * @return object|null stdClass with the string properties userName, profileName,
 *                     profileBio and profilePicture (already converted to a media URL).
 * @throws RuntimeException When the page cannot be downloaded.
 */
private function lookupUser(string $userName): ?object {
    $xpath = new DOMXpath($this->getDocument("/{$userName}"));

    // First node matching the expression, or null when the query yields nothing.
    $first = fn(string $expr) => $xpath->query($expr)->item(0);

    $info = new stdClass;

    $node = $first('//*[@class="profile-card-username"]');
    $info->userName = $node === null ? '' : trim($node->textContent);

    $node = $first('//*[@class="profile-card-fullname"]');
    $info->profileName = $node === null ? '' : trim($node->textContent);

    $node = $first('//*[@class="profile-bio"]/*');
    $info->profileBio = $node === null ? '' : trim($node->textContent);

    $node = $first('//*[@class="profile-card-avatar"]');
    $avatarPath = $node === null ? '' : $node->getAttribute('href');
    $info->profilePicture = self::convertNitterMediaURL($avatarPath);

    return $info;
}
/**
 * Scrapes the Nitter page of a tweet into a plain object.
 *
 * Each property is taken from the first element matching its class-based XPath query
 * and falls back to an empty string when no element matches.
 * NOTE(review): because the first match in the document is used, confirm that it is
 * the requested tweet when the page renders more than one tweet.
 *
 * @param string $tweetId Numeric tweet id; requested as "/i/status/<tweetId>".
 * @return object|null stdClass with the string properties tweetId, tweetText,
 *                     profileName and profilePicture (already converted to a media URL).
 * @throws RuntimeException When the page cannot be downloaded.
 */
private function lookupTweet(string $tweetId): ?object {
    $document = $this->getDocument("/i/status/{$tweetId}");
    $xpath = new DOMXpath($document);
    $out = new stdClass;

    $tweetDateElems = $xpath->query('//*[@class="tweet-date"]/*');
    $permalink = $tweetDateElems->length < 1 ? '' : trim($tweetDateElems[0]->getAttribute('href'));

    // The id is the fourth "/"-separated segment of the permalink (presumably of the
    // form "/<user>/status/<id>#<fragment>"), with anything from "#" onwards dropped.
    // A shorter href yields '' instead of an undefined-key warning and a null id.
    $idSegment = explode('/', $permalink)[3] ?? '';
    $out->tweetId = explode('#', $idSegment)[0];

    $tweetTextElems = $xpath->query('//*[@class="tweet-content media-body"]');
    $out->tweetText = $tweetTextElems->length < 1 ? '' : trim($tweetTextElems[0]->textContent);

    $profileNameElems = $xpath->query('//*[@class="fullname"]');
    $out->profileName = $profileNameElems->length < 1 ? '' : trim($profileNameElems[0]->textContent);

    $profilePictureElems = $xpath->query('//*[@class="tweet-avatar"]/*');
    $avatarPath = $profilePictureElems->length < 1 ? '' : $profilePictureElems[0]->getAttribute('src');
    $out->profilePicture = self::convertNitterMediaURL($avatarPath);

    return $out;
}
/**
 * Resolves a Twitter/Nitter URL into a tweet or user lookup result.
 *
 * The tweet path shape is tried first, then the profile shape.
 *
 * @param Url $url URL whose path selects the kind of lookup.
 * @return TwitterLookupResult A TwitterLookupTweetResult for tweet links, a
 *                             TwitterLookupUserResult for profile links.
 * @throws RuntimeException When the path matches neither shape or the lookup yields null.
 */
public function lookup(Url $url): TwitterLookupResult {
    $path = $url->getPath();

    if(preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $path, $tweetMatch)) {
        $tweet = $this->lookupTweet((string)($tweetMatch[1] ?? '0'));
        if($tweet === null)
            throw new RuntimeException('Tweet lookup failed.');

        return new TwitterLookupTweetResult($url, $tweet);
    }

    // Not a tweet link: anything that is not a bare profile path is unsupported.
    if(!preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $path, $userMatch))
        throw new RuntimeException('Unknown Twitter URL format.');

    $user = $this->lookupUser((string)($userMatch[1] ?? ''));
    if($user === null)
        throw new RuntimeException('Twitter user lookup failed.');

    return new TwitterLookupUserResult($url, $user);
}
}