Created structure for additional API version handlers.

This commit is contained in:
flash 2022-07-15 22:20:20 +00:00
parent 9948642a5a
commit 7160e9909d
5 changed files with 651 additions and 566 deletions

View file

@ -1,574 +1,14 @@
<?php
namespace Uiharu;
use stdClass;
use InvalidArgumentException;
use Index\Http\HttpFx;
use Index\MediaType;
use Index\Performance\Stopwatch;
require_once __DIR__ . '/../uiharu.php';
/**
 * Decides whether a cross-origin request carrying the given Origin header may be served.
 *
 * The host part of the origin is lowercased and accepted when it equals the
 * current HTTP_HOST, when the configured CORS origin list is empty, or when it
 * appears in that list.
 *
 * @param string $origin Raw value of the Origin request header.
 * @return bool True when the origin is allowed.
 */
function uih_origin_allowed(string $origin): bool {
    // parse_url() returns null when the URL has no host and false when it is
    // malformed; normalise both to '' instead of handing a non-string to mb_strtolower().
    $host = parse_url($origin, PHP_URL_HOST);
    $host = is_string($host) ? mb_strtolower($host) : '';

    // HTTP_HOST may be absent (e.g. CLI); a missing value simply never matches.
    if($host === ($_SERVER['HTTP_HOST'] ?? null))
        return true;

    // An empty allow-list means every origin is accepted.
    $allowed = Config::get('CORS', 'origins', []);
    if(empty($allowed))
        return true;

    // Strict comparison so loosely-equal values cannot pass as a listed host.
    return in_array($host, $allowed, true);
}
/**
 * Populates $resp with metadata for an EEPROM upload by requesting its JSON description.
 *
 * Always sets type, color, eeprom_file_id, eeprom_file_info and site_name on $resp;
 * title and image are set only when the JSON response carries `name` / `thumb`.
 *
 * @param stdClass $resp Response object, modified in place.
 * @param string $eepromFileId Identifier of the upload.
 * @param string $domain Label inserted into the host name as eeprom.<domain>.net.
 */
function uih_eeprom_lookup(stdClass $resp, string $eepromFileId, string $domain = 'flashii'): void {
    $resp->type = 'eeprom:file';
    $resp->color = '#8559a5';
    $resp->eeprom_file_id = $eepromFileId;

    // The ID may be taken verbatim from a request path, so encode it to keep it
    // confined to a single path segment of the upstream URL.
    $curl = curl_init("https://eeprom.{$domain}.net/uploads/" . rawurlencode($eepromFileId) . '.json');
    curl_setopt_array($curl, [
        CURLOPT_AUTOREFERER => false,
        CURLOPT_CERTINFO => false,
        CURLOPT_FAILONERROR => false,
        CURLOPT_FOLLOWLOCATION => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TCP_FASTOPEN => true,
        CURLOPT_CONNECTTIMEOUT => 2,
        CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
        CURLOPT_TIMEOUT => 5,
        CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
        CURLOPT_HTTPHEADER => [
            'Accept: application/json',
        ],
    ]);
    $eepromResp = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false on transport failure; record that explicitly as "no info".
    $resp->eeprom_file_info = is_string($eepromResp) ? json_decode($eepromResp) : null;

    if(isset($resp->eeprom_file_info->name))
        $resp->title = $resp->eeprom_file_info->name;
    if(isset($resp->eeprom_file_info->thumb))
        $resp->image = $resp->eeprom_file_info->thumb;

    $resp->site_name = 'Flashii EEPROM';
}
// Deletes metadata cache rows older than seven days; this runs on every request.
// should be in a cron job
$db->execute('DELETE FROM `uih_metadata_cache` WHERE `metadata_created` < NOW() - INTERVAL 7 DAY');
/*apis = [
new \Uiharu\APIs\v1_0,
];*/
$ctx->setupHttp();
$router = new HttpFx;
// Middleware registered on '/': marks the response via setPoweredBy('Uiharu').
$router->use('/', function($response) {
$response->setPoweredBy('Uiharu');
});
// Registers the v1_0 API handler with the context, then hands the request URI to matchApi()
// (presumably to select which registered API serves this request — confirm in UihContext).
$ctx->registerApi(new \Uiharu\Apis\v1_0($ctx));
$ctx->matchApi(filter_input(INPUT_SERVER, 'REQUEST_URI'));
// CORS middleware: requests without an Origin header pass through untouched;
// a disallowed origin is answered with 403, an allowed one is echoed back.
$router->use('/', function($response, $request) {
    $requestOrigin = $request->getHeaderLine('Origin');

    // No Origin header: nothing to validate or advertise.
    if(empty($requestOrigin))
        return;

    if(!uih_origin_allowed($requestOrigin))
        return 403;

    $response->setHeader('Access-Control-Allow-Origin', $requestOrigin);
    $response->setHeader('Vary', 'Origin');
});
// Preflight middleware: OPTIONS requests are answered directly with 204 and
// the list of permitted methods; every other method falls through.
$router->use('/', function($response, $request) {
    $isPreflight = $request->getMethod() === 'OPTIONS';
    if(!$isPreflight)
        return;

    $response->setHeader('Access-Control-Allow-Methods', 'OPTIONS, GET, POST');
    return 204;
});
// GET /: delegates to /index.html through accelRedirect() and labels the response as UTF-8 HTML.
// NOTE(review): presumably an internal web-server redirect (X-Accel-Redirect style) — confirm in the response class.
$router->get('/', function($response) {
$response->accelRedirect('/index.html');
$response->setContentType('text/html; charset=utf-8');
});
// Metadata lookup handler shared by GET and POST /metadata.
//
// Takes the target URL from the request body (POST, first 1000 bytes) or the `url`
// parameter (otherwise), parses it, and builds a stdClass describing the target:
//  - a cached response younger than 10 minutes is reused when caching is enabled;
//  - eeprom:/devrom: schemes and known hosts (EEPROM, Twitter/Nitter, YouTube) are
//    resolved through their respective JSON APIs;
//  - anything else over http(s) is probed with a header-only request, then either
//    parsed as (X)HTML for title/meta tags or inspected with ffprobe when the
//    content type is image/audio/video.
// A freshly built response is JSON-encoded, written to the cache table and set as
// the response content; otherwise the response object itself is returned.
$metaDataHandlerV1 = function($response, $request) use ($db) {
$response->setContentType('application/json; charset=utf-8');
// HEAD requests only get the content type, no body work is done.
if($request->getMethod() === 'HEAD')
return;
$sw = Stopwatch::startNew();
$resp = new stdClass;
if($request->getMethod() === 'POST') {
if(!$request->isStreamContent()) {
$response->setStatusCode(400);
return $resp;
}
// The POST body is the URL itself; at most 1000 bytes are read.
$targetUrl = $request->getContent()->getStream()->read(1000);
} else {
$targetUrl = (string)$request->getParam('url');
}
if(empty($targetUrl)) {
$response->setStatusCode(400);
return $resp;
}
try {
$parsedUrl = Url::parse($targetUrl);
} catch(InvalidArgumentException $ex) {
$response->setStatusCode(400);
$resp->error = 'metadata:uri';
return $resp;
}
// if no scheme is specified, try https
if(!$parsedUrl->hasScheme())
$parsedUrl->setScheme('https');
$resp->uri = $parsedUrl->toV1();
$urlHash = $parsedUrl->calculateHash(false);
// Outside debug mode the cache is always on; in debug mode it needs the `_cache` parameter.
$enableCache = !UIH_DEBUG || $request->hasParam('_cache');
$includeRawResult = UIH_DEBUG || $request->hasParam('include_raw');
if($enableCache) {
$cacheFetch = $db->prepare('SELECT `metadata_resp` FROM `uih_metadata_cache` WHERE `metadata_url` = UNHEX(?) AND `metadata_created` > NOW() - INTERVAL 10 MINUTE');
$cacheFetch->addParameter(1, $urlHash);
$cacheFetch->execute();
$cacheResult = $cacheFetch->getResult();
if($cacheResult->next()) {
$cacheResp = json_decode($cacheResult->getString(0));
if($cacheResp !== null)
$resp = $cacheResp;
}
}
// An empty type means there was no usable cache hit, so the lookup has to be performed.
if(empty($resp->type)) {
$urlScheme = strtolower($parsedUrl->getScheme());
$urlHost = strtolower($parsedUrl->getHost());
$urlPath = '/' . trim($parsedUrl->getPath(), '/');
if($urlScheme === 'eeprom') {
// eeprom:<id> is rewritten to its https://i.fii.moe/<id> form before the lookup.
if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) {
$parsedUrl = Url::parse('https://i.fii.moe/' . $matches[1]);
$resp->uri = $parsedUrl->toV1();
$continueRaw = true;
uih_eeprom_lookup($resp, $matches[1]);
}
} elseif($urlScheme === 'devrom') {
// devrom:<id> is the same as above but for the edgii domain.
if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) {
$parsedUrl = Url::parse('https://i.edgii.net/' . $matches[1]);
$resp->uri = $parsedUrl->toV1();
$continueRaw = true;
uih_eeprom_lookup($resp, $matches[1], 'edgii');
}
} elseif($urlScheme === 'http' || $urlScheme === 'https') {
switch($urlHost) {
// Short-link hosts take the file ID from the path, then fall through
// to the shared EEPROM handling below.
case 'i.flashii.net':
case 'i.fii.moe':
$eepromFileId = substr($urlPath, 1);
case 'eeprom.flashii.net':
if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
$eepromFileId = $matches[1];
if(!empty($eepromFileId)) {
// $continueRaw makes the generic HTTP probe further down run as well.
$continueRaw = true;
uih_eeprom_lookup($resp, $eepromFileId);
}
break;
// Same fall-through pattern for the edgii hosts.
case 'i.edgii.net':
$eepromFileId = substr($urlPath, 1);
case 'eeprom.edgii.net':
if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
$eepromFileId = $matches[1];
if(!empty($eepromFileId)) {
$continueRaw = true;
uih_eeprom_lookup($resp, $eepromFileId, 'edgii');
}
break;
case 'twitter.com': case 'www.twitter.com':
case 'm.twitter.com': case 'mobile.twitter.com':
case 'nitter.net': case 'www.nitter.net':
// Tweet URL: /<user>/status/<id> or /<user>/statuses/<id>.
if(preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $urlPath, $matches)) {
$resp->type = 'twitter:tweet';
$resp->color = '#1da1f2';
$resp->tweet_id = strval($matches[1] ?? '0');
$curl = curl_init("https://api.twitter.com/2/tweets?ids={$resp->tweet_id}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified");
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'),
'Accept: application/json',
],
]);
$tweetResp = curl_exec($curl);
curl_close($curl);
$resp->tweet_info = json_decode($tweetResp);
if(isset($resp->tweet_info->includes->users[0]->name))
$resp->title = $resp->tweet_info->includes->users[0]->name;
if(isset($resp->tweet_info->includes->users[0]->profile_image_url))
$resp->image = $resp->tweet_info->includes->users[0]->profile_image_url;
if(isset($resp->tweet_info->data[0]->text))
$resp->description = $resp->tweet_info->data[0]->text;
$resp->site_name = 'Twitter';
break;
}
// Profile URL: /<user>.
if(preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $urlPath, $matches)) {
$resp->type = 'twitter:user';
$resp->color = '#1da1f2';
$resp->twitter_user_name = strval($matches[1] ?? '');
$curl = curl_init("https://api.twitter.com/2/users/by?usernames={$resp->twitter_user_name}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified");
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'),
'Accept: application/json',
],
]);
$twitUserResp = curl_exec($curl);
curl_close($curl);
$resp->twitter_user_info = json_decode($twitUserResp);
if(isset($resp->twitter_user_info->data[0]->name))
$resp->title = $resp->twitter_user_info->data[0]->name;
if(isset($resp->twitter_user_info->data[0]->profile_image_url))
$resp->image = $resp->twitter_user_info->data[0]->profile_image_url;
if(isset($resp->twitter_user_info->data[0]->description))
$resp->description = $resp->twitter_user_info->data[0]->description;
$resp->site_name = 'Twitter';
break;
}
break;
// youtu.be carries the video ID in the path, then falls through to the shared YouTube handling.
case 'youtu.be': case 'www.youtu.be': // www. doesn't work for this, but may as well cover it
$youtubeVideoId = substr($urlPath, 1);
case 'youtube.com': case 'www.youtube.com':
case 'youtube-nocookie.com': case 'www.youtube-nocookie.com':
parse_str($parsedUrl->getQuery(), $queryString);
if(!isset($youtubeVideoId) && $urlPath === '/watch')
$youtubeVideoId = $queryString['v'] ?? null;
if(!empty($youtubeVideoId)) {
$resp->type = 'youtube:video';
$resp->color = '#f00';
$resp->youtube_video_id = $youtubeVideoId;
if(isset($queryString['t']))
$resp->youtube_start_time = $queryString['t'];
if(isset($queryString['list']))
$resp->youtube_playlist = $queryString['list'];
if(isset($queryString['index']))
$resp->youtube_playlist_index = $queryString['index'];
// NOTE(review): the video ID is interpolated into the API URL without URL-encoding.
$curl = curl_init("https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id={$resp->youtube_video_id}&key=" . Config::get('Google', 'apiKey'));
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Accept: application/json',
],
]);
$youtubeResp = curl_exec($curl);
curl_close($curl);
$resp->youtube_video_info = json_decode($youtubeResp);
if(isset($resp->youtube_video_info->items[0]->snippet->title))
$resp->title = $resp->youtube_video_info->items[0]->snippet->title;
if(isset($resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url))
$resp->image = $resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url;
if(isset($resp->youtube_video_info->items[0]->snippet->description))
$resp->description = $resp->youtube_video_info->items[0]->snippet->description;
$resp->site_name = 'YouTube';
}
break;
}
} else {
$resp->error = 'metadata:scheme';
$response->setStatusCode(400);
return $resp;
}
// Generic probe: runs when no specialised handler set a type, or when an EEPROM
// lookup asked for the raw target to be inspected too ($continueRaw).
if((empty($resp->type) || isset($continueRaw)) && in_array($parsedUrl->getScheme(), ['http', 'https'])) {
$curl = curl_init((string)$parsedUrl);
// First pass is header-only (CURLOPT_NOBODY) and follows up to 5 redirects.
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => true,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 5,
CURLOPT_PATH_AS_IS => true,
CURLOPT_NOBODY => true,
CURLOPT_HEADER => true,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_DEFAULT_PROTOCOL => 'https',
CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible) Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Accept: text/html,application/xhtml+xml',
],
]);
$headers = curl_exec($curl);
if($headers === false) {
// Any transport failure is reported as 'metadata:timeout', with curl's message attached.
$resp->error = 'metadata:timeout';
$resp->errorMessage = curl_error($curl);
} else {
$headersRaw = explode("\r\n", trim($headers));
$statusCode = 200;
$headers = [];
foreach($headersRaw as $header) {
if(empty($header))
continue;
// A line without ':' is treated as a status line; the collected headers are
// reset there so only those of the final response (after redirects) remain.
if(strpos($header, ':') === false) {
$headParts = explode(' ', $header);
if(isset($headParts[1]) && is_numeric($headParts[1]))
$statusCode = (int)$headParts[1];
$headers = [];
continue;
}
$headerParts = explode(':', $header, 2);
$headerParts[0] = mb_strtolower($headerParts[0]);
// Repeated headers are joined with ', '.
if(isset($headers[$headerParts[0]]))
$headers[$headerParts[0]] .= ', ' . trim($headerParts[1] ?? '');
else
$headers[$headerParts[0]] = trim($headerParts[1] ?? '');
}
try {
$contentType = MediaType::parse($headers['content-type'] ?? '');
} catch(InvalidArgumentException $ex) {
$contentType = MediaType::parse('application/octet-stream');
}
$resp->content_type = MediaTypeExts::toV1($contentType);
$isHTML = $contentType->equals('text/html');
$isXHTML = $contentType->equals('application/xhtml+xml');
if($isHTML || $isXHTML) {
// Second pass on the same handle, this time fetching the body.
curl_setopt_array($curl, [
CURLOPT_NOBODY => false,
CURLOPT_HEADER => false,
]);
$body = curl_exec($curl);
curl_close($curl);
// NOTE(review): this file declares namespace Uiharu and the visible `use` lines do not
// import DOMDocument, so this name would resolve to Uiharu\DOMDocument — confirm.
$document = new DOMDocument;
if($isXHTML) {
$document->loadXML($body);
} else {
// The prepended <?xml encoding=...> instruction passes the charset to the HTML
// parser; the resulting processing-instruction node is removed again below.
@$document->loadHTML('<?xml encoding="' . $contentType->getCharset() . '">' . $body);
foreach($document->childNodes as $child)
if($child->nodeType === XML_PI_NODE) {
$document->removeChild($child);
break;
}
$document->encoding = $contentType->getCharset();
}
$charSet = $document->encoding;
$resp->type = 'website';
$resp->title = '';
$isMetaTitle = false;
// Only the first <title> element is used.
$titleTag = $document->getElementsByTagName('title');
foreach($titleTag as $tag) {
$resp->title = trim(mb_convert_encoding($tag->textContent, 'utf-8', $charSet));
break;
}
$metaTags = $document->getElementsByTagName('meta');
foreach($metaTags as $tag) {
// Key comes from `name` or else `property`; value from `value` or else `content`.
$nameAttr = $tag->hasAttribute('name') ? $tag->getAttribute('name') : (
$tag->hasAttribute('property') ? $tag->getAttribute('property') : ''
);
$valueAttr = $tag->hasAttribute('value') ? $tag->getAttribute('value') : (
$tag->hasAttribute('content') ? $tag->getAttribute('content') : ''
);
$nameAttr = trim(mb_convert_encoding($nameAttr, 'utf-8', $charSet));
$valueAttr = trim(mb_convert_encoding($valueAttr, 'utf-8', $charSet));
if(empty($nameAttr) || empty($valueAttr))
continue;
switch($nameAttr) {
case 'og:title':
case 'twitter:title':
// The first meta title wins and replaces the <title> text.
if(!$isMetaTitle) {
$isMetaTitle = true;
$resp->title = $valueAttr;
}
break;
case 'description':
case 'og:description':
case 'twitter:description':
if(!isset($resp->description))
$resp->description = $valueAttr;
break;
case 'og:site_name':
$resp->site_name = $valueAttr;
break;
case 'og:image':
case 'twitter:image':
$resp->image = $valueAttr;
break;
case 'theme-color':
$resp->color = $valueAttr;
break;
case 'og:type':
$resp->type = $valueAttr;
break;
}
}
} else {
$resp->is_image = $isImage = $contentType->matchCategory('image');
$resp->is_audio = $isAudio = $contentType->matchCategory('audio');
$resp->is_video = $isVideo = $contentType->matchCategory('video');
if($isImage || $isAudio || $isVideo) {
curl_close($curl);
$resp->media = new stdClass;
// ffprobe fetches and inspects the remote media itself; the URL is shell-escaped.
$ffmpeg = json_decode(shell_exec(sprintf('ffprobe -show_streams -show_format -print_format json -v quiet -i %s', escapeshellarg((string)$parsedUrl))));
if(!empty($ffmpeg)) {
if(!empty($ffmpeg->format)) {
// probe_score is scaled down to a 0..1 confidence value.
$resp->media->confidence = empty($ffmpeg->format->probe_score) ? 0 : (intval($ffmpeg->format->probe_score) / 100);
if(!empty($ffmpeg->format->duration))
$resp->media->duration = floatval($ffmpeg->format->duration);
if(!empty($ffmpeg->format->size))
$resp->media->size = intval($ffmpeg->format->size);
if(!empty($ffmpeg->format->bit_rate))
$resp->media->bitrate = intval($ffmpeg->format->bit_rate);
if($isVideo || $isImage) {
if(!empty($ffmpeg->streams)) {
// Dimensions come from video streams; for images the first one is enough,
// for videos a later video stream overwrites an earlier one.
foreach($ffmpeg->streams as $stream) {
if(($stream->codec_type ?? null) !== 'video')
continue;
$resp->width = intval($stream->coded_width ?? $stream->width ?? -1);
$resp->height = intval($stream->coded_height ?? $stream->height ?? -1);
if(!empty($stream->display_aspect_ratio))
$resp->media->aspect_ratio = $stream->display_aspect_ratio;
if($isImage)
break;
}
}
}
if($isAudio) {
// Copies the well-known tag fields from $source to $dest, accepting lower- or upper-case keys.
// NOTE(review): a named function declared inside the handler is defined when this statement
// executes; reaching it twice in one process would be a redeclaration error.
function eat_tags(stdClass $dest, stdClass $source): void {
if(!empty($source->title) || !empty($source->TITLE))
$dest->title = $source->title ?? $source->TITLE;
if(!empty($source->artist) || !empty($source->ARTIST))
$dest->artist = $source->artist ?? $source->ARTIST;
if(!empty($source->album) || !empty($source->ALBUM))
$dest->album = $source->album ?? $source->ALBUM;
if(!empty($source->date) || !empty($source->DATE))
$dest->date = $source->date ?? $source->DATE;
if(!empty($source->comment) || !empty($source->COMMENT))
$dest->comment = $source->comment ?? $source->COMMENT;
if(!empty($source->genre) || !empty($source->GENRE))
$dest->genre = $source->genre ?? $source->GENRE;
}
if(!empty($ffmpeg->format->tags)) {
$resp->media->tags = new stdClass;
eat_tags($resp->media->tags, $ffmpeg->format->tags);
} elseif(!empty($ffmpeg->streams)) {
// No format-level tags (the case the original note blames on ogg): fall back to per-stream tags.
$resp->media->tags = new stdClass;
foreach($ffmpeg->streams as $stream) {
if(($stream->codec_type ?? null) === 'audio' && !empty($stream->tags)) {
eat_tags($resp->media->tags, $stream->tags);
// NOTE(review): empty() on an object is always false, so this breaks after the
// first tagged audio stream even if no field was copied.
if(!empty($resp->media->tags))
break;
}
}
}
// Without a title so far, compose one as "<artist> - <title> (<date>)" from whatever tags exist.
if(empty($resp->title)) {
$audioTitle = '';
if(!empty($resp->media->tags->artist))
$audioTitle .= $resp->media->tags->artist . ' - ';
if(!empty($resp->media->tags->title))
$audioTitle .= $resp->media->tags->title;
if(!empty($resp->media->tags->date))
$audioTitle .= ' (' . $resp->media->tags->date . ')';
if(!empty($audioTitle))
$resp->title = $audioTitle;
}
if(empty($resp->description) && !empty($resp->media->tags->comment))
$resp->description = $resp->media->tags->comment;
}
}
}
if($includeRawResult)
$resp->ffmpeg = $ffmpeg;
} else curl_close($curl);
}
}
}
$sw->stop();
$resp->took = $sw->getElapsedTime() / 1000;
$respJson = json_encode($resp);
// The fresh result is stored even when cache reads are disabled.
$replaceCache = $db->prepare('REPLACE INTO `uih_metadata_cache` (`metadata_url`, `metadata_resp`) VALUES (UNHEX(?), ?)');
$replaceCache->addParameter(1, $urlHash);
$replaceCache->addParameter(2, $respJson);
$replaceCache->execute();
}
// $respJson only exists when the lookup ran; a cache hit returns the decoded object instead.
if(!empty($respJson))
$response->setContent($respJson);
else
return $resp;
};
// The metadata handler is registered for both GET and POST; POST exists so that
// overly long URLs can be sent in the request body instead of the query string.
$router->get('/metadata', $metaDataHandlerV1);
$router->post('/metadata', $metaDataHandlerV1);
// NOTE(review): both the router and the context dispatch appear here; this looks like
// old and new dispatch lines from the diff side by side — confirm which one is live.
$router->dispatch();
$ctx->dispatchHttp();

550
src/Apis/v1_0.php Normal file
View file

@ -0,0 +1,550 @@
<?php
namespace Uiharu\APIs;
use stdClass;
use DOMDocument;
use InvalidArgumentException;
use Uiharu\Config;
use Uiharu\MediaTypeExts;
use Uiharu\UihContext;
use Uiharu\Url;
use Index\MediaType;
use Index\Data\IDbConnection;
use Index\Http\HttpFx;
use Index\Performance\Stopwatch;
final class v1_0 implements \Uiharu\IApi {
private IDbConnection $db;
/**
 * Keeps a handle to the database connection exposed by the application context.
 *
 * @param UihContext $ctx Context from which the database connection is obtained.
 */
public function __construct(UihContext $ctx) {
    $connection = $ctx->getDatabase();
    $this->db = $connection;
}
/**
 * Tells whether this API version claims the given request URL.
 * Every URL that does not begin with "/v" is claimed.
 *
 * NOTE(review): the expression is boolean while the declared return type is
 * string; this file has no strict_types declaration, so the value is coerced
 * ('1' for true, '' for false). Confirm the return type IApi expects.
 *
 * @param string $url Request URL to test.
 */
public function match(string $url): string {
    $hasVersionPrefix = str_starts_with($url, '/v');
    return !$hasVersionPrefix;
}
/**
 * Attaches this API's /metadata endpoint to the router for GET and POST.
 *
 * @param HttpFx $router Router receiving the route definitions.
 */
public function register(HttpFx $router): void {
    $route = '/metadata';
    $router->get($route, [$this, 'handleGET']);
    $router->post($route, [$this, 'handlePOST']);
}
/**
 * Populates $resp with metadata for an EEPROM upload by requesting its JSON description.
 *
 * Always sets type, color, eeprom_file_id, eeprom_file_info and site_name on $resp;
 * title and image are set only when the JSON response carries `name` / `thumb`.
 *
 * @param stdClass $resp Response object, modified in place.
 * @param string $eepromFileId Identifier of the upload.
 * @param string $domain Label inserted into the host name as eeprom.<domain>.net.
 */
public function eepromLookup(stdClass $resp, string $eepromFileId, string $domain = 'flashii'): void {
    $resp->type = 'eeprom:file';
    $resp->color = '#8559a5';
    $resp->eeprom_file_id = $eepromFileId;

    // The ID may be taken verbatim from a request path, so encode it to keep it
    // confined to a single path segment of the upstream URL.
    $curl = curl_init("https://eeprom.{$domain}.net/uploads/" . rawurlencode($eepromFileId) . '.json');
    curl_setopt_array($curl, [
        CURLOPT_AUTOREFERER => false,
        CURLOPT_CERTINFO => false,
        CURLOPT_FAILONERROR => false,
        CURLOPT_FOLLOWLOCATION => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TCP_FASTOPEN => true,
        CURLOPT_CONNECTTIMEOUT => 2,
        CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
        CURLOPT_TIMEOUT => 5,
        CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
        CURLOPT_HTTPHEADER => [
            'Accept: application/json',
        ],
    ]);
    $eepromResp = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false on transport failure; record that explicitly as "no info".
    $resp->eeprom_file_info = is_string($eepromResp) ? json_decode($eepromResp) : null;

    if(isset($resp->eeprom_file_info->name))
        $resp->title = $resp->eeprom_file_info->name;
    if(isset($resp->eeprom_file_info->thumb))
        $resp->image = $resp->eeprom_file_info->thumb;

    $resp->site_name = 'Flashii EEPROM';
}
/**
 * GET entry point for /metadata.
 *
 * A HEAD request only gets the JSON content type; any other request is passed
 * to handler() with the `url` parameter cast to string as the target.
 *
 * @param mixed $response Response object supplied by the router.
 * @param mixed $request Request object supplied by the router.
 * @return mixed Whatever handler() returns, or null for HEAD.
 */
public function handleGET($response, $request) {
    if($request->getMethod() !== 'HEAD') {
        $targetUrl = (string)$request->getParam('url');
        return $this->handler($response, $request, $targetUrl);
    }

    $response->setTypeJson();
    return;
}
/**
 * POST entry point for /metadata.
 *
 * The request body is the target URL; at most 1000 bytes of it are read and
 * passed to handler(). Requests without stream content are answered with 400.
 *
 * @param mixed $response Response object supplied by the router.
 * @param mixed $request Request object supplied by the router.
 * @return mixed 400 for non-stream content, otherwise whatever handler() returns.
 */
public function handlePOST($response, $request) {
    if($request->isStreamContent()) {
        $targetUrl = $request->getContent()->getStream()->read(1000);
        return $this->handler($response, $request, $targetUrl);
    }

    return 400;
}
private function handler($response, $request, string $targetUrl) {
$sw = Stopwatch::startNew();
$resp = new stdClass;
$response->setTypeJson();
if(empty($targetUrl)) {
$response->setStatusCode(400);
return $resp;
}
try {
$parsedUrl = Url::parse($targetUrl);
} catch(InvalidArgumentException $ex) {
$response->setStatusCode(400);
$resp->error = 'metadata:uri';
return $resp;
}
// if no scheme is specified, try https
if(!$parsedUrl->hasScheme())
$parsedUrl->setScheme('https');
$resp->uri = $parsedUrl->toV1();
$urlHash = $parsedUrl->calculateHash(false);
$enableCache = !UIH_DEBUG || $request->hasParam('_cache');
$includeRawResult = UIH_DEBUG || $request->hasParam('include_raw');
if($enableCache) {
$cacheFetch = $this->db->prepare('SELECT `metadata_resp` FROM `uih_metadata_cache` WHERE `metadata_url` = UNHEX(?) AND `metadata_created` > NOW() - INTERVAL 10 MINUTE');
$cacheFetch->addParameter(1, $urlHash);
$cacheFetch->execute();
$cacheResult = $cacheFetch->getResult();
if($cacheResult->next()) {
$cacheResp = json_decode($cacheResult->getString(0));
if($cacheResp !== null)
$resp = $cacheResp;
}
}
if(empty($resp->type)) {
$urlScheme = strtolower($parsedUrl->getScheme());
$urlHost = strtolower($parsedUrl->getHost());
$urlPath = '/' . trim($parsedUrl->getPath(), '/');
if($urlScheme === 'eeprom') {
if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) {
$parsedUrl = Url::parse('https://i.fii.moe/' . $matches[1]);
$resp->uri = $parsedUrl->toV1();
$continueRaw = true;
$this->eepromLookup($resp, $matches[1]);
}
} elseif($urlScheme === 'devrom') {
if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) {
$parsedUrl = Url::parse('https://i.edgii.net/' . $matches[1]);
$resp->uri = $parsedUrl->toV1();
$continueRaw = true;
$this->eepromLookup($resp, $matches[1], 'edgii');
}
} elseif($urlScheme === 'http' || $urlScheme === 'https') {
switch($urlHost) {
case 'i.flashii.net':
case 'i.fii.moe':
$eepromFileId = substr($urlPath, 1);
case 'eeprom.flashii.net':
if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
$eepromFileId = $matches[1];
if(!empty($eepromFileId)) {
$continueRaw = true;
$this->eepromLookup($resp, $eepromFileId);
}
break;
case 'i.edgii.net':
$eepromFileId = substr($urlPath, 1);
case 'eeprom.edgii.net':
if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
$eepromFileId = $matches[1];
if(!empty($eepromFileId)) {
$continueRaw = true;
$this->eepromLookup($resp, $eepromFileId, 'edgii');
}
break;
case 'twitter.com': case 'www.twitter.com':
case 'm.twitter.com': case 'mobile.twitter.com':
case 'nitter.net': case 'www.nitter.net':
if(preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $urlPath, $matches)) {
$resp->type = 'twitter:tweet';
$resp->color = '#1da1f2';
$resp->tweet_id = strval($matches[1] ?? '0');
$curl = curl_init("https://api.twitter.com/2/tweets?ids={$resp->tweet_id}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified");
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'),
'Accept: application/json',
],
]);
$tweetResp = curl_exec($curl);
curl_close($curl);
$resp->tweet_info = json_decode($tweetResp);
if(isset($resp->tweet_info->includes->users[0]->name))
$resp->title = $resp->tweet_info->includes->users[0]->name;
if(isset($resp->tweet_info->includes->users[0]->profile_image_url))
$resp->image = $resp->tweet_info->includes->users[0]->profile_image_url;
if(isset($resp->tweet_info->data[0]->text))
$resp->description = $resp->tweet_info->data[0]->text;
$resp->site_name = 'Twitter';
break;
}
if(preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $urlPath, $matches)) {
$resp->type = 'twitter:user';
$resp->color = '#1da1f2';
$resp->twitter_user_name = strval($matches[1] ?? '');
$curl = curl_init("https://api.twitter.com/2/users/by?usernames={$resp->twitter_user_name}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified");
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'),
'Accept: application/json',
],
]);
$twitUserResp = curl_exec($curl);
curl_close($curl);
$resp->twitter_user_info = json_decode($twitUserResp);
if(isset($resp->twitter_user_info->data[0]->name))
$resp->title = $resp->twitter_user_info->data[0]->name;
if(isset($resp->twitter_user_info->data[0]->profile_image_url))
$resp->image = $resp->twitter_user_info->data[0]->profile_image_url;
if(isset($resp->twitter_user_info->data[0]->description))
$resp->description = $resp->twitter_user_info->data[0]->description;
$resp->site_name = 'Twitter';
break;
}
break;
case 'youtu.be': case 'www.youtu.be': // www. doesn't work for this, but may as well cover it
$youtubeVideoId = substr($urlPath, 1);
case 'youtube.com': case 'www.youtube.com':
case 'youtube-nocookie.com': case 'www.youtube-nocookie.com':
parse_str($parsedUrl->getQuery(), $queryString);
if(!isset($youtubeVideoId) && $urlPath === '/watch')
$youtubeVideoId = $queryString['v'] ?? null;
if(!empty($youtubeVideoId)) {
$resp->type = 'youtube:video';
$resp->color = '#f00';
$resp->youtube_video_id = $youtubeVideoId;
if(isset($queryString['t']))
$resp->youtube_start_time = $queryString['t'];
if(isset($queryString['list']))
$resp->youtube_playlist = $queryString['list'];
if(isset($queryString['index']))
$resp->youtube_playlist_index = $queryString['index'];
$curl = curl_init("https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id={$resp->youtube_video_id}&key=" . Config::get('Google', 'apiKey'));
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => false,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => false,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Accept: application/json',
],
]);
$youtubeResp = curl_exec($curl);
curl_close($curl);
$resp->youtube_video_info = json_decode($youtubeResp);
if(isset($resp->youtube_video_info->items[0]->snippet->title))
$resp->title = $resp->youtube_video_info->items[0]->snippet->title;
if(isset($resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url))
$resp->image = $resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url;
if(isset($resp->youtube_video_info->items[0]->snippet->description))
$resp->description = $resp->youtube_video_info->items[0]->snippet->description;
$resp->site_name = 'YouTube';
}
break;
}
} else {
$resp->error = 'metadata:scheme';
$response->setStatusCode(400);
return $resp;
}
if((empty($resp->type) || isset($continueRaw)) && in_array($parsedUrl->getScheme(), ['http', 'https'])) {
$curl = curl_init((string)$parsedUrl);
curl_setopt_array($curl, [
CURLOPT_AUTOREFERER => true,
CURLOPT_CERTINFO => false,
CURLOPT_FAILONERROR => false,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 5,
CURLOPT_PATH_AS_IS => true,
CURLOPT_NOBODY => true,
CURLOPT_HEADER => true,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_TCP_FASTOPEN => true,
CURLOPT_CONNECTTIMEOUT => 2,
CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
CURLOPT_TIMEOUT => 5,
CURLOPT_DEFAULT_PROTOCOL => 'https',
CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible) Uiharu/' . UIH_VERSION,
CURLOPT_HTTPHEADER => [
'Accept: text/html,application/xhtml+xml',
],
]);
$headers = curl_exec($curl);
if($headers === false) {
$resp->error = 'metadata:timeout';
$resp->errorMessage = curl_error($curl);
} else {
$headersRaw = explode("\r\n", trim($headers));
$statusCode = 200;
$headers = [];
foreach($headersRaw as $header) {
if(empty($header))
continue;
if(strpos($header, ':') === false) {
$headParts = explode(' ', $header);
if(isset($headParts[1]) && is_numeric($headParts[1]))
$statusCode = (int)$headParts[1];
$headers = [];
continue;
}
$headerParts = explode(':', $header, 2);
$headerParts[0] = mb_strtolower($headerParts[0]);
if(isset($headers[$headerParts[0]]))
$headers[$headerParts[0]] .= ', ' . trim($headerParts[1] ?? '');
else
$headers[$headerParts[0]] = trim($headerParts[1] ?? '');
}
try {
$contentType = MediaType::parse($headers['content-type'] ?? '');
} catch(InvalidArgumentException $ex) {
$contentType = MediaType::parse('application/octet-stream');
}
$resp->content_type = MediaTypeExts::toV1($contentType);
$isHTML = $contentType->equals('text/html');
$isXHTML = $contentType->equals('application/xhtml+xml');
if($isHTML || $isXHTML) {
curl_setopt_array($curl, [
CURLOPT_NOBODY => false,
CURLOPT_HEADER => false,
]);
$body = curl_exec($curl);
curl_close($curl);
$document = new DOMDocument;
if($isXHTML) {
$document->loadXML($body, LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING);
} else {
$document->loadHTML($body, LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING);
foreach($document->childNodes as $child)
if($child->nodeType === XML_PI_NODE) {
$document->removeChild($child);
break;
}
$document->encoding = $contentType->getCharset();
}
$charSet = $document->encoding;
$resp->type = 'website';
$resp->title = '';
$isMetaTitle = false;
$titleTag = $document->getElementsByTagName('title');
foreach($titleTag as $tag) {
$resp->title = trim(mb_convert_encoding($tag->textContent, 'utf-8', $charSet));
break;
}
$metaTags = $document->getElementsByTagName('meta');
foreach($metaTags as $tag) {
$nameAttr = $tag->hasAttribute('name') ? $tag->getAttribute('name') : (
$tag->hasAttribute('property') ? $tag->getAttribute('property') : ''
);
$valueAttr = $tag->hasAttribute('value') ? $tag->getAttribute('value') : (
$tag->hasAttribute('content') ? $tag->getAttribute('content') : ''
);
$nameAttr = trim(mb_convert_encoding($nameAttr, 'utf-8', $charSet));
$valueAttr = trim(mb_convert_encoding($valueAttr, 'utf-8', $charSet));
if(empty($nameAttr) || empty($valueAttr))
continue;
switch($nameAttr) {
case 'og:title':
case 'twitter:title':
if(!$isMetaTitle) {
$isMetaTitle = true;
$resp->title = $valueAttr;
}
break;
case 'description':
case 'og:description':
case 'twitter:description':
if(!isset($resp->description))
$resp->description = $valueAttr;
break;
case 'og:site_name':
$resp->site_name = $valueAttr;
break;
case 'og:image':
case 'twitter:image':
$resp->image = $valueAttr;
break;
case 'theme-color':
$resp->color = $valueAttr;
break;
case 'og:type':
$resp->type = $valueAttr;
break;
}
}
} else {
$resp->is_image = $isImage = $contentType->matchCategory('image');
$resp->is_audio = $isAudio = $contentType->matchCategory('audio');
$resp->is_video = $isVideo = $contentType->matchCategory('video');
if($isImage || $isAudio || $isVideo) {
curl_close($curl);
$resp->media = new stdClass;
$ffmpeg = json_decode(shell_exec(sprintf('ffprobe -show_streams -show_format -print_format json -v quiet -i %s', escapeshellarg((string)$parsedUrl))));
if(!empty($ffmpeg)) {
if(!empty($ffmpeg->format)) {
$resp->media->confidence = empty($ffmpeg->format->probe_score) ? 0 : (intval($ffmpeg->format->probe_score) / 100);
if(!empty($ffmpeg->format->duration))
$resp->media->duration = floatval($ffmpeg->format->duration);
if(!empty($ffmpeg->format->size))
$resp->media->size = intval($ffmpeg->format->size);
if(!empty($ffmpeg->format->bit_rate))
$resp->media->bitrate = intval($ffmpeg->format->bit_rate);
if($isVideo || $isImage) {
if(!empty($ffmpeg->streams)) {
foreach($ffmpeg->streams as $stream) {
if(($stream->codec_type ?? null) !== 'video')
continue;
$resp->width = intval($stream->coded_width ?? $stream->width ?? -1);
$resp->height = intval($stream->coded_height ?? $stream->height ?? -1);
if(!empty($stream->display_aspect_ratio))
$resp->media->aspect_ratio = $stream->display_aspect_ratio;
if($isImage)
break;
}
}
}
if($isAudio) {
// Guarded so that reaching this code more than once in the same process
// does not trigger a fatal "Cannot redeclare function" error.
if(!function_exists(__NAMESPACE__ . '\\eat_tags')) {
    /**
     * Copies the audio tags title, artist, album, date, comment and genre
     * from a tag object onto $dest.
     *
     * Each tag is looked up under its lower-case name first and its
     * upper-case name second; the first non-empty value wins. Tags that are
     * missing or empty under both spellings are not set on $dest at all.
     *
     * @param stdClass $dest   Object that receives the lower-case tag properties.
     * @param stdClass $source Tag object to read from (called with ffprobe format/stream tags).
     */
    function eat_tags(stdClass $dest, stdClass $source): void {
        foreach(['title', 'artist', 'album', 'date', 'comment', 'genre'] as $tag) {
            $upperTag = strtoupper($tag);

            // Test emptiness per spelling: a present-but-empty lower-case
            // tag must not shadow a populated upper-case one.
            if(!empty($source->{$tag}))
                $dest->{$tag} = $source->{$tag};
            elseif(!empty($source->{$upperTag}))
                $dest->{$tag} = $source->{$upperTag};
        }
    }
}
if(!empty($ffmpeg->format->tags)) {
$resp->media->tags = new stdClass;
eat_tags($resp->media->tags, $ffmpeg->format->tags);
} elseif(!empty($ffmpeg->streams)) {
// iterate over streams, fuck ogg
$resp->media->tags = new stdClass;
foreach($ffmpeg->streams as $stream) {
if(($stream->codec_type ?? null) === 'audio' && !empty($stream->tags)) {
eat_tags($resp->media->tags, $stream->tags);
if(!empty($resp->media->tags))
break;
}
}
}
if(empty($resp->title)) {
$audioTitle = '';
if(!empty($resp->media->tags->artist))
$audioTitle .= $resp->media->tags->artist . ' - ';
if(!empty($resp->media->tags->title))
$audioTitle .= $resp->media->tags->title;
if(!empty($resp->media->tags->date))
$audioTitle .= ' (' . $resp->media->tags->date . ')';
if(!empty($audioTitle))
$resp->title = $audioTitle;
}
if(empty($resp->description) && !empty($resp->media->tags->comment))
$resp->description = $resp->media->tags->comment;
}
}
}
if($includeRawResult)
$resp->ffmpeg = $ffmpeg;
} else curl_close($curl);
}
}
}
$sw->stop();
$resp->took = $sw->getElapsedTime() / 1000;
$respJson = json_encode($resp);
$replaceCache = $this->db->prepare('REPLACE INTO `uih_metadata_cache` (`metadata_url`, `metadata_resp`) VALUES (UNHEX(?), ?)');
$replaceCache->addParameter(1, $urlHash);
$replaceCache->addParameter(2, $respJson);
$replaceCache->execute();
}
if(!empty($respJson))
$response->setContent($respJson);
else
return $resp;
}
}

9
src/IApi.php Normal file
View file

@ -0,0 +1,9 @@
<?php
namespace Uiharu;
use Index\Http\HttpFx;
/**
 * Contract for an API version handler that can be registered with UihContext.
 */
interface IApi {
    /**
     * Tests whether this handler is responsible for the given request path.
     *
     * UihContext::matchApi() passes a path with a leading slash and no
     * trailing slash, and only checks the result for truthiness.
     *
     * @param string $url Request path to test.
     * @return string Truthy when the handler accepts the path.
     */
    public function match(string $url): string;

    /**
     * Called with the context's router once match() has accepted the request,
     * presumably so the handler can attach its routes.
     *
     * @param HttpFx $router Router to register with.
     */
    public function register(HttpFx $router): void;
}

84
src/UihContext.php Normal file
View file

@ -0,0 +1,84 @@
<?php
namespace Uiharu;
use Index\Data\IDbConnection;
use Index\Http\HttpFx;
/**
 * Application context: holds the database connection, the HTTP router and
 * the list of registered API handlers.
 */
final class UihContext {
    /** Database connection supplied to the constructor. */
    private IDbConnection $database;

    /**
     * HTTP router. Only assigned by setupHttp(); it is an uninitialised typed
     * property until then, so setupHttp() must run before getRouter(),
     * dispatchHttp() or a matching matchApi() call.
     */
    private HttpFx $router;

    /** @var IApi[] Handlers in registration order. */
    private array $apis = [];

    /**
     * @param IDbConnection $database Connection exposed through getDatabase().
     */
    public function __construct(IDbConnection $database) {
        $this->database = $database;
    }

    /**
     * @return IDbConnection The connection passed to the constructor.
     */
    public function getDatabase(): IDbConnection {
        return $this->database;
    }

    /**
     * @return HttpFx The router created by setupHttp().
     */
    public function getRouter(): HttpFx {
        return $this->router;
    }

    /**
     * Decides whether a CORS Origin header value may access the service.
     *
     * The origin's host is always allowed when it equals the request's
     * HTTP_HOST. Otherwise it is checked against the CORS "origins" config
     * list; an empty list allows every origin.
     *
     * @param string $origin Raw Origin header value.
     * @return bool True when the origin is allowed.
     */
    public function isOriginAllowed(string $origin): bool {
        // parse_url() returns null (no host, e.g. the literal "null" origin)
        // or false (malformed URL); normalise both to an empty host rather
        // than passing a non-string into mb_strtolower().
        $host = parse_url($origin, PHP_URL_HOST);
        $host = is_string($host) ? mb_strtolower($host) : '';

        if($host === ($_SERVER['HTTP_HOST'] ?? null))
            return true;

        $allowed = Config::get('CORS', 'origins', []);
        if(empty($allowed))
            return true;

        // Strict comparison so loosely-equal non-string config values can
        // never match a host name.
        return in_array($host, $allowed, true);
    }

    /**
     * Creates the router and installs the global middleware and index route.
     */
    public function setupHttp(): void {
        $this->router = new HttpFx;

        $this->router->use('/', function($response) {
            $response->setPoweredBy('Uiharu');
        });

        // CORS: reject disallowed origins, echo allowed ones back.
        $this->router->use('/', function($response, $request) {
            $origin = $request->getHeaderLine('Origin');

            if(!empty($origin)) {
                if(!$this->isOriginAllowed($origin))
                    return 403;

                $response->setHeader('Access-Control-Allow-Origin', $origin);
                $response->setHeader('Vary', 'Origin');
            }
        });

        // Answer OPTIONS requests without running any further handlers.
        $this->router->use('/', function($response, $request) {
            if($request->getMethod() === 'OPTIONS') {
                $response->setHeader('Access-Control-Allow-Methods', 'OPTIONS, GET, POST');
                return 204;
            }
        });

        $this->router->get('/', function($response) {
            $response->accelRedirect('/index.html');
            $response->setContentType('text/html; charset=utf-8');
        });
    }

    /**
     * Forwards all arguments unchanged to the router's dispatch().
     */
    public function dispatchHttp(...$args): void {
        $this->router->dispatch(...$args);
    }

    /**
     * Adds an API handler to the list consulted by matchApi().
     *
     * @param IApi $api Handler to append.
     */
    public function registerApi(IApi $api): void {
        $this->apis[] = $api;
    }

    /**
     * Registers the first API handler that accepts the request path.
     *
     * The path component of $reqPath is normalised to a single leading slash
     * with no trailing slash, then offered to each handler in registration
     * order. Only the first handler whose match() result is truthy has
     * register() called with the router.
     *
     * @param string $reqPath Request URI or path.
     */
    public function matchApi(string $reqPath): void {
        // parse_url() returns null when the URL has no path and false when it
        // is malformed; treat both as an empty path instead of feeding a
        // non-string into trim().
        $path = parse_url($reqPath, PHP_URL_PATH);
        $reqPath = '/' . trim(is_string($path) ? $path : '', '/');

        foreach($this->apis as $api)
            if($api->match($reqPath)) {
                $api->register($this->router);
                break;
            }
    }
}

View file

@ -12,7 +12,7 @@ define('UIH_DEBUG', is_file(UIH_ROOT . '/.debug'));
define('UIH_PUBLIC', UIH_ROOT . '/public');
define('UIH_SOURCE', UIH_ROOT . '/src');
define('UIH_LIBRARY', UIH_ROOT . '/lib');
define('UIH_VERSION', '20220715');
define('UIH_VERSION', '20220716');
require_once UIH_LIBRARY . '/index/index.php';
@ -36,3 +36,5 @@ try {
echo '<h3>Unable to connect to database</h3>';
die($ex->getMessage());
}
$ctx = new UihContext($db);