From 7160e9909d17e50824f0d5fdd9a284b930dacc8e Mon Sep 17 00:00:00 2001 From: flashwave Date: Fri, 15 Jul 2022 22:20:20 +0000 Subject: [PATCH] Created structure for additional API version handlers. --- public/index.php | 570 +-------------------------------------------- src/Apis/v1_0.php | 550 +++++++++++++++++++++++++++++++++++++++++++ src/IApi.php | 9 + src/UihContext.php | 84 +++++++ uiharu.php | 4 +- 5 files changed, 651 insertions(+), 566 deletions(-) create mode 100644 src/Apis/v1_0.php create mode 100644 src/IApi.php create mode 100644 src/UihContext.php diff --git a/public/index.php b/public/index.php index e130ea6..1d770c5 100644 --- a/public/index.php +++ b/public/index.php @@ -1,574 +1,14 @@ type = 'eeprom:file'; - $resp->color = '#8559a5'; - $resp->eeprom_file_id = $eepromFileId; - $curl = curl_init("https://eeprom.{$domain}.net/uploads/{$resp->eeprom_file_id}.json"); - curl_setopt_array($curl, [ - CURLOPT_AUTOREFERER => false, - CURLOPT_CERTINFO => false, - CURLOPT_FAILONERROR => false, - CURLOPT_FOLLOWLOCATION => false, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TCP_FASTOPEN => true, - CURLOPT_CONNECTTIMEOUT => 2, - CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, - CURLOPT_TIMEOUT => 5, - CURLOPT_USERAGENT => 'Uiharu/' . 
UIH_VERSION, - CURLOPT_HTTPHEADER => [ - 'Accept: application/json', - ], - ]); - $eepromResp = curl_exec($curl); - curl_close($curl); - $resp->eeprom_file_info = json_decode($eepromResp); - if(isset($resp->eeprom_file_info->name)) - $resp->title = $resp->eeprom_file_info->name; - if(isset($resp->eeprom_file_info->thumb)) - $resp->image = $resp->eeprom_file_info->thumb; - $resp->site_name = 'Flashii EEPROM'; -} - +// should be in a cron job $db->execute('DELETE FROM `uih_metadata_cache` WHERE `metadata_created` < NOW() - INTERVAL 7 DAY'); -/*apis = [ - new \Uiharu\APIs\v1_0, -];*/ +$ctx->setupHttp(); -$router = new HttpFx; -$router->use('/', function($response) { - $response->setPoweredBy('Uiharu'); -}); +$ctx->registerApi(new \Uiharu\Apis\v1_0($ctx)); +$ctx->matchApi(filter_input(INPUT_SERVER, 'REQUEST_URI')); -$router->use('/', function($response, $request) { - $origin = $request->getHeaderLine('Origin'); - - if(!empty($origin)) { - if(!uih_origin_allowed($origin)) - return 403; - - $response->setHeader('Access-Control-Allow-Origin', $origin); - $response->setHeader('Vary', 'Origin'); - } -}); - -$router->use('/', function($response, $request) { - if($request->getMethod() === 'OPTIONS') { - $response->setHeader('Access-Control-Allow-Methods', 'OPTIONS, GET, POST'); - return 204; - } -}); - -$router->get('/', function($response) { - $response->accelRedirect('/index.html'); - $response->setContentType('text/html; charset=utf-8'); -}); - -$metaDataHandlerV1 = function($response, $request) use ($db) { - $response->setContentType('application/json; charset=utf-8'); - if($request->getMethod() === 'HEAD') - return; - - $sw = Stopwatch::startNew(); - $resp = new stdClass; - - if($request->getMethod() === 'POST') { - if(!$request->isStreamContent()) { - $response->setStatusCode(400); - return $resp; - } - - $targetUrl = $request->getContent()->getStream()->read(1000); - } else { - $targetUrl = (string)$request->getParam('url'); - } - - if(empty($targetUrl)) { - 
$response->setStatusCode(400); - return $resp; - } - - try { - $parsedUrl = Url::parse($targetUrl); - } catch(InvalidArgumentException $ex) { - $response->setStatusCode(400); - $resp->error = 'metadata:uri'; - return $resp; - } - - // if no scheme is specified, try https - if(!$parsedUrl->hasScheme()) - $parsedUrl->setScheme('https'); - - $resp->uri = $parsedUrl->toV1(); - - $urlHash = $parsedUrl->calculateHash(false); - - $enableCache = !UIH_DEBUG || $request->hasParam('_cache'); - $includeRawResult = UIH_DEBUG || $request->hasParam('include_raw'); - - if($enableCache) { - $cacheFetch = $db->prepare('SELECT `metadata_resp` FROM `uih_metadata_cache` WHERE `metadata_url` = UNHEX(?) AND `metadata_created` > NOW() - INTERVAL 10 MINUTE'); - $cacheFetch->addParameter(1, $urlHash); - $cacheFetch->execute(); - $cacheResult = $cacheFetch->getResult(); - if($cacheResult->next()) { - $cacheResp = json_decode($cacheResult->getString(0)); - if($cacheResp !== null) - $resp = $cacheResp; - } - } - - if(empty($resp->type)) { - $urlScheme = strtolower($parsedUrl->getScheme()); - $urlHost = strtolower($parsedUrl->getHost()); - $urlPath = '/' . trim($parsedUrl->getPath(), '/'); - - if($urlScheme === 'eeprom') { - if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) { - $parsedUrl = Url::parse('https://i.fii.moe/' . $matches[1]); - $resp->uri = $parsedUrl->toV1(); - $continueRaw = true; - uih_eeprom_lookup($resp, $matches[1]); - } - } elseif($urlScheme === 'devrom') { - if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) { - $parsedUrl = Url::parse('https://i.edgii.net/' . 
$matches[1]); - $resp->uri = $parsedUrl->toV1(); - $continueRaw = true; - uih_eeprom_lookup($resp, $matches[1], 'edgii'); - } - } elseif($urlScheme === 'http' || $urlScheme === 'https') { - switch($urlHost) { - case 'i.flashii.net': - case 'i.fii.moe': - $eepromFileId = substr($urlPath, 1); - case 'eeprom.flashii.net': - if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches)) - $eepromFileId = $matches[1]; - - if(!empty($eepromFileId)) { - $continueRaw = true; - uih_eeprom_lookup($resp, $eepromFileId); - } - break; - - case 'i.edgii.net': - $eepromFileId = substr($urlPath, 1); - case 'eeprom.edgii.net': - if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches)) - $eepromFileId = $matches[1]; - - if(!empty($eepromFileId)) { - $continueRaw = true; - uih_eeprom_lookup($resp, $eepromFileId, 'edgii'); - } - break; - - case 'twitter.com': case 'www.twitter.com': - case 'm.twitter.com': case 'mobile.twitter.com': - case 'nitter.net': case 'www.nitter.net': - if(preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $urlPath, $matches)) { - $resp->type = 'twitter:tweet'; - $resp->color = '#1da1f2'; - $resp->tweet_id = strval($matches[1] ?? 
'0'); - $curl = curl_init("https://api.twitter.com/2/tweets?ids={$resp->tweet_id}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified"); - curl_setopt_array($curl, [ - CURLOPT_AUTOREFERER => false, - CURLOPT_CERTINFO => false, - CURLOPT_FAILONERROR => false, - CURLOPT_FOLLOWLOCATION => false, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TCP_FASTOPEN => true, - CURLOPT_CONNECTTIMEOUT => 2, - CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, - CURLOPT_TIMEOUT => 5, - CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, - CURLOPT_HTTPHEADER => [ - 'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'), - 'Accept: application/json', - ], - ]); - $tweetResp = curl_exec($curl); - curl_close($curl); - $resp->tweet_info = json_decode($tweetResp); - if(isset($resp->tweet_info->includes->users[0]->name)) - $resp->title = $resp->tweet_info->includes->users[0]->name; - if(isset($resp->tweet_info->includes->users[0]->profile_image_url)) - $resp->image = $resp->tweet_info->includes->users[0]->profile_image_url; - if(isset($resp->tweet_info->data[0]->text)) - $resp->description = $resp->tweet_info->data[0]->text; - $resp->site_name = 'Twitter'; - break; - } - - if(preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $urlPath, $matches)) { - $resp->type = 'twitter:user'; - $resp->color = '#1da1f2'; - $resp->twitter_user_name = strval($matches[1] ?? 
''); - $curl = curl_init("https://api.twitter.com/2/users/by?usernames={$resp->twitter_user_name}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified"); - curl_setopt_array($curl, [ - CURLOPT_AUTOREFERER => false, - CURLOPT_CERTINFO => false, - CURLOPT_FAILONERROR => false, - CURLOPT_FOLLOWLOCATION => false, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TCP_FASTOPEN => true, - CURLOPT_CONNECTTIMEOUT => 2, - CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, - CURLOPT_TIMEOUT => 5, - CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, - CURLOPT_HTTPHEADER => [ - 'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'), - 'Accept: application/json', - ], - ]); - $twitUserResp = curl_exec($curl); - curl_close($curl); - $resp->twitter_user_info = json_decode($twitUserResp); - if(isset($resp->twitter_user_info->data[0]->name)) - $resp->title = $resp->twitter_user_info->data[0]->name; - if(isset($resp->twitter_user_info->data[0]->profile_image_url)) - $resp->image = $resp->twitter_user_info->data[0]->profile_image_url; - if(isset($resp->twitter_user_info->data[0]->description)) - $resp->description = $resp->twitter_user_info->data[0]->description; - $resp->site_name = 'Twitter'; - break; - } - break; - - case 'youtu.be': case 'www.youtu.be': // www. doesn't work for this, but may as well cover it - $youtubeVideoId = substr($urlPath, 1); - case 'youtube.com': case 'www.youtube.com': - case 'youtube-nocookie.com': case 'www.youtube-nocookie.com': - parse_str($parsedUrl->getQuery(), $queryString); - - if(!isset($youtubeVideoId) && $urlPath === '/watch') - $youtubeVideoId = $queryString['v'] ?? 
null; - - if(!empty($youtubeVideoId)) { - $resp->type = 'youtube:video'; - $resp->color = '#f00'; - $resp->youtube_video_id = $youtubeVideoId; - - if(isset($queryString['t'])) - $resp->youtube_start_time = $queryString['t']; - if(isset($queryString['list'])) - $resp->youtube_playlist = $queryString['list']; - if(isset($queryString['index'])) - $resp->youtube_playlist_index = $queryString['index']; - - $curl = curl_init("https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id={$resp->youtube_video_id}&key=" . Config::get('Google', 'apiKey')); - curl_setopt_array($curl, [ - CURLOPT_AUTOREFERER => false, - CURLOPT_CERTINFO => false, - CURLOPT_FAILONERROR => false, - CURLOPT_FOLLOWLOCATION => false, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TCP_FASTOPEN => true, - CURLOPT_CONNECTTIMEOUT => 2, - CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, - CURLOPT_TIMEOUT => 5, - CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, - CURLOPT_HTTPHEADER => [ - 'Accept: application/json', - ], - ]); - $youtubeResp = curl_exec($curl); - curl_close($curl); - $resp->youtube_video_info = json_decode($youtubeResp); - if(isset($resp->youtube_video_info->items[0]->snippet->title)) - $resp->title = $resp->youtube_video_info->items[0]->snippet->title; - if(isset($resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url)) - $resp->image = $resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url; - if(isset($resp->youtube_video_info->items[0]->snippet->description)) - $resp->description = $resp->youtube_video_info->items[0]->snippet->description; - $resp->site_name = 'YouTube'; - } - break; - } - } else { - $resp->error = 'metadata:scheme'; - $response->setStatusCode(400); - return $resp; - } - - if((empty($resp->type) || isset($continueRaw)) && in_array($parsedUrl->getScheme(), ['http', 'https'])) { - $curl = curl_init((string)$parsedUrl); - curl_setopt_array($curl, [ - CURLOPT_AUTOREFERER => true, - CURLOPT_CERTINFO => false, - CURLOPT_FAILONERROR 
=> false, - CURLOPT_FOLLOWLOCATION => true, - CURLOPT_MAXREDIRS => 5, - CURLOPT_PATH_AS_IS => true, - CURLOPT_NOBODY => true, - CURLOPT_HEADER => true, - CURLOPT_RETURNTRANSFER => true, - CURLOPT_TCP_FASTOPEN => true, - CURLOPT_CONNECTTIMEOUT => 2, - CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS, - CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS, - CURLOPT_TIMEOUT => 5, - CURLOPT_DEFAULT_PROTOCOL => 'https', - CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible) Uiharu/' . UIH_VERSION, - CURLOPT_HTTPHEADER => [ - 'Accept: text/html,application/xhtml+xml', - ], - ]); - $headers = curl_exec($curl); - - if($headers === false) { - $resp->error = 'metadata:timeout'; - $resp->errorMessage = curl_error($curl); - } else { - $headersRaw = explode("\r\n", trim($headers)); - $statusCode = 200; - $headers = []; - foreach($headersRaw as $header) { - if(empty($header)) - continue; - if(strpos($header, ':') === false) { - $headParts = explode(' ', $header); - if(isset($headParts[1]) && is_numeric($headParts[1])) - $statusCode = (int)$headParts[1]; - $headers = []; - continue; - } - $headerParts = explode(':', $header, 2); - $headerParts[0] = mb_strtolower($headerParts[0]); - if(isset($headers[$headerParts[0]])) - $headers[$headerParts[0]] .= ', ' . trim($headerParts[1] ?? ''); - else - $headers[$headerParts[0]] = trim($headerParts[1] ?? ''); - } - - try { - $contentType = MediaType::parse($headers['content-type'] ?? 
''); - } catch(InvalidArgumentException $ex) { - $contentType = MediaType::parse('application/octet-stream'); - } - - $resp->content_type = MediaTypeExts::toV1($contentType); - - $isHTML = $contentType->equals('text/html'); - $isXHTML = $contentType->equals('application/xhtml+xml'); - - if($isHTML || $isXHTML) { - curl_setopt_array($curl, [ - CURLOPT_NOBODY => false, - CURLOPT_HEADER => false, - ]); - - $body = curl_exec($curl); - curl_close($curl); - - $document = new DOMDocument; - if($isXHTML) { - $document->loadXML($body); - } else { - @$document->loadHTML('getCharset() . '">' . $body); - foreach($document->childNodes as $child) - if($child->nodeType === XML_PI_NODE) { - $document->removeChild($child); - break; - } - $document->encoding = $contentType->getCharset(); - } - - $charSet = $document->encoding; - - $resp->type = 'website'; - $resp->title = ''; - - $isMetaTitle = false; - $titleTag = $document->getElementsByTagName('title'); - foreach($titleTag as $tag) { - $resp->title = trim(mb_convert_encoding($tag->textContent, 'utf-8', $charSet)); - break; - } - - $metaTags = $document->getElementsByTagName('meta'); - foreach($metaTags as $tag) { - $nameAttr = $tag->hasAttribute('name') ? $tag->getAttribute('name') : ( - $tag->hasAttribute('property') ? $tag->getAttribute('property') : '' - ); - $valueAttr = $tag->hasAttribute('value') ? $tag->getAttribute('value') : ( - $tag->hasAttribute('content') ? 
$tag->getAttribute('content') : '' - ); - $nameAttr = trim(mb_convert_encoding($nameAttr, 'utf-8', $charSet)); - $valueAttr = trim(mb_convert_encoding($valueAttr, 'utf-8', $charSet)); - if(empty($nameAttr) || empty($valueAttr)) - continue; - - switch($nameAttr) { - case 'og:title': - case 'twitter:title': - if(!$isMetaTitle) { - $isMetaTitle = true; - $resp->title = $valueAttr; - } - break; - - case 'description': - case 'og:description': - case 'twitter:description': - if(!isset($resp->description)) - $resp->description = $valueAttr; - break; - - case 'og:site_name': - $resp->site_name = $valueAttr; - break; - - case 'og:image': - case 'twitter:image': - $resp->image = $valueAttr; - break; - - case 'theme-color': - $resp->color = $valueAttr; - break; - - case 'og:type': - $resp->type = $valueAttr; - break; - } - } - } else { - $resp->is_image = $isImage = $contentType->matchCategory('image'); - $resp->is_audio = $isAudio = $contentType->matchCategory('audio'); - $resp->is_video = $isVideo = $contentType->matchCategory('video'); - - if($isImage || $isAudio || $isVideo) { - curl_close($curl); - $resp->media = new stdClass; - $ffmpeg = json_decode(shell_exec(sprintf('ffprobe -show_streams -show_format -print_format json -v quiet -i %s', escapeshellarg((string)$parsedUrl)))); - - if(!empty($ffmpeg)) { - if(!empty($ffmpeg->format)) { - $resp->media->confidence = empty($ffmpeg->format->probe_score) ? 0 : (intval($ffmpeg->format->probe_score) / 100); - if(!empty($ffmpeg->format->duration)) - $resp->media->duration = floatval($ffmpeg->format->duration); - if(!empty($ffmpeg->format->size)) - $resp->media->size = intval($ffmpeg->format->size); - if(!empty($ffmpeg->format->bit_rate)) - $resp->media->bitrate = intval($ffmpeg->format->bit_rate); - - if($isVideo || $isImage) { - if(!empty($ffmpeg->streams)) { - foreach($ffmpeg->streams as $stream) { - if(($stream->codec_type ?? null) !== 'video') - continue; - - $resp->width = intval($stream->coded_width ?? $stream->width ?? 
-1); - $resp->height = intval($stream->coded_height ?? $stream->height ?? -1); - - if(!empty($stream->display_aspect_ratio)) - $resp->media->aspect_ratio = $stream->display_aspect_ratio; - - if($isImage) - break; - } - } - } - - if($isAudio) { - function eat_tags(stdClass $dest, stdClass $source): void { - if(!empty($source->title) || !empty($source->TITLE)) - $dest->title = $source->title ?? $source->TITLE; - if(!empty($source->artist) || !empty($source->ARTIST)) - $dest->artist = $source->artist ?? $source->ARTIST; - if(!empty($source->album) || !empty($source->ALBUM)) - $dest->album = $source->album ?? $source->ALBUM; - if(!empty($source->date) || !empty($source->DATE)) - $dest->date = $source->date ?? $source->DATE; - if(!empty($source->comment) || !empty($source->COMMENT)) - $dest->comment = $source->comment ?? $source->COMMENT; - if(!empty($source->genre) || !empty($source->GENRE)) - $dest->genre = $source->genre ?? $source->GENRE; - } - - if(!empty($ffmpeg->format->tags)) { - $resp->media->tags = new stdClass; - eat_tags($resp->media->tags, $ffmpeg->format->tags); - } elseif(!empty($ffmpeg->streams)) { - // iterate over streams, fuck ogg - $resp->media->tags = new stdClass; - foreach($ffmpeg->streams as $stream) { - if(($stream->codec_type ?? null) === 'audio' && !empty($stream->tags)) { - eat_tags($resp->media->tags, $stream->tags); - if(!empty($resp->media->tags)) - break; - } - } - } - - if(empty($resp->title)) { - $audioTitle = ''; - if(!empty($resp->media->tags->artist)) - $audioTitle .= $resp->media->tags->artist . ' - '; - if(!empty($resp->media->tags->title)) - $audioTitle .= $resp->media->tags->title; - if(!empty($resp->media->tags->date)) - $audioTitle .= ' (' . $resp->media->tags->date . 
')'; - if(!empty($audioTitle)) - $resp->title = $audioTitle; - } - - if(empty($resp->description) && !empty($resp->media->tags->comment)) - $resp->description = $resp->media->tags->comment; - } - } - } - - if($includeRawResult) - $resp->ffmpeg = $ffmpeg; - } else curl_close($curl); - } - } - } - - $sw->stop(); - $resp->took = $sw->getElapsedTime() / 1000; - $respJson = json_encode($resp); - $replaceCache = $db->prepare('REPLACE INTO `uih_metadata_cache` (`metadata_url`, `metadata_resp`) VALUES (UNHEX(?), ?)'); - $replaceCache->addParameter(1, $urlHash); - $replaceCache->addParameter(2, $respJson); - $replaceCache->execute(); - } - - if(!empty($respJson)) - $response->setContent($respJson); - else - return $resp; -}; - -// Allow using POST for ridiculous urls. -$router->get('/metadata', $metaDataHandlerV1); -$router->post('/metadata', $metaDataHandlerV1); - -$router->dispatch(); +$ctx->dispatchHttp(); diff --git a/src/Apis/v1_0.php b/src/Apis/v1_0.php new file mode 100644 index 0000000..cf6efaf --- /dev/null +++ b/src/Apis/v1_0.php @@ -0,0 +1,550 @@ +db = $ctx->getDatabase(); + } + + public function match(string $url): string { + return !str_starts_with($url, '/v'); + } + + public function register(HttpFx $router): void { + $router->get('/metadata', [$this, 'handleGET']); + $router->post('/metadata', [$this, 'handlePOST']); + } + + public function eepromLookup(stdClass $resp, string $eepromFileId, string $domain = 'flashii'): void { + $resp->type = 'eeprom:file'; + $resp->color = '#8559a5'; + $resp->eeprom_file_id = $eepromFileId; + $curl = curl_init("https://eeprom.{$domain}.net/uploads/{$resp->eeprom_file_id}.json"); + curl_setopt_array($curl, [ + CURLOPT_AUTOREFERER => false, + CURLOPT_CERTINFO => false, + CURLOPT_FAILONERROR => false, + CURLOPT_FOLLOWLOCATION => false, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TCP_FASTOPEN => true, + CURLOPT_CONNECTTIMEOUT => 2, + CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, + CURLOPT_TIMEOUT => 5, + CURLOPT_USERAGENT => 'Uiharu/' . 
UIH_VERSION, + CURLOPT_HTTPHEADER => [ + 'Accept: application/json', + ], + ]); + $eepromResp = curl_exec($curl); + curl_close($curl); + $resp->eeprom_file_info = json_decode($eepromResp); + if(isset($resp->eeprom_file_info->name)) + $resp->title = $resp->eeprom_file_info->name; + if(isset($resp->eeprom_file_info->thumb)) + $resp->image = $resp->eeprom_file_info->thumb; + $resp->site_name = 'Flashii EEPROM'; + } + + public function handleGET($response, $request) { + if($request->getMethod() === 'HEAD') { + $response->setTypeJson(); + return; + } + + return $this->handler( + $response, $request, + (string)$request->getParam('url') + ); + } + + public function handlePOST($response, $request) { + if(!$request->isStreamContent()) + return 400; + + return $this->handler( + $response, $request, + $request->getContent()->getStream()->read(1000) + ); + } + + private function handler($response, $request, string $targetUrl) { + $sw = Stopwatch::startNew(); + + $resp = new stdClass; + $response->setTypeJson(); + + if(empty($targetUrl)) { + $response->setStatusCode(400); + return $resp; + } + + try { + $parsedUrl = Url::parse($targetUrl); + } catch(InvalidArgumentException $ex) { + $response->setStatusCode(400); + $resp->error = 'metadata:uri'; + return $resp; + } + + // if no scheme is specified, try https + if(!$parsedUrl->hasScheme()) + $parsedUrl->setScheme('https'); + + $resp->uri = $parsedUrl->toV1(); + + $urlHash = $parsedUrl->calculateHash(false); + + $enableCache = !UIH_DEBUG || $request->hasParam('_cache'); + $includeRawResult = UIH_DEBUG || $request->hasParam('include_raw'); + + if($enableCache) { + $cacheFetch = $this->db->prepare('SELECT `metadata_resp` FROM `uih_metadata_cache` WHERE `metadata_url` = UNHEX(?) 
AND `metadata_created` > NOW() - INTERVAL 10 MINUTE'); + $cacheFetch->addParameter(1, $urlHash); + $cacheFetch->execute(); + $cacheResult = $cacheFetch->getResult(); + if($cacheResult->next()) { + $cacheResp = json_decode($cacheResult->getString(0)); + if($cacheResp !== null) + $resp = $cacheResp; + } + } + + if(empty($resp->type)) { + $urlScheme = strtolower($parsedUrl->getScheme()); + $urlHost = strtolower($parsedUrl->getHost()); + $urlPath = '/' . trim($parsedUrl->getPath(), '/'); + + if($urlScheme === 'eeprom') { + if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) { + $parsedUrl = Url::parse('https://i.fii.moe/' . $matches[1]); + $resp->uri = $parsedUrl->toV1(); + $continueRaw = true; + $this->eepromLookup($resp, $matches[1]); + } + } elseif($urlScheme === 'devrom') { + if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) { + $parsedUrl = Url::parse('https://i.edgii.net/' . $matches[1]); + $resp->uri = $parsedUrl->toV1(); + $continueRaw = true; + $this->eepromLookup($resp, $matches[1], 'edgii'); + } + } elseif($urlScheme === 'http' || $urlScheme === 'https') { + switch($urlHost) { + case 'i.flashii.net': + case 'i.fii.moe': + $eepromFileId = substr($urlPath, 1); + case 'eeprom.flashii.net': + if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches)) + $eepromFileId = $matches[1]; + + if(!empty($eepromFileId)) { + $continueRaw = true; + $this->eepromLookup($resp, $eepromFileId); + } + break; + + case 'i.edgii.net': + $eepromFileId = substr($urlPath, 1); + case 'eeprom.edgii.net': + if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches)) + $eepromFileId = $matches[1]; + + if(!empty($eepromFileId)) { + $continueRaw = true; + $this->eepromLookup($resp, $eepromFileId, 'edgii'); + } + break; + + case 'twitter.com': case 'www.twitter.com': + case 'm.twitter.com': case 'mobile.twitter.com': + case 'nitter.net': case 'www.nitter.net': + 
if(preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $urlPath, $matches)) { + $resp->type = 'twitter:tweet'; + $resp->color = '#1da1f2'; + $resp->tweet_id = strval($matches[1] ?? '0'); + $curl = curl_init("https://api.twitter.com/2/tweets?ids={$resp->tweet_id}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified"); + curl_setopt_array($curl, [ + CURLOPT_AUTOREFERER => false, + CURLOPT_CERTINFO => false, + CURLOPT_FAILONERROR => false, + CURLOPT_FOLLOWLOCATION => false, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TCP_FASTOPEN => true, + CURLOPT_CONNECTTIMEOUT => 2, + CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, + CURLOPT_TIMEOUT => 5, + CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, + CURLOPT_HTTPHEADER => [ + 'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'), + 'Accept: application/json', + ], + ]); + $tweetResp = curl_exec($curl); + curl_close($curl); + $resp->tweet_info = json_decode($tweetResp); + if(isset($resp->tweet_info->includes->users[0]->name)) + $resp->title = $resp->tweet_info->includes->users[0]->name; + if(isset($resp->tweet_info->includes->users[0]->profile_image_url)) + $resp->image = $resp->tweet_info->includes->users[0]->profile_image_url; + if(isset($resp->tweet_info->data[0]->text)) + $resp->description = $resp->tweet_info->data[0]->text; + $resp->site_name = 'Twitter'; + break; + } + + if(preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $urlPath, $matches)) { + $resp->type = 'twitter:user'; + $resp->color = '#1da1f2'; + $resp->twitter_user_name = strval($matches[1] ?? 
''); + $curl = curl_init("https://api.twitter.com/2/users/by?usernames={$resp->twitter_user_name}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified"); + curl_setopt_array($curl, [ + CURLOPT_AUTOREFERER => false, + CURLOPT_CERTINFO => false, + CURLOPT_FAILONERROR => false, + CURLOPT_FOLLOWLOCATION => false, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TCP_FASTOPEN => true, + CURLOPT_CONNECTTIMEOUT => 2, + CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, + CURLOPT_TIMEOUT => 5, + CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, + CURLOPT_HTTPHEADER => [ + 'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'), + 'Accept: application/json', + ], + ]); + $twitUserResp = curl_exec($curl); + curl_close($curl); + $resp->twitter_user_info = json_decode($twitUserResp); + if(isset($resp->twitter_user_info->data[0]->name)) + $resp->title = $resp->twitter_user_info->data[0]->name; + if(isset($resp->twitter_user_info->data[0]->profile_image_url)) + $resp->image = $resp->twitter_user_info->data[0]->profile_image_url; + if(isset($resp->twitter_user_info->data[0]->description)) + $resp->description = $resp->twitter_user_info->data[0]->description; + $resp->site_name = 'Twitter'; + break; + } + break; + + case 'youtu.be': case 'www.youtu.be': // www. doesn't work for this, but may as well cover it + $youtubeVideoId = substr($urlPath, 1); + case 'youtube.com': case 'www.youtube.com': + case 'youtube-nocookie.com': case 'www.youtube-nocookie.com': + parse_str($parsedUrl->getQuery(), $queryString); + + if(!isset($youtubeVideoId) && $urlPath === '/watch') + $youtubeVideoId = $queryString['v'] ?? 
null; + + if(!empty($youtubeVideoId)) { + $resp->type = 'youtube:video'; + $resp->color = '#f00'; + $resp->youtube_video_id = $youtubeVideoId; + + if(isset($queryString['t'])) + $resp->youtube_start_time = $queryString['t']; + if(isset($queryString['list'])) + $resp->youtube_playlist = $queryString['list']; + if(isset($queryString['index'])) + $resp->youtube_playlist_index = $queryString['index']; + + $curl = curl_init("https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id={$resp->youtube_video_id}&key=" . Config::get('Google', 'apiKey')); + curl_setopt_array($curl, [ + CURLOPT_AUTOREFERER => false, + CURLOPT_CERTINFO => false, + CURLOPT_FAILONERROR => false, + CURLOPT_FOLLOWLOCATION => false, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TCP_FASTOPEN => true, + CURLOPT_CONNECTTIMEOUT => 2, + CURLOPT_PROTOCOLS => CURLPROTO_HTTPS, + CURLOPT_TIMEOUT => 5, + CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION, + CURLOPT_HTTPHEADER => [ + 'Accept: application/json', + ], + ]); + $youtubeResp = curl_exec($curl); + curl_close($curl); + $resp->youtube_video_info = json_decode($youtubeResp); + if(isset($resp->youtube_video_info->items[0]->snippet->title)) + $resp->title = $resp->youtube_video_info->items[0]->snippet->title; + if(isset($resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url)) + $resp->image = $resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url; + if(isset($resp->youtube_video_info->items[0]->snippet->description)) + $resp->description = $resp->youtube_video_info->items[0]->snippet->description; + $resp->site_name = 'YouTube'; + } + break; + } + } else { + $resp->error = 'metadata:scheme'; + $response->setStatusCode(400); + return $resp; + } + + if((empty($resp->type) || isset($continueRaw)) && in_array($parsedUrl->getScheme(), ['http', 'https'])) { + $curl = curl_init((string)$parsedUrl); + curl_setopt_array($curl, [ + CURLOPT_AUTOREFERER => true, + CURLOPT_CERTINFO => false, + CURLOPT_FAILONERROR 
=> false, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_MAXREDIRS => 5, + CURLOPT_PATH_AS_IS => true, + CURLOPT_NOBODY => true, + CURLOPT_HEADER => true, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TCP_FASTOPEN => true, + CURLOPT_CONNECTTIMEOUT => 2, + CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS, + CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS, + CURLOPT_TIMEOUT => 5, + CURLOPT_DEFAULT_PROTOCOL => 'https', + CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible) Uiharu/' . UIH_VERSION, + CURLOPT_HTTPHEADER => [ + 'Accept: text/html,application/xhtml+xml', + ], + ]); + $headers = curl_exec($curl); + + if($headers === false) { + $resp->error = 'metadata:timeout'; + $resp->errorMessage = curl_error($curl); + } else { + $headersRaw = explode("\r\n", trim($headers)); + $statusCode = 200; + $headers = []; + foreach($headersRaw as $header) { + if(empty($header)) + continue; + if(strpos($header, ':') === false) { + $headParts = explode(' ', $header); + if(isset($headParts[1]) && is_numeric($headParts[1])) + $statusCode = (int)$headParts[1]; + $headers = []; + continue; + } + $headerParts = explode(':', $header, 2); + $headerParts[0] = mb_strtolower($headerParts[0]); + if(isset($headers[$headerParts[0]])) + $headers[$headerParts[0]] .= ', ' . trim($headerParts[1] ?? ''); + else + $headers[$headerParts[0]] = trim($headerParts[1] ?? ''); + } + + try { + $contentType = MediaType::parse($headers['content-type'] ?? 
''); + } catch(InvalidArgumentException $ex) { + $contentType = MediaType::parse('application/octet-stream'); + } + + $resp->content_type = MediaTypeExts::toV1($contentType); + + $isHTML = $contentType->equals('text/html'); + $isXHTML = $contentType->equals('application/xhtml+xml'); + + if($isHTML || $isXHTML) { + curl_setopt_array($curl, [ + CURLOPT_NOBODY => false, + CURLOPT_HEADER => false, + ]); + + $body = curl_exec($curl); + curl_close($curl); + + $document = new DOMDocument; + if($isXHTML) { + $document->loadXML($body, LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING); + } else { + $document->loadHTML($body, LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING); + foreach($document->childNodes as $child) + if($child->nodeType === XML_PI_NODE) { + $document->removeChild($child); + break; + } + $document->encoding = $contentType->getCharset(); + } + + $charSet = $document->encoding; + + $resp->type = 'website'; + $resp->title = ''; + + $isMetaTitle = false; + $titleTag = $document->getElementsByTagName('title'); + foreach($titleTag as $tag) { + $resp->title = trim(mb_convert_encoding($tag->textContent, 'utf-8', $charSet)); + break; + } + + $metaTags = $document->getElementsByTagName('meta'); + foreach($metaTags as $tag) { + $nameAttr = $tag->hasAttribute('name') ? $tag->getAttribute('name') : ( + $tag->hasAttribute('property') ? $tag->getAttribute('property') : '' + ); + $valueAttr = $tag->hasAttribute('value') ? $tag->getAttribute('value') : ( + $tag->hasAttribute('content') ? 
$tag->getAttribute('content') : '' + ); + $nameAttr = trim(mb_convert_encoding($nameAttr, 'utf-8', $charSet)); + $valueAttr = trim(mb_convert_encoding($valueAttr, 'utf-8', $charSet)); + if(empty($nameAttr) || empty($valueAttr)) + continue; + + switch($nameAttr) { + case 'og:title': + case 'twitter:title': + if(!$isMetaTitle) { + $isMetaTitle = true; + $resp->title = $valueAttr; + } + break; + + case 'description': + case 'og:description': + case 'twitter:description': + if(!isset($resp->description)) + $resp->description = $valueAttr; + break; + + case 'og:site_name': + $resp->site_name = $valueAttr; + break; + + case 'og:image': + case 'twitter:image': + $resp->image = $valueAttr; + break; + + case 'theme-color': + $resp->color = $valueAttr; + break; + + case 'og:type': + $resp->type = $valueAttr; + break; + } + } + } else { + $resp->is_image = $isImage = $contentType->matchCategory('image'); + $resp->is_audio = $isAudio = $contentType->matchCategory('audio'); + $resp->is_video = $isVideo = $contentType->matchCategory('video'); + + if($isImage || $isAudio || $isVideo) { + curl_close($curl); + $resp->media = new stdClass; + $ffmpeg = json_decode(shell_exec(sprintf('ffprobe -show_streams -show_format -print_format json -v quiet -i %s', escapeshellarg((string)$parsedUrl)))); + + if(!empty($ffmpeg)) { + if(!empty($ffmpeg->format)) { + $resp->media->confidence = empty($ffmpeg->format->probe_score) ? 0 : (intval($ffmpeg->format->probe_score) / 100); + if(!empty($ffmpeg->format->duration)) + $resp->media->duration = floatval($ffmpeg->format->duration); + if(!empty($ffmpeg->format->size)) + $resp->media->size = intval($ffmpeg->format->size); + if(!empty($ffmpeg->format->bit_rate)) + $resp->media->bitrate = intval($ffmpeg->format->bit_rate); + + if($isVideo || $isImage) { + if(!empty($ffmpeg->streams)) { + foreach($ffmpeg->streams as $stream) { + if(($stream->codec_type ?? null) !== 'video') + continue; + + $resp->width = intval($stream->coded_width ?? $stream->width ?? 
-1); + $resp->height = intval($stream->coded_height ?? $stream->height ?? -1); + + if(!empty($stream->display_aspect_ratio)) + $resp->media->aspect_ratio = $stream->display_aspect_ratio; + + if($isImage) + break; + } + } + } + + if($isAudio) { + function eat_tags(stdClass $dest, stdClass $source): void { + if(!empty($source->title) || !empty($source->TITLE)) + $dest->title = $source->title ?? $source->TITLE; + if(!empty($source->artist) || !empty($source->ARTIST)) + $dest->artist = $source->artist ?? $source->ARTIST; + if(!empty($source->album) || !empty($source->ALBUM)) + $dest->album = $source->album ?? $source->ALBUM; + if(!empty($source->date) || !empty($source->DATE)) + $dest->date = $source->date ?? $source->DATE; + if(!empty($source->comment) || !empty($source->COMMENT)) + $dest->comment = $source->comment ?? $source->COMMENT; + if(!empty($source->genre) || !empty($source->GENRE)) + $dest->genre = $source->genre ?? $source->GENRE; + } + + if(!empty($ffmpeg->format->tags)) { + $resp->media->tags = new stdClass; + eat_tags($resp->media->tags, $ffmpeg->format->tags); + } elseif(!empty($ffmpeg->streams)) { + // iterate over streams, fuck ogg + $resp->media->tags = new stdClass; + foreach($ffmpeg->streams as $stream) { + if(($stream->codec_type ?? null) === 'audio' && !empty($stream->tags)) { + eat_tags($resp->media->tags, $stream->tags); + if(!empty($resp->media->tags)) + break; + } + } + } + + if(empty($resp->title)) { + $audioTitle = ''; + if(!empty($resp->media->tags->artist)) + $audioTitle .= $resp->media->tags->artist . ' - '; + if(!empty($resp->media->tags->title)) + $audioTitle .= $resp->media->tags->title; + if(!empty($resp->media->tags->date)) + $audioTitle .= ' (' . $resp->media->tags->date . 
')'; + if(!empty($audioTitle)) + $resp->title = $audioTitle; + } + + if(empty($resp->description) && !empty($resp->media->tags->comment)) + $resp->description = $resp->media->tags->comment; + } + } + } + + if($includeRawResult) + $resp->ffmpeg = $ffmpeg; + } else curl_close($curl); + } + } + } + + $sw->stop(); + $resp->took = $sw->getElapsedTime() / 1000; + $respJson = json_encode($resp); + $replaceCache = $this->db->prepare('REPLACE INTO `uih_metadata_cache` (`metadata_url`, `metadata_resp`) VALUES (UNHEX(?), ?)'); + $replaceCache->addParameter(1, $urlHash); + $replaceCache->addParameter(2, $respJson); + $replaceCache->execute(); + } + + if(!empty($respJson)) + $response->setContent($respJson); + else + return $resp; + } +} diff --git a/src/IApi.php b/src/IApi.php new file mode 100644 index 0000000..3e39935 --- /dev/null +++ b/src/IApi.php @@ -0,0 +1,9 @@ +database = $database; + }
+
+    /**
+     * Returns the database connection stored on this context.
+     */
+    public function getDatabase(): IDbConnection {
+        return $this->database;
+    }
+
+    /**
+     * Returns the HTTP router.
+     *
+     * In the code visible here the router is only assigned by setupHttp().
+     */
+    public function getRouter(): HttpFx {
+        return $this->router;
+    }
+
+    /**
+     * Decides whether a request Origin may receive CORS headers.
+     *
+     * Only the host component of $origin is compared. The origin is accepted
+     * when its host equals $_SERVER['HTTP_HOST'], when the CORS origins list
+     * from Config is empty, or when the host appears in that list.
+     *
+     * @param string $origin Value of the Origin request header.
+     * @return bool True if the origin is accepted.
+     */
+    public function isOriginAllowed(string $origin): bool {
+        // parse_url() yields null when the value has no host component and
+        // false when it is malformed; normalise both to '' rather than
+        // handing a non-string to mb_strtolower().
+        $host = parse_url($origin, PHP_URL_HOST);
+        $host = is_string($host) ? mb_strtolower($host) : '';
+
+        // HTTP_HOST may be absent; a missing value can never match a string.
+        if($host === ($_SERVER['HTTP_HOST'] ?? null))
+            return true;
+
+        $allowed = Config::get('CORS', 'origins', []);
+        if(empty($allowed))
+            return true;
+
+        // Strict comparison so the host is never matched through type juggling.
+        return in_array($host, $allowed, true);
+    }
+
+    /**
+     * Creates the router and installs the handlers shared by every API.
+     *
+     * Registers, in order: a handler that sets the powered-by value, an
+     * Origin check, an OPTIONS responder and the GET / index route.
+     */
+    public function setupHttp(): void {
+        $this->router = new HttpFx;
+        $this->router->use('/', function($response) {
+            $response->setPoweredBy('Uiharu');
+        });
+
+        $this->router->use('/', function($response, $request) {
+            $origin = $request->getHeaderLine('Origin');
+
+            // Requests without an Origin header pass through untouched.
+            if(!empty($origin)) {
+                // NOTE(review): the returned integer is presumably used by
+                // HttpFx as the response status code -- confirm.
+                if(!$this->isOriginAllowed($origin))
+                    return 403;
+
+                // Echo the accepted origin back and mark the response as
+                // varying by Origin.
+                $response->setHeader('Access-Control-Allow-Origin', $origin);
+                $response->setHeader('Vary', 'Origin');
+            }
+        });
+
+        $this->router->use('/', function($response, $request) {
+            if($request->getMethod() === 'OPTIONS') {
+                $response->setHeader('Access-Control-Allow-Methods', 'OPTIONS, GET, POST');
+                return 204;
+            }
+        });
+
+        $this->router->get('/', function($response) {
+            $response->accelRedirect('/index.html');
+            $response->setContentType('text/html; charset=utf-8');
+        });
+    }
+
+    /**
+     * Forwards all arguments unchanged to the router's dispatch().
+     */
+    public function dispatchHttp(...$args): void {
+        $this->router->dispatch(...$args);
+    }
+
+    /**
+     * Appends an API handler to the list consulted by matchApi().
+     *
+     * @param IApi $api Handler to add.
+     */
+    public function registerApi(IApi $api): void {
+        $this->apis[] = $api;
+    }
+
+    /**
+     * Lets the first registered API that matches the request path register
+     * itself on the router.
+     *
+     * The path is reduced to a single leading slash with no trailing slash
+     * before matching. Uses $this->router, so setupHttp() has to run first.
+     *
+     * @param string $reqPath Request URI; only its path component is used.
+     */
+    public function matchApi(string $reqPath): void {
+        // parse_url() returns null when the URI has no path and false when
+        // it cannot be parsed; treat both as the root path instead of
+        // passing a non-string to trim().
+        $path = parse_url($reqPath, PHP_URL_PATH);
+        $reqPath = '/' . trim(is_string($path) ? $path : '', '/');
+
+        foreach($this->apis as $api)
+            if($api->match($reqPath)) {
+                $api->register($this->router);
+                break;
+            }
+    }
+}
diff --git a/uiharu.php b/uiharu.php index f3e0be0..89fcbc1 100644 --- a/uiharu.php +++ b/uiharu.php @@ -12,7 +12,7 @@ define('UIH_DEBUG', is_file(UIH_ROOT . '/.debug')); define('UIH_PUBLIC', UIH_ROOT . '/public'); define('UIH_SOURCE', UIH_ROOT . '/src'); define('UIH_LIBRARY', UIH_ROOT . '/lib'); -define('UIH_VERSION', '20220715'); +define('UIH_VERSION', '20220716'); require_once UIH_LIBRARY . '/index/index.php'; @@ -36,3 +36,5 @@ try { echo '

Unable to connect to database

'; die($ex->getMessage()); } + +$ctx = new UihContext($db);