db = $ctx->getDatabase();
    }

    /**
     * Reports whether the given path does NOT start with "/v".
     *
     * NOTE(review): the declared return type is `string` while the expression is a
     * bool; without strict_types this is coerced to "1" / "". The signature is left
     * untouched because an interface outside this view may dictate it -- confirm.
     */
    public function match(string $url): string {
        return !str_starts_with($url, '/v');
    }

    /**
     * Registers the GET and POST handlers for the /metadata route.
     */
    public function register(HttpFx $router): void {
        $router->get('/metadata', [$this, 'handleGET']);
        $router->post('/metadata', [$this, 'handlePOST']);
    }

    /**
     * Fills $resp with information about an EEPROM upload.
     *
     * Always sets type, color, eeprom_file_id, eeprom_file_info and site_name;
     * title and image are only set when the upload info provides name / thumb.
     *
     * @param stdClass $resp Response object that is mutated in place.
     * @param string $eepromFileId Upload identifier, stored as-is in the response.
     * @param string $domain Interpolated into the host name as eeprom.{$domain}.net.
     */
    public function eepromLookup(stdClass $resp, string $eepromFileId, string $domain = 'flashii'): void {
        $resp->type = 'eeprom:file';
        $resp->color = '#8559a5';
        $resp->eeprom_file_id = $eepromFileId;

        // The id may come straight from a request path, so it is encoded before being
        // placed in the upstream URL; ids matching [A-Za-z0-9-_]+ are unaffected.
        $resp->eeprom_file_info = $this->fetchJson(
            "https://eeprom.{$domain}.net/uploads/" . rawurlencode($eepromFileId) . '.json'
        );

        if(isset($resp->eeprom_file_info->name))
            $resp->title = $resp->eeprom_file_info->name;
        if(isset($resp->eeprom_file_info->thumb))
            $resp->image = $resp->eeprom_file_info->thumb;

        $resp->site_name = 'Flashii EEPROM';
    }

    /**
     * GET /metadata: looks up the URL given in the "url" parameter.
     * HEAD requests only get the JSON content type set and no lookup is performed.
     */
    public function handleGET($response, $request) {
        if($request->getMethod() === 'HEAD') {
            $response->setTypeJson();
            return;
        }

        return $this->handler(
            $response, $request,
            (string)$request->getParam('url')
        );
    }

    /**
     * POST /metadata: looks up the URL read from the request body (at most
     * 1000 units as counted by the stream's read()). Returns 400 when the
     * request does not carry stream content.
     */
    public function handlePOST($response, $request) {
        if(!$request->isStreamContent())
            return 400;

        return $this->handler(
            $response, $request,
            $request->getContent()->getStream()->read(1000)
        );
    }

    /**
     * Resolves metadata for $targetUrl and writes it to $response as JSON.
     *
     * Flow: validate and parse the URL, try the 10 minute database cache, then run
     * the site specific lookups (EEPROM, Twitter, YouTube) and/or a generic fetch
     * that parses HTML meta tags or probes media files with ffprobe. Freshly
     * built results are written back to the cache.
     *
     * @return stdClass|null The response object when no JSON body was set on
     *                       $response (invalid input, cache hit); otherwise nothing,
     *                       after the encoded result was passed to setContent().
     */
    private function handler($response, $request, string $targetUrl) {
        $sw = Stopwatch::startNew();
        $resp = new stdClass;

        $response->setTypeJson();

        if(empty($targetUrl)) {
            $response->setStatusCode(400);
            return $resp;
        }

        try {
            $parsedUrl = Url::parse($targetUrl);
        } catch(InvalidArgumentException $ex) {
            $response->setStatusCode(400);
            $resp->error = 'metadata:uri';
            return $resp;
        }

        // if no scheme is specified, try https
        if(!$parsedUrl->hasScheme())
            $parsedUrl->setScheme('https');

        $resp->uri = $parsedUrl->toV1();
        $urlHash = $parsedUrl->calculateHash(false);

        $enableCache = !UIH_DEBUG || $request->hasParam('_cache');
        $includeRawResult = UIH_DEBUG || $request->hasParam('include_raw');

        if($enableCache) {
            $cacheFetch = $this->db->prepare('SELECT `metadata_resp` FROM `uih_metadata_cache` WHERE `metadata_url` = UNHEX(?) AND `metadata_created` > NOW() - INTERVAL 10 MINUTE');
            $cacheFetch->addParameter(1, $urlHash);
            $cacheFetch->execute();
            $cacheResult = $cacheFetch->getResult();

            if($cacheResult->next()) {
                $cacheResp = json_decode($cacheResult->getString(0));
                if($cacheResp !== null)
                    $resp = $cacheResp;
            }
        }

        // A cached response already carries a type, which skips the whole lookup.
        if(empty($resp->type)) {
            $urlScheme = strtolower($parsedUrl->getScheme());
            $urlHost = strtolower($parsedUrl->getHost());
            $urlPath = '/' . trim($parsedUrl->getPath(), '/');

            if($urlScheme === 'eeprom') {
                if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) {
                    $parsedUrl = Url::parse('https://i.fii.moe/' . $matches[1]);
                    $resp->uri = $parsedUrl->toV1();
                    $continueRaw = true;
                    $this->eepromLookup($resp, $matches[1]);
                }
            } elseif($urlScheme === 'devrom') {
                if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl->getPath(), $matches)) {
                    $parsedUrl = Url::parse('https://i.edgii.net/' . $matches[1]);
                    $resp->uri = $parsedUrl->toV1();
                    $continueRaw = true;
                    $this->eepromLookup($resp, $matches[1], 'edgii');
                }
            } elseif($urlScheme === 'http' || $urlScheme === 'https') {
                switch($urlHost) {
                    case 'i.flashii.net':
                    case 'i.fii.moe':
                        $eepromFileId = substr($urlPath, 1);
                        // intentional fall-through: shares the lookup below
                    case 'eeprom.flashii.net':
                        if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
                            $eepromFileId = $matches[1];

                        if(!empty($eepromFileId)) {
                            $continueRaw = true;
                            $this->eepromLookup($resp, $eepromFileId);
                        }
                        break;

                    case 'i.edgii.net':
                        $eepromFileId = substr($urlPath, 1);
                        // intentional fall-through: shares the lookup below
                    case 'eeprom.edgii.net':
                        if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
                            $eepromFileId = $matches[1];

                        if(!empty($eepromFileId)) {
                            $continueRaw = true;
                            $this->eepromLookup($resp, $eepromFileId, 'edgii');
                        }
                        break;

                    case 'twitter.com':
                    case 'www.twitter.com':
                    case 'm.twitter.com':
                    case 'mobile.twitter.com':
                    case 'nitter.net':
                    case 'www.nitter.net':
                        if(preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $urlPath, $matches)) {
                            $resp->type = 'twitter:tweet';
                            $resp->color = '#1da1f2';
                            $resp->tweet_id = strval($matches[1] ?? '0');
                            $resp->tweet_info = $this->fetchJson(
                                "https://api.twitter.com/2/tweets?ids={$resp->tweet_id}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified",
                                ['Authorization: Bearer ' . Config::get('Twitter', 'apiToken')]
                            );

                            if(isset($resp->tweet_info->includes->users[0]->name))
                                $resp->title = $resp->tweet_info->includes->users[0]->name;
                            if(isset($resp->tweet_info->includes->users[0]->profile_image_url))
                                $resp->image = $resp->tweet_info->includes->users[0]->profile_image_url;
                            if(isset($resp->tweet_info->data[0]->text))
                                $resp->description = $resp->tweet_info->data[0]->text;

                            $resp->site_name = 'Twitter';
                            break;
                        }

                        if(preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $urlPath, $matches)) {
                            $resp->type = 'twitter:user';
                            $resp->color = '#1da1f2';
                            $resp->twitter_user_name = strval($matches[1] ?? '');
                            $resp->twitter_user_info = $this->fetchJson(
                                "https://api.twitter.com/2/users/by?usernames={$resp->twitter_user_name}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified",
                                ['Authorization: Bearer ' . Config::get('Twitter', 'apiToken')]
                            );

                            if(isset($resp->twitter_user_info->data[0]->name))
                                $resp->title = $resp->twitter_user_info->data[0]->name;
                            if(isset($resp->twitter_user_info->data[0]->profile_image_url))
                                $resp->image = $resp->twitter_user_info->data[0]->profile_image_url;
                            if(isset($resp->twitter_user_info->data[0]->description))
                                $resp->description = $resp->twitter_user_info->data[0]->description;

                            $resp->site_name = 'Twitter';
                            break;
                        }
                        break;

                    case 'youtu.be':
                    case 'www.youtu.be': // www. doesn't work for this, but may as well cover it
                        $youtubeVideoId = substr($urlPath, 1);
                        // intentional fall-through: shares the lookup below
                    case 'youtube.com':
                    case 'www.youtube.com':
                    case 'youtube-nocookie.com':
                    case 'www.youtube-nocookie.com':
                        parse_str($parsedUrl->getQuery(), $queryString);

                        if(!isset($youtubeVideoId) && $urlPath === '/watch')
                            $youtubeVideoId = $queryString['v'] ?? null;

                        // parse_str() yields an array for "v[]=..."; only a plain string is a usable id.
                        if(!empty($youtubeVideoId) && is_string($youtubeVideoId)) {
                            $resp->type = 'youtube:video';
                            $resp->color = '#f00';
                            $resp->youtube_video_id = $youtubeVideoId;

                            if(isset($queryString['t']))
                                $resp->youtube_start_time = $queryString['t'];
                            if(isset($queryString['list']))
                                $resp->youtube_playlist = $queryString['list'];
                            if(isset($queryString['index']))
                                $resp->youtube_playlist_index = $queryString['index'];

                            // The id is request-controlled: encode it so it cannot add or
                            // override query parameters of the API call.
                            $resp->youtube_video_info = $this->fetchJson(
                                'https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id='
                                . rawurlencode($youtubeVideoId)
                                . '&key=' . Config::get('Google', 'apiKey')
                            );

                            if(isset($resp->youtube_video_info->items[0]->snippet->title))
                                $resp->title = $resp->youtube_video_info->items[0]->snippet->title;
                            if(isset($resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url))
                                $resp->image = $resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url;
                            if(isset($resp->youtube_video_info->items[0]->snippet->description))
                                $resp->description = $resp->youtube_video_info->items[0]->snippet->description;

                            $resp->site_name = 'YouTube';
                        }
                        break;
                }
            } else {
                $resp->error = 'metadata:scheme';
                $response->setStatusCode(400);
                return $resp;
            }

            // Generic fetch: runs when no site specific handler claimed the URL, or when
            // one explicitly asked for the raw resource to be inspected as well.
            if((empty($resp->type) || isset($continueRaw)) && in_array($parsedUrl->getScheme(), ['http', 'https'], true)) {
                $curl = curl_init((string)$parsedUrl);
                curl_setopt_array($curl, [
                    CURLOPT_AUTOREFERER => true,
                    CURLOPT_CERTINFO => false,
                    CURLOPT_FAILONERROR => false,
                    CURLOPT_FOLLOWLOCATION => true,
                    CURLOPT_MAXREDIRS => 5,
                    CURLOPT_PATH_AS_IS => true,
                    CURLOPT_NOBODY => true,
                    CURLOPT_HEADER => true,
                    CURLOPT_RETURNTRANSFER => true,
                    CURLOPT_TCP_FASTOPEN => true,
                    CURLOPT_CONNECTTIMEOUT => 2,
                    CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
                    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
                    CURLOPT_TIMEOUT => 5,
                    CURLOPT_DEFAULT_PROTOCOL => 'https',
                    CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible) Uiharu/' . UIH_VERSION,
                    CURLOPT_HTTPHEADER => [
                        'Accept: text/html,application/xhtml+xml',
                    ],
                ]);

                // First pass requests headers only (CURLOPT_NOBODY) to learn the content type.
                $headers = curl_exec($curl);

                if($headers === false) {
                    $resp->error = 'metadata:timeout';
                    $resp->errorMessage = curl_error($curl);
                    curl_close($curl);
                } else {
                    $headersRaw = explode("\r\n", trim($headers));
                    $headers = [];

                    foreach($headersRaw as $header) {
                        if(empty($header))
                            continue;

                        // A line without a colon is treated as a status line; with redirects
                        // followed there is one per hop, so only the last hop's headers are kept.
                        if(!str_contains($header, ':')) {
                            $headers = [];
                            continue;
                        }

                        $headerParts = explode(':', $header, 2);
                        $headerParts[0] = mb_strtolower($headerParts[0]);

                        if(isset($headers[$headerParts[0]]))
                            $headers[$headerParts[0]] .= ', ' . trim($headerParts[1] ?? '');
                        else
                            $headers[$headerParts[0]] = trim($headerParts[1] ?? '');
                    }

                    try {
                        $contentType = MediaType::parse($headers['content-type'] ?? '');
                    } catch(InvalidArgumentException $ex) {
                        $contentType = MediaType::parse('application/octet-stream');
                    }

                    $resp->content_type = MediaTypeExts::toV1($contentType);

                    $isHTML = $contentType->equals('text/html');
                    $isXHTML = $contentType->equals('application/xhtml+xml');

                    if($isHTML || $isXHTML) {
                        // Second pass on the same handle, this time fetching the body.
                        curl_setopt_array($curl, [
                            CURLOPT_NOBODY => false,
                            CURLOPT_HEADER => false,
                        ]);

                        $body = curl_exec($curl);
                        curl_close($curl);

                        $document = new DOMDocument;

                        // loadXML()/loadHTML() throw on an empty source and curl_exec() may
                        // return false, so only parse when there is something to parse.
                        if(is_string($body) && $body !== '') {
                            if($isXHTML) {
                                $document->loadXML($body, LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING);
                            } else {
                                $document->loadHTML($body, LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING);

                                // drop the first top-level processing instruction node, if any
                                foreach($document->childNodes as $child)
                                    if($child->nodeType === XML_PI_NODE) {
                                        $document->removeChild($child);
                                        break;
                                    }

                                $document->encoding = $contentType->getCharset();
                            }
                        }

                        $charSet = $document->encoding;

                        $resp->type = 'website';
                        $resp->title = '';

                        $isMetaTitle = false;
                        $titleTag = $document->getElementsByTagName('title');
                        foreach($titleTag as $tag) {
                            // only the first <title> element is used
                            $resp->title = trim(mb_convert_encoding($tag->textContent, 'utf-8', $charSet));
                            break;
                        }

                        $metaTags = $document->getElementsByTagName('meta');
                        foreach($metaTags as $tag) {
                            $nameAttr = $tag->hasAttribute('name') ? $tag->getAttribute('name') : (
                                $tag->hasAttribute('property') ? $tag->getAttribute('property') : ''
                            );
                            $valueAttr = $tag->hasAttribute('value') ? $tag->getAttribute('value') : (
                                $tag->hasAttribute('content') ? $tag->getAttribute('content') : ''
                            );

                            $nameAttr = trim(mb_convert_encoding($nameAttr, 'utf-8', $charSet));
                            $valueAttr = trim(mb_convert_encoding($valueAttr, 'utf-8', $charSet));

                            if(empty($nameAttr) || empty($valueAttr))
                                continue;

                            switch($nameAttr) {
                                case 'og:title':
                                case 'twitter:title':
                                    // the first meta title wins and overrides <title>
                                    if(!$isMetaTitle) {
                                        $isMetaTitle = true;
                                        $resp->title = $valueAttr;
                                    }
                                    break;

                                case 'description':
                                case 'og:description':
                                case 'twitter:description':
                                    if(!isset($resp->description))
                                        $resp->description = $valueAttr;
                                    break;

                                case 'og:site_name':
                                    $resp->site_name = $valueAttr;
                                    break;

                                case 'og:image':
                                case 'twitter:image':
                                    $resp->image = $valueAttr;
                                    break;

                                case 'theme-color':
                                    $resp->color = $valueAttr;
                                    break;

                                case 'og:type':
                                    $resp->type = $valueAttr;
                                    break;
                            }
                        }
                    } else {
                        $resp->is_image = $isImage = $contentType->matchCategory('image');
                        $resp->is_audio = $isAudio = $contentType->matchCategory('audio');
                        $resp->is_video = $isVideo = $contentType->matchCategory('video');

                        if($isImage || $isAudio || $isVideo) {
                            curl_close($curl);

                            $resp->media = new stdClass;

                            // shell_exec() returns a non-string when the command fails or
                            // prints nothing; only decode real output.
                            $probeOutput = shell_exec(sprintf('ffprobe -show_streams -show_format -print_format json -v quiet -i %s', escapeshellarg((string)$parsedUrl)));
                            $ffmpeg = is_string($probeOutput) ? json_decode($probeOutput) : null;

                            if(!empty($ffmpeg)) {
                                if(!empty($ffmpeg->format)) {
                                    $resp->media->confidence = empty($ffmpeg->format->probe_score) ? 0 : (intval($ffmpeg->format->probe_score) / 100);

                                    if(!empty($ffmpeg->format->duration))
                                        $resp->media->duration = floatval($ffmpeg->format->duration);
                                    if(!empty($ffmpeg->format->size))
                                        $resp->media->size = intval($ffmpeg->format->size);
                                    if(!empty($ffmpeg->format->bit_rate))
                                        $resp->media->bitrate = intval($ffmpeg->format->bit_rate);

                                    if($isVideo || $isImage) {
                                        if(!empty($ffmpeg->streams)) {
                                            foreach($ffmpeg->streams as $stream) {
                                                if(($stream->codec_type ?? null) !== 'video')
                                                    continue;

                                                $resp->width = intval($stream->coded_width ?? $stream->width ?? -1);
                                                $resp->height = intval($stream->coded_height ?? $stream->height ?? -1);

                                                if(!empty($stream->display_aspect_ratio))
                                                    $resp->media->aspect_ratio = $stream->display_aspect_ratio;

                                                // images stop at the first video stream; videos keep
                                                // going, so the last video stream's values remain
                                                if($isImage)
                                                    break;
                                            }
                                        }
                                    }

                                    if($isAudio) {
                                        // Copies the known tags, accepting lower or upper case keys.
                                        // A closure rather than a named function: declaring a function
                                        // here would fail with a redeclaration error on a second call.
                                        $eatTags = static function(stdClass $dest, stdClass $source): void {
                                            foreach(['title', 'artist', 'album', 'date', 'comment', 'genre'] as $tagName) {
                                                $upperName = strtoupper($tagName);
                                                if(!empty($source->{$tagName}) || !empty($source->{$upperName}))
                                                    $dest->{$tagName} = $source->{$tagName} ?? $source->{$upperName};
                                            }
                                        };

                                        if(!empty($ffmpeg->format->tags)) {
                                            $resp->media->tags = new stdClass;
                                            $eatTags($resp->media->tags, $ffmpeg->format->tags);
                                        } elseif(!empty($ffmpeg->streams)) {
                                            // no container level tags (the original note blames ogg):
                                            // fall back to the tags of the audio streams
                                            $resp->media->tags = new stdClass;
                                            foreach($ffmpeg->streams as $stream) {
                                                if(($stream->codec_type ?? null) === 'audio' && !empty($stream->tags)) {
                                                    $eatTags($resp->media->tags, $stream->tags);

                                                    // empty() is always false for objects, so test
                                                    // whether any tag was actually copied
                                                    if((array)$resp->media->tags !== [])
                                                        break;
                                                }
                                            }
                                        }

                                        if(empty($resp->title)) {
                                            $audioTitle = '';
                                            if(!empty($resp->media->tags->artist))
                                                $audioTitle .= $resp->media->tags->artist . ' - ';
                                            if(!empty($resp->media->tags->title))
                                                $audioTitle .= $resp->media->tags->title;
                                            if(!empty($resp->media->tags->date))
                                                $audioTitle .= ' (' . $resp->media->tags->date . ')';

                                            if(!empty($audioTitle))
                                                $resp->title = $audioTitle;
                                        }

                                        if(empty($resp->description) && !empty($resp->media->tags->comment))
                                            $resp->description = $resp->media->tags->comment;
                                    }
                                }
                            }

                            if($includeRawResult)
                                $resp->ffmpeg = $ffmpeg;
                        } else {
                            curl_close($curl);
                        }
                    }
                }
            }

            $sw->stop();
            $resp->took = $sw->getElapsedTime() / 1000;
            $respJson = json_encode($resp);

            // Stored regardless of $enableCache; that flag only gates reading.
            $replaceCache = $this->db->prepare('REPLACE INTO `uih_metadata_cache` (`metadata_url`, `metadata_resp`) VALUES (UNHEX(?), ?)');
            $replaceCache->addParameter(1, $urlHash);
            $replaceCache->addParameter(2, $respJson);
            $replaceCache->execute();
        }

        // $respJson only exists when the lookup above ran (i.e. not on a cache hit).
        if(!empty($respJson))
            $response->setContent($respJson);
        else
            return $resp;
    }

    /**
     * Performs an HTTPS GET against a JSON API and returns the decoded body.
     *
     * Redirects are not followed; the connect timeout is 2 seconds and the total
     * timeout 5 seconds.
     *
     * @param string $url Fully built request URL.
     * @param string[] $extraHeaders Header lines sent before the Accept header.
     * @return mixed Result of json_decode(), or null when the transfer failed or
     *               the body was not valid JSON.
     */
    private function fetchJson(string $url, array $extraHeaders = []): mixed {
        $curl = curl_init($url);
        curl_setopt_array($curl, [
            CURLOPT_AUTOREFERER => false,
            CURLOPT_CERTINFO => false,
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TCP_FASTOPEN => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
            CURLOPT_HTTPHEADER => [
                ...$extraHeaders,
                'Accept: application/json',
            ],
        ]);

        $body = curl_exec($curl);
        curl_close($curl);

        // curl_exec() returns false on failure; never hand that to json_decode()
        return is_string($body) ? json_decode($body) : null;
    }
}