(string)$mediaType,
        'type' => $mediaType->getCategory(),
        'subtype' => $mediaType->getKind(),
    ];

    if(!empty($suffix = $mediaType->getSuffix()))
        $parts['suffix'] = $suffix;
    if(!empty($params = $mediaType->getParams()))
        $parts['params'] = $params;

    return $parts;
}

/**
 * Parses a URL with parse_url() and adds the legacy keys older clients expect.
 *
 * On top of the parse_url() components the result carries:
 *  - 'uri': the URL rebuilt by uih_build_url()
 *  - 'password': a copy of 'pass', when present
 *
 * @param string $url URL to parse.
 * @return array|false Component map, or false when parse_url() rejects the input.
 */
function uih_parse_url(string $url): array|false {
    $parts = parse_url($url);
    if($parts === false)
        return false;

    // v1 compat
    $parts['uri'] = uih_build_url($parts);
    if(isset($parts['pass']))
        $parts['password'] = $parts['pass'];

    return $parts;
}

/**
 * Rebuilds a URL string from parse_url()-style components.
 *
 * Recognised keys: scheme, user, pass, host, port, path, query, fragment.
 * Unknown keys (such as the 'uri'/'password' compat keys) are ignored.
 *
 * Query keys/values and the fragment are percent-decoded before being
 * re-encoded, so input that is already encoded is not encoded twice.
 *
 * @param array $parts Component map as produced by parse_url().
 * @return string The assembled URL.
 */
function uih_build_url(array $parts): string {
    $string = '';

    if(!empty($parts['scheme']))
        $string .= $parts['scheme'] . ':';

    $authority = '';

    if(isset($parts['user']) || isset($parts['pass'])) {
        if(isset($parts['user']))
            $authority .= $parts['user'];
        if(isset($parts['pass']))
            $authority .= ':' . $parts['pass'];
        $authority .= '@';
    }

    if(isset($parts['host'])) {
        $authority .= $parts['host'];
        if(isset($parts['port']))
            $authority .= ':' . $parts['port'];
    }

    // compare against '' rather than empty(): "0" is a legitimate host
    $hasAuthority = $authority !== '';
    if($hasAuthority)
        $string .= '//' . $authority;

    $path = (string)($parts['path'] ?? '');

    if($hasAuthority && ($path === '' || $path[0] !== '/')) {
        // a path following an authority must start with a slash
        $string .= '/';
    } elseif(!$hasAuthority && str_starts_with($path, '//')) {
        // without an authority a leading "//" would be read as one, collapse it;
        // relative paths such as "a/b" are left untouched
        $path = '/' . trim($path, '/');
    }

    $string .= $path;

    $query = (string)($parts['query'] ?? '');
    if($query !== '') {
        $pairs = [];

        foreach(explode('&', $query) as $queryPart) {
            $kvp = explode('=', $queryPart, 2);

            // decode first so "%20" stays "%20" instead of becoming "%2520"
            $pair = rawurlencode(rawurldecode($kvp[0]));
            if(isset($kvp[1]))
                $pair .= '=' . rawurlencode(rawurldecode($kvp[1]));

            $pairs[] = $pair;
        }

        $string .= '?' . implode('&', $pairs);
    }

    $fragment = (string)($parts['fragment'] ?? '');
    if($fragment !== '')
        $string .= '#' . rawurlencode(rawurldecode($fragment));

    return $string;
}

/**
 * Fills $resp with metadata for an EEPROM upload.
 *
 * Requests https://eeprom.{$domain}.net/uploads/{id}.json and copies the
 * 'name' and 'thumb' fields of the reply (when present) into the response
 * title and image. A failed request leaves eeprom_file_info as null.
 *
 * @param stdClass $resp Response object, modified in place.
 * @param string $eepromFileId Upload id; may come straight from a user supplied path.
 * @param string $domain Second-level domain of the EEPROM instance.
 */
function uih_eeprom_lookup(stdClass $resp, string $eepromFileId, string $domain = 'flashii'): void {
    $resp->type = 'eeprom:file';
    $resp->color = '#8559a5';
    $resp->eeprom_file_id = $eepromFileId;

    // the id is not always validated by the caller, encode it so it cannot
    // add path segments or a query string to the upstream request
    $curl = curl_init(sprintf(
        'https://eeprom.%s.net/uploads/%s.json',
        $domain,
        rawurlencode($eepromFileId)
    ));
    curl_setopt_array($curl, [
        CURLOPT_AUTOREFERER => false,
        CURLOPT_CERTINFO => false,
        CURLOPT_FAILONERROR => false,
        CURLOPT_FOLLOWLOCATION => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TCP_FASTOPEN => true,
        CURLOPT_CONNECTTIMEOUT => 2,
        CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
        CURLOPT_TIMEOUT => 5,
        CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
        CURLOPT_HTTPHEADER => [
            'Accept: application/json',
        ],
    ]);
    $eepromResp = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false on transport errors
    $resp->eeprom_file_info = is_string($eepromResp) ? json_decode($eepromResp) : null;

    if(isset($resp->eeprom_file_info->name))
        $resp->title = $resp->eeprom_file_info->name;
    if(isset($resp->eeprom_file_info->thumb))
        $resp->image = $resp->eeprom_file_info->thumb;

    $resp->site_name = 'Flashii EEPROM';
}

// directory holding the per-URL semaphore key files
if(!is_dir(UIH_SEM_PATH))
    mkdir(UIH_SEM_PATH, 0777, true);

header('X-Powered-By: Uiharu');

// drop cache rows older than a week
$db->execute('DELETE FROM `uih_metadata_cache` WHERE `metadata_created` < NOW() - INTERVAL 7 DAY');

$reqMethod = filter_input(INPUT_SERVER, 'REQUEST_METHOD');
$reqPath = '/' .
trim((string)parse_url((string)filter_input(INPUT_SERVER, 'REQUEST_URI'), PHP_URL_PATH), '/');
$reqHead = false;

// HEAD is routed like GET, the body is skipped further down
if($reqMethod === 'HEAD') {
    $reqMethod = 'GET';
    $reqHead = true;
}

// CORS: only flashii.net / edgii.net and their subdomains may call this from a browser
if(!empty($_SERVER['HTTP_ORIGIN'])) {
    $originLast12 = substr($_SERVER['HTTP_ORIGIN'], -12, 12);
    $originLast10 = substr($_SERVER['HTTP_ORIGIN'], -10, 10);

    if($originLast12 !== '/flashii.net' && $originLast12 !== '.flashii.net'
        && $originLast10 !== '/edgii.net' && $originLast10 !== '.edgii.net'
        && $_SERVER['HTTP_ORIGIN'] !== 'https://flashii.net'
        && $_SERVER['HTTP_ORIGIN'] !== 'http://flashii.net'
        && $_SERVER['HTTP_ORIGIN'] !== 'https://edgii.net'
        && $_SERVER['HTTP_ORIGIN'] !== 'http://edgii.net') {
        http_response_code(403);
        return;
    }

    header('Access-Control-Allow-Origin: ' . $_SERVER['HTTP_ORIGIN']);
    header('Vary: Origin');
}

if($reqMethod === 'OPTIONS') {
    http_response_code(204);
    //header('Access-Control-Allow-Credentials: true');
    //header('Access-Control-Allow-Headers: Authorization');
    header('Access-Control-Allow-Methods: OPTIONS, GET, POST');
    return;
}

if($reqPath === '/metadata') {
    // Allow using POST for ridiculous urls.
    if($reqMethod !== 'GET' && $reqMethod !== 'POST') {
        http_response_code(405);
        return;
    }

    header('Content-Type: application/json; charset=utf-8');

    if($reqHead)
        return;

    $sw = Stopwatch::startNew();
    $resp = new stdClass;

    // the old hostname only serves a static deprecation notice
    if(($_SERVER['HTTP_HOST'] ?? '') === 'mii.flashii.net') {
        $resp->type = 'object';
        $resp->content_type = [];
        $resp->content_type['string'] = 'application/x-update-your-script-to-use-uiharu.flashii.net-instead-of-mii.flashii.net';
        $resp->content_type['type'] = 'text';
        $resp->content_type['subtype'] = 'deprecation';
        $resp->title = 'Update your URLs: mii.flashii.net -> uiharu.flashii.net';
        $resp->description = 'Update your URLs: mii.flashii.net -> uiharu.flashii.net';
        $resp->site_name = 'Deprecation notice';
        $resp->took = 35.1;
        echo json_encode($resp);
        return;
    }

    if($reqMethod === 'POST') {
        // the request body is the URL, capped at 1000 bytes
        $targetUrl = substr((string)file_get_contents('php://input'), 0, 1000);
    } else {
        $targetUrl = (string)filter_input(INPUT_GET, 'url');
    }

    $parsedUrl = uih_parse_url($targetUrl);
    if($parsedUrl === false) {
        http_response_code(400);
        $resp->error = 'metadata:uri';
        echo json_encode($resp);
        return;
    }

    $resp->uri = $parsedUrl;

    // if no scheme is specified, try https
    if(empty($parsedUrl['scheme'])) {
        $parsedUrl['scheme'] = 'https';
        $parsedUrl = uih_parse_url(uih_build_url($parsedUrl));

        // the rebuilt URL can still be rejected by the parser
        if($parsedUrl === false) {
            http_response_code(400);
            $resp->error = 'metadata:uri';
            echo json_encode($resp);
            return;
        }
    }

    $urlHash = hash('sha256', uih_build_url($parsedUrl));

    try {
        // one semaphore per URL hash so concurrent requests for the same URL
        // wait for the first lookup instead of repeating it
        $semPath = UIH_SEM_PATH . DIRECTORY_SEPARATOR . $urlHash;
        if(!is_file($semPath))
            touch($semPath);

        $ftok = ftok($semPath, UIH_SEM_NAME);
        $semaphore = sem_get($ftok, 1);
        while(!sem_acquire($semaphore))
            usleep(100);

        if(UIH_CACHE) {
            $cacheFetch = $db->prepare('SELECT `metadata_resp` FROM `uih_metadata_cache` WHERE `metadata_url` = UNHEX(?) AND `metadata_created` > NOW() - INTERVAL 10 MINUTE');
            $cacheFetch->addParameter(1, $urlHash);
            $cacheFetch->execute();
            $cacheResult = $cacheFetch->getResult();

            if($cacheResult->next()) {
                $cacheResp = json_decode($cacheResult->getString(0));
                if($cacheResp !== null)
                    $resp = $cacheResp;
            }
        }

        // a usable cache hit carries a type; without one do a fresh lookup
        if(empty($resp->type)) {
            // parse_url() omits components that are absent, e.g. "eeprom:abc" has no host
            $urlScheme = strtolower($parsedUrl['scheme'] ?? '');
            $urlHost = strtolower($parsedUrl['host'] ?? '');
            $urlPath = '/' . trim($parsedUrl['path'] ?? '', '/');

            if($urlScheme === 'eeprom') {
                if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl['path'] ?? '', $matches)) {
                    $resp->uri = $parsedUrl = uih_parse_url('https://i.fii.moe/' . $matches[1]);
                    $continueRaw = true;
                    uih_eeprom_lookup($resp, $matches[1]);
                }
            } elseif($urlScheme === 'devrom') {
                if(preg_match('#^([A-Za-z0-9-_]+)$#', $parsedUrl['path'] ?? '', $matches)) {
                    $resp->uri = $parsedUrl = uih_parse_url('https://i.edgii.net/' . $matches[1]);
                    $continueRaw = true;
                    uih_eeprom_lookup($resp, $matches[1], 'edgii');
                }
            } elseif($urlScheme === 'http' || $urlScheme === 'https') {
                // options shared by the JSON API requests below
                $jsonApiOptions = [
                    CURLOPT_AUTOREFERER => false,
                    CURLOPT_CERTINFO => false,
                    CURLOPT_FAILONERROR => false,
                    CURLOPT_FOLLOWLOCATION => false,
                    CURLOPT_RETURNTRANSFER => true,
                    CURLOPT_TCP_FASTOPEN => true,
                    CURLOPT_CONNECTTIMEOUT => 2,
                    CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
                    CURLOPT_TIMEOUT => 5,
                    CURLOPT_USERAGENT => 'Uiharu/' . UIH_VERSION,
                ];

                switch($urlHost) {
                    case 'i.flashii.net':
                    case 'i.fii.moe':
                        $eepromFileId = substr($urlPath, 1);
                        // fall through
                    case 'eeprom.flashii.net':
                        if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
                            $eepromFileId = $matches[1];

                        if(!empty($eepromFileId)) {
                            $continueRaw = true;
                            uih_eeprom_lookup($resp, $eepromFileId);
                        }
                        break;

                    case 'i.edgii.net':
                        $eepromFileId = substr($urlPath, 1);
                        // fall through
                    case 'eeprom.edgii.net':
                        if(!isset($eepromFileId) && preg_match('#^/uploads/([A-Za-z0-9-_]+)/?$#', $urlPath, $matches))
                            $eepromFileId = $matches[1];

                        if(!empty($eepromFileId)) {
                            $continueRaw = true;
                            uih_eeprom_lookup($resp, $eepromFileId, 'edgii');
                        }
                        break;

                    case 'twitter.com':
                    case 'www.twitter.com':
                    case 'm.twitter.com':
                    case 'mobile.twitter.com':
                    case 'nitter.net':
                    case 'www.nitter.net':
                        if(preg_match('#^/@?(?:[A-Za-z0-9_]{1,20})/status(?:es)?/([0-9]+)/?$#', $urlPath, $matches)) {
                            $resp->type = 'twitter:tweet';
                            $resp->color = '#1da1f2';
                            $resp->tweet_id = strval($matches[1] ?? '0');

                            $curl = curl_init("https://api.twitter.com/2/tweets?ids={$resp->tweet_id}&expansions=attachments.media_keys,author_id,entities.mentions.username,referenced_tweets.id,referenced_tweets.id.author_id&media.fields=height,width,media_key,preview_image_url,url,type&tweet.fields=attachments,conversation_id,text,source,possibly_sensitive,created_at&user.fields=id,name,profile_image_url,protected,username,verified");
                            curl_setopt_array($curl, $jsonApiOptions + [
                                CURLOPT_HTTPHEADER => [
                                    'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'),
                                    'Accept: application/json',
                                ],
                            ]);
                            $tweetResp = curl_exec($curl);
                            curl_close($curl);

                            $resp->tweet_info = is_string($tweetResp) ? json_decode($tweetResp) : null;

                            if(isset($resp->tweet_info->includes->users[0]->name))
                                $resp->title = $resp->tweet_info->includes->users[0]->name;
                            if(isset($resp->tweet_info->includes->users[0]->profile_image_url))
                                $resp->image = $resp->tweet_info->includes->users[0]->profile_image_url;
                            if(isset($resp->tweet_info->data[0]->text))
                                $resp->description = $resp->tweet_info->data[0]->text;

                            $resp->site_name = 'Twitter';
                            break;
                        }

                        if(preg_match('#^/@?([A-Za-z0-9_]{1,20})/?$#', $urlPath, $matches)) {
                            $resp->type = 'twitter:user';
                            $resp->color = '#1da1f2';
                            $resp->twitter_user_name = strval($matches[1] ?? '');

                            $curl = curl_init("https://api.twitter.com/2/users/by?usernames={$resp->twitter_user_name}&user.fields=description,entities,id,name,profile_image_url,protected,url,username,verified");
                            curl_setopt_array($curl, $jsonApiOptions + [
                                CURLOPT_HTTPHEADER => [
                                    'Authorization: Bearer ' . Config::get('Twitter', 'apiToken'),
                                    'Accept: application/json',
                                ],
                            ]);
                            $twitUserResp = curl_exec($curl);
                            curl_close($curl);

                            $resp->twitter_user_info = is_string($twitUserResp) ? json_decode($twitUserResp) : null;

                            if(isset($resp->twitter_user_info->data[0]->name))
                                $resp->title = $resp->twitter_user_info->data[0]->name;
                            if(isset($resp->twitter_user_info->data[0]->profile_image_url))
                                $resp->image = $resp->twitter_user_info->data[0]->profile_image_url;
                            if(isset($resp->twitter_user_info->data[0]->description))
                                $resp->description = $resp->twitter_user_info->data[0]->description;

                            $resp->site_name = 'Twitter';
                            break;
                        }
                        break;

                    case 'youtu.be':
                    case 'www.youtu.be': // www. doesn't work for this, but may as well cover it
                        $youtubeVideoId = substr($urlPath, 1);
                        // fall through
                    case 'youtube.com':
                    case 'www.youtube.com':
                    case 'youtube-nocookie.com':
                    case 'www.youtube-nocookie.com':
                        parse_str($parsedUrl['query'] ?? '', $queryString);

                        if(!isset($youtubeVideoId) && $urlPath === '/watch')
                            $youtubeVideoId = $queryString['v'] ?? null;

                        // "v[]=..." makes parse_str() produce an array, only accept strings
                        if(is_string($youtubeVideoId) && !empty($youtubeVideoId)) {
                            $resp->type = 'youtube:video';
                            $resp->color = '#f00';
                            $resp->youtube_video_id = $youtubeVideoId;

                            if(isset($queryString['t']))
                                $resp->youtube_start_time = $queryString['t'];
                            if(isset($queryString['list']))
                                $resp->youtube_playlist = $queryString['list'];
                            if(isset($queryString['index']))
                                $resp->youtube_playlist_index = $queryString['index'];

                            // the id is user input, encode it so it cannot add parameters to the API call
                            $curl = curl_init('https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id=' . rawurlencode($youtubeVideoId) . '&key=' . Config::get('Google', 'apiKey'));
                            curl_setopt_array($curl, $jsonApiOptions + [
                                CURLOPT_HTTPHEADER => [
                                    'Accept: application/json',
                                ],
                            ]);
                            $youtubeResp = curl_exec($curl);
                            curl_close($curl);

                            $resp->youtube_video_info = is_string($youtubeResp) ? json_decode($youtubeResp) : null;

                            if(isset($resp->youtube_video_info->items[0]->snippet->title))
                                $resp->title = $resp->youtube_video_info->items[0]->snippet->title;
                            if(isset($resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url))
                                $resp->image = $resp->youtube_video_info->items[0]->snippet->thumbnails->medium->url;
                            if(isset($resp->youtube_video_info->items[0]->snippet->description))
                                $resp->description = $resp->youtube_video_info->items[0]->snippet->description;

                            $resp->site_name = 'YouTube';
                        }
                        break;
                }
            } else {
                http_response_code(404);
                $resp->error = 'metadata:scheme';
            }

            // Generic lookup: runs when no handler claimed the URL, or when a handler
            // asked for the raw resource to be inspected as well ($continueRaw).
            // $parsedUrl may have been replaced above, so its scheme is re-read here.
            if((empty($resp->type) || isset($continueRaw))
                && in_array(strtolower($parsedUrl['scheme'] ?? ''), ['http', 'https'], true)) {
                // first pass fetches headers only
                $curl = curl_init(uih_build_url($parsedUrl));
                curl_setopt_array($curl, [
                    CURLOPT_AUTOREFERER => true,
                    CURLOPT_CERTINFO => false,
                    CURLOPT_FAILONERROR => false,
                    CURLOPT_FOLLOWLOCATION => true,
                    CURLOPT_MAXREDIRS => 5,
                    CURLOPT_PATH_AS_IS => true,
                    CURLOPT_NOBODY => true,
                    CURLOPT_HEADER => true,
                    CURLOPT_RETURNTRANSFER => true,
                    CURLOPT_TCP_FASTOPEN => true,
                    CURLOPT_CONNECTTIMEOUT => 2,
                    CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
                    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
                    CURLOPT_TIMEOUT => 5,
                    CURLOPT_DEFAULT_PROTOCOL => 'https',
                    CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible) Uiharu/' . UIH_VERSION,
                    CURLOPT_HTTPHEADER => [
                        'Accept: text/html,application/xhtml+xml',
                    ],
                ]);
                $headers = curl_exec($curl);

                if($headers === false) {
                    $resp->error = 'metadata:timeout';
                    $resp->errorMessage = curl_error($curl);
                } else {
                    $headersRaw = explode("\r\n", trim($headers));
                    $statusCode = 200;
                    $headers = [];

                    foreach($headersRaw as $header) {
                        if(empty($header))
                            continue;

                        // a line without a colon is a status line; redirects produce several
                        // header sets, so start over and keep only the last response
                        if(!str_contains($header, ':')) {
                            $headParts = explode(' ', $header);
                            if(isset($headParts[1]) && is_numeric($headParts[1]))
                                $statusCode = (int)$headParts[1];
                            $headers = [];
                            continue;
                        }

                        $headerParts = explode(':', $header, 2);
                        $headerParts[0] = mb_strtolower($headerParts[0]);

                        // repeated headers are merged into a comma separated list
                        if(isset($headers[$headerParts[0]]))
                            $headers[$headerParts[0]] .= ', ' . trim($headerParts[1] ?? '');
                        else
                            $headers[$headerParts[0]] = trim($headerParts[1] ?? '');
                    }

                    try {
                        $contentType = MediaType::parse($headers['content-type'] ?? '');
                    } catch(InvalidArgumentException $ex) {
                        $contentType = MediaType::parse('application/octet-stream');
                    }

                    $resp->content_type = uih_media_type_json($contentType);

                    $isHTML = $contentType->equals('text/html');
                    $isXHTML = $contentType->equals('application/xhtml+xml');

                    if($isHTML || $isXHTML) {
                        // second pass on the same handle, now with the body;
                        // CURLOPT_HTTPGET makes sure the method goes back to GET
                        curl_setopt_array($curl, [
                            CURLOPT_NOBODY => false,
                            CURLOPT_HTTPGET => true,
                            CURLOPT_HEADER => false,
                        ]);
                        $body = curl_exec($curl);
                        curl_close($curl);

                        // curl_exec() returns false when the second request fails
                        if(!is_string($body))
                            $body = '';

                        $document = new DOMDocument;
                        if($isXHTML) {
                            // loadXML() throws a ValueError on an empty string
                            if($body !== '')
                                $document->loadXML($body);
                        } else {
                            // prefix an XML declaration so the parser picks up the charset
                            // from the Content-Type header, then remove the resulting node again
                            @$document->loadHTML('<?xml encoding="' . $contentType->getCharset() . '">' . $body);
                            foreach($document->childNodes as $child)
                                if($child->nodeType === XML_PI_NODE) {
                                    $document->removeChild($child);
                                    break;
                                }
                            $document->encoding = $contentType->getCharset();
                        }

                        $charSet = $document->encoding;

                        $resp->type = 'website';
                        $resp->title = '';

                        $isMetaTitle = false;

                        // first <title> element wins, meta tags may override it below
                        $titleTag = $document->getElementsByTagName('title');
                        foreach($titleTag as $tag) {
                            $resp->title = trim(mb_convert_encoding($tag->textContent, 'utf-8', $charSet));
                            break;
                        }

                        $metaTags = $document->getElementsByTagName('meta');
                        foreach($metaTags as $tag) {
                            $nameAttr = $tag->hasAttribute('name')
                                ? $tag->getAttribute('name')
                                : ($tag->hasAttribute('property') ? $tag->getAttribute('property') : '');
                            $valueAttr = $tag->hasAttribute('value')
                                ? $tag->getAttribute('value')
                                : ($tag->hasAttribute('content') ? $tag->getAttribute('content') : '');

                            $nameAttr = trim(mb_convert_encoding($nameAttr, 'utf-8', $charSet));
                            $valueAttr = trim(mb_convert_encoding($valueAttr, 'utf-8', $charSet));

                            if(empty($nameAttr) || empty($valueAttr))
                                continue;

                            switch($nameAttr) {
                                case 'og:title':
                                case 'twitter:title':
                                    // only the first meta title is used
                                    if(!$isMetaTitle) {
                                        $isMetaTitle = true;
                                        $resp->title = $valueAttr;
                                    }
                                    break;

                                case 'description':
                                case 'og:description':
                                case 'twitter:description':
                                    if(!isset($resp->description))
                                        $resp->description = $valueAttr;
                                    break;

                                case 'og:site_name':
                                    $resp->site_name = $valueAttr;
                                    break;

                                case 'og:image':
                                case 'twitter:image':
                                    $resp->image = $valueAttr;
                                    break;

                                case 'theme-color':
                                    $resp->color = $valueAttr;
                                    break;

                                case 'og:type':
                                    $resp->type = $valueAttr;
                                    break;
                            }
                        }
                    } else {
                        $resp->is_image = $isImage = $contentType->matchCategory('image');
                        $resp->is_audio = $isAudio = $contentType->matchCategory('audio');
                        $resp->is_video = $isVideo = $contentType->matchCategory('video');

                        if($isImage || $isAudio || $isVideo) {
                            curl_close($curl);

                            $resp->media = new stdClass;

                            // let ffprobe inspect the remote file; shell_exec() yields
                            // null/false when the command fails or prints nothing
                            $probeOutput = shell_exec(sprintf(
                                'ffprobe -show_streams -show_format -print_format json -v quiet -i %s',
                                escapeshellarg(uih_build_url($parsedUrl))
                            ));
                            $ffmpeg = is_string($probeOutput) ? json_decode($probeOutput) : null;

                            if(!empty($ffmpeg)) {
                                if(!empty($ffmpeg->format)) {
                                    $resp->media->confidence = empty($ffmpeg->format->probe_score) ? 0 : (intval($ffmpeg->format->probe_score) / 100);

                                    if(!empty($ffmpeg->format->duration))
                                        $resp->media->duration = floatval($ffmpeg->format->duration);
                                    if(!empty($ffmpeg->format->size))
                                        $resp->media->size = intval($ffmpeg->format->size);
                                    if(!empty($ffmpeg->format->bit_rate))
                                        $resp->media->bitrate = intval($ffmpeg->format->bit_rate);

                                    if($isVideo || $isImage) {
                                        if(!empty($ffmpeg->streams)) {
                                            foreach($ffmpeg->streams as $stream) {
                                                if(($stream->codec_type ?? null) !== 'video')
                                                    continue;

                                                $resp->width = intval($stream->coded_width ?? $stream->width ?? -1);
                                                $resp->height = intval($stream->coded_height ?? $stream->height ?? -1);

                                                if(!empty($stream->display_aspect_ratio))
                                                    $resp->media->aspect_ratio = $stream->display_aspect_ratio;

                                                // images stop at the first video stream,
                                                // videos keep going so the last one wins
                                                if($isImage)
                                                    break;
                                            }
                                        }
                                    }

                                    if($isAudio) {
                                        /**
                                         * Copies the known tag fields from $source to $dest,
                                         * accepting lower- and upper-case key spellings.
                                         * Empty values are skipped.
                                         */
                                        function eat_tags(stdClass $dest, stdClass $source): void {
                                            foreach(['title', 'artist', 'album', 'date', 'comment', 'genre'] as $tagName) {
                                                $upperName = strtoupper($tagName);

                                                if(!empty($source->{$tagName}))
                                                    $dest->{$tagName} = $source->{$tagName};
                                                elseif(!empty($source->{$upperName}))
                                                    $dest->{$tagName} = $source->{$upperName};
                                            }
                                        }

                                        if(!empty($ffmpeg->format->tags)) {
                                            $resp->media->tags = new stdClass;
                                            eat_tags($resp->media->tags, $ffmpeg->format->tags);
                                        } elseif(!empty($ffmpeg->streams)) {
                                            // no container-level tags: fall back to the audio streams
                                            // (the original note singles out ogg files for this)
                                            $resp->media->tags = new stdClass;

                                            foreach($ffmpeg->streams as $stream) {
                                                if(($stream->codec_type ?? null) === 'audio' && !empty($stream->tags)) {
                                                    eat_tags($resp->media->tags, $stream->tags);

                                                    // empty() is never true for an object, so look at
                                                    // the properties to see whether anything was copied
                                                    if(!empty(get_object_vars($resp->media->tags)))
                                                        break;
                                                }
                                            }
                                        }

                                        if(empty($resp->title)) {
                                            // "Artist - Title (Date)", built from whichever parts exist
                                            $audioTitle = '';

                                            if(!empty($resp->media->tags->artist))
                                                $audioTitle .= $resp->media->tags->artist . ' - ';
                                            if(!empty($resp->media->tags->title))
                                                $audioTitle .= $resp->media->tags->title;
                                            if(!empty($resp->media->tags->date))
                                                $audioTitle .= ' (' . $resp->media->tags->date . ')';

                                            if(!empty($audioTitle))
                                                $resp->title = $audioTitle;
                                        }

                                        if(empty($resp->description) && !empty($resp->media->tags->comment))
                                            $resp->description = $resp->media->tags->comment;
                                    }
                                }
                            }

                            if(UIH_INCLUDE_RAW)
                                $resp->ffmpeg = $ffmpeg;
                        } else
                            curl_close($curl);
                    }
                }
            }

            $sw->stop();
            $resp->took = $sw->getElapsedTime() / 1000;

            // substitute invalid UTF-8 instead of letting json_encode() fail and return false
            $respJson = json_encode($resp, JSON_INVALID_UTF8_SUBSTITUTE);

            $replaceCache = $db->prepare('REPLACE INTO `uih_metadata_cache` (`metadata_url`, `metadata_resp`) VALUES (UNHEX(?), ?)');
            $replaceCache->addParameter(1, $urlHash);
            $replaceCache->addParameter(2, $respJson);
            $replaceCache->execute();
        }
    } finally {
        if(!empty($semaphore))
            sem_release($semaphore);
        if(is_file($semPath))
            unlink($semPath);
    }

    // $respJson only exists after a fresh lookup, cache hits are re-encoded here
    echo $respJson ?? json_encode($resp, JSON_INVALID_UTF8_SUBSTITUTE);
    return;
}

if($reqPath === '/') {
    if($reqMethod !== 'GET') {
        http_response_code(405);
        return;
    }

    header('Content-Type: text/plain');

    if($reqHead)
        return;

    echo 'Metadata lookup service - OK';
    return;
}

http_response_code(404);