uiharu/src/Apis/v1_0.php

463 lines
22 KiB
PHP

<?php
namespace Uiharu\APIs;
use stdClass;
use DOMDocument;
use Exception;
use InvalidArgumentException;
use Uiharu\Colour;
use Uiharu\Config;
use Uiharu\FFMPEG;
use Uiharu\IHasMediaInfo;
use Uiharu\MediaTypeExts;
use Uiharu\UihContext;
use Uiharu\Url;
use Uiharu\Lookup\EEPROMLookupResult;
use Uiharu\Lookup\TwitterLookupResult;
use Uiharu\Lookup\TwitterLookupTweetResult;
use Uiharu\Lookup\TwitterLookupUserResult;
use Uiharu\Lookup\YouTubeLookupResult;
use Index\MediaType;
use Index\Data\IDbConnection;
use Index\Http\HttpFx;
use Index\Performance\Stopwatch;
/**
 * Metadata API, version 1.0.
 *
 * Registers GET and POST handlers on `/metadata` that resolve a URL into a
 * JSON description. A URL is first offered to the context's lookups; when
 * none matches, the resource is probed over HTTP(S) directly (HTML/XHTML
 * documents are parsed for title and meta tags, image/audio/video resources
 * are passed to FFMPEG::probe). Freshly built responses are stored in the
 * `uih_metadata_cache` table.
 */
final class v1_0 implements \Uiharu\IApi {
    private UihContext $ctx;
    private IDbConnection $db;

    /**
     * @param UihContext $ctx Application context; also supplies the database connection.
     */
    public function __construct(UihContext $ctx) {
        $this->ctx = $ctx;
        $this->db = $ctx->getDatabase();
    }

    /**
     * Tells whether this API version applies to the given request path.
     *
     * The declared return type is `string`, so the boolean outcome is spelled
     * out as the values PHP's coercion would produce: '1' when the path does
     * NOT start with '/v', '' when it does.
     * NOTE(review): the return type presumably mirrors \Uiharu\IApi; confirm
     * whether the interface can be changed to `bool`.
     *
     * @param string $url Request path.
     * @return string '1' on match, '' otherwise.
     */
    public function match(string $url): string {
        return str_starts_with($url, '/v') ? '' : '1';
    }

    /**
     * Attaches the `/metadata` GET and POST routes to the router.
     */
    public function register(HttpFx $router): void {
        $router->get('/metadata', [$this, 'handleGET']);
        $router->post('/metadata', [$this, 'handlePOST']);
    }

    /**
     * GET /metadata: the target URL is taken from the `url` parameter.
     * HEAD requests only get the JSON content type set, no body is built.
     */
    public function handleGET($response, $request) {
        if($request->getMethod() === 'HEAD') {
            $response->setTypeJson();
            return;
        }

        return $this->handler(
            $response, $request,
            (string)$request->getParam('url')
        );
    }

    /**
     * POST /metadata: the target URL is the first 1000 bytes of the request
     * body. Requests without stream content are answered with 400.
     */
    public function handlePOST($response, $request) {
        if(!$request->isStreamContent())
            return 400;

        // Cast like handleGET does, so a non-string read result ends up as an
        // empty URL (400) rather than a type error on handler()'s parameter.
        return $this->handler(
            $response, $request,
            (string)$request->getContent()->getStream()->read(1000)
        );
    }

    /**
     * Shared implementation of both routes.
     *
     * Error codes placed in `error`: `metadata:uri` (unparsable URL, 400),
     * `metadata:scheme` (not http/https, 400), `metadata:lookup` (lookup
     * threw, 500) and `metadata:timeout` (HTTP probe failed).
     *
     * @param string $targetUrl URL to describe.
     * @return stdClass|null The response object when it should be serialised
     *                       by the caller, or nothing when the JSON body has
     *                       already been set on $response.
     */
    private function handler($response, $request, string $targetUrl) {
        $sw = Stopwatch::startNew();
        $resp = new stdClass;

        $response->setTypeJson();

        if(empty($targetUrl)) {
            $response->setStatusCode(400);
            return $resp;
        }

        try {
            $parsedUrl = Url::parse($targetUrl);
        } catch(InvalidArgumentException $ex) {
            $response->setStatusCode(400);
            $resp->error = 'metadata:uri';
            return $resp;
        }

        // if no scheme is specified, try https
        if(!$parsedUrl->hasScheme())
            $parsedUrl->setScheme('https');

        $resp->uri = $parsedUrl->toV1();
        $urlHash = $parsedUrl->calculateHash(false);

        // In debug builds the cache is only read when explicitly requested.
        $enableCache = !UIH_DEBUG || $request->hasParam('_cache');
        $includeRawResult = UIH_DEBUG || $request->hasParam('include_raw');

        if($enableCache) {
            $cacheFetch = $this->db->prepare('SELECT `metadata_resp` FROM `uih_metadata_cache` WHERE `metadata_url` = UNHEX(?) AND `metadata_created` > NOW() - INTERVAL 10 MINUTE');
            $cacheFetch->addParameter(1, $urlHash);
            $cacheFetch->execute();
            $cacheResult = $cacheFetch->getResult();

            if($cacheResult->next()) {
                $cacheResp = json_decode($cacheResult->getString(0));

                // Only a typed response counts as a hit. Adopting an untyped
                // one (e.g. a stored error) and then resolving again would
                // leak its stale fields into the fresh response.
                if($cacheResp instanceof stdClass && !empty($cacheResp->type))
                    return $cacheResp;
            }
        }

        $lookup = $this->ctx->matchLookup($parsedUrl);

        if($lookup !== null) {
            try {
                self::applyLookupResult($resp, $lookup->lookup($parsedUrl));
            } catch(Exception $ex) {
                $resp->error = 'metadata:lookup';
                if(UIH_DEBUG) {
                    $resp->dbg_msg = $ex->getMessage();
                    $resp->dbg_ex = (string)$ex;
                }
                $response->setStatusCode(500);
                return $resp;
            }
        } else {
            // The scheme is compared case-insensitively, and this is the only
            // gate in front of the HTTP probe.
            $urlScheme = strtolower($parsedUrl->getScheme());
            if($urlScheme !== 'http' && $urlScheme !== 'https') {
                $resp->error = 'metadata:scheme';
                $response->setStatusCode(400);
                return $resp;
            }

            self::probeRemote($resp, $parsedUrl, $includeRawResult);
        }

        $sw->stop();
        $resp->took = $sw->getElapsedTime() / 1000;

        $respJson = json_encode($resp);

        // Encoding failed: hand the object back to the caller and do not
        // overwrite the cache row with an unusable payload.
        if($respJson === false)
            return $resp;

        $replaceCache = $this->db->prepare('REPLACE INTO `uih_metadata_cache` (`metadata_url`, `metadata_resp`) VALUES (UNHEX(?), ?)');
        $replaceCache->addParameter(1, $urlHash);
        $replaceCache->addParameter(2, $respJson);
        $replaceCache->execute();

        $response->setContent($respJson);
    }

    /**
     * Copies everything a lookup result exposes onto the response object.
     * May throw whatever the result's accessors throw; the caller handles it.
     *
     * @param stdClass $resp   Response being built (modified in place).
     * @param mixed    $result Value returned by the matched lookup.
     */
    private static function applyLookupResult(stdClass $resp, $result): void {
        $resp->uri = $result->getUrl()->toV1();
        $resp->type = $result->getObjectType();

        if($result->hasMediaType())
            $resp->content_type = MediaTypeExts::toV1($result->getMediaType());
        if($result->hasColour())
            $resp->color = Colour::toHexString($result->getColour());
        if($result->hasTitle())
            $resp->title = $result->getTitle();
        if($result->hasSiteName())
            $resp->site_name = $result->getSiteName();
        if($result->hasDescription())
            $resp->description = $result->getDescription();
        if($result->hasPreviewImage())
            $resp->image = $result->getPreviewImage();

        if($result instanceof TwitterLookupResult) {
            if($result instanceof TwitterLookupTweetResult)
                $resp->tweet_id = $result->getTwitterTweetId();
            if($result instanceof TwitterLookupUserResult)
                $resp->twitter_user_name = $result->getTwitterUserName();
            if(UIH_DEBUG)
                $resp->dbg_twitter_info = $result->getTwitterResult();
        }

        if($result instanceof YouTubeLookupResult) {
            $resp->youtube_video_id = $result->getYouTubeVideoId();

            if($result->hasYouTubeVideoStartTime())
                $resp->youtube_start_time = $result->getYouTubeVideoStartTime();
            if($result->hasYouTubePlayListId())
                $resp->youtube_playlist = $result->getYouTubePlayListId();
            if($result->hasYouTubePlayListIndex())
                $resp->youtube_playlist_index = $result->getYouTubePlayListIndex();

            if(UIH_DEBUG) {
                $resp->dbg_youtube_info = $result->getYouTubeVideoInfo();
                $resp->dbg_youtube_query = $result->getYouTubeUrlQuery();
            }
        }

        if($result instanceof IHasMediaInfo) {
            if($result->isMedia()) {
                $resp->is_image = $result->isImage();
                $resp->is_audio = $result->isAudio();
                $resp->is_video = $result->isVideo();

                if($result->hasDimensions()) {
                    $resp->width = $result->getWidth();
                    $resp->height = $result->getHeight();
                }

                $resp->media = new stdClass;
                $resp->media->confidence = $result->getConfidence();

                if($result->hasAspectRatio())
                    $resp->media->aspect_ratio = $result->getAspectRatio();
                if($result->hasDuration())
                    $resp->media->duration = $result->getDuration();
                if($result->hasSize())
                    $resp->media->size = $result->getSize();
                if($result->hasBitRate())
                    $resp->media->bitrate = $result->getBitRate();
            }

            if($result instanceof EEPROMLookupResult) {
                $resp->eeprom_file_id = $result->getEEPROMId();
                $resp->eeprom_file_info = $result->getEEPROMInfo();
            }

            if(UIH_DEBUG && $result->hasMediaInfo())
                $resp->dbg_media_info = $result->getMediaInfo();
        }
    }

    /**
     * Describes a URL no lookup claimed: a header-only request decides the
     * content type, then the resource is treated as a document or as media.
     * A failed transfer is reported as `metadata:timeout` with curl's message
     * in `errorMessage`.
     *
     * @param stdClass $resp             Response being built (modified in place).
     * @param mixed    $parsedUrl        Parsed target URL (string-castable).
     * @param bool     $includeRawResult Whether to attach the raw probe result.
     */
    private static function probeRemote(stdClass $resp, $parsedUrl, bool $includeRawResult): void {
        $curl = curl_init((string)$parsedUrl);
        curl_setopt_array($curl, [
            CURLOPT_AUTOREFERER => true,
            CURLOPT_CERTINFO => false,
            CURLOPT_FAILONERROR => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 5,
            CURLOPT_PATH_AS_IS => true,
            CURLOPT_NOBODY => true,
            CURLOPT_HEADER => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TCP_FASTOPEN => true,
            CURLOPT_CONNECTTIMEOUT => 2,
            CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_TIMEOUT => 5,
            CURLOPT_DEFAULT_PROTOCOL => 'https',
            CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible) Uiharu/' . UIH_VERSION,
            CURLOPT_HTTPHEADER => [
                'Accept: text/html,application/xhtml+xml',
            ],
        ]);

        $rawHeaders = curl_exec($curl);
        if($rawHeaders === false) {
            $resp->error = 'metadata:timeout';
            $resp->errorMessage = curl_error($curl);
            curl_close($curl);
            return;
        }

        $headers = self::parseFinalHeaders($rawHeaders);

        try {
            $contentType = MediaType::parse($headers['content-type'] ?? '');
        } catch(InvalidArgumentException $ex) {
            $contentType = MediaType::parse('application/octet-stream');
        }

        $resp->content_type = MediaTypeExts::toV1($contentType);

        $isHTML = $contentType->equals('text/html');
        $isXHTML = $contentType->equals('application/xhtml+xml');

        if($isHTML || $isXHTML) {
            // Second request on the same handle, this time for the body only.
            curl_setopt_array($curl, [
                CURLOPT_NOBODY => false,
                CURLOPT_HEADER => false,
            ]);
            $body = curl_exec($curl);
            $bodyError = $body === false ? curl_error($curl) : '';
            curl_close($curl);

            if($body === false) {
                $resp->error = 'metadata:timeout';
                $resp->errorMessage = $bodyError;
                return;
            }

            self::applyDocumentMetadata($resp, $body, $isXHTML, $contentType);
            return;
        }

        curl_close($curl);
        self::applyMediaMetadata($resp, $parsedUrl, $contentType, $includeRawResult);
    }

    /**
     * Parses a raw header dump into a lower-cased name => value map.
     *
     * Every line without a colon (a status line) resets the map, so after
     * redirects only the headers of the last response remain. Repeated
     * headers are joined with ', '.
     *
     * @return array<string, string>
     */
    private static function parseFinalHeaders(string $raw): array {
        $headers = [];

        foreach(explode("\r\n", trim($raw)) as $line) {
            if(empty($line))
                continue;

            if(!str_contains($line, ':')) {
                $headers = [];
                continue;
            }

            [$name, $value] = explode(':', $line, 2);
            $name = mb_strtolower($name);
            $value = trim($value);

            if(isset($headers[$name]))
                $headers[$name] .= ', ' . $value;
            else
                $headers[$name] = $value;
        }

        return $headers;
    }

    /**
     * Fills the response from an HTML or XHTML document: `<title>` plus the
     * Open Graph / Twitter / generic meta tags handled in the switch below.
     *
     * @param stdClass $resp        Response being built (modified in place).
     * @param string   $body        Document source.
     * @param bool     $isXHTML     Parse as XML instead of HTML.
     * @param mixed    $contentType Parsed content type of the document.
     */
    private static function applyDocumentMetadata(stdClass $resp, string $body, bool $isXHTML, $contentType): void {
        $resp->type = 'website';
        $resp->title = '';

        // DOMDocument::loadHTML()/loadXML() reject an empty source with a
        // ValueError on PHP 8, which catch(Exception) would not intercept.
        if($body === '')
            return;

        $parseFlags = LIBXML_NOERROR | LIBXML_NONET | LIBXML_NOWARNING;
        $document = new DOMDocument;

        if($isXHTML) {
            $document->loadXML($body, $parseFlags);
        } else {
            $document->loadHTML($body, $parseFlags);

            // Drop the first top-level processing instruction, if any.
            foreach($document->childNodes as $child)
                if($child->nodeType === XML_PI_NODE) {
                    $document->removeChild($child);
                    break;
                }

            $document->encoding = $contentType->getCharset();
        }

        $charSet = $document->encoding;

        // A meta title, once seen, wins over <title> and over later meta titles.
        $isMetaTitle = false;

        foreach($document->getElementsByTagName('title') as $tag) {
            $resp->title = trim(mb_convert_encoding($tag->textContent, 'utf-8', $charSet));
            break;
        }

        foreach($document->getElementsByTagName('meta') as $tag) {
            $nameAttr = $tag->hasAttribute('name') ? $tag->getAttribute('name') : (
                $tag->hasAttribute('property') ? $tag->getAttribute('property') : ''
            );
            $valueAttr = $tag->hasAttribute('value') ? $tag->getAttribute('value') : (
                $tag->hasAttribute('content') ? $tag->getAttribute('content') : ''
            );

            $nameAttr = trim(mb_convert_encoding($nameAttr, 'utf-8', $charSet));
            $valueAttr = trim(mb_convert_encoding($valueAttr, 'utf-8', $charSet));

            if(empty($nameAttr) || empty($valueAttr))
                continue;

            switch($nameAttr) {
                case 'og:title':
                case 'twitter:title':
                    if(!$isMetaTitle) {
                        $isMetaTitle = true;
                        $resp->title = $valueAttr;
                    }
                    break;

                case 'description':
                case 'og:description':
                case 'twitter:description':
                    if(!isset($resp->description))
                        $resp->description = $valueAttr;
                    break;

                case 'og:site_name':
                    $resp->site_name = $valueAttr;
                    break;

                case 'og:image':
                case 'twitter:image':
                    $resp->image = $valueAttr;
                    break;

                case 'theme-color':
                    $resp->color = $valueAttr;
                    break;

                case 'og:type':
                    $resp->type = $valueAttr;
                    break;
            }
        }
    }

    /**
     * Fills the response for a non-document resource. Image, audio and video
     * content types are additionally probed with FFMPEG::probe.
     *
     * @param stdClass $resp             Response being built (modified in place).
     * @param mixed    $parsedUrl        Parsed target URL handed to the probe.
     * @param mixed    $contentType      Parsed content type of the resource.
     * @param bool     $includeRawResult Whether to attach the raw probe result as `ffmpeg`.
     */
    private static function applyMediaMetadata(stdClass $resp, $parsedUrl, $contentType, bool $includeRawResult): void {
        if(empty($resp->type))
            $resp->type = 'media';

        $resp->is_image = $isImage = $contentType->matchCategory('image');
        $resp->is_audio = $isAudio = $contentType->matchCategory('audio');
        $resp->is_video = $isVideo = $contentType->matchCategory('video');

        if(!$isImage && !$isAudio && !$isVideo)
            return;

        $resp->media = new stdClass;
        $ffmpeg = FFMPEG::probe($parsedUrl);

        if(!empty($ffmpeg) && !empty($ffmpeg->format)) {
            $format = $ffmpeg->format;

            $resp->media->confidence = empty($format->probe_score) ? 0 : (intval($format->probe_score) / 100);

            if(!empty($format->duration))
                $resp->media->duration = floatval($format->duration);
            if(!empty($format->size))
                $resp->media->size = intval($format->size);
            if(!empty($format->bit_rate))
                $resp->media->bitrate = intval($format->bit_rate);

            if(($isVideo || $isImage) && !empty($ffmpeg->streams)) {
                foreach($ffmpeg->streams as $stream) {
                    if(($stream->codec_type ?? null) !== 'video')
                        continue;

                    $resp->width = intval($stream->coded_width ?? $stream->width ?? -1);
                    $resp->height = intval($stream->coded_height ?? $stream->height ?? -1);

                    if(!empty($stream->display_aspect_ratio))
                        $resp->media->aspect_ratio = $stream->display_aspect_ratio;

                    // Images stop at the first video stream; for videos the
                    // last video stream's values remain.
                    if($isImage)
                        break;
                }
            }

            if($isAudio)
                self::applyAudioTags($resp, $ffmpeg);
        }

        if($includeRawResult)
            $resp->ffmpeg = $ffmpeg;
    }

    /**
     * Extracts audio tags from a probe result and derives a title and a
     * description from them when the response has none yet.
     *
     * Tags are taken from the format section; when that has none, the audio
     * streams are scanned instead (the original author's note singles out
     * Ogg files as the reason for the stream fallback).
     *
     * @param stdClass $resp   Response being built; `$resp->media` must exist.
     * @param mixed    $ffmpeg Probe result with a non-empty `format`.
     */
    private static function applyAudioTags(stdClass $resp, $ffmpeg): void {
        if(!empty($ffmpeg->format->tags)) {
            $resp->media->tags = new stdClass;
            self::eatTags($resp->media->tags, $ffmpeg->format->tags);
        } elseif(!empty($ffmpeg->streams)) {
            $resp->media->tags = new stdClass;

            foreach($ffmpeg->streams as $stream) {
                if(($stream->codec_type ?? null) === 'audio' && !empty($stream->tags)) {
                    self::eatTags($resp->media->tags, $stream->tags);

                    // empty() is always false for an object, so count the
                    // copied properties to see whether this stream helped.
                    if(count(get_object_vars($resp->media->tags)) > 0)
                        break;
                }
            }
        }

        if(empty($resp->title)) {
            $audioTitle = '';

            if(!empty($resp->media->tags->artist))
                $audioTitle .= $resp->media->tags->artist . ' - ';
            if(!empty($resp->media->tags->title))
                $audioTitle .= $resp->media->tags->title;
            if(!empty($resp->media->tags->date))
                $audioTitle .= ' (' . $resp->media->tags->date . ')';

            if(!empty($audioTitle))
                $resp->title = $audioTitle;
        }

        if(empty($resp->description) && !empty($resp->media->tags->comment))
            $resp->description = $resp->media->tags->comment;
    }

    /**
     * Copies the recognised tags (title, artist, album, date, comment, genre)
     * from $source to $dest. Each tag is looked up in lower case first, then
     * in upper case; the first non-empty value is used.
     *
     * A class method rather than a function declared inside the handler:
     * a nested named function is defined globally when its statement runs
     * and cannot be declared a second time in the same process.
     */
    private static function eatTags(stdClass $dest, stdClass $source): void {
        foreach(['title', 'artist', 'album', 'date', 'comment', 'genre'] as $name) {
            $upperName = strtoupper($name);

            if(!empty($source->{$name}))
                $dest->{$name} = $source->{$name};
            elseif(!empty($source->{$upperName}))
                $dest->{$name} = $source->{$upperName};
        }
    }
}