Replaced upload_dmca field with dedicated blacklist table.

This commit is contained in:
flash 2023-11-11 02:00:01 +00:00
parent 5c2931cb54
commit e30b37f2ae
10 changed files with 166 additions and 53 deletions

View file

@ -21,21 +21,18 @@ try {
$uploadsData = $uploadsCtx->getUploadsData();
// Mark expired as deleted
$expired = $uploadsData->getUploads(expired: true, deleted: false, dmca: false);
$expired = $uploadsData->getUploads(expired: true, deleted: false);
foreach($expired as $uploadInfo)
$uploadsData->deleteUpload($uploadInfo);
// Hard delete soft deleted files
$deleted = $uploadsData->getUploads(deleted: true, dmca: false);
$deleted = $uploadsData->getUploads(deleted: true);
foreach($deleted as $uploadInfo) {
$uploadsCtx->deleteUploadData($uploadInfo);
$uploadsData->nukeUpload($uploadInfo);
}
// Ensure local data of DMCA'd files is gone
$deleted = $uploadsData->getUploads(dmca: true);
foreach($deleted as $uploadInfo)
$uploadsCtx->deleteUploadData($uploadInfo);
// new storage format should store by hashes again, ensure blacklisted data is no longer saved
} finally {
sem_release($semaphore);
}

View file

@ -0,0 +1,18 @@
<?php
use Index\Data\IDbConnection;
use Index\Data\Migration\IDbMigration;
/**
 * Migration that introduces the hash-keyed prm_blacklist table and retires
 * the per-upload prm_uploads.upload_dmca column.
 */
final class NewBlacklistSystem_20231111_015548 implements IDbMigration {
    /**
     * Creates prm_blacklist, carries existing takedowns over into it and then
     * drops the upload_dmca column.
     *
     * @param IDbConnection $conn Connection the migration statements are executed on.
     */
    public function migrate(IDbConnection $conn): void {
        $conn->execute('
            CREATE TABLE prm_blacklist (
                bl_hash BINARY(32) NOT NULL,
                bl_reason ENUM("copyright", "rules", "other") NOT NULL COLLATE "ascii_general_ci",
                bl_created TIMESTAMP NOT NULL DEFAULT current_timestamp(),
                PRIMARY KEY (bl_hash)
            ) ENGINE=InnoDB COLLATE="utf8mb4_bin"
        ');

        // Copy existing takedowns before the column that records them is dropped,
        // otherwise every previously taken-down file silently loses that status.
        // GROUP BY collapses multiple uploads sharing one hash into a single row
        // (bl_hash is the primary key); MIN() keeps the earliest takedown time.
        // NOTE(review): assumes prm_uploads.upload_hash holds the raw 32-byte
        // SHA-256 digest, matching BINARY(32) -- confirm against the uploads schema.
        $conn->execute('
            INSERT INTO prm_blacklist (bl_hash, bl_reason, bl_created)
            SELECT upload_hash, "copyright", MIN(upload_dmca)
            FROM prm_uploads
            WHERE upload_dmca IS NOT NULL
            GROUP BY upload_hash
        ');

        $conn->execute('ALTER TABLE prm_uploads DROP COLUMN upload_dmca');
    }
}

View file

@ -0,0 +1,32 @@
<?php
namespace EEPROM\Blacklist;
use Index\Data\IDbConnection;
use EEPROM\Uploads\UploadInfo;
/**
 * Front door for blacklist operations. Wraps a BlacklistData instance and
 * lets callers identify an item either by UploadInfo or by a hash string.
 */
class BlacklistContext {
    private BlacklistData $data;

    /**
     * @param IDbConnection $dbConn Connection handed to the underlying BlacklistData.
     */
    public function __construct(IDbConnection $dbConn) {
        $this->data = new BlacklistData($dbConn);
    }

    /**
     * @return BlacklistData The data accessor created in the constructor.
     */
    public function getBlacklistData(): BlacklistData {
        return $this->data;
    }

    /**
     * Adds a blacklist entry for the given item.
     *
     * @param UploadInfo|string $item An upload (its hex hash string is decoded with hex2bin)
     *                                or a hash string that is forwarded unchanged.
     * @param string $reason Reason passed through to BlacklistData::createBlacklistEntry.
     */
    public function createBlacklistEntry(UploadInfo|string $item, string $reason): void {
        $hash = $item instanceof UploadInfo
            ? hex2bin($item->getHashString())
            : $item;

        $this->data->createBlacklistEntry($hash, $reason);
    }

    /**
     * Looks up the blacklist entry for the given item.
     *
     * @param UploadInfo|string $item An upload (its hex hash string is decoded with hex2bin)
     *                                or a hash string that is forwarded unchanged.
     * @return BlacklistInfo|null Whatever BlacklistData::getBlacklistEntry returns for the hash.
     */
    public function getBlacklistEntry(UploadInfo|string $item): ?BlacklistInfo {
        // will this ever be useful? who knows!
        $hash = $item instanceof UploadInfo
            ? hex2bin($item->getHashString())
            : $item;

        return $this->data->getBlacklistEntry($hash);
    }
}

View file

@ -0,0 +1,35 @@
<?php
namespace EEPROM\Blacklist;
use InvalidArgumentException;
use Index\Data\DbStatementCache;
use Index\Data\IDbConnection;
/**
 * Database access for the prm_blacklist table.
 */
class BlacklistData {
    private DbStatementCache $cache;

    /**
     * @param IDbConnection $dbConn Connection the statement cache is built on.
     */
    public function __construct(IDbConnection $dbConn) {
        $this->cache = new DbStatementCache($dbConn);
    }

    /**
     * Fetches the blacklist row whose bl_hash equals the given value.
     *
     * @param string $hash Value compared against bl_hash.
     * @return BlacklistInfo|null The entry, or null when the query yields no row.
     */
    public function getBlacklistEntry(string $hash): ?BlacklistInfo {
        $stmt = $this->cache->get('SELECT bl_hash, bl_reason, UNIX_TIMESTAMP(bl_created) FROM prm_blacklist WHERE bl_hash = ?');
        $stmt->addParameter(1, $hash);
        $stmt->execute();

        $result = $stmt->getResult();
        return $result->next() ? new BlacklistInfo($result) : null;
    }

    /**
     * Inserts a new blacklist row.
     *
     * @param string $hash Hash to blacklist; must be exactly 32 bytes long.
     * @param string $reason One of BlacklistInfo::REASONS.
     * @throws InvalidArgumentException If $hash is not 32 bytes or $reason is not a known reason.
     */
    public function createBlacklistEntry(string $hash, string $reason): void {
        // strlen() counts bytes, which is what the length check is about.
        if(strlen($hash) !== 32)
            throw new InvalidArgumentException('$hash must be 32 bytes.');
        // Strict membership test: no loose-comparison type juggling on the reason.
        if(!in_array($reason, BlacklistInfo::REASONS, true))
            throw new InvalidArgumentException('$reason is not a valid reason.');

        $stmt = $this->cache->get('INSERT INTO prm_blacklist (bl_hash, bl_reason) VALUES (?, ?)');
        $stmt->addParameter(1, $hash);
        $stmt->addParameter(2, $reason);
        $stmt->execute();
    }
}

View file

@ -0,0 +1,47 @@
<?php
namespace EEPROM\Blacklist;
use Index\DateTime;
use Index\Data\IDbResult;
/**
 * Read-only view of a single blacklist row, populated from a database result.
 */
class BlacklistInfo {
    /** Reason values accepted for a blacklist entry. */
    public const REASONS = [
        'copyright',
        'rules',
        'other',
    ];

    private string $blHash;
    private string $blReason;
    private int $blCreated;

    /**
     * Reads the entry from the current row of $result.
     *
     * @param IDbResult $result Result whose columns 0, 1 and 2 are read as the
     *                          hash (string), reason (string) and creation time (integer).
     */
    public function __construct(IDbResult $result) {
        $this->blHash = $result->getString(0);
        $this->blReason = $result->getString(1);
        $this->blCreated = $result->getInteger(2);
    }

    /**
     * @return string Hash exactly as read from column 0 of the result.
     */
    public function getHash(): string {
        return $this->blHash;
    }

    /**
     * @return string Reason exactly as read from column 1 of the result.
     */
    public function getReason(): string {
        return $this->blReason;
    }

    /**
     * @return bool True when the reason is 'copyright'.
     */
    public function isCopyrightTakedown(): bool {
        return 'copyright' === $this->blReason;
    }

    /**
     * @return bool True when the reason is 'rules'.
     */
    public function isRulesViolation(): bool {
        return 'rules' === $this->blReason;
    }

    /**
     * @return int Creation time as the integer read from column 2 of the result.
     */
    public function getCreatedTime(): int {
        return $this->blCreated;
    }

    /**
     * @return DateTime Creation time converted via DateTime::fromUnixTimeSeconds.
     */
    public function getCreatedAt(): DateTime {
        $seconds = $this->blCreated;
        return DateTime::fromUnixTimeSeconds($seconds);
    }
}

View file

@ -12,6 +12,7 @@ class EEPROMContext {
private AuthInfo $authInfo;
private Apps\AppsContext $appsCtx;
private Blacklist\BlacklistContext $blacklistCtx;
private Uploads\UploadsContext $uploadsCtx;
private Users\UsersContext $usersCtx;
@ -22,6 +23,7 @@ class EEPROMContext {
$this->authInfo = new AuthInfo;
$this->appsCtx = new Apps\AppsContext($dbConn);
$this->blacklistCtx = new Blacklist\BlacklistContext($dbConn);
$this->uploadsCtx = new Uploads\UploadsContext($config, $dbConn);
$this->usersCtx = new Users\UsersContext($dbConn);
}
@ -42,6 +44,10 @@ class EEPROMContext {
return $this->appsCtx;
}
public function getBlacklistContext(): Blacklist\BlacklistContext {
return $this->blacklistCtx;
}
public function getUploadsContext(): Uploads\UploadsContext {
return $this->uploadsCtx;
}
@ -67,6 +73,7 @@ class EEPROMContext {
$this->authInfo,
$this->appsCtx,
$this->uploadsCtx,
$this->blacklistCtx,
$isApiDomain
));

View file

@ -26,7 +26,7 @@ class LandingRoutes extends RouteHandler {
$stats->types = 0;
$stats->members = 0;
$result = $dbConn->query('SELECT COUNT(upload_id), SUM(upload_size), COUNT(DISTINCT upload_type) FROM prm_uploads WHERE upload_deleted IS NULL AND upload_dmca IS NULL');
$result = $dbConn->query('SELECT COUNT(upload_id), SUM(upload_size), COUNT(DISTINCT upload_type) FROM prm_uploads WHERE upload_deleted IS NULL');
if($result->next()) {
$stats->files = $result->getInteger(0);
$stats->size = $result->getInteger(1);

View file

@ -16,7 +16,6 @@ class UploadInfo {
private ?int $accessed;
private ?int $expires;
private ?int $deleted;
private ?int $dmca;
private int $bump;
private string $name;
private string $type;
@ -32,11 +31,10 @@ class UploadInfo {
$this->accessed = $result->getIntegerOrNull(6);
$this->expires = $result->getIntegerOrNull(7);
$this->deleted = $result->getIntegerOrNull(8);
$this->dmca = $result->getIntegerOrNull(9);
$this->bump = $result->getInteger(10);
$this->name = $result->getString(11);
$this->type = $result->getString(12);
$this->size = $result->getInteger(13);
$this->bump = $result->getInteger(9);
$this->name = $result->getString(10);
$this->type = $result->getString(11);
$this->size = $result->getInteger(12);
}
public function getId(): string {
@ -119,18 +117,6 @@ class UploadInfo {
return $this->deleted === null ? null : DateTime::fromUnixTimeSeconds($this->deleted);
}
public function isCopyrightTakedown(): bool {
return $this->dmca !== null;
}
public function getCopyrightTakedownTime(): ?int {
return $this->dmca;
}
public function getCopyrightTakedownAt(): ?DateTime {
return $this->dmca === null ? null : DateTime::fromUnixTimeSeconds($this->dmca);
}
public function getBumpAmount(): int {
return $this->bump;
}

View file

@ -17,23 +17,19 @@ class UploadsData {
public function getUploads(
?bool $deleted = null,
?bool $expired = null,
?bool $dmca = null
?bool $expired = null
): array {
$hasDeleted = $deleted !== null;
$hasExpired = $expired !== null;
$hasDMCA = $dmca !== null;
$args = 0;
$query = 'SELECT upload_id, user_id, app_id, LOWER(HEX(upload_hash)), INET6_NTOA(upload_ip), UNIX_TIMESTAMP(upload_created), UNIX_TIMESTAMP(upload_accessed), UNIX_TIMESTAMP(upload_expires), UNIX_TIMESTAMP(upload_deleted), UNIX_TIMESTAMP(upload_dmca), upload_bump, upload_name, upload_type, upload_size FROM prm_uploads';
$query = 'SELECT upload_id, user_id, app_id, LOWER(HEX(upload_hash)), INET6_NTOA(upload_ip), UNIX_TIMESTAMP(upload_created), UNIX_TIMESTAMP(upload_accessed), UNIX_TIMESTAMP(upload_expires), UNIX_TIMESTAMP(upload_deleted), upload_bump, upload_name, upload_type, upload_size FROM prm_uploads';
if($hasDeleted) {
++$args;
$query .= sprintf(' WHERE upload_deleted %s NULL', $deleted ? 'IS NOT' : 'IS');
}
if($hasExpired)
$query .= sprintf(' %s upload_expires %s NOW()', ++$args > 1 ? 'AND' : 'WHERE', $expired ? '<=' : '>');
if($hasDMCA)
$query .= sprintf(' %s upload_dmca %s NULL', ++$args > 1 ? 'AND' : 'WHERE', $dmca ? 'IS NOT' : 'IS');
$stmt = $this->cache->get($query);
$stmt->execute();
@ -59,7 +55,7 @@ class UploadsData {
$hasUserInfo = $userInfo !== null;
$args = 0;
$query = 'SELECT upload_id, user_id, app_id, LOWER(HEX(upload_hash)), INET6_NTOA(upload_ip), UNIX_TIMESTAMP(upload_created), UNIX_TIMESTAMP(upload_accessed), UNIX_TIMESTAMP(upload_expires), UNIX_TIMESTAMP(upload_deleted), UNIX_TIMESTAMP(upload_dmca), upload_bump, upload_name, upload_type, upload_size FROM prm_uploads';
$query = 'SELECT upload_id, user_id, app_id, LOWER(HEX(upload_hash)), INET6_NTOA(upload_ip), UNIX_TIMESTAMP(upload_created), UNIX_TIMESTAMP(upload_accessed), UNIX_TIMESTAMP(upload_expires), UNIX_TIMESTAMP(upload_deleted), upload_bump, upload_name, upload_type, upload_size FROM prm_uploads';
if($hasUploadId) {
++$args;
$query .= ' WHERE upload_id = ?';
@ -158,7 +154,7 @@ class UploadsData {
}
public function nukeUpload(UploadInfo|string $uploadInfo): void {
$stmt = $this->cache->get('DELETE FROM prm_uploads WHERE upload_id = ? AND upload_dmca IS NULL');
$stmt = $this->cache->get('DELETE FROM prm_uploads WHERE upload_id = ?');
$stmt->addParameter(1, $uploadInfo instanceof UploadInfo ? $uploadInfo->getId() : $uploadInfo);
$stmt->execute();
}

View file

@ -7,6 +7,7 @@ use Index\Routing\IRouteHandler;
use Index\Routing\Route;
use EEPROM\Apps\AppsContext;
use EEPROM\Auth\AuthInfo;
use EEPROM\Blacklist\BlacklistContext;
use EEPROM\Uploads\UploadsContext;
class UploadsRoutes implements IRouteHandler {
@ -14,6 +15,7 @@ class UploadsRoutes implements IRouteHandler {
private AuthInfo $authInfo,
private AppsContext $appsCtx,
private UploadsContext $uploadsCtx,
private BlacklistContext $blacklistCtx,
private bool $isApiDomain
) {}
@ -59,9 +61,15 @@ class UploadsRoutes implements IRouteHandler {
return 404;
}
if($uploadInfo->isCopyrightTakedown()) {
$response->setContent('File is unavailable for copyright reasons.');
return 451;
$blInfo = $this->blacklistCtx->getBlacklistEntry($uploadInfo);
if($blInfo !== null) {
$response->setContent(match($blInfo->getReason()) {
'copyright' => 'File is unavailable for copyright reasons.',
'rules' => 'File was in violation of the rules.',
default => 'File was removed for reasons beyond understanding.',
});
return $blInfo->isCopyrightTakedown() ? 451 : 410;
}
if($uploadInfo->isDeleted() || $uploadInfo->hasExpired()) {
@ -157,19 +165,11 @@ class UploadsRoutes implements IRouteHandler {
$uploadsData = $this->uploadsCtx->getUploadsData();
$hash = hash_file('sha256', $localFile);
// this is stupid: dmca status is stored as a file record rather than in a separate table requiring this hack ass garbage
$uploadInfo = $uploadsData->getUpload(appInfo: $appInfo, userInfo: $userInfo, hashString: $hash)
?? $uploadsData->getUpload(hashString: $hash);
if($uploadInfo !== null) {
if($uploadInfo->isCopyrightTakedown())
$blInfo = $this->blacklistCtx->getBlacklistEntry(hex2bin($hash));
if($blInfo !== null)
return 451;
if($uploadInfo->getUserId() !== $userInfo->getId()
|| $uploadInfo->getAppId() !== $appInfo->getId())
$uploadInfo = null;
}
$uploadInfo = $uploadsData->getUpload(appInfo: $appInfo, userInfo: $userInfo, hashString: $hash);
if($uploadInfo === null) {
$uploadInfo = $uploadsData->createUpload(
$appInfo, $userInfo, $_SERVER['REMOTE_ADDR'],
@ -205,11 +205,6 @@ class UploadsRoutes implements IRouteHandler {
return 404;
}
if($uploadInfo->isCopyrightTakedown()) {
$response->setContent('File is unavailable for copyright reasons.');
return 451;
}
if($uploadInfo->isDeleted() || $uploadInfo->hasExpired()) {
$response->setContent('File not found.');
return 404;