Direktori Peraturan di Indonesia saat ini:
- JDIHN (lambat)
- Peraturan BPK (yang kita coba kali ini)
- Peraturan.go.id (belum nyoba)
- lainnya? (beritahu saya)
DiDOM adalah parser HTML dan XML yang sederhana dan cepat ("simple and fast HTML and XML parser").
Kalau sudah terbiasa dengan Simple HTML DOM, DiDOM adalah alternatif yang lebih modern.
URL: https://github.com/Imangazaliev/DiDOM
Pertama, kita buat tabel di Supabase:
-- Target table for the scraper: the script sends one INSERT per scraped
-- Details/<id> page. Every scraped field is stored as free text.
-- NOTE(review): url_id has no UNIQUE constraint, so re-scraping the same id
-- presumably adds a second row — confirm before relying on upsert behaviour.
CREATE TABLE peraturan_bpk (
    -- Surrogate key generated by the database; the scraper never sends it.
    id                     SERIAL PRIMARY KEY,

    -- Metadata fields; the scraper derives these column names from the labels
    -- found on the page (lower-cased, non-alphanumerics turned into "_").
    tipe_dokumen           TEXT,
    judul                  TEXT,
    t_e_u                  TEXT,
    nomor                  TEXT,
    bentuk                 TEXT,
    bentuk_singkat         TEXT,
    tahun                  TEXT,
    tempat_penetapan       TEXT,
    tanggal_penetapan      TEXT,
    tanggal_pengundangan   TEXT,
    tanggal_berlaku        TEXT,
    sumber                 TEXT,
    subjek                 TEXT,
    status                 TEXT,
    bahasa                 TEXT,
    lokasi                 TEXT,
    bidang                 TEXT,

    -- Fields the scraper fills in explicitly.
    judul_singkat          TEXT,  -- text of the page's <h1>
    brief                  TEXT,  -- first paragraph inside .card-body
    abstract               TEXT,  -- list items joined with "<br />"
    catatan                TEXT,  -- list items joined with "<br />"
    nama_file              TEXT,
    link_download          TEXT,  -- absolute download URL
    status_peraturan       TEXT,
    status_peraturan_item  TEXT,  -- list items joined with "<br />"

    -- Numeric id from https://peraturan.bpk.go.id/Details/<id>.
    url_id                 TEXT
);
Script lengkap + simpan ke supabase
<?php
// Keep errors off stdout (so the coloured progress lines stay clean) but send
// warnings and fatal errors to the PHP error log instead of discarding them.
// On the CLI with no error_log configured that is stderr, which the
// `> output.log 2>&1` redirection captures; otherwise a crash of the
// background job would leave no trace at all.
error_reporting(E_ALL & ~E_DEPRECATED & ~E_NOTICE);
ini_set('display_errors', '0');
ini_set('log_errors', '1');

// Supabase API config.
// SUPABASE_URL / SUPABASE_KEY from the environment take precedence so the
// real key does not have to be committed with the script; the literals below
// are the fallback values used when the variables are not set.
$supabaseUrl = getenv('SUPABASE_URL') ?: 'https://hoyo.supabase.co';
$supabaseKey = getenv('SUPABASE_KEY') ?: 'Nd14s3nd4sm03';
// Name of the Supabase table that receives the scraped rows.
$tabel = 'peraturan_bpk';
require 'vendor/autoload.php';
use DiDom\Document;
/**
 * Turns a free-text label into a lower-case, underscore-separated identifier.
 *
 * Every run of characters other than ASCII letters and digits becomes a
 * single "_", and leading/trailing underscores are removed
 * (e.g. "T.E.U." becomes "t_e_u").
 *
 * @param string $text Label to convert.
 * @return string The identifier; empty when $text has no letters or digits.
 */
function safe_url($text) {
    $lowered = strtolower($text);
    $underscored = preg_replace('/[^a-z0-9]+/i', '_', $lowered);

    return trim($underscored, '_');
}
/**
 * Normalises scraped text to a single-spaced, ASCII-only string.
 *
 * Steps: every whitespace run becomes one space, every character outside
 * `a-z A-Z 0-9 space . , ; -` is dropped, the spaces left behind by dropped
 * characters are collapsed again, and the result is trimmed.
 *
 * Note that the allow-list also removes characters such as "/", "(" and ":".
 *
 * @param mixed $text Raw text; anything that is not a string yields ''.
 * @return string Cleaned text, or '' when the input is blank or not a string.
 */
function cleanText($text) {
    // Compare against '' rather than using empty(): empty('0') is true, which
    // would wrongly erase a legitimate value of "0".
    if (!is_string($text) || trim($text) === '') {
        return '';
    }

    // Newlines/tabs must become spaces first, otherwise the allow-list below
    // would delete them and glue neighbouring words together.
    $text = preg_replace('/\s+/', ' ', $text) ?? '';
    $text = preg_replace('/[^a-zA-Z0-9 .,;-]/', '', $text) ?? '';
    // Dropping a character that sat between two spaces ("a & b") leaves a
    // double space behind, so collapse once more.
    $text = preg_replace('/ {2,}/', ' ', $text) ?? '';

    return trim($text);
}
// Highest Details/<id> to scrape; the loop walks downwards from here to 0.
$start = 314198;

// Returns the cleaned text of every <li> below $node, joined with "<br />".
$joinListItems = function ($node) {
    $items = [];
    foreach ($node->find('li') as $li) {
        $items[] = cleanText($li->text());
    }
    return implode('<br />', $items);
};

for ($i = $start; $i >= 0; $i--) {
    if ($i !== $start) {
        // Release the previous page before sleeping.
        unset($html, $document, $data);
        // Random 0.1–9.9 s pause between ids. It runs after failures as well
        // as successes, so a run of errors (for example the site starting to
        // refuse requests) does not turn into a rapid burst of requests.
        usleep(rand(100000, 9900000));
    }

    // --- Fetch -------------------------------------------------------------
    $uri = "https://peraturan.bpk.go.id/Details/$i";
    // "@" only hides the warning; the failure itself is reported below.
    $html = @file_get_contents($uri);
    if ($html === false) {
        echo "\033[33m [$i] \033[0m \e[31m [GAGAL] \e[0m page not found \n";
        continue;
    }

    $document = new Document($html);

    // The .fs-6 elements are looked up once and reused: index 1 is read for
    // the download links and index 2 for the regulation status.
    $sections = $document->find('.fs-6');
    if (count($sections) === 0) {
        echo "\033[33m [$i] \033[0m \e[31m [GAGAL] \e[0m belum ada isinya \n";
        continue;
    }

    // --- Label/value grid ---------------------------------------------------
    // .col-lg-3 cells supply the labels (turned into column names) and
    // .col-lg-9 cells supply the values.
    $keys = [];
    foreach ($document->find('.col-lg-3') as $keyDiv) {
        $keys[] = safe_url($keyDiv->text());
    }
    $vals = [];
    foreach ($document->find('.col-lg-9') as $valDiv) {
        $vals[] = cleanText($valDiv->text());
    }
    // Keep at most the first 17 pairs, and never more than both lists can
    // supply: array_combine() fails when the two lengths differ.
    $pairCount = min(17, count($keys), count($vals));
    $data = $pairCount > 0
        ? array_combine(array_slice($keys, 0, $pairCount), array_slice($vals, 0, $pairCount))
        : [];
    // A label without any letters or digits yields an empty column name.
    unset($data['']);

    // --- Title and summary --------------------------------------------------
    // first() gives null when nothing matches, so test before calling text().
    $heading = $document->first('h1');
    $data['judul_singkat'] = $heading !== null ? cleanText($heading->text()) : '';

    $brief = $document->first('.card-body p');
    $data['brief'] = $brief !== null ? cleanText($brief->text()) : '';

    // --- Abstract (first .dash list) and notes (second .dash list) ----------
    $dashLists = $document->find('.dash');
    $data['abstract'] = isset($dashLists[0]) ? $joinListItems($dashLists[0]) : '';
    $data['catatan'] = isset($dashLists[1]) ? $joinListItems($dashLists[1]) : '';

    // --- Download link --------------------------------------------------------
    if (isset($sections[1])) {
        $links = $sections[1]->find('a');
        // The first <a> carries the file name, the third one the download href.
        if (isset($links[0], $links[2])) {
            $data['nama_file'] = cleanText($links[0]->text());
            $data['link_download'] = 'https://peraturan.bpk.go.id' . $links[2]->getAttribute('href');
        }
    }

    // --- Regulation status ------------------------------------------------------
    if (isset($sections[2])) {
        $statusDiv = $sections[2];
        $badge = $statusDiv->first('.bg-light-primary');
        $data['status_peraturan'] = $badge !== null ? cleanText($badge->text()) : 'Belum Tersedia';
        $data['status_peraturan_item'] = $joinListItems($statusDiv);
    }

    $data['url_id'] = $i;

    // Drop empty/null fields so they are not sent to Supabase.
    $data = array_filter($data, function ($value) {
        return $value !== null && $value !== '';
    });

    // --- Save to Supabase -----------------------------------------------------
    $payload = json_encode($data);
    if ($payload === false) {
        // Nothing valid to send; report through the normal failure branch.
        $httpCode = 0;
        $response = 'json_encode: ' . json_last_error_msg();
    } else {
        // The last path segment is the Supabase table name ($tabel).
        $ch = curl_init("$supabaseUrl/rest/v1/$tabel");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Without timeouts a stalled connection would block the job forever.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        // NOTE(review): "resolution=merge-duplicates" presumably matches on the
        // primary key only; `id` is never sent, so re-running an id likely
        // inserts a new row instead of updating — confirm against PostgREST docs.
        curl_setopt($ch, CURLOPT_HTTPHEADER, [
            "Content-Type: application/json",
            "apikey: $supabaseKey",
            "Authorization: Bearer $supabaseKey",
            "Prefer: resolution=merge-duplicates"
        ]);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        $response = curl_exec($ch);
        if ($response === false) {
            // Transport-level failure: show the cURL error instead of nothing.
            $response = 'cURL: ' . curl_error($ch);
        }
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
    }

    if ($httpCode === 201 || $httpCode === 200) {
        echo "\033[33m [$i] \033[0m \033[32m [SUKSES] \033[0m Simpan ke Supabase \n";
    } else {
        echo "\033[33m [$i] \033[0m \e[31m [GAGAL] \e[0m menyimpan data ke Supabase \n Status: \033[34m $response \e[0m \n";
    }
}
Jalankan di terminal CPanel
nohup php script.php > output.log 2>&1 &
//atau
nohup php script.php
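Kalau di CPanel, lihat lognya:
Pada perintah pertama lognya ada di file output.log; pada perintah kedua (tanpa pengalihan output) lognya ada di file nohup.out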
Mematikan proses yang berjalan dengan nohup: cari dulu PID-nya, lalu hentikan dengan kill
pgrep -f script.php
kill <PID>