// Crawls the website to discover pages using WebDAV PROPFIND requests.
|
|
|
|
import { url as root_url } from "./editor.js";
|
|
|
|
// Symbol key under which crawlCollection stores a collection's list of file
// hrefs. Being a symbol, it is skipped by Object.getOwnPropertyNames, so it
// never mixes with the string keys (sub-collection URLs) of the same object.
export const __files__ = Symbol("crawlerFiles");
|
|
|
|
/**
 * Resolves the text of each node to an absolute URL string.
 *
 * Every node's `textContent` is resolved against `root_url`. The entry that
 * points at `exclude` is left out of the result.
 *
 * @param {Iterable<{textContent: string}>} nodes - Nodes whose text is a URL
 *   or a path relative to `root_url` (e.g. a NodeList of `href` elements).
 * @param {string|URL|null} [exclude] - URL to drop from the result; when it is
 *   `null` or `undefined` nothing is dropped.
 * @returns {string[]} Absolute hrefs, in the order of `nodes`.
 * @throws {TypeError} If a node's text or `exclude` cannot be parsed as a URL.
 */
function nodeTextToHrefs(nodes, exclude) {
  // Normalize the excluded URL exactly like the candidates. Otherwise
  // equivalent spellings (an origin without its trailing slash, a relative
  // path, a URL object) would never compare equal and the entry would leak
  // into the result.
  const excludedHref =
    exclude == null ? null : new URL(exclude, root_url).href;

  return Array.from(
    nodes,
    (node) => new URL(node.textContent, root_url).href,
  ).filter((href) => href !== excludedHref);
}
|
|
|
|
/**
 * Lists the entries of a collection with a single `PROPFIND` request.
 *
 * The request is sent with `Depth: 1` and the reply is parsed as XML. A
 * `response` element whose `resourcetype` contains a `collection` element is
 * reported as a collection; every other `response` is reported as a file.
 * The requested `url` itself is passed to `nodeTextToHrefs` as the entry to
 * exclude from both lists.
 *
 * @param {string|URL} url - Collection to list.
 * @returns {Promise<[string[], string[]]>} `[collections, files]` as absolute
 *   hrefs. Both lists are empty when the server answers with a non-2xx status.
 */
async function crawl(url) {
  const response = await fetch(url, {
    method: "PROPFIND",
    headers: { Depth: "1" },
  });

  if (!response.ok) {
    // An error page is not a listing. Report it instead of silently parsing
    // it as XML; callers still get the "empty collection" result they got
    // before, so one unreadable directory does not abort the whole crawl.
    console.warn(
      `PROPFIND ${url} failed: ${response.status} ${response.statusText}`,
    );
    return [[], []];
  }

  const doc = new DOMParser().parseFromString(
    await response.text(),
    "text/xml",
  );

  const collections = nodeTextToHrefs(
    doc.querySelectorAll("response:has(resourcetype collection) href"),
    url,
  );
  const files = nodeTextToHrefs(
    doc.querySelectorAll("response:not(:has(resourcetype collection)) href"),
    url,
  );
  return [collections, files];
}
|
|
|
|
/**
 * Recursively crawls a collection and everything below it.
 *
 * @param {string|URL} url - Collection to crawl.
 * @returns {Promise<object>} An object with one string key per direct
 *   sub-collection URL (its value is that sub-collection's own crawl result,
 *   in the order reported by `crawl`) plus the `__files__` symbol key holding
 *   the list of file URLs directly inside `url`.
 */
async function crawlCollection(url) {
  const [subcollections, subfiles] = await crawl(url);

  // Sibling sub-collections are independent of each other, so crawl them
  // concurrently instead of paying for every round trip one after another.
  const subtrees = await Promise.all(
    subcollections.map((subcollection) => crawlCollection(subcollection)),
  );

  // Assign in listing order so the key order matches the server's order.
  const result = {};
  subcollections.forEach((subcollection, index) => {
    result[subcollection] = subtrees[index];
  });
  result[__files__] = subfiles;
  return result;
}
|
|
|
|
/**
 * Crawls `url` and wraps the outcome in an object keyed by that URL.
 *
 * @param {string|URL} url - Collection at which the crawl starts.
 * @returns {Promise<object>} An object whose single key is `url` and whose
 *   value is the result of `crawlCollection(url)`.
 */
async function recursiveCrawl(url) {
  const tree = await crawlCollection(url);
  const wrapped = {};
  wrapped[url] = tree;
  return wrapped;
}
|
|
|
|
/**
 * Builds a nested `<ul>` describing one crawled collection.
 *
 * The list starts with a link to the collection itself, followed by one
 * `<li>` per sub-collection (holding that sub-collection's own list) and one
 * `<li>` per file (holding a link to the file). Link labels are the URL with
 * `root_url` removed and a leading "/" added.
 *
 * @param {string} url - URL of the collection being rendered.
 * @param {object} recursiveCrawlResult - Crawl result for `url`: string keys
 *   are sub-collection URLs, the `__files__` key holds the file URLs.
 * @returns {HTMLUListElement} The list for this collection; it is not
 *   attached to the document here.
 */
function formattedIndexDirectory(url, recursiveCrawlResult) {
  // Only string keys are sub-collections; the symbol key is skipped here.
  const childUrls = Object.getOwnPropertyNames(recursiveCrawlResult);

  // Creates an anchor that points at `href`, labelled relative to the root.
  const linkTo = (href) => {
    const anchor = document.createElement("a");
    anchor.href = href;
    anchor.innerText = `/${href.replace(root_url, "")}`;
    return anchor;
  };

  // Wraps a node in a fresh list item.
  const asListItem = (child) => {
    const item = document.createElement("li");
    item.appendChild(child);
    return item;
  };

  const list = document.createElement("ul");
  list.appendChild(linkTo(url));

  for (const childUrl of childUrls) {
    const childList = formattedIndexDirectory(
      childUrl,
      recursiveCrawlResult[childUrl],
    );
    list.appendChild(asListItem(childList));
  }

  for (const fileUrl of recursiveCrawlResult[__files__]) {
    list.appendChild(asListItem(linkTo(fileUrl)));
  }

  return list;
}
|
|
|
|
// Module entry point: crawl everything below the configured root, then render
// the resulting tree and attach it to the page.
const rcr = await recursiveCrawl(root_url);
// recursiveCrawl returns an object with a single key: the crawled root URL.
const [root] = Object.getOwnPropertyNames(rcr);
const formatted = formattedIndexDirectory(root, rcr[root]);
document.body.appendChild(formatted);
|