Add script to clean orphans

This commit is contained in:
Kamran Ahmed
2026-03-03 18:22:27 +00:00
parent b0878c3481
commit e4c1e1e4d5
3 changed files with 316 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
# Manually triggered workflow: runs the orphaned-content cleanup script for one
# roadmap (or all of them) and opens a pull request with the resulting deletions.
name: Cleanup Orphaned Content

on:
  workflow_dispatch:
    inputs:
      roadmap_slug:
        description: "The ID of the roadmap to clean up (or __all__ for all roadmaps)"
        required: true
        default: "__all__"

jobs:
  cleanup-content:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup pnpm@v9
        uses: pnpm/action-setup@v4
        with:
          version: 9
          run_install: false

      - name: Setup Node.js Version 20 (LTS)
        uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: 'pnpm'

      - name: Install Dependencies and Run Cleanup
        # The user-supplied input is handed to the shell through an environment
        # variable instead of being expanded into the script text, so its value
        # can never be interpreted as shell syntax.
        env:
          ROADMAP_SLUG: ${{ inputs.roadmap_slug }}
        run: |
          echo "Installing Dependencies"
          pnpm install
          echo "Running Orphaned Content Cleanup"
          npm run cleanup:orphaned-content -- "--roadmap-slug=${ROADMAP_SLUG}"

      - name: Read cleanup summary
        id: read-summary
        run: |
          if [ -f .cleanup-summary.md ]; then
            {
              echo 'summary<<EOF'
              cat .cleanup-summary.md
              echo 'EOF'
            } >> "$GITHUB_OUTPUT"
          fi

      # The summary is a scratch file written on every run. It must be removed
      # before looking for changes, otherwise its presence alone would make the
      # working tree look dirty and it could end up in the PR.
      - name: Delete summary file
        run: rm -f .cleanup-summary.md

      - name: Check for changes
        id: verify-changed-files
        run: |
          if [ -n "$(git status --porcelain)" ]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          else
            echo "changed=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Create PR
        if: steps.verify-changed-files.outputs.changed == 'true'
        uses: peter-evans/create-pull-request@v7
        with:
          delete-branch: false
          branch: "chore/cleanup-orphaned-content-${{ inputs.roadmap_slug }}"
          base: "master"
          labels: |
            automated pr
          reviewers: jcanalesluna,kamranahmedse
          commit-message: "chore: cleanup orphaned content files"
          title: "chore: cleanup orphaned content - ${{ inputs.roadmap_slug }}"
          body: |
            ${{ steps.read-summary.outputs.summary }}

            > [!IMPORTANT]
            > This PR removes orphaned/duplicate content files for: ${{ inputs.roadmap_slug }}
            >
            > Commit: ${{ github.sha }}
            > Workflow Path: ${{ github.workflow_ref }}

            **Please review the changes and merge the PR if everything looks correct.**

View File

@@ -33,6 +33,7 @@
"sync:repo-to-database": "tsx ./scripts/sync-repo-to-database.ts",
"sync:roadmap": "tsx ./scripts/sync-roadmap-to-database.ts",
"migrate:content-repo-to-database": "tsx ./scripts/migrate-content-repo-to-database.ts",
"cleanup:orphaned-content": "tsx ./scripts/cleanup-orphaned-content.ts",
"official:roadmap-assets": "tsx ./scripts/official-roadmap-assets.ts",
"test:e2e": "playwright test"
},

View File

@@ -0,0 +1,234 @@
import type { Node } from '@roadmapsh/editor';
import matter from 'gray-matter';
import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { slugify } from '../src/lib/slugger';
// ES modules have no built-in __filename/__dirname, so both are derived from
// this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Root directory under which each roadmap keeps its files in `<slug>/`,
// resolved relative to this script's location.
const ROADMAP_CONTENT_DIR = path.join(__dirname, '../src/data/roadmaps');
// Command-line arguments after the runtime and script path.
const args = process.argv.slice(2);
// Only the first argument is inspected; a leading `--roadmap-slug=` is stripped
// when present, otherwise the argument is used as the slug as-is.
const roadmapSlug = args?.[0]?.replace('--roadmap-slug=', '');
// A missing or empty slug is a usage error: print help and exit non-zero.
if (!roadmapSlug) {
console.error('Usage: tsx scripts/cleanup-orphaned-content.ts --roadmap-slug=<slug|__all__>');
process.exit(1);
}
/** Describes one content file that the cleanup removed and why. */
interface OrphanEntry {
/** File name inside the roadmap's `content` directory. */
file: string;
/** Human-readable explanation of why the file was classified as orphaned. */
reason: string;
/** Name of the file this one duplicates, or `'N/A'` when there is none. */
duplicateOf: string;
}
/**
 * Loads the roadmap definition for `slug`.
 *
 * The roadmap.sh API is tried first. On any failure (network error, non-2xx
 * status, or an `error` field in the payload) the function falls back to the
 * `<slug>/<slug>.json` file stored under the roadmaps content directory.
 *
 * @param slug - Roadmap slug, used both in the API URL and in the local path.
 * @returns The parsed roadmap payload; its shape is not validated here.
 * @throws If the API request fails and the local JSON file cannot be read or
 *   parsed.
 */
async function fetchRoadmapJson(slug: string): Promise<{ nodes: Node[] }> {
  try {
    const response = await fetch(
      `https://roadmap.sh/api/v1-official-roadmap/${slug}`,
    );
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const data = await response.json();
    if (data.error) {
      throw new Error(data.error);
    }

    return data;
  } catch (err) {
    // Surface why the API path failed; without it a fallback caused by an
    // outage is indistinguishable from one caused by a bad slug.
    const reason = err instanceof Error ? err.message : String(err);
    console.log(
      ` API fetch failed for ${slug} (${reason}), falling back to local JSON`,
    );

    const localPath = path.join(ROADMAP_CONTENT_DIR, slug, `${slug}.json`);
    const raw = await fs.readFile(localPath, 'utf-8');
    return JSON.parse(raw);
  }
}
/**
 * Tells whether the roadmap identified by `slug` is rendered by the editor.
 *
 * Reads `<slug>/<slug>.md` and inspects the `renderer` key of its front
 * matter. Any failure while reading or parsing the file counts as "no".
 *
 * @param slug - Roadmap slug (directory and markdown file name).
 * @returns `true` only when the front matter has `renderer` equal to `'editor'`.
 */
async function isEditorRoadmap(slug: string): Promise<boolean> {
  const markdownFile = path.join(ROADMAP_CONTENT_DIR, slug, `${slug}.md`);

  try {
    const frontMatter = matter(await fs.readFile(markdownFile, 'utf-8')).data;
    return frontMatter.renderer === 'editor';
  } catch {
    // Missing or unparsable file: not an editor roadmap.
    return false;
  }
}
/**
 * Lists the slugs of all editor-rendered roadmaps.
 *
 * Every entry of the roadmaps content directory that is a directory and passes
 * `isEditorRoadmap` is included, in the order `readdir` returned them.
 *
 * @returns Directory names of the editor-rendered roadmaps.
 */
async function getEditorRoadmapSlugs(): Promise<string[]> {
  const editorSlugs: string[] = [];

  for (const entry of await fs.readdir(ROADMAP_CONTENT_DIR)) {
    // Entries that cannot be stat'ed are treated like non-directories.
    const info = await fs
      .stat(path.join(ROADMAP_CONTENT_DIR, entry))
      .catch(() => null);
    const isDirectory = info?.isDirectory() ?? false;

    if (isDirectory && (await isEditorRoadmap(entry))) {
      editorSlugs.push(entry);
    }
  }

  return editorSlugs;
}
/**
 * Splits a content file name of the form `<slug>@<nodeId>.md` into its parts.
 *
 * The node id is the dot-free segment between an `@` and the `.md` suffix;
 * everything before that `@` is the slug.
 *
 * @param filename - Bare file name (no directory component).
 * @returns The extracted `slug` and `nodeId`, or `null` when the name does not
 *   follow the pattern.
 */
function parseContentFilename(filename: string): { slug: string; nodeId: string } | null {
  const parts = /^(.+)@([^.]+)\.md$/.exec(filename);
  if (parts === null) {
    return null;
  }

  const [, slug, nodeId] = parts;
  return { slug, nodeId };
}
/**
 * Deletes content files of one roadmap that no longer correspond to a labelled
 * topic or subtopic node in the roadmap definition.
 *
 * A file `<slug>@<nodeId>.md` is kept when `nodeId` belongs to such a node and
 * `<slug>` equals the slugified node label. Other files matching the naming
 * pattern are classified as:
 *  - "Same nodeId, old slug": the node exists but the slug part is outdated.
 *    The file is only removed when the correctly named file is present;
 *    otherwise it is kept, because it holds the only content for that node.
 *  - "Same slug, old nodeId": the node id is unknown but a valid file with the
 *    same slug exists.
 *  - "Topic removed from roadmap": neither the node id nor the slug is known.
 * File names that do not match the pattern are ignored.
 *
 * Nothing is deleted when the content directory is missing or when the roadmap
 * definition yields no topic nodes at all; in the latter case every file would
 * otherwise be treated as orphaned.
 *
 * @param slug - Roadmap slug (directory name under the roadmaps content dir).
 * @returns One entry per deleted file; empty when nothing was removed.
 */
async function cleanupRoadmap(slug: string): Promise<OrphanEntry[]> {
  console.log(`\nProcessing: ${slug}`);

  const contentDir = path.join(ROADMAP_CONTENT_DIR, slug, 'content');
  const stat = await fs.stat(contentDir).catch(() => null);
  if (!stat?.isDirectory()) {
    console.log(` No content directory found, skipping`);
    return [];
  }

  const roadmapData = await fetchRoadmapJson(slug);
  if (!Array.isArray(roadmapData?.nodes)) {
    console.log(` No nodes found in roadmap JSON, skipping`);
    return [];
  }

  const topicNodes = roadmapData.nodes.filter(
    (node) =>
      node?.type &&
      ['topic', 'subtopic'].includes(node.type) &&
      node.data?.label,
  );

  // Node id -> slug its content file is expected to carry.
  const nodeIdToExpectedSlug = new Map<string, string>();
  for (const node of topicNodes) {
    nodeIdToExpectedSlug.set(node.id, slugify(node.data.label as string));
  }

  // With no known topics every file would look orphaned; a payload like that
  // is far more likely to be bad data than a roadmap that lost all its topics.
  if (nodeIdToExpectedSlug.size === 0) {
    console.log(` No topic nodes found in roadmap JSON, skipping`);
    return [];
  }

  const files = await fs.readdir(contentDir);
  const existingFiles = new Set(files);

  // First pass: separate files that are exactly right from the ones that need
  // a closer look, remembering the valid file for each slug.
  const validFilesBySlug = new Map<string, string>();
  const candidates: Array<{ file: string; slug: string; nodeId: string }> = [];
  for (const file of files) {
    const parsed = parseContentFilename(file);
    if (!parsed) {
      continue;
    }

    if (nodeIdToExpectedSlug.get(parsed.nodeId) === parsed.slug) {
      validFilesBySlug.set(parsed.slug, file);
    } else {
      candidates.push({ file, ...parsed });
    }
  }

  // Second pass: classify everything that is not exactly right.
  const orphans: OrphanEntry[] = [];
  for (const { file, slug: fileSlug, nodeId } of candidates) {
    const expectedSlug = nodeIdToExpectedSlug.get(nodeId);

    if (expectedSlug !== undefined) {
      const correctFile = `${expectedSlug}@${nodeId}.md`;
      if (!existingFiles.has(correctFile)) {
        // The node is still part of the roadmap and this is its only content
        // file; deleting it would lose the content rather than a duplicate.
        console.log(` Kept: ${file} (expected ${correctFile} does not exist)`);
        continue;
      }

      orphans.push({
        file,
        reason: 'Same nodeId, old slug',
        duplicateOf: correctFile,
      });
      continue;
    }

    const validFile = validFilesBySlug.get(fileSlug);
    if (validFile) {
      orphans.push({
        file,
        reason: 'Same slug, old nodeId',
        duplicateOf: validFile,
      });
    } else {
      orphans.push({
        file,
        reason: 'Topic removed from roadmap',
        duplicateOf: 'N/A',
      });
    }
  }

  for (const orphan of orphans) {
    const filePath = path.join(contentDir, orphan.file);
    await fs.unlink(filePath);
    console.log(` Deleted: ${orphan.file} (${orphan.reason})`);
  }

  if (orphans.length === 0) {
    console.log(` No orphans found`);
  }

  return orphans;
}
/**
 * Entry point: cleans up the requested roadmap(s) and writes a Markdown
 * summary of everything that was removed to `.cleanup-summary.md` in the
 * directory above this script.
 *
 * With `__all__` every editor-rendered roadmap is processed. With a single
 * slug the process exits with status 1 if that roadmap is not editor-rendered.
 */
async function main() {
  const processAll = roadmapSlug === '__all__';

  if (!processAll && !(await isEditorRoadmap(roadmapSlug))) {
    console.error(`${roadmapSlug} is not an editor-rendered roadmap`);
    process.exit(1);
  }

  const slugs = processAll ? await getEditorRoadmapSlugs() : [roadmapSlug];
  console.log(`Processing ${slugs.length} roadmap(s)...`);

  // Only roadmaps that actually lost files are recorded.
  const removedByRoadmap = new Map<string, OrphanEntry[]>();
  let totalOrphans = 0;
  for (const slug of slugs) {
    const removed = await cleanupRoadmap(slug);
    if (removed.length === 0) {
      continue;
    }
    removedByRoadmap.set(slug, removed);
    totalOrphans += removed.length;
  }
  const roadmapsAffected = removedByRoadmap.size;

  // Assemble the report: a headline followed by one table per affected roadmap.
  const chunks: string[] = [
    `## Orphaned Content Cleanup\n\n`,
    `Removed **${totalOrphans}** orphaned content file(s) across **${roadmapsAffected}** roadmap(s).\n\n`,
  ];
  removedByRoadmap.forEach((removed, slug) => {
    chunks.push(`### ${slug}\n\n`);
    chunks.push(`| Removed File | Reason | Duplicate Of |\n`);
    chunks.push(`|---|---|---|\n`);
    for (const { file, reason, duplicateOf } of removed) {
      const duplicateCell = duplicateOf === 'N/A' ? 'N/A' : `\`${duplicateOf}\``;
      chunks.push(`| \`${file}\` | ${reason} | ${duplicateCell} |\n`);
    }
    chunks.push(`\n`);
  });

  const summaryPath = path.join(__dirname, '..', '.cleanup-summary.md');
  await fs.writeFile(summaryPath, chunks.join(''));

  console.log(`\nSummary written to .cleanup-summary.md`);
  console.log(`Total: ${totalOrphans} orphaned file(s) removed across ${roadmapsAffected} roadmap(s)`);
}

// Any unhandled failure is reported and turned into a non-zero exit status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});