From e4c1e1e4d5e92f01439c159d2d5ffe1f162ca8b2 Mon Sep 17 00:00:00 2001
From: Kamran Ahmed
Date: Tue, 3 Mar 2026 18:22:27 +0000
Subject: [PATCH] Add script to clean orphans

---
 .../workflows/cleanup-orphaned-content.yml    |  84 ++++++
 package.json                                  |   1 +
 scripts/cleanup-orphaned-content.ts           | 274 ++++++++++++++++++
 3 files changed, 359 insertions(+)
 create mode 100644 .github/workflows/cleanup-orphaned-content.yml
 create mode 100644 scripts/cleanup-orphaned-content.ts

diff --git a/.github/workflows/cleanup-orphaned-content.yml b/.github/workflows/cleanup-orphaned-content.yml
new file mode 100644
index 000000000..7e985f016
--- /dev/null
+++ b/.github/workflows/cleanup-orphaned-content.yml
@@ -0,0 +1,84 @@
+name: Cleanup Orphaned Content
+
+on:
+  workflow_dispatch:
+    inputs:
+      roadmap_slug:
+        description: "The ID of the roadmap to clean up (or __all__ for all roadmaps)"
+        required: true
+        default: "__all__"
+
+jobs:
+  cleanup-content:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup pnpm@v9
+        uses: pnpm/action-setup@v4
+        with:
+          version: 9
+          run_install: false
+
+      - name: Setup Node.js Version 20 (LTS)
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: 'pnpm'
+
+      - name: Install Dependencies and Run Cleanup
+        # Pass the input through the environment instead of interpolating it
+        # into the script, so its value is never parsed as shell syntax.
+        env:
+          ROADMAP_SLUG: ${{ inputs.roadmap_slug }}
+        run: |
+          echo "Installing Dependencies"
+          pnpm install
+          echo "Running Orphaned Content Cleanup"
+          npm run cleanup:orphaned-content -- --roadmap-slug="$ROADMAP_SLUG"
+
+      - name: Read cleanup summary
+        id: read-summary
+        run: |
+          if [ -f .cleanup-summary.md ]; then
+            {
+              echo 'summary<<EOF'
+              cat .cleanup-summary.md
+              echo 'EOF'
+            } >> "$GITHUB_OUTPUT"
+            # Remove the summary now so it neither counts as a change below
+            # nor ends up committed in the PR.
+            rm -f .cleanup-summary.md
+          fi
+
+      - name: Check for changes
+        id: verify-changed-files
+        run: |
+          if [ -n "$(git status --porcelain)" ]; then
+            echo "changed=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "changed=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Create PR
+        if: steps.verify-changed-files.outputs.changed == 'true'
+        uses: peter-evans/create-pull-request@v7
+        with:
+          delete-branch: false
+          branch: "chore/cleanup-orphaned-content-${{ inputs.roadmap_slug }}"
+          base: "master"
+          labels: |
+            automated pr
+          reviewers: jcanalesluna,kamranahmedse
+          commit-message: "chore: cleanup orphaned content files"
+          title: "chore: cleanup orphaned content - ${{ inputs.roadmap_slug }}"
+          body: |
+            ${{ steps.read-summary.outputs.summary }}
+
+            > [!IMPORTANT]
+            > This PR removes orphaned/duplicate content files for: ${{ inputs.roadmap_slug }}
+            >
+            > Commit: ${{ github.sha }}
+            > Workflow Path: ${{ github.workflow_ref }}
+
+            **Please review the changes and merge the PR if everything looks correct.**
diff --git a/package.json b/package.json
index 1a858a357..e22f5dc3d 100644
--- a/package.json
+++ b/package.json
@@ -33,6 +33,7 @@
     "sync:repo-to-database": "tsx ./scripts/sync-repo-to-database.ts",
     "sync:roadmap": "tsx ./scripts/sync-roadmap-to-database.ts",
     "migrate:content-repo-to-database": "tsx ./scripts/migrate-content-repo-to-database.ts",
+    "cleanup:orphaned-content": "tsx ./scripts/cleanup-orphaned-content.ts",
     "official:roadmap-assets": "tsx ./scripts/official-roadmap-assets.ts",
     "test:e2e": "playwright test"
   },
diff --git a/scripts/cleanup-orphaned-content.ts b/scripts/cleanup-orphaned-content.ts
new file mode 100644
index 000000000..ab335d053
--- /dev/null
+++ b/scripts/cleanup-orphaned-content.ts
@@ -0,0 +1,274 @@
+import type { Node } from '@roadmapsh/editor';
+import matter from 'gray-matter';
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { slugify } from '../src/lib/slugger';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+const ROADMAP_CONTENT_DIR = path.join(__dirname, '../src/data/roadmaps');
+const SLUG_FLAG = '--roadmap-slug=';
+
+// Look for the flag anywhere in argv rather than assuming it is the first
+// argument; fall back to the first argument to keep accepting a bare slug.
+const args = process.argv.slice(2);
+const slugArg = args.find((arg) => arg.startsWith(SLUG_FLAG)) ?? args[0];
+const roadmapSlug = slugArg?.replace(SLUG_FLAG, '');
+
+if (!roadmapSlug) {
+  console.error('Usage: tsx scripts/cleanup-orphaned-content.ts --roadmap-slug=<slug>');
+  process.exit(1);
+}
+
+/** A content file that was removed, and why. */
+interface OrphanEntry {
+  file: string;
+  reason: string;
+  duplicateOf: string;
+}
+
+/**
+ * Loads the roadmap definition for `slug` from the roadmap.sh API, falling
+ * back to `<slug>/<slug>.json` in the content directory when the request fails.
+ *
+ * @throws if the API request fails and the local JSON cannot be read or parsed.
+ */
+async function fetchRoadmapJson(slug: string): Promise<{ nodes: Node[] }> {
+  try {
+    const response = await fetch(
+      `https://roadmap.sh/api/v1-official-roadmap/${slug}`,
+    );
+
+    if (!response.ok) {
+      throw new Error(`HTTP ${response.status}`);
+    }
+
+    const data = await response.json();
+    if (data.error) {
+      throw new Error(data.error);
+    }
+
+    return data;
+  } catch (err) {
+    // Surface the cause so the fallback to local data is not silent.
+    const cause = err instanceof Error ? err.message : String(err);
+    console.log(` API fetch failed for ${slug} (${cause}), falling back to local JSON`);
+    const localPath = path.join(ROADMAP_CONTENT_DIR, slug, `${slug}.json`);
+    const raw = await fs.readFile(localPath, 'utf-8');
+    return JSON.parse(raw);
+  }
+}
+
+/** Returns true when `<slug>/<slug>.md` exists and its frontmatter sets `renderer: editor`. */
+async function isEditorRoadmap(slug: string): Promise<boolean> {
+  const mdPath = path.join(ROADMAP_CONTENT_DIR, slug, `${slug}.md`);
+  try {
+    const raw = await fs.readFile(mdPath, 'utf-8');
+    const { data } = matter(raw);
+    return data.renderer === 'editor';
+  } catch {
+    return false;
+  }
+}
+
+/** Lists every directory under the content root that is an editor-rendered roadmap. */
+async function getEditorRoadmapSlugs(): Promise<string[]> {
+  const allDirs = await fs.readdir(ROADMAP_CONTENT_DIR);
+  const results: string[] = [];
+
+  for (const dir of allDirs) {
+    const stat = await fs.stat(path.join(ROADMAP_CONTENT_DIR, dir)).catch(() => null);
+    if (!stat?.isDirectory()) {
+      continue;
+    }
+    if (await isEditorRoadmap(dir)) {
+      results.push(dir);
+    }
+  }
+
+  return results;
+}
+
+/** Splits a `<slug>@<nodeId>.md` filename into its parts; returns null for any other name. */
+function parseContentFilename(filename: string): { slug: string; nodeId: string } | null {
+  const match = filename.match(/^(.+)@([^.]+)\.md$/);
+  if (!match) {
+    return null;
+  }
+  return { slug: match[1], nodeId: match[2] };
+}
+
+/**
+ * Deletes content files of one roadmap that no longer match a topic/subtopic
+ * node, and returns an entry for every file that was deleted.
+ *
+ * Only files named `<slug>@<nodeId>.md` are considered. A file is kept when
+ * its node exists and its slug equals the slugified node label. It is deleted
+ * when its node is gone, or when the node exists under a different slug and
+ * the correctly named file is present. A stale-slug file with no correctly
+ * named counterpart is kept, since nothing would replace it.
+ */
+async function cleanupRoadmap(slug: string): Promise<OrphanEntry[]> {
+  console.log(`\nProcessing: ${slug}`);
+
+  const contentDir = path.join(ROADMAP_CONTENT_DIR, slug, 'content');
+  const stat = await fs.stat(contentDir).catch(() => null);
+  if (!stat?.isDirectory()) {
+    console.log(` No content directory found, skipping`);
+    return [];
+  }
+
+  const roadmapData = await fetchRoadmapJson(slug);
+  if (!roadmapData?.nodes) {
+    console.log(` No nodes found in roadmap JSON, skipping`);
+    return [];
+  }
+
+  const topicNodes = roadmapData.nodes.filter(
+    (node) =>
+      node?.type &&
+      ['topic', 'subtopic'].includes(node.type) &&
+      node.data?.label,
+  );
+
+  const validNodeIds = new Set<string>();
+  const nodeIdToExpectedSlug = new Map<string, string>();
+
+  for (const node of topicNodes) {
+    validNodeIds.add(node.id);
+    nodeIdToExpectedSlug.set(node.id, slugify(node.data.label as string));
+  }
+
+  const files = await fs.readdir(contentDir);
+  const existingFiles = new Set(files);
+  const orphans: OrphanEntry[] = [];
+
+  // First pass: index the files that are correct, keyed by slug, so the
+  // second pass can tell "same slug, old nodeId" apart from a removed topic.
+  const validFilesBySlug = new Map<string, string>();
+  for (const file of files) {
+    const parsed = parseContentFilename(file);
+    if (!parsed) {
+      continue;
+    }
+    if (validNodeIds.has(parsed.nodeId) && nodeIdToExpectedSlug.get(parsed.nodeId) === parsed.slug) {
+      validFilesBySlug.set(parsed.slug, file);
+    }
+  }
+
+  for (const file of files) {
+    const parsed = parseContentFilename(file);
+    if (!parsed) {
+      continue;
+    }
+
+    const { slug: fileSlug, nodeId } = parsed;
+
+    if (validNodeIds.has(nodeId)) {
+      const expectedSlug = nodeIdToExpectedSlug.get(nodeId)!;
+      if (fileSlug === expectedSlug) {
+        continue;
+      }
+
+      const correctFile = `${expectedSlug}@${nodeId}.md`;
+      if (!existingFiles.has(correctFile)) {
+        // Not a duplicate: deleting it would drop content with no replacement.
+        console.log(` Kept: ${file} (expected ${correctFile} does not exist)`);
+        continue;
+      }
+
+      orphans.push({
+        file,
+        reason: 'Same nodeId, old slug',
+        duplicateOf: correctFile,
+      });
+      continue;
+    }
+
+    const validFile = validFilesBySlug.get(fileSlug);
+    if (validFile) {
+      orphans.push({
+        file,
+        reason: 'Same slug, old nodeId',
+        duplicateOf: validFile,
+      });
+    } else {
+      orphans.push({
+        file,
+        reason: 'Topic removed from roadmap',
+        duplicateOf: 'N/A',
+      });
+    }
+  }
+
+  for (const orphan of orphans) {
+    const filePath = path.join(contentDir, orphan.file);
+    await fs.unlink(filePath);
+    console.log(` Deleted: ${orphan.file} (${orphan.reason})`);
+  }
+
+  if (orphans.length === 0) {
+    console.log(` No orphans found`);
+  }
+
+  return orphans;
+}
+
+/**
+ * Entry point: cleans the requested roadmap (or every editor roadmap for
+ * `__all__`) and writes a Markdown summary to `.cleanup-summary.md` in the
+ * directory above this script.
+ */
+async function main(): Promise<void> {
+  const slugs =
+    roadmapSlug === '__all__'
+      ? await getEditorRoadmapSlugs()
+      : [roadmapSlug];
+
+  if (roadmapSlug !== '__all__') {
+    if (!(await isEditorRoadmap(roadmapSlug))) {
+      console.error(`${roadmapSlug} is not an editor-rendered roadmap`);
+      process.exit(1);
+    }
+  }
+
+  console.log(`Processing ${slugs.length} roadmap(s)...`);
+
+  const allOrphans = new Map<string, OrphanEntry[]>();
+  let totalOrphans = 0;
+
+  for (const slug of slugs) {
+    const orphans = await cleanupRoadmap(slug);
+    if (orphans.length > 0) {
+      allOrphans.set(slug, orphans);
+      totalOrphans += orphans.length;
+    }
+  }
+
+  const roadmapsAffected = allOrphans.size;
+
+  let summary = `## Orphaned Content Cleanup\n\n`;
+  summary += `Removed **${totalOrphans}** orphaned content file(s) across **${roadmapsAffected}** roadmap(s).\n\n`;
+
+  for (const [slug, orphans] of allOrphans) {
+    summary += `### ${slug}\n\n`;
+    summary += `| Removed File | Reason | Duplicate Of |\n`;
+    summary += `|---|---|---|\n`;
+    for (const orphan of orphans) {
+      summary += `| \`${orphan.file}\` | ${orphan.reason} | ${orphan.duplicateOf === 'N/A' ? 'N/A' : `\`${orphan.duplicateOf}\``} |\n`;
+    }
+    summary += `\n`;
+  }
+
+  const summaryPath = path.join(__dirname, '..', '.cleanup-summary.md');
+  await fs.writeFile(summaryPath, summary);
+  console.log(`\nSummary written to .cleanup-summary.md`);
+  console.log(`Total: ${totalOrphans} orphaned file(s) removed across ${roadmapsAffected} roadmap(s)`);
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});