Compare commits

...

12 Commits

Author SHA1 Message Date
Arik Chakma
0560cc930a fix: topic path 2025-08-20 20:00:03 +06:00
Arik Chakma
c903b76934 fix: remove title 2025-08-20 19:34:41 +06:00
Arik Chakma
4f586fd122 fix: sync content description 2025-08-20 19:31:17 +06:00
Kamran Ahmed
cb7c13fd1b Make sync to not run for github actions 2025-08-20 14:24:21 +01:00
github-actions[bot]
704657cb36 Add content to Machine Learning (#9054)
* chore: sync content to repo

* Update src/data/roadmaps/machine-learning/introduction@MEL6y3vwiqwAV6FQihF34.md

* Update src/data/roadmaps/machine-learning/what-is-an-ml-engineer@FgzPlLUfGdlZPvPku0-Xl.md

---------

Co-authored-by: kamranahmedse <4921183+kamranahmedse@users.noreply.github.com>
Co-authored-by: Kamran Ahmed <kamranahmed.se@gmail.com>
2025-08-20 14:21:03 +01:00
Kamran Ahmed
eba3a78c70 Update .github/workflows/sync-content-to-repo.yml 2025-08-20 13:49:19 +01:00
Kamran Ahmed
d6cf9eb66d Update .github/workflows/sync-content-to-repo.yml 2025-08-20 13:49:19 +01:00
Arik Chakma
885e95399e fix: sync repo to db 2025-08-20 13:49:19 +01:00
Arik Chakma
d70582411e chore: sync repo to database 2025-08-20 13:49:19 +01:00
Arik Chakma
07277708eb fix: replace the api endpoint 2025-08-20 13:49:19 +01:00
Arik Chakma
87280b4c9e chore: sync content to repo 2025-08-20 13:49:19 +01:00
Kamran Ahmed
91b0a232ab Fix typos 2025-08-20 13:02:32 +01:00
10 changed files with 508 additions and 8 deletions

View File

@@ -0,0 +1,67 @@
name: Sync Content to Repo
on:
workflow_dispatch:
inputs:
roadmap_slug:
description: "The ID of the roadmap to sync"
required: true
default: "__default__"
jobs:
sync-content:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup pnpm@v9
uses: pnpm/action-setup@v4
with:
version: 9
run_install: false
- name: Setup Node.js Version 20 (LTS)
uses: actions/setup-node@v4
with:
node-version: 20
cache: 'pnpm'
- name: Install Dependencies and Sync Content
run: |
echo "Installing Dependencies"
pnpm install
echo "Syncing Content to Repo"
npm run sync:content-to-repo -- --roadmap-slug=${{ inputs.roadmap_slug }} --secret=${{ secrets.GH_SYNC_SECRET }}
- name: Check for changes
id: verify-changed-files
run: |
if [ -n "$(git status --porcelain)" ]; then
echo "changed=true" >> $GITHUB_OUTPUT
else
echo "changed=false" >> $GITHUB_OUTPUT
fi
- name: Create PR
if: steps.verify-changed-files.outputs.changed == 'true'
uses: peter-evans/create-pull-request@v7
with:
delete-branch: false
branch: "chore/sync-content-to-repo-${{ inputs.roadmap_slug }}"
base: "master"
labels: |
dependencies
automated pr
reviewers: arikchakma
commit-message: "chore: sync content to repo"
title: "chore: sync content to repository"
body: |
## Sync Content to Repo
> [!IMPORTANT]
> This PR Syncs the Content to the Repo for the Roadmap: ${{ inputs.roadmap_slug }}
>
> Commit: ${{ github.sha }}
> Workflow Path: ${{ github.workflow_ref }}
**Please Review the Changes and Merge the PR if everything is fine.**

View File

@@ -0,0 +1,67 @@
name: Sync on Roadmap Changes
on:
push:
branches:
- master
paths:
- 'src/data/roadmaps/**'
jobs:
sync-on-changes:
runs-on: ubuntu-latest
if: github.actor != 'github-actions[bot]' && github.actor != 'dependabot[bot]'
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2 # Fetch previous commit to compare changes
- name: Setup pnpm@v9
uses: pnpm/action-setup@v4
with:
version: 9
run_install: false
- name: Setup Node.js Version 20 (LTS)
uses: actions/setup-node@v4
with:
node-version: 20
cache: 'pnpm'
- name: Get changed files
id: changed-files
run: |
echo "Getting changed files in /src/data/roadmaps/"
# Get changed files between HEAD and previous commit
CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD -- src/data/roadmaps/)
if [ -z "$CHANGED_FILES" ]; then
echo "No changes found in roadmaps directory"
echo "has_changes=false" >> $GITHUB_OUTPUT
exit 0
fi
echo "Changed files:"
echo "$CHANGED_FILES"
# Convert to space-separated list for the script
CHANGED_FILES_LIST=$(echo "$CHANGED_FILES" | tr '\n' ' ')
echo "has_changes=true" >> $GITHUB_OUTPUT
echo "changed_files=$CHANGED_FILES_LIST" >> $GITHUB_OUTPUT
- name: Install Dependencies
if: steps.changed-files.outputs.has_changes == 'true'
run: |
echo "Installing Dependencies"
pnpm install
- name: Run sync script with changed files
if: steps.changed-files.outputs.has_changes == 'true'
run: |
echo "Running sync script for changed roadmap files"
echo "Changed files: ${{ steps.changed-files.outputs.changed_files }}"
# Run your script with the changed file paths
npm run sync:repo-to-database -- --files="${{ steps.changed-files.outputs.changed_files }}" --secret=${{ secrets.GH_SYNC_SECRET }}

View File

@@ -29,6 +29,8 @@
"compress:images": "tsx ./scripts/compress-images.ts",
"generate:roadmap-content-json": "tsx ./scripts/editor-roadmap-content-json.ts",
"migrate:editor-roadmaps": "tsx ./scripts/migrate-editor-roadmap.ts",
"sync:content-to-repo": "tsx ./scripts/sync-content-to-repo.ts",
"sync:repo-to-database": "tsx ./scripts/sync-repo-to-database.ts",
"test:e2e": "playwright test"
},
"dependencies": {

View File

@@ -0,0 +1,142 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { slugify } from '../src/lib/slugger';
import type { OfficialRoadmapDocument } from '../src/queries/official-roadmap';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const args = process.argv.slice(2);
const roadmapSlug = args?.[0]?.replace('--roadmap-slug=', '');
const secret = args?.[1]?.replace('--secret=', '');
if (!secret) {
throw new Error('Secret is required');
}
if (!roadmapSlug || roadmapSlug === '__default__') {
throw new Error('Roadmap slug is required');
}
console.log(`🚀 Starting ${roadmapSlug}`);
export const allowedOfficialRoadmapTopicResourceType = [
'roadmap',
'official',
'opensource',
'article',
'course',
'podcast',
'video',
'book',
'feed',
] as const;
export type AllowedOfficialRoadmapTopicResourceType =
(typeof allowedOfficialRoadmapTopicResourceType)[number];
export type OfficialRoadmapTopicResource = {
_id?: string;
type: AllowedOfficialRoadmapTopicResourceType;
title: string;
url: string;
};
export interface OfficialRoadmapTopicContentDocument {
_id?: string;
roadmapSlug: string;
nodeId: string;
description: string;
resources: OfficialRoadmapTopicResource[];
createdAt: Date;
updatedAt: Date;
}
export async function roadmapTopics(
roadmapId: string,
secret: string,
): Promise<OfficialRoadmapTopicContentDocument[]> {
const path = `https://roadmap.sh/api/v1-list-official-roadmap-topics/${roadmapId}?secret=${secret}`;
const response = await fetch(path);
if (!response.ok) {
throw new Error(`Failed to fetch roadmap topics: ${response.statusText}`);
}
const data = await response.json();
if (data.error) {
throw new Error(`Failed to fetch roadmap topics: ${data.error}`);
}
return data;
}
export async function fetchRoadmapJson(
roadmapId: string,
): Promise<OfficialRoadmapDocument> {
const response = await fetch(
`https://roadmap.sh/api/v1-official-roadmap/${roadmapId}`,
);
if (!response.ok) {
throw new Error(`Failed to fetch roadmap json: ${response.statusText}`);
}
const data = await response.json();
if (data.error) {
throw new Error(`Failed to fetch roadmap json: ${data.error}`);
}
return data;
}
// Directory containing the roadmaps
const ROADMAP_CONTENT_DIR = path.join(
__dirname,
'../src/data/roadmaps',
roadmapSlug,
);
const allTopics = await roadmapTopics(roadmapSlug, secret);
const roadmap = await fetchRoadmapJson(roadmapSlug);
const { nodes } = roadmap;
for (const topic of allTopics) {
const { nodeId } = topic;
const node = nodes.find((node) => node.id === nodeId);
if (!node) {
console.error(`Node not found: ${nodeId}`);
continue;
}
const label = node?.data?.label as string;
if (!label) {
console.error(`Label not found: ${nodeId}`);
continue;
}
const topicSlug = `${slugify(label)}@${nodeId}.md`;
const topicPath = path.join(ROADMAP_CONTENT_DIR, 'content', topicSlug);
const topicDir = path.dirname(topicPath);
const topicDirExists = await fs
.stat(topicDir)
.then(() => true)
.catch(() => false);
if (!topicDirExists) {
await fs.mkdir(topicDir, { recursive: true });
}
const topicContent = prepareTopicContent(topic);
await fs.writeFile(topicPath, topicContent);
console.log(`✅ Synced ${topicSlug}`);
}
function prepareTopicContent(topic: OfficialRoadmapTopicContentDocument) {
const { description, resources = [] } = topic;
let content = description;
if (resources.length > 0) {
content += `\n\nVisit the following resources to learn more:\n\n${resources.map((resource) => `- [@${resource.type}@${resource.title}](${resource.url})`).join('\n')}`;
}
return content;
}

View File

@@ -0,0 +1,215 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import type { OfficialRoadmapDocument } from '../src/queries/official-roadmap';
import { parse } from 'node-html-parser';
import { markdownToHtml } from '../src/lib/markdown';
import { htmlToMarkdown } from '../src/lib/html';
import {
allowedOfficialRoadmapTopicResourceType,
type AllowedOfficialRoadmapTopicResourceType,
type OfficialRoadmapTopicContentDocument,
type OfficialRoadmapTopicResource,
} from './sync-content-to-repo';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const args = process.argv.slice(2);
const allFiles = args?.[0]?.replace('--files=', '');
const secret = args?.[1]?.replace('--secret=', '');
if (!secret) {
throw new Error('Secret is required');
}
let roadmapJsonCache: Map<string, OfficialRoadmapDocument> = new Map();
export async function fetchRoadmapJson(
roadmapId: string,
): Promise<OfficialRoadmapDocument> {
if (roadmapJsonCache.has(roadmapId)) {
return roadmapJsonCache.get(roadmapId)!;
}
const response = await fetch(
`https://roadmap.sh/api/v1-official-roadmap/${roadmapId}`,
);
if (!response.ok) {
throw new Error(`Failed to fetch roadmap json: ${response.statusText}`);
}
const data = await response.json();
if (data.error) {
throw new Error(`Failed to fetch roadmap json: ${data.error}`);
}
roadmapJsonCache.set(roadmapId, data);
return data;
}
export async function syncContentToDatabase(
topics: Omit<
OfficialRoadmapTopicContentDocument,
'createdAt' | 'updatedAt' | '_id'
>[],
) {
const response = await fetch(
`https://roadmap.sh/api/v1-sync-official-roadmap-topics`,
{
method: 'POST',
body: JSON.stringify({
topics,
secret,
}),
},
);
if (!response.ok) {
throw new Error(
`Failed to sync content to database: ${response.statusText}`,
);
}
return response.json();
}
const files = allFiles.split(' ');
console.log(`🚀 Starting ${files.length} files`);
const ROADMAP_CONTENT_DIR = path.join(__dirname, '../src/data/roadmaps');
try {
const topics: Omit<
OfficialRoadmapTopicContentDocument,
'createdAt' | 'updatedAt' | '_id'
>[] = [];
for (const file of files) {
const isContentFile = file.endsWith('.md') && file.includes('content/');
if (!isContentFile) {
console.log(`🚨 Skipping ${file} because it is not a content file`);
continue;
}
const pathParts = file.replace('src/data/roadmaps/', '').split('/');
const roadmapSlug = pathParts?.[0];
if (!roadmapSlug) {
console.error(`🚨 Roadmap slug is required: ${file}`);
continue;
}
const nodeSlug = pathParts?.[2]?.replace('.md', '');
if (!nodeSlug) {
console.error(`🚨 Node id is required: ${file}`);
continue;
}
const nodeId = nodeSlug.split('@')?.[1];
if (!nodeId) {
console.error(`🚨 Node id is required: ${file}`);
continue;
}
const roadmap = await fetchRoadmapJson(roadmapSlug);
const node = roadmap.nodes.find((node) => node.id === nodeId);
if (!node) {
console.error(`🚨 Node not found: ${file}`);
continue;
}
const filePath = path.join(
ROADMAP_CONTENT_DIR,
roadmapSlug,
'content',
`${nodeSlug}.md`,
);
const content = await fs.readFile(filePath, 'utf8');
const html = markdownToHtml(content, false);
const rootHtml = parse(html);
let ulWithLinks: HTMLElement | undefined;
rootHtml.querySelectorAll('ul').forEach((ul) => {
const listWithJustLinks = Array.from(ul.querySelectorAll('li')).filter(
(li) => {
const link = li.querySelector('a');
return link && link.textContent?.trim() === li.textContent?.trim();
},
);
if (listWithJustLinks.length > 0) {
// @ts-expect-error - TODO: fix this
ulWithLinks = ul;
}
});
const listLinks: Omit<OfficialRoadmapTopicResource, '_id'>[] =
ulWithLinks !== undefined
? Array.from(ulWithLinks.querySelectorAll('li > a'))
.map((link) => {
const typePattern = /@([a-z.]+)@/;
let linkText = link.textContent || '';
const linkHref = link.getAttribute('href') || '';
let linkType = linkText.match(typePattern)?.[1] || 'article';
linkType = allowedOfficialRoadmapTopicResourceType.includes(
linkType as any,
)
? linkType
: 'article';
linkText = linkText.replace(typePattern, '');
return {
title: linkText,
url: linkHref,
type: linkType as AllowedOfficialRoadmapTopicResourceType,
};
})
.sort((a, b) => {
const order = [
'official',
'opensource',
'article',
'video',
'feed',
];
return order.indexOf(a.type) - order.indexOf(b.type);
})
: [];
const title = rootHtml.querySelector('h1');
ulWithLinks?.remove();
title?.remove();
const allParagraphs = rootHtml.querySelectorAll('p');
if (listLinks.length > 0 && allParagraphs.length > 0) {
// to remove the view more see more from the description
const lastParagraph = allParagraphs[allParagraphs.length - 1];
lastParagraph?.remove();
}
const htmlStringWithoutLinks = rootHtml.toString();
const description = htmlToMarkdown(htmlStringWithoutLinks);
const updatedDescription =
`# ${title?.textContent}\n\n${description}`.trim();
const label = node?.data?.label as string;
if (!label) {
console.error(`🚨 Label is required: ${file}`);
continue;
}
topics.push({
roadmapSlug,
nodeId,
description: updatedDescription,
resources: listLinks,
});
}
await syncContentToDatabase(topics);
} catch (error) {
console.error(error);
process.exit(1);
}

View File

@@ -25,7 +25,7 @@ export function ForkCourseAlert(props: ForkCourseAlertProps) {
)}
>
<p className="text-sm text-balance">
Fork the course to track progress and make changes to the course.
Fork the course to track you progress and make changes to the course.
</p>
<button

View File

@@ -4,9 +4,9 @@ pdfUrl: '/pdfs/roadmaps/data-engineer.pdf'
order: 4.6
renderer: "editor"
briefTitle: 'Data Engineer'
briefDescription: 'Step by step guide to becoming an Data Engineer in 2025'
briefDescription: 'Step by step guide to becoming a Data Engineer in 2025'
title: 'Data Engineer Roadmap'
description: 'Step by step guide to becoming an Data Engineer in 2025'
description: 'Step by step guide to becoming a Data Engineer in 2025'
hasTopics: true
isNew: true
dimensions:
@@ -28,17 +28,17 @@ courses:
title: 'Founder - roadmap.sh'
schema:
headline: 'Data Engineer Roadmap'
description: 'Learn how to become an Data Engineer with this interactive step by step guide in 2025. We also have resources and short descriptions attached to the roadmap items so you can get everything you want to learn in one place.'
description: 'Learn how to become a Data Engineer with this interactive step by step guide in 2025. We also have resources and short descriptions attached to the roadmap items so you can get everything you want to learn in one place.'
imageUrl: 'https://roadmap.sh/roadmaps/data-engineer.png'
datePublished: '2025-08-13'
dateModified: '2025-08-13'
seo:
title: 'Data Engineer Roadmap'
description: 'Learn to become an Data Engineer using this roadmap. Community driven, articles, resources, guides, interview questions, quizzes for modern data engineers.'
description: 'Learn to become a Data Engineer using this roadmap. Community driven, articles, resources, guides, interview questions, quizzes for modern data engineers.'
keywords:
- 'data engineer roadmap 2025'
- 'data engineering roadmap 2025'
- 'guide to becoming an data engineer'
- 'guide to becoming a data engineer'
- 'easy data engineer roadmap'
- 'data engineer'
- 'data engineer roadmap'

View File

@@ -0,0 +1,3 @@
# Introduction to Machine Learning
Machine learning is about creating computer programs that can learn from data. Instead of being explicitly programmed to perform a task, these programs improve their performance on a specific task as they are exposed to more data. This learning process allows them to make predictions or decisions without being directly told how to do so.

View File

@@ -0,0 +1,3 @@
# ML Engineer
An ML Engineer focuses on building, deploying, and maintaining machine learning systems in production. They bridge the gap between data science and software engineering, taking models developed by data scientists and making them scalable, reliable, and efficient for real-world applications. This involves tasks like data pipeline construction, model deployment, performance monitoring, and infrastructure management.

View File

@@ -1,13 +1,14 @@
import { queryOptions } from '@tanstack/react-query';
import { httpGet } from '../lib/query-http';
import type { Node, Edge } from '@roadmapsh/editor';
export interface OfficialRoadmapDocument {
_id: string;
title: string;
description?: string;
slug: string;
nodes: any[];
edges: any[];
nodes: Node[];
edges: Edge[];
createdAt: Date;
updatedAt: Date;