mirror of
https://github.com/kamranahmedse/developer-roadmap.git
synced 2026-03-13 02:01:57 +08:00
Compare commits
44 Commits
fix/empty-
...
fix/remove
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7e83371e97 | ||
|
|
cc2e75c812 | ||
|
|
24eace0f73 | ||
|
|
3c06b122e6 | ||
|
|
2fdb647413 | ||
|
|
3ca9f81298 | ||
|
|
56c4630e0d | ||
|
|
36af3ddcf1 | ||
|
|
0e7afe3c99 | ||
|
|
b605fd6337 | ||
|
|
ba1e5a58b5 | ||
|
|
dd12cf1c99 | ||
|
|
44854cc5fb | ||
|
|
b1e60f1614 | ||
|
|
168ad05afe | ||
|
|
bb0419bf8a | ||
|
|
2d18cefd55 | ||
|
|
931e1b4a31 | ||
|
|
e2075529ac | ||
|
|
8dbe1468ed | ||
|
|
bb13bf38a8 | ||
|
|
80dfd5b206 | ||
|
|
a89c2d454f | ||
|
|
d1cf7cca99 | ||
|
|
9c32f9d469 | ||
|
|
cef4c29f10 | ||
|
|
679e29d12d | ||
|
|
4e569df2a3 | ||
|
|
40c7ea1b43 | ||
|
|
f9f38101f9 | ||
|
|
c4c28944ee | ||
|
|
ffb1cb5059 | ||
|
|
580e764097 | ||
|
|
111a97bb55 | ||
|
|
5d85495d72 | ||
|
|
ed2a251de4 | ||
|
|
449e8f12e4 | ||
|
|
a15b13cedd | ||
|
|
609683db2f | ||
|
|
3e21d05767 | ||
|
|
82edfba6e9 | ||
|
|
65d7a737ac | ||
|
|
2e0a69ad72 | ||
|
|
485ffcf755 |
@@ -3,6 +3,6 @@
|
||||
"enabled": false
|
||||
},
|
||||
"_variables": {
|
||||
"lastUpdateCheck": 1755042938009
|
||||
"lastUpdateCheck": 1756224238932
|
||||
}
|
||||
}
|
||||
1
.astro/types.d.ts
vendored
1
.astro/types.d.ts
vendored
@@ -1 +1,2 @@
|
||||
/// <reference types="astro/client" />
|
||||
/// <reference path="content.d.ts" />
|
||||
|
||||
@@ -7,4 +7,6 @@ PUBLIC_STRIPE_INDIVIDUAL_MONTHLY_PRICE_ID=
|
||||
PUBLIC_STRIPE_INDIVIDUAL_YEARLY_PRICE_ID=
|
||||
|
||||
PUBLIC_STRIPE_INDIVIDUAL_MONTHLY_PRICE_AMOUNT=10
|
||||
PUBLIC_STRIPE_INDIVIDUAL_YEARLY_PRICE_AMOUNT=100
|
||||
PUBLIC_STRIPE_INDIVIDUAL_YEARLY_PRICE_AMOUNT=100
|
||||
|
||||
ROADMAP_API_KEY=
|
||||
2
pnpm-lock.yaml
generated
2
pnpm-lock.yaml
generated
@@ -9897,4 +9897,4 @@ snapshots:
|
||||
react: 19.1.0
|
||||
use-sync-external-store: 1.5.0(react@19.1.0)
|
||||
|
||||
zwitch@2.0.4: {}
|
||||
zwitch@2.0.4: {}
|
||||
BIN
public/pdfs/roadmaps/nextjs.pdf
Normal file
BIN
public/pdfs/roadmaps/nextjs.pdf
Normal file
Binary file not shown.
BIN
public/roadmaps/nextjs.png
Normal file
BIN
public/roadmaps/nextjs.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 326 KiB |
@@ -62,6 +62,7 @@ Here is the list of available roadmaps with more being actively worked upon.
|
||||
- [TypeScript Roadmap](https://roadmap.sh/typescript)
|
||||
- [C++ Roadmap](https://roadmap.sh/cpp)
|
||||
- [React Roadmap](https://roadmap.sh/react)
|
||||
- [Next.js Roadmap](https://roadmap.sh/nextjs)
|
||||
- [React Native Roadmap](https://roadmap.sh/react-native)
|
||||
- [Vue Roadmap](https://roadmap.sh/vue)
|
||||
- [Angular Roadmap](https://roadmap.sh/angular)
|
||||
|
||||
@@ -4,7 +4,6 @@ import { getPageTrackingData } from '../../lib/browser';
|
||||
declare global {
|
||||
interface Window {
|
||||
gtag: any;
|
||||
varify: any;
|
||||
fireEvent: (props: {
|
||||
action: string;
|
||||
category: string;
|
||||
@@ -68,7 +67,7 @@ window.fireEvent = (props) => {
|
||||
}
|
||||
|
||||
const trackingData = getPageTrackingData();
|
||||
|
||||
|
||||
window.gtag('event', action, {
|
||||
event_category: category,
|
||||
event_label: label,
|
||||
|
||||
@@ -39,7 +39,7 @@ const formattedDate = DateTime.fromISO(changelog.createdAt).toFormat(
|
||||
}
|
||||
|
||||
<div
|
||||
class='prose prose-sm prose-h2:mt-3 prose-h2:text-lg prose-h2:font-medium prose-p:mb-0 prose-blockquote:font-normal prose-blockquote:text-gray-500 prose-ul:my-0 prose-ul:rounded-lg prose-ul:bg-gray-100 prose-ul:px-4 prose-ul:py-4 prose-ul:pl-7 prose-img:mt-0 prose-img:rounded-lg [&>blockquote>p]:mt-0 [&>ul]:mt-3 [&>ul>li]:my-0 [&>ul>li]:mb-1'
|
||||
class='prose prose-sm [&_li_p]:my-0 prose-h2:mt-3 prose-h2:text-lg prose-h2:font-medium prose-p:mb-0 prose-blockquote:font-normal prose-blockquote:text-gray-500 prose-ul:my-0 prose-ul:rounded-lg prose-ul:bg-gray-100 prose-ul:px-4 prose-ul:py-4 prose-ul:pl-7 prose-img:mt-0 prose-img:rounded-lg [&>blockquote>p]:mt-0 [&>ul]:mt-3 [&>ul>li]:my-0 [&>ul>li]:mb-1'
|
||||
set:html={changelog.description}
|
||||
/>
|
||||
</div>
|
||||
|
||||
@@ -23,7 +23,12 @@ type EditorRoadmapProps = {
|
||||
};
|
||||
|
||||
export function EditorRoadmap(props: EditorRoadmapProps) {
|
||||
const { resourceId, resourceType = 'roadmap', dimensions, hasChat = true } = props;
|
||||
const {
|
||||
resourceId,
|
||||
resourceType = 'roadmap',
|
||||
dimensions,
|
||||
hasChat = true,
|
||||
} = props;
|
||||
|
||||
const [hasSwitchedRoadmap, setHasSwitchedRoadmap] = useState(false);
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
<div class='text-sm sm:text-base leading-relaxed text-left p-2 sm:p-4 text-md text-gray-800 border-t border-t-gray-300 bg-gray-100 rounded-bl-md rounded-br-md [&>p:not(:last-child)]:mb-3 [&>p>a]:underline [&>p>a]:text-blue-700'>
|
||||
<slot />
|
||||
</div>
|
||||
@@ -1,42 +0,0 @@
|
||||
---
|
||||
import { markdownToHtml } from '../../lib/markdown';
|
||||
import Answer from './Answer.astro';
|
||||
import Question from './Question.astro';
|
||||
|
||||
export type FAQType = {
|
||||
question: string;
|
||||
answer: string[];
|
||||
};
|
||||
|
||||
export interface Props {
|
||||
faqs: FAQType[];
|
||||
}
|
||||
|
||||
const { faqs } = Astro.props;
|
||||
|
||||
if (faqs.length === 0) {
|
||||
return '';
|
||||
}
|
||||
---
|
||||
|
||||
<div class='border-t bg-gray-100 mt-8'>
|
||||
<div class='container'>
|
||||
<div class='flex justify-between relative -top-5'>
|
||||
<h2 class='text-sm sm:text-base font-medium py-1 px-3 border bg-white rounded-md'>Frequently Asked Questions</h2>
|
||||
</div>
|
||||
|
||||
<div class='flex flex-col gap-1 pb-14'>
|
||||
{
|
||||
faqs.map((faq, questionIndex) => (
|
||||
<Question isActive={questionIndex === 0} question={faq.question}>
|
||||
<Answer>
|
||||
{faq.answer.map((answer) => (
|
||||
<p set:html={markdownToHtml(answer)} />
|
||||
))}
|
||||
</Answer>
|
||||
</Question>
|
||||
))
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
61
src/components/FAQs/FAQs.tsx
Normal file
61
src/components/FAQs/FAQs.tsx
Normal file
@@ -0,0 +1,61 @@
|
||||
import { useState } from 'react';
|
||||
import type { OfficialRoadmapQuestion } from '../../queries/official-roadmap';
|
||||
import { Question } from './Question';
|
||||
import { guideRenderer } from '../../lib/guide-renderer';
|
||||
|
||||
type FAQsProps = {
|
||||
faqs: OfficialRoadmapQuestion[];
|
||||
};
|
||||
|
||||
export function FAQs(props: FAQsProps) {
|
||||
const { faqs } = props;
|
||||
if (faqs.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const [activeQuestionIndex, setActiveQuestionIndex] = useState(0);
|
||||
|
||||
return (
|
||||
<div className="mt-8 border-t bg-gray-100">
|
||||
<div className="container">
|
||||
<div className="relative -top-5 flex justify-between">
|
||||
<h2 className="rounded-md border bg-white px-3 py-1 text-sm font-medium sm:text-base">
|
||||
Frequently Asked Questions
|
||||
</h2>
|
||||
</div>
|
||||
|
||||
<div className="flex flex-col gap-1 pb-14">
|
||||
{faqs.map((faq, questionIndex) => {
|
||||
const isTextDescription =
|
||||
typeof faq?.description === 'string' &&
|
||||
faq?.description?.length > 0;
|
||||
|
||||
return (
|
||||
<Question
|
||||
key={faq._id}
|
||||
isActive={questionIndex === activeQuestionIndex}
|
||||
question={faq.title}
|
||||
onClick={() => setActiveQuestionIndex(questionIndex)}
|
||||
>
|
||||
<div
|
||||
className="text-md rounded-br-md rounded-bl-md border-t border-t-gray-300 bg-gray-100 p-2 text-left text-sm leading-relaxed text-gray-800 sm:p-4 sm:text-base [&>p:not(:last-child)]:mb-3 [&>p>a]:text-blue-700 [&>p>a]:underline"
|
||||
{...(isTextDescription
|
||||
? {
|
||||
dangerouslySetInnerHTML: {
|
||||
__html: faq.description,
|
||||
},
|
||||
}
|
||||
: {})}
|
||||
>
|
||||
{!isTextDescription
|
||||
? guideRenderer.render(faq.description)
|
||||
: null}
|
||||
</div>
|
||||
</Question>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
---
|
||||
import Icon from '../AstroIcon.astro';
|
||||
|
||||
export interface Props {
|
||||
question: string;
|
||||
isActive?: boolean;
|
||||
}
|
||||
|
||||
const { question, isActive = false } = Astro.props;
|
||||
---
|
||||
|
||||
<div
|
||||
class='faq-item bg-white border rounded-md hover:bg-gray-50 border-gray-300'
|
||||
>
|
||||
<button
|
||||
faq-question
|
||||
class='flex flex-row justify-between items-center p-2 sm:p-3 w-full'
|
||||
>
|
||||
<span class='text-sm sm:text-base text-left font-medium'>{question}</span>
|
||||
<Icon icon='down' class='h-6 hidden sm:block text-gray-400' />
|
||||
</button>
|
||||
<div class:list={['answer', { hidden: !isActive }]} faq-answer>
|
||||
<slot />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
document.querySelectorAll('[faq-question]').forEach((el) => {
|
||||
el.addEventListener('click', () => {
|
||||
// Hide any other visible answers
|
||||
document.querySelectorAll('[faq-answer]').forEach((element) => {
|
||||
element.classList.add('hidden');
|
||||
});
|
||||
|
||||
// Show the current answer
|
||||
const answer = el.nextElementSibling;
|
||||
if (answer) {
|
||||
answer.classList.remove('hidden');
|
||||
}
|
||||
});
|
||||
});
|
||||
</script>
|
||||
29
src/components/FAQs/Question.tsx
Normal file
29
src/components/FAQs/Question.tsx
Normal file
@@ -0,0 +1,29 @@
|
||||
import { cn } from '../../lib/classname';
|
||||
import { ChevronDownIcon } from '../ReactIcons/ChevronDownIcon';
|
||||
|
||||
type QuestionProps = {
|
||||
question: string;
|
||||
isActive?: boolean;
|
||||
children: React.ReactNode;
|
||||
onClick?: () => void;
|
||||
};
|
||||
|
||||
export function Question(props: QuestionProps) {
|
||||
const { question, isActive = false, children, onClick } = props;
|
||||
|
||||
return (
|
||||
<div className="faq-item rounded-md border border-gray-300 bg-white hover:bg-gray-50">
|
||||
<button
|
||||
className="flex w-full flex-row items-center justify-between p-2 sm:p-3"
|
||||
onClick={onClick}
|
||||
>
|
||||
<span className="text-left text-sm font-medium sm:text-base">
|
||||
{question}
|
||||
</span>
|
||||
<ChevronDownIcon className="hidden h-3.5 stroke-[3] text-gray-400 sm:block" />
|
||||
</button>
|
||||
|
||||
{isActive && <div className={cn('answer')}>{children}</div>}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -1,6 +1,4 @@
|
||||
---
|
||||
import type { RoadmapFileType } from '../lib/roadmap';
|
||||
|
||||
export interface Props {
|
||||
url: string;
|
||||
title: string;
|
||||
@@ -27,7 +25,7 @@ const { url, title, description, isNew } = Astro.props;
|
||||
|
||||
{
|
||||
isNew && (
|
||||
<span class='flex items-center gap-1.5 absolute bottom-1.5 right-1 rounded-xs text-xs font-semibold uppercase text-purple-500 sm:px-1.5'>
|
||||
<span class='absolute right-1 bottom-1.5 flex items-center gap-1.5 rounded-xs text-xs font-semibold text-purple-500 uppercase sm:px-1.5'>
|
||||
<span class='relative flex h-2 w-2'>
|
||||
<span class='absolute inline-flex h-full w-full animate-ping rounded-full bg-purple-400 opacity-75' />
|
||||
<span class='relative inline-flex h-2 w-2 rounded-full bg-purple-500' />
|
||||
|
||||
@@ -19,7 +19,7 @@ export function GuideContent(props: GuideContentProps) {
|
||||
return (
|
||||
<article className="lg:grid lg:max-w-full lg:grid-cols-[1fr_minmax(0,700px)_1fr]">
|
||||
{(showTableOfContent || hasRelatedGuides) && (
|
||||
<div className="sticky top-0 bg-linear-to-r from-gray-50 py-0 lg:relative lg:col-start-3 lg:col-end-4 lg:row-start-1">
|
||||
<div className="sticky top-[36px] bg-linear-to-r from-gray-50 py-0 lg:relative lg:col-start-3 lg:col-end-4 lg:row-start-1">
|
||||
{hasRelatedGuides && (
|
||||
<RelatedGuides relatedGuides={guide?.relatedGuides || []} />
|
||||
)}
|
||||
|
||||
@@ -3,21 +3,16 @@ import { useToast } from '../../hooks/use-toast';
|
||||
import { httpGet, httpPost } from '../../lib/http';
|
||||
import { LoadingSolutions } from './LoadingSolutions';
|
||||
import { EmptySolutions } from './EmptySolutions';
|
||||
import { ThumbsDown, ThumbsUp } from 'lucide-react';
|
||||
import { getRelativeTimeString } from '../../lib/date';
|
||||
import { Pagination } from '../Pagination/Pagination';
|
||||
import { deleteUrlParam, getUrlParams, setUrlParams } from '../../lib/browser';
|
||||
import { pageProgressMessage } from '../../stores/page';
|
||||
import { LeavingRoadmapWarningModal } from './LeavingRoadmapWarningModal';
|
||||
import { isLoggedIn } from '../../lib/jwt';
|
||||
import { showLoginPopup } from '../../lib/popup';
|
||||
import { VoteButton } from './VoteButton.tsx';
|
||||
import { GitHubIcon } from '../ReactIcons/GitHubIcon.tsx';
|
||||
import { SelectLanguages } from './SelectLanguages.tsx';
|
||||
import type { ProjectFrontmatter } from '../../lib/project.ts';
|
||||
import { ProjectSolutionModal } from './ProjectSolutionModal.tsx';
|
||||
import { SortProjects } from './SortProjects.tsx';
|
||||
import { ProjectSolutionRow } from './ProjectSolutionRow';
|
||||
import type { OfficialProjectDocument } from '../../queries/official-project.ts';
|
||||
|
||||
export interface ProjectStatusDocument {
|
||||
_id?: string;
|
||||
@@ -69,12 +64,12 @@ type PageState = {
|
||||
};
|
||||
|
||||
type ListProjectSolutionsProps = {
|
||||
project: ProjectFrontmatter;
|
||||
project: OfficialProjectDocument;
|
||||
projectId: string;
|
||||
};
|
||||
|
||||
export function ListProjectSolutions(props: ListProjectSolutionsProps) {
|
||||
const { projectId, project: projectData } = props;
|
||||
const { projectId, project } = props;
|
||||
|
||||
const toast = useToast();
|
||||
const [pageState, setPageState] = useState<PageState>({
|
||||
@@ -226,7 +221,7 @@ export function ListProjectSolutions(props: ListProjectSolutionsProps) {
|
||||
<div className="relative mb-5 hidden items-center justify-between sm:flex">
|
||||
<div>
|
||||
<h1 className="mb-1 text-xl font-semibold">
|
||||
{projectData.title} Solutions
|
||||
{project.title} Solutions
|
||||
</h1>
|
||||
<p className="text-sm text-gray-500">
|
||||
Solutions submitted by the community
|
||||
|
||||
@@ -1,47 +1,43 @@
|
||||
import { Badge } from '../Badge.tsx';
|
||||
import type {
|
||||
ProjectDifficultyType,
|
||||
ProjectFileType,
|
||||
} from '../../lib/project.ts';
|
||||
import { Users } from 'lucide-react';
|
||||
import { formatCommaNumber } from '../../lib/number.ts';
|
||||
import { cn } from '../../lib/classname.ts';
|
||||
import { isLoggedIn } from '../../lib/jwt.ts';
|
||||
import type { OfficialProjectDocument } from '../../queries/official-project.ts';
|
||||
|
||||
type ProjectCardProps = {
|
||||
project: ProjectFileType;
|
||||
project: OfficialProjectDocument;
|
||||
userCount?: number;
|
||||
status?: 'completed' | 'started' | 'none';
|
||||
};
|
||||
|
||||
const badgeVariants: Record<ProjectDifficultyType, string> = {
|
||||
const badgeVariants = {
|
||||
beginner: 'yellow',
|
||||
intermediate: 'green',
|
||||
advanced: 'blue',
|
||||
};
|
||||
} as const;
|
||||
|
||||
export function ProjectCard(props: ProjectCardProps) {
|
||||
const { project, userCount = 0, status } = props;
|
||||
const { frontmatter, id } = project;
|
||||
const { difficulty, title, description, slug, topics = [] } = project;
|
||||
|
||||
const isLoadingStatus = status === undefined;
|
||||
const userStartedCount = status !== 'none' && userCount === 0 ? userCount + 1 : userCount;
|
||||
const userStartedCount =
|
||||
status !== 'none' && userCount === 0 ? userCount + 1 : userCount;
|
||||
|
||||
return (
|
||||
<a
|
||||
href={`/projects/${id}`}
|
||||
href={`/projects/${slug}`}
|
||||
className="flex flex-col rounded-md border bg-white p-3 transition-colors hover:border-gray-300 hover:bg-gray-50"
|
||||
>
|
||||
<span className="flex justify-between gap-1.5">
|
||||
<Badge
|
||||
variant={badgeVariants[frontmatter.difficulty] as any}
|
||||
text={frontmatter.difficulty}
|
||||
/>
|
||||
<Badge variant={'grey'} text={frontmatter.nature} />
|
||||
<Badge variant={badgeVariants[difficulty]} text={difficulty} />
|
||||
{topics?.map((topic, index) => (
|
||||
<Badge key={`${topic}-${index}`} variant={'grey'} text={topic} />
|
||||
))}
|
||||
</span>
|
||||
<span className="my-3 flex min-h-[100px] flex-col">
|
||||
<span className="mb-1 font-medium">{frontmatter.title}</span>
|
||||
<span className="text-sm text-gray-500">{frontmatter.description}</span>
|
||||
<span className="mb-1 font-medium">{title}</span>
|
||||
<span className="text-sm text-gray-500">{description}</span>
|
||||
</span>
|
||||
<span className="flex min-h-[22px] items-center justify-between gap-2 text-xs text-gray-400">
|
||||
{isLoadingStatus ? (
|
||||
|
||||
25
src/components/Projects/ProjectContent.tsx
Normal file
25
src/components/Projects/ProjectContent.tsx
Normal file
@@ -0,0 +1,25 @@
|
||||
import { guideRenderer } from '../../lib/guide-renderer';
|
||||
import type { OfficialProjectDocument } from '../../queries/official-project';
|
||||
|
||||
type ProjectContentProps = {
|
||||
project: OfficialProjectDocument;
|
||||
};
|
||||
|
||||
export function ProjectContent(props: ProjectContentProps) {
|
||||
const { project } = props;
|
||||
|
||||
const isContentString = typeof project?.content === 'string';
|
||||
|
||||
return (
|
||||
<div
|
||||
className="prose prose-h2:mb-3 prose-h2:mt-5 prose-h3:mb-1 prose-h3:mt-5 prose-p:mb-2 prose-blockquote:font-normal prose-blockquote:text-gray-500 prose-pre:my-3 prose-ul:my-3.5 prose-hr:my-5 prose-li:[&>p]:m-0 max-w-full [&>ul>li]:my-1"
|
||||
{...(isContentString
|
||||
? {
|
||||
dangerouslySetInnerHTML: { __html: project?.content },
|
||||
}
|
||||
: {
|
||||
children: guideRenderer.render(project?.content),
|
||||
})}
|
||||
/>
|
||||
);
|
||||
}
|
||||
@@ -2,11 +2,7 @@ import { ProjectCard } from './ProjectCard.tsx';
|
||||
import { HeartHandshake, Trash2 } from 'lucide-react';
|
||||
import { cn } from '../../lib/classname.ts';
|
||||
import { useEffect, useMemo, useState } from 'react';
|
||||
import {
|
||||
projectDifficulties,
|
||||
type ProjectDifficultyType,
|
||||
type ProjectFileType,
|
||||
} from '../../lib/project.ts';
|
||||
|
||||
import {
|
||||
deleteUrlParam,
|
||||
getUrlParams,
|
||||
@@ -14,9 +10,14 @@ import {
|
||||
} from '../../lib/browser.ts';
|
||||
import { httpPost } from '../../lib/http.ts';
|
||||
import { isLoggedIn } from '../../lib/jwt.ts';
|
||||
import {
|
||||
allowedOfficialProjectDifficulty,
|
||||
type AllowedOfficialProjectDifficulty,
|
||||
type OfficialProjectDocument,
|
||||
} from '../../queries/official-project.ts';
|
||||
|
||||
type DifficultyButtonProps = {
|
||||
difficulty: ProjectDifficultyType;
|
||||
difficulty: AllowedOfficialProjectDifficulty;
|
||||
isActive?: boolean;
|
||||
onClick?: () => void;
|
||||
};
|
||||
@@ -46,7 +47,7 @@ export type ListProjectStatusesResponse = Record<
|
||||
>;
|
||||
|
||||
type ProjectsListProps = {
|
||||
projects: ProjectFileType[];
|
||||
projects: OfficialProjectDocument[];
|
||||
userCounts: Record<string, number>;
|
||||
};
|
||||
|
||||
@@ -55,7 +56,7 @@ export function ProjectsList(props: ProjectsListProps) {
|
||||
|
||||
const { difficulty: urlDifficulty } = getUrlParams();
|
||||
const [difficulty, setDifficulty] = useState<
|
||||
ProjectDifficultyType | undefined
|
||||
AllowedOfficialProjectDifficulty | undefined
|
||||
>(urlDifficulty);
|
||||
const [projectStatuses, setProjectStatuses] =
|
||||
useState<ListProjectStatusesResponse>();
|
||||
@@ -66,7 +67,7 @@ export function ProjectsList(props: ProjectsListProps) {
|
||||
return;
|
||||
}
|
||||
|
||||
const projectIds = projects.map((project) => project.id);
|
||||
const projectIds = projects.map((project) => project.slug);
|
||||
const { response, error } = await httpPost(
|
||||
`${import.meta.env.PUBLIC_API_URL}/v1-list-project-statuses`,
|
||||
{
|
||||
@@ -82,22 +83,27 @@ export function ProjectsList(props: ProjectsListProps) {
|
||||
setProjectStatuses(response);
|
||||
};
|
||||
|
||||
const projectsByDifficulty: Map<ProjectDifficultyType, ProjectFileType[]> =
|
||||
useMemo(() => {
|
||||
const result = new Map<ProjectDifficultyType, ProjectFileType[]>();
|
||||
const projectsByDifficulty: Map<
|
||||
AllowedOfficialProjectDifficulty,
|
||||
OfficialProjectDocument[]
|
||||
> = useMemo(() => {
|
||||
const result = new Map<
|
||||
AllowedOfficialProjectDifficulty,
|
||||
OfficialProjectDocument[]
|
||||
>();
|
||||
|
||||
for (const project of projects) {
|
||||
const difficulty = project.frontmatter.difficulty;
|
||||
for (const project of projects) {
|
||||
const difficulty = project.difficulty;
|
||||
|
||||
if (!result.has(difficulty)) {
|
||||
result.set(difficulty, []);
|
||||
}
|
||||
|
||||
result.get(difficulty)?.push(project);
|
||||
if (!result.has(difficulty)) {
|
||||
result.set(difficulty, []);
|
||||
}
|
||||
|
||||
return result;
|
||||
}, [projects]);
|
||||
result.get(difficulty)?.push(project);
|
||||
}
|
||||
|
||||
return result;
|
||||
}, [projects]);
|
||||
|
||||
const matchingProjects = difficulty
|
||||
? projectsByDifficulty.get(difficulty) || []
|
||||
@@ -111,7 +117,7 @@ export function ProjectsList(props: ProjectsListProps) {
|
||||
<div className="flex flex-col">
|
||||
<div className="my-2.5 flex items-center justify-between">
|
||||
<div className="flex flex-wrap gap-1">
|
||||
{projectDifficulties.map((projectDifficulty) => (
|
||||
{allowedOfficialProjectDifficulty.map((projectDifficulty) => (
|
||||
<DifficultyButton
|
||||
key={projectDifficulty}
|
||||
onClick={() => {
|
||||
@@ -122,6 +128,7 @@ export function ProjectsList(props: ProjectsListProps) {
|
||||
isActive={projectDifficulty === difficulty}
|
||||
/>
|
||||
))}
|
||||
|
||||
{difficulty && (
|
||||
<button
|
||||
onClick={() => {
|
||||
@@ -155,25 +162,25 @@ export function ProjectsList(props: ProjectsListProps) {
|
||||
|
||||
{matchingProjects
|
||||
.sort((project) => {
|
||||
return project.frontmatter.difficulty === 'beginner'
|
||||
return project.difficulty === 'beginner'
|
||||
? -1
|
||||
: project.frontmatter.difficulty === 'intermediate'
|
||||
: project.difficulty === 'intermediate'
|
||||
? 0
|
||||
: 1;
|
||||
})
|
||||
.sort((a, b) => {
|
||||
return a.frontmatter.sort - b.frontmatter.sort;
|
||||
return a.order - b.order;
|
||||
})
|
||||
.map((matchingProject) => {
|
||||
const count = userCounts[matchingProject?.id] || 0;
|
||||
const count = userCounts[matchingProject?.slug] || 0;
|
||||
return (
|
||||
<ProjectCard
|
||||
key={matchingProject.id}
|
||||
key={matchingProject.slug}
|
||||
project={matchingProject}
|
||||
userCount={count}
|
||||
status={
|
||||
projectStatuses
|
||||
? (projectStatuses?.[matchingProject.id] || 'none')
|
||||
? projectStatuses?.[matchingProject.slug] || 'none'
|
||||
: undefined
|
||||
}
|
||||
/>
|
||||
|
||||
@@ -7,16 +7,16 @@ import {
|
||||
setUrlParams,
|
||||
} from '../../lib/browser.ts';
|
||||
import { CategoryFilterButton } from '../Roadmaps/CategoryFilterButton.tsx';
|
||||
import {
|
||||
projectDifficulties,
|
||||
type ProjectFileType,
|
||||
} from '../../lib/project.ts';
|
||||
import { ProjectCard } from './ProjectCard.tsx';
|
||||
import {
|
||||
allowedOfficialProjectDifficulty,
|
||||
type OfficialProjectDocument,
|
||||
} from '../../queries/official-project.ts';
|
||||
|
||||
type ProjectGroup = {
|
||||
id: string;
|
||||
title: string;
|
||||
projects: ProjectFileType[];
|
||||
projects: OfficialProjectDocument[];
|
||||
};
|
||||
|
||||
type ProjectsPageProps = {
|
||||
@@ -28,7 +28,7 @@ export function ProjectsPage(props: ProjectsPageProps) {
|
||||
const { roadmapsProjects, userCounts } = props;
|
||||
const allUniqueProjectIds = new Set<string>(
|
||||
roadmapsProjects.flatMap((group) =>
|
||||
group.projects.map((project) => project.id),
|
||||
group.projects.map((project) => project.slug),
|
||||
),
|
||||
);
|
||||
const allUniqueProjects = useMemo(
|
||||
@@ -37,15 +37,15 @@ export function ProjectsPage(props: ProjectsPageProps) {
|
||||
.map((id) =>
|
||||
roadmapsProjects
|
||||
.flatMap((group) => group.projects)
|
||||
.find((project) => project.id === id),
|
||||
.find((project) => project.slug === id),
|
||||
)
|
||||
.filter(Boolean) as ProjectFileType[],
|
||||
.filter(Boolean) as OfficialProjectDocument[],
|
||||
[allUniqueProjectIds],
|
||||
);
|
||||
|
||||
const [activeGroup, setActiveGroup] = useState<string>('');
|
||||
const [visibleProjects, setVisibleProjects] =
|
||||
useState<ProjectFileType[]>(allUniqueProjects);
|
||||
useState<OfficialProjectDocument[]>(allUniqueProjects);
|
||||
|
||||
const [isFilterOpen, setIsFilterOpen] = useState(false);
|
||||
|
||||
@@ -67,11 +67,11 @@ export function ProjectsPage(props: ProjectsPageProps) {
|
||||
const sortedVisibleProjects = useMemo(
|
||||
() =>
|
||||
visibleProjects.sort((a, b) => {
|
||||
const projectADifficulty = a?.frontmatter.difficulty || 'beginner';
|
||||
const projectBDifficulty = b?.frontmatter.difficulty || 'beginner';
|
||||
const projectADifficulty = a?.difficulty || 'beginner';
|
||||
const projectBDifficulty = b?.difficulty || 'beginner';
|
||||
return (
|
||||
projectDifficulties.indexOf(projectADifficulty) -
|
||||
projectDifficulties.indexOf(projectBDifficulty)
|
||||
allowedOfficialProjectDifficulty.indexOf(projectADifficulty) -
|
||||
allowedOfficialProjectDifficulty.indexOf(projectBDifficulty)
|
||||
);
|
||||
}),
|
||||
[visibleProjects],
|
||||
@@ -111,7 +111,7 @@ export function ProjectsPage(props: ProjectsPageProps) {
|
||||
{isFilterOpen && <X size={13} className="mr-1" />}
|
||||
Categories
|
||||
</button>
|
||||
<div className="container relative flex flex-col gap-4 sm:flex-row">
|
||||
<div className="relative container flex flex-col gap-4 sm:flex-row">
|
||||
<div
|
||||
className={cn(
|
||||
'hidden w-full flex-col from-gray-100 sm:w-[160px] sm:shrink-0 sm:border-r sm:bg-linear-to-l sm:pt-6',
|
||||
@@ -171,7 +171,7 @@ export function ProjectsPage(props: ProjectsPageProps) {
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex grow flex-col pb-20 pt-2 sm:pt-6">
|
||||
<div className="flex grow flex-col pt-2 pb-20 sm:pt-6">
|
||||
<div className="mb-4 flex items-center justify-between text-sm text-gray-500">
|
||||
<h3 className={'flex items-center'}>
|
||||
<Box size={15} className="mr-1" strokeWidth={2} />
|
||||
@@ -187,9 +187,9 @@ export function ProjectsPage(props: ProjectsPageProps) {
|
||||
<div className="grid grid-cols-1 gap-1.5 sm:grid-cols-2">
|
||||
{sortedVisibleProjects.map((project) => (
|
||||
<ProjectCard
|
||||
key={project.id}
|
||||
key={project.slug}
|
||||
project={project}
|
||||
userCount={userCounts[project.id] || 0}
|
||||
userCount={userCounts[project.slug] || 0}
|
||||
status={'none'}
|
||||
/>
|
||||
))}
|
||||
|
||||
@@ -1,69 +1,22 @@
|
||||
---
|
||||
import { getQuestionGroupsByIds } from '../lib/question-group';
|
||||
import { getRoadmapsByIds, type RoadmapFrontmatter } from '../lib/roadmap';
|
||||
import { Map, Clipboard } from 'lucide-react';
|
||||
import { Map } from 'lucide-react';
|
||||
import { listOfficialRoadmaps } from '../queries/official-roadmap';
|
||||
|
||||
export interface Props {
|
||||
roadmap: RoadmapFrontmatter;
|
||||
relatedRoadmaps: string[];
|
||||
}
|
||||
|
||||
const { roadmap } = Astro.props;
|
||||
const { relatedRoadmaps } = Astro.props;
|
||||
|
||||
const relatedRoadmaps = roadmap.relatedRoadmaps || [];
|
||||
const relatedRoadmapDetails = await getRoadmapsByIds(relatedRoadmaps);
|
||||
|
||||
const relatedQuestions = roadmap.relatedQuestions || [];
|
||||
const relatedQuestionDetails = await getQuestionGroupsByIds(relatedQuestions);
|
||||
const allRoadmaps = await listOfficialRoadmaps();
|
||||
const relatedRoadmapsDetails = allRoadmaps.filter((roadmap) =>
|
||||
relatedRoadmaps.includes(roadmap.slug),
|
||||
);
|
||||
---
|
||||
|
||||
{
|
||||
relatedQuestionDetails.length > 0 && (
|
||||
<div class='border-t bg-gray-100 pb-3'>
|
||||
<div class='container'>
|
||||
<div class='relative -top-5 flex justify-between'>
|
||||
<span class='text-md flex items-center rounded-md border bg-white px-3 py-1 font-medium'>
|
||||
<Clipboard className='mr-1.5 text-black' size='17px' />
|
||||
Test your Knowledge
|
||||
</span>
|
||||
<a
|
||||
href='/questions'
|
||||
class='text-md rounded-md border bg-white px-3 py-1 font-medium hover:bg-gray-50'
|
||||
>
|
||||
<span class='hidden sm:inline'>All Quizzes →</span>
|
||||
<span class='inline sm:hidden'>More →</span>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div class='flex flex-col gap-1 pb-8'>
|
||||
{relatedQuestionDetails.map((relatedQuestionGroup) => (
|
||||
<a
|
||||
href={`/questions/${relatedQuestionGroup.id}`}
|
||||
class='flex flex-col gap-0.5 rounded-md border bg-white px-3.5 py-2 hover:bg-gray-50 sm:flex-row sm:gap-0'
|
||||
>
|
||||
<span class='inline-block min-w-[150px] font-medium'>
|
||||
{relatedQuestionGroup.title}
|
||||
</span>
|
||||
<span class='text-gray-500'>
|
||||
{relatedQuestionGroup.description}
|
||||
</span>
|
||||
</a>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
{
|
||||
relatedRoadmaps.length && (
|
||||
<div
|
||||
class:list={[
|
||||
'border-t bg-gray-100',
|
||||
{
|
||||
'mt-0': !relatedQuestionDetails.length,
|
||||
},
|
||||
]}
|
||||
>
|
||||
<div class:list={['border-t bg-gray-100']}>
|
||||
<div class='container'>
|
||||
<div class='relative -top-5 flex justify-between'>
|
||||
<span class='text-md flex items-center rounded-md border bg-white px-3 py-1 font-medium'>
|
||||
@@ -80,17 +33,15 @@ const relatedQuestionDetails = await getQuestionGroupsByIds(relatedQuestions);
|
||||
</div>
|
||||
|
||||
<div class='flex flex-col gap-1 pb-8'>
|
||||
{relatedRoadmapDetails.map((relatedRoadmap) => (
|
||||
{relatedRoadmapsDetails.map((relatedRoadmap) => (
|
||||
<a
|
||||
href={`/${relatedRoadmap.id}`}
|
||||
href={`/${relatedRoadmap.slug}`}
|
||||
class='flex flex-col gap-0.5 rounded-md border bg-white px-3.5 py-2 hover:bg-gray-50 sm:flex-row sm:gap-0'
|
||||
>
|
||||
<span class='inline-block min-w-[195px] font-medium'>
|
||||
{relatedRoadmap.frontmatter.briefTitle}
|
||||
</span>
|
||||
<span class='text-gray-500'>
|
||||
{relatedRoadmap.frontmatter.briefDescription}
|
||||
{relatedRoadmap.title.card}
|
||||
</span>
|
||||
<span class='text-gray-500'>{relatedRoadmap.description}</span>
|
||||
</a>
|
||||
))}
|
||||
</div>
|
||||
|
||||
@@ -5,9 +5,7 @@ import {
|
||||
Bot,
|
||||
FolderKanbanIcon,
|
||||
MapIcon,
|
||||
MessageCircle,
|
||||
} from 'lucide-react';
|
||||
import { type RoadmapFrontmatter } from '../lib/roadmap';
|
||||
import LoginPopup from './AuthenticationFlow/LoginPopup.astro';
|
||||
import { DownloadRoadmapButton } from './DownloadRoadmapButton';
|
||||
import { MarkFavorite } from './FeaturedItems/MarkFavorite';
|
||||
@@ -20,20 +18,16 @@ import { PersonalizedRoadmap } from './PersonalizedRoadmap/PersonalizedRoadmap';
|
||||
export interface Props {
|
||||
title: string;
|
||||
description: string;
|
||||
note?: string;
|
||||
partner?: {
|
||||
description: string;
|
||||
link: string;
|
||||
linkText: string;
|
||||
};
|
||||
roadmapId: string;
|
||||
isUpcoming?: boolean;
|
||||
hasSearch?: boolean;
|
||||
projectCount?: number;
|
||||
coursesCount?: number;
|
||||
hasAIChat?: boolean;
|
||||
question?: RoadmapFrontmatter['question'];
|
||||
hasTopics?: boolean;
|
||||
isForkable?: boolean;
|
||||
activeTab?: 'roadmap' | 'projects' | 'courses';
|
||||
}
|
||||
@@ -43,12 +37,8 @@ const {
|
||||
description,
|
||||
roadmapId,
|
||||
partner,
|
||||
isUpcoming = false,
|
||||
note,
|
||||
hasTopics = false,
|
||||
hasAIChat = false,
|
||||
projectCount = 0,
|
||||
question,
|
||||
activeTab = 'roadmap',
|
||||
coursesCount = 0,
|
||||
} = Astro.props;
|
||||
|
||||
@@ -10,10 +10,12 @@ import { useOutsideClick } from '../hooks/use-outside-click';
|
||||
import { markdownToHtml } from '../lib/markdown';
|
||||
import { cn } from '../lib/classname';
|
||||
import { useScrollPosition } from '../hooks/use-scroll-position';
|
||||
import type { JSONContent } from '@tiptap/core';
|
||||
import { guideRenderer } from '../lib/guide-renderer';
|
||||
|
||||
type RoadmapTitleQuestionProps = {
|
||||
question: string;
|
||||
answer: string;
|
||||
answer: JSONContent;
|
||||
roadmapId?: string;
|
||||
};
|
||||
|
||||
@@ -38,24 +40,24 @@ export function RoadmapTitleQuestion(props: RoadmapTitleQuestionProps) {
|
||||
'rounded-0 -mx-4 sm:mx-0': isAnswerVisible,
|
||||
// @FIXME:
|
||||
// The line below is to keep the question hidden on mobile devices except for
|
||||
// the frontend roadmap. This is because we did not use to have the question
|
||||
// the frontend roadmap. This is because we did not use to have the question
|
||||
// on mobile devices before and we don't want to cause any SEO issues. It will
|
||||
// be enabled on other roadmaps in the future.
|
||||
},
|
||||
)}
|
||||
>
|
||||
{isAnswerVisible && (
|
||||
<div className="fixed left-0 right-0 top-0 z-100 h-full items-center justify-center overflow-y-auto overflow-x-hidden overscroll-contain bg-black/50"></div>
|
||||
<div className="fixed top-0 right-0 left-0 z-100 h-full items-center justify-center overflow-x-hidden overflow-y-auto overscroll-contain bg-black/50"></div>
|
||||
)}
|
||||
<h2
|
||||
className="z-50 flex cursor-pointer select-none items-center px-2 py-2 text-sm font-medium"
|
||||
className="z-50 flex cursor-pointer items-center px-2 py-2 text-sm font-medium select-none"
|
||||
aria-expanded={isAnswerVisible ? 'true' : 'false'}
|
||||
onClick={(e) => {
|
||||
e.preventDefault();
|
||||
setIsAnswerVisible(!isAnswerVisible);
|
||||
}}
|
||||
>
|
||||
<span className="flex grow select-none items-center">
|
||||
<span className="flex grow items-center select-none">
|
||||
<Info className="mr-1.5 inline-block h-4 w-4" strokeWidth={2.5} />
|
||||
{question}
|
||||
</span>
|
||||
@@ -65,7 +67,7 @@ export function RoadmapTitleQuestion(props: RoadmapTitleQuestionProps) {
|
||||
</h2>
|
||||
|
||||
<div
|
||||
className={`absolute left-0 right-0 top-0 z-100 mt-0 border bg-white ${
|
||||
className={`absolute top-0 right-0 left-0 z-100 mt-0 border bg-white ${
|
||||
isAnswerVisible ? 'rounded-0 block sm:rounded-md' : 'hidden'
|
||||
}`}
|
||||
ref={ref}
|
||||
@@ -73,7 +75,7 @@ export function RoadmapTitleQuestion(props: RoadmapTitleQuestionProps) {
|
||||
{isAnswerVisible && (
|
||||
<h2
|
||||
className={cn(
|
||||
'sticky top-0 flex cursor-pointer select-none items-center rounded-t-md border-b bg-white px-[7px] py-[9px] text-base font-medium',
|
||||
'sticky top-0 flex cursor-pointer items-center rounded-t-md border-b bg-white px-[7px] py-[9px] text-base font-medium select-none',
|
||||
)}
|
||||
onClick={() => {
|
||||
setIsAnswerVisible(false);
|
||||
@@ -95,9 +97,11 @@ export function RoadmapTitleQuestion(props: RoadmapTitleQuestionProps) {
|
||||
</h2>
|
||||
)}
|
||||
<div
|
||||
className="bg-gray-100 p-3 text-base [&>h2]:mb-2 [&>h2]:mt-5 [&>h2]:text-[17px] [&>h2]:font-medium [&>p:last-child]:mb-0 [&>p>a]:font-semibold [&>p>a]:underline [&>p>a]:underline-offset-2 [&>p]:mb-3 [&>p]:font-normal [&>p]:leading-relaxed [&>p]:text-gray-800 [&>ul>li]:mb-2 [&>ul>li]:font-normal"
|
||||
dangerouslySetInnerHTML={{ __html: markdownToHtml(answer, false) }}
|
||||
></div>
|
||||
className="bg-gray-100 p-3 text-base [&>h2]:mt-5 [&>h2]:mb-2 [&>h2]:text-[17px] [&>h2]:font-medium [&>p]:mb-3 [&>p]:leading-relaxed [&>p]:font-normal [&>p]:text-gray-800 [&>p:last-child]:mb-0 [&>p>a]:font-semibold [&>p>a]:underline [&>p>a]:underline-offset-2 [&>ul>li]:mb-2 [&>ul>li]:font-normal"
|
||||
// dangerouslySetInnerHTML={{ __html: markdownToHtml(answer, false) }}
|
||||
>
|
||||
{guideRenderer.render(answer)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -151,6 +151,12 @@ const groups: GroupType[] = [
|
||||
type: 'skill',
|
||||
otherGroups: ['Web Development'],
|
||||
},
|
||||
{
|
||||
title: 'Next.js',
|
||||
link: '/nextjs',
|
||||
type: 'skill',
|
||||
otherGroups: ['Web Development'],
|
||||
},
|
||||
{
|
||||
title: 'Spring Boot',
|
||||
link: '/spring-boot',
|
||||
|
||||
@@ -25,7 +25,7 @@ export function TableOfContent(props: TableOfContentProps) {
|
||||
className={cn(
|
||||
'relative min-w-[250px] px-5 pt-0 max-lg:max-w-full max-lg:min-w-full max-lg:border-none max-lg:px-0 lg:pt-5',
|
||||
{
|
||||
'top-0 lg:sticky!': totalRows <= 20,
|
||||
'top-[36px] lg:sticky!': totalRows <= 20,
|
||||
},
|
||||
)}
|
||||
>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
import type { FAQType } from '../../../components/FAQs/FAQs.astro';
|
||||
import type { FAQType } from '../../../components/FAQs/FAQs';
|
||||
|
||||
export const faqs: FAQType[] = [
|
||||
{
|
||||
@@ -11,13 +11,13 @@ export const faqs: FAQType[] = [
|
||||
{
|
||||
question: 'What is reinforcement learning?',
|
||||
answer: [
|
||||
'[Reinforcement learning](https://towardsdatascience.com/reinforcement-learning-101-e24b50e1d292) (RL) is a type of machine learning where an agent learns to make decisions by interacting with an environment. Unlike traditional supervised learning, RL does not rely on labeled data. Instead, the agent learns by taking actions and receiving feedback in the form of rewards or penalties. Over time, it aims to maximize cumulative rewards by refining its strategy based on past experiences. RL is often used in areas like robotics, game AI, and autonomous systems, where the goal is to develop intelligent behaviors through trial and error.',
|
||||
'[Reinforcement learning](https://towardsdatascience.com/reinforcement-learning-101-e24b50e1d292) (RL) is a type of machine learning where an agent learns to make decisions by interacting with an environment. Unlike traditional supervised learning, RL does not rely on labeled data. Instead, the agent learns by taking actions and receiving feedback in the form of rewards or penalties. Over time, it aims to maximize cumulative rewards by refining its strategy based on past experiences. RL is often used in areas like robotics, game AI, and autonomous systems, where the goal is to develop intelligent behaviors through trial and error.',
|
||||
],
|
||||
},
|
||||
{
|
||||
question: 'Do AI Engineers need a degree?',
|
||||
answer: [
|
||||
'While a degree in computer science, data science, or a related field can provide a solid foundation for becoming an AI engineer, it is not strictly necessary. Many successful AI engineers are self-taught or have gained expertise through online courses, certifications, and hands-on projects.'
|
||||
'While a degree in computer science, data science, or a related field can provide a solid foundation for becoming an AI engineer, it is not strictly necessary. Many successful AI engineers are self-taught or have gained expertise through online courses, certifications, and hands-on projects.',
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
@@ -6,7 +6,7 @@ briefTitle: 'AI Red Teaming'
|
||||
briefDescription: 'Learn to become a red teaming expert in AI'
|
||||
title: 'AI Red Teaming'
|
||||
description: 'Learn to become a red teaming expert in AI'
|
||||
isNew: true
|
||||
isNew: false
|
||||
isHidden: false
|
||||
hasTopics: true
|
||||
renderer: editor
|
||||
|
||||
@@ -1 +1,8 @@
|
||||
# A/B Testing
|
||||
# A/B Testing
|
||||
|
||||
A/B testing is a way to compare two versions of something to see which one works better. You split your audience into two groups, one sees version A, the other sees version B — and then you measure which version gets better results, like more clicks, sales, or sign-ups. This helps you make decisions based on real data instead of guesses.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@A software engineer's guide to A/B testing](https://posthog.com/product-engineers/ab-testing-guide-for-engineers)
|
||||
- [@video@A/B Testing for Beginners](https://www.youtube.com/watch?v=VpTlNRUcIDo)
|
||||
@@ -1 +1,8 @@
|
||||
# Amazon EC2 ( Compute)
|
||||
# Amazon EC2 ( Compute)
|
||||
|
||||
Amazon Elastic Compute Cloud (EC2) is a web service that provides secure, resizable compute capacity in the cloud. It is designed to make web-scale cloud computing easier for developers. EC2’s simple web service interface allows you to obtain and configure capacity with minimal friction. EC2 enables you to scale your compute capacity, develop and deploy applications faster, and run applications on AWS's reliable computing environment. You have the control of your computing resources and can access various configurations of CPU, Memory, Storage, and Networking capacity for your instances.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@EC2 - User Guide](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/concepts.html)
|
||||
- [@video@Introduction to Amazon EC2](https://www.youtube.com/watch?v=eaicwmnSdCs)
|
||||
@@ -1 +1,7 @@
|
||||
# Amazon RDS (Database)
|
||||
# Amazon RDS (Database)
|
||||
|
||||
Amazon RDS (Relational Database Service) is a web service from Amazon Web Services. It's designed to simplify the setup, operation, and scaling of relational databases in the cloud. This service provides cost-efficient, resizable capacity for an industry-standard relational database and manages common database administration tasks. RDS supports six database engines: Amazon Aurora, PostgreSQL, MySQL, MariaDB, Oracle Database, and SQL Server. These engines give you the ability to run instances ranging from 5GB to 6TB of memory, accommodating your specific use case. It also ensures the database is up-to-date with the latest patches, automatically backs up your data and offers encryption at rest and in transit.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Amazon RDS](https://aws.amazon.com/rds/)
|
||||
@@ -1 +1,7 @@
|
||||
# Amazon RDS (Database)
|
||||
# Amazon RDS (Database)
|
||||
|
||||
Amazon RDS (Relational Database Service) is a web service from Amazon Web Services. It's designed to simplify the setup, operation, and scaling of relational databases in the cloud. This service provides cost-efficient, resizable capacity for an industry-standard relational database and manages common database administration tasks. RDS supports six database engines: Amazon Aurora, PostgreSQL, MySQL, MariaDB, Oracle Database, and SQL Server. These engines give you the ability to run instances ranging from 5GB to 6TB of memory, accommodating your specific use case. It also ensures the database is up-to-date with the latest patches, automatically backs up your data and offers encryption at rest and in transit.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Amazon RDS](https://aws.amazon.com/rds/)
|
||||
|
||||
@@ -1 +1,8 @@
|
||||
# Amazon Redshift
|
||||
# Amazon Redshift
|
||||
|
||||
Amazon Redshift is a cloud-based data warehouse service from Amazon that lets you store and analyze large amounts of data quickly. It’s designed for running complex queries on huge datasets, so businesses can use it to turn raw data into useful reports and insights. You can load data into Redshift from many sources, and then use SQL to explore it, just like you would with a regular database — but it’s optimized to handle much bigger data and run faster.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Amazon Redshift](https://aws.amazon.com/redshift/)
|
||||
- [@video@Getting Started with Amazon Redshift - AWS Online Tech Talks](https://www.youtube.com/watch?v=dfo4J5ZhlKI)
|
||||
@@ -1 +1,7 @@
|
||||
# Apache Airflow
|
||||
# Apache Airflow
|
||||
|
||||
Apache Airflow is an open-source tool that helps you schedule, organize, and monitor workflows. Think of it like a to-do list for your data tasks, but smarter — you can set tasks to run in a specific order, track their progress, and see what happens if something fails. It’s often used for automating data pipelines so that data moves, gets processed, and is ready for use without manual work.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Apache Airflow](https://airflow.apache.org/)
|
||||
@@ -1 +1,7 @@
|
||||
# Apache Hadoop YARN
|
||||
# Apache Hadoop YARN
|
||||
|
||||
Apache Hadoop YARN (Yet Another Resource Negotiator) is the part of Hadoop that manages resources and runs jobs on a cluster. It has a ResourceManager that controls all cluster resources and an ApplicationMaster for each job that schedules and runs tasks. YARN lets different tools like MapReduce and Spark share the same cluster, making it more efficient, flexible, and reliable.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@video@Hadoop Yarn Tutorial](https://www.youtube.com/watch?v=6bIF9VwRwE0)
|
||||
@@ -1 +1,12 @@
|
||||
# Apache Kafka
|
||||
# Apache Kafka
|
||||
|
||||
Apache Kafka is an open-source stream-processing software platform developed by LinkedIn and donated to the Apache Software Foundation. It is written in Scala and Java and operates based on a message queue, designed to handle real-time data feeds. Kafka functions as a kind of message broker service in between the data producers and the consumers, facilitating efficient transmission of data. It can be viewed as a durable message broker where applications can process and reprocess streamed data. Kafka is a highly scalable and fault-tolerant system which ensures data delivery without loss.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Apache Kafka](https://kafka.apache.org/quickstart)
|
||||
- [@article@Apache Kafka Streams](https://docs.confluent.io/platform/current/streams/concepts.html)
|
||||
- [@article@Kafka Streams Confluent](https://kafka.apache.org/documentation/streams/)
|
||||
- [@video@Apache Kafka Fundamentals](https://www.youtube.com/watch?v=B5j3uNBH8X4)
|
||||
- [@video@Kafka in 100 Seconds](https://www.youtube.com/watch?v=uvb00oaa3k8)
|
||||
- [@feed@Explore top posts about Kafka](https://app.daily.dev/tags/kafka?ref=roadmapsh)
|
||||
@@ -1 +1,9 @@
|
||||
# Apache Spark
|
||||
# Apache Spark
|
||||
|
||||
Apache Spark is an open-source distributed computing system designed for big data processing and analytics. It offers a unified interface for programming entire clusters, enabling efficient handling of large-scale data with built-in support for data parallelism and fault tolerance. Spark excels in processing tasks like batch processing, real-time data streaming, machine learning, and graph processing. It’s known for its speed, ease of use, and ability to process data in-memory, significantly outperforming traditional MapReduce systems. Spark is widely used in big data ecosystems for its scalability and versatility across various data processing tasks.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@ApacheSpark](https://spark.apache.org/documentation.html)
|
||||
- [@article@Spark By Examples](https://sparkbyexamples.com)
|
||||
- [@feed@Explore top posts about Apache Spark](https://app.daily.dev/tags/spark?ref=roadmapsh)
|
||||
@@ -1 +1,8 @@
|
||||
# APIs
|
||||
# APIs and Data Collection
|
||||
|
||||
Application Programming Interfaces, better known as APIs, play a fundamental role in the work of data engineers, particularly in the process of data collection. APIs are sets of protocols, routines, and tools that enable different software applications to communicate with each other. An API allows developers to interact with a service or platform through a defined set of rules and endpoints, enabling data exchange and functionality use without needing to understand the underlying code. In data engineering, APIs are used extensively to collect, exchange, and manipulate data from different sources in a secure and efficient manner.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is an API?](https://aws.amazon.com/what-is/api/)
|
||||
- [@article@A Beginner's Guide to APIs](https://www.postman.com/what-is-an-api/)
|
||||
@@ -1 +1,10 @@
|
||||
# ArgoCD
|
||||
# ArgoCD
|
||||
|
||||
Argo CD is a continuous delivery tool for Kubernetes that is based on the GitOps methodology. It is used to automate the deployment and management of cloud-native applications by continuously synchronizing the desired application state with the actual application state in the production environment. In an Argo CD workflow, changes to the application are made by committing code or configuration changes to a Git repository. Argo CD monitors the repository and automatically deploys the changes to the production environment using a continuous delivery pipeline. The pipeline is triggered by changes to the Git repository and is responsible for building, testing, and deploying the changes to the production environment. Argo CD is designed to be a simple and efficient way to manage cloud-native applications, as it allows developers to make changes to the system using familiar tools and processes and it provides a clear and auditable history of all changes to the system. It is often used in conjunction with tools such as Helm to automate the deployment and management of cloud-native applications.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Argo CD - Argo Project](https://argo-cd.readthedocs.io/en/stable/)
|
||||
- [@video@ArgoCD Tutorial for Beginners](https://www.youtube.com/watch?v=MeU5_k9ssrs)
|
||||
- [@video@What is ArgoCD](https://www.youtube.com/watch?v=p-kAqxuJNik)
|
||||
- [@feed@Explore top posts about ArgoCD](https://app.daily.dev/tags/argocd?ref=roadmapsh)
|
||||
@@ -1 +1,10 @@
|
||||
# Async vs Sync Communication
|
||||
# Async vs Sync Communication
|
||||
|
||||
Synchronous and asynchronous data refer to different approaches in data transmission and processing. **Synchronous** ingestion is a process where the system waits for a response from the data source before proceeding. In contrast, **asynchronous** ingestion is a process where data is ingested without waiting for a response from the data source. Normally, data is queued in a buffer and sent in batches for efficiency.
|
||||
|
||||
Each approach has its benefits and drawbacks, and the choice depends on the specific requirements of the data ingestion process and the business needs.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Synchronous And Asynchronous Data Transmission: The Differences And How to Use Them](https://www.computer.org/publications/tech-news/trends/synchronous-asynchronous-data-transmission)
|
||||
- [@article@Synchronous vs Asynchronous Communication: What’s the Difference?](https://www.getguru.com/reference/synchronous-vs-asynchronous-communication)
|
||||
@@ -1 +1,8 @@
|
||||
# Aurora DB
|
||||
# Aurora DB
|
||||
|
||||
Amazon Aurora (Aurora) is a fully managed relational database engine that's compatible with MySQL and PostgreSQL. Aurora includes a high-performance storage subsystem. Its MySQL- and PostgreSQL-compatible database engines are customized to take advantage of that fast distributed storage. The underlying storage grows automatically as needed. Aurora also automates and standardizes database clustering and replication, which are typically among the most challenging aspects of database configuration and administration.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@SAmazon Aurora](https://aws.amazon.com/rds/aurora/)
|
||||
- [@article@SAmazon Aurora: What It Is, How It Works, and How to Get Started](https://www.datacamp.com/tutorial/amazon-aurora)
|
||||
@@ -1 +1,8 @@
|
||||
# Authentication vs Authorization
|
||||
# Authentication vs Authorization
|
||||
|
||||
Authentication and authorization are popular terms in modern computer systems that often confuse people. **Authentication** is the process of confirming the identity of a user or a device (i.e., an entity). During the authentication process, an entity usually relies on some proof to authenticate itself, i.e. an authentication factor. In contrast to authentication, **authorization** refers to the process of verifying what resources entities (users or devices) can access, or what actions they can perform, i.e., their access rights.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Basic Authentication](https://roadmap.sh/guides/basic-authentication)
|
||||
- [@article@What is Authentication vs Authorization?](https://auth0.com/intro-to-iam/authentication-vs-authorization)
|
||||
@@ -1 +1,11 @@
|
||||
# AWS CDK
|
||||
# AWS CDK
|
||||
|
||||
The AWS Cloud Development Kit (AWS CDK) is an open-source software development framework used to provision cloud infrastructure resources in a safe, repeatable manner through AWS CloudFormation. AWS CDK offers the flexibility to write infrastructure as code in popular languages like Python, Java, Go, and C#.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@course@AWS CDK Crash Course for Beginners](https://www.youtube.com/watch?v=D4Asp5g4fp8)
|
||||
- [@official@AWS CDK](https://aws.amazon.com/cdk/)
|
||||
- [@official@AWS CDK Documentation](https://docs.aws.amazon.com/cdk/index.html)
|
||||
- [@opensource@AWS CDK Examples](https://github.com/aws-samples/aws-cdk-examples)
|
||||
- [@feed@Explore top posts about AWS](https://app.daily.dev/tags/aws?ref=roadmapsh)
|
||||
@@ -1 +1,8 @@
|
||||
# AWS EKS
|
||||
# EKS
|
||||
|
||||
Amazon Elastic Kubernetes Service (EKS) is a managed service that simplifies the deployment, management, and scaling of containerized applications using Kubernetes, an open-source container orchestration platform. EKS manages the Kubernetes control plane for the user, making it easy to run Kubernetes applications without the operational overhead of maintaining the Kubernetes control plane. With EKS, you can leverage AWS services such as Auto Scaling Groups, Elastic Load Balancer, and Route 53 for resilient and scalable application infrastructure. Additionally, EKS can support Spot and On-Demand instances use, and includes integrations with AWS App Mesh service and AWS Fargate for serverless compute.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Amazon Elastic Kubernetes Service (EKS)](https://aws.amazon.com/eks/)
|
||||
- [@official@Concepts of Amazon EKS](https://docs.aws.amazon.com/eks/)
|
||||
@@ -1 +1,9 @@
|
||||
# AWS SNS
|
||||
# AWS SNS
|
||||
|
||||
Amazon Simple Notification Service (Amazon SNS) is a web service that makes it easy to set up, operate, and send notifications from the cloud. It provides developers with a highly scalable, flexible, and cost-effective capability to publish messages from an application and immediately deliver them to subscribers or other applications. It is designed to make web-scale computing easier for developers. Amazon SNS follows the “publish-subscribe” (pub-sub) messaging paradigm, with notifications being delivered to clients using a “push” mechanism that eliminates the need to periodically check or “poll” for new information and updates. With simple APIs requiring minimal up-front development effort, no maintenance or management overhead and pay-as-you-go pricing, Amazon SNS gives developers an easy mechanism to incorporate a powerful notification system with their applications.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Amazon Simple Notification Service (SNS) ](http://aws.amazon.com/sns/)
|
||||
- [@official@Send Fanout Event Notifications](https://aws.amazon.com/getting-started/hands-on/send-fanout-event-notifications/)
|
||||
- [@article@What is Pub/Sub Messaging?](https://aws.amazon.com/what-is/pub-sub-messaging/)
|
||||
@@ -1 +1,9 @@
|
||||
# AWS SQS
|
||||
# AWS SQS
|
||||
|
||||
Amazon Simple Queue Service (Amazon SQS) offers a secure, durable, and available hosted queue that lets you integrate and decouple distributed software systems and components. Amazon SQS offers common constructs such as dead-letter queues and cost allocation tags. It provides a generic web services API that you can access using any programming language that the AWS SDK supports.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Amazon Simple Queue Service](https://aws.amazon.com/sqs/)
|
||||
- [@official@What is Amazon Simple Queue Service?](https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/welcome.html)
|
||||
- [@article@Amazon Simple Queue Service (SQS): A Comprehensive Tutorial](https://www.datacamp.com/tutorial/amazon-sqs)
|
||||
@@ -1 +1,9 @@
|
||||
# Azure Blob Storage
|
||||
# Azure Blob Storage
|
||||
|
||||
Azure Blob Storage is Microsoft's object storage solution for the cloud. “Blob” stands for Binary Large Object, a term used to describe storage for unstructured data like text, images, and video. Azure Blob Storage is Microsoft Azure’s solution for storing these blobs in the cloud. It offers flexible storage—you only pay based on your usage. Depending on the access speed you need for your data, you can choose from various storage tiers (hot, cool, and archive). Being cloud-based, it is scalable, secure, and easy to manage.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs)
|
||||
- [@official@Introduction to Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction)
|
||||
- [@video@A Beginners Guide to Azure Blob Storage](https://www.youtube.com/watch?v=ah1XqItWkuc&t=300s)
|
||||
@@ -1 +1,10 @@
|
||||
# Azure SQL Database
|
||||
# Azure SQL Database
|
||||
|
||||
Azure SQL Database is a fully managed Platform as a Service (PaaS) offering. It abstracts the underlying infrastructure, enabling developers to focus on building and deploying applications without worrying about database maintenance tasks.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Azure SQL Database](https://azure.microsoft.com/en-us/products/azure-sql/database)
|
||||
- [@official@What is Azure SQL Database?](https://learn.microsoft.com/en-us/azure/azure-sql/database/sql-database-paas-overview?view=azuresql)
|
||||
- [@article@Azure SQL Database: Step-by-Step Setup and Management](https://www.datacamp.com/tutorial/azure-sql-database)
|
||||
- [@video@Azure SQL for Beginners](https://www.youtube.com/playlist?list=PLlrxD0HtieHi5c9-i_Dnxw9vxBY-TqaeN)
|
||||
@@ -1 +1,9 @@
|
||||
# Azure Virtual Machines
|
||||
# Azure Virtual Machines
|
||||
|
||||
Azure Virtual Machines (VMs) enable virtualization without requiring hardware investments. They provide customizable environments for development, testing, and cloud applications so you can run different operating systems like Ubuntu on a Windows host based on your needs. One of the key advantages of Azure VMs is the pay-as-you-go pricing model. It allows you to scale resources up or down as needed, ensuring cost efficiency without wasting resources.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Azure Virtual Machines](https://azure.microsoft.com/en-us/products/virtual-machines)
|
||||
- [@official@Virtual Machines in Azure](https://learn.microsoft.com/en-us/azure/virtual-machines/overview)
|
||||
- [@video@AVirtual Machines in Azure | Beginner's Guide](https://www.youtube.com/watch?v=_abaWXoQFZU)
|
||||
@@ -1 +1,8 @@
|
||||
# Batch
|
||||
# Batch
|
||||
|
||||
Batch processing is a method in which large volumes of collected data are processed in chunks or batches. This approach is especially effective for resource-intensive jobs, repetitive tasks, and managing extensive datasets where real-time processing isn’t required. It is ideal for applications like data warehousing, ETL (Extract, Transform, Load), and large-scale reporting. Data batch processing is mainly automated, requiring minimal human interaction once the process is set up. Tasks are predefined, and the system executes them according to a scheduled timeline, typically during off-peak hours when computing resources are readily available.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is Batch Processing?](https://aws.amazon.com/what-is/batch-processing/)
|
||||
- [@article@Batch And Streaming Demystified For Unification](https://towardsdatascience.com/batch-and-streaming-demystified-for-unification-dee0b48f921d/)
|
||||
@@ -1 +1,15 @@
|
||||
# Best Practices
|
||||
# Best Practices
|
||||
|
||||
1. **Ensure Reliability.** A robust messaging system must guarantee that messages aren’t lost, even during node failures or network issues. This means using acknowledgments, replication across multiple brokers, and durable storage on disk. These measures ensure that producers and consumers can recover seamlessly without data loss when something goes wrong.
|
||||
|
||||
2. **Design for Scalability.** Scalability should be baked in from the start. Partition topics strategically to distribute load across brokers and consumer groups, enabling horizontal scaling.
|
||||
|
||||
3. **Maintain Message Ordering.** For systems that depend on message sequence, ensure ordering within partitions and design producers to consistently route related messages to the same partition.
|
||||
|
||||
4. **Secure Communication.** Messaging queues often carry sensitive data, so encrypt messages both in transit and at rest. Implement authentication techniques to ensure only trusted clients can publish or consume, and enforce authorization rules to limit access to specific topics or operations.
|
||||
|
||||
5. **Monitor & Alert.** Continuous visibility into your messaging system is essential. Track metrics such as message lag, throughput, consumer group health, and broker disk usage. Set alerts for abnormal patterns, like growing lag or dropped connections, so you can respond before they affect downstream systems.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Best Practices for Message Queue Architecture](https://abhishek-patel.medium.com/best-practices-for-message-queue-architecture-f69d47e3565)
|
||||
@@ -1 +1,11 @@
|
||||
# Big Data Tools
|
||||
# Big Data Tools
|
||||
|
||||
Big data tools are specialized software and platforms designed to handle the massive volume, velocity, and variety of data that traditional data processing tools cannot effectively manage. These tools provide the infrastructure, frameworks, and capabilities to process, analyze, and extract meaningful knowledge from vast datasets. They are essential for modern data-driven organizations seeking to gain insights, make informed decisions, and achieve a competitive advantage.
|
||||
|
||||
Hadoop and Spark are two of the most prominent frameworks in big data they handle the processing of large-scale data in very different ways. While Hadoop can be credited with democratizing the distributed computing paradigm through a robust storage system called HDFS and a computational model called MapReduce, Spark is changing the game with its in-memory architecture and flexible programming model.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is Big Data?](https://cloud.google.com/learn/what-is-big-data?hl=en)
|
||||
- [@article@Hadoop vs Spark: Which Big Data Framework Is Right For You?](https://www.datacamp.com/blog/hadoop-vs-spark)
|
||||
- [@video@introduction to Big Data with Spark and Hadoop](http://youtube.com/watch?v=vHlwg4ciCsI&t=80s&ab_channel=freeCodeAcademy)
|
||||
@@ -1 +1,8 @@
|
||||
# BigTable
|
||||
# BigTable
|
||||
|
||||
Bigtable is a high-performance, scalable database that excels at capturing, processing, and analyzing data in real-time. It aggregates data as it's written, providing immediate insights into user behavior, A/B testing results, and engagement metrics. This real-time capability also fuels AI/ML models for interactive applications. Bigtable integrates seamlessly with both Dataflow, enriching streaming pipelines with low-latency lookups, and BigQuery, enabling real-time serving of analytics in user facing application and ad-hoc querying on the same data.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Bigtable: Fast, Flexible NoSQL](https://cloud.google.com/bigtable?hl=en#scale-your-latency-sensitive-applications-with-the-nosql-pioneer)
|
||||
- [@article@Google Bigtable](https://www.techtarget.com/searchdatamanagement/definition/Google-BigTable)
|
||||
@@ -1 +1,11 @@
|
||||
# Business Intelligence
|
||||
# Business Intelligence
|
||||
|
||||
Business intelligence encompasses a set of techniques and technologies to transform raw data into meaningful insights that drive strategic decision-making within an organization. BI tools enable business users to access different types of data, historical and current, third-party and in-house, as well as semistructured data and unstructured data such as social media. Users can analyze this information to gain insights into how the business is performing and what it should do next.
|
||||
|
||||
BI platforms traditionally rely on data warehouses for their baseline information. The strength of a data warehouse is that it aggregates data from multiple data sources into one central system to support business data analytics and reporting. BI presents the results to the user in the form of reports, charts and maps, which might be displayed through a dashboard.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is business intelligence (BI)?](https://www.ibm.com/think/topics/business-intelligence)
|
||||
- [@article@Business intelligence: A complete overview](https://www.tableau.com/business-intelligence/what-is-business-intelligence)
|
||||
- [@video@What is business intelligence?](https://www.youtube.com/watch?v=l98-BcB3UIE)
|
||||
@@ -1 +1,10 @@
|
||||
# CAP Theorem
|
||||
# CAP Theorem
|
||||
|
||||
The CAP Theorem, also known as Brewer's Theorem, is a fundamental principle in distributed database systems. It states that in a distributed system, it's impossible to simultaneously guarantee all three of the following properties: Consistency (all nodes see the same data at the same time), Availability (every request receives a response, without guarantee that it contains the most recent version of the data), and Partition tolerance (the system continues to operate despite network failures between nodes). According to the theorem, a distributed system can only strongly provide two of these three guarantees at any given time. This principle guides the design and architecture of distributed systems, influencing decisions on data consistency models, replication strategies, and failure handling. Understanding the CAP Theorem is crucial for designing robust, scalable distributed systems and for choosing appropriate database solutions for specific use cases in distributed computing environments.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is CAP Theorem?](https://www.bmc.com/blogs/cap-theorem/)
|
||||
- [@article@An Illustrated Proof of the CAP Theorem](https://mwhittaker.github.io/blog/an_illustrated_proof_of_the_cap_theorem/)
|
||||
- [@article@CAP Theorem and its applications in NoSQL Databases](https://www.ibm.com/uk-en/cloud/learn/cap-theorem)
|
||||
- [@video@What is CAP Theorem?](https://www.youtube.com/watch?v=_RbsFXWRZ10)
|
||||
@@ -1 +1,10 @@
|
||||
# Cassandra
|
||||
# Cassandra
|
||||
|
||||
Apache Cassandra is a highly scalable, distributed NoSQL database designed to handle large amounts of structured data across multiple commodity servers. It provides high availability with no single point of failure, offering linear scalability and proven fault-tolerance on commodity hardware or cloud infrastructure. Cassandra uses a masterless ring architecture, where all nodes are equal, allowing for easy data distribution and replication. It supports flexible data models and can handle both unstructured and structured data. Cassandra excels in write-heavy environments and is particularly suitable for applications requiring high throughput and low latency. Its data model is based on wide column stores, offering a more complex structure than key-value stores. Widely used in big data applications, Cassandra is known for its ability to handle massive datasets while maintaining performance and reliability.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Apache Cassandra](https://cassandra.apache.org/_/index.html)
|
||||
- [@article@Cassandra - Quick Guide](https://www.tutorialspoint.com/cassandra/cassandra_quick_guide.htm)
|
||||
- [@video@Apache Cassandra - Course for Beginners](https://www.youtube.com/watch?v=J-cSy5MeMOA)
|
||||
- [@feed@Explore top posts about Backend Development](https://app.daily.dev/tags/backend?ref=roadmapsh)
|
||||
@@ -1 +1,10 @@
|
||||
# Census
|
||||
# Census
|
||||
|
||||
Census is a reverse ETL platform that synchronizes data from a data warehouse to various business applications and SaaS apps like Salesforce and Hubspot. It's a crucial part of the modern data stack, enabling businesses to operationalize their data by making it available in the tools where teams work, like CRMs, marketing platforms, and more.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Census](https://www.getcensus.com/reverse-etl)
|
||||
- [@official@Census Documentation](https://developers.getcensus.com/getting-started/introduction)
|
||||
- [@article@A starter guide to reverse ETL with Census](https://www.getcensus.com/blog/starter-guide-for-first-time-census-users)
|
||||
- [@video@How to "Reverse ETL" with Census](https://www.youtube.com/watch?v=XkS7DQFHzbA)
|
||||
@@ -1 +1,16 @@
|
||||
# Choosing the Right Technologies
|
||||
# Choosing the Right Technologies
|
||||
|
||||
The data engineering ecosystem is rapidly expanding, and selecting the right technologies for your use case can be challenging. Below you can find some considerations for choosing data technologies across the data engineering lifecycle:
|
||||
|
||||
* **Team size and capabilities.** Your team's size will determine the amount of bandwidth your team can dedicate to complex solutions. For small teams, try to stick to simple solutions and technologies your team is familiar with.
|
||||
* **Interoperability**. When choosing a technology or system, you’ll need to ensure that it interacts and operates smoothly with other technologies.
|
||||
* **Cost optimization and business value.** Consider direct and indirect costs of a technology and the opportunity cost of choosing some technologies over others.
|
||||
* **Location.** Companies have many options when it comes to choosing where to run their technology stack, including cloud providers, on-premises systems, hybrid clouds, and multicloud.
|
||||
* **Build versus buy**. Depending on your needs and capabilities, you can either invest in building your own technologies, implement open-source solutions, or purchase proprietary solutions and services.
|
||||
* **Server versus serverless**. Depending on your needs, you may prefer server-based setups, where developers manage servers, or serverless systems, which translates the server management to cloud providers, allowing developers to focus solely on writing code.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@book@Fundamentals of Data Engineering](https://www.oreilly.com/library/view/fundamentals-of-data/9781098108298/)
|
||||
- [@article@Build hybrid and multicloud architectures using Google Cloud](https://cloud.google.com/architecture/hybrid-multicloud-patterns)
|
||||
- [@article@The Unfulfilled Promise of Serverless](https://www.lastweekinaws.com/blog/the-unfulfilled-promise-of-serverless/)
|
||||
@@ -1 +1,11 @@
|
||||
# CI/CD
|
||||
# CI / CD
|
||||
|
||||
**Continuous Integration** is a software development method where team members integrate their work at least once daily. An automated build checks every integration to detect errors in this method. In Continuous Integration, the software is built and tested immediately after a code commit. In a large project with many developers, commits are made many times during the day. With each commit, code is built and tested.
|
||||
|
||||
**Continuous Delivery** is a software engineering method in which a team develops software products in a short cycle. It ensures that software can be easily released at any time. The main aim of continuous delivery is to build, test, and release software with good speed and frequency. It helps reduce the cost, time, and risk of delivering changes by allowing for frequent updates in production.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is CI/CD? Continuous Integration and Continuous Delivery](https://www.guru99.com/continuous-integration.html)
|
||||
- [@article@Continuous Integration vs Delivery vs Deployment](https://www.guru99.com/continuous-integration-vs-delivery-vs-deployment.html)
|
||||
- [@article@CI/CD Pipeline: Learn with Example](https://www.guru99.com/ci-cd-pipeline.html)
|
||||
@@ -1 +1,10 @@
|
||||
# Circle CI
|
||||
# CircleCI
|
||||
|
||||
CircleCI is a CI/CD service that can be integrated with GitHub, BitBucket and GitLab repositories. The service can be used as a SaaS offering or self-managed using your own resources.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@CircleCI](https://circleci.com/)
|
||||
- [@official@CircleCI Documentation](https://circleci.com/docs)
|
||||
- [@official@Configuration Tutorial](https://circleci.com/docs/config-intro)
|
||||
- [@feed@Explore top posts about CI/CD](https://app.daily.dev/tags/cicd?ref=roadmapsh)
|
||||
@@ -1 +1,15 @@
|
||||
# Cloud Architectures
|
||||
# Cloud Architectures
|
||||
|
||||
Cloud architecture refers to how various cloud technology components, such as hardware, virtual resources, software capabilities, and virtual network systems interact and connect to create cloud computing environments. Cloud architecture dictates how components are integrated so that you can pool, share, and scale resources over a network. It acts as a blueprint that defines the best way to strategically combine resources to build a cloud environment for a specific business need.
|
||||
|
||||
Cloud architecture components can include, among others:
|
||||
|
||||
* A frontend platform
|
||||
* A backend platform
|
||||
* A cloud-based delivery model
|
||||
* A network (internet, intranet, or intercloud)
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is cloud architecture? - Google](https://cloud.google.com/learn/what-is-cloud-architecture)
|
||||
- [@video@What is Cloud Architecture and Common Models?](https://www.youtube.com/watch?v=zTP-bx495hU)
|
||||
@@ -1 +1,9 @@
|
||||
# Cloud Computing
|
||||
# Cloud Computing
|
||||
|
||||
**Cloud Computing** refers to the delivery of computing services over the internet rather than using local servers or personal devices. These services include servers, storage, databases, networking, software, analytics, and intelligence. Cloud Computing enables faster innovation, flexible resources, and economies of scale. There are various types of cloud computing such as public clouds, private clouds, and hybrids clouds. Furthermore, it's divided into different services like Infrastructure as a Service (IaaS), Platform as a Service (PaaS), and Software as a Service (SaaS). These services differ mainly in the level of control an organization has over their data and infrastructures.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Cloud Computing - IBM](https://www.ibm.com/think/topics/cloud-computing)
|
||||
- [@article@What is Cloud Computing? - Azure](https://azure.microsoft.com/en-gb/resources/cloud-computing-dictionary/what-is-cloud-computing)
|
||||
- [@video@What is Cloud Computing? - Amazon Web Services](https://www.youtube.com/watch?v=mxT233EdY5c)
|
||||
@@ -1 +1,9 @@
|
||||
# Cloud SQL (Database)
|
||||
# Cloud SQL (Database)
|
||||
|
||||
Google Cloud SQL is a fully-managed, cost-effective and scalable database service that makes it easy to set-up, maintain, manage and administer MySQL, PostgreSQL, and SQL Server databases in the cloud. Hosted on Google Cloud Platform, Cloud SQL provides a database infrastructure for applications running anywhere.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@course@Cloud SQL](https://www.cloudskillsboost.google/course_templates/701)
|
||||
- [@official@Cloud SQL](https://cloud.google.com/sql)
|
||||
- [@official@Cloud SQL overview](https://cloud.google.com/sql/docs/introduction)
|
||||
@@ -1 +1,3 @@
|
||||
# Cluster Computing Basics
|
||||
# Cluster Computing Basics
|
||||
|
||||
Cluster computing is the process of using multiple computing nodes, called clusters, to increase processing power for solving complex problems, such as Big Data analytics and AI model training. These tasks require parallel processing of millions of data points for complex classification and prediction tasks. Cluster computing technology coordinates multiple computing nodes, each with its own CPUs, GPUs, and internal memory, to work together on the same data processing task. Applications on cluster computing infrastructure run as if on a single machine and are unaware of the underlying system complexities.
|
||||
@@ -1 +1,5 @@
|
||||
# Cluster Management Tools
|
||||
# Cluster Management Tools
|
||||
|
||||
Cluster management software maximizes the work that a cluster of computers can perform. A cluster manager balances workload to reduce bottlenecks, monitors the health of the elements of the cluster, and manages failover when an element fails. A cluster manager can also help a system administrator to perform administration tasks on elements in the cluster.
|
||||
|
||||
Some of the most popular Cluster Management Tools are Kubernetes and Apache Hadoop YARN.
|
||||
@@ -1 +1,9 @@
|
||||
# Column
|
||||
# Column
|
||||
|
||||
A columnar database is a type of No-SQL database that stores data by columns instead of by rows. In a traditional SQL database, all the information for one record is stored together, but in a columnar database, all the values for a single column are stored together. This makes it much faster to read and analyze large amounts of data, especially when you only need a few columns instead of the whole record. For example, if you want to quickly find the average sales price from millions of rows, a columnar database can scan just the "price" column instead of every piece of data. This design is often used in data warehouses and analytics systems because it speeds up queries and saves storage space through better compression.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What are columnar databases? Here are 35 examples.](https://www.tinybird.co/blog-posts/what-is-a-columnar-database)
|
||||
- [@article@Columnar Databases](https://www.techtarget.com/searchdatamanagement/definition/columnar-database)
|
||||
- [@video@What is a Columnar Database? (vs. Row-oriented Database)](https://www.youtube.com/watch?v=1MnvuNg33pA)
|
||||
@@ -1 +1,9 @@
|
||||
# Compute Engine (Compute)
|
||||
# Compute Engine (Compute)
|
||||
|
||||
Compute Engine is a computing and hosting service that lets you create and run virtual machines on Google infrastructure. Compute Engine offers scale, performance, and value that lets you easily launch large compute clusters on Google's infrastructure. There are no upfront investments, and you can run thousands of virtual CPUs on a system that offers quick, consistent performance. You can configure and control Compute Engine resources using the Google Cloud console, the Google Cloud CLI, or using a REST-based API. You can also use a variety of programming languages to run Compute Engine, including Python, Go, and Java.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@course@The Basics of Google Cloud Compute](https://www.cloudskillsboost.google/course_templates/754)
|
||||
- [@official@Compute Engine overview](https://cloud.google.com/compute/docs/overview)
|
||||
- [@video@Compute Engine in a minute](https://www.youtube.com/watch?v=IuK4gQeHRcI)
|
||||
@@ -1 +1,14 @@
|
||||
# Containers & Orchestration
|
||||
# Containers & Orchestration
|
||||
|
||||
**Containers** are lightweight, portable, and isolated environments that package applications and their dependencies, enabling consistent deployment across different computing environments. They encapsulate software code, runtime, system tools, libraries, and settings, ensuring that the application runs the same regardless of where it's deployed. Containers share the host operating system's kernel, making them more efficient than traditional virtual machines.
|
||||
|
||||
**Orchestration** refers to the automated coordination and management of complex IT systems. It involves combining multiple automated tasks and processes into a single workflow to achieve a specific goal. Orchestration is one of the key components of any software development process, and it should be preferred over manual configuration. As an automation practice, orchestration helps to remove the chance of human error from the different steps of the data engineering lifecycle. This is all to ensure efficient resource utilization and consistency.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What are Containers?](https://cloud.google.com/learn/what-are-containers)
|
||||
- [@article@Containers - The New Stack](https://thenewstack.io/category/containers/)
|
||||
- [@article@An Introduction to Data Orchestration: Process and Benefits](https://www.datacamp.com/blog/introduction-to-data-orchestration-process-and-benefits)
|
||||
- [@article@What is Container Orchestration?](https://www.redhat.com/en/topics/containers/what-is-container-orchestration)
|
||||
- [@video@What are Containers?](https://www.youtube.com/playlist?list=PLawsLZMfND4nz-WDBZIj8-nbzGFD4S9oz)
|
||||
- [@video@Why You Need Data Orchestration](https://www.youtube.com/watch?v=ZtlS5-G-gng)
|
||||
@@ -1 +1,10 @@
|
||||
# CosmosDB
|
||||
# CosmosDB
|
||||
|
||||
Azure Cosmos DB is a native No-SQL database service and vector database for working with the document data model. It can arbitrarily store native JSON documents with flexible schema. Data is indexed automatically and is available for query using a flavor of the SQL query language designed for JSON data. It also supports vector search. You can access the API using SDKs for popular frameworks such as .NET, Python, Java, and Node.js.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@Azure Cosmos DB - FAQ](https://azure.microsoft.com/en-us/products/cosmos-db#FAQ)
|
||||
- [@official@Azure Cosmos DB - Database for the AI Era](https://learn.microsoft.com/en-us/azure/cosmos-db/introduction)
|
||||
- [@article@Azure Cosmos DB: A Global-Scale NoSQL Cloud Database](https://www.datacamp.com/tutorial/azure-cosmos-db)
|
||||
- [@video@What is Azure Cosmos DB?](https://www.youtube.com/watch?v=hBY2YcaIOQM&)
|
||||
@@ -1 +1,9 @@
|
||||
# CouchDB
|
||||
# CouchDB
|
||||
|
||||
Apache CouchDB is an open source NoSQL document database that collects and stores data in JSON-based document formats. Unlike relational databases, CouchDB uses a schema-free data model, which simplifies record management across various computing devices, mobile phones and web browsers. In CouchDB, each document is uniquely named in the database, and CouchDB provides a RESTful HTTP API for reading and updating (add, edit, delete) database documents. Documents are the primary unit of data in CouchDB and consist of any number of fields and attachments.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@official@CouchDB](https://couchdb.apache.org/)
|
||||
- [@official@CouchDB Documentation](https://docs.couchdb.org/en/stable/intro/overview.html)
|
||||
- [@article@What is CouchDB?](https://www.ibm.com/think/topics/couchdb)
|
||||
@@ -1 +1,16 @@
|
||||
# Data Analytics
|
||||
# Data Analytics
|
||||
|
||||
Data Analytics involves extracting meaningful insights from raw data to drive decision-making processes. It includes a wide range of techniques and disciplines ranging from the simple data compilation to advanced algorithms and statistical analysis. Data analysts, as ambassadors of this domain, employ these techniques to answer various questions:
|
||||
|
||||
* Descriptive Analytics _(what happened in the past?)_
|
||||
* Diagnostic Analytics _(why did it happen in the past?)_
|
||||
* Predictive Analytics _(what will happen in the future?)_
|
||||
* Prescriptive Analytics _(how can we make it happen?)_
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@course@Introduction to Data Analytics](https://www.coursera.org/learn/introduction-to-data-analytics)
|
||||
- [@article@The 4 Types of Data Analysis: Ultimate Guide](https://careerfoundry.com/en/blog/data-analytics/different-types-of-data-analysis/)
|
||||
- [@article@What is Data Analysis? An Expert Guide With Examples](https://www.datacamp.com/blog/what-is-data-analysis-expert-guide)
|
||||
- [@video@Descriptive vs Diagnostic vs Predictive vs Prescriptive Analytics: What's the Difference?](https://www.youtube.com/watch?v=QoEpC7jUb9k)
|
||||
- [@video@Types of Data Analytics](https://www.youtube.com/watch?v=lsZnSgxMwBA)
|
||||
@@ -1 +1,13 @@
|
||||
# Data Collection Considerations
|
||||
# Data Collection Considerations
|
||||
|
||||
Before designing the technology architecture to collect and store data, you should consider the following factors:
|
||||
|
||||
* **Bounded versus unbounded**. Bounded data has defined start and end points, forming a finite, complete dataset, like the daily sales report. Unbounded data has no predefined limits in time or scope, flowing continuously and potentially indefinitely, such as user interaction events or real-time sensor data. The distinction is critical in data processing, where bounded data is suitable for batch processing, and unbounded data is processed in stream processing or real-time systems.
|
||||
* **Frequency.** Collection processes can be batch, micro-batch, or real-time, depending on the frequency you need to store the data.
|
||||
* **Synchronous versus asynchronous.** Synchronous ingestion is a process where the system waits for a response from the data source before proceeding. In contrast, asynchronous ingestion is a process where data is ingested without waiting for a response from the data source. Each approach has its benefits and drawbacks, and the choice depends on the specific requirements of the data ingestion process and the business needs.
|
||||
* **Throughput and scalability.** As data demands grow, you will need scalable ingestion solutions to keep pace. Scalable data ingestion pipelines ensure that systems can handle increasing data volumes without compromising performance. Without scalable ingestion, data pipelines face challenges like bottlenecks and data loss. Bottlenecks occur when components can't process data fast enough, leading to delays and reduced throughput. Data loss happens when systems are overwhelmed, causing valuable information to be discarded or corrupted.
|
||||
* **Reliability and durability.** Data reliability in the ingestion phase means ensuring that the acquired data from various sources is accurate, consistent, and trustworthy as it enters the data pipeline. Durability entails making sure that data isn’t lost or corrupted during the data collection process.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@book@Fundamentals of Data Engineering](https://www.oreilly.com/library/view/fundamentals-of-data/9781098108298/)
|
||||
@@ -1 +1,16 @@
|
||||
# Data Engineering Lifecycle
|
||||
# Data Engineering Lifecycle
|
||||
|
||||
The data engineering lifecycle encompasses the entire process of transforming raw data into a useful end product. It involves several stages, each with specific roles and responsibilities. This lifecycle ensures that data is handled efficiently and effectively, from its initial generation to its final consumption.
|
||||
|
||||
It involves 4 steps:
|
||||
|
||||
1. Data Generation: Collecting data from various source systems.
|
||||
2. Data Storage: Safely storing data for future processing and analysis.
|
||||
3. Data Ingestion: Transforming and bringing data into a centralized system.
|
||||
4. Data Serving: Providing data to end-users for decision-making and operational purposes.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@book@Fundamentals of Data Engineering](https://www.oreilly.com/library/view/fundamentals-of-data/9781098108298/)
|
||||
- [@article@Data Engineering Lifecycle](https://medium.com/towards-data-engineering/data-engineering-lifecycle-d1e7ee81632e)
|
||||
- [@video@Getting Into Data Engineering](https://www.youtube.com/watch?v=hZu_87l62J4)
|
||||
@@ -1 +1,16 @@
|
||||
# Data Engineering Lifecycle
|
||||
# Data Engineering Lifecycle
|
||||
|
||||
The data engineering lifecycle encompasses the entire process of transforming raw data into a useful end product. It involves several stages, each with specific roles and responsibilities. This lifecycle ensures that data is handled efficiently and effectively, from its initial generation to its final consumption.
|
||||
|
||||
It involves 4 steps:
|
||||
|
||||
1. Data Generation: Collecting data from various source systems.
|
||||
2. Data Storage: Safely storing data for future processing and analysis.
|
||||
3. Data Ingestion: Transforming and bringing data into a centralized system.
|
||||
4. Data Serving: Providing data to end-users for decision-making and operational purposes.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@book@Fundamentals of Data Engineering](https://www.oreilly.com/library/view/fundamentals-of-data/9781098108298/)
|
||||
- [@article@Data Engineering Lifecycle](https://medium.com/towards-data-engineering/data-engineering-lifecycle-d1e7ee81632e)
|
||||
- [@video@Getting Into Data Engineering](https://www.youtube.com/watch?v=hZu_87l62J4)
|
||||
@@ -1 +1,8 @@
|
||||
# Data Engineering vs Data Science
|
||||
# Data Engineering vs Data Science
|
||||
|
||||
Data engineering and data science are distinct but complementary roles within the field of data. Data engineering focuses on building and maintaining the infrastructure for data collection, storage, and processing, essentially creating the systems that make data available for downstream users. On the other hand, data science professionals, like data analysts and data scientists, uses that data to extract insights, build predictive models, and ultimately inform decision-making.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Data Scientist vs Data Engineer](https://www.datacamp.com/blog/data-scientist-vs-data-engineer)
|
||||
- [@video@Should You Be a Data Scientist, Analyst or Engineer?](https://www.youtube.com/watch?v=dUnKYhripIE)
|
||||
@@ -1 +1,9 @@
|
||||
# Data Fabric
|
||||
# Data Fabric
|
||||
|
||||
A data fabric is a single environment consisting of a unified architecture, with services and technologies running on it, that helps a company manage their data. It enables accessing, ingesting, integrating, and sharing data in an environment where the data can be batched or streamed and be in the cloud or on-prem. The ultimate goal of data fabric is to use all your data to gain better insights into your company and make better business decisions. A data fabric includes building blocks such as data pipeline, data access, data lake, data store, data policy, ingestion framework, and data visualization. These building blocks would be used to build platforms or “products” such as a client data integration platform, data hub, governance framework, and a global semantic layer, giving you centralized governance and standardization.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is a data fabric?](http://ibm.com/think/topics/data-fabric)
|
||||
- [@article@Data Fabric defined](https://www.jamesserra.com/archive/2021/06/data-fabric-defined/)
|
||||
- [@article@How Data Fabric Can Optimize Data Delivery](https://www.gartner.com/en/data-analytics/topics/data-fabric)
|
||||
@@ -1 +1,10 @@
|
||||
# Data Factory (ETL)
|
||||
# Data Factory (ETL)
|
||||
|
||||
Data Factory, most commonly referring to Microsoft's Azure Data Factory, is a cloud-based data integration service that allows you to create, schedule, and orchestrate workflows to move and transform data from various sources into a centralized location for analysis. It provides tools for building Extract, Transform, and Load (ETL) pipelines, enabling businesses to prepare data for analytics, business intelligence, and other data-driven initiatives without extensive coding, thanks to its visual, code-free interface and native connectors.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@course@Microsoft Azure - Data Factory](https://www.coursera.org/learn/microsoft-azure---data-factory)
|
||||
- [@official@What is Azure Data Factory?](https://learn.microsoft.com/en-us/azure/data-factory/introduction)
|
||||
- [@official@Azure Data Factory Documentation](https://learn.microsoft.com/en-gb/azure/data-factory/)
|
||||
- [@official@Azure Data Factory Documentation](https://learn.microsoft.com/en-gb/azure/data-factory/)
|
||||
@@ -1 +1,12 @@
|
||||
# Data Generation
|
||||
# Data Generation
|
||||
|
||||
Data generation refers to the different ways data is produced and generated. Thanks to progress in computing power and storage, as well as technology breakthrough in sensor technology (for example, IoT devices), the number of these so-called source systems is rapidly growing. Data is created in many ways, both analog and digital.
|
||||
|
||||
**Analog data** refers to continuous, real-world information that is represented by a range of values. It can take on any value within a given range and is often used to describe physical quantities like temperature or sounds.
|
||||
|
||||
By contrast, **digital data** is either created by converting analog data to digital form (e.g. images or videos) or is the native product of a digital system, such as logs from a mobile app or synthetic data.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@The Concept of Data Generation](https://www.marktechpost.com/2023/02/27/the-concept-of-data-generation/)
|
||||
- [@video@Analog vs. Digital](https://www.youtube.com/watch?v=zzvglgC5ut0)
|
||||
@@ -1 +1,10 @@
|
||||
# Data Hub
|
||||
# Data Hub
|
||||
|
||||
A **data hub** is an architecture that provides a central point for the flow of data between multiple sources and applications, enabling organizations to collect, integrate, and manage data efficiently. Unlike traditional data storage solutions, a data hub’s purpose focuses on data integration and accessibility. The design supports real-time data exchange, which makes accessing, analyzing, and acting on the data faster and easier.
|
||||
|
||||
A data hub differs from a data warehouse in that it is generally unintegrated and often at different grains. It differs from an operational data store because a data hub does not need to be limited to operational data. A data hub differs from a data lake by homogenizing data and possibly serving data in multiple desired formats, rather than simply storing it in one place, and by adding other value to the data such as de-duplication, quality, security, and a standardized set of query services.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Data hub](https://en.wikipedia.org/wiki/Data_hub)
|
||||
- [@article@What is a Data Hub? Definition, 7 Key Benefits & Why You Might Need One](https://www.cdata.com/blog/what-is-a-data-hub)
|
||||
@@ -1 +1,8 @@
|
||||
# Data Ingestion
|
||||
# Data Ingestion
|
||||
|
||||
Data ingestion is the third step in the data engineering lifecycle. It entails the process of collecting and importing data files from various sources into a database for storage, processing and analysis. The goal of data ingestion is to clean and store data in an accessible and consistent central repository to prepare it for use within the organization.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is Data Ingestion?](https://www.ibm.com/think/topics/data-ingestion)
|
||||
- [@article@Data Ingestion](https://www.qlik.com/us/data-ingestion)
|
||||
@@ -1 +1,8 @@
|
||||
# Data Interoperability
|
||||
# Data Interoperability
|
||||
|
||||
Data interoperability is the ability of diverse systems and applications to access, exchange, and cooperatively use data in a coordinated and meaningful way, even across organizational boundaries. It ensures that data can flow freely, maintaining its integrity and context, allowing for improved efficiency, collaboration, and decision-making by breaking down data silos. Achieving data interoperability often relies on data standards, metadata, and common data elements to define how data is collected, formatted, and interpreted.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Data Interoperability](https://www.sciencedirect.com/topics/computer-science/data-interoperability)
|
||||
- [@article@What is Data Interoperability? – Exploring the Process and Benefits](https://www.codelessplatforms.com/blog/what-is-data-interoperability/)
|
||||
@@ -1 +1,8 @@
|
||||
# Data Lake
|
||||
# Data lakes
|
||||
|
||||
**Data Lakes** are large-scale data repository systems that store raw, untransformed data, in various formats, from multiple sources. They're often used for big data and real-time analytics requirements. Data lakes preserve the original data format and schema which can be modified as necessary.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Data Lake Definition](https://azure.microsoft.com/en-gb/resources/cloud-computing-dictionary/what-is-a-data-lake)
|
||||
- [@video@What is a Data Lake?](https://www.youtube.com/watch?v=LxcH6z8TFpI)
|
||||
@@ -1 +1,8 @@
|
||||
# Data Lineage
|
||||
# Data Lineage
|
||||
|
||||
**Data Lineage** refers to the life-cycle of data, including its origins, movements, characteristics and quality. It's a critical component in Data Engineering for tracking the journey of data through every process in a pipeline, from raw input to model output. Data lineage helps in maintaining transparency, ensuring compliance, and facilitating data debugging or tracing data related bugs. It provides a clear representation of data sources, transformations, and dependencies thereby aiding in audits, governance, or reproduction of machine learning models.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is Data Lineage? - IBM](https://www.ibm.com/topics/data-lineage)
|
||||
- [@article@What is Data Lineage? - Datacamp](https://www.datacamp.com/blog/data-lineage)
|
||||
@@ -1 +1,9 @@
|
||||
# Data Mart
|
||||
# Data Mart
|
||||
|
||||
A data mart is a subset of a data warehouse, focused on a specific business function or department. A data mart is streamlined for quicker querying and a more straightforward setup, catering to the specialized needs of a particular team, or function. Data marts only hold data relevant to a specific department or business unit, enabling quicker access to specific datasets, and simpler management.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is a Data Mart?](https://www.ibm.com/think/topics/data-mart)
|
||||
- [@article@Data Mart vs Data Warehouse: a Detailed Comparison](https://www.datacamp.com/blog/data-mart-vs-data-warehouse)
|
||||
- [@video@Data Lake VS Data Warehouse VS Data Marts](https://www.youtube.com/watch?v=w9-WoReNKHk)
|
||||
@@ -1 +1,8 @@
|
||||
# Data Masking
|
||||
# Data Masking
|
||||
|
||||
Data masking is a process that creates a copy of real data but replaces sensitive information with false but realistic-looking data, preserving the format and structure of the original data for non-production uses like software testing, training, and development. The goal is to protect confidential information and ensure compliance with data protection regulations by preventing unauthorized access to real sensitive data without compromising the usability of the data for other business functions.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Data masking](https://en.wikipedia.org/wiki/Data_masking)
|
||||
- [@article@What is data masking?](https://aws.amazon.com/what-is/data-masking/)
|
||||
@@ -1 +1,9 @@
|
||||
# Data Mesh
|
||||
# Data Mesh
|
||||
|
||||
A data mesh is a modern approach to data architecture that shifts data management from a centralized model to a decentralized one. It emphasizes domain-oriented ownership, where data management aligns with specific business areas. This alignment makes data operations more scalable and flexible, leveraging the knowledge and expertise of those closest to the data. Data mesh is defined by four principles: data domains, data products, self-serve data platform, and federated computational governance.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What Is a Data Mesh? - AWS](https://aws.amazon.com/what-is/data-mesh)
|
||||
- [@article@What Is a Data Mesh? - Datacamp](https://www.datacamp.com/blog/data-mesh)
|
||||
- [@video@Data Mesh Architecture](https://www.datamesh-architecture.com/)
|
||||
@@ -1 +1,13 @@
|
||||
# Data Modelling Techniques
|
||||
# Data Modelling Techniques
|
||||
|
||||
A data model is a specification of data structures and business rules. It creates a visual representation of data and illustrates how different data elements are related to each other. Different techniques are employed depending on the complexity of the data and the goals. Below you can find a list with the most common data modelling techniques:
|
||||
|
||||
* **Entity-relationship modeling.** It's one of the most common techniques used to represent data. It's based on three elements: Entities (objects or things within the system), relationships (how these entities interact with each other), and attributes (properties of the entities).
|
||||
* **Dimensional modeling.** Dimensional modeling is widely used in data warehousing and analytics, where data is often represented in terms of facts and dimensions. This technique simplifies complex data by organizing it into a star or snowflake schema.
|
||||
* **Object-oriented modeling.** Object-oriented modeling is used to represent complex systems, where data and the functions that operate on it are encapsulated as objects. This technique is preferred for modeling applications with complex, interrelated data and behaviors
|
||||
* **NoSQL modeling.** NoSQL modeling techniques are designed for flexible, schema-less databases. These approaches are often used when data structures are less rigid or evolve over time
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@7 data modeling techniques and concepts for business](https://www.techtarget.com/searchdatamanagement/tip/7-data-modeling-techniques-and-concepts-for-business)
|
||||
- [@article@Data Modeling Explained: Techniques, Examples, and Best Practices](https://www.datacamp.com/blog/data-modeling)
|
||||
@@ -1 +1,9 @@
|
||||
# Data Normalization
|
||||
# Database Normalization
|
||||
|
||||
Database normalization is the process of structuring a relational database in accordance with a series of so-called normal forms in order to reduce data redundancy and improve data integrity. It was first proposed by Edgar F. Codd as part of his relational model. Normalization entails organizing the columns (attributes) and tables (relations) of a database to ensure that their dependencies are properly enforced by database integrity constraints. It is accomplished by applying some formal rules either by a process of synthesis (creating a new database design) or decomposition (improving an existing database design).
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is Normalization in DBMS (SQL)? 1NF, 2NF, 3NF, BCNF Database with Example](https://www.guru99.com/database-normalization.html)
|
||||
- [@video@Complete guide to Database Normalization in SQL](https://www.youtube.com/watch?v=rBPQ5fg_kiY)
|
||||
- [@feed@Explore top posts about Database](https://app.daily.dev/tags/database?ref=roadmapsh)
|
||||
@@ -1 +1,3 @@
|
||||
# Data Obfuscation
|
||||
# Data Obfuscation
|
||||
|
||||
Statistical data obfuscation involves altering the values of sensitive data in a way that preserves the statistical properties and relationships within the data. It ensures that the masked data maintains the overall distribution, patterns, and correlations of the original data for accurate statistical analysis. Statistical data obfuscation techniques include applying mathematical functions or perturbation algorithms to the data.
|
||||
@@ -1 +1,8 @@
|
||||
# Data Pipelines
|
||||
# Data Pipelines
|
||||
|
||||
Data pipelines are a series of automated processes that transport and transform data from various sources to a destination for analysis or storage. They typically involve steps like data extraction, cleaning, transformation, and loading (ETL) into databases, data lakes, or warehouses. Pipelines can handle batch or real-time data, ensuring that large-scale datasets are processed efficiently and consistently. They play a crucial role in ensuring data integrity and enabling businesses to derive insights from raw data for reporting, analytics, or machine learning.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is a Data Pipeline? - IBM](https://www.ibm.com/topics/data-pipeline)
|
||||
- [@video@What are Data Pipelines?](https://www.youtube.com/watch?v=oKixNpz6jNo)
|
||||
@@ -1 +1,5 @@
|
||||
# Data Quality
|
||||
# Data Quality
|
||||
|
||||
Ensuring quality involves validating the accuracy, completeness, consistency, and reliability of the data collected from each source. The fact that you do it from one source or multiple is almost irrelevant since the only extra task would be to homogenize the final schema of the data, ensuring deduplication and normalization.
|
||||
|
||||
This last part typically includes verifying the credibility of each data source, standardizing formats (like date/time or currency), performing schema alignment, and running profiling to detect anomalies, duplicates, or mismatches before integrating the data for analysis.
|
||||
@@ -1 +1,7 @@
|
||||
# Data Quality
|
||||
# Data Quality
|
||||
|
||||
Data quality refers to the degree to which a dataset is accurate, complete, consistent, relevant, and timely, making it fit for its intended use. High-quality data is reliable and trustworthy, enabling better decision-making, accurate analysis, and effective strategies, while poor data quality can lead to flawed insights, wasted resources, and negative consequences for an organization.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is Data Quality?](https://www.ibm.com/think/topics/data-quality)
|
||||
@@ -1 +1,3 @@
|
||||
# Data Serving
|
||||
# Data Serving
|
||||
|
||||
Data serving is the last step in the data engineering process. Once the data is stored in your data architectures and transformed into a coherent and useful format, it's time to get value from it. Data serving refers to the different ways data is used by downstream applications and users to create value. There are many ways companies can extract value from data, including training machine learning models, BI Analytics, and reverse ETL.
|
||||
@@ -1 +1,7 @@
|
||||
# Data Storage
|
||||
# Data Storage
|
||||
|
||||
Data storage is the process of saving and preserving digital information on various physical or cloud-based media for future retrieval and use. It encompasses the use of technologies and devices like hard drives and cloud platforms to store data.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What is data storage?](https://www.ibm.com/think/topics/data-storage)
|
||||
@@ -1 +1,12 @@
|
||||
# Data Structures and Algorithms
|
||||
# Data Structures and Algorithms
|
||||
|
||||
**Data Structures** are primarily used to collect, organize and perform operations on the stored data more effectively. They are essential for designing advanced-level Android applications. Examples include Array, Linked List, Stack, Queue, Hash Map, and Tree.
|
||||
|
||||
**Algorithms** are a sequence of instructions or rules for performing a particular task. Algorithms can be used for data searching, sorting, or performing complex business logic. Some commonly used algorithms are Binary Search, Bubble Sort, Selection Sort, etc. A deep understanding of data structures and algorithms is crucial in optimizing the performance and the memory consumption of data pipelines.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Interview Questions about Data Structures](https://www.csharpstar.com/csharp-algorithms/)
|
||||
- [@video@Data Structures Illustrated](https://www.youtube.com/watch?v=9rhT3P1MDHk&list=PLkZYeFmDuaN2-KUIv-mvbjfKszIGJ4FaY)
|
||||
- [@video@Intro to Algorithms](https://www.youtube.com/watch?v=rL8X2mlNHPM)
|
||||
- [@feed@Explore top posts about Algorithms](https://app.daily.dev/tags/algorithms?ref=roadmapsh)
|
||||
@@ -1 +1,8 @@
|
||||
# Data Warehouse
|
||||
# Data Warehouse
|
||||
|
||||
**Data Warehouses** are data storage systems which are designed for analyzing, reporting and integrating with transactional systems. The data in a warehouse is clean, consistent, and often transformed to meet wide-range of business requirements. Hence, data warehouses provide structured data but require more processing and management compared to data lakes.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@What Is a Data Warehouse?](https://www.oracle.com/database/what-is-a-data-warehouse/)
|
||||
- [@video@What is a Data Warehouse?](https://www.youtube.com/watch?v=k4tK2ttdSDg)
|
||||
@@ -1 +1,3 @@
|
||||
# Data Warehousing Architectures
|
||||
# Data Warehousing Architectures
|
||||
|
||||
Data Warehousing Architectures refers to the different systems and solutions for storing data. Options include traditional data warehouse, data marts, data lakes and data mesh architectures.
|
||||
@@ -1 +1,17 @@
|
||||
# Database Fundamentals
|
||||
# Database fundamentals
|
||||
|
||||
A database is a collection of useful data of one or more related organizations structured in a way to make data an asset to the organization. A database management system is software designed to assist in maintaining and extracting large collections of data in a timely fashion.
|
||||
|
||||
A **Relational database** is a type of database that stores and provides access to data points that are related to one another. Relational databases store data in a series of tables.
|
||||
|
||||
**NoSQL databases** offer data storage and retrieval that is modelled differently to "traditional" relational databases. NoSQL databases typically focus more on horizontal scaling, eventual consistency, speed and flexibility and are commonly used for big data and real-time streaming applications.
|
||||
|
||||
Visit the following resources to learn more:
|
||||
|
||||
- [@article@Oracle: What is a Database?](https://www.oracle.com/database/what-is-database/)
|
||||
- [@article@Prisma.io: What are Databases?](https://www.prisma.io/dataguide/intro/what-are-databases)
|
||||
- [@article@Intro To Relational Databases](https://www.udacity.com/course/intro-to-relational-databases--ud197)
|
||||
- [@article@NoSQL Explained](https://www.mongodb.com/nosql-explained)
|
||||
- [@video@What is Relational Database](https://youtu.be/OqjJjpjDRLc)
|
||||
- [@video@How do NoSQL Databases work](https://www.youtube.com/watch?v=0buKQHokLK8)
|
||||
- [@feed@Explore top posts about Database](https://app.daily.dev/tags/database?ref=roadmapsh)
|
||||
@@ -1 +1,3 @@
|
||||
# Database
|
||||
# Database
|
||||
|
||||
A database is an organized, structured collection of electronic data that is stored, managed, and accessed via a computer system, usually controlled by a Database Management System (DBMS). Databases organize various types of data, such as words, numbers, images, and videos, allowing users to easily retrieve, update, and modify it for various purposes, from managing customer information to analyzing business processes.
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user