Web scraping: send data only from the latest URLs

I wrote a JavaScript script that extracts sub-links, external links, and headlines from examplesite . com, but there is a problem I cannot fix: the script keeps extracting every link that exists on examplesite . com over and over again, including links that are 3-4 years old. I'm only interested in receiving the latest sub-links whenever examplesite . com publishes them (one approach is sketched right after the examples below), for example:

https:// examplesite . com/test1
https:// examplesite . com/test2-test1/test1
https:// examplesite . com/2023-20-03/test1 and so on… but remember, those sub-links don't exist yet / haven't been created yet…
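Since those future URLs don't exist yet, the usual trick is to treat the first crawl as a baseline: remember every link that is already on the homepage without sending anything, and only notify for links that show up on later crawls. A minimal sketch of that idea (checkForNewLinks and fetchLinks are just names I made up, and the 15-second interval is arbitrary):

const axios = require('axios');
const cheerio = require('cheerio');

const baseUrl = 'https://examplesite.com'; // placeholder

let knownLinks = null; // stays null until the first crawl has seeded it

async function fetchLinks() {
  const response = await axios.get(baseUrl);
  const $ = cheerio.load(response.data);
  const links = new Set();
  $('a').each((i, el) => {
    const href = $(el).attr('href');
    if (href) links.add(href);
  });
  return links;
}

async function checkForNewLinks() {
  try {
    const links = await fetchLinks();
    if (knownLinks === null) {
      // First run: remember what already exists, announce nothing
      knownLinks = links;
      return;
    }
    for (const href of links) {
      if (!knownLinks.has(href)) {
        knownLinks.add(href);
        console.log(`New link found: ${href}`); // send to Telegram here
      }
    }
  } catch (err) {
    console.error('Crawl failed:', err.message); // don't let one bad request stop the loop
  }
}

setInterval(checkForNewLinks, 15000); // poll every 15 seconds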

Take a look at this website: https://news.treeofalpha.com/. On the left side are only the latest news items from different websites; whenever a new article is posted on a site, it shows up in that terminal. I'm not sure how to do the same. I tried https:// examplesite . com/arc/outboundfeeds/rss/ and it works: it sends only the latest headline/URL to my Telegram bot, but new articles/URLs only appear in the RSS feed after 7-8 minutes, which bothers me because I know it's possible to receive the data on my Telegram bot within seconds of it going live on the original source.
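For what it's worth, if you keep the RSS route, a library like rss-parser gives you structured items (title, link, pubDate) without any HTML parsing, and deduplication becomes trivial. It will not make the feed update faster, since the 7-8 minute delay happens on the publisher's side, so for second-level latency you still need to poll the page itself (as in the other sketches here). A rough sketch, assuming the feed URL above and that rss-parser is installed:

const Parser = require('rss-parser');
const parser = new Parser();

const feedUrl = 'https://examplesite.com/arc/outboundfeeds/rss/';
const seen = new Set();
let seeded = false;

async function pollFeed() {
  const feed = await parser.parseURL(feedUrl);
  for (const item of feed.items) {
    const key = item.guid || item.link;
    if (!seen.has(key)) {
      seen.add(key);
      if (seeded) {
        console.log(`${item.title} - ${item.link}`); // forward to Telegram here
      }
    }
  }
  seeded = true; // everything present on the first poll counts as old news
}

setInterval(pollFeed, 10000); // poll the feed every 10 seconds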

Here are the two scripts that currently extract all links from examplesite . com:

const axios = require('axios');
const cheerio = require('cheerio');
const TelegramBot = require('node-telegram-bot-api');

const baseUrl = 'https://examplesite.com';
let knownLinks = new Set();

// Set up Telegram bot parameters
const telegramToken = '';
const chatId = '';
const bot = new TelegramBot(telegramToken, { polling: false });

async function crawl() {
  console.log('Crawling homepage');

  // Make a GET request to the homepage and parse the HTML response
  const response = await axios.get(baseUrl);
  const $ = cheerio.load(response.data);

  // Find all links on the page and check if they are new
  $('a').each((i, link) => {
    const href = $(link).attr('href');
    if (href && !knownLinks.has(href)) {
      knownLinks.add(href);
      console.log(`New link found: ${href}`);
      bot.sendMessage(chatId, `New link found: ${href}`);
    }
  });
}

// Start crawling every 5 minutes
setInterval(crawl, 300000);
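One thing to watch with the script above: knownLinks only lives in memory, so every restart makes all existing links look "new" again and the bot re-sends them. If you want the deduplication to survive restarts, one option is to persist the set to a small JSON file; a minimal sketch (the seen-links.json filename is just an example):

const fs = require('fs');

const STORE = 'seen-links.json'; // example filename

// Load previously seen links from disk (falls back to an empty set on first run)
let knownLinks = new Set(
  fs.existsSync(STORE) ? JSON.parse(fs.readFileSync(STORE, 'utf8')) : []
);

// Call this after adding new links so the next run remembers them
function saveKnownLinks() {
  fs.writeFileSync(STORE, JSON.stringify([...knownLinks]));
}

Calling saveKnownLinks() at the end of crawl() would be enough for this use case.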

Second one:

const axios = require('axios');
const cheerio = require('cheerio');
const TelegramBot = require('node-telegram-bot-api');

const baseUrl = 'https://examplesite.com';
let visitedUrls = new Set();
let knownLinks = new Set();

// Set up Telegram bot parameters
const telegramToken = '';
const chatId = '';
const bot = new TelegramBot(telegramToken, { polling: false });

async function crawl() {
  console.log('Crawling homepage');

  // Make a GET request to the homepage and parse the HTML response
  const response = await axios.get(baseUrl);
  const $ = cheerio.load(response.data);

  // Find all links on the page and check if they are new
  $('a').each((i, link) => {
    const href = $(link).attr('href');
    if (href && !visitedUrls.has(href)) {
      visitedUrls.add(href);
      if (!knownLinks.has(href)) {
        knownLinks.add(href);
        console.log(`New link found: ${href}`);
        bot.sendMessage(chatId, `New link found: ${href}`);
      }
    }
  });

  // Crawl again in 1 hour
  setTimeout(crawl, 3600000);
}

// Start crawling
crawl();
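Both scripts also compare raw href values, so the same article can be reported twice (once as /test1 and once as the absolute URL), and navigation or footer links get sent along with real articles. Resolving each href against the base URL and filtering to the site's own paths keeps the Telegram messages limited to what you care about. A sketch, where the filter is only an example you would adapt to how the site structures its article URLs:

function normalizeLink(href, baseUrl) {
  try {
    const url = new URL(href, baseUrl); // resolves relative links like /test1
    url.hash = '';                      // drop #fragments so they don't look "new"
    return url.href;
  } catch (err) {
    return null; // malformed href
  }
}

function isArticleLink(url, baseUrl) {
  // Example filter: stays on the site (this also drops mailto:/javascript: links)
  // and is not just the homepage itself
  return url !== null && url.startsWith(baseUrl + '/') && url !== baseUrl + '/';
}

In crawl() you would run each href through normalizeLink first and only add/notify when isArticleLink returns true.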

Short summary:
I'm a trader, and basically I want to receive real-time data (headlines, URLs) for the latest news only, from the websites I choose.