import { readFileSync } from 'fs';
import { T } from './twit';
import { DateTime, Duration } from 'luxon';
import envs from '../env';
import knex from '../knex-export';

const suspiciousWords: string[] = require('../data/words-with-suspicion.json');
const blackListedAccounts: string[] = require('../data/accounts-not-to-follow.json');

interface Message {
  message: string;
}

/**
 * Build a fresh URL matcher.
 * A new instance is returned on every call because the regex carries the `g`
 * flag and is therefore stateful (`lastIndex`); sharing a single instance
 * between `test()` calls would make results depend on previous calls.
 * @return {RegExp}
 */
const createUrlRegex = (): RegExp => (
  /((http(s?)?):\/\/)?([wW]{3}\.)?[a-zA-Z0-9\-.]+\.[a-zA-Z]{2,}(\.[a-zA-Z]{2,})?/img
);

/**
 * Normalise a caught value to an `Error` without re-wrapping real errors,
 * so the original message and stack trace are preserved.
 * @param {*} e - The caught value
 * @return {Error}
 */
const toError = (e: unknown): Error => (
  e instanceof Error ? e : new Error(String(e))
);

/**
 * POST to a Twitter endpoint for the given tweet `id` and resolve once the
 * callback of `T.post` has fired. Never rejects: failures are resolved as
 * `Error` values.
 * @param {string} path - The endpoint path handed to `T.post`
 * @param {string} id - Tweet ID
 * @param {string} successMessage - Message resolved when no error is reported
 * @return {Promise<Message | Error>}
 */
const postToTwitter = (
  path: string,
  id: string,
  successMessage: string,
): Promise<Message | Error> => (
  new Promise<Message | Error>((resolve) => {
    try {
      T.post(path, { id }, (err: Error) => {
        resolve(err || { message: successMessage });
      });
    } catch (e) {
      // Only synchronous failures of `T.post` itself end up here
      resolve(toError(e));
    }
  })
);

/**
 * Get the number of hashtags of a tweet.
 * A hashtag is counted for every `#` immediately followed by a
 * non-whitespace character.
 * @param {string} str - The full string to search in
 * @return {number}
 */
export const getNumberOfHashtags = (str: string): number => {
  if (str.length === 0) {
    return 0;
  }

  const matches = str.match(/#\S/g);

  return matches ? matches.length : 0;
};

/**
 * Remove words that are likely used in contexts other than programming.
 * This function finds and removes these words from the text of the tweet
 * and pass the rest to the main program. The returned text is lower-cased.
 * @param {string} text - The text of the tweet
 * @return {string}
 */
export const removeSuspiciousWords = (text: string): string => {
  let lText: string = text.toLowerCase();

  suspiciousWords.forEach((word: string) => {
    // Match against the lower-cased text so that the casing of the tweet
    // cannot prevent a word from being removed.
    // NOTE(review): words are used as regular expression sources, exactly as
    // before; confirm the data file holds no unintended meta-characters.
    lText = lText.replace(new RegExp(word.toLowerCase(), 'g'), '');
  });

  // remove multiple contiguous spaces and return the string
  return lText.replace(/ +/g, ' ');
};

/**
 * Check whether a tweet includes URLs or not, either in its `$tweetText`
 * or in its URL entities
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const hasURL = (tweet: any): boolean => (
  createUrlRegex().test(tweet.$tweetText)
  || (tweet.entities?.urls?.length ?? 0) > 0
);

/**
 * Remove URLs from the tweet.
 * This function finds and removes URLs from the text of the tweet
 * and pass the rest to the main program. Sounds weird but w/o this
 * function, URLs that contain `html`, `php`, etc. match the keywords!
 * The returned text is lower-cased and trimmed.
 * @param {string} text - The text of the tweet
 * @return {string}
 */
export const removeURLs = (text: string): string => (
  text
    .toLowerCase()
    // The regex is global, a single pass removes every URL
    .replace(createUrlRegex(), '')
    // remove multiple contiguous spaces
    .replace(/ +/g, ' ')
    .trim()
);

/**
 * Checks whether the text of a tweet, the text of its retweet or any of its
 * URL entities points to a `.aspx`, `.php` or `.html` resource
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const hasSuspiciousURLs = (tweet: any): boolean => {
  const fileExtensionRegExp = /\.(aspx|php|html)/;
  const urlEntities: any[] = Array.isArray(tweet.entities?.urls)
    ? tweet.entities.urls
    : [];

  return (
    fileExtensionRegExp.test(tweet.$tweetText)
    || fileExtensionRegExp.test(tweet.$retweetText)
    || urlEntities.some((urlEntity: any) => {
      // URL entities are objects; plain strings are still accepted so that
      // callers passing bare URLs keep working.
      const candidates: unknown[] = typeof urlEntity === 'string'
        ? [urlEntity]
        : [urlEntity?.url, urlEntity?.expanded_url, urlEntity?.display_url];

      return candidates.some((candidate: unknown) => (
        typeof candidate === 'string' && fileExtensionRegExp.test(candidate)
      ));
    })
  );
};

/**
 * Whether a tweet is flagged as truncated, i.e. its `text` property does
 * not hold the whole text of the tweet
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const isTweetExtended = (tweet: any): boolean => (
  tweet.truncated === true
);

/**
 * Whether a tweet is in Farsi or not.
 * This behaviour relies on the `lang` property set by Twitter API.
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const isTweetFarsi = (tweet: any): boolean => tweet.lang === 'fa';

/**
 * Whether a tweet is a retweet or not, i.e. whether it owns a
 * `retweeted_status` property.
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const isRetweet = (tweet: any): boolean => (
  Object.prototype.hasOwnProperty.call(tweet, 'retweeted_status')
);

/**
 * Whether a tweet is a reply or not. Retweets are reported as replies, too.
 * NOTE(review): a tweet having only `in_reply_to_user_id` set is NOT
 * reported as a reply; this mirrors the effective behaviour of the previous
 * implementation. Confirm whether such tweets should be rejected as well.
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const isTweetAReply = (tweet: any): boolean => (
  Boolean(tweet.retweeted_status || tweet.in_reply_to_status_id)
);

/**
 * Return the full text of the tweet
 * @param {*} tweet - The tweet object
 * @return {string}
 */
export const getTweetFullText = (tweet: any): string => (
  // All tweets have a `text` property, but truncated ones carry the actual
  // text under `extended_tweet.full_text`. Fall back to `text` when a
  // truncated tweet comes without that property instead of throwing.
  isTweetExtended(tweet)
    ? (tweet.extended_tweet?.full_text ?? tweet.text)
    : tweet.text
);

/**
 * Return the hashtag entities of the tweet, or an empty array when the
 * tweet has none
 * @param {*} tweet - The tweet object
 * @return {string[]}
 */
export const getTweetHashtags = (tweet: any): string[] => (
  tweet.entities?.hashtags ?? []
);

/**
 * Returns the length of a given tweet text
 * @param {string} tweetText - The text of the tweet
 * @return {number}
 */
export const getTweetLength = (tweetText: string): number => tweetText.length;

/**
 * Whether the environment is in debug mode or not
 * @return {boolean}
 */
export const isDebugModeEnabled = (): boolean => envs.DEBUG_MODE === 'true';

/**
 * Retweet the passed tweet by the given `id`.
 * The returned promise settles only after Twitter has answered and never
 * rejects: a failure is resolved as an `Error`.
 * @param {string} id - Tweet ID
 * @return {Promise<Message | Error>}
 */
export const retweet = (id: string): Promise<Message | Error> => (
  postToTwitter('statuses/retweet/:id', id, 'Tweet retweeted successfully')
);

/**
 * Favourite/Like the passed tweet by the given `id`.
 * The returned promise settles only after Twitter has answered and never
 * rejects: a failure is resolved as an `Error`.
 * @param {string} id - Tweet ID
 * @return {Promise<Message | Error>}
 */
export const favourite = (id: string): Promise<Message | Error> => (
  postToTwitter('/favorites/create', id, 'Tweet favourited successfully')
);

/**
 * Parse the date format returned from Twitter API to Luxon DateTime
 * @param {string} date - The date
 * @return {DateTime}
 */
export const parseTwitterDateToLuxon = (date: string): DateTime => (
  DateTime.fromFormat(date, 'ccc LLL dd HH:mm:ss ZZZ yyyy')
);

/**
 * Return the difference between now and the given {DateTime}, in days
 * @param {DateTime} date - The date
 * @return {Duration}
 */
export const getDiffBetweenDateTimeAndNowInDays = (date: DateTime): Duration => (
  DateTime.now().diff(date, 'days')
);

/**
 * Store the given tweet and its user in the database.
 * The user is inserted or, when already present, updated; the tweet is
 * inserted only when it is not stored yet. Database failures are returned
 * as `Error`, not thrown.
 * @param {*} tweet - The tweet object
 * @return {Promise<Message | Error>}
 */
export const store = async (tweet: any): Promise<Message | Error> => {
  if (envs.DB_ENABLE === 'false') {
    return { message: 'Database storage is disabled' };
  }

  const {
    in_reply_to_status_id,
    in_reply_to_user_id,
    source,
    user,
    id_str,
    $tweetText,
  } = tweet;
  const { id_str: userIdStr, screen_name, name } = user;

  try {
    const storedUsers = await knex
      .select('user_id')
      .from('users')
      .where('user_id', userIdStr);

    if (storedUsers.length) {
      await knex('users')
        .where('user_id', userIdStr)
        .update({ user_id: userIdStr, screen_name, name });
    } else {
      await knex('users')
        .insert({ user_id: userIdStr, screen_name, name });
    }

    const storedTweets = await knex
      .select('tweet_id')
      .from('tweets')
      .where('tweet_id', id_str);

    if (storedTweets.length) {
      return { message: 'Tweet is already in the database' };
    }

    await knex('tweets')
      .insert({
        tweet_id: id_str,
        text: $tweetText,
        source,
        is_retweet: isRetweet(tweet),
        in_reply_to_status_id,
        in_reply_to_user_id,
        user_id: userIdStr,
      });

    return { message: 'Tweet stored in the database' };
  } catch (e) {
    return toError(e);
  }
};

/**
 * Check if the user of the tweet, or the user of the retweeted status it
 * carries, is in the blacklist
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const isBlackListed = (tweet: any): boolean => {
  const tweetUserId: string = tweet.user.id_str;
  // `undefined` for tweets which are not retweets; never found in the list
  const retweetedUserId: string = tweet.retweeted_status?.user?.id_str;

  return (
    blackListedAccounts.includes(retweetedUserId)
    || blackListedAccounts.includes(tweetUserId)
  );
};

/**
 * Check if the user of the tweet, or the user of the retweeted status it
 * carries, has registered less than `IGNORE_USERS_NEWER_THAN` days ago
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const hasUserRegisteredRecently = (tweet: any): boolean => {
  const dayToBlockNewUsers: number = +envs.IGNORE_USERS_NEWER_THAN;
  const registrationDates: string[] = [tweet.user.created_at];

  if (tweet.retweeted_status) {
    registrationDates.push(tweet.retweeted_status.user.created_at);
  }

  return registrationDates.some((createdAt: string) => {
    const accountAgeInDays: number = getDiffBetweenDateTimeAndNowInDays(
      parseTwitterDateToLuxon(createdAt),
    ).days;

    return dayToBlockNewUsers > accountAgeInDays;
  });
};

/**
 * Check if a tweet has 5 hashtags or more, either in its full text or in
 * its hashtag entities. See it as an ad-blocker.
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const hasFiveHashtagsOrMore = (tweet: any): boolean => (
  getNumberOfHashtags(getTweetFullText(tweet)) >= 5
  || (tweet.entities?.hashtags?.length ?? 0) >= 5
);

/**
 * Check if a tweet is retweeted by ME or not
 * @param {*} tweet - The tweet object
 * @return {boolean}
 */
export const isRetweetedByMyself = (tweet: any): boolean => (
  Boolean(tweet.retweeted)
);

/**
 * Validate the tweet properties for further process. A tweet is acceptable
 * when all of the following hold, checked in this order:
 *   1. The language of the tweet is Farsi
 *   2. The tweet is not a reply
 *   3. The tweet has less than five hashtags "#"
 *   4. The user is not in the blacklist
 *   5. The `text` of the tweet is longer than 10 characters
 *   6. The user has not registered recently
 * @param {*} tweet - The tweet object
 * @return {boolean} - Whether the tweet is acceptable
 */
export const isTweetAcceptable = (tweet: any): boolean => (
  isTweetFarsi(tweet)
  && !isTweetAReply(tweet)
  && !hasFiveHashtagsOrMore(tweet)
  && !isBlackListed(tweet)
  && getTweetLength(tweet.text) > 10
  && !hasUserRegisteredRecently(tweet)
);

/**
 * Remove the retweet notation, i.e. `RT @username:`, from the tweet body
 * @param {string} tweetText - The text of a tweet
 * @return {string} - The trimmed text of the tweet w/ `RT @username:` removed
 */
export const removeRetweetNotation = (tweetText: string): string => (
  tweetText.replace(/(RT @.*?:)/gim, '').trim()
);

/**
 * Checks whether a file is JSON or not, using file extension for this purpose
 * @param {string} fileName - The name of the file
 * @return {boolean} - File is JSON or not
 */
export const isFileJSON = (fileName: string): boolean => (/\.(json)$/i.test(fileName));

/**
 * Load the content of a given file, JSON only.
 * Every failure (wrong extension, unreadable file, malformed JSON, content
 * which is not an array) is returned as an `Error`, not thrown.
 * @param {string} filePath - The full path of the JSON file
 * @return {string[] | Error} - The array stored in the file, or the failure
 */
export const loadJSONFileContent = (filePath: string): string[] | Error => {
  if (!isFileJSON(filePath)) {
    return new Error('File is not JSON');
  }

  let parsedContent: unknown;

  try {
    // Parsing lives inside the `try` so malformed JSON is reported the same
    // way as an unreadable file
    parsedContent = JSON.parse(readFileSync(filePath, 'utf8'));
  } catch (e) {
    return toError(e);
  }

  return Array.isArray(parsedContent)
    ? parsedContent
    : new Error('File doesn\'t include an array');
};

/**
 * Convert a string to hashtag
 * @param {string} string - The word to be hashtagged
 * @return {string} - The hashtagged form of the given string
 */
export const makeHashtag = (string: string): string => {
  const underscored: string = string
    // Replace space, half-space (U+200C), dash, dot w/ an underscore
    .replace(/[ \u200c\-.]/g, '_')
    // Replace every run of subsequent underscores with one underscore
    .replace(/_{2,}/g, '_');

  // Add a number sign at the beginning of the word
  return `#${underscored}`;
};

/**
 * Fill a given array with an array of words, followed by the hashtagged
 * form of each of these words.
 * Note that `arrayToFill` is mutated (every word and hashtag is pushed to
 * it), while the returned array is a new one with duplicates removed.
 * @param {string[]} arrayToFill
 * @param {string[]} arrayOfWords
 * @return {string[]}
 */
export const fillArrayWithWords = (
  arrayToFill: string[],
  arrayOfWords: string[],
): string[] => {
  for (const word of arrayOfWords) {
    arrayToFill.push(word);
  }

  for (const word of arrayOfWords) {
    arrayToFill.push(makeHashtag(word));
  }

  return [...new Set(arrayToFill)];
};