Modern ActivityPub compliant server, designed for simplicity and accessibility. Includes calendar and sharing economy features to empower your federated community. https://code.freedombone.net/bashrc/epicyon Docs: https://epicyon.net/#install
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

content.py 38KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053
  1. __filename__ = "content.py"
  2. __author__ = "Bob Mottram"
  3. __license__ = "AGPL3+"
  4. __version__ = "1.2.0"
  5. __maintainer__ = "Bob Mottram"
  6. __email__ = "bob@freedombone.net"
  7. __status__ = "Production"
  8. import os
  9. import email.parser
  10. import urllib.parse
  11. from shutil import copyfile
  12. from utils import isValidLanguage
  13. from utils import getImageExtensions
  14. from utils import loadJson
  15. from utils import fileLastModified
  16. from utils import getLinkPrefixes
  17. from utils import dangerousMarkup
  18. from utils import isPGPEncrypted
  19. from utils import containsPGPPublicKey
  20. from petnames import getPetName
  21. def removeHtmlTag(htmlStr: str, tag: str) -> str:
  22. """Removes a given tag from a html string
  23. """
  24. tagFound = True
  25. while tagFound:
  26. matchStr = ' ' + tag + '="'
  27. if matchStr not in htmlStr:
  28. tagFound = False
  29. break
  30. sections = htmlStr.split(matchStr, 1)
  31. if '"' not in sections[1]:
  32. tagFound = False
  33. break
  34. htmlStr = sections[0] + sections[1].split('"', 1)[1]
  35. return htmlStr
  36. def _removeQuotesWithinQuotes(content: str) -> str:
  37. """Removes any blockquote inside blockquote
  38. """
  39. if '<blockquote>' not in content:
  40. return content
  41. if '</blockquote>' not in content:
  42. return content
  43. ctr = 1
  44. found = True
  45. while found:
  46. prefix = content.split('<blockquote>', ctr)[0] + '<blockquote>'
  47. quotedStr = content.split('<blockquote>', ctr)[1]
  48. if '</blockquote>' not in quotedStr:
  49. found = False
  50. else:
  51. endStr = quotedStr.split('</blockquote>')[1]
  52. quotedStr = quotedStr.split('</blockquote>')[0]
  53. if '<blockquote>' not in endStr:
  54. found = False
  55. if '<blockquote>' in quotedStr:
  56. quotedStr = quotedStr.replace('<blockquote>', '')
  57. content = prefix + quotedStr + '</blockquote>' + endStr
  58. ctr += 1
  59. return content
  60. def htmlReplaceEmailQuote(content: str) -> str:
  61. """Replaces an email style quote "> Some quote" with html blockquote
  62. """
  63. if isPGPEncrypted(content) or containsPGPPublicKey(content):
  64. return content
  65. # replace quote paragraph
  66. if '<p>&quot;' in content:
  67. if '&quot;</p>' in content:
  68. if content.count('<p>&quot;') == content.count('&quot;</p>'):
  69. content = content.replace('<p>&quot;', '<p><blockquote>')
  70. content = content.replace('&quot;</p>', '</blockquote></p>')
  71. if '>\u201c' in content:
  72. if '\u201d<' in content:
  73. if content.count('>\u201c') == content.count('\u201d<'):
  74. content = content.replace('>\u201c', '><blockquote>')
  75. content = content.replace('\u201d<', '</blockquote><')
  76. # replace email style quote
  77. if '>&gt; ' not in content:
  78. return content
  79. contentStr = content.replace('<p>', '')
  80. contentLines = contentStr.split('</p>')
  81. newContent = ''
  82. for lineStr in contentLines:
  83. if not lineStr:
  84. continue
  85. if '>&gt; ' not in lineStr:
  86. if lineStr.startswith('&gt; '):
  87. lineStr = lineStr.replace('&gt; ', '<blockquote>')
  88. lineStr = lineStr.replace('&gt;', '<br>')
  89. newContent += '<p>' + lineStr + '</blockquote></p>'
  90. else:
  91. newContent += '<p>' + lineStr + '</p>'
  92. else:
  93. lineStr = lineStr.replace('>&gt; ', '><blockquote>')
  94. if lineStr.startswith('&gt;'):
  95. lineStr = lineStr.replace('&gt;', '<blockquote>', 1)
  96. else:
  97. lineStr = lineStr.replace('&gt;', '<br>')
  98. newContent += '<p>' + lineStr + '</blockquote></p>'
  99. return _removeQuotesWithinQuotes(newContent)
  100. def htmlReplaceQuoteMarks(content: str) -> str:
  101. """Replaces quotes with html formatting
  102. "hello" becomes <q>hello</q>
  103. """
  104. if isPGPEncrypted(content) or containsPGPPublicKey(content):
  105. return content
  106. if '"' not in content:
  107. if '&quot;' not in content:
  108. return content
  109. # only if there are a few quote marks
  110. if content.count('"') > 4:
  111. return content
  112. if content.count('&quot;') > 4:
  113. return content
  114. newContent = content
  115. if '"' in content:
  116. sections = content.split('"')
  117. if len(sections) > 1:
  118. newContent = ''
  119. openQuote = True
  120. markup = False
  121. for ch in content:
  122. currChar = ch
  123. if ch == '<':
  124. markup = True
  125. elif ch == '>':
  126. markup = False
  127. elif ch == '"' and not markup:
  128. if openQuote:
  129. currChar = '“'
  130. else:
  131. currChar = '”'
  132. openQuote = not openQuote
  133. newContent += currChar
  134. if '&quot;' in newContent:
  135. openQuote = True
  136. content = newContent
  137. newContent = ''
  138. ctr = 0
  139. sections = content.split('&quot;')
  140. noOfSections = len(sections)
  141. for s in sections:
  142. newContent += s
  143. if ctr < noOfSections - 1:
  144. if openQuote:
  145. newContent += '“'
  146. else:
  147. newContent += '”'
  148. openQuote = not openQuote
  149. ctr += 1
  150. return newContent
  151. def dangerousCSS(filename: str, allowLocalNetworkAccess: bool) -> bool:
  152. """Returns true is the css file contains code which
  153. can create security problems
  154. """
  155. if not os.path.isfile(filename):
  156. return False
  157. with open(filename, 'r') as fp:
  158. content = fp.read().lower()
  159. cssMatches = ('behavior:', ':expression', '?php', '.php',
  160. 'google', 'regexp', 'localhost',
  161. '127.0.', '192.168', '10.0.', '@import')
  162. for match in cssMatches:
  163. if match in content:
  164. return True
  165. # search for non-local web links
  166. if 'url(' in content:
  167. urlList = content.split('url(')
  168. ctr = 0
  169. for urlStr in urlList:
  170. if ctr > 0:
  171. if ')' in urlStr:
  172. urlStr = urlStr.split(')')[0]
  173. if 'http' in urlStr:
  174. print('ERROR: non-local web link in CSS ' +
  175. filename)
  176. return True
  177. ctr += 1
  178. # an attacker can include html inside of the css
  179. # file as a comment and this may then be run from the html
  180. if dangerousMarkup(content, allowLocalNetworkAccess):
  181. return True
  182. return False
  183. def switchWords(baseDir: str, nickname: str, domain: str, content: str) -> str:
  184. """Performs word replacements. eg. Trump -> The Orange Menace
  185. """
  186. if isPGPEncrypted(content) or containsPGPPublicKey(content):
  187. return content
  188. switchWordsFilename = baseDir + '/accounts/' + \
  189. nickname + '@' + domain + '/replacewords.txt'
  190. if not os.path.isfile(switchWordsFilename):
  191. return content
  192. with open(switchWordsFilename, 'r') as fp:
  193. for line in fp:
  194. replaceStr = line.replace('\n', '').replace('\r', '')
  195. wordTransform = None
  196. if '->' in replaceStr:
  197. wordTransform = replaceStr.split('->')
  198. elif ':' in replaceStr:
  199. wordTransform = replaceStr.split(':')
  200. elif ',' in replaceStr:
  201. wordTransform = replaceStr.split(',')
  202. elif ';' in replaceStr:
  203. wordTransform = replaceStr.split(';')
  204. elif '-' in replaceStr:
  205. wordTransform = replaceStr.split('-')
  206. if not wordTransform:
  207. continue
  208. if len(wordTransform) == 2:
  209. replaceStr1 = wordTransform[0].strip().replace('"', '')
  210. replaceStr2 = wordTransform[1].strip().replace('"', '')
  211. content = content.replace(replaceStr1, replaceStr2)
  212. return content
  213. def replaceEmojiFromTags(content: str, tag: [], messageType: str) -> str:
  214. """Uses the tags to replace :emoji: with html image markup
  215. """
  216. for tagItem in tag:
  217. if not tagItem.get('type'):
  218. continue
  219. if tagItem['type'] != 'Emoji':
  220. continue
  221. if not tagItem.get('name'):
  222. continue
  223. if not tagItem.get('icon'):
  224. continue
  225. if not tagItem['icon'].get('url'):
  226. continue
  227. if '/' not in tagItem['icon']['url']:
  228. continue
  229. if tagItem['name'] not in content:
  230. continue
  231. iconName = tagItem['icon']['url'].split('/')[-1]
  232. if iconName:
  233. if len(iconName) > 1:
  234. if iconName[0].isdigit():
  235. if '.' in iconName:
  236. iconName = iconName.split('.')[0]
  237. # see https://unicode.org/
  238. # emoji/charts/full-emoji-list.html
  239. if '-' not in iconName:
  240. # a single code
  241. try:
  242. replaceChar = chr(int("0x" + iconName, 16))
  243. content = content.replace(tagItem['name'],
  244. replaceChar)
  245. except BaseException:
  246. pass
  247. else:
  248. # sequence of codes
  249. iconCodes = iconName.split('-')
  250. iconCodeSequence = ''
  251. for icode in iconCodes:
  252. try:
  253. iconCodeSequence += chr(int("0x" +
  254. icode, 16))
  255. except BaseException:
  256. iconCodeSequence = ''
  257. break
  258. if iconCodeSequence:
  259. content = content.replace(tagItem['name'],
  260. iconCodeSequence)
  261. htmlClass = 'emoji'
  262. if messageType == 'post header':
  263. htmlClass = 'emojiheader'
  264. if messageType == 'profile':
  265. htmlClass = 'emojiprofile'
  266. emojiHtml = "<img src=\"" + tagItem['icon']['url'] + "\" alt=\"" + \
  267. tagItem['name'].replace(':', '') + \
  268. "\" align=\"middle\" class=\"" + htmlClass + "\"/>"
  269. content = content.replace(tagItem['name'], emojiHtml)
  270. return content
  271. def _addMusicTag(content: str, tag: str) -> str:
  272. """If a music link is found then ensure that the post is
  273. tagged appropriately
  274. """
  275. if '#podcast' in content or '#documentary' in content:
  276. return content
  277. if '#' not in tag:
  278. tag = '#' + tag
  279. if tag in content:
  280. return content
  281. musicSites = ('soundcloud.com', 'bandcamp.com')
  282. musicSiteFound = False
  283. for site in musicSites:
  284. if site+'/' in content:
  285. musicSiteFound = True
  286. break
  287. if not musicSiteFound:
  288. return content
  289. return ':music: ' + content + ' ' + tag + ' '
  290. def addWebLinks(content: str) -> str:
  291. """Adds markup for web links
  292. """
  293. if ':' not in content:
  294. return content
  295. prefixes = getLinkPrefixes()
  296. # do any of these prefixes exist within the content?
  297. prefixFound = False
  298. for prefix in prefixes:
  299. if prefix in content:
  300. prefixFound = True
  301. break
  302. # if there are no prefixes then just keep the content we have
  303. if not prefixFound:
  304. return content
  305. maxLinkLength = 40
  306. content = content.replace('\r', '')
  307. words = content.replace('\n', ' --linebreak-- ').split(' ')
  308. replaceDict = {}
  309. for w in words:
  310. if ':' not in w:
  311. continue
  312. # does the word begin with a prefix?
  313. prefixFound = False
  314. for prefix in prefixes:
  315. if w.startswith(prefix):
  316. prefixFound = True
  317. break
  318. if not prefixFound:
  319. continue
  320. # the word contains a prefix
  321. if w.endswith('.') or w.endswith(';'):
  322. w = w[:-1]
  323. markup = '<a href="' + w + \
  324. '" rel="nofollow noopener noreferrer" target="_blank">'
  325. for prefix in prefixes:
  326. if w.startswith(prefix):
  327. markup += '<span class="invisible">' + prefix + '</span>'
  328. break
  329. linkText = w
  330. for prefix in prefixes:
  331. linkText = linkText.replace(prefix, '')
  332. # prevent links from becoming too long
  333. if len(linkText) > maxLinkLength:
  334. markup += '<span class="ellipsis">' + \
  335. linkText[:maxLinkLength] + '</span>'
  336. markup += '<span class="invisible">' + \
  337. linkText[maxLinkLength:] + '</span></a>'
  338. else:
  339. markup += '<span class="ellipsis">' + linkText + '</span></a>'
  340. replaceDict[w] = markup
  341. # do the replacements
  342. for url, markup in replaceDict.items():
  343. content = content.replace(url, markup)
  344. # replace any line breaks
  345. content = content.replace(' --linebreak-- ', '<br>')
  346. return content
  347. def validHashTag(hashtag: str) -> bool:
  348. """Returns true if the give hashtag contains valid characters
  349. """
  350. # long hashtags are not valid
  351. if len(hashtag) >= 32:
  352. return False
  353. validChars = set('0123456789' +
  354. 'abcdefghijklmnopqrstuvwxyz' +
  355. 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
  356. '¡¿ÄäÀàÁáÂâÃãÅåǍǎĄąĂăÆæĀā' +
  357. 'ÇçĆćĈĉČčĎđĐďðÈèÉéÊêËëĚěĘęĖėĒē' +
  358. 'ĜĝĢģĞğĤĥÌìÍíÎîÏïıĪīĮįĴĵĶķ' +
  359. 'ĹĺĻļŁłĽľĿŀÑñŃńŇňŅņÖöÒòÓóÔôÕõŐőØøŒœ' +
  360. 'ŔŕŘřẞߌśŜŝŞşŠšȘșŤťŢţÞþȚțÜüÙùÚúÛûŰűŨũŲųŮůŪū' +
  361. 'ŴŵÝýŸÿŶŷŹźŽžŻż')
  362. if set(hashtag).issubset(validChars):
  363. return True
  364. if isValidLanguage(hashtag):
  365. return True
  366. return False
  367. def _addHashTags(wordStr: str, httpPrefix: str, domain: str,
  368. replaceHashTags: {}, postHashtags: {}) -> bool:
  369. """Detects hashtags and adds them to the replacements dict
  370. Also updates the hashtags list to be added to the post
  371. """
  372. if replaceHashTags.get(wordStr):
  373. return True
  374. hashtag = wordStr[1:]
  375. if not validHashTag(hashtag):
  376. return False
  377. hashtagUrl = httpPrefix + "://" + domain + "/tags/" + hashtag
  378. postHashtags[hashtag] = {
  379. 'href': hashtagUrl,
  380. 'name': '#' + hashtag,
  381. 'type': 'Hashtag'
  382. }
  383. replaceHashTags[wordStr] = "<a href=\"" + hashtagUrl + \
  384. "\" class=\"mention hashtag\" rel=\"tag\">#<span>" + \
  385. hashtag + "</span></a>"
  386. return True
  387. def _addEmoji(baseDir: str, wordStr: str,
  388. httpPrefix: str, domain: str,
  389. replaceEmoji: {}, postTags: {},
  390. emojiDict: {}) -> bool:
  391. """Detects Emoji and adds them to the replacements dict
  392. Also updates the tags list to be added to the post
  393. """
  394. if not wordStr.startswith(':'):
  395. return False
  396. if not wordStr.endswith(':'):
  397. return False
  398. if len(wordStr) < 3:
  399. return False
  400. if replaceEmoji.get(wordStr):
  401. return True
  402. # remove leading and trailing : characters
  403. emoji = wordStr[1:]
  404. emoji = emoji[:-1]
  405. # is the text of the emoji valid?
  406. if not validHashTag(emoji):
  407. return False
  408. if not emojiDict.get(emoji):
  409. return False
  410. emojiFilename = baseDir + '/emoji/' + emojiDict[emoji] + '.png'
  411. if not os.path.isfile(emojiFilename):
  412. return False
  413. emojiUrl = httpPrefix + "://" + domain + \
  414. "/emoji/" + emojiDict[emoji] + '.png'
  415. postTags[emoji] = {
  416. 'icon': {
  417. 'mediaType': 'image/png',
  418. 'type': 'Image',
  419. 'url': emojiUrl
  420. },
  421. 'name': ':'+emoji+':',
  422. "updated": fileLastModified(emojiFilename),
  423. "id": emojiUrl.replace('.png', ''),
  424. 'type': 'Emoji'
  425. }
  426. return True
  427. def tagExists(tagType: str, tagName: str, tags: {}) -> bool:
  428. """Returns true if a tag exists in the given dict
  429. """
  430. for tag in tags:
  431. if tag['name'] == tagName and tag['type'] == tagType:
  432. return True
  433. return False
  434. def _addMention(wordStr: str, httpPrefix: str, following: str, petnames: str,
  435. replaceMentions: {}, recipients: [], tags: {}) -> bool:
  436. """Detects mentions and adds them to the replacements dict and
  437. recipients list
  438. """
  439. possibleHandle = wordStr[1:]
  440. # @nick
  441. if following and '@' not in possibleHandle:
  442. # fall back to a best effort match against the following list
  443. # if no domain was specified. eg. @nick
  444. possibleNickname = possibleHandle
  445. for follow in following:
  446. if '@' not in follow:
  447. continue
  448. followNick = follow.split('@')[0]
  449. if possibleNickname == followNick:
  450. followStr = follow.replace('\n', '').replace('\r', '')
  451. replaceDomain = followStr.split('@')[1]
  452. recipientActor = httpPrefix + "://" + \
  453. replaceDomain + "/users/" + possibleNickname
  454. if recipientActor not in recipients:
  455. recipients.append(recipientActor)
  456. tags[wordStr] = {
  457. 'href': recipientActor,
  458. 'name': wordStr,
  459. 'type': 'Mention'
  460. }
  461. replaceMentions[wordStr] = \
  462. "<span class=\"h-card\"><a href=\"" + httpPrefix + \
  463. "://" + replaceDomain + "/@" + possibleNickname + \
  464. "\" class=\"u-url mention\">@<span>" + possibleNickname + \
  465. "</span></a></span>"
  466. return True
  467. # try replacing petnames with mentions
  468. followCtr = 0
  469. for follow in following:
  470. if '@' not in follow:
  471. followCtr += 1
  472. continue
  473. pet = petnames[followCtr].replace('\n', '')
  474. if pet:
  475. if possibleNickname == pet:
  476. followStr = follow.replace('\n', '').replace('\r', '')
  477. replaceNickname = followStr.split('@')[0]
  478. replaceDomain = followStr.split('@')[1]
  479. recipientActor = httpPrefix + "://" + \
  480. replaceDomain + "/users/" + replaceNickname
  481. if recipientActor not in recipients:
  482. recipients.append(recipientActor)
  483. tags[wordStr] = {
  484. 'href': recipientActor,
  485. 'name': wordStr,
  486. 'type': 'Mention'
  487. }
  488. replaceMentions[wordStr] = \
  489. "<span class=\"h-card\"><a href=\"" + httpPrefix + \
  490. "://" + replaceDomain + "/@" + replaceNickname + \
  491. "\" class=\"u-url mention\">@<span>" + \
  492. replaceNickname + "</span></a></span>"
  493. return True
  494. followCtr += 1
  495. return False
  496. possibleNickname = None
  497. possibleDomain = None
  498. if '@' not in possibleHandle:
  499. return False
  500. possibleNickname = possibleHandle.split('@')[0]
  501. if not possibleNickname:
  502. return False
  503. possibleDomain = \
  504. possibleHandle.split('@')[1].strip('\n').strip('\r')
  505. if not possibleDomain:
  506. return False
  507. if following:
  508. for follow in following:
  509. if follow.replace('\n', '').replace('\r', '') != possibleHandle:
  510. continue
  511. recipientActor = httpPrefix + "://" + \
  512. possibleDomain + "/users/" + possibleNickname
  513. if recipientActor not in recipients:
  514. recipients.append(recipientActor)
  515. tags[wordStr] = {
  516. 'href': recipientActor,
  517. 'name': wordStr,
  518. 'type': 'Mention'
  519. }
  520. replaceMentions[wordStr] = \
  521. "<span class=\"h-card\"><a href=\"" + httpPrefix + \
  522. "://" + possibleDomain + "/@" + possibleNickname + \
  523. "\" class=\"u-url mention\">@<span>" + possibleNickname + \
  524. "</span></a></span>"
  525. return True
  526. # @nick@domain
  527. if not (possibleDomain == 'localhost' or '.' in possibleDomain):
  528. return False
  529. recipientActor = httpPrefix + "://" + \
  530. possibleDomain + "/users/" + possibleNickname
  531. if recipientActor not in recipients:
  532. recipients.append(recipientActor)
  533. tags[wordStr] = {
  534. 'href': recipientActor,
  535. 'name': wordStr,
  536. 'type': 'Mention'
  537. }
  538. replaceMentions[wordStr] = \
  539. "<span class=\"h-card\"><a href=\"" + httpPrefix + \
  540. "://" + possibleDomain + "/@" + possibleNickname + \
  541. "\" class=\"u-url mention\">@<span>" + possibleNickname + \
  542. "</span></a></span>"
  543. return True
  544. def replaceContentDuplicates(content: str) -> str:
  545. """Replaces invalid duplicates within content
  546. """
  547. if isPGPEncrypted(content) or containsPGPPublicKey(content):
  548. return content
  549. while '<<' in content:
  550. content = content.replace('<<', '<')
  551. while '>>' in content:
  552. content = content.replace('>>', '>')
  553. content = content.replace('<\\p>', '')
  554. return content
  555. def removeTextFormatting(content: str) -> str:
  556. """Removes markup for bold, italics, etc
  557. """
  558. if isPGPEncrypted(content) or containsPGPPublicKey(content):
  559. return content
  560. if '<' not in content:
  561. return content
  562. removeMarkup = ('b', 'i', 'ul', 'ol', 'li', 'em', 'strong',
  563. 'blockquote', 'h1', 'h2', 'h3', 'h4', 'h5')
  564. for markup in removeMarkup:
  565. content = content.replace('<' + markup + '>', '')
  566. content = content.replace('</' + markup + '>', '')
  567. content = content.replace('<' + markup.upper() + '>', '')
  568. content = content.replace('</' + markup.upper() + '>', '')
  569. return content
def removeLongWords(content: str, maxWordLength: int,
                    longWordsList: []) -> str:
    """Breaks up long words so that on mobile screens this doesn't
    disrupt the layout

    content is the text to be altered, maxWordLength is the longest
    permitted word and longWordsList is an optional list of words
    already known to be too long. If no list is given then it is
    created from the space separated words of the content.
    Returns the altered content. PGP encrypted content, or content
    containing a PGP public key, is returned unaltered.
    """
    if isPGPEncrypted(content) or containsPGPPublicKey(content):
        return content
    content = replaceContentDuplicates(content)
    if ' ' not in content:
        # handle a single very long string with no spaces
        contentStr = content.replace('<p>', '').replace(r'<\p>', '')
        if '://' not in contentStr:
            if len(contentStr) > maxWordLength:
                # NOTE(review): the closing tag here is written as <\p>
                # rather than </p> - confirm whether this is intended
                if '<p>' in content:
                    content = '<p>' + contentStr[:maxWordLength] + r'<\p>'
                else:
                    content = content[:maxWordLength]
                return content
    words = content.split(' ')
    if not longWordsList:
        # no list was supplied, so find the long words here
        longWordsList = []
        for wordStr in words:
            if len(wordStr) > maxWordLength:
                if wordStr not in longWordsList:
                    longWordsList.append(wordStr)
    for wordStr in longWordsList:
        if wordStr.startswith('<p>'):
            wordStr = wordStr.replace('<p>', '')
        # words which begin with other markup are left alone
        if wordStr.startswith('<'):
            continue
        if len(wordStr) == 76:
            if wordStr.upper() == wordStr:
                # tox address
                continue
        # markup attributes
        if '=\"' in wordStr:
            continue
        # words containing a single @, such as handles, are kept
        if '@' in wordStr:
            if '@@' not in wordStr:
                continue
        if '=.ed25519' in wordStr:
            continue
        if '.onion' in wordStr:
            continue
        if '.i2p' in wordStr:
            continue
        # links are not broken up
        if 'https:' in wordStr:
            continue
        elif 'http:' in wordStr:
            continue
        elif 'i2p:' in wordStr:
            continue
        elif 'gnunet:' in wordStr:
            continue
        elif 'dat:' in wordStr:
            continue
        elif 'rad:' in wordStr:
            continue
        elif 'hyper:' in wordStr:
            continue
        elif 'briar:' in wordStr:
            continue
        if '<' in wordStr:
            # keep only the text before any markup within the word
            replaceWord = wordStr.split('<', 1)[0]
            # if len(replaceWord) > maxWordLength:
            #     replaceWord = replaceWord[:maxWordLength]
            content = content.replace(wordStr, replaceWord)
            wordStr = replaceWord
        if '/' in wordStr:
            continue
        if len(wordStr[maxWordLength:]) < maxWordLength:
            # the remainder fits onto a second line
            content = content.replace(wordStr,
                                      wordStr[:maxWordLength] + '\n' +
                                      wordStr[maxWordLength:])
        else:
            # the remainder is itself too long, so truncate the word
            content = content.replace(wordStr,
                                      wordStr[:maxWordLength])
    # ensure that an opened paragraph is closed
    if content.startswith('<p>'):
        if not content.endswith('</p>'):
            content = content.strip() + '</p>'
    return content
  650. def _loadAutoTags(baseDir: str, nickname: str, domain: str) -> []:
  651. """Loads automatic tags file and returns a list containing
  652. the lines of the file
  653. """
  654. filename = baseDir + '/accounts/' + \
  655. nickname + '@' + domain + '/autotags.txt'
  656. if not os.path.isfile(filename):
  657. return []
  658. with open(filename, "r") as f:
  659. return f.readlines()
  660. return []
  661. def _autoTag(baseDir: str, nickname: str, domain: str,
  662. wordStr: str, autoTagList: [],
  663. appendTags: []):
  664. """Generates a list of tags to be automatically appended to the content
  665. """
  666. for tagRule in autoTagList:
  667. if wordStr not in tagRule:
  668. continue
  669. if '->' not in tagRule:
  670. continue
  671. match = tagRule.split('->')[0].strip()
  672. if match != wordStr:
  673. continue
  674. tagName = tagRule.split('->')[1].strip()
  675. if tagName.startswith('#'):
  676. if tagName not in appendTags:
  677. appendTags.append(tagName)
  678. else:
  679. if '#' + tagName not in appendTags:
  680. appendTags.append('#' + tagName)
  681. def addHtmlTags(baseDir: str, httpPrefix: str,
  682. nickname: str, domain: str, content: str,
  683. recipients: [], hashtags: {}, isJsonContent=False) -> str:
  684. """ Replaces plaintext mentions such as @nick@domain into html
  685. by matching against known following accounts
  686. """
  687. if content.startswith('<p>'):
  688. content = htmlReplaceEmailQuote(content)
  689. return htmlReplaceQuoteMarks(content)
  690. maxWordLength = 40
  691. content = content.replace('\r', '')
  692. content = content.replace('\n', ' --linebreak-- ')
  693. content = _addMusicTag(content, 'nowplaying')
  694. contentSimplified = \
  695. content.replace(',', ' ').replace(';', ' ').replace('- ', ' ')
  696. contentSimplified = contentSimplified.replace('. ', ' ').strip()
  697. if contentSimplified.endswith('.'):
  698. contentSimplified = contentSimplified[:len(contentSimplified)-1]
  699. words = contentSimplified.split(' ')
  700. # remove . for words which are not mentions
  701. newWords = []
  702. for wordIndex in range(0, len(words)):
  703. wordStr = words[wordIndex]
  704. if wordStr.endswith('.'):
  705. if not wordStr.startswith('@'):
  706. wordStr = wordStr[:-1]
  707. if wordStr.startswith('.'):
  708. wordStr = wordStr[1:]
  709. newWords.append(wordStr)
  710. words = newWords
  711. replaceMentions = {}
  712. replaceHashTags = {}
  713. replaceEmoji = {}
  714. emojiDict = {}
  715. originalDomain = domain
  716. if ':' in domain:
  717. domain = domain.split(':')[0]
  718. followingFilename = baseDir + '/accounts/' + \
  719. nickname + '@' + domain + '/following.txt'
  720. # read the following list so that we can detect just @nick
  721. # in addition to @nick@domain
  722. following = None
  723. petnames = None
  724. if '@' in words:
  725. if os.path.isfile(followingFilename):
  726. with open(followingFilename, "r") as f:
  727. following = f.readlines()
  728. for handle in following:
  729. pet = getPetName(baseDir, nickname, domain, handle)
  730. if pet:
  731. petnames.append(pet + '\n')
  732. # extract mentions and tags from words
  733. longWordsList = []
  734. prevWordStr = ''
  735. autoTagsList = _loadAutoTags(baseDir, nickname, domain)
  736. appendTags = []
  737. for wordStr in words:
  738. wordLen = len(wordStr)
  739. if wordLen > 2:
  740. if wordLen > maxWordLength:
  741. longWordsList.append(wordStr)
  742. firstChar = wordStr[0]
  743. if firstChar == '@':
  744. if _addMention(wordStr, httpPrefix, following, petnames,
  745. replaceMentions, recipients, hashtags):
  746. prevWordStr = ''
  747. continue
  748. elif firstChar == '#':
  749. # remove any endings from the hashtag
  750. hashTagEndings = ('.', ':', ';', '-', '\n')
  751. for ending in hashTagEndings:
  752. if wordStr.endswith(ending):
  753. wordStr = wordStr[:len(wordStr) - 1]
  754. break
  755. if _addHashTags(wordStr, httpPrefix, originalDomain,
  756. replaceHashTags, hashtags):
  757. prevWordStr = ''
  758. continue
  759. elif ':' in wordStr:
  760. wordStr2 = wordStr.split(':')[1]
  761. # print('TAG: emoji located - '+wordStr)
  762. if not emojiDict:
  763. # emoji.json is generated so that it can be customized and
  764. # the changes will be retained even if default_emoji.json
  765. # is subsequently updated
  766. if not os.path.isfile(baseDir + '/emoji/emoji.json'):
  767. copyfile(baseDir + '/emoji/default_emoji.json',
  768. baseDir + '/emoji/emoji.json')
  769. emojiDict = loadJson(baseDir + '/emoji/emoji.json')
  770. # print('TAG: looking up emoji for :'+wordStr2+':')
  771. _addEmoji(baseDir, ':' + wordStr2 + ':', httpPrefix,
  772. originalDomain, replaceEmoji, hashtags,
  773. emojiDict)
  774. else:
  775. if _autoTag(baseDir, nickname, domain, wordStr,
  776. autoTagsList, appendTags):
  777. prevWordStr = ''
  778. continue
  779. if prevWordStr:
  780. if _autoTag(baseDir, nickname, domain,
  781. prevWordStr + ' ' + wordStr,
  782. autoTagsList, appendTags):
  783. prevWordStr = ''
  784. continue
  785. prevWordStr = wordStr
  786. # add any auto generated tags
  787. for appended in appendTags:
  788. content = content + ' ' + appended
  789. _addHashTags(appended, httpPrefix, originalDomain,
  790. replaceHashTags, hashtags)
  791. # replace words with their html versions
  792. for wordStr, replaceStr in replaceMentions.items():
  793. content = content.replace(wordStr, replaceStr)
  794. for wordStr, replaceStr in replaceHashTags.items():
  795. content = content.replace(wordStr, replaceStr)
  796. if not isJsonContent:
  797. for wordStr, replaceStr in replaceEmoji.items():
  798. content = content.replace(wordStr, replaceStr)
  799. content = addWebLinks(content)
  800. if longWordsList:
  801. content = removeLongWords(content, maxWordLength, longWordsList)
  802. content = content.replace(' --linebreak-- ', '</p><p>')
  803. content = htmlReplaceEmailQuote(content)
  804. return '<p>' + htmlReplaceQuoteMarks(content) + '</p>'
  805. def getMentionsFromHtml(htmlText: str,
  806. matchStr="<span class=\"h-card\"><a href=\"") -> []:
  807. """Extracts mentioned actors from the given html content string
  808. """
  809. mentions = []
  810. if matchStr not in htmlText:
  811. return mentions
  812. mentionsList = htmlText.split(matchStr)
  813. for mentionStr in mentionsList:
  814. if '"' not in mentionStr:
  815. continue
  816. actorStr = mentionStr.split('"')[0]
  817. if actorStr.startswith('http') or \
  818. actorStr.startswith('gnunet') or \
  819. actorStr.startswith('i2p') or \
  820. actorStr.startswith('hyper') or \
  821. actorStr.startswith('dat:'):
  822. if actorStr not in mentions:
  823. mentions.append(actorStr)
  824. return mentions
  825. def extractMediaInFormPOST(postBytes, boundary, name: str):
  826. """Extracts the binary encoding for image/video/audio within a http
  827. form POST
  828. Returns the media bytes and the remaining bytes
  829. """
  830. imageStartBoundary = b'Content-Disposition: form-data; name="' + \
  831. name.encode('utf8', 'ignore') + b'";'
  832. imageStartLocation = postBytes.find(imageStartBoundary)
  833. if imageStartLocation == -1:
  834. return None, postBytes
  835. # bytes after the start boundary appears
  836. mediaBytes = postBytes[imageStartLocation:]
  837. # look for the next boundary
  838. imageEndBoundary = boundary.encode('utf8', 'ignore')
  839. imageEndLocation = mediaBytes.find(imageEndBoundary)
  840. if imageEndLocation == -1:
  841. # no ending boundary
  842. return mediaBytes, postBytes[:imageStartLocation]
  843. # remaining bytes after the end of the image
  844. remainder = mediaBytes[imageEndLocation:]
  845. # remove bytes after the end boundary
  846. mediaBytes = mediaBytes[:imageEndLocation]
  847. # return the media and the before+after bytes
  848. return mediaBytes, postBytes[:imageStartLocation] + remainder
  849. def saveMediaInFormPOST(mediaBytes, debug: bool,
  850. filenameBase=None) -> (str, str):
  851. """Saves the given media bytes extracted from http form POST
  852. Returns the filename and attachment type
  853. """
  854. if not mediaBytes:
  855. if debug:
  856. print('DEBUG: No media found within POST')
  857. return None, None
  858. mediaLocation = -1
  859. searchStr = ''
  860. filename = None
  861. # directly search the binary array for the beginning
  862. # of an image
  863. extensionList = {
  864. 'png': 'image/png',
  865. 'jpeg': 'image/jpeg',
  866. 'gif': 'image/gif',
  867. 'svg': 'image/svg+xml',
  868. 'webp': 'image/webp',
  869. 'avif': 'image/avif',
  870. 'mp4': 'video/mp4',
  871. 'ogv': 'video/ogv',
  872. 'mp3': 'audio/mpeg',
  873. 'ogg': 'audio/ogg'
  874. }
  875. detectedExtension = None
  876. for extension, contentType in extensionList.items():
  877. searchStr = b'Content-Type: ' + contentType.encode('utf8', 'ignore')
  878. mediaLocation = mediaBytes.find(searchStr)
  879. if mediaLocation > -1:
  880. # image/video/audio binaries
  881. if extension == 'jpeg':
  882. extension = 'jpg'
  883. elif extension == 'mpeg':
  884. extension = 'mp3'
  885. filename = filenameBase + '.' + extension
  886. attachmentMediaType = \
  887. searchStr.decode().split('/')[0].replace('Content-Type: ', '')
  888. detectedExtension = extension
  889. break
  890. if not filename:
  891. return None, None
  892. # locate the beginning of the image, after any
  893. # carriage returns
  894. startPos = mediaLocation + len(searchStr)
  895. for offset in range(1, 8):
  896. if mediaBytes[startPos+offset] != 10:
  897. if mediaBytes[startPos+offset] != 13:
  898. startPos += offset
  899. break
  900. # remove any existing image files with a different format
  901. extensionTypes = getImageExtensions()
  902. for ex in extensionTypes:
  903. if ex == detectedExtension:
  904. continue
  905. possibleOtherFormat = \
  906. filename.replace('.temp', '').replace('.' +
  907. detectedExtension, '.' +
  908. ex)
  909. if os.path.isfile(possibleOtherFormat):
  910. os.remove(possibleOtherFormat)
  911. fd = open(filename, 'wb')
  912. if not fd:
  913. return None, None
  914. fd.write(mediaBytes[startPos:])
  915. fd.close()
  916. if not os.path.isfile(filename):
  917. print('WARN: Media file could not be written to file: ' + filename)
  918. return None, None
  919. print('Uploaded media file written: ' + filename)
  920. return filename, attachmentMediaType
  921. def extractTextFieldsInPOST(postBytes, boundary, debug: bool,
  922. unitTestData=None) -> {}:
  923. """Returns a dictionary containing the text fields of a http form POST
  924. The boundary argument comes from the http header
  925. """
  926. if not unitTestData:
  927. msgBytes = email.parser.BytesParser().parsebytes(postBytes)
  928. messageFields = msgBytes.get_payload(decode=True).decode('utf-8')
  929. else:
  930. messageFields = unitTestData
  931. if debug:
  932. print('DEBUG: POST arriving ' + messageFields)
  933. messageFields = messageFields.split(boundary)
  934. fields = {}
  935. fieldsWithSemicolonAllowed = (
  936. 'message', 'bio', 'autoCW', 'password', 'passwordconfirm',
  937. 'instanceDescription', 'instanceDescriptionShort',
  938. 'subject', 'location', 'imageDescription'
  939. )
  940. # examine each section of the POST, separated by the boundary
  941. for f in messageFields:
  942. if f == '--':
  943. continue
  944. if ' name="' not in f:
  945. continue
  946. postStr = f.split(' name="', 1)[1]
  947. if '"' not in postStr:
  948. continue
  949. postKey = postStr.split('"', 1)[0]
  950. postValueStr = postStr.split('"', 1)[1]
  951. if ';' in postValueStr:
  952. if postKey not in fieldsWithSemicolonAllowed and \
  953. not postKey.startswith('edited'):
  954. continue
  955. if '\r\n' not in postValueStr:
  956. continue
  957. postLines = postValueStr.split('\r\n')
  958. postValue = ''
  959. if len(postLines) > 2:
  960. for line in range(2, len(postLines)-1):
  961. if line > 2:
  962. postValue += '\n'
  963. postValue += postLines[line]
  964. fields[postKey] = urllib.parse.unquote_plus(postValue)
  965. return fields