Modern ActivityPub compliant server, designed for simplicity and accessibility. Includes calendar and sharing economy features to empower your federated community. https://code.freedombone.net/bashrc/epicyon Docs: https://epicyon.net/#install
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

categories.py 6.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. __filename__ = "categories.py"
  2. __author__ = "Bob Mottram"
  3. __license__ = "AGPL3+"
  4. __version__ = "1.2.0"
  5. __maintainer__ = "Bob Mottram"
  6. __email__ = "bob@freedombone.net"
  7. __status__ = "Production"
  8. import os
  9. import datetime
  10. def getHashtagCategory(baseDir: str, hashtag: str) -> str:
  11. """Returns the category for the hashtag
  12. """
  13. categoryFilename = baseDir + '/tags/' + hashtag + '.category'
  14. if not os.path.isfile(categoryFilename):
  15. categoryFilename = baseDir + '/tags/' + hashtag.title() + '.category'
  16. if not os.path.isfile(categoryFilename):
  17. categoryFilename = \
  18. baseDir + '/tags/' + hashtag.upper() + '.category'
  19. if not os.path.isfile(categoryFilename):
  20. return ''
  21. with open(categoryFilename, 'r') as fp:
  22. categoryStr = fp.read()
  23. if categoryStr:
  24. return categoryStr
  25. return ''
  26. def getHashtagCategories(baseDir: str, recent=False, category=None) -> None:
  27. """Returns a dictionary containing hashtag categories
  28. """
  29. maxTagLength = 42
  30. hashtagCategories = {}
  31. if recent:
  32. currTime = datetime.datetime.utcnow()
  33. daysSinceEpoch = (currTime - datetime.datetime(1970, 1, 1)).days
  34. recently = daysSinceEpoch - 1
  35. for subdir, dirs, files in os.walk(baseDir + '/tags'):
  36. for f in files:
  37. if not f.endswith('.category'):
  38. continue
  39. categoryFilename = os.path.join(baseDir + '/tags', f)
  40. if not os.path.isfile(categoryFilename):
  41. continue
  42. hashtag = f.split('.')[0]
  43. if len(hashtag) > maxTagLength:
  44. continue
  45. with open(categoryFilename, 'r') as fp:
  46. categoryStr = fp.read()
  47. if not categoryStr:
  48. continue
  49. if category:
  50. # only return a dictionary for a specific category
  51. if categoryStr != category:
  52. continue
  53. if recent:
  54. tagsFilename = baseDir + '/tags/' + hashtag + '.txt'
  55. if not os.path.isfile(tagsFilename):
  56. continue
  57. modTimesinceEpoc = \
  58. os.path.getmtime(tagsFilename)
  59. lastModifiedDate = \
  60. datetime.datetime.fromtimestamp(modTimesinceEpoc)
  61. fileDaysSinceEpoch = \
  62. (lastModifiedDate -
  63. datetime.datetime(1970, 1, 1)).days
  64. if fileDaysSinceEpoch < recently:
  65. continue
  66. if not hashtagCategories.get(categoryStr):
  67. hashtagCategories[categoryStr] = [hashtag]
  68. else:
  69. if hashtag not in hashtagCategories[categoryStr]:
  70. hashtagCategories[categoryStr].append(hashtag)
  71. break
  72. return hashtagCategories
  73. def _updateHashtagCategories(baseDir: str) -> None:
  74. """Regenerates the list of hashtag categories
  75. """
  76. categoryListFilename = baseDir + '/accounts/categoryList.txt'
  77. hashtagCategories = getHashtagCategories(baseDir)
  78. if not hashtagCategories:
  79. if os.path.isfile(categoryListFilename):
  80. os.remove(categoryListFilename)
  81. return
  82. categoryList = []
  83. for categoryStr, hashtagList in hashtagCategories.items():
  84. categoryList.append(categoryStr)
  85. categoryList.sort()
  86. categoryListStr = ''
  87. for categoryStr in categoryList:
  88. categoryListStr += categoryStr + '\n'
  89. # save a list of available categories for quick lookup
  90. with open(categoryListFilename, 'w+') as fp:
  91. fp.write(categoryListStr)
  92. def _validHashtagCategory(category: str) -> bool:
  93. """Returns true if the category name is valid
  94. """
  95. if not category:
  96. return False
  97. invalidChars = (',', ' ', '<', ';', '\\', '"', '&', '#')
  98. for ch in invalidChars:
  99. if ch in category:
  100. return False
  101. # too long
  102. if len(category) > 40:
  103. return False
  104. return True
  105. def setHashtagCategory(baseDir: str, hashtag: str, category: str,
  106. force=False) -> bool:
  107. """Sets the category for the hashtag
  108. """
  109. if not _validHashtagCategory(category):
  110. return False
  111. if not force:
  112. hashtagFilename = baseDir + '/tags/' + hashtag + '.txt'
  113. if not os.path.isfile(hashtagFilename):
  114. hashtag = hashtag.title()
  115. hashtagFilename = baseDir + '/tags/' + hashtag + '.txt'
  116. if not os.path.isfile(hashtagFilename):
  117. hashtag = hashtag.upper()
  118. hashtagFilename = baseDir + '/tags/' + hashtag + '.txt'
  119. if not os.path.isfile(hashtagFilename):
  120. return False
  121. if not os.path.isdir(baseDir + '/tags'):
  122. os.mkdir(baseDir + '/tags')
  123. categoryFilename = baseDir + '/tags/' + hashtag + '.category'
  124. if force:
  125. # don't overwrite any existing categories
  126. if os.path.isfile(categoryFilename):
  127. return False
  128. with open(categoryFilename, 'w+') as fp:
  129. fp.write(category)
  130. _updateHashtagCategories(baseDir)
  131. return True
  132. return False
  133. def guessHashtagCategory(tagName: str, hashtagCategories: {}) -> str:
  134. """Tries to guess a category for the given hashtag.
  135. This works by trying to find the longest similar hashtag
  136. """
  137. categoryMatched = ''
  138. tagMatchedLen = 0
  139. for categoryStr, hashtagList in hashtagCategories.items():
  140. for hashtag in hashtagList:
  141. if len(hashtag) < 3:
  142. # avoid matching very small strings which often
  143. # lead to spurious categories
  144. continue
  145. if hashtag not in tagName:
  146. if tagName not in hashtag:
  147. continue
  148. if not categoryMatched:
  149. tagMatchedLen = len(hashtag)
  150. categoryMatched = categoryStr
  151. else:
  152. # match the longest tag
  153. if len(hashtag) > tagMatchedLen:
  154. categoryMatched = categoryStr
  155. if not categoryMatched:
  156. return
  157. return categoryMatched