#! /usr/bin/python
"""
"""
#import lxml.etree as ET
#import requests
#from bs4 import BeautifulSoup as bs
import argparse
import os # for walk, environment vars
import re
import subprocess #so I can use git to get the modified dates.
from xml.sax.saxutils import escape # XML-escape URLs written to the sitemap.
dir_name = '.'  # The walk starts in the current working directory.
include_dirs = {'en', 'zh', 'ko', 'uk'}  # update for new language builds.
exclude_dirs = {'.vitepress', 'node_modules'}  # never scanned, at any depth.

# Sitemap protocol envelope; %s receives the concatenated <url> entries.
SITEMAP_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
%s</urlset>
'''


def _parse_args(argv=None):
    """Parse the command line (``argv=None`` means ``sys.argv[1:]``)."""
    my_parser = argparse.ArgumentParser(
        description='Generate sitemap for all markdown files in directory '
                    '(default to main for output)')
    my_parser.add_argument('-v', '--version',
                           action='store',
                           type=str,
                           default='main',
                           help='build version used in the URLs '
                                '(overridden by the BRANCH_NAME environment variable)')
    my_parser.add_argument('-d', '--date',
                           action='store_true',
                           help='generate date information')
    my_parser.add_argument('-o', '--output',
                           action='store',
                           type=str,
                           default='./.vitepress/dist/',
                           help='directory that sitemap.xml is written to')
    return my_parser.parse_args(argv)


def _path_parts(path):
    """Split *path* on either separator, dropping empty and '.' components."""
    return [part for part in path.replace('\\', '/').split('/')
            if part and part != '.']


def _is_sitemap_dir(rel_dir):
    """Return True if *rel_dir* lies in a language tree and outside every excluded tree.

    The test is made on whole path components, so the language root itself
    (e.g. ``en``) qualifies as well as its subdirectories.
    """
    parts = _path_parts(rel_dir)
    return (not include_dirs.isdisjoint(parts)
            and exclude_dirs.isdisjoint(parts))


def _page_url(url_prefix, rel_dir, file):
    """Return the published URL for markdown file *file* located in *rel_dir*.

    ``README.md`` and ``index.md`` map to the directory URL; every other
    page maps to ``<name>.html``.
    """
    stem = file[:-3]  # strip the '.md' suffix
    page = '' if stem in ('README', 'index') else stem + '.html'
    return f"{url_prefix}/{rel_dir}/{page}"


def _git_modified_date(path):
    """Return the date of the last commit touching *path* (may be empty).

    Equivalent to: git log -1 --format=%as -- ./zh/uavcan/notes.md
    The result is an empty string when git reports no commit for the file.
    """
    result = subprocess.run(["git", "log", "-1", "--format=%as", "--", path],
                            capture_output=True)
    return result.stdout.decode('UTF-8').strip().strip('"')


def _collect_items(root, url_prefix, with_dates):
    """Walk *root* and return one sitemap item (a dict) per markdown page.

    Each item has the keys 'url' and 'changefreq', plus 'modified' when
    *with_dates* is true. Directories and files are visited in sorted order
    so that the generated file is reproducible.
    """
    sitemapitems = []
    for subdir, dirs, files in os.walk(root, topdown=True):
        # Prune in place so os.walk never descends into excluded trees.
        dirs[:] = sorted(d for d in dirs if d not in exclude_dirs)

        rel_dir = os.path.relpath(subdir, root).replace('\\', '/')
        if not _is_sitemap_dir(rel_dir):
            continue  # the root itself, or a directory outside the language trees

        for file in sorted(files):
            if not file.endswith('.md'):  # only process md files.
                continue
            sitemapitem = {
                'changefreq': 'daily',
                'url': _page_url(url_prefix, rel_dir, file),
            }
            if with_dates:
                source_path = f"{subdir}/{file}".replace('\\', '/')
                sitemapitem['modified'] = _git_modified_date(source_path)
            sitemapitems.append(sitemapitem)
    return sitemapitems


def _render_sitemap(sitemapitems, with_dates):
    """Return the complete sitemap XML document for *sitemapitems*."""
    entries = []
    for item in sitemapitems:
        lines = [
            '  <url>\n',
            f"    <loc>{escape(item['url'])}</loc>\n",
            f"    <changefreq>{item['changefreq']}</changefreq>\n",
        ]
        # An empty <lastmod> is invalid, so skip it for files without git history.
        if with_dates and item.get('modified'):
            lines.append(f"    <lastmod>{item['modified']}</lastmod>\n")
        lines.append('  </url>\n')
        entries.append(''.join(lines))
    return SITEMAP_TEMPLATE % ''.join(entries)


def main(argv=None):
    """Generate sitemap.xml for the markdown files below the current directory.

    Args:
        argv: command-line arguments; ``None`` means ``sys.argv[1:]``.
    """
    args = _parse_args(argv)

    # Get build version from process env by preference.
    build_version = os.getenv('BRANCH_NAME') or args.version
    url_prefix = 'https://docs.px4.io/%s' % build_version

    sitemapitems = _collect_items(dir_name, url_prefix, args.date)
    sitemaptext = _render_sitemap(sitemapitems, args.date)

    # Write the sitemap to file. os.path.join copes with an output
    # directory given with or without a trailing separator.
    outputfile = os.path.join(args.output, 'sitemap.xml')
    with open(outputfile, "w", encoding="utf-8") as f:
        f.write(sitemaptext)
    print("Sitemap generated to: %s" % outputfile)


if __name__ == '__main__':
    main()