Langkau ke kandungan utama

Carian Perkataan & Penyamaran Maklumat

Glasswall Word Search boleh digunakan untuk menyunting (redact) teks daripada fail dan menjana laporan XML tentang butiran fail yang telah disunting. Lihat Word Search & Redaction.

Laporan ini merangkumi butiran tentang saiz fail, jenis fail yang ditentukan, jumlah keseluruhan padanan teks, dan lokasi setiap padanan teks.

Contoh laporan
<gw:WordSearchStatistics xmlns:gw="http://glasswall.com/namespace">
<gw:DocumentSummary>
<gw:TotalSizeInBytes>13084</gw:TotalSizeInBytes>
<gw:FileType>docx</gw:FileType>
<gw:TotalItemMatchCount>8</gw:TotalItemMatchCount>
</gw:DocumentSummary>
<gw:WordItem>
<gw:Name>ipsum</gw:Name>
<gw:ItemMatchCount>5</gw:ItemMatchCount>
<gw:Locations>
<gw:Location>
<gw:Offset>120</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>267</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>691</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>973</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>1034</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
</gw:Locations>
</gw:WordItem>
<gw:WordItem>
<gw:Name>lorem</gw:Name>
<gw:ItemMatchCount>3</gw:ItemMatchCount>
<gw:Locations>
<gw:Location>
<gw:Offset>114</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>244</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>1224</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
</gw:Locations>
</gw:WordItem>
</gw:WordSearchStatistics>


Fail JSON homoglyphs boleh ditentukan sama ada sebagai laluan fail atau dalam memori sebagai bytes, bytearray, atau io.BytesIO. Jika ini tidak ditentukan maka lalai akan digunakan:

Fail homoglyphs.json lalai
{
"!": "ǃⵑ",
"$": "$",
"%": "%",
"&": "ꝸ&",
"'": "`´ʹʻʼʽʾˈˊˋ˴ʹ΄՚՝י׳ߴߵᑊᛌ᾽᾿`´῾‘’‛′‵ꞌ'`𖽑𖽒",
"(": "❨❲〔﴾([",
")": "❩❳〕﴿)]",
"*": "٭⁎∗*𐌟",
"+": "᛭+𐊛",
",": "¸؍٫‚ꓹ,",
"-": "˗۔‐‑‒–⁃−➖Ⲻ﹘",
".": "٠۰܁܂․ꓸ꘎.𐩐𝅭",
"/": "᜵⁁⁄∕╱⟋⧸Ⳇ⼃〳ノ㇓丿/𝈺",
"0": "OoΟοσОоՕօסه٥ھہە۵߀०০੦૦ଠ୦௦ం౦ಂ೦ംഠ൦ං๐໐ဝ၀ჿዐᴏᴑℴⲞⲟⵔ〇ꓳꬽﮦﮧﮨﮩﮪﮫﮬﮭﻩﻪﻫﻬ0Oo𐊒𐊫𐐄𐐬𐓂𐓪𐔖𑓐𑢵𑣈𑣗𑣠𝐎𝐨𝑂𝑜𝑶𝒐𝒪𝓞𝓸𝔒𝔬𝕆𝕠𝕺𝖔𝖮𝗈𝗢𝗼𝘖𝘰𝙊𝙤𝙾𝚘𝚶𝛐𝛔𝛰𝜊𝜎𝜪𝝄𝝈𝝤𝝾𝞂𝞞𝞸𝞼𝟎𝟘𝟢𝟬𝟶𞸤𞹤𞺄",
"1": "Il|ƖǀΙІӀ׀וןا١۱ߊᛁℐℑℓⅠⅼ∣⏽Ⲓⵏꓲﺍﺎ1Il│𐊊𐌉𐌠𖼨𝐈𝐥𝐼𝑙𝑰𝒍𝓁𝓘𝓵𝔩𝕀𝕝𝕴𝖑𝖨𝗅𝗜𝗹𝘐𝘭𝙄𝙡𝙸𝚕𝚰𝛪𝜤𝝞𝞘𝟏𝟙𝟣𝟭𝟷𞣇𞸀𞺀",
"2": "ƧϨᒿꙄꛯꝚ2𝟐𝟚𝟤𝟮𝟸",
"3": "ƷȜЗӠⳌꝪꞫ3𑣊𖼻𝈆𝟑𝟛𝟥𝟯𝟹",
"4": "Ꮞ4𑢯𝟒𝟜𝟦𝟰𝟺",
"5": "Ƽ5𑢻𝟓𝟝𝟧𝟱𝟻",
"6": "бᏮⳒ6𑣕𝟔𝟞𝟨𝟲𝟼",
"7": "7𐓒𑣆𝈒𝟕𝟟𝟩𝟳𝟽",
"8": "Ȣȣ৪੪ଃ8𐌚𝟖𝟠𝟪𝟴𝟾𞣋",
"9": "৭੧୨൭ⳊꝮ9𑢬𑣌𑣖𝟗𝟡𝟫𝟵𝟿",
"A": "4ΑАᎪᗅᴀꓮꭺA𐊠𖽀𝐀𝐴𝑨𝒜𝓐𝔄𝔸𝕬𝖠𝗔𝘈𝘼𝙰𝚨𝛢𝜜𝝖𝞐",
"B": "ʙΒВвᏴᏼᗷᛒℬꓐꞴB𐊂𐊡𐌁𝐁𝐵𝑩𝓑𝔅𝔹𝕭𝖡𝗕𝘉𝘽𝙱𝚩𝛣𝜝𝝗𝞑",
"C": "ϹСᏟℂℭⅭⲤꓚC𐊢𐌂𐐕𐔜𑣩𑣲𝐂𝐶𝑪𝒞𝓒𝕮𝖢𝗖𝘊𝘾𝙲🝌",
"D": "ᎠᗞᗪᴅⅅⅮꓓꭰD𝐃𝐷𝑫𝒟𝓓𝔇𝔻𝕯𝖣𝗗𝘋𝘿𝙳",
"E": "ΕЕᎬᴇℰ⋿ⴹꓰꭼE𐊆𑢦𑢮𝐄𝐸𝑬𝓔𝔈𝔼𝕰𝖤𝗘𝘌𝙀𝙴𝚬𝛦𝜠𝝚𝞔",
"F": "ϜᖴℱꓝꞘF𐊇𐊥𐔥𑢢𑣂𝈓𝐅𝐹𝑭𝓕𝔉𝔽𝕱𝖥𝗙𝘍𝙁𝙵𝟊",
"G": "ɢԌԍᏀᏳᏻꓖꮐG𝐆𝐺𝑮𝒢𝓖𝔊𝔾𝕲𝖦𝗚𝘎𝙂𝙶",
"H": "ʜΗНнᎻᕼℋℌℍⲎꓧꮋH𐋏𝐇𝐻𝑯𝓗𝕳𝖧𝗛𝘏𝙃𝙷𝚮𝛨𝜢𝝜𝞖",
"I": "",
"J": "ͿЈᎫᒍᴊꓙꞲꭻJ𝐉𝐽𝑱𝒥𝓙𝔍𝕁𝕵𝖩𝗝𝘑𝙅𝙹",
"K": "ΚКᏦᛕKⲔꓗK𐔘𝐊𝐾𝑲𝒦𝓚𝔎𝕂𝕶𝖪𝗞𝘒𝙆𝙺𝚱𝛫𝜥𝝟𝞙",
"L": "ʟᏞᒪℒⅬⳐⳑꓡꮮL𐐛𐑃𐔦𑢣𑢲𖼖𝈪𝐋𝐿𝑳𝓛𝔏𝕃𝕷𝖫𝗟𝘓𝙇𝙻",
"M": "ΜϺМᎷᗰᛖℳⅯⲘꓟM𐊰𐌑𝐌𝑀𝑴𝓜𝔐𝕄𝕸𝖬𝗠𝘔𝙈𝙼𝚳𝛭𝜧𝝡𝞛",
"N": "ɴΝℕⲚꓠN𐔓𝐍𝑁𝑵𝒩𝓝𝔑𝕹𝖭𝗡𝘕𝙉𝙽𝚴𝛮𝜨𝝢𝞜",
"O": "0",
"P": "ΡРᏢᑭᴘᴩℙⲢꓑꮲP𐊕𝐏𝑃𝑷𝒫𝓟𝔓𝕻𝖯𝗣𝘗𝙋𝙿𝚸𝛲𝜬𝝦𝞠",
"Q": "ℚⵕQ𝐐𝑄𝑸𝒬𝓠𝔔𝕼𝖰𝗤𝘘𝙌𝚀",
"R": "ƦʀᎡᏒᖇᚱℛℜℝꓣꭱꮢR𐒴𖼵𝈖𝐑𝑅𝑹𝓡𝕽𝖱𝗥𝘙𝙍𝚁",
"S": "$ЅՏᏕᏚꓢS𐊖𐐠𖼺𝐒𝑆𝑺𝒮𝓢𝔖𝕊𝕾𝖲𝗦𝘚𝙎𝚂",
"T": "ŤΤτТтᎢᴛ⊤⟙ⲦꓔꭲT𐊗𐊱𐌕𑢼𖼊𝐓𝑇𝑻𝒯𝓣𝔗𝕋𝕿𝖳𝗧𝘛𝙏𝚃𝚻𝛕𝛵𝜏𝜯𝝉𝝩𝞃𝞣𝞽🝨",
"U": "Սሀᑌ∪⋃ꓴU𐓎𑢸𖽂𝐔𝑈𝑼𝒰𝓤𝔘𝕌𝖀𝖴𝗨𝘜𝙐𝚄",
"V": "Ѵ٧۷ᏙᐯⅤⴸꓦꛟV𐔝𑢠𖼈𝈍𝐕𝑉𝑽𝒱𝓥𝔙𝕍𝖁𝖵𝗩𝘝𝙑𝚅",
"W": "ԜᎳᏔꓪW𑣦𑣯𝐖𝑊𝑾𝒲𝓦𝔚𝕎𝖂𝖶𝗪𝘞𝙒𝚆",
"X": "ΧХ᙭ᚷⅩ╳ⲬⵝꓫꞳX𐊐𐊴𐌗𐌢𐔧𑣬𝐗𝑋𝑿𝒳𝓧𝔛𝕏𝖃𝖷𝗫𝘟𝙓𝚇𝚾𝛸𝜲𝝬𝞦",
"Y": "ΥϒУҮᎩᎽⲨꓬY𐊲𑢤𖽃𝐘𝑌𝒀𝒴𝓨𝔜𝕐𝖄𝖸𝗬𝘠𝙔𝚈𝚼𝛶𝜰𝝪𝞤",
"Z": "ΖᏃℤℨꓜZ𐋵𑢩𑣥𝐙𝑍𝒁𝒵𝓩𝖅𝖹𝗭𝘡𝙕𝚉𝚭𝛧𝜡𝝛𝞕",
"a": "@ɑαа⍺a𝐚𝑎𝒂𝒶𝓪𝔞𝕒𝖆𝖺𝗮𝘢𝙖𝚊𝛂𝛼𝜶𝝰𝞪",
"b": "ƄЬᏏᖯb𝐛𝑏𝒃𝒷𝓫𝔟𝕓𝖇𝖻𝗯𝘣𝙗𝚋",
"c": "ϲсᴄⅽⲥꮯc𐐽𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌",
"d": "ԁᏧᑯⅆⅾꓒd𝐝𝑑𝒅𝒹𝓭𝔡𝕕𝖉𝖽𝗱𝘥𝙙𝚍",
"e": "еҽ℮ℯⅇꬲe𝐞𝑒𝒆𝓮𝔢𝕖𝖊𝖾𝗲𝘦𝙚𝚎",
"f": "ſϝքẝꞙꬵf𝐟𝑓𝒇𝒻𝓯𝔣𝕗𝖋𝖿𝗳𝘧𝙛𝚏𝟋",
"g": "ƍɡցᶃℊg𝐠𝑔𝒈𝓰𝔤𝕘𝖌𝗀𝗴𝘨𝙜𝚐",
"h": "һհᏂℎh𝐡𝒉𝒽𝓱𝔥𝕙𝖍𝗁𝗵𝘩𝙝𝚑",
"i": "ıɩɪ˛ͺιіӏᎥιℹⅈⅰ⍳ꙇꭵi𑣃𝐢𝑖𝒊𝒾𝓲𝔦𝕚𝖎𝗂𝗶𝘪𝙞𝚒𝚤𝛊𝜄𝜾𝝸𝞲",
"j": "ϳјⅉj𝐣𝑗𝒋𝒿𝓳𝔧𝕛𝖏𝗃𝗷𝘫𝙟𝚓",
"k": "k𝐤𝑘𝒌𝓀𝓴𝔨𝕜𝖐𝗄𝗸𝘬𝙠𝚔",
"l": "1",
"m": "m",
"n": "ոռn𝐧𝑛𝒏𝓃𝓷𝔫𝕟𝖓𝗇𝗻𝘯𝙣𝚗",
"o": "",
"p": "ρϱр⍴ⲣp𝐩𝑝𝒑𝓅𝓹𝔭𝕡𝖕𝗉𝗽𝘱𝙥𝚙𝛒𝛠𝜌𝜚𝝆𝝔𝞀𝞎𝞺𝟈",
"q": "ԛգզq𝐪𝑞𝒒𝓆𝓺𝔮𝕢𝖖𝗊𝗾𝘲𝙦𝚚",
"r": "гᴦⲅꭇꭈꮁr𝐫𝑟𝒓𝓇𝓻𝔯𝕣𝖗𝗋𝗿𝘳𝙧𝚛",
"s": "$ƽѕꜱꮪs𐑈𑣁𝐬𝑠𝒔𝓈𝓼𝔰𝕤𝖘𝗌𝘀𝘴𝙨𝚜",
"t": "t𝐭𝑡𝒕𝓉𝓽𝔱𝕥𝖙𝗍𝘁𝘵𝙩𝚝",
"u": "ʋυսᴜꞟꭎꭒu𐓶𑣘𝐮𝑢𝒖𝓊𝓾𝔲𝕦𝖚𝗎𝘂𝘶𝙪𝚞𝛖𝜐𝝊𝞄𝞾",
"v": "νѵטᴠⅴ∨⋁ꮩv𑜆𑣀𝐯𝑣𝒗𝓋𝓿𝔳𝕧𝖛𝗏𝘃𝘷𝙫𝚟𝛎𝜈𝝂𝝼𝞶",
"w": "ɯѡԝաᴡꮃw𑜊𑜎𑜏𝐰𝑤𝒘𝓌𝔀𝔴𝕨𝖜𝗐𝘄𝘸𝙬𝚠",
"x": "×хᕁᕽ᙮ⅹ⤫⤬⨯x𝐱𝑥𝒙𝓍𝔁𝔵𝕩𝖝𝗑𝘅𝘹𝙭𝚡",
"y": "ɣʏγуүყᶌỿℽꭚy𑣜𝐲𝑦𝒚𝓎𝔂𝔶𝕪𝖞𝗒𝘆𝘺𝙮𝚢𝛄𝛾𝜸𝝲𝞬",
"z": "ᴢꮓz𑣄𝐳𝑧𝒛𝓏𝔃𝔷𝕫𝖟𝗓𝘇𝘻𝙯𝚣",
"£": "₤",
"©": "Ⓒ",
"®": "Ⓡ"
}

Contoh

WordSearch

Lihat Memuatkan Pustaka Glasswall untuk butiran tentang cara memuatkan pustaka WordSearch.

Redaksi

Fail boleh disunting secara individu daripada laluan fail atau dalam memori menggunakan kaedah redact_file, atau semua fail dalam direktori boleh disunting menggunakan kaedah redact_directory.

Redaksi daripada laluan fail ke laluan fail

import glasswall


# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# Build one policy holding a "redact" text item for each search term, using "*" as the replacement character
redact_policy = glasswall.content_management.policies.WordSearch(
    config={
        "textSearchConfig": {
            "@libVersion": "core2",
            "textList": [
                {"name": "textItem", "switches": [
                    {"name": "text", "value": search_term},
                    {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                ]}
                for search_term in ("lorem", "ipsum")
            ],
        }
    }
)

# Redact "lorem" and "ipsum" in the input file and write the redacted copy to the output path
word_search.redact_file(
    input_file=r"C:\gwpw\input_redact\lorem_ipsum.docx",
    output_file=r"C:\gwpw\output\word_search\redact_f2f\lorem_ipsum.docx",
    content_management_policy=redact_policy,
)

Redaksi daripada laluan fail ke memori

redact_file mengembalikan objek dengan atribut: "status" (int), "output_file" (bytes), "output_report" (bytes). Contoh di bawah menunjukkan penetapan pemboleh ubah result dan pemeriksaan kandungan pada bahagian awal output_file yang telah disunting serta output_report.

import glasswall


# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# Redact occurrences of the text "lorem" and "ipsum" within the input file. output_file is None, so no output
# path is given; the redacted file and the report are read from the returned result object instead.
result = word_search.redact_file(
    input_file=r"C:\gwpw\input_redact\lorem_ipsum.docx",
    output_file=None,
    content_management_policy=glasswall.content_management.policies.WordSearch(
        config={
            "textSearchConfig": {
                "@libVersion": "core2",
                "textList": [
                    {"name": "textItem", "switches": [
                        {"name": "text", "value": "lorem"},
                        {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                    ]},
                    {"name": "textItem", "switches": [
                        {"name": "text", "value": "ipsum"},
                        {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                    ]},
                ]
            }
        }
    )
)

# The redacted docx is held in memory as bytes; check its leading bytes and the start of the XML report
assert result.output_file[:6] == b'PK\x03\x04\x14\x00'
assert result.output_report[:500] == b'<gw:WordSearchStatistics xmlns:gw="http://glasswall.com/namespace">\n\t<gw:DocumentSummary>\n\t\t<gw:TotalSizeInBytes>14292</gw:TotalSizeInBytes>\n\t\t<gw:FileType>docx</gw:FileType>\n\t\t<gw:TotalItemMatchCount>14</gw:TotalItemMatchCount>\n\t</gw:DocumentSummary>\n\t<gw:WordItem>\n\t\t<gw:Name>ipsum</gw:Name>\n\t\t<gw:ItemMatchCount>8</gw:ItemMatchCount>\n\t\t<gw:Locations>\n\t\t\t<gw:Location>\n\t\t\t\t<gw:Offset>120</gw:Offset>\n\t\t\t\t<gw:Page>0</gw:Page>\n\t\t\t\t<gw:Paragraph>0</gw:Paragraph>\n\t\t\t</gw:Location>\n\t\t\t<gw:Location>\n\t\t\t'

Redaksi daripada memori

import glasswall


# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# Read file from disk to memory
with open(r"C:\gwpw\input_redact\lorem_ipsum.docx", "rb") as f:
    input_bytes = f.read()

# Redact occurrences of the text "lorem" and "ipsum" within the in-memory file bytes, writing the redacted file to
# a new path. The redacted file and the report are also read from the returned result object below.
result = word_search.redact_file(
    input_file=input_bytes,
    output_file=r"C:\gwpw\output\word_search\redact_m2f\lorem_ipsum.docx",
    content_management_policy=glasswall.content_management.policies.WordSearch(
        config={
            "textSearchConfig": {
                "@libVersion": "core2",
                "textList": [
                    {"name": "textItem", "switches": [
                        {"name": "text", "value": "lorem"},
                        {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                    ]},
                    {"name": "textItem", "switches": [
                        {"name": "text", "value": "ipsum"},
                        {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                    ]},
                ]
            }
        }
    )
)

# Check the leading bytes of the redacted docx and the start of the XML report
assert result.output_file[:6] == b'PK\x03\x04\x14\x00'
assert result.output_report[:500] == b'<gw:WordSearchStatistics xmlns:gw="http://glasswall.com/namespace">\n\t<gw:DocumentSummary>\n\t\t<gw:TotalSizeInBytes>14292</gw:TotalSizeInBytes>\n\t\t<gw:FileType>docx</gw:FileType>\n\t\t<gw:TotalItemMatchCount>14</gw:TotalItemMatchCount>\n\t</gw:DocumentSummary>\n\t<gw:WordItem>\n\t\t<gw:Name>ipsum</gw:Name>\n\t\t<gw:ItemMatchCount>8</gw:ItemMatchCount>\n\t\t<gw:Locations>\n\t\t\t<gw:Location>\n\t\t\t\t<gw:Offset>120</gw:Offset>\n\t\t\t\t<gw:Page>0</gw:Page>\n\t\t\t\t<gw:Paragraph>0</gw:Paragraph>\n\t\t\t</gw:Location>\n\t\t\t<gw:Location>\n\t\t\t'

Redaksi fail dalam direktori

redact_directory mengembalikan kamus laluan fail relatif kepada input_directory, dan objek dengan atribut: "status" (int), "output_file" (bytes), "output_report" (bytes). Contoh di bawah menunjukkan penetapan pemboleh ubah results dan pemeriksaan kunci serta nilai kamus results.

import glasswall


# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# Build one policy holding a "redact" text item for each search term, using "*" as the replacement character
redact_policy = glasswall.content_management.policies.WordSearch(
    config={
        "textSearchConfig": {
            "@libVersion": "core2",
            "textList": [
                {"name": "textItem", "switches": [
                    {"name": "text", "value": search_term},
                    {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                ]}
                for search_term in ("lorem", "ipsum")
            ],
        }
    }
)

# Redact "lorem" and "ipsum" in every file found in input_directory, writing each redacted file
# to the matching path under output_directory
results = word_search.redact_directory(
    input_directory=r"C:\gwpw\input_redact",
    output_directory=r"C:\gwpw\output\word_search\redact_directory",
    content_management_policy=redact_policy,
)

# results is keyed by file path relative to input_directory; every file is expected to report status 1
assert list(results.keys()) == ['lorem_ipsum.docx', 'lorem_ipsum.pptx']
assert all(result.status == 1 for result in results.values())

Redaksi fail dalam direktori yang mungkin mengandungi jenis fail yang tidak disokong

Tingkah laku lalai bagi pembungkus Glasswall Python adalah untuk membangkitkan pengecualian yang berkaitan (lihat: glasswall.libraries.word_search.errors) jika pemprosesan gagal. Menetapkan raise_unsupported=False akan menghalang pengecualian daripada dibangkitkan dan boleh berguna apabila bekerja dengan direktori yang mengandungi campuran jenis fail yang disokong dan tidak disokong, apabila adalah wajar untuk memproses sebanyak mungkin fail dan bukannya menamatkan proses pada kegagalan pertama.

Direktori input contoh di bawah mengandungi dua fail yang sama seperti dalam contoh di atas serta satu fail dengan format fail yang tidak disokong: python-package.yml. Kita boleh memeriksa pasangan kunci-nilai dalam kamus results dan melihat bahawa objek yang dikembalikan untuk fail python-package.yml mempunyai status: 0, iaitu kegagalan. Atribut output_file ialah bytes kosong, dan bytes output_report diisi dengan laporan yang merangkumi IssueItem yang menerangkan masalah yang ditemui semasa cuba melakukan redaksi pada fail: File contents could not be accessed.

import glasswall


# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# Build one policy holding a "redact" text item for each search term, using "*" as the replacement character
redact_policy = glasswall.content_management.policies.WordSearch(
    config={
        "textSearchConfig": {
            "@libVersion": "core2",
            "textList": [
                {"name": "textItem", "switches": [
                    {"name": "text", "value": search_term},
                    {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                ]}
                for search_term in ("lorem", "ipsum")
            ],
        }
    }
)

# Redact "lorem" and "ipsum" in every file found in input_directory, writing each redacted file
# to the matching path under output_directory. raise_unsupported=False is passed so that a file
# that cannot be processed is reported in results instead of raising an exception.
results = word_search.redact_directory(
    input_directory=r"C:\gwpw\input_redact_with_unsupported_file_types",
    output_directory=r"C:\gwpw\output\word_search\redact_directory_unsupported",
    content_management_policy=redact_policy,
    raise_unsupported=False,
)

# The two Office files report status 1; the unsupported yml file reports status 0
assert list(results.keys()) == ["lorem_ipsum.docx", "lorem_ipsum.pptx", "python-package.yml"]
assert [result.status for result in results.values()] == [1, 1, 0]

# Inspect the result object for the unsupported file
print(results["python-package.yml"].__dict__)
# {'status': 0,
# 'output_file': b'',
# 'output_report': b'<gw:WordSearchStatistics xmlns:gw="http://glasswall.com/namespace">\n\t<gw:IssueItem>\n\t\t<gw:Description>File contents could not be accessed</gw:Description>\n\t</gw:IssueItem>\n\t<gw:DocumentSummary>\n\t\t<gw:TotalSizeInBytes>1460</gw:TotalSizeInBytes>\n\t\t<gw:FileType>Unknown</gw:FileType>\n\t\t<gw:TotalItemMatchCount>0</gw:TotalItemMatchCount>\n\t</gw:DocumentSummary>\n\t<gw:WordItem>\n\t\t<gw:Name>ipsum</gw:Name>\n\t\t<gw:ItemMatchCount>0</gw:ItemMatchCount>\n\t\t<gw:Locations/>\n\t</gw:WordItem>\n\t<gw:WordItem>\n\t\t<gw:Name>lorem</gw:Name>\n\t\t<gw:ItemMatchCount>0</gw:ItemMatchCount>\n\t\t<gw:Locations/>\n\t</gw:WordItem>\n</gw:WordSearchStatistics>\n\n'}

Redaksi fail dalam direktori secara bersyarat berdasarkan format fail

Contoh di bawah menunjukkan redaksi hanya fail docx dan pptx daripada direktori yang juga mengandungi jenis fail lain yang tidak disokong.

import os

import glasswall


# Load the Glasswall Editor library (used here only to determine each file's type)
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

input_directory = r"C:\gwpw\input_redact_with_unsupported_file_types"
output_directory = r"C:\gwpw\output\word_search\redact_directory_file_format"

# Iterate relative file paths from input_directory
for relative_file in glasswall.utils.list_file_paths(input_directory, absolute=False):
    # Construct absolute paths
    input_file = os.path.join(input_directory, relative_file)
    output_file = os.path.join(output_directory, relative_file)

    # Get the file type of the file; raise_unsupported=False so an unsupported file does not raise here
    file_type = editor.determine_file_type(
        input_file=input_file,
        as_string=True,
        raise_unsupported=False
    )

    # Redact only docx and pptx files; files of any other type are skipped
    if file_type in ["docx", "pptx"]:
        # Redact occurrences of the text "lorem" and "ipsum" within the input file, writing the redacted file
        # to a new path
        word_search.redact_file(
            input_file=input_file,
            output_file=output_file,
            content_management_policy=glasswall.content_management.policies.WordSearch(
                config={
                    "textSearchConfig": {
                        "@libVersion": "core2",
                        "textList": [
                            {"name": "textItem", "switches": [
                                {"name": "text", "value": "lorem"},
                                {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                            ]},
                            {"name": "textItem", "switches": [
                                {"name": "text", "value": "ipsum"},
                                {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                            ]},
                        ]
                    }
                }
            )
        )