Paghahanap ng Salita at Redaction
Maaaring gamitin ang Glasswall Word Search upang mag-redact ng teksto mula sa mga file at bumubuo ito ng XML report tungkol sa mga detalye ng na-redact na file. Tingnan ang Word Search & Redaction.
Kasama sa report na ito ang mga detalye tungkol sa laki ng file, ang natukoy na uri ng file, ang kabuuang bilang ng mga tugma sa teksto, at ang lokasyon ng bawat isa sa mga tugma sa teksto.
Halimbawang report
<gw:WordSearchStatistics xmlns:gw="http://glasswall.com/namespace">
<gw:DocumentSummary>
<gw:TotalSizeInBytes>13084</gw:TotalSizeInBytes>
<gw:FileType>docx</gw:FileType>
<gw:TotalItemMatchCount>8</gw:TotalItemMatchCount>
</gw:DocumentSummary>
<gw:WordItem>
<gw:Name>ipsum</gw:Name>
<gw:ItemMatchCount>5</gw:ItemMatchCount>
<gw:Locations>
<gw:Location>
<gw:Offset>120</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>267</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>691</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>973</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>1034</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
</gw:Locations>
</gw:WordItem>
<gw:WordItem>
<gw:Name>lorem</gw:Name>
<gw:ItemMatchCount>3</gw:ItemMatchCount>
<gw:Locations>
<gw:Location>
<gw:Offset>114</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>244</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
<gw:Location>
<gw:Offset>1224</gw:Offset>
<gw:Page>0</gw:Page>
<gw:Paragraph>0</gw:Paragraph>
</gw:Location>
</gw:Locations>
</gw:WordItem>
</gw:WordSearchStatistics>
Maaaring tukuyin ang isang homoglyphs JSON file bilang file path o nasa memory bilang bytes, bytearray, o io.BytesIO. Kung hindi ito tinukoy, gagamitin ang default:
Default na homoglyphs.json file
{
"!": "ǃⵑ",
"$": "＄",
"%": "％",
"&": "ꝸ&",
"'": "`´ʹʻʼʽʾˈˊˋ˴ʹ΄՚՝י׳ߴߵᑊᛌ᾽᾿`´῾‘’‛′‵ꞌ'`𖽑𖽒",
"(": "❨❲〔﴾([",
")": "❩❳〕﴿)]",
"*": "٭⁎∗*𐌟",
"+": "᛭+𐊛",
",": "¸؍٫‚ꓹ,",
"-": "˗۔‐‑‒–⁃−➖Ⲻ﹘",
".": "٠۰܁܂․ꓸ꘎.𐩐𝅭",
"/": "᜵⁁⁄∕╱⟋⧸Ⳇ⼃〳ノ㇓丿/𝈺",
"0": "OoΟοσОоՕօסه٥ھہە۵߀०০੦૦ଠ୦௦ం౦ಂ೦ംഠ൦ං๐໐ဝ၀ჿዐᴏᴑℴⲞⲟⵔ〇ꓳꬽﮦﮧﮨﮩﮪﮫﮬﮭﻩﻪﻫﻬ0Oo𐊒𐊫𐐄𐐬𐓂𐓪𐔖𑓐𑢵𑣈𑣗𑣠𝐎𝐨𝑂𝑜𝑶𝒐𝒪𝓞𝓸𝔒𝔬𝕆𝕠𝕺𝖔𝖮𝗈𝗢𝗼𝘖𝘰𝙊𝙤𝙾𝚘𝚶𝛐𝛔𝛰𝜊𝜎𝜪𝝄𝝈𝝤𝝾𝞂𝞞𝞸𝞼𝟎𝟘𝟢𝟬𝟶𞸤𞹤𞺄",
"1": "Il|ƖǀΙІӀ׀וןا١۱ߊᛁℐℑℓⅠⅼ∣⏽Ⲓⵏꓲﺍﺎ1Il│𐊊𐌉𐌠𖼨𝐈𝐥𝐼𝑙𝑰𝒍𝓁𝓘𝓵𝔩𝕀𝕝𝕴𝖑𝖨𝗅𝗜𝗹𝘐𝘭𝙄𝙡𝙸𝚕𝚰𝛪𝜤𝝞𝞘𝟏𝟙𝟣𝟭𝟷𞣇𞸀𞺀",
"2": "ƧϨᒿꙄꛯꝚ2𝟐𝟚𝟤𝟮𝟸",
"3": "ƷȜЗӠⳌꝪꞫ3𑣊𖼻𝈆𝟑𝟛𝟥𝟯𝟹",
"4": "Ꮞ4𑢯𝟒𝟜𝟦𝟰𝟺",
"5": "Ƽ5𑢻𝟓𝟝𝟧𝟱𝟻",
"6": "бᏮⳒ6𑣕𝟔𝟞𝟨𝟲𝟼",
"7": "7𐓒𑣆𝈒𝟕𝟟𝟩𝟳𝟽",
"8": "Ȣȣ৪੪ଃ8𐌚𝟖𝟠𝟪𝟴𝟾𞣋",
"9": "৭੧୨൭ⳊꝮ9𑢬𑣌𑣖𝟗𝟡𝟫𝟵𝟿",
"A": "4ΑАᎪᗅᴀꓮꭺA𐊠𖽀𝐀𝐴𝑨𝒜𝓐𝔄𝔸𝕬𝖠𝗔𝘈𝘼𝙰𝚨𝛢𝜜𝝖𝞐",
"B": "ʙΒВвᏴᏼᗷᛒℬꓐꞴB𐊂𐊡𐌁𝐁𝐵𝑩𝓑𝔅𝔹𝕭𝖡𝗕𝘉𝘽𝙱𝚩𝛣𝜝𝝗𝞑",
"C": "ϹСᏟℂℭⅭⲤꓚC𐊢𐌂𐐕𐔜𑣩𑣲𝐂𝐶𝑪𝒞𝓒𝕮𝖢𝗖𝘊𝘾𝙲🝌",
"D": "ᎠᗞᗪᴅⅅⅮꓓꭰD𝐃𝐷𝑫𝒟𝓓𝔇𝔻𝕯𝖣𝗗𝘋𝘿𝙳",
"E": "ΕЕᎬᴇℰ⋿ⴹꓰꭼE𐊆𑢦𑢮𝐄𝐸𝑬𝓔𝔈𝔼𝕰𝖤𝗘𝘌𝙀𝙴𝚬𝛦𝜠𝝚𝞔",
"F": "ϜᖴℱꓝꞘF𐊇𐊥𐔥𑢢𑣂𝈓𝐅𝐹𝑭𝓕𝔉𝔽𝕱𝖥𝗙𝘍𝙁𝙵𝟊",
"G": "ɢԌԍᏀᏳᏻꓖꮐG𝐆𝐺𝑮𝒢𝓖𝔊𝔾𝕲𝖦𝗚𝘎𝙂𝙶",
"H": "ʜΗНнᎻᕼℋℌℍⲎꓧꮋH𐋏𝐇𝐻𝑯𝓗𝕳𝖧𝗛𝘏𝙃𝙷𝚮𝛨𝜢𝝜𝞖",
"I": "",
"J": "ͿЈᎫᒍᴊꓙꞲꭻJ𝐉𝐽𝑱𝒥𝓙𝔍𝕁𝕵𝖩𝗝𝘑𝙅𝙹",
"K": "ΚКᏦᛕKⲔꓗK𐔘𝐊𝐾𝑲𝒦𝓚𝔎𝕂𝕶𝖪𝗞𝘒𝙆𝙺𝚱𝛫𝜥𝝟𝞙",
"L": "ʟᏞᒪℒⅬⳐⳑꓡꮮL𐐛𐑃𐔦𑢣𑢲𖼖𝈪𝐋𝐿𝑳𝓛𝔏𝕃𝕷𝖫𝗟𝘓𝙇𝙻",
"M": "ΜϺМᎷᗰᛖℳⅯⲘꓟM𐊰𐌑𝐌𝑀𝑴𝓜𝔐𝕄𝕸𝖬𝗠𝘔𝙈𝙼𝚳𝛭𝜧𝝡𝞛",
"N": "ɴΝℕⲚꓠN𐔓𝐍𝑁𝑵𝒩𝓝𝔑𝕹𝖭𝗡𝘕𝙉𝙽𝚴𝛮𝜨𝝢𝞜",
"O": "0",
"P": "ΡРᏢᑭᴘᴩℙⲢꓑꮲP𐊕𝐏𝑃𝑷𝒫𝓟𝔓𝕻𝖯𝗣𝘗𝙋𝙿𝚸𝛲𝜬𝝦𝞠",
"Q": "ℚⵕQ𝐐𝑄𝑸𝒬𝓠𝔔𝕼𝖰𝗤𝘘𝙌𝚀",
"R": "ƦʀᎡᏒᖇᚱℛℜℝꓣꭱꮢR𐒴𖼵𝈖𝐑𝑅𝑹𝓡𝕽𝖱𝗥𝘙𝙍𝚁",
"S": "$ЅՏᏕᏚꓢS𐊖𐐠𖼺𝐒𝑆𝑺𝒮𝓢𝔖𝕊𝕾𝖲𝗦𝘚𝙎𝚂",
"T": "ŤΤτТтᎢᴛ⊤⟙ⲦꓔꭲT𐊗𐊱𐌕𑢼𖼊𝐓𝑇𝑻𝒯𝓣𝔗𝕋𝕿𝖳𝗧𝘛𝙏𝚃𝚻𝛕𝛵𝜏𝜯𝝉𝝩𝞃𝞣𝞽🝨",
"U": "Սሀᑌ∪⋃ꓴU𐓎𑢸𖽂𝐔𝑈𝑼𝒰𝓤𝔘𝕌𝖀𝖴𝗨𝘜𝙐𝚄",
"V": "Ѵ٧۷ᏙᐯⅤⴸꓦꛟV𐔝𑢠𖼈𝈍𝐕𝑉𝑽𝒱𝓥𝔙𝕍𝖁𝖵𝗩𝘝𝙑𝚅",
"W": "ԜᎳᏔꓪW𑣦𑣯𝐖𝑊𝑾𝒲𝓦𝔚𝕎𝖂𝖶𝗪𝘞𝙒𝚆",
"X": "ΧХ᙭ᚷⅩ╳ⲬⵝꓫꞳX𐊐𐊴𐌗𐌢𐔧𑣬𝐗𝑋𝑿𝒳𝓧𝔛𝕏𝖃𝖷𝗫𝘟𝙓𝚇𝚾𝛸𝜲𝝬𝞦",
"Y": "ΥϒУҮᎩᎽⲨꓬY𐊲𑢤𖽃𝐘𝑌𝒀𝒴𝓨𝔜𝕐𝖄𝖸𝗬𝘠𝙔𝚈𝚼𝛶𝜰𝝪𝞤",
"Z": "ΖᏃℤℨꓜZ𐋵𑢩𑣥𝐙𝑍𝒁𝒵𝓩𝖅𝖹𝗭𝘡𝙕𝚉𝚭𝛧𝜡𝝛𝞕",
"a": "@ɑαа⍺a𝐚𝑎𝒂𝒶𝓪𝔞𝕒𝖆𝖺𝗮𝘢𝙖𝚊𝛂𝛼𝜶𝝰𝞪",
"b": "ƄЬᏏᖯb𝐛𝑏𝒃𝒷𝓫𝔟𝕓𝖇𝖻𝗯𝘣𝙗𝚋",
"c": "ϲсᴄⅽⲥꮯc𐐽𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌",
"d": "ԁᏧᑯⅆⅾꓒd𝐝𝑑𝒅𝒹𝓭𝔡𝕕𝖉𝖽𝗱𝘥𝙙𝚍",
"e": "еҽ℮ℯⅇꬲe𝐞𝑒𝒆𝓮𝔢𝕖𝖊𝖾𝗲𝘦𝙚𝚎",
"f": "ſϝքẝꞙꬵf𝐟𝑓𝒇𝒻𝓯𝔣𝕗𝖋𝖿𝗳𝘧𝙛𝚏𝟋",
"g": "ƍɡցᶃℊg𝐠𝑔𝒈𝓰𝔤𝕘𝖌𝗀𝗴𝘨𝙜𝚐",
"h": "һհᏂℎh𝐡𝒉𝒽𝓱𝔥𝕙𝖍𝗁𝗵𝘩𝙝𝚑",
"i": "ıɩɪ˛ͺιіӏᎥιℹⅈⅰ⍳ꙇꭵi𑣃𝐢𝑖𝒊𝒾𝓲𝔦𝕚𝖎𝗂𝗶𝘪𝙞𝚒𝚤𝛊𝜄𝜾𝝸𝞲",
"j": "ϳјⅉj𝐣𝑗𝒋𝒿𝓳𝔧𝕛𝖏𝗃𝗷𝘫𝙟𝚓",
"k": "k𝐤𝑘𝒌𝓀𝓴𝔨𝕜𝖐𝗄𝗸𝘬𝙠𝚔",
"l": "1",
"m": "ｍ",
"n": "ոռn𝐧𝑛𝒏𝓃𝓷𝔫𝕟𝖓𝗇𝗻𝘯𝙣𝚗",
"o": "",
"p": "ρϱр⍴ⲣp𝐩𝑝𝒑𝓅𝓹𝔭𝕡𝖕𝗉𝗽𝘱𝙥𝚙𝛒𝛠𝜌𝜚𝝆𝝔𝞀𝞎𝞺𝟈",
"q": "ԛգզq𝐪𝑞𝒒𝓆𝓺𝔮𝕢𝖖𝗊𝗾𝘲𝙦𝚚",
"r": "гᴦⲅꭇꭈꮁr𝐫𝑟𝒓𝓇𝓻𝔯𝕣𝖗𝗋𝗿𝘳𝙧𝚛",
"s": "$ƽѕꜱꮪs𐑈𑣁𝐬𝑠𝒔𝓈𝓼𝔰𝕤𝖘𝗌𝘀𝘴𝙨𝚜",
"t": "t𝐭𝑡𝒕𝓉𝓽𝔱𝕥𝖙𝗍𝘁𝘵𝙩𝚝",
"u": "ʋυսᴜꞟꭎꭒu𐓶𑣘𝐮𝑢𝒖𝓊𝓾𝔲𝕦𝖚𝗎𝘂𝘶𝙪𝚞𝛖𝜐𝝊𝞄𝞾",
"v": "νѵטᴠⅴ∨⋁ꮩv𑜆𑣀𝐯𝑣𝒗𝓋𝓿𝔳𝕧𝖛𝗏𝘃𝘷𝙫𝚟𝛎𝜈𝝂𝝼𝞶",
"w": "ɯѡԝաᴡꮃw𑜊𑜎𑜏𝐰𝑤𝒘𝓌𝔀𝔴𝕨𝖜𝗐𝘄𝘸𝙬𝚠",
"x": "×хᕁᕽ᙮ⅹ⤫⤬⨯x𝐱𝑥𝒙𝓍𝔁𝔵𝕩𝖝𝗑𝘅𝘹𝙭𝚡",
"y": "ɣʏγуүყᶌỿℽꭚy𑣜𝐲𝑦𝒚𝓎𝔂𝔶𝕪𝖞𝗒𝘆𝘺𝙮𝚢𝛄𝛾𝜸𝝲𝞬",
"z": "ᴢꮓz𑣄𝐳𝑧𝒛𝓏𝔃𝔷𝕫𝖟𝗓𝘇𝘻𝙯𝚣",
"£": "₤",
"©": "Ⓒ",
"®": "Ⓡ"
}
Mga Halimbawa
- WordSearch
- Redact
- I-redact mula sa file path papunta sa file path
- I-redact mula sa file path papunta sa memory
- I-redact mula sa memory
- I-redact ang mga file sa isang directory
- I-redact ang mga file sa isang directory na maaaring naglalaman ng mga hindi suportadong uri ng file
- I-redact ang mga file sa isang directory nang may kondisyon batay sa format ng file
- Redact
WordSearch
Tingnan ang Loading a Glasswall Library para sa mga detalye kung paano i-load ang WordSearch library.
Redact
Maaaring i-redact ang mga file nang paisa-isa mula sa isang file path o sa memory gamit ang redact_file method, o maaaring i-redact ang lahat ng file sa isang directory gamit ang redact_directory method.
I-redact mula sa file path papunta sa file path
import glasswall

# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# Policy: redact the text "lorem" and "ipsum", using "*" as the replacement character
redaction_policy = glasswall.content_management.policies.WordSearch(
    config={
        "textSearchConfig": {
            "@libVersion": "core2",
            "textList": [
                {"name": "textItem", "switches": [
                    {"name": "text", "value": search_text},
                    {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                ]}
                for search_text in ("lorem", "ipsum")
            ]
        }
    }
)

# Redact the input file, writing the redacted file to a new path
word_search.redact_file(
    input_file=r"C:\gwpw\input_redact\lorem_ipsum.docx",
    output_file=r"C:\gwpw\output\word_search\redact_f2f\lorem_ipsum.docx",
    content_management_policy=redaction_policy
)
I-redact mula sa file path papunta sa memory
Ang redact_file ay nagbabalik ng object na may mga attribute na: "status" (int), "output_file" (bytes), "output_report" (bytes). Ipinapakita ng halimbawa sa ibaba ang pag-assign sa variable na result at ang pag-check sa nilalaman ng simula ng na-redact na output_file at ng output_report.
import glasswall

# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# Policy: redact the text "lorem" and "ipsum", using "*" as the replacement character
redaction_policy = glasswall.content_management.policies.WordSearch(
    config={
        "textSearchConfig": {
            "@libVersion": "core2",
            "textList": [
                {"name": "textItem", "switches": [
                    {"name": "text", "value": search_text},
                    {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                ]}
                for search_text in ("lorem", "ipsum")
            ]
        }
    }
)

# Redact the input file. No output path is given (output_file=None); the redacted
# file and the report are read from the returned object instead.
result = word_search.redact_file(
    input_file=r"C:\gwpw\input_redact\lorem_ipsum.docx",
    output_file=None,
    content_management_policy=redaction_policy
)

# Check the first bytes of the redacted file held in memory
redacted_head = result.output_file[:6]
assert redacted_head == b'PK\x03\x04\x14\x00'

# Check the beginning of the XML report held in memory
report_head = result.output_report[:500]
assert report_head == b'<gw:WordSearchStatistics xmlns:gw="http://glasswall.com/namespace">\n\t<gw:DocumentSummary>\n\t\t<gw:TotalSizeInBytes>14292</gw:TotalSizeInBytes>\n\t\t<gw:FileType>docx</gw:FileType>\n\t\t<gw:TotalItemMatchCount>14</gw:TotalItemMatchCount>\n\t</gw:DocumentSummary>\n\t<gw:WordItem>\n\t\t<gw:Name>ipsum</gw:Name>\n\t\t<gw:ItemMatchCount>8</gw:ItemMatchCount>\n\t\t<gw:Locations>\n\t\t\t<gw:Location>\n\t\t\t\t<gw:Offset>120</gw:Offset>\n\t\t\t\t<gw:Page>0</gw:Page>\n\t\t\t\t<gw:Paragraph>0</gw:Paragraph>\n\t\t\t</gw:Location>\n\t\t\t<gw:Location>\n\t\t\t'
I-redact mula sa memory
import glasswall

# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# Read file from disk to memory
with open(r"C:\gwpw\input_redact\lorem_ipsum.docx", "rb") as f:
    input_bytes = f.read()

# Redact occurrences of the text "lorem" and "ipsum" within the in-memory input bytes,
# writing the redacted file to a new path
result = word_search.redact_file(
    input_file=input_bytes,
    output_file=r"C:\gwpw\output\word_search\redact_m2f\lorem_ipsum.docx",
    content_management_policy=glasswall.content_management.policies.WordSearch(
        config={
            "textSearchConfig": {
                "@libVersion": "core2",
                "textList": [
                    {"name": "textItem", "switches": [
                        {"name": "text", "value": "lorem"},
                        {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                    ]},
                    {"name": "textItem", "switches": [
                        {"name": "text", "value": "ipsum"},
                        {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                    ]},
                ]
            }
        }
    )
)

# The returned object also carries the redacted file and the XML report as bytes
assert result.output_file[:6] == b'PK\x03\x04\x14\x00'
assert result.output_report[:500] == b'<gw:WordSearchStatistics xmlns:gw="http://glasswall.com/namespace">\n\t<gw:DocumentSummary>\n\t\t<gw:TotalSizeInBytes>14292</gw:TotalSizeInBytes>\n\t\t<gw:FileType>docx</gw:FileType>\n\t\t<gw:TotalItemMatchCount>14</gw:TotalItemMatchCount>\n\t</gw:DocumentSummary>\n\t<gw:WordItem>\n\t\t<gw:Name>ipsum</gw:Name>\n\t\t<gw:ItemMatchCount>8</gw:ItemMatchCount>\n\t\t<gw:Locations>\n\t\t\t<gw:Location>\n\t\t\t\t<gw:Offset>120</gw:Offset>\n\t\t\t\t<gw:Page>0</gw:Page>\n\t\t\t\t<gw:Paragraph>0</gw:Paragraph>\n\t\t\t</gw:Location>\n\t\t\t<gw:Location>\n\t\t\t'
I-redact ang mga file sa isang directory
Ang redact_directory ay nagbabalik ng dictionary ng mga file path na relative sa input_directory, at isang object na may mga attribute na: "status" (int), "output_file" (bytes), "output_report" (bytes). Ipinapakita ng halimbawa sa ibaba ang pag-assign sa variable na results at ang pag-check sa mga key at value ng results dictionary.
import glasswall

# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# One textItem per search term; each is redacted with "*" as the replacement character
text_items = [
    {"name": "textItem", "switches": [
        {"name": "text", "value": search_text},
        {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
    ]}
    for search_text in ("lorem", "ipsum")
]
redaction_policy = glasswall.content_management.policies.WordSearch(
    config={
        "textSearchConfig": {
            "@libVersion": "core2",
            "textList": text_items
        }
    }
)

# Redact "lorem" and "ipsum" in each file of the input_directory, writing each
# redacted file to a new path in the output_directory
results = word_search.redact_directory(
    input_directory=r"C:\gwpw\input_redact",
    output_directory=r"C:\gwpw\output\word_search\redact_directory",
    content_management_policy=redaction_policy
)

# Keys are file paths relative to input_directory
assert list(results.keys()) == ['lorem_ipsum.docx', 'lorem_ipsum.pptx']
# Every file in this example is expected to come back with status 1
assert all(entry.status == 1 for entry in results.values())
I-redact ang mga file sa isang directory na maaaring naglalaman ng mga hindi suportadong uri ng file
Ang default na pag-uugali ng Glasswall Python wrapper ay mag-raise ng kaukulang exception (tingnan ang: glasswall.libraries.word_search.errors) kung mabigo ang pagproseso. Ang pagpasa ng raise_unsupported=False ay pipigil sa pag-raise ng exception at maaaring maging kapaki-pakinabang kapag nagtatrabaho sa isang directory na naglalaman ng halo ng parehong supported at unsupported na mga uri ng file, kapag nais na maproseso ang pinakamarami sa mga file hangga't maaari sa halip na tumigil sa unang pagkabigo.
Ang halimbawa ng input directory sa ibaba ay naglalaman ng parehong dalawang file sa halimbawa sa itaas pati na rin ng isang file na may hindi suportadong format ng file: python-package.yml. Maaari nating siyasatin ang mga key-value pair sa results dictionary at makita na ang object na ibinalik para sa python-package.yml file ay nagbalik ng status: 0, isang pagkabigo. Ang output_file attribute ay walang lamang bytes, at ang output_report bytes ay napunan ng isang ulat na may kasamang IssueItem na naglalarawan sa mga problemang naranasan habang sinusubukang i-redact ang file: File contents could not be accessed.
import glasswall

# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

# One textItem per search term; each is redacted with "*" as the replacement character
text_items = [
    {"name": "textItem", "switches": [
        {"name": "text", "value": search_text},
        {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
    ]}
    for search_text in ("lorem", "ipsum")
]
redaction_policy = glasswall.content_management.policies.WordSearch(
    config={
        "textSearchConfig": {
            "@libVersion": "core2",
            "textList": text_items
        }
    }
)

# Redact "lorem" and "ipsum" in each file of the input_directory, writing each
# redacted file to a new path in the output_directory. raise_unsupported=False
# is passed so that a file that fails does not raise an exception.
results = word_search.redact_directory(
    input_directory=r"C:\gwpw\input_redact_with_unsupported_file_types",
    output_directory=r"C:\gwpw\output\word_search\redact_directory_unsupported",
    content_management_policy=redaction_policy,
    raise_unsupported=False
)

# The unsupported file is still listed in the results, with status 0
assert list(results.keys()) == ["lorem_ipsum.docx", "lorem_ipsum.pptx", "python-package.yml"]
statuses = [entry.status for entry in results.values()]
assert statuses == [1, 1, 0]

# Show the attributes of the result object for the unsupported file
print(vars(results["python-package.yml"]))
# {'status': 0,
# 'output_file': b'',
# 'output_report': b'<gw:WordSearchStatistics xmlns:gw="http://glasswall.com/namespace">\n\t<gw:IssueItem>\n\t\t<gw:Description>File contents could not be accessed</gw:Description>\n\t</gw:IssueItem>\n\t<gw:DocumentSummary>\n\t\t<gw:TotalSizeInBytes>1460</gw:TotalSizeInBytes>\n\t\t<gw:FileType>Unknown</gw:FileType>\n\t\t<gw:TotalItemMatchCount>0</gw:TotalItemMatchCount>\n\t</gw:DocumentSummary>\n\t<gw:WordItem>\n\t\t<gw:Name>ipsum</gw:Name>\n\t\t<gw:ItemMatchCount>0</gw:ItemMatchCount>\n\t\t<gw:Locations/>\n\t</gw:WordItem>\n\t<gw:WordItem>\n\t\t<gw:Name>lorem</gw:Name>\n\t\t<gw:ItemMatchCount>0</gw:ItemMatchCount>\n\t\t<gw:Locations/>\n\t</gw:WordItem>\n</gw:WordSearchStatistics>\n\n'}
I-redact ang mga file sa isang directory nang may kondisyon batay sa format ng file
Ipinapakita ng halimbawa sa ibaba ang pagre-redact ng mga docx at pptx file lamang mula sa isang directory na naglalaman din ng iba pang hindi suportadong mga uri ng file.
import os

import glasswall

# Load the Glasswall Editor library (used here only to determine each file's type)
editor = glasswall.Editor(r"C:\gwpw\libraries\10.0")

# Load the Glasswall WordSearch library
word_search = glasswall.WordSearch(r"C:\gwpw\libraries\10.0")

input_directory = r"C:\gwpw\input_redact_with_unsupported_file_types"
output_directory = r"C:\gwpw\output\word_search\redact_directory_file_format"

# Iterate relative file paths from input_directory
for relative_file in glasswall.utils.list_file_paths(input_directory, absolute=False):
    # Construct absolute paths
    input_file = os.path.join(input_directory, relative_file)
    output_file = os.path.join(output_directory, relative_file)

    # Get the file type of the file; raise_unsupported=False is passed so that
    # unsupported files do not raise an exception
    file_type = editor.determine_file_type(
        input_file=input_file,
        as_string=True,
        raise_unsupported=False
    )

    # Redact only docx and pptx files; every other file type is skipped
    if file_type in ["docx", "pptx"]:
        # Redact occurrences of the text "lorem" and "ipsum" within the input file,
        # writing the redacted file to a new path
        word_search.redact_file(
            input_file=input_file,
            output_file=output_file,
            content_management_policy=glasswall.content_management.policies.WordSearch(
                config={
                    "textSearchConfig": {
                        "@libVersion": "core2",
                        "textList": [
                            {"name": "textItem", "switches": [
                                {"name": "text", "value": "lorem"},
                                {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                            ]},
                            {"name": "textItem", "switches": [
                                {"name": "text", "value": "ipsum"},
                                {"name": "textSetting", "@replacementChar": "*", "value": "redact"},
                            ]},
                        ]
                    }
                }
            )
        )