import argparse
import re
import sys


def extract_ascii_strings(data, min_length=4):
    """Return every run of printable ASCII bytes in *data* as text.

    A run is a maximal sequence of bytes in the range 0x20 (space) through
    0x7E (tilde) that is at least *min_length* bytes long.

    Args:
        data: Raw bytes to scan.
        min_length: Minimum number of characters a run must contain to be
            reported. Must be at least 1.

    Returns:
        A list of ``str``, in the order the runs occur in *data*.

    Raises:
        ValueError: If *min_length* is smaller than 1.
    """
    if min_length < 1:
        # ``{0,}`` would match the empty string at every offset, and a
        # negative bound is not parsed as a quantifier at all.
        raise ValueError("min_length must be at least 1")

    pattern = re.compile(rb'[\x20-\x7E]{%d,}' % min_length)
    # Every matched byte is plain ASCII, so decoding cannot fail.
    return [run.decode('ascii') for run in pattern.findall(data)]


def extract_unicode_strings(data, min_length=4):
    """Return every run of ASCII-range UTF-16LE text found in *data*.

    Only "wide" strings made of printable ASCII characters are detected:
    each character must be one byte in the range 0x20-0x7E immediately
    followed by a 0x00 byte. Characters outside that range are not matched.

    Args:
        data: Raw bytes to scan.
        min_length: Minimum number of characters (two-byte units) a run
            must contain to be reported. Must be at least 1.

    Returns:
        A list of ``str``, in the order the runs occur in *data*.

    Raises:
        ValueError: If *min_length* is smaller than 1.
    """
    if min_length < 1:
        # ``{0,}`` would match the empty string at every offset, and a
        # negative bound is not parsed as a quantifier at all.
        raise ValueError("min_length must be at least 1")

    pattern = re.compile(rb'(?:[\x20-\x7E]\x00){%d,}' % min_length)
    # Matches are whole (ASCII byte, NUL) pairs, i.e. always valid UTF-16LE.
    return [run.decode('utf-16le') for run in pattern.findall(data)]


def extract_strings_from_dll(dll_path, min_length=4):
    """Read the file at *dll_path* and return the strings found in it.

    The whole file is read into memory and scanned twice: once for
    printable ASCII runs and once for ASCII-range UTF-16LE runs.

    Args:
        dll_path: Path of the file to scan; it is opened in binary mode.
        min_length: Minimum number of characters a run must contain to be
            reported. Forwarded unchanged to both extractors.

    Returns:
        A list of ``str``: all ASCII strings first, followed by all
        UTF-16LE strings, each group in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(dll_path, 'rb') as f:
        data = f.read()

    ascii_strings = extract_ascii_strings(data, min_length)
    unicode_strings = extract_unicode_strings(data, min_length)
    return ascii_strings + unicode_strings


def main(argv=None):
    """Command-line entry point: extract strings from a DLL file.

    Args:
        argv: Argument list to parse. ``None`` lets ``argparse`` fall back
            to the process command line.

    Returns:
        Process exit status: 0 on success, 1 if the input file could not
        be read or the output file could not be written.
    """
    parser = argparse.ArgumentParser(description="Extract text from a DLL file.")
    parser.add_argument("dll_path", help="Path to the DLL file")
    parser.add_argument("-o", "--output", help="Output file to save extracted text", default=None)
    args = parser.parse_args(argv)

    try:
        extracted_text = extract_strings_from_dll(args.dll_path)
    except OSError as exc:
        # Report a missing or unreadable input as a message, not a traceback.
        print(f"error: cannot read {args.dll_path}: {exc}", file=sys.stderr)
        return 1

    if not args.output:
        for line in extracted_text:
            print(line)
        return 0

    try:
        with open(args.output, "w", encoding="utf-8") as out_file:
            out_file.write("\n".join(extracted_text))
    except OSError as exc:
        print(f"error: cannot write {args.output}: {exc}", file=sys.stderr)
        return 1

    print(f"Extracted text saved to {args.output}")
    return 0


if __name__ == "__main__":
    sys.exit(main())