initial commit
This commit is contained in:
parent
ae973dec45
commit
24655b478f
1 changed files with 91 additions and 0 deletions
91
Combine_Wordlists
Normal file
91
Combine_Wordlists
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
#!/bin/bash
|
||||
|
||||
#######################################
# Make sure the external `pv` tool is available, installing it with
# apt-get (through sudo) when it is not found.
# Arguments: none
# Outputs:   a progress message on stdout; warnings and errors on stderr
# Returns:   0 when pv was already present or was installed successfully,
#            1 when the installation failed
#######################################
install_dependencies() {
    # Nothing to do when pv can already be resolved.
    if command -v pv &> /dev/null; then
        return 0
    fi

    echo "Installing required tools..."

    # apt-get is used instead of apt because apt's command-line interface is
    # not intended for use in scripts.
    # A failed index refresh is not fatal: the package may still be
    # installable from the existing package lists.
    if ! sudo apt-get update; then
        echo "Warning: could not refresh the package lists; trying to install anyway." >&2
    fi

    if ! sudo apt-get install -y pv; then
        echo "Failed to install pv. Please install it manually and re-run." >&2
        return 1
    fi

    return 0
}
|
||||
|
||||
#######################################
# Helper: print every given file to stdout, making sure each one ends with
# a newline so the last line of one file never fuses with the first line of
# the next.
# Arguments: $@ - paths of the files to print
# Returns:   0 on success, 1 as soon as a file cannot be read
#######################################
_cw_stream_files() {
    local path
    for path in "$@"; do
        cat -- "$path" || return 1
        # $(...) strips a trailing newline, so a non-empty result means the
        # final byte of the file is something other than a newline.
        if [ -n "$(tail -c 1 -- "$path")" ]; then
            echo
        fi
    done
}

#######################################
# Helper: do the actual combine / sort / report work inside an existing
# scratch directory. Cleanup of that directory is the caller's job.
# Arguments: $1 - destination file
#            $2 - scratch directory (must exist)
#            $3.. - input files (already validated)
# Outputs:   progress and statistics on stdout, errors on stderr
# Returns:   0 on success, 1 on any failure
#######################################
_cw_combine_into() {
    local output_file="$1"
    local temp_dir="$2"
    shift 2

    local temp_combined="$temp_dir/temp_combined.txt"
    local temp_output="$temp_dir/combined_temp_output.txt"
    local file size total_size=0
    local -a stage_status

    # Total byte count, used only to give pv a size for its progress bar.
    for file in "$@"; do
        size=$(wc -c < "$file") || return 1
        total_size=$((total_size + size))
    done

    # The inputs are streamed in argument order straight into one file.
    # Splitting them into fixed-size byte chunks first would cut lines in
    # half and gain nothing: sort already spills to disk on its own (-T).
    echo "Combining files..."
    _cw_stream_files "$@" | pv -s "$total_size" > "$temp_combined"
    stage_status=("${PIPESTATUS[@]}")
    if [ "${stage_status[0]}" -ne 0 ] || [ "${stage_status[1]}" -ne 0 ]; then
        echo "Failed to combine the input files." >&2
        return 1
    fi

    echo "Counting total lines..."
    local total_lines
    total_lines=$(wc -l < "$temp_combined") || return 1
    total_lines=$((total_lines))   # normalise any padding in wc's output
    echo "Total lines: $total_lines"

    # LC_ALL=C makes the comparison byte-wise: in other locales distinct
    # strings can collate as equal and would be dropped by -u.
    echo "Sorting and deduplicating..."
    if ! pv "$temp_combined" | LC_ALL=C sort -u -T "$temp_dir" > "$temp_output"; then
        echo "Sorting failed." >&2
        return 1
    fi
    # The unsorted copy is no longer needed; release the space right away.
    rm -f -- "$temp_combined"

    local unique_lines
    unique_lines=$(wc -l < "$temp_output") || return 1
    unique_lines=$((unique_lines))
    echo "Unique lines: $unique_lines"

    # Statistics. The percentage is computed in hundredths of a percent with
    # integer arithmetic (multiply before dividing), so no precision is lost
    # and no external calculator is required.
    local duplicates=$((total_lines - unique_lines))
    local duplicate_percentage="0.00"
    if [ "$total_lines" -gt 0 ]; then
        local hundredths=$((duplicates * 10000 / total_lines))
        printf -v duplicate_percentage '%d.%02d' \
            "$((hundredths / 100))" "$((hundredths % 100))"
    fi

    # Move the final sorted and deduplicated output to the desired location.
    if ! mv -- "$temp_output" "$output_file"; then
        echo "Failed to write the output file: $output_file" >&2
        return 1
    fi

    echo "Combined and deduplicated file created: $output_file"
    echo "Total lines: $total_lines"
    echo "Unique lines: $unique_lines"
    echo "Duplicates removed: $duplicates"
    echo "Percentage of duplicates: $duplicate_percentage%"
}

#######################################
# Combine several text files into one sorted file without duplicate lines
# and print line statistics.
# Arguments: $1 - path of the output file to create
#            $2.. - paths of the input files
# Outputs:   progress and statistics on stdout, errors on stderr
# Returns:   0 on success, 1 when an input file is missing or a processing
#            step fails (the output file is not created in that case)
# Notes:     requires `pv`; installs an EXIT trap that removes the scratch
#            directory, replacing any EXIT trap set earlier.
#######################################
combine_and_deduplicate() {
    local output_file="$1"
    shift
    local files=("$@")
    local file

    # Validate every input before any work or temporary storage is created.
    for file in "${files[@]}"; do
        if [ ! -f "$file" ]; then
            echo "File not found: $file" >&2
            return 1
        fi
    done

    local temp_dir
    if ! temp_dir=$(mktemp -d); then
        echo "Failed to create a temporary directory." >&2
        return 1
    fi

    # Safety net for abnormal termination. The path is expanded (and shell-
    # quoted) now, because the local variable no longer exists by the time
    # an EXIT trap runs after this function has returned.
    local cleanup_cmd
    printf -v cleanup_cmd 'rm -rf -- %q' "$temp_dir"
    # shellcheck disable=SC2064 -- early expansion is intentional
    trap "$cleanup_cmd" EXIT

    local status=0
    _cw_combine_into "$output_file" "$temp_dir" "${files[@]}" || status=$?

    # Regular cleanup: do not keep scratch data around until script exit.
    rm -rf -- "$temp_dir"
    return "$status"
}
|
||||
|
||||
#######################################
# Interactive entry point: ensure dependencies are present, prompt for the
# input files and the output file name, then combine and deduplicate.
# Arguments: none are read; all input comes from the two prompts on stdin
# Outputs:   prompts and progress on stdout/terminal, errors on stderr
# Returns:   the status of combine_and_deduplicate; exits with status 1
#            when a dependency is missing or an answer is empty
#######################################
main() {
    # Kept local so the answers do not leak into the global scope.
    local -a files=()
    local output_file=""

    # Without the required tools the processing pipeline cannot work, so
    # stop here instead of failing halfway through.
    if ! install_dependencies; then
        echo "Required tools are missing. Exiting." >&2
        exit 1
    fi

    # Paths are split on whitespace, as the prompt states.
    read -rp "Enter the full paths to the files separated by spaces: " -a files

    if [ "${#files[@]}" -eq 0 ]; then
        echo "No files specified. Exiting." >&2
        exit 1
    fi

    read -rp "Enter the name for the output file: " output_file

    if [ -z "$output_file" ]; then
        echo "No output file name specified. Exiting." >&2
        exit 1
    fi

    combine_and_deduplicate "$output_file" "${files[@]}"
}
|
||||
|
||||
# Script entry point: run main, forwarding all command-line arguments.
main "$@"
|
||||
Loading…
Reference in a new issue