# Run pdfsizeopt on document.pdf with --use-pngout, --use-jbig2 and
# --use-sam2p-pr all set to NO, and with a custom image optimizer command:
# `convert` turns the source image into a PBM stream, scan-filter processes
# that stream, and a second `convert` writes the result to the target file.
# NOTE(review): %(sourcefnq)s and %(targetfnq)s are presumably placeholders
# that pdfsizeopt replaces with quoted file names; confirm against its docs.
pdfsizeopt --use-pngout=NO --use-jbig2=NO --use-sam2p-pr=NO \
--use-image-optimizer="convert %(sourcefnq)s pbm:- | scan-filter | convert pbm:- %(targetfnq)s" \
document.pdf
where scan-filter
is the compiled binary of the program below.
The following source code can be compiled with any standard C compiler.
/*
 * scan-filter: despeckle a bilevel scan.
 *
 * Reads one binary PBM ("P4") image from standard input, fills small white
 * holes and removes isolated black dots, and writes the cleaned image as a
 * binary PBM to standard output.  Statistics and error messages go to
 * standard error so that they never end up inside the image stream.
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Process exit statuses. */
enum {
    STATUS_USAGE = 1,         /* unexpected command-line arguments */
    STATUS_BAD_MAGIC = 2,     /* input is not a "P4" file */
    STATUS_SHORT_DATA = 3,    /* fewer pixel bytes than the header promises */
    STATUS_TRAILING_DATA = 4, /* bytes left over after the pixel data */
    STATUS_BAD_SIZE = 5,      /* missing, zero or oversized dimensions */
    STATUS_NO_MEMORY = 6,     /* pixel buffer could not be allocated */
    STATUS_WRITE_FAILED = 7   /* output could not be written */
};

/* Image state shared by all functions; filled in by read_image(). */
static size_t bytes_of_row;   /* bytes per packed row: ceil(width / 8) */
static size_t width, height;  /* image size in pixels */
static unsigned char *buf;    /* packed pixels, row-major, MSB is leftmost */

/* Print `message` on standard error and terminate with `status`. */
static void die(int status, const char *message)
{
    fprintf(stderr, "%s\n", message);
    exit(status);
}

/*
 * Return 1 if the pixel at (row, col) is set and 0 otherwise.
 * The caller must pass coordinates inside the image.
 */
char get_pixel(int row, int col)
{
    const unsigned char byte =
        buf[(size_t)row * bytes_of_row + (size_t)col / 8];
    return (byte & (1 << (7 - col % 8))) ? 1 : 0;
}

/*
 * Set the pixel at (row, col) when `val` is non-zero, clear it otherwise.
 * The caller must pass coordinates inside the image.
 */
void set_pixel(int row, int col, char val)
{
    const size_t byte_index = (size_t)row * bytes_of_row + (size_t)col / 8;
    const unsigned char byte_mask = (unsigned char)(1 << (7 - col % 8));

    if (val) {
        buf[byte_index] |= byte_mask;
    } else {
        buf[byte_index] &= (unsigned char)~byte_mask;
    }
}

/*
 * Weighted count of set pixels around (row, col): each of the four edge
 * neighbours counts 2 and each of the four diagonal neighbours counts 1,
 * so the result is in 0..12.  (row, col) must not lie on the image border.
 */
static int neighbour_weight(int row, int col)
{
    return get_pixel(row - 1, col - 1)
         + get_pixel(row - 1, col) * 2
         + get_pixel(row - 1, col + 1)
         + get_pixel(row, col - 1) * 2
         + get_pixel(row, col + 1) * 2
         + get_pixel(row + 1, col - 1)
         + get_pixel(row + 1, col) * 2
         + get_pixel(row + 1, col + 1);
}

/*
 * Read a single binary PBM image from standard input into the global
 * buffer.  Exits with a diagnostic on a wrong magic number, invalid
 * dimensions, allocation failure, truncated pixel data or trailing data.
 */
void read_image(void)
{
    char magic[10];

    if (scanf("%9s", magic) != 1 || strcmp(magic, "P4") != 0) {
        die(STATUS_BAD_MAGIC, "Only the binary pbm is implemented");
    }
    /*
     * Zero dimensions would make the border arithmetic in the filters
     * meaningless, and the pixel accessors take int coordinates, so both
     * dimensions must be in 1..INT_MAX.
     */
    if (scanf("%zu%zu", &width, &height) != 2 ||
        width == 0 || height == 0 ||
        width > (size_t)INT_MAX || height > (size_t)INT_MAX) {
        die(STATUS_BAD_SIZE, "Invalid image dimensions");
    }
    getchar(); /* the single whitespace byte that ends the header */

    bytes_of_row = (width + 7) / 8;
    buf = calloc(bytes_of_row, height); /* calloc checks the product */
    if (buf == NULL) {
        die(STATUS_NO_MEMORY, "Not enough memory for the image");
    }
    if (fread(buf, bytes_of_row, height, stdin) != height) {
        die(STATUS_SHORT_DATA, "Not enough pixels in the image");
    }
    if (getchar() != EOF) {
        die(STATUS_TRAILING_DATA,
            "Only one image per file is supported or too much data");
    }
}

/*
 * Clear every set interior pixel whose neighbour weight is at most 1, i.e.
 * one that has no set edge neighbour and at most one set diagonal
 * neighbour.  The image is modified in place while it is scanned, so
 * earlier changes influence later pixels of the same pass.
 * Reports the count on standard error and returns it.
 */
size_t filter_dots(void)
{
    const int rows = (int)height; /* fits: checked in read_image() */
    const int cols = (int)width;
    size_t counter = 0;

    /* "i + 1 < n" skips the border without relying on n - 1. */
    for (int row = 1; row + 1 < rows; ++row) {
        for (int col = 1; col + 1 < cols; ++col) {
            if (get_pixel(row, col) == 1 && neighbour_weight(row, col) <= 1) {
                set_pixel(row, col, 0);
                ++counter;
            }
        }
    }
    fprintf(stderr, "Removed black dot: %zu\n", counter);
    return counter;
}

/*
 * Set every clear interior pixel whose neighbour weight is at least 8 (out
 * of a maximum of 12).  The image is modified in place while it is
 * scanned, so earlier changes influence later pixels of the same pass.
 * Reports the count on standard error and returns it.
 */
size_t fill_white(void)
{
    const int rows = (int)height; /* fits: checked in read_image() */
    const int cols = (int)width;
    size_t counter = 0;

    for (int row = 1; row + 1 < rows; ++row) {
        for (int col = 1; col + 1 < cols; ++col) {
            if (get_pixel(row, col) == 0 && neighbour_weight(row, col) >= 8) {
                set_pixel(row, col, 1);
                ++counter;
            }
        }
    }
    fprintf(stderr, "Filled white dot: %zu\n", counter);
    return counter;
}

/*
 * Write the image as a binary PBM to standard output.
 * Exits with a diagnostic if any part of the output cannot be written.
 */
void write_image(void)
{
    if (printf("P4\n%zu %zu\n", width, height) < 0 ||
        fwrite(buf, bytes_of_row, height, stdout) != height ||
        fflush(stdout) == EOF) {
        die(STATUS_WRITE_FAILED, "Failed to write the image");
    }
}

/*
 * Run both filters until each stops changing the image: first fill white
 * holes, then remove black dots.
 */
void process(void)
{
    while (fill_white()) {
        /* repeat until a pass fills nothing */
    }
    while (filter_dots()) {
        /* repeat until a pass removes nothing */
    }
}

int main(int argc, char *argv[])
{
    if (argc > 1) {
        fprintf(stderr,
                "Usage: %s\n It consumes a pbm from the standard input.",
                argv[0]);
        return STATUS_USAGE;
    }
    read_image();
    process();
    write_image();
    free(buf);
    buf = NULL;
    return 0;
}
In pdfsizeopt, the file
lib/pdfsizeopt/main.py
is modified around line 7815 as follows:
def CompareObjInfo(a, b):
  """Comparison function patched to return -1 for every pair of arguments.

  Both arguments are ignored. The comparison this replaces is kept below as
  a comment for reference.
  NOTE(review): presumably used as a cmp-style comparator when ordering
  candidate results; confirm against the call site in main.py.
  """
  # Instead of the original comparison:
  return -1
  # The original comparison:
  # Compare first by byte size, then by command name.
  # return a[0].__cmp__(b[0]) or CompareStr(a[1], b[1])