print the folder tree with sizes, windows

Effe Pelosa picture Effe Pelosa · Feb 1, 2016 · Viewed 22.3k times · Source

I'm looking for a way to print (or write to a file) the folder tree of my Windows PC, including the size of each folder, but not of individual files. The output should look like:

- My Documents/pictures/selfies/ - 100MB
- My Documents/movies/ - 1000MB
- My Music/Mozart/ - 300MB

...and so forth.

Notes: 1) Individual files can be listed if there's no way around it, but in a form that lets me remove them from the list programmatically (i.e. by parsing or with a regex) later.

2) the aim is to create a hierarchical file like http://bl.ocks.org/mbostock/raw/1283663/readme.json so a .json would be best, but not a requirement: I'll parse the text and create a .json file as a second step.

3) this is what I have, but I have no idea on how to convert it to my needs.

@echo off
rem Print every immediate subfolder of a folder followed by a size value
rem taken from a recursive dir listing of that subfolder.
rem The folder is read from the first argument; the current directory is
rem used when no argument is given.
rem Delayed expansion starts disabled, presumably so that exclamation marks
rem in folder names are kept intact - confirm.
setlocal disabledelayedexpansion

rem Take the target folder from the first argument with surrounding quotes removed.
set "folder=%~1"
rem Fall back to the current directory when no argument was supplied.
if not defined folder set "folder=%cd%"

rem Visit each direct subdirectory of the target folder.
rem For each one, size starts at 0 and a recursive dir listing is filtered by
rem findstr /b /c so that only lines beginning with two spaces remain.
rem Tokens 3 and 5 of every remaining line are read; when token 5 is empty,
rem token 3 is stored as the size, so the last such line wins.
rem NOTE review: this looks like it relies on the layout of the dir summary
rem lines, which presumably differs between system languages - confirm.
rem Delayed expansion is enabled only around the echo so that the size set by
rem the inner loop is the value printed, as: folder name, a hash sign, size.
for /d %%a in ("%folder%\*") do (
    set "size=0"
    for /f "tokens=3,5" %%b in ('dir /-c /a /w /s "%%~fa\*" 2^>nul ^| findstr /b /c:"  "') do if "%%~c"=="" set "size=%%~b"
    setlocal enabledelayedexpansion
    echo(%%~nxa # !size!
    endlocal
)

endlocal

4) I can only work in python and batch scripting on my machine :(

Thanks all AC

Answer

CristiFati picture CristiFati · Feb 2, 2016

Here's the Python (2.7 compatible syntax) script:

import sys
from os import stat, getcwd
from os.path import isdir, isfile, join
from glob import glob
from pprint import pprint

NAME_KEY = "name"
SIZE_KEY = "size"
CHILDREN_KEY = "children"

def _iter_path_w_files(path):
    """Describe ``path`` recursively, listing every file individually.

    Returns:
        * for a regular file: ``{NAME_KEY: path, SIZE_KEY: size_in_bytes}``;
        * for a directory: ``{NAME_KEY: path, CHILDREN_KEY: [...]}`` holding
          one such description per entry, built recursively;
        * for anything else (e.g. a broken symbolic link): ``None``.

    Directory entries are enumerated with ``os.listdir`` rather than
    ``glob(join(path, "*"))``: a glob pattern treats ``[``, ``]``, ``*`` and
    ``?`` in the parent path as wildcards and skips names starting with a
    dot, so such folders would be reported with missing children.
    """
    # Local import: the module only pulls ``stat`` and ``getcwd`` from ``os``.
    from os import listdir

    if isfile(path):
        return {NAME_KEY: path, SIZE_KEY: stat(path).st_size}
    if not isdir(path):
        return None

    try:
        names = listdir(path)
    except OSError:
        # Unreadable directory: keep the traversal best-effort and report it
        # as empty instead of aborting the whole scan.
        names = []

    ret = {NAME_KEY: path, CHILDREN_KEY: []}
    for name in names:
        child_ret = _iter_path_w_files(join(path, name))
        # Entries that are neither file nor directory produce None; leave
        # them out so the children list contains only real descriptions.
        if child_ret is not None:
            ret[CHILDREN_KEY].append(child_ret)
    return ret

def _iter_path_wo_files(path):
    """Describe directory ``path`` recursively, reporting folders only.

    Returns ``{NAME_KEY: path, SIZE_KEY: total}`` where ``total`` is the sum
    of the sizes of all files directly inside ``path`` plus the totals of all
    its subdirectories.  When ``path`` has subdirectories, their descriptions
    are stored under ``CHILDREN_KEY``; the key is absent otherwise.

    Directory entries are enumerated with ``os.listdir`` rather than
    ``glob(join(path, "*"))``: a glob pattern treats ``[``, ``]``, ``*`` and
    ``?`` in the parent path as wildcards and skips names starting with a
    dot, which would under-report folder sizes.
    """
    # Local import: the module only pulls ``stat`` and ``getcwd`` from ``os``.
    from os import listdir

    ret = {NAME_KEY: path, SIZE_KEY: 0}

    try:
        names = listdir(path)
    except OSError:
        # Unreadable (or non-directory) path: keep the traversal best-effort
        # and count it as empty instead of aborting the whole scan.
        names = []

    for name in names:
        child = join(path, name)
        if isfile(child):
            ret[SIZE_KEY] += stat(child).st_size
        elif isdir(child):
            # Only real directories become children; entries that are neither
            # file nor directory (e.g. broken links) are ignored rather than
            # being listed as empty folders.
            child_ret = _iter_path_wo_files(child)
            ret.setdefault(CHILDREN_KEY, []).append(child_ret)
            ret[SIZE_KEY] += child_ret[SIZE_KEY]
    return ret

def iter_path(path, show_files=True):
    """Return a size report for ``path``.

    With ``show_files`` true the work is delegated to
    ``_iter_path_w_files``, whatever ``path`` is.

    With ``show_files`` false:
        * a regular file yields its size in bytes as a plain number;
        * a directory yields the result of ``_iter_path_wo_files``;
        * anything else yields ``None``.
    """
    if show_files:
        return _iter_path_w_files(path)

    # Folder-only report: a lone file has no tree, so just give its size.
    if isfile(path):
        return stat(path).st_size
    return _iter_path_wo_files(path) if isdir(path) else None


if __name__ == "__main__":
    # Scan the directory given as the first command-line argument; without
    # one, scan the current working directory.
    path = sys.argv[1] if len(sys.argv) > 1 else getcwd()

    files = False  # Toggle this var if you want the files reported or not

    # Build the report and pretty-print it to standard output.
    d = iter_path(path, files)
    pprint(d)

For a directory tree like this (the numbers next to the files are their sizes):

  • dir0
    • file00 (6)
    • dir00
      • dir000
        • dir0000
          • file00000 (9)
      • file000 (7)
    • dir01
      • dir010
      • file010 (7)

the outputs would be:

files = False:

{'children': [
              {'children': [
                            {'children': [
                                          {'name': 'dir0\\dir00\\dir000\\dir0000',
                                           'size': 9L
                                          }
                                         ],
                             'name': 'dir0\\dir00\\dir000',
                             'size': 9L
                            }
                           ],
               'name': 'dir0\\dir00',
               'size': 16L
              },
              {'children': [
                            {'name': 'dir0\\dir01\\dir010',
                             'size': 0
                            }
                           ],
               'name': 'dir0\\dir01',
               'size': 7L
              }
             ],
 'name': 'dir0',
 'size': 29L
}

files = True:

{'name': 'dir0',
 'children': [
              {'name': 'dir0\\dir00',
               'children': [
                            {'name': 'dir0\\dir00\\dir000',
                             'children': [
                                          {'name': 'dir0\\dir00\\dir000\\dir0000',
                                           'children': [
                                                        {'name': 'dir0\\dir00\\dir000\\dir0000\\file00000',
                                                         'size': 9L
                                                        }
                                                       ]
                                          }
                                         ]
                            },
                            {'name': 'dir0\\dir00\\file000',
                             'size': 7L
                            }
                           ]
              },
              {'name': 'dir0\\dir01',
               'children': [
                            {'name': 'dir0\\dir01\\dir010',
                             'children': []
                            },
                            {'name': 'dir0\\dir01\\file010',
                             'size': 7L
                            }
                            ]
              },
              {'name': 'dir0\\file00',
               'size': 6L
              }
             ]
}

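Those are Python dictionaries (I formatted them for readability), which are fully compatible with JSON: you can try json.dumps(d), where d is the dictionary returned by iter_path. The trailing L in values such as 9L is only how Python 2 prints long integers; json.dumps writes them as plain numbers.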