#! /usr/bin/python
__doc__ = """
 * Copyright (C) 2007 Matthew Howland
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.

filesync.py - non-interactive directory synchronization tool

usage: python filesync.py [options] /path/to/directory1 /path/to/directory2

This program/class is used to sync files from one directory to another.
It keeps track of what has been changed to a limited extent. It syncs
files both directions, not just one like RSYNC. It is also designed to
sync with multiple directories. This means that if a file is deleted from
one directory, the program realizes that it was recently deleted and will
delete the file from all directories that were previously synced when
synced again.

To start synchronizing two different directories you can either start with
a source directory and an empty destination directory or a destination
directory that was already copied from the source. The terms source and
destination are used losely because there is not really a source and
destination. Directories are synced both directions. At the time of
initial synchronization any files that exist in one directory and not the
other will be automatically copied.

A limitation of the program is that if the same file is changed in both
directories that are being synced, the program will keep the most recent
file. This may change in the future, but currently it is designed to be
non-interactive. Support may be added to make archives of changed files or
prompt the user.

Syncing does not need to start with the same root every time. Syncing is
done on an individual directory basis, so it does not matter what
directory you start with.

Why use this instead of RSYNC, CVS, or Subversion?

RSYNC:
filesync is a cross platform app that was originally designed for syncing
with a jump drive, but I found that it can be used for many other types.
RSYNC does not work as well with multiple directories especially if both
of those directories could contain updates. This syncs both directions.
Currently fileSync does not support compression like RSYNC does, but may
in the future.

CVS or Subversion:
filesync has much less overhead than either of these. Also with filesync
both of the directories are accessed as working directories, instead of
the client-server model of the versioning systems. A limitation is
filesync only takes care of the current version. In the future it may
include make archives of deleted or modified files."""

import os
import shutil
import filecmp
import pickle


class fileSync(object):
    # The class shares the module description.  No class docstring is written
    # here on purpose: it would shadow the module-level __doc__ being copied.
    __doc__ = __doc__

    verbose = False
    tab = ' '

    # Name of the per-directory bookkeeping file.  It is excluded from the
    # directory comparison so it is never copied or deleted by a sync.
    stateFileName = '.fileSync_files'

    # Stores info about the files & directories in the directory pair that is
    # currently being compared:
    #   "cFiles1"/"cFiles2" - names currently present on each side
    #   "pFiles1"/"pFiles2" - names present at the end of the previous sync
    #   "dFiles1"/"dFiles2" - names recorded as deleted on each side
    # Kept as a class-level default for backward compatibility; __init__ gives
    # every instance its own dictionary so instances never share state.
    files = {}

    def __init__(self, verbose=False):
        """Create a synchronizer.

        verbose -- when true, progress messages are printed to stdout.
        """
        self.verbose = verbose
        self.files = {}

    def __getattribute__(self, name):
        """Used to call a variable by just passing the name in as a string."""
        # Delegate to the default lookup.  Calling getattr(self, name) here
        # would re-enter this very method and recurse without end.
        return object.__getattribute__(self, name)

    def compareDir(self, dir1, dir2):
        """Synchronize dir1 and dir2 in both directions.

        Names that exist on only one side are copied across or deleted
        according to the recorded history (see syncDeleted).  Files that
        differ are overwritten with the copy that has the newer modification
        time; on a tie the copy in dir2 is kept.  The bookkeeping file of both
        directories is rewritten, then every sub-directory common to both
        sides is processed recursively.

        dir1, dir2 -- paths of the two directories to synchronize.
        """
        if self.verbose:
            print('Current Directories are:')
            print(self.tab + 'dir1=' + dir1)
            print(self.tab + 'dir2=' + dir2 + '\n')

        # Make sure both paths end with a separator so that names can simply
        # be appended to them (and so verbose output shows a directory).
        dir1 = os.path.join(dir1, '')
        dir2 = os.path.join(dir2, '')

        # Load what each directory looked like after its previous sync.
        self.files["pFiles1"], self.files["dFiles1"] = self._loadState(dir1)
        self.files["pFiles2"], self.files["dFiles2"] = self._loadState(dir2)

        # Passing an explicit ignore list replaces dircmp's default one, so
        # only the bookkeeping file is skipped.
        parseDir = filecmp.dircmp(dir1, dir2, [self.stateFileName])

        # These lists are updated while syncing and stored afterwards.
        self.files["cFiles1"] = parseDir.left_list
        self.files["cFiles2"] = parseDir.right_list

        # Names that exist only in dir1.
        names = parseDir.left_only
        self._announce(names, 'There are no left only files',
                       'These are files on the left only')
        for name in names:
            if self.verbose:
                print(self.tab * 3 + name)
            self.syncDeleted(left=True, src=dir1, dst=dir2, curDir=name)

        # Names that exist only in dir2.
        names = parseDir.right_only
        self._announce(names, 'There are no right only files',
                       'These are files on the right only')
        for name in names:
            if self.verbose:
                print(self.tab * 3 + name)
            self.syncDeleted(left=False, src=dir2, dst=dir1, curDir=name)

        # Files present on both sides whose size or timestamp differ: the most
        # recently modified copy wins.
        names = parseDir.diff_files
        self._announce(names, 'There are no changed files',
                       'These are files that are changed')
        for name in names:
            if self.verbose:
                print(self.tab * 3 + name)
            newerOnLeft = (os.path.getmtime(os.path.join(dir1, name)) >
                           os.path.getmtime(os.path.join(dir2, name)))
            if newerOnLeft:
                self.sCopy(dir1, dir2, name)
            else:
                self.sCopy(dir2, dir1, name)

        # Store the state before recursing: the recursive calls overwrite
        # self.files with the lists of the sub-directories.
        self._saveState(dir1, self.files['cFiles1'], self.files['dFiles1'])
        self._saveState(dir2, self.files['cFiles2'], self.files['dFiles2'])

        # Sub-directories that existed on both sides before this pass.
        names = parseDir.common_dirs
        self._announce(names, 'There are no common directories',
                       'These are the common directories')
        if self.verbose:
            for name in names:
                print(self.tab * 3 + name)
        for name in names:
            self.compareDir(os.path.join(dir1, name), os.path.join(dir2, name))

    def _announce(self, names, emptyMessage, listMessage):
        """Print the verbose heading for one category of names."""
        if self.verbose:
            print(self.tab * 2 + (listMessage if names else emptyMessage))

    def _loadState(self, directory):
        """Return (previousFiles, deletedFiles) stored for directory.

        Two empty lists are returned when the bookkeeping file is missing or
        cannot be read, which makes the directory look never-synced.
        """
        path = os.path.join(directory, self.stateFileName)
        if not os.path.exists(path):
            return [], []
        try:
            # Pickle data is binary; text mode corrupts it on Windows and is
            # rejected outright by Python 3.
            # SECURITY NOTE: unpickling can execute arbitrary code.  Only sync
            # directories whose bookkeeping file you trust.
            with open(path, 'rb') as handle:
                state = pickle.load(handle)
            return list(state["cFiles"]), list(state["dFiles"])
        except Exception:
            # Best effort: a damaged file must not abort the whole sync.
            if self.verbose:
                print(self.tab * 2 + path + ' could not be read, ignoring it')
            return [], []

    def _saveState(self, directory, currentFiles, deletedFiles):
        """Write the current and deleted name lists of directory to disk."""
        state = {'cFiles': currentFiles, 'dFiles': deletedFiles}
        path = os.path.join(directory, self.stateFileName)
        with open(path, 'wb') as handle:
            # Protocol 2 can be read by both Python 2 and Python 3.
            pickle.dump(state, handle, 2)

    def _remember(self, names, name):
        """Append name to names unless it is already listed."""
        if name not in names:
            names.append(name)

    def syncDeleted(self, left, src, dst, curDir):
        """Decide what to do with a name that exists on one side only.

        left   -- True when the name exists only in the first (left)
                  directory, False when it exists only in the second.
        src    -- directory that contains the name.
        dst    -- directory that lacks the name.
        curDir -- the file or directory name, relative to src.

        The name is either copied to dst or deleted from src, depending on
        the previous-file and deleted-file lists of both sides.
        """
        # "cur" is the side that has the name, "alt" the side that lacks it.
        cur, alt = ('1', '2') if left else ('2', '1')
        currentCur = self.files['cFiles' + cur]
        currentAlt = self.files['cFiles' + alt]
        previousCur = self.files['pFiles' + cur]
        previousAlt = self.files['pFiles' + alt]
        deletedCur = self.files['dFiles' + cur]
        deletedAlt = self.files['dFiles' + alt]

        wasOnAlt = curDir in previousAlt
        deletedOnCur = curDir in deletedCur
        deletedOnAlt = curDir in deletedAlt

        if not wasOnAlt and deletedOnCur and deletedOnAlt:
            # Marked deleted on both sides but present again: the name was
            # restored, or a new item with the same name was added.  Clear
            # the deletion marks and copy it to the side that lacks it.
            deletedCur.remove(curDir)
            deletedAlt.remove(curDir)
            self.sCopy(src, dst, curDir)
            currentAlt.append(curDir)
        elif not deletedOnAlt and wasOnAlt:
            # The other side had the name after its last sync and has no
            # deletion record yet: it was deleted there recently, so delete
            # it here as well and record the deletion on both sides.
            self.delPath(os.path.join(src, curDir))
            self._remember(deletedCur, curDir)
            self._remember(deletedAlt, curDir)
            currentCur.remove(curDir)
        elif deletedOnAlt and curDir in previousCur:
            # The other side already recorded the deletion (while syncing
            # with some third directory) and this side still holds the old
            # copy: delete it here too.
            self.delPath(os.path.join(src, curDir))
            self._remember(deletedCur, curDir)
            currentCur.remove(curDir)
        else:
            # The name is new to this pair of directories.
            self.sCopy(src, dst, curDir)
            currentAlt.append(curDir)

    def delPath(self, path):
        """Deletes files or directories depending on which one it is"""
        # shutil.rmtree refuses to work on a symbolic link, so a link to a
        # directory is removed like a plain file.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
        if self.verbose:
            print(self.tab * 4 + path + ' deleted')

    def sCopy(self, srcDir, dstDir, curObj):
        """Copy curObj from srcDir to dstDir.

        srcDir & dstDir are the base directories and curObj is the name to be
        copied from one to the other.  curObj can be either a file or a
        directory.  If it is a directory it is created in dstDir and the
        function calls itself for every entry inside it.

        Files are copied with shutil.copy2, which also copies the permission
        bits and timestamps.  Ownership is not preserved, and a directory is
        created with default permissions.
        """
        src = os.path.join(srcDir, curObj)
        dst = os.path.join(dstDir, curObj)
        if os.path.isdir(src):
            os.mkdir(dst)
            for entry in os.listdir(src):
                self.sCopy(src, dst, entry)
        else:
            shutil.copy2(src, dst)
        if self.verbose:
            print(self.tab * 4 + curObj + ' copied to ' + dstDir)


def main(argv=None):
    """Command line entry point.

    argv -- list of arguments without the program name; defaults to
            sys.argv[1:] when None.
    """
    from optparse import OptionParser

    usage = """usage: %prog [options] /path/to/directory1 /path/to/directory2
""" + __doc__
    parser = OptionParser(usage=usage, version="%prog 0.1")
    parser.add_option('-v', '--verbose', '-V', action='store_true',
                      dest='verbose', default=False,
                      help='output what the program is doing')
    (options, args) = parser.parse_args(argv)
    if len(args) != 2:
        print('You must supply two directories to be synced\n'
              'use ' + parser.get_prog_name() + ' -h for more details')
    else:
        comp = fileSync(options.verbose)
        comp.compareDir(args[0], args[1])
        if comp.verbose:
            print('Syncing of directories complete')


if __name__ == "__main__":
    main()