From ead94f267065bb55303f79a0a6df477810b3c68d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 14:33:51 +0200 Subject: cmd: added option to return the process directly, allowing to read the output directly from the output stream commit: now reads commit information directly from the output stream of the process by implementing its iterator method repo: removed log method as it was redundant ( equal to the commits method ) --- lib/git/cmd.py | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'lib/git/cmd.py') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 940e35d1..867baee7 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,7 +13,7 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'with_raw_output') + 'with_exceptions', 'with_raw_output', 'as_process') extra = {} if sys.platform == 'win32': @@ -34,6 +34,35 @@ class Git(object): of the command to stdout. Set its value to 'full' to see details about the returned values. """ + + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object + """ + __slots__= "proc" + + def __init__(self, proc ): + self.proc = proc + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + subprocess.call(("TASKKILL", "/T", "/PID", self.proc.pid)) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def __init__(self, git_dir=None): """ Initialize this instance with: @@ -70,6 +99,7 @@ class Git(object): with_extended_output=False, with_exceptions=True, with_raw_output=False, + as_process=False ): """ Handles executing the command on the shell and consumes and returns @@ -96,6 +126,16 @@ class Git(object): ``with_raw_output`` Whether to avoid stripping off trailing whitespace. + + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output, + with_exceptions and with_raw_output ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. Returns:: @@ -127,7 +167,11 @@ class Git(object): **extra ) + if as_process: + return self.AutoInterrupt(proc) + # Wait for the process to return + status = 0 try: stdout_value = proc.stdout.read() stderr_value = proc.stderr.read() -- cgit v1.2.1 From a28d3d18f9237af5101eb22e506a9ddda6d44025 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 18:50:55 +0200 Subject: Implemented git command facility to keep persistent commands for fast object information retrieval --- lib/git/cmd.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) (limited to 'lib/git/cmd.py') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 867baee7..92ef3bda 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -34,7 +34,6 @@ class Git(object): of the command to stdout. Set its value to 'full' to see details about the returned values. """ - class AutoInterrupt(object): """ Kill/Interrupt the stored process instance once this instance goes out of scope. It is @@ -50,7 +49,7 @@ class Git(object): # did the process finish already so we have a return code ? if self.proc.poll() is not None: return - + # try to kill it try: os.kill(self.proc.pid, 2) # interrupt signal @@ -73,6 +72,10 @@ class Git(object): """ super(Git, self).__init__() self.git_dir = git_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None def __getattr__(self, name): """ @@ -262,3 +265,74 @@ class Git(object): call.extend(args) return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + type_string size_as_int + + Returns + (type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError( "SHA named %s could not be resolved" % tokens[0] ) + + return (tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ + Use this method to quickly examine the type and size of the object behind + the given ref. + + NOTE + The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. + + Return: + (type_string, size_as_int) + """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ + As get_object_header, but returns object data as well + + Return: + (type_string, size_as_int,data_string) + """ + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + typename, size = self.__get_object_header(cmd, ref) + data = cmd.stdout.read(size) + cmd.stdout.read(1) # finishing newlines + + return (typename, size, data) -- cgit v1.2.1 From c5df44408218003eb49e3b8fc94329c5e8b46c7d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 19:41:27 +0200 Subject: persistent command signature changed to also return the hexsha from a possible input ref - the objects pointed to by refs are now baked on demand - perhaps it should change to always be re-retrieved using a property as it is relatively fast - this way refs can always be cached --- lib/git/cmd.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/git/cmd.py') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 92ef3bda..2965eb8b 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -272,7 +272,7 @@ class Git(object): type_string size_as_int Returns - (type_string, size_as_int) + (hex_sha, type_string, size_as_int) Raises ValueError if the header contains indication for an error due to incorrect @@ -282,7 +282,7 @@ class Git(object): if len(tokens) != 3: raise ValueError( "SHA named %s could not be resolved" % tokens[0] ) - return (tokens[1], int(tokens[2])) + return (tokens[0], tokens[1], int(tokens[2])) def __prepare_ref(self, ref): # required for command to separate refs on stdin @@ -318,7 +318,7 @@ class Git(object): once and reuses the command in subsequent calls. Return: - (type_string, size_as_int) + (hexsha, type_string, size_as_int) """ cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) return self.__get_object_header(cmd, ref) @@ -328,11 +328,11 @@ class Git(object): As get_object_header, but returns object data as well Return: - (type_string, size_as_int,data_string) + (hexsha, type_string, size_as_int,data_string) """ cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) - typename, size = self.__get_object_header(cmd, ref) + hexsha, typename, size = self.__get_object_header(cmd, ref) data = cmd.stdout.read(size) cmd.stdout.read(1) # finishing newlines - return (typename, size, data) + return (hexsha, typename, size, data) -- cgit v1.2.1