Checking JavaScript Variables With Python

So I have OCD when it comes to JavaScript because it’s such a lenient language and it is hard to tell when you’re misusing variables. The editors that I use on a daily basis don’t tell me (1) if a variable has been declared but never used or (2) if a variable is being used without having been declared. I wrote a Python script which attempts to check for these cases. There may be a bunch of bugs, as I did not write a JavaScript language parser but instead a series of checks based on regular expressions. Here is the code:

Edit: One of the commenters pointed out that one could use JSLint and JSHint to get much better information about your JS code. Thank you for the recommendation!

#!/usr/bin/python

import re
import sys

def rmescape(inptstri):
	"""Return *inptstri* with every backslash escape sequence removed.

	An escape sequence is a backslash together with the single character
	that follows it; both are dropped.  An escaped backslash counts as one
	complete sequence, so the character after it is kept.  A lone trailing
	backslash is dropped as well.
	"""
	outplist = []
	backflag = False

	for letritem in inptstri:
		if backflag:
			# This character is the payload of an escape.  Checking the flag
			# before looking for a backslash makes an escaped backslash end
			# the sequence instead of starting a new one.
			backflag = False
		elif letritem == '\\':
			backflag = True
		else:
			outplist.append(letritem)

	return "".join(outplist)

# Read HTML from stdin and collect, in first-seen order and without
# duplicates, the ";"-separated statement fragments found on the lines
# between a line starting with "<script" and a line starting with "</script".
scptflag = 0
readlist = []

for tempread in sys.stdin:
	# clean the front and end of the string
	tempread = tempread.strip()

	# the closing tag is checked first so that it is never collected itself
	if (re.match("^</script.*$", tempread)):
		scptflag = 0

	if (scptflag == 1):
		for tempitem in tempread.split(";"):
			# A fragment that follows a ";" starts with the blanks that
			# separated it from the previous statement; strip them so the
			# "^var" / "^function" anchors applied to readlist entries match.
			tempitem = tempitem.strip()

			# blank lines and trailing ";" produce empty fragments: skip them
			if (tempitem and tempitem not in readlist):
				readlist.append(tempitem)

	# The opening tag is checked last so collection starts on the next line.
	# A tag that is closed on the same line (e.g. an external
	# <script src=...></script>) has no body on the following lines.
	if (re.match("^<script.*$", tempread) and "</script" not in tempread):
		scptflag = 1

# Build varslist: the names introduced by "var" statements and by "function"
# declarations (the function name plus its parameters), in first-seen order
# and without duplicates.  This is a text heuristic, not a JavaScript parser.
varslist = []

for lineread in readlist:
	# remove any escapes
	lineread = rmescape(lineread)

	# remove any chars, strings, regex
	# (raw strings: "\/", "\<", "\[" and "\{" are not valid Python string escapes)
	lineread = re.sub(r"'[^']*'", "", lineread)
	lineread = re.sub(r'"[^"]*"', "", lineread)
	lineread = re.sub(r"\/[^\/]+\/", "", lineread)

	# remove any comments
	lineread = re.sub("//.*", "", lineread)

	# remove any lists, arrays, dictionaries
	lineread = re.sub(r"\<[^\>]*\>", "", lineread)
	lineread = re.sub(r"\[[^\]]*\]", "", lineread)
	lineread = re.sub(r"\{[^\}]*\}", "", lineread)

	tempstri = ""

	regxobjc = re.match("^var[ ]+(.*)$", lineread)

	if (regxobjc):
		tempstri = regxobjc.group(1)

	regxobjc = re.match("^function[ ]+(.*)$", lineread)

	if (regxobjc):
		# turn "name(a, b)" into the comma list "name,a, b"
		tempstri = regxobjc.group(1).replace("(", ",").replace(")", "")

	if (tempstri != ""):
		# remove any spaces, tabs
		tempstri = tempstri.replace(" ", "").replace("\t", "")

		# remove any variable assignment values
		tempstri = re.sub("=[^,]*", "", tempstri)

		# split the line to get all of the declared variables
		for tempitem in tempstri.split(","):
			# "function f()" leaves a trailing empty field after the split;
			# an empty string is not a variable name
			if (tempitem and tempitem not in varslist):
				varslist.append(tempitem)

#print(varslist)

# Report every declared name that never shows up on a line other than one
# that assigns to it.  Matching is by plain substring on whitespace-free text.

# the whitespace-free form of each line does not depend on the variable
# being checked, so compute it once
striplst = [lineread.replace(" ", "").replace("\t", "") for lineread in readlist]

for varsread in varslist:
	# Declared names come from a rough text split and can contain regex
	# metacharacters (e.g. "b)" or "$x"); escape them so they are matched
	# literally instead of raising re.error or changing the pattern.
	namepatt = re.escape(varsread)
	asgnregx = re.compile("^.*" + namepatt + "=[^=].*$")
	usedregx = re.compile("^.*" + namepatt + ".*$")

	usedflag = 0

	for lineread in striplst:
		# skip variable assignment lines ("name=" not followed by another "=")
		if (asgnregx.match(lineread)):
			continue

		# get variable usage lines
		if (usedregx.match(lineread)):
			usedflag = 1
			break

	if (usedflag == 0):
		print("note: variable possibly unused: [%s]" % (varsread))

# Report identifiers that are used on a line but were never declared.
# Raw strings are used for the patterns: "\_", "\." and "\(" are not valid
# Python string escapes.
nameregx = r"[A-Za-z][0-9A-Za-z\_]*"
templist = ["null", "true", "false", "document", "window", "Math"]

# names that are always available count as declared
varslist.extend(templist)

for lineread in readlist:
	origline = lineread

	# replace any variable declaration prefixes
	lineread = re.sub("^var ", "", lineread)

	# replace any function declaration lines
	lineread = re.sub("^function .*", "", lineread)

	# replace any return statement lines
	lineread = re.sub("^return .*", "", lineread)

	# remove any spaces, tabs
	lineread = lineread.replace(" ", "").replace("\t", "")

	# remove any chars, strings, regex
	lineread = re.sub(r"'[^']*'", "", lineread)
	lineread = re.sub(r'"[^"]*"', "", lineread)
	lineread = re.sub(r"\/[^\/]+\/", "", lineread)

	# remove any comments
	lineread = re.sub("//.*", "", lineread)

	# replace any method calls (special replace char to prevent function replace)
	lineread = re.sub(r"\." + nameregx + r"[=\(]*", "~", lineread)

	# replace any assignments, function calls
	lineread = re.sub(nameregx + r"[=\(]", "", lineread)

	# Walk the remaining identifiers as whole tokens.  Deleting a name with
	# str.replace would also cut it out of longer identifiers that contain
	# it (removing "a" turns "abc" into "bc") and produce bogus warnings.
	seenlist = []

	for namestri in re.findall(nameregx, lineread):
		# report each name at most once per line
		if (namestri in seenlist):
			continue

		seenlist.append(namestri)

		if (namestri not in varslist):
			print("warn: variable possibly undeclared: [%s] on line [%s]" % (namestri, origline))

Advertisements
Checking JavaScript Variables With Python

One thought on “Checking JavaScript Variables With Python”

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s