diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 7641ac1becfb..2fbf5306c760 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -146,3 +146,36 @@ class LDAPEscape extends DataFlow::Node { */ DataFlow::Node getAnInput() { result = range.getAnInput() } } + +/** Provides classes for modeling SQL sanitization libraries. */ +module SQLEscape { + /** + * A data-flow node that collects functions that escape SQL statements. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `SQLEscape` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the argument containing the raw SQL statement. + */ + abstract DataFlow::Node getAnInput(); + } +} + +/** + * A data-flow node that collects functions escaping SQL statements. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `SQLEscape::Range` instead. + */ +class SQLEscape extends DataFlow::Node { + SQLEscape::Range range; + + SQLEscape() { this = range } + + /** + * Gets the argument containing the raw SQL statement. + */ + DataFlow::Node getAnInput() { result = range.getAnInput() } +} diff --git a/python/ql/src/experimental/semmle/python/frameworks/SqlAlchemy.qll b/python/ql/src/experimental/semmle/python/frameworks/SqlAlchemy.qll new file mode 100644 index 000000000000..00d550d9844b --- /dev/null +++ b/python/ql/src/experimental/semmle/python/frameworks/SqlAlchemy.qll @@ -0,0 +1,148 @@ +/** + * Provides classes modeling security-relevant aspects of the 'SqlAlchemy' package. + * See https://pypi.org/project/SQLAlchemy/. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.TaintTracking +private import semmle.python.ApiGraphs +private import semmle.python.Concepts +private import experimental.semmle.python.Concepts + +private module SqlAlchemy { + /** + * Returns an instantization of a SqlAlchemy Session object. + * See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session and + * https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.sessionmaker + */ + private API::Node getSqlAlchemySessionInstance() { + result = API::moduleImport("sqlalchemy.orm").getMember("Session").getReturn() or + result = API::moduleImport("sqlalchemy.orm").getMember("sessionmaker").getReturn().getReturn() + } + + /** + * Returns an instantization of a SqlAlchemy Engine object. + * See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine + */ + private API::Node getSqlAlchemyEngineInstance() { + result = API::moduleImport("sqlalchemy").getMember("create_engine").getReturn() + } + + /** + * Returns an instantization of a SqlAlchemy Query object. + * See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query + */ + private API::Node getSqlAlchemyQueryInstance() { + result = getSqlAlchemySessionInstance().getMember("query").getReturn() + } + + /** + * A call to `execute` meant to execute an SQL expression + * See the following links: + * - https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=execute#sqlalchemy.engine.Connection.execute + * - https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=execute#sqlalchemy.engine.Engine.execute + * - https://docs.sqlalchemy.org/en/14/orm/session_api.html?highlight=execute#sqlalchemy.orm.Session.execute + */ + private class SqlAlchemyExecuteCall extends DataFlow::CallCfgNode, SqlExecution::Range { + SqlAlchemyExecuteCall() { + // new way + this = getSqlAlchemySessionInstance().getMember("execute").getACall() or + this = + getSqlAlchemyEngineInstance() + .getMember(["connect", "begin"]) + .getReturn() + .getMember("execute") + .getACall() + } + + override DataFlow::Node getSql() { result = this.getArg(0) } + } + + /** + * A call to `scalar` meant to execute an SQL expression + * See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.scalar and + * https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=scalar#sqlalchemy.engine.Engine.scalar + */ + private class SqlAlchemyScalarCall extends DataFlow::CallCfgNode, SqlExecution::Range { + SqlAlchemyScalarCall() { + this = + [getSqlAlchemySessionInstance(), getSqlAlchemyEngineInstance()] + .getMember("scalar") + .getACall() + } + + override DataFlow::Node getSql() { result = this.getArg(0) } + } + + /** + * A call on a Query object + * See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query + */ + private class SqlAlchemyQueryCall extends DataFlow::CallCfgNode, SqlExecution::Range { + SqlAlchemyQueryCall() { + this = + getSqlAlchemyQueryInstance() + .getMember(any(SqlAlchemyVulnerableMethodNames methodName)) + .getACall() + } + + override DataFlow::Node getSql() { result = this.getArg(0) } + } + + /** + * This class represents a list of methods vulnerable to sql injection. + * + * See https://github.com/jty-team/codeql/pull/2#issue-611592361 + */ + private class SqlAlchemyVulnerableMethodNames extends string { + SqlAlchemyVulnerableMethodNames() { this in ["filter", "filter_by", "group_by", "order_by"] } + } + + /** + * Additional taint-steps for `sqlalchemy.text()` + * + * See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text + * See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause + */ + class SqlAlchemyTextAdditionalTaintSteps extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + exists(DataFlow::CallCfgNode call | + ( + call = API::moduleImport("sqlalchemy").getMember("text").getACall() + or + call = API::moduleImport("sqlalchemy").getMember("sql").getMember("text").getACall() + or + call = + API::moduleImport("sqlalchemy") + .getMember("sql") + .getMember("expression") + .getMember("text") + .getACall() + or + call = + API::moduleImport("sqlalchemy") + .getMember("sql") + .getMember("expression") + .getMember("TextClause") + .getACall() + ) and + nodeFrom in [call.getArg(0), call.getArgByName("text")] and + nodeTo = call + ) + } + } + + /** + * Gets a reference to `sqlescapy.sqlescape`. + * + * See https://pypi.org/project/sqlescapy/ + */ + class SQLEscapySanitizerCall extends DataFlow::CallCfgNode, SQLEscape::Range { + SQLEscapySanitizerCall() { + this = API::moduleImport("sqlescapy").getMember("sqlescape").getACall() + } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + } +} diff --git a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.expected b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.ql b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.ql new file mode 100644 index 000000000000..b097eb15648b --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/ConceptsTest.ql @@ -0,0 +1,3 @@ +import python +import experimental.meta.ConceptsTest +import experimental.semmle.python.frameworks.SqlAlchemy diff --git a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.expected b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.expected new file mode 100644 index 000000000000..79d760d87f42 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.expected @@ -0,0 +1,3 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +failures diff --git a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.ql b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.ql new file mode 100644 index 000000000000..2ddb342c62fd --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/InlineTaintTest.ql @@ -0,0 +1,2 @@ +import experimental.meta.InlineTaintTest +import experimental.semmle.python.frameworks.SqlAlchemy diff --git a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/SqlExecution.py b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/SqlExecution.py new file mode 100644 index 000000000000..7731e80e5340 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/SqlExecution.py @@ -0,0 +1,57 @@ +import sqlalchemy +from sqlalchemy import Column, Integer, String, ForeignKey, create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.pool import StaticPool +from sqlalchemy.orm import relationship, backref, sessionmaker, joinedload +from sqlalchemy.sql import text + +engine = create_engine( + 'sqlite:///:memory:', + echo=True, + connect_args={"check_same_thread": False}, + poolclass=StaticPool +) + +Base = declarative_base() + +class User(Base): + __tablename__ = 'users' + + id = Column(Integer, primary_key=True) + name = Column(String) + +Base.metadata.create_all(engine) + +Session = sessionmaker(bind=engine) +session = Session() + +ed_user = User(name='ed') +ed_user2 = User(name='george') + +session.add(ed_user) +session.add(ed_user2) + +session.commit() + +# Injection without requiring the text() taint-step +session.query(User).filter_by(name="some sql") # $ MISSING: getSql="some sql" +session.scalar("some sql") # $ getSql="some sql" +engine.scalar("some sql") # $ getSql="some sql" +session.execute("some sql") # $ getSql="some sql" + +with engine.connect() as connection: + connection.execute("some sql") # $ getSql="some sql" + +with engine.begin() as connection: + connection.execute("some sql") # $ getSql="some sql" + +# Injection requiring the text() taint-step +t = text("some sql") +session.query(User).filter(t) # $ getSql=t +session.query(User).group_by(User.id).having(t) # $ getSql=User.id MISSING: getSql=t +session.query(User).group_by(t).first() # $ getSql=t +session.query(User).order_by(t).first() # $ getSql=t + +query = select(User).where(User.name == t) # $ MISSING: getSql=t +with engine.connect() as conn: + conn.execute(query) # $ getSql=query diff --git a/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/taint_test.py b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/taint_test.py new file mode 100644 index 000000000000..91f8987132f5 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/sqlalchemy/taint_test.py @@ -0,0 +1,12 @@ +import sqlalchemy + +def test_taint(): + ts = TAINTED_STRING + + ensure_tainted( + ts, # $ tainted + sqlalchemy.text(ts), # $ tainted + sqlalchemy.sql.text(ts),# $ tainted + sqlalchemy.sql.expression.text(ts),# $ tainted + sqlalchemy.sql.expression.TextClause(ts),# $ tainted + )