From f52a2cb8a0bdc21bc2553b8be06bb6b14cbd4098 Mon Sep 17 00:00:00 2001 From: "jiawei.wang" Date: Tue, 7 Nov 2023 16:42:45 +1100 Subject: [PATCH 1/4] fix getByteOffsets in GepStmt and SVFIR2ItvExeState --- svf-llvm/lib/LLVMUtil.cpp | 9 +++++ .../AbstractExecution/SVFIR2ItvExeState.h | 10 +++--- svf/include/MemoryModel/AccessPath.h | 11 +++--- svf/include/SVFIR/SVFStatements.h | 4 +-- svf/include/SVFIR/SVFType.h | 18 ++++++++++ .../AbstractExecution/SVFIR2ItvExeState.cpp | 35 ++++++++----------- svf/lib/MemoryModel/AccessPath.cpp | 30 +++++++--------- svf/lib/SVFIR/SVFType.cpp | 5 +++ 8 files changed, 71 insertions(+), 51 deletions(-) diff --git a/svf-llvm/lib/LLVMUtil.cpp b/svf-llvm/lib/LLVMUtil.cpp index 83c88c22e..5c76df438 100644 --- a/svf-llvm/lib/LLVMUtil.cpp +++ b/svf-llvm/lib/LLVMUtil.cpp @@ -1268,6 +1268,15 @@ s64_t LLVMUtil::getCaseValue(const SwitchInst &switchInst, SuccBBAndCondValPair namespace SVF { +// getLLVMByteSize +u32_t SVFType::getLLVMByteSize() const { + const llvm::DataLayout &DL = LLVMModuleSet::getLLVMModuleSet()-> + getMainLLVMModule()->getDataLayout(); + const Type* T = LLVMModuleSet::getLLVMModuleSet()->getLLVMType(this); + Type* mut_T = const_cast(T); + return DL.getTypeAllocSize(mut_T); +} + std::string SVFValue::toString() const { std::string str; diff --git a/svf/include/AbstractExecution/SVFIR2ItvExeState.h b/svf/include/AbstractExecution/SVFIR2ItvExeState.h index d015b0ef0..79dd6988f 100644 --- a/svf/include/AbstractExecution/SVFIR2ItvExeState.h +++ b/svf/include/AbstractExecution/SVFIR2ItvExeState.h @@ -74,17 +74,17 @@ class SVFIR2ItvExeState VAddrs getGepObjAddress(u32_t pointer, APOffset offset); /// Return the byte offset from one gep param offset - std::pair getBytefromGepTypePair(const AccessPath::VarAndGepTypePair& gep_pair, const GepStmt *gep, APOffset elem_bytesize); + std::pair getBytefromGepTypePair(const AccessPath::VarAndGepTypePair& gep_pair, const GepStmt *gep); /// Return the Index offset from one gep param offset 
std::pair getIndexfromGepTypePair(const AccessPath::VarAndGepTypePair& gep_pair, const GepStmt *gep); /// Return the byte offset expression of a GepStmt /// elemBytesize is the element byte size of an static alloc or heap alloc array - /// e.g. GepStmt* gep = **, - /// s32_t elemBytesize = LLVMUtil::SVFType2ByteSize(gep->getRHSVar()->getValue()->getType()); - /// std::pair byteOffset = getGepByteOffset(gep, elemBytesize); - std::pair getGepByteOffset(const GepStmt *gep, APOffset elemBytesize); + /// e.g. GepStmt* gep = [i32*10], x, and x is [0,3] + /// std::pair byteOffset = getGepByteOffset(gep); + /// byteOffset should be [0, 12] since i32 is 4 bytes. + std::pair getGepByteOffset(const GepStmt *gep); /// Return the offset expression of a GepStmt std::pair getGepOffset(const GepStmt *gep); diff --git a/svf/include/MemoryModel/AccessPath.h b/svf/include/MemoryModel/AccessPath.h index 71ad48ed9..72aabcbfc 100644 --- a/svf/include/MemoryModel/AccessPath.h +++ b/svf/include/MemoryModel/AccessPath.h @@ -109,12 +109,11 @@ class AccessPath } //@} - /// Return accumulated constant byte offset given OffsetVarVec and elemByteSize - /// elemBytesize is the element byte size of an static alloc or heap alloc array - /// e.g. GepStmt* gep = **, - /// s32_t elemBytesize = LLVMUtil::SVFType2ByteSize(gep->getRHSVar()->getValue()->getType()); - /// APOffset byteOffset = gep->accumulateConstantByteOffset(elemBytesize); - APOffset computeConstantByteOffset(u32_t elemBytesize) const; + /// Return accumulated constant byte offset given OffsetVarVec + /// e.g. GepStmt* gep = [i32*4]*, 2 + /// APOffset byteOffset = gep->accumulateConstantByteOffset(); + /// byteOffset should be 8 since i32 is 4 bytes and index is 2. 
+ APOffset computeConstantByteOffset() const; /// Return accumulated constant offset given OffsetVarVec APOffset computeConstantOffset() const; diff --git a/svf/include/SVFIR/SVFStatements.h b/svf/include/SVFIR/SVFStatements.h index ffb42aaf8..39c1c4d01 100644 --- a/svf/include/SVFIR/SVFStatements.h +++ b/svf/include/SVFIR/SVFStatements.h @@ -505,9 +505,9 @@ class GepStmt: public AssignStmt /// e.g. GepStmt* gep = **, /// s32_t elemBytesize = LLVMUtil::SVFType2ByteSize(gep->getRHSVar()->getValue()->getType()); /// APOffset byteOffset = gep->accumulateConstantByteOffset(elemBytesize); - inline APOffset accumulateConstantByteOffset(u32_t elemBytesize) const + inline APOffset accumulateConstantByteOffset() const { - return getAccessPath().computeConstantByteOffset(elemBytesize); + return getAccessPath().computeConstantByteOffset(); } /// Return accumulated constant offset (when accessing array or struct) if this offset is a constant. diff --git a/svf/include/SVFIR/SVFType.h b/svf/include/SVFIR/SVFType.h index cd9ef205d..f80b5fccc 100644 --- a/svf/include/SVFIR/SVFType.h +++ b/svf/include/SVFIR/SVFType.h @@ -299,6 +299,8 @@ class SVFType return getPointerToTy; } + u32_t getLLVMByteSize() const; + inline void setTypeInfo(StInfo* ti) { typeinfo = ti; @@ -321,6 +323,16 @@ class SVFType return kind == SVFPointerTy; } + inline bool isArrayTy() const + { + return kind == SVFArrayTy; + } + + inline bool isStructTy() const + { + return kind == SVFStructTy; + } + inline bool isSingleValueType() const { return isSingleValTy; @@ -462,6 +474,10 @@ class SVFArrayType : public SVFType void print(std::ostream& os) const override; + const SVFType* getTypeOfElement() const { + return typeOfElement; + } + void setTypeOfElement(const SVFType* elemType) { typeOfElement = elemType; @@ -471,6 +487,8 @@ class SVFArrayType : public SVFType { numOfElement = elemNum; } + + }; class SVFOtherType : public SVFType diff --git a/svf/lib/AbstractExecution/SVFIR2ItvExeState.cpp 
b/svf/lib/AbstractExecution/SVFIR2ItvExeState.cpp index 33ebd77f7..378259e8f 100644 --- a/svf/lib/AbstractExecution/SVFIR2ItvExeState.cpp +++ b/svf/lib/AbstractExecution/SVFIR2ItvExeState.cpp @@ -176,10 +176,16 @@ SVFIR2ItvExeState::VAddrs SVFIR2ItvExeState::getGepObjAddress(u32_t pointer, APO return ret; } -std::pair SVFIR2ItvExeState::getBytefromGepTypePair(const AccessPath::VarAndGepTypePair& gep_pair, const GepStmt *gep, APOffset elemBytesize) +std::pair SVFIR2ItvExeState::getBytefromGepTypePair(const AccessPath::VarAndGepTypePair& gep_pair, const GepStmt *gep) { const SVFValue *value = gep_pair.first->getValue(); const SVFType *type = gep_pair.second; + if (const SVFArrayType* arrType = SVFUtil::dyn_cast(type)) { + type = arrType->getTypeOfElement(); + } + else if (const SVFPointerType* ptrType = SVFUtil::dyn_cast(type)) { + type = ptrType->getPtrElementType(); + } const SVFConstantInt *op = SVFUtil::dyn_cast(value); APOffset offsetLb = 0; APOffset offsetUb = 0; @@ -204,14 +210,15 @@ std::pair SVFIR2ItvExeState::getBytefromGepTypePair(const Ac /// offset is constant but stored in variable if (op) { - offsetLb = offsetUb = op->getSExtValue() > maxByteLimit - ? maxByteLimit - : op->getSExtValue(); + offsetLb = offsetUb = + op->getSExtValue() * type->getLLVMByteSize() > maxByteLimit + ? 
maxByteLimit + : op->getSExtValue() * type->getLLVMByteSize(); } else { u32_t idx = _svfir->getValueNode(value); - IntervalValue &idxVal = _es[idx]; + IntervalValue idxVal = _es[idx] * IntervalValue(type->getLLVMByteSize()); if (idxVal.isBottom() || idxVal.isTop()) return std::make_pair(0, maxByteLimit); // if idxVal is a concrete value @@ -225,20 +232,6 @@ std::pair SVFIR2ItvExeState::getBytefromGepTypePair(const Ac offsetUb = valueReshape(idxVal.ub().getNumeral()); } } - - if (type) - { - if (const SVFPointerType *pty = SVFUtil::dyn_cast(type)) - { - offsetLb = offsetLb * gep->getAccessPath().getElementNum(pty->getPtrElementType())* elemBytesize; - offsetUb = offsetUb * gep->getAccessPath().getElementNum(pty->getPtrElementType())* elemBytesize; - } - else - { - offsetLb = offsetLb * elemBytesize; - offsetUb = offsetUb * elemBytesize; - } - } return {offsetLb, offsetUb}; } @@ -324,7 +317,7 @@ std::pair SVFIR2ItvExeState::getIndexfromGepTypePair(const A } -std::pair SVFIR2ItvExeState::getGepByteOffset(const GepStmt *gep, APOffset elemBytesize) +std::pair SVFIR2ItvExeState::getGepByteOffset(const GepStmt *gep) { /// for instant constant index, e.g. 
gep arr, 1 if (gep->getOffsetVarAndGepTypePairVec().empty()) @@ -336,7 +329,7 @@ std::pair SVFIR2ItvExeState::getGepByteOffset(const GepStmt for (int i = gep->getOffsetVarAndGepTypePairVec().size() - 1; i >= 0; i--) { std::pair offsetIdx = getBytefromGepTypePair( - gep->getOffsetVarAndGepTypePairVec()[i], gep, elemBytesize); + gep->getOffsetVarAndGepTypePairVec()[i], gep); APOffset offsetLb = offsetIdx.first; APOffset offsetUb = offsetIdx.second; if (totalOffsetLb + offsetLb > maxFieldLimit) diff --git a/svf/lib/MemoryModel/AccessPath.cpp b/svf/lib/MemoryModel/AccessPath.cpp index 4d4305128..f880be98c 100644 --- a/svf/lib/MemoryModel/AccessPath.cpp +++ b/svf/lib/MemoryModel/AccessPath.cpp @@ -94,33 +94,29 @@ u32_t AccessPath::getElementNum(const SVFType* type) const /// Given a vector and elem byte size: [(value1,type1), (value2,type2), (value3,type3)], bytesize /// totalConstByteOffset = ByteOffset(value1,type1) * ByteOffset(value2,type2) + ByteOffset(value3,type3) /// For a pointer type (e.g., t1 is PointerType), we will retrieve the pointee type and times the offset, i.e., getElementNum(t1) X off1 -APOffset AccessPath::computeConstantByteOffset(u32_t elemBytesize) const +APOffset AccessPath::computeConstantByteOffset() const { assert(isConstantOffset() && "not a constant offset"); - if(offsetVarAndGepTypePairs.empty()) - return getConstantFieldIdx() * elemBytesize; - APOffset totalConstOffset = 0; for(int i = offsetVarAndGepTypePairs.size() - 1; i >= 0; i--) { const SVFValue* value = offsetVarAndGepTypePairs[i].first->getValue(); const SVFType* type = offsetVarAndGepTypePairs[i].second; - const SVFConstantInt* op = SVFUtil::dyn_cast(value); - assert(op && "not a constant offset?"); - if(type==nullptr) - { - totalConstOffset += op->getSExtValue() * elemBytesize; - continue; + const SVFType* type2 = type; + if (const SVFArrayType* arrType = SVFUtil::dyn_cast(type)) { + type2 = arrType->getTypeOfElement(); + } + else if (const SVFPointerType* ptrType = 
SVFUtil::dyn_cast(type)) { + type2 = ptrType->getPtrElementType(); } - if(const SVFPointerType* pty = SVFUtil::dyn_cast(type)) - totalConstOffset += op->getSExtValue() * getElementNum(pty->getPtrElementType()) * elemBytesize; - else - { - APOffset offset = op->getSExtValue(); - // if getByteOffset is false, it will retrieve flatten idx - totalConstOffset += offset * elemBytesize; + const SVFConstantInt* op = SVFUtil::dyn_cast(value); + if (const SVFStructType* structType = SVFUtil::dyn_cast(type)) { + type2 = structType->getTypeInfo()->getOriginalElemType(op->getSExtValue()); + totalConstOffset += type2->getLLVMByteSize(); + } else { + totalConstOffset += op->getSExtValue() * type2->getLLVMByteSize(); } } return totalConstOffset; diff --git a/svf/lib/SVFIR/SVFType.cpp b/svf/lib/SVFIR/SVFType.cpp index b43d7ca20..7d97c5106 100644 --- a/svf/lib/SVFIR/SVFType.cpp +++ b/svf/lib/SVFIR/SVFType.cpp @@ -4,6 +4,11 @@ namespace SVF { +__attribute__((weak)) +u32_t SVFType::getLLVMByteSize() const { + return 0; +} + __attribute__((weak)) std::string SVFType::toString() const { From d2ad77d804f06f14696d4789a20f868122d15645 Mon Sep 17 00:00:00 2001 From: "jiawei.wang" Date: Tue, 7 Nov 2023 17:47:18 +1100 Subject: [PATCH 2/4] add more comment and fix getLLVMBytesize --- svf/include/MemoryModel/AccessPath.h | 23 +++++++++++++++++++++-- svf/lib/SVFIR/SVFType.cpp | 3 ++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/svf/include/MemoryModel/AccessPath.h b/svf/include/MemoryModel/AccessPath.h index 72aabcbfc..2cc06a9b9 100644 --- a/svf/include/MemoryModel/AccessPath.h +++ b/svf/include/MemoryModel/AccessPath.h @@ -109,12 +109,31 @@ class AccessPath } //@} - /// Return accumulated constant byte offset given OffsetVarVec - /// e.g. GepStmt* gep = [i32*4]*, 2 + /** + * Computes the total constant byte offset of an access path. + * This function iterates over the offset-variable-type pairs in reverse order, + * accumulating the total byte offset for constant offsets. 
For each pair, + * it retrieves the corresponding SVFValue and determines the type of offset + * (whether it's an array, pointer, or structure). If the offset corresponds + * to a structure, it further resolves the actual element type based on the + * offset value. It then multiplies the offset value by the size of the type + * to compute the byte offset. This is used to handle composite types where + * offsets are derived from the type's internal structure, such as arrays + * or structures with fields of various types and sizes. The function asserts + * that the access path must have a constant offset, and it is intended to be + * used when the offset is known to be constant at compile time. + * + * @return APOffset representing the computed total constant byte offset. + */ + /// e.g. GepStmt* gep = [i32*4], 2 /// APOffset byteOffset = gep->accumulateConstantByteOffset(); /// byteOffset should be 8 since i32 is 4 bytes and index is 2. APOffset computeConstantByteOffset() const; /// Return accumulated constant offset given OffsetVarVec + /// compared to computeConstantByteOffset, it is a field offset rather than a byte offset + /// e.g. GepStmt* gep = [i32*4], 2 + /// APOffset byteOffset = gep->computeConstantOffset(); + /// byteOffset should be 2 since it is field offset. APOffset computeConstantOffset() const; /// Return element number of a type. 
diff --git a/svf/lib/SVFIR/SVFType.cpp b/svf/lib/SVFIR/SVFType.cpp index 7d97c5106..baf25a738 100644 --- a/svf/lib/SVFIR/SVFType.cpp +++ b/svf/lib/SVFIR/SVFType.cpp @@ -6,7 +6,8 @@ namespace SVF __attribute__((weak)) u32_t SVFType::getLLVMByteSize() const { - return 0; + assert("SVFType::getLLVMByteSize should be implemented or supported by fronted" && false); + abort(); } __attribute__((weak)) From 97b3d57db82390298f7c9ea844b93d27d8a87f69 Mon Sep 17 00:00:00 2001 From: "jiawei.wang" Date: Wed, 8 Nov 2023 11:15:05 +1100 Subject: [PATCH 3/4] add test cases to improve coverage --- svf-llvm/tools/Example/svf-ex.cpp | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/svf-llvm/tools/Example/svf-ex.cpp b/svf-llvm/tools/Example/svf-ex.cpp index 349ae6d4c..b258e730b 100644 --- a/svf-llvm/tools/Example/svf-ex.cpp +++ b/svf-llvm/tools/Example/svf-ex.cpp @@ -27,6 +27,7 @@ */ #include "SVF-LLVM/LLVMUtil.h" +#include "AbstractExecution/SVFIR2ItvExeState.h" #include "Graphs/SVFG.h" #include "WPA/Andersen.h" #include "SVF-LLVM/SVFIRBuilder.h" @@ -70,6 +71,47 @@ std::string printPts(PointerAnalysis* pta, SVFValue* val) } +/*! 
+ * An example to query/collect all SVFStmt from a ICFGNode (iNode) + */ +void traverseOnSVFStmt(const ICFGNode* node) { + SVFIR2ItvExeState* svfir2ExeState = new SVFIR2ItvExeState(SVFIR::getPAG()); + for (const SVFStmt* stmt: node->getSVFStmts()) { + if (const AddrStmt *addr = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translateAddr(addr); + } else if (const BinaryOPStmt *binary = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translateBinary(binary); + } else if (const CmpStmt *cmp = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translateCmp(cmp); + } else if (const UnaryOPStmt *unary = SVFUtil::dyn_cast(stmt)) { + } else if (const BranchStmt *br = SVFUtil::dyn_cast(stmt)) { + + } else if (const LoadStmt *load = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translateLoad(load); + } else if (const StoreStmt *store = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translateStore(store); + } else if (const CopyStmt *copy = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translateCopy(copy); + } else if (const GepStmt *gep = SVFUtil::dyn_cast(stmt)) { + if (gep->isConstantOffset()) { + gep->accumulateConstantByteOffset(); + gep->accumulateConstantOffset(); + } + svfir2ExeState->translateGep(gep); + } else if (const SelectStmt *select = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translateSelect(select); + } else if (const PhiStmt *phi = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translatePhi(phi); + } else if (const CallPE *callPE = SVFUtil::dyn_cast(stmt)) { + // To handle Call Edge + svfir2ExeState->translateCall(callPE); + } else if (const RetPE *retPE = SVFUtil::dyn_cast(stmt)) { + svfir2ExeState->translateRet(retPE); + } else + assert(false && "implement this part"); + } +} + /*! 
* An example to query/collect all successor nodes from a ICFGNode (iNode) along control-flow graph (ICFG) @@ -90,6 +132,7 @@ void traverseOnICFG(ICFG* icfg, const SVFInstruction* svfInst) { ICFGEdge* edge = *it; ICFGNode* succNode = edge->getDstNode(); + traverseOnSVFStmt(succNode); if (visited.find(succNode) == visited.end()) { visited.insert(succNode); From 864de919602e6f1ab738fbb2bae1ff019318a658 Mon Sep 17 00:00:00 2001 From: "jiawei.wang" Date: Wed, 8 Nov 2023 11:38:42 +1100 Subject: [PATCH 4/4] add test cases to improve coverage --- svf-llvm/tools/Example/svf-ex.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/svf-llvm/tools/Example/svf-ex.cpp b/svf-llvm/tools/Example/svf-ex.cpp index b258e730b..edd2e9afe 100644 --- a/svf-llvm/tools/Example/svf-ex.cpp +++ b/svf-llvm/tools/Example/svf-ex.cpp @@ -83,10 +83,7 @@ void traverseOnSVFStmt(const ICFGNode* node) { svfir2ExeState->translateBinary(binary); } else if (const CmpStmt *cmp = SVFUtil::dyn_cast(stmt)) { svfir2ExeState->translateCmp(cmp); - } else if (const UnaryOPStmt *unary = SVFUtil::dyn_cast(stmt)) { - } else if (const BranchStmt *br = SVFUtil::dyn_cast(stmt)) { - - } else if (const LoadStmt *load = SVFUtil::dyn_cast(stmt)) { + } else if (const LoadStmt *load = SVFUtil::dyn_cast(stmt)) { svfir2ExeState->translateLoad(load); } else if (const StoreStmt *store = SVFUtil::dyn_cast(stmt)) { svfir2ExeState->translateStore(store);