forked from bazel-contrib/rules_python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfile_parser.go
More file actions
278 lines (246 loc) · 8.35 KB
/
file_parser.go
File metadata and controls
278 lines (246 loc) · 8.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package python
import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"strings"
sitter "github.com/smacker/go-tree-sitter"
"github.com/smacker/go-tree-sitter/python"
)
// Tree-sitter node type names that the parser compares against the value
// returned by a node's Type method.
const (
	// Statement nodes.
	sitterNodeTypeIfStatement         = "if_statement"
	sitterNodeTypeImportStatement     = "import_statement"
	sitterNodeTypeImportFromStatement = "import_from_statement"

	// Nodes that name the target of an import.
	sitterNodeTypeDottedName     = "dotted_name"
	sitterNodeTypeAliasedImport  = "aliased_import"
	sitterNodeTypeWildcardImport = "wildcard_import"

	// Expression and leaf nodes.
	sitterNodeTypeString             = "string"
	sitterNodeTypeComment            = "comment"
	sitterNodeTypeIdentifier         = "identifier"
	sitterNodeTypeComparisonOperator = "comparison_operator"
)
// ParserOutput holds everything the FileParser collected from one Python file.
type ParserOutput struct {
// FileName is the file name that was passed to SetCodeAndFile.
FileName string
// Modules lists the imports recorded while walking the syntax tree.
Modules []Module
// Comments lists the comment nodes recorded while walking the syntax tree.
Comments []Comment
// HasMain reports whether parseMain found an `if __name__ == "__main__":` block.
HasMain bool
}
// FileParser extracts imports, comments and the presence of a main block from
// the source code of a Python file.
type FileParser struct {
// code is the raw Python source to parse.
code []byte
// relFilepath is relPackagePath joined with the file name (see SetCodeAndFile).
// It is stored on every recorded Module and used in parse warnings.
relFilepath string
// output accumulates the results; Parse returns a pointer to this field.
output ParserOutput
// inTypeCheckingBlock is true while the tree walk is inside an
// `if TYPE_CHECKING:` block; imports recorded meanwhile are marked
// TypeCheckingOnly.
inTypeCheckingBlock bool
}
// NewFileParser returns a zero-valued FileParser. Source code and file names
// are supplied afterwards through SetCodeAndFile or ParseFile.
func NewFileParser() *FileParser {
	return new(FileParser)
}
// ParseCode parses the given python code with a freshly created tree-sitter
// Parser and returns the RootNode of the resulting tree.
//
// A tree that contains syntax errors is still returned with a nil error: the
// broken part may be something Gazelle does not care about. In that case a
// warning mentioning path is logged, and when the environment variable
// RULES_PYTHON_GAZELLE_VERBOSE is set to "1" every top-level error node is
// logged as well.
func ParseCode(code []byte, path string) (*sitter.Node, error) {
	p := sitter.NewParser()
	p.SetLanguage(python.GetLanguage())

	tree, err := p.ParseCtx(context.Background(), nil, code)
	if err != nil {
		return nil, err
	}

	root := tree.RootNode()
	if root.HasError() {
		log.Printf("WARNING: failed to parse %q. The resulting BUILD target may be incorrect.", path)

		// Detailed diagnostics are opt-in.
		if os.Getenv("RULES_PYTHON_GAZELLE_VERBOSE") == "1" {
			total := int(root.ChildCount())
			for idx := 0; idx < total; idx++ {
				n := root.Child(idx)
				if !n.IsError() {
					continue
				}
				// Example logs:
				// gazelle: Parse error at {Row:1 Column:0}:
				// def search_one_more_level[T]():
				log.Printf("Parse error at %+v:\n%+v", n.StartPoint(), n.Content(code))
				// Followed by tree-sitter's own view of the broken region. Eg:
				// gazelle: The above was parsed as: (ERROR (identifier) (call function: (list (identifier)) arguments: (argument_list)))
				log.Printf("The above was parsed as: %v", n.String())
			}
		}
	}
	return root, nil
}
// parseMain returns true if the python file has an `if __name__ == "__main__":` block,
// which is a common idiom for python scripts/binaries.
//
// Only the direct children of node are inspected. Both operand orders are
// accepted (`__name__ == "__main__"` and `"__main__" == __name__`). It returns
// false as soon as ctx is done.
//
// ParseCode hands back trees that may contain syntax errors, so every child
// lookup is checked before use instead of assuming a well-formed statement.
func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool {
	for i := 0; i < int(node.ChildCount()); i++ {
		if ctx.Err() != nil {
			return false
		}

		child := node.Child(i)
		if child == nil || child.Type() != sitterNodeTypeIfStatement || child.ChildCount() < 2 {
			continue
		}

		// Child 1 of the if statement is its condition.
		cond := child.Child(1)
		if cond == nil || cond.Type() != sitterNodeTypeComparisonOperator || cond.ChildCount() < 3 {
			continue
		}
		if op := cond.Child(1); op == nil || op.Type() != "==" {
			continue
		}

		a, b := cond.Child(0), cond.Child(2)
		if a == nil || b == nil {
			continue
		}
		// convert "'__main__' == __name__" to "__name__ == '__main__'"
		if b.Type() == sitterNodeTypeIdentifier {
			a, b = b, a
		}
		if a.Type() != sitterNodeTypeIdentifier || a.Content(p.code) != "__name__" ||
			b.Type() != sitterNodeTypeString {
			continue
		}

		// Drop the first and last byte of the literal (its quotes). The bounds
		// are validated first so a zero-width or truncated node cannot cause
		// an out-of-range slice.
		start, end := b.StartByte(), b.EndByte()
		if end < start+2 || int(end) > len(p.code) {
			continue
		}
		if string(p.code[start+1:end-1]) == "__main__" {
			return true
		}
	}
	return false
}
// parseImportStatement converts one child node of an import statement into a
// `Module`. The boolean is false when the node is not something that names an
// import, in which case the returned Module is the zero value.
//
// Aliased imports are resolved through their first child, and wildcard imports
// are reported under the name "*". LineNumber is the node's start row plus one.
func parseImportStatement(node *sitter.Node, code []byte) (Module, bool) {
	kind := node.Type()
	if kind == sitterNodeTypeAliasedImport {
		return parseImportStatement(node.Child(0), code)
	}

	var name string
	switch kind {
	case sitterNodeTypeDottedName:
		name = node.Content(code)
	case sitterNodeTypeWildcardImport:
		name = "*"
	default:
		return Module{}, false
	}

	return Module{
		Name:       name,
		LineNumber: node.StartPoint().Row + 1,
	}, true
}
// parseImportStatements reports whether node is an `import ...` or
// `from ... import ...` statement. When it is, every module the statement
// refers to is appended to FileParser.output.Modules, tagged with the file
// path and with whether the parser is currently in a TYPE_CHECKING block.
func (p *FileParser) parseImportStatements(node *sitter.Node) bool {
	switch node.Type() {
	case sitterNodeTypeImportStatement:
		// Scanning starts at child 1; children that do not name a module are
		// rejected by parseImportStatement.
		for idx, total := 1, int(node.ChildCount()); idx < total; idx++ {
			mod, found := parseImportStatement(node.Child(idx), p.code)
			if !found || strings.HasPrefix(mod.Name, ".") {
				continue
			}
			mod.Filepath = p.relFilepath
			mod.TypeCheckingOnly = p.inTypeCheckingBlock
			p.output.Modules = append(p.output.Modules, mod)
		}
		return true

	case sitterNodeTypeImportFromStatement:
		source := node.Child(1).Content(p.code)
		// An import from the current package (i.e. `from . import Class1`)
		// adds nothing to the modules. An import from a different relative
		// package (i.e. `from .package1 import foo`) is still recorded.
		if source == "." {
			return true
		}
		// Imported names are looked for from child 3 onwards.
		for idx, total := 3, int(node.ChildCount()); idx < total; idx++ {
			mod, found := parseImportStatement(node.Child(idx), p.code)
			if !found {
				continue
			}
			mod.Filepath = p.relFilepath
			mod.From = source
			mod.Name = fmt.Sprintf("%s.%s", source, mod.Name)
			mod.TypeCheckingOnly = p.inTypeCheckingBlock
			p.output.Modules = append(p.output.Modules, mod)
		}
		return true

	default:
		return false
	}
}
// parseComments reports whether node is a comment. If so, the comment's text
// is appended to FileParser.output.Comments.
func (p *FileParser) parseComments(node *sitter.Node) bool {
	if node.Type() != sitterNodeTypeComment {
		return false
	}
	text := node.Content(p.code)
	p.output.Comments = append(p.output.Comments, Comment(text))
	return true
}
// SetCodeAndFile stores the source code to parse together with its location.
// The file path kept on the parser is relPackagePath joined with filename,
// while the output's FileName is filename alone.
func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) {
	p.output.FileName = filename
	p.relFilepath = filepath.Join(relPackagePath, filename)
	p.code = code
}
// isTypeCheckingBlock reports whether node is an if statement whose condition
// is exactly `TYPE_CHECKING` or `typing.TYPE_CHECKING`.
func (p *FileParser) isTypeCheckingBlock(node *sitter.Node) bool {
	if node.Type() != sitterNodeTypeIfStatement || node.ChildCount() < 2 {
		return false
	}

	const flagName = "TYPE_CHECKING"

	cond := node.Child(1)
	switch cond.Type() {
	case sitterNodeTypeIdentifier:
		// `if TYPE_CHECKING:`
		return cond.Content(p.code) == flagName
	case "attribute":
		// `if typing.TYPE_CHECKING:` - children 0 and 2 are the object and
		// the attribute name.
		if cond.ChildCount() < 3 {
			return false
		}
		base, member := cond.Child(0), cond.Child(2)
		return base.Type() == sitterNodeTypeIdentifier &&
			base.Content(p.code) == "typing" &&
			member.Type() == sitterNodeTypeIdentifier &&
			member.Content(p.code) == flagName
	}
	return false
}
// parse walks the syntax tree below node depth-first. Children that are
// import statements or comments are recorded in p.output; every other child
// is descended into recursively. A nil node is ignored, and the walk stops as
// soon as ctx is done.
//
// While the walk is inside the body of an `if TYPE_CHECKING:` statement,
// p.inTypeCheckingBlock is true so that imports found there are marked
// TypeCheckingOnly. The statement's `elif`/`else` clauses are what executes at
// runtime, so they do not get the flag from this statement; they only keep it
// if an enclosing block had already set it.
func (p *FileParser) parse(ctx context.Context, node *sitter.Node) {
	if node == nil {
		return
	}

	enclosing := p.inTypeCheckingBlock
	// Restore the caller's state on every exit path, including the early
	// return taken when ctx is cancelled.
	defer func() { p.inTypeCheckingBlock = enclosing }()

	typeChecking := p.isTypeCheckingBlock(node)

	for i := 0; i < int(node.ChildCount()); i++ {
		if ctx.Err() != nil {
			return
		}
		child := node.Child(i)
		if child == nil {
			continue
		}

		p.inTypeCheckingBlock = enclosing
		if typeChecking {
			switch child.Type() {
			case "elif_clause", "else_clause":
				// Runtime branches of the TYPE_CHECKING statement.
			default:
				p.inTypeCheckingBlock = true
			}
		}

		if p.parseImportStatements(child) {
			continue
		}
		if p.parseComments(child) {
			continue
		}
		p.parse(ctx, child)
	}
}
// Parse parses the code previously supplied through SetCodeAndFile and returns
// the collected imports, comments and main-block flag.
//
// The returned pointer refers to the parser's own output value. An error is
// returned when the code cannot be parsed at all, or when ctx is done: the
// tree walk stops early on cancellation, so the output gathered up to that
// point would be incomplete and is not returned.
func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) {
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	rootNode, err := ParseCode(p.code, p.relFilepath)
	if err != nil {
		return nil, err
	}

	p.output.HasMain = p.parseMain(ctx, rootNode)
	p.parse(ctx, rootNode)

	// parseMain and parse bail out silently when ctx is cancelled; surface
	// that here rather than reporting a partial result as success.
	if err := ctx.Err(); err != nil {
		return nil, err
	}
	return &p.output, nil
}
// ParseFile reads the file at repoRoot/relPackagePath/filename, hands its
// contents to SetCodeAndFile and returns the result of Parse. A read failure
// is returned unchanged.
func (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) {
	fullPath := filepath.Join(repoRoot, relPackagePath, filename)
	src, readErr := os.ReadFile(fullPath)
	if readErr != nil {
		return nil, readErr
	}
	p.SetCodeAndFile(src, relPackagePath, filename)
	return p.Parse(ctx)
}